1 #define BOOST_TEST_DYN_LINK
2 #define BOOST_TEST_MODULE test_mussa
3 #include <boost/test/unit_test.hpp>
5 #include <boost/filesystem/path.hpp>
6 #include <boost/filesystem/operations.hpp>
7 namespace fs = boost::filesystem;
8 #include <boost/assign/list_of.hpp>
9 #include <boost/assign/list_inserter.hpp>
10 #include <boost/assign.hpp>
11 namespace assign = boost::assign;
17 #include "alg/mussa.hpp"
18 #include "mussa_exceptions.hpp"
22 //! can we initialize a mussa object?
23 BOOST_AUTO_TEST_CASE( mussa_simple )
26 BOOST_CHECK_EQUAL(m.empty(), true);
27 BOOST_CHECK_EQUAL(m.get_name(), "" );
28 BOOST_CHECK_EQUAL(m.get_window(), 0);
29 BOOST_CHECK_EQUAL(m.get_threshold(), 0);
30 BOOST_CHECK_EQUAL(m.get_analysis_mode(), Mussa::TransitiveNway);
31 m.set_name( "hello" );
32 BOOST_CHECK_EQUAL(m.get_name(), "hello" );
34 BOOST_CHECK_EQUAL(m.get_window(), 30);
36 BOOST_CHECK_EQUAL(m.get_threshold(), 21);
37 BOOST_CHECK_EQUAL(m.get_soft_threshold(), 21);
38 m.set_soft_threshold(19);
39 BOOST_CHECK_EQUAL(m.get_soft_threshold(), 21);
40 m.set_soft_threshold(35);
41 BOOST_CHECK_EQUAL(m.get_soft_threshold(), 30);
42 m.set_soft_threshold(25);
43 BOOST_CHECK_EQUAL(m.get_soft_threshold(), 25);
44 m.set_analysis_mode(Mussa::RadialNway);
45 BOOST_CHECK_EQUAL(m.get_analysis_mode(), Mussa::RadialNway);
46 // make sure our path is empty
47 BOOST_CHECK_EQUAL(m.get_analysis_path().string(), fs::path().string() );
50 BOOST_CHECK_EQUAL(m.get_name(), "" );
51 BOOST_CHECK_EQUAL(m.get_window(), 0);
52 BOOST_CHECK_EQUAL(m.get_threshold(), 0);
53 BOOST_CHECK_EQUAL(m.get_analysis_mode(), Mussa::TransitiveNway);
56 BOOST_AUTO_TEST_CASE ( mussa_title )
60 BOOST_CHECK_EQUAL( m.get_title(), "Unnamed");
63 BOOST_CHECK_EQUAL( m.get_title(), foo);
64 string foopath_name("/my/silly/path");
65 fs::path foopath(foopath_name);
66 m.set_analysis_path(foopath);
67 BOOST_CHECK_EQUAL( m.get_title().size(), 14);
70 BOOST_AUTO_TEST_CASE( mussa_analysis_name )
73 m.set_analysis_mode( Mussa::TransitiveNway );
74 BOOST_CHECK_EQUAL( m.get_analysis_mode_name(), "Transitive" );
75 m.set_analysis_mode( Mussa::RadialNway );
76 BOOST_CHECK_EQUAL( m.get_analysis_mode_name(), "Radial" );
77 m.set_analysis_mode( Mussa::EntropyNway );
78 BOOST_CHECK_EQUAL( m.get_analysis_mode_name(), "Entropy" );
79 m.set_analysis_mode( Mussa::RecursiveNway);
80 BOOST_CHECK_EQUAL( m.get_analysis_mode_name(), "[deprecated] Recursive" );
83 BOOST_AUTO_TEST_CASE( mussa_sequences )
85 std::string s0("AAAANNNN");
86 std::string s1("GGGGNNNN");
87 std::string s2("TTTTNNNN");
90 BOOST_CHECK_EQUAL(analysis.empty(), true);
91 analysis.append_sequence(s0);
92 analysis.append_sequence(s1);
93 analysis.append_sequence(s2);
95 BOOST_CHECK_EQUAL( analysis.empty(), false);
96 BOOST_CHECK_EQUAL( analysis.sequences().size(), 3 );
97 BOOST_CHECK_EQUAL( *(analysis.sequences()[0]), s0);
98 BOOST_CHECK_EQUAL( *(analysis.sequences()[1]), s1);
99 BOOST_CHECK_EQUAL( *(analysis.sequences()[2]), s2);
102 // for some reason we can call nway once safely but it
103 // somehow changed things and caused a segfault
104 // fixed by adding a return statement in trans_path_search
105 BOOST_AUTO_TEST_CASE ( empty_mussa_set_threshold )
108 m.set_soft_threshold(15);
111 m.set_soft_threshold(25);
115 BOOST_AUTO_TEST_CASE( mussa_load_mupa_crlf )
117 fs::path example_path(EXAMPLE_DIR, fs::native);
118 fs::path seq_path(example_path / "seq" / "mouse_mck_pro.fa");
119 fs::path annot_path(example_path / "mm_mck3test.annot");
123 "ANA_NAME load_mupa_crlf\015\012");
124 mupa += "SEQUENCE " + seq_path.native_file_string() + "\015\012";
125 mupa += "ANNOTATION " + annot_path.native_file_string() + "\015\012";
127 istringstream mupa_stream(mupa);
130 m.load_mupa_stream( mupa_stream, base );
131 // Should run with no exceptions
134 BOOST_AUTO_TEST_CASE( mussa_load_mupa_comment_character )
136 fs::path mupa_path(EXAMPLE_DIR, fs::native);
137 fs::path seq_path = fs::initial_path() / "seq" / "mouse_mck_pro.fa";
138 fs::path annot_path = fs::initial_path() / "mm_mck3test.annot";
142 "ANA_NAME load_mupa_crlf\015\012");
143 mupa += "#SEQUENCE " + seq_path.native_file_string() + "\015\012";
144 mupa += "#ANNOTATION " + annot_path.native_file_string() + "\015\012";
146 istringstream mupa_stream(mupa);
149 m.load_mupa_stream( mupa_stream, base );
150 // Should run with no exceptions
153 BOOST_AUTO_TEST_CASE( mussa_load_mupa_exception )
157 "ANA_NAME load_mupa_crlf\015\012"
158 "mwahhaha I broke you!\n"
161 istringstream mupa_stream(mupa);
164 BOOST_CHECK_THROW(m.load_mupa_stream( mupa_stream, base ), mussa_load_error);
167 BOOST_AUTO_TEST_CASE( mussa_load_mupa )
169 fs::path mupa_path(EXAMPLE_DIR, fs::native);
170 fs::path result_path = fs::initial_path() / "mck3test_w30_t20";
171 mupa_path /= "mck3test.mupa";
174 m1.load_mupa_file( mupa_path );
176 m1.save( result_path );
177 BOOST_CHECK_EQUAL( m1.empty(), false);
178 BOOST_CHECK_EQUAL( m1.get_name(), std::string("mck3test") );
179 BOOST_CHECK( m1.size() > 0 );
180 BOOST_CHECK_EQUAL( m1.get_analysis_path().string(), result_path.string());
183 m2.load( result_path );
184 BOOST_CHECK_EQUAL( m2.empty(), false);
185 BOOST_CHECK_EQUAL( m2.get_name(), result_path.leaf() );
186 BOOST_CHECK_EQUAL( m1.size(), m2.size() );
187 BOOST_CHECK_EQUAL( result_path.string(), m2.get_analysis_path().string() );
191 BOOST_CHECK_EQUAL( m2.empty(), true);
192 BOOST_CHECK_EQUAL( m2.is_dirty(), false );
193 BOOST_CHECK_EQUAL( m2.get_analysis_path().string(), fs::path().string());
196 BOOST_AUTO_TEST_CASE( mussa_load_full_path )
199 fs::path full_path(fs::path(EXAMPLE_DIR, fs::native) / "mck3test.mupa");
200 m1.load_mupa_file( full_path );
203 BOOST_CHECK( m1.size() > 0);
204 BOOST_CHECK_EQUAL( m1.get_window(), 30 );
205 BOOST_CHECK_EQUAL( m1.get_threshold(), 20);
206 BOOST_CHECK_EQUAL( m1.is_dirty(), true);
207 BOOST_CHECK_EQUAL( m1.get_analysis_path().string(), "");
210 BOOST_AUTO_TEST_CASE( mussa_valid_motifs_in_new_analysis )
213 fs::path full_path(fs::path(EXAMPLE_DIR, fs::native) / "mck3test.mupa");
214 m1.load_mupa_file( full_path );
217 BOOST_CHECK( m1.sequences().size() > 0 );
218 BOOST_CHECK_EQUAL( m1.sequences()[0]->motifs().size(), 0 );
221 // make sure we know that mupa files cannot be directories
222 BOOST_AUTO_TEST_CASE( mussa_mupa_is_file_not_directory )
224 fs::path curdir(".");
226 BOOST_CHECK_THROW(m1.load_mupa_file( curdir ), mussa_load_error );
229 // catch error if annotation isn't a file
230 BOOST_AUTO_TEST_CASE( mussa_annotation_is_not_file )
233 fs::path full_path(fs::path(EXAMPLE_DIR, fs::native) / "directory.mupa");
234 BOOST_CHECK_THROW( m1.load_mupa_file( full_path ), mussa_load_error );
237 BOOST_AUTO_TEST_CASE( mussa_load_analysis )
239 fs::path example_dir(EXAMPLE_DIR, fs::native);
241 m1.load_mupa_file( example_dir / "mck3test.mupa" );
245 fs::path analysis_path = fs::initial_path() / "mck3test_w30_t20";
246 m2.load( analysis_path );
248 BOOST_CHECK_EQUAL( m1.size(), m2.size() );
249 BOOST_CHECK_EQUAL( m1.get_window(), m2.get_window() );
250 BOOST_CHECK_EQUAL( m1.get_threshold(), m2.get_threshold() );
251 BOOST_CHECK_EQUAL( m2.get_analysis_path().string(), analysis_path.string());
254 BOOST_AUTO_TEST_CASE( mussa_load_motif )
256 string data = "AAGG 1.0 1.0 0.0\n"
257 "GGTT 0.0 0.1 1.0 1.0\n";
259 istringstream test_istream(data);
262 m1.append_sequence("AAAAGGGGTTTT");
263 m1.append_sequence("GGGCCCCTTCCAATT");
264 m1.load_motifs(test_istream);
266 BOOST_CHECK_EQUAL( m1.motifs().size(), 2);
267 for (Mussa::vector_sequence_type::const_iterator seq_i = m1.sequences().begin();
268 seq_i != m1.sequences().end();
271 BOOST_CHECK( (*seq_i)->motifs().size() > 0 );
275 BOOST_AUTO_TEST_CASE( mussa_load_broken_motif )
277 string data = "AAGG 1.0 1.0 0.0\n"
278 "GGTT 0.0 0.1 1.0 1.0\n"
279 "ZZCTA 0.1 0.0 1.0\n";
281 istringstream test_istream(data);
284 m1.append_sequence("AAAAGGGGTTTT");
285 m1.append_sequence("GGGCCCCTTCCAATT");
286 BOOST_CHECK_THROW(m1.load_motifs(test_istream), motif_load_error);
288 BOOST_CHECK_EQUAL( m1.motifs().size(), 0);
291 BOOST_AUTO_TEST_CASE( mussa_named_motif )
293 string data = "CCAATT cat 0.1 0.2 0.3\n";
294 istringstream test_istream(data);
297 m1.append_sequence("AAAAGGGGTTTT");
298 m1.append_sequence("GGGCCCCTTCCAATT");
299 m1.load_motifs(test_istream);
301 std::set<Sequence> motifs = m1.motifs();
302 BOOST_REQUIRE_EQUAL(motifs.size(), 1);
303 BOOST_CHECK_EQUAL(motifs.begin()->get_name(), "cat");
306 BOOST_AUTO_TEST_CASE( mussa_weirdly_spaced_named_motif )
308 string data = "CCAATT cat_meow123 0.1 0.2 0.3\n";
309 istringstream test_istream(data);
312 m1.append_sequence("AAAAGGGGTTTT");
313 m1.append_sequence("GGGCCCCTTCCAATT");
314 m1.load_motifs(test_istream);
316 std::set<Sequence> motifs = m1.motifs();
317 BOOST_REQUIRE_EQUAL(motifs.size(), 1);
318 BOOST_CHECK_EQUAL(motifs.begin()->get_name(), "cat_meow123");
321 BOOST_AUTO_TEST_CASE( mussa_name_quoted_motif )
323 string data = "CCAATT \"cat meow 123\" 0.1 0.2 0.3\n";
324 istringstream test_istream(data);
327 m1.append_sequence("AAAAGGGGTTTT");
328 m1.append_sequence("GGGCCCCTTCCAATT");
329 m1.load_motifs(test_istream);
331 std::set<Sequence> motifs = m1.motifs();
332 BOOST_REQUIRE_EQUAL(motifs.size(), 1);
333 BOOST_CHECK_EQUAL(motifs.begin()->get_name(), "cat meow 123");
336 BOOST_AUTO_TEST_CASE( mussa_name_embedded_quote_motif )
338 // pretty obviously this shouldn't work as " are our delimiter
339 // and i'm too lazy to add support for \ in the parser
340 string data = "ATA 0.5 0.5 0.5\n"
341 "CCAATT \"cat \"meow 123\" 0.1 0.2 0.3\n";
342 istringstream test_istream(data);
345 m1.append_sequence("AAAAGGGGTTTT");
346 m1.append_sequence("GGGCCCCTTCCAATT");
347 BOOST_CHECK_THROW( m1.load_motifs(test_istream), motif_load_error);
349 std::set<Sequence> motifs = m1.motifs();
350 BOOST_REQUIRE_EQUAL(motifs.size(), 0);
353 BOOST_AUTO_TEST_CASE( mussa_save_motif )
355 string data = "ATA 1 1 1 1\n"
356 "CAT \"my name\" 1 0 0.5 0.5\n";
357 istringstream data_istream(data);
360 m1.append_sequence("AAAAGGGGTTTT");
361 m1.append_sequence("GGGCCCCTTCCAATT");
362 m1.load_motifs(data_istream);
365 ostringstream save_ostream(save);
366 m1.save_motifs(save_ostream);
368 istringstream reloaded_istream(save_ostream.str());
370 m2.append_sequence("AAAAGGGGTTTT");
371 m2.append_sequence("GGGCCCCTTCCAATT");
372 m2.load_motifs(reloaded_istream);
374 BOOST_REQUIRE_EQUAL(m1.motifs().size(), m2.motifs().size());
375 Mussa::motif_set::const_iterator m1motif = m1.motifs().begin();
376 Mussa::motif_set::const_iterator m2motif = m2.motifs().begin();
378 m1motif != m1.motifs().end() and m2motif != m2.motifs().end();
379 ++m1motif, ++m2motif)
381 BOOST_CHECK_EQUAL(m1motif->get_sequence(), m2motif->get_sequence());
382 BOOST_CHECK_EQUAL(m1motif->get_name(), m2motif->get_name());
383 BOOST_CHECK_EQUAL(m1.colorMapper()->lookup("motif", m1motif->get_sequence()),
384 m2.colorMapper()->lookup("motif", m2motif->get_sequence()));
388 BOOST_AUTO_TEST_CASE( mussa_add_motif )
390 vector<Sequence> motifs;
391 motifs.push_back("AAGG");
392 vector<Color> colors;
393 colors.push_back(Color(1.0, 0.0, 0.0));
396 m1.append_sequence("AAAAGGGGTTTT");
397 m1.append_sequence("GGGCCCCTTGGTT");
398 m1.set_motifs(motifs, colors);
399 int first_size = m1.motifs().size();
400 BOOST_CHECK_EQUAL( first_size, 1 );
401 BOOST_REQUIRE(first_size > 0);
402 BOOST_CHECK_EQUAL(*(m1.motifs().begin()), motifs.front());
403 // make sure that our sequences have the right number of motifs
404 BOOST_CHECK_EQUAL(m1.sequences()[0]->motifs().size(), 1);
405 BOOST_CHECK_EQUAL(m1.sequences()[1]->motifs().size(), 1); // because of rc
407 // verify that setting the motif clears the arrays
408 m1.set_motifs(motifs, colors);
409 BOOST_CHECK_EQUAL( first_size, m1.motifs().size() );
410 // make sure that our sequences have the right number of motifs
411 BOOST_CHECK_EQUAL(m1.sequences()[0]->motifs().size(), 1);
412 BOOST_CHECK_EQUAL(m1.sequences()[1]->motifs().size(), 1);
414 // add a different motif
416 motifs.push_back("CCTTGG");
417 BOOST_CHECK_EQUAL(motifs.size(), 1);
418 m1.set_motifs(motifs, colors);
419 BOOST_CHECK_EQUAL(m1.motifs().size(), 1);
420 BOOST_REQUIRE(m1.motifs().size() > 0);
421 BOOST_CHECK_EQUAL(*(m1.motifs().begin()), motifs.front());
422 BOOST_CHECK_EQUAL(m1.sequences()[0]->motifs().size(), 0);
423 BOOST_CHECK_EQUAL(m1.sequences()[1]->motifs().size(), 1);
425 // try a motif that doesn't exist
427 motifs.push_back("CCTTGG");
428 BOOST_CHECK_EQUAL(motifs.size(), 1);
429 m1.set_motifs(motifs, colors);
430 BOOST_CHECK_EQUAL(m1.motifs().size(), 1);
431 BOOST_CHECK_EQUAL(m1.sequences()[0]->motifs().size(), 0);
432 BOOST_CHECK_EQUAL(m1.sequences()[1]->motifs().size(), 1);
437 two_way_local_align_test(const Mussa::vector_sequence_type &seqs,
438 const list<ConservedPath::path_type>& result,
439 const list<vector<bool> >& reversed)
441 map<char, vector <char> > m;
442 assign::insert(m)('A', assign::list_of('A')('T') )
443 ('T', assign::list_of('T')('A') )
444 ('G', assign::list_of('G')('C') )
445 ('C', assign::list_of('C')('G') );
446 list<vector<bool> >::const_iterator rc_i = reversed.begin();
448 for(list<ConservedPath::path_type>::const_iterator base_i = result.begin();
449 base_i != result.end();
452 // since the reverse compliment flag is relative to the first sequence
453 // the first one should always be false
454 BOOST_CHECK_EQUAL( (*rc_i)[0], false );
455 const int first_path_basepair_index = (*base_i)[0];
456 const int second_path_basepair_index = (*base_i)[1];
457 const char first_basepair = (*seqs[0])[first_path_basepair_index];
458 const char second_basepair = (*seqs[1])[second_path_basepair_index];
459 // get our index into our reverse compliment map m
460 const int second_compliment_index = (*rc_i)[1];
461 // lookup the forward or reverse compliment depending on our rc flag
462 const char complimented_second = m[second_basepair][second_compliment_index];
464 BOOST_CHECK_EQUAL( first_basepair, complimented_second) ;
468 BOOST_AUTO_TEST_CASE( two_way_local_alignment )
470 string s0("GCGCATAT");
471 string s1("AAAAAAAT");
475 analysis.append_sequence(s0);
476 analysis.append_sequence(s1);
477 analysis.set_threshold(3);
478 analysis.set_window(4);
480 NwayPaths npath = analysis.paths();
481 BOOST_REQUIRE_EQUAL( npath.pathz.size(), 2 );
483 list<ConservedPath::path_type> result;
484 list<vector<bool> > reversed;
485 list<ConservedPath>::iterator pathz_i = npath.pathz.begin();
487 list<ConservedPath> selected_paths;
488 selected_paths.push_back(*pathz_i);
489 analysis.createLocalAlignment(selected_paths.begin(),
490 selected_paths.end(),
494 two_way_local_align_test(analysis.sequences(), result, reversed);
499 selected_paths.clear();
500 selected_paths.push_back(*pathz_i);
501 analysis.createLocalAlignment(selected_paths.begin(),
502 selected_paths.end(),
505 two_way_local_align_test(analysis.sequences(), result, reversed);
508 BOOST_AUTO_TEST_CASE( three_way_local_alignment )
510 string s0("AGCAGGGAGGGTTTAAATGGCACCCAGCAGTTGGTGTGAGG");
511 string s1("AGCGGGAAGGGTTTAAATGGCACCGGGCAGTTGGCGTGAGG");
512 string s2("CAGCGCCGGGGTTTAAATGGCACCGAGCAGTTGGCGCAGGG");
515 analysis.append_sequence(s0);
516 analysis.append_sequence(s1);
517 analysis.append_sequence(s2);
518 analysis.set_threshold(23);
519 analysis.set_window(30);
521 NwayPaths npath = analysis.paths();
522 BOOST_CHECK_EQUAL( npath.refined_pathz.size(), 1 );
524 list<ConservedPath::path_type> result;
525 list<vector<bool> > reversed;
526 // grab 1 path (since there's only one)
527 list<ConservedPath>::iterator pathz_i = npath.pathz.begin();
528 list<ConservedPath> selected_paths;
529 selected_paths.push_back(*pathz_i);
530 analysis.createLocalAlignment(selected_paths.begin(),
531 selected_paths.end(),
535 for(std::list<ConservedPath::path_type>::iterator result_i = result.begin();
536 result_i != result.end();
539 ConservedPath::path_element first_element = *(result_i->begin());
540 for (ConservedPath::path_type::iterator element_i = result_i->begin();
541 element_i != result_i->end();
544 BOOST_CHECK_EQUAL( *element_i, first_element );
545 BOOST_CHECK_EQUAL( s0[*element_i], s1[*element_i] );
546 BOOST_CHECK_EQUAL( s1[*element_i], s2[*element_i] );
547 BOOST_CHECK_EQUAL( s0[*element_i], s2[*element_i] );
552 BOOST_AUTO_TEST_CASE( mussa_window_larger_than_sequence )
554 string s0("AGCAGGG");
555 string s1("CAGCGGG");
558 analysis.append_sequence(s0);
559 analysis.append_sequence(s1);
560 analysis.set_threshold(23);
561 analysis.set_window(30);
562 BOOST_CHECK_THROW(analysis.analyze(), seqcomp_error);
565 BOOST_AUTO_TEST_CASE( subanalysis )
567 Sequence s1("AATGAAGATTTTAATGCTTTAATTTTGTTTTGTAAACTTCGAATTTCCAAAATTTGAAA");
568 Sequence s2("AGGAGCAAGTTCGCTTCATCGAGAATTTTTAATTTTTAGTCAAATTTTCCAATGTCTGA");
571 analysis.append_sequence(s1);
572 analysis.append_sequence(s2);
573 analysis.set_threshold(8);
574 analysis.set_window(8);
577 NwayPaths perfect_path = analysis.paths();
578 int perfect_match_count = perfect_path.pathz.size();
580 Sequence sub1 = s1.subseq(2, s1.size()-4);
581 Sequence sub2 = s2.subseq(2, s2.size()-4);
583 subanalysis.append_sequence(sub1);
584 subanalysis.append_sequence(sub2);
585 subanalysis.set_threshold(7);
586 subanalysis.set_window(8);
587 subanalysis.analyze();
588 NwayPaths one_mismatch_path = subanalysis.paths();
589 int one_mismatch_count = one_mismatch_path.pathz.size();
591 BOOST_CHECK( perfect_match_count < one_mismatch_count );
594 BOOST_AUTO_TEST_CASE( dirty_flag )
597 BOOST_CHECK_EQUAL(m.is_dirty(), false);
599 BOOST_CHECK_EQUAL(m.is_dirty(), true);
602 BOOST_CHECK_EQUAL(m.is_dirty(), true);
605 BOOST_CHECK_EQUAL(m.is_dirty(), true);
607 m.set_soft_threshold(1);
608 BOOST_CHECK_EQUAL(m.is_dirty(), false);
610 m.append_sequence("AAGGCCTT");
611 BOOST_CHECK_EQUAL(m.is_dirty(), true);