1 #define BOOST_AUTO_TEST_MAIN
2 #include <boost/test/auto_unit_test.hpp>
3 #include <boost/filesystem/path.hpp>
4 #include <boost/filesystem/operations.hpp>
5 namespace fs = boost::filesystem;
6 #include <boost/assign/list_of.hpp>
7 #include <boost/assign/list_inserter.hpp>
8 #include <boost/assign.hpp>
9 namespace assign = boost::assign;
15 #include "alg/mussa.hpp"
16 #include "mussa_exceptions.hpp"
20 //! can we initialize a mussa object?
21 BOOST_AUTO_TEST_CASE( mussa_simple )
24 BOOST_CHECK_EQUAL(m.empty(), true);
25 BOOST_CHECK_EQUAL(m.get_name(), "" );
26 BOOST_CHECK_EQUAL(m.get_window(), 0);
27 BOOST_CHECK_EQUAL(m.get_threshold(), 0);
28 BOOST_CHECK_EQUAL(m.get_analysis_mode(), Mussa::TransitiveNway);
29 m.set_name( "hello" );
30 BOOST_CHECK_EQUAL(m.get_name(), "hello" );
32 BOOST_CHECK_EQUAL(m.get_window(), 30);
34 BOOST_CHECK_EQUAL(m.get_threshold(), 21);
35 BOOST_CHECK_EQUAL(m.get_soft_threshold(), 21);
36 m.set_soft_threshold(19);
37 BOOST_CHECK_EQUAL(m.get_soft_threshold(), 21);
38 m.set_soft_threshold(35);
39 BOOST_CHECK_EQUAL(m.get_soft_threshold(), 30);
40 m.set_soft_threshold(25);
41 BOOST_CHECK_EQUAL(m.get_soft_threshold(), 25);
42 m.set_analysis_mode(Mussa::RadialNway);
43 BOOST_CHECK_EQUAL(m.get_analysis_mode(), Mussa::RadialNway);
44 // make sure our path is empty
45 BOOST_CHECK_EQUAL(m.get_analysis_path().string(), fs::path().string() );
48 BOOST_CHECK_EQUAL(m.get_name(), "" );
49 BOOST_CHECK_EQUAL(m.get_window(), 0);
50 BOOST_CHECK_EQUAL(m.get_threshold(), 0);
51 BOOST_CHECK_EQUAL(m.get_analysis_mode(), Mussa::TransitiveNway);
54 BOOST_AUTO_TEST_CASE ( mussa_title )
58 BOOST_CHECK_EQUAL( m.get_title(), "Unnamed");
61 BOOST_CHECK_EQUAL( m.get_title(), foo);
62 string foopath_name("/my/silly/path");
63 fs::path foopath(foopath_name);
64 m.set_analysis_path(foopath);
65 BOOST_CHECK_EQUAL( m.get_title().size(), 14);
68 BOOST_AUTO_TEST_CASE( mussa_analysis_name )
71 m.set_analysis_mode( Mussa::TransitiveNway );
72 BOOST_CHECK_EQUAL( m.get_analysis_mode_name(), "Transitive" );
73 m.set_analysis_mode( Mussa::RadialNway );
74 BOOST_CHECK_EQUAL( m.get_analysis_mode_name(), "Radial" );
75 m.set_analysis_mode( Mussa::EntropyNway );
76 BOOST_CHECK_EQUAL( m.get_analysis_mode_name(), "Entropy" );
77 m.set_analysis_mode( Mussa::RecursiveNway);
78 BOOST_CHECK_EQUAL( m.get_analysis_mode_name(), "[deprecated] Recursive" );
81 BOOST_AUTO_TEST_CASE( mussa_sequences )
83 std::string s0("AAAANNNN");
84 std::string s1("GGGGNNNN");
85 std::string s2("TTTTNNNN");
88 BOOST_CHECK_EQUAL(analysis.empty(), true);
89 analysis.append_sequence(s0);
90 analysis.append_sequence(s1);
91 analysis.append_sequence(s2);
93 BOOST_CHECK_EQUAL( analysis.empty(), false);
94 BOOST_CHECK_EQUAL( analysis.sequences().size(), 3 );
95 BOOST_CHECK_EQUAL( *(analysis.sequences()[0]), s0);
96 BOOST_CHECK_EQUAL( *(analysis.sequences()[1]), s1);
97 BOOST_CHECK_EQUAL( *(analysis.sequences()[2]), s2);
100 // for some reason we can call nway once safely but it
101 // somehow changed things and caused a segfault
102 // fixed by adding a return statement in trans_path_search
103 BOOST_AUTO_TEST_CASE ( empty_mussa_set_threshold )
106 m.set_soft_threshold(15);
109 m.set_soft_threshold(25);
113 BOOST_AUTO_TEST_CASE( mussa_load_mupa_crlf )
115 fs::path example_path(EXAMPLE_DIR, fs::native);
116 fs::path seq_path(example_path / "seq" / "mouse_mck_pro.fa");
117 fs::path annot_path(example_path / "mm_mck3test.annot");
121 "ANA_NAME load_mupa_crlf\015\012");
122 mupa += "SEQUENCE " + seq_path.native_file_string() + "\015\012";
123 mupa += "ANNOTATION " + annot_path.native_file_string() + "\015\012";
125 istringstream mupa_stream(mupa);
128 m.load_mupa_stream( mupa_stream, base );
129 // Should run with no exceptions
132 BOOST_AUTO_TEST_CASE( mussa_load_mupa )
134 fs::path mupa_path(EXAMPLE_DIR, fs::native);
135 fs::path result_path = fs::initial_path() / "mck3test_w30_t20";
136 mupa_path /= "mck3test.mupa";
139 m1.load_mupa_file( mupa_path );
141 m1.save( result_path );
142 BOOST_CHECK_EQUAL( m1.empty(), false);
143 BOOST_CHECK_EQUAL( m1.get_name(), std::string("mck3test") );
144 BOOST_CHECK( m1.size() > 0 );
145 BOOST_CHECK_EQUAL( m1.get_analysis_path().string(), result_path.string());
148 m2.load( result_path );
149 BOOST_CHECK_EQUAL( m2.empty(), false);
150 BOOST_CHECK_EQUAL( m2.get_name(), result_path.leaf() );
151 BOOST_CHECK_EQUAL( m1.size(), m2.size() );
152 BOOST_CHECK_EQUAL( result_path.string(), m2.get_analysis_path().string() );
156 BOOST_CHECK_EQUAL( m2.empty(), true);
157 BOOST_CHECK_EQUAL( m2.is_dirty(), false );
158 BOOST_CHECK_EQUAL( m2.get_analysis_path().string(), fs::path().string());
161 BOOST_AUTO_TEST_CASE( mussa_load_full_path )
164 fs::path full_path(fs::path(EXAMPLE_DIR, fs::native) / "mck3test.mupa");
165 m1.load_mupa_file( full_path );
168 BOOST_CHECK( m1.size() > 0);
169 BOOST_CHECK_EQUAL( m1.get_window(), 30 );
170 BOOST_CHECK_EQUAL( m1.get_threshold(), 20);
171 BOOST_CHECK_EQUAL( m1.is_dirty(), true);
172 BOOST_CHECK_EQUAL( m1.get_analysis_path().string(), "");
175 BOOST_AUTO_TEST_CASE( mussa_valid_motifs_in_new_analysis )
178 fs::path full_path(fs::path(EXAMPLE_DIR, fs::native) / "mck3test.mupa");
179 m1.load_mupa_file( full_path );
182 BOOST_CHECK( m1.sequences().size() > 0 );
183 BOOST_CHECK_EQUAL( m1.sequences()[0]->motifs().size(), 0 );
186 // make sure we know that mupa files cannot be directories
187 BOOST_AUTO_TEST_CASE( mussa_mupa_is_file_not_directory )
189 fs::path curdir(".");
191 BOOST_CHECK_THROW(m1.load_mupa_file( curdir ), mussa_load_error );
194 // catch error if annotation isn't a file
195 BOOST_AUTO_TEST_CASE( mussa_annotation_is_not_file )
198 fs::path full_path(fs::path(EXAMPLE_DIR, fs::native) / "directory.mupa");
199 BOOST_CHECK_THROW( m1.load_mupa_file( full_path ), mussa_load_error );
202 BOOST_AUTO_TEST_CASE( mussa_load_analysis )
204 fs::path example_dir(EXAMPLE_DIR, fs::native);
206 m1.load_mupa_file( example_dir / "mck3test.mupa" );
210 fs::path analysis_path = fs::initial_path() / "mck3test_w30_t20";
211 m2.load( analysis_path );
213 BOOST_CHECK_EQUAL( m1.size(), m2.size() );
214 BOOST_CHECK_EQUAL( m1.get_window(), m2.get_window() );
215 BOOST_CHECK_EQUAL( m1.get_threshold(), m2.get_threshold() );
216 BOOST_CHECK_EQUAL( m2.get_analysis_path().string(), analysis_path.string());
219 BOOST_AUTO_TEST_CASE( mussa_load_motif )
221 string data = "AAGG 1.0 1.0 0.0\n"
222 "GGTT 0.0 0.1 1.0 1.0\n";
224 istringstream test_istream(data);
227 m1.append_sequence("AAAAGGGGTTTT");
228 m1.append_sequence("GGGCCCCTTCCAATT");
229 m1.load_motifs(test_istream);
231 BOOST_CHECK_EQUAL( m1.motifs().size(), 2);
232 for (Mussa::vector_sequence_type::const_iterator seq_i = m1.sequences().begin();
233 seq_i != m1.sequences().end();
236 BOOST_CHECK( (*seq_i)->motifs().size() > 0 );
240 BOOST_AUTO_TEST_CASE( mussa_load_broken_motif )
242 string data = "AAGG 1.0 1.0 0.0\n"
243 "GGTT 0.0 0.1 1.0 1.0\n"
244 "ZZCTA 0.1 0.0 1.0\n";
246 istringstream test_istream(data);
249 m1.append_sequence("AAAAGGGGTTTT");
250 m1.append_sequence("GGGCCCCTTCCAATT");
251 BOOST_CHECK_THROW(m1.load_motifs(test_istream), motif_load_error);
253 BOOST_CHECK_EQUAL( m1.motifs().size(), 0);
256 BOOST_AUTO_TEST_CASE( mussa_named_motif )
258 string data = "CCAATT cat 0.1 0.2 0.3\n";
259 istringstream test_istream(data);
262 m1.append_sequence("AAAAGGGGTTTT");
263 m1.append_sequence("GGGCCCCTTCCAATT");
264 m1.load_motifs(test_istream);
266 std::set<Sequence> motifs = m1.motifs();
267 BOOST_REQUIRE_EQUAL(motifs.size(), 1);
268 BOOST_CHECK_EQUAL(motifs.begin()->get_name(), "cat");
271 BOOST_AUTO_TEST_CASE( mussa_weirdly_spaced_named_motif )
273 string data = "CCAATT cat_meow123 0.1 0.2 0.3\n";
274 istringstream test_istream(data);
277 m1.append_sequence("AAAAGGGGTTTT");
278 m1.append_sequence("GGGCCCCTTCCAATT");
279 m1.load_motifs(test_istream);
281 std::set<Sequence> motifs = m1.motifs();
282 BOOST_REQUIRE_EQUAL(motifs.size(), 1);
283 BOOST_CHECK_EQUAL(motifs.begin()->get_name(), "cat_meow123");
286 BOOST_AUTO_TEST_CASE( mussa_name_quoted_motif )
288 string data = "CCAATT \"cat meow 123\" 0.1 0.2 0.3\n";
289 istringstream test_istream(data);
292 m1.append_sequence("AAAAGGGGTTTT");
293 m1.append_sequence("GGGCCCCTTCCAATT");
294 m1.load_motifs(test_istream);
296 std::set<Sequence> motifs = m1.motifs();
297 BOOST_REQUIRE_EQUAL(motifs.size(), 1);
298 BOOST_CHECK_EQUAL(motifs.begin()->get_name(), "cat meow 123");
301 BOOST_AUTO_TEST_CASE( mussa_name_embedded_quote_motif )
303 // pretty obviously this shouldn't work as " are our delimiter
304 // and i'm too lazy to add support for \ in the parser
305 string data = "ATA 0.5 0.5 0.5\n"
306 "CCAATT \"cat \"meow 123\" 0.1 0.2 0.3\n";
307 istringstream test_istream(data);
310 m1.append_sequence("AAAAGGGGTTTT");
311 m1.append_sequence("GGGCCCCTTCCAATT");
312 BOOST_CHECK_THROW( m1.load_motifs(test_istream), motif_load_error);
314 std::set<Sequence> motifs = m1.motifs();
315 BOOST_REQUIRE_EQUAL(motifs.size(), 0);
318 BOOST_AUTO_TEST_CASE( mussa_save_motif )
320 string data = "ATA 1 1 1 1\n"
321 "CAT \"my name\" 1 0 0.5 0.5\n";
322 istringstream data_istream(data);
325 m1.append_sequence("AAAAGGGGTTTT");
326 m1.append_sequence("GGGCCCCTTCCAATT");
327 m1.load_motifs(data_istream);
330 ostringstream save_ostream(save);
331 m1.save_motifs(save_ostream);
333 istringstream reloaded_istream(save_ostream.str());
335 m2.append_sequence("AAAAGGGGTTTT");
336 m2.append_sequence("GGGCCCCTTCCAATT");
337 m2.load_motifs(reloaded_istream);
339 BOOST_REQUIRE_EQUAL(m1.motifs().size(), m2.motifs().size());
340 Mussa::motif_set::const_iterator m1motif = m1.motifs().begin();
341 Mussa::motif_set::const_iterator m2motif = m2.motifs().begin();
343 m1motif != m1.motifs().end() and m2motif != m2.motifs().end();
344 ++m1motif, ++m2motif)
346 BOOST_CHECK_EQUAL(m1motif->get_sequence(), m2motif->get_sequence());
347 BOOST_CHECK_EQUAL(m1motif->get_name(), m2motif->get_name());
348 BOOST_CHECK_EQUAL(m1.colorMapper()->lookup("motif", m1motif->get_sequence()),
349 m2.colorMapper()->lookup("motif", m2motif->get_sequence()));
353 BOOST_AUTO_TEST_CASE( mussa_add_motif )
355 vector<Sequence> motifs;
356 motifs.push_back("AAGG");
357 vector<Color> colors;
358 colors.push_back(Color(1.0, 0.0, 0.0));
361 m1.append_sequence("AAAAGGGGTTTT");
362 m1.append_sequence("GGGCCCCTTGGTT");
363 m1.set_motifs(motifs, colors);
364 int first_size = m1.motifs().size();
365 BOOST_CHECK_EQUAL( first_size, 1 );
366 BOOST_REQUIRE(first_size > 0);
367 BOOST_CHECK_EQUAL(*(m1.motifs().begin()), motifs.front());
368 // make sure that our sequences have the right number of motifs
369 BOOST_CHECK_EQUAL(m1.sequences()[0]->motifs().size(), 1);
370 BOOST_CHECK_EQUAL(m1.sequences()[1]->motifs().size(), 1); // because of rc
372 // verify that setting the motif clears the arrays
373 m1.set_motifs(motifs, colors);
374 BOOST_CHECK_EQUAL( first_size, m1.motifs().size() );
375 // make sure that our sequences have the right number of motifs
376 BOOST_CHECK_EQUAL(m1.sequences()[0]->motifs().size(), 1);
377 BOOST_CHECK_EQUAL(m1.sequences()[1]->motifs().size(), 1);
379 // add a different motif
381 motifs.push_back("CCTTGG");
382 BOOST_CHECK_EQUAL(motifs.size(), 1);
383 m1.set_motifs(motifs, colors);
384 BOOST_CHECK_EQUAL(m1.motifs().size(), 1);
385 BOOST_REQUIRE(m1.motifs().size() > 0);
386 BOOST_CHECK_EQUAL(*(m1.motifs().begin()), motifs.front());
387 BOOST_CHECK_EQUAL(m1.sequences()[0]->motifs().size(), 0);
388 BOOST_CHECK_EQUAL(m1.sequences()[1]->motifs().size(), 1);
390 // try a motif that doesn't exist
392 motifs.push_back("CCTTGG");
393 BOOST_CHECK_EQUAL(motifs.size(), 1);
394 m1.set_motifs(motifs, colors);
395 BOOST_CHECK_EQUAL(m1.motifs().size(), 1);
396 BOOST_CHECK_EQUAL(m1.sequences()[0]->motifs().size(), 0);
397 BOOST_CHECK_EQUAL(m1.sequences()[1]->motifs().size(), 1);
402 two_way_local_align_test(const Mussa::vector_sequence_type &seqs,
403 const list<ConservedPath::path_type>& result,
404 const list<vector<bool> >& reversed)
406 map<char, vector <char> > m;
407 assign::insert(m)('A', assign::list_of('A')('T') )
408 ('T', assign::list_of('T')('A') )
409 ('G', assign::list_of('G')('C') )
410 ('C', assign::list_of('C')('G') );
411 list<vector<bool> >::const_iterator rc_i = reversed.begin();
413 for(list<ConservedPath::path_type>::const_iterator base_i = result.begin();
414 base_i != result.end();
417 // since the reverse compliment flag is relative to the first sequence
418 // the first one should always be false
419 BOOST_CHECK_EQUAL( (*rc_i)[0], false );
420 const int first_path_basepair_index = (*base_i)[0];
421 const int second_path_basepair_index = (*base_i)[1];
422 const char first_basepair = (*seqs[0])[first_path_basepair_index];
423 const char second_basepair = (*seqs[1])[second_path_basepair_index];
424 // get our index into our reverse compliment map m
425 const int second_compliment_index = (*rc_i)[1];
426 // lookup the forward or reverse compliment depending on our rc flag
427 const char complimented_second = m[second_basepair][second_compliment_index];
429 BOOST_CHECK_EQUAL( first_basepair, complimented_second) ;
433 BOOST_AUTO_TEST_CASE( two_way_local_alignment )
435 string s0("GCGCATAT");
436 string s1("AAAAAAAT");
440 analysis.append_sequence(s0);
441 analysis.append_sequence(s1);
442 analysis.set_threshold(3);
443 analysis.set_window(4);
445 NwayPaths npath = analysis.paths();
446 BOOST_REQUIRE_EQUAL( npath.pathz.size(), 2 );
448 list<ConservedPath::path_type> result;
449 list<vector<bool> > reversed;
450 list<ConservedPath>::iterator pathz_i = npath.pathz.begin();
452 list<ConservedPath> selected_paths;
453 selected_paths.push_back(*pathz_i);
454 analysis.createLocalAlignment(selected_paths.begin(),
455 selected_paths.end(),
459 two_way_local_align_test(analysis.sequences(), result, reversed);
464 selected_paths.clear();
465 selected_paths.push_back(*pathz_i);
466 analysis.createLocalAlignment(selected_paths.begin(),
467 selected_paths.end(),
470 two_way_local_align_test(analysis.sequences(), result, reversed);
473 BOOST_AUTO_TEST_CASE( three_way_local_alignment )
475 string s0("AGCAGGGAGGGTTTAAATGGCACCCAGCAGTTGGTGTGAGG");
476 string s1("AGCGGGAAGGGTTTAAATGGCACCGGGCAGTTGGCGTGAGG");
477 string s2("CAGCGCCGGGGTTTAAATGGCACCGAGCAGTTGGCGCAGGG");
480 analysis.append_sequence(s0);
481 analysis.append_sequence(s1);
482 analysis.append_sequence(s2);
483 analysis.set_threshold(23);
484 analysis.set_window(30);
486 NwayPaths npath = analysis.paths();
487 BOOST_CHECK_EQUAL( npath.refined_pathz.size(), 1 );
489 list<ConservedPath::path_type> result;
490 list<vector<bool> > reversed;
491 // grab 1 path (since there's only one)
492 list<ConservedPath>::iterator pathz_i = npath.pathz.begin();
493 list<ConservedPath> selected_paths;
494 selected_paths.push_back(*pathz_i);
495 analysis.createLocalAlignment(selected_paths.begin(),
496 selected_paths.end(),
500 for(std::list<ConservedPath::path_type>::iterator result_i = result.begin();
501 result_i != result.end();
504 ConservedPath::path_element first_element = *(result_i->begin());
505 for (ConservedPath::path_type::iterator element_i = result_i->begin();
506 element_i != result_i->end();
509 BOOST_CHECK_EQUAL( *element_i, first_element );
510 BOOST_CHECK_EQUAL( s0[*element_i], s1[*element_i] );
511 BOOST_CHECK_EQUAL( s1[*element_i], s2[*element_i] );
512 BOOST_CHECK_EQUAL( s0[*element_i], s2[*element_i] );
517 BOOST_AUTO_TEST_CASE( mussa_window_larger_than_sequence )
519 string s0("AGCAGGG");
520 string s1("CAGCGGG");
523 analysis.append_sequence(s0);
524 analysis.append_sequence(s1);
525 analysis.set_threshold(23);
526 analysis.set_window(30);
527 BOOST_CHECK_THROW(analysis.analyze(), seqcomp_error);
530 BOOST_AUTO_TEST_CASE( subanalysis )
532 Sequence s1("AATGAAGATTTTAATGCTTTAATTTTGTTTTGTAAACTTCGAATTTCCAAAATTTGAAA");
533 Sequence s2("AGGAGCAAGTTCGCTTCATCGAGAATTTTTAATTTTTAGTCAAATTTTCCAATGTCTGA");
536 analysis.append_sequence(s1);
537 analysis.append_sequence(s2);
538 analysis.set_threshold(8);
539 analysis.set_window(8);
542 NwayPaths perfect_path = analysis.paths();
543 int perfect_match_count = perfect_path.pathz.size();
545 Sequence sub1 = s1.subseq(2, s1.size()-4);
546 Sequence sub2 = s2.subseq(2, s2.size()-4);
548 subanalysis.append_sequence(sub1);
549 subanalysis.append_sequence(sub2);
550 subanalysis.set_threshold(7);
551 subanalysis.set_window(8);
552 subanalysis.analyze();
553 NwayPaths one_mismatch_path = subanalysis.paths();
554 int one_mismatch_count = one_mismatch_path.pathz.size();
556 BOOST_CHECK( perfect_match_count < one_mismatch_count );
559 BOOST_AUTO_TEST_CASE( dirty_flag )
562 BOOST_CHECK_EQUAL(m.is_dirty(), false);
564 BOOST_CHECK_EQUAL(m.is_dirty(), true);
567 BOOST_CHECK_EQUAL(m.is_dirty(), true);
570 BOOST_CHECK_EQUAL(m.is_dirty(), true);
572 m.set_soft_threshold(1);
573 BOOST_CHECK_EQUAL(m.is_dirty(), false);
575 m.append_sequence("AAGGCCTT");
576 BOOST_CHECK_EQUAL(m.is_dirty(), true);