1 #include <boost/test/auto_unit_test.hpp>
2 #include <boost/filesystem/path.hpp>
3 #include <boost/filesystem/operations.hpp>
4 namespace fs = boost::filesystem;
5 #include <boost/assign/list_of.hpp>
6 #include <boost/assign/list_inserter.hpp>
7 #include <boost/assign.hpp>
8 namespace assign = boost::assign;
14 #include "alg/mussa.hpp"
15 #include "mussa_exceptions.hpp"
19 //! can we initialize a mussa object?
20 BOOST_AUTO_TEST_CASE( mussa_simple )
23 BOOST_CHECK_EQUAL(m.empty(), true);
24 BOOST_CHECK_EQUAL(m.get_name(), "" );
25 BOOST_CHECK_EQUAL(m.get_window(), 0);
26 BOOST_CHECK_EQUAL(m.get_threshold(), 0);
27 BOOST_CHECK_EQUAL(m.get_analysis_mode(), Mussa::TransitiveNway);
28 m.set_name( "hello" );
29 BOOST_CHECK_EQUAL(m.get_name(), "hello" );
31 BOOST_CHECK_EQUAL(m.get_window(), 30);
33 BOOST_CHECK_EQUAL(m.get_threshold(), 21);
34 BOOST_CHECK_EQUAL(m.get_soft_threshold(), 21);
35 m.set_soft_threshold(19);
36 BOOST_CHECK_EQUAL(m.get_soft_threshold(), 21);
37 m.set_soft_threshold(35);
38 BOOST_CHECK_EQUAL(m.get_soft_threshold(), 30);
39 m.set_soft_threshold(25);
40 BOOST_CHECK_EQUAL(m.get_soft_threshold(), 25);
41 m.set_analysis_mode(Mussa::RadialNway);
42 BOOST_CHECK_EQUAL(m.get_analysis_mode(), Mussa::RadialNway);
43 // make sure our path is empty
44 BOOST_CHECK_EQUAL(m.get_analysis_path().string(), fs::path().string() );
47 BOOST_CHECK_EQUAL(m.get_name(), "" );
48 BOOST_CHECK_EQUAL(m.get_window(), 0);
49 BOOST_CHECK_EQUAL(m.get_threshold(), 0);
50 BOOST_CHECK_EQUAL(m.get_analysis_mode(), Mussa::TransitiveNway);
53 BOOST_AUTO_TEST_CASE ( mussa_title )
57 BOOST_CHECK_EQUAL( m.get_title(), "Unnamed");
60 BOOST_CHECK_EQUAL( m.get_title(), foo);
61 string foopath_name("/my/silly/path");
62 fs::path foopath(foopath_name);
63 m.set_analysis_path(foopath);
64 BOOST_CHECK_EQUAL( m.get_title().size(), 14);
67 BOOST_AUTO_TEST_CASE( mussa_analysis_name )
70 m.set_analysis_mode( Mussa::TransitiveNway );
71 BOOST_CHECK_EQUAL( m.get_analysis_mode_name(), "Transitive" );
72 m.set_analysis_mode( Mussa::RadialNway );
73 BOOST_CHECK_EQUAL( m.get_analysis_mode_name(), "Radial" );
74 m.set_analysis_mode( Mussa::EntropyNway );
75 BOOST_CHECK_EQUAL( m.get_analysis_mode_name(), "Entropy" );
76 m.set_analysis_mode( Mussa::RecursiveNway);
77 BOOST_CHECK_EQUAL( m.get_analysis_mode_name(), "[deprecated] Recursive" );
80 BOOST_AUTO_TEST_CASE( mussa_sequences )
82 std::string s0("AAAANNNN");
83 std::string s1("GGGGNNNN");
84 std::string s2("TTTTNNNN");
87 BOOST_CHECK_EQUAL(analysis.empty(), true);
88 analysis.append_sequence(s0);
89 analysis.append_sequence(s1);
90 analysis.append_sequence(s2);
92 BOOST_CHECK_EQUAL( analysis.empty(), false);
93 BOOST_CHECK_EQUAL( analysis.sequences().size(), 3 );
94 BOOST_CHECK_EQUAL( *(analysis.sequences()[0]), s0);
95 BOOST_CHECK_EQUAL( *(analysis.sequences()[1]), s1);
96 BOOST_CHECK_EQUAL( *(analysis.sequences()[2]), s2);
99 // for some reason we can call nway once safely but it
100 // somehow changed things and caused a segfault
101 // fixed by adding a return statement in trans_path_search
102 BOOST_AUTO_TEST_CASE ( empty_mussa_set_threshold )
105 m.set_soft_threshold(15);
108 m.set_soft_threshold(25);
112 BOOST_AUTO_TEST_CASE( mussa_load_mupa )
114 fs::path mupa_path(EXAMPLE_DIR, fs::native);
115 fs::path result_path = fs::initial_path() / "mck3test_w30_t20";
116 mupa_path /= "mck3test.mupa";
119 m1.load_mupa_file( mupa_path );
121 m1.save( result_path );
122 BOOST_CHECK_EQUAL( m1.empty(), false);
123 BOOST_CHECK_EQUAL( m1.get_name(), std::string("mck3test") );
124 BOOST_CHECK( m1.size() > 0 );
125 BOOST_CHECK_EQUAL( m1.get_analysis_path().string(), result_path.string());
128 m2.load( result_path );
129 BOOST_CHECK_EQUAL( m2.empty(), false);
130 BOOST_CHECK_EQUAL( m2.get_name(), result_path.leaf() );
131 BOOST_CHECK_EQUAL( m1.size(), m2.size() );
132 BOOST_CHECK_EQUAL( result_path.string(), m2.get_analysis_path().string() );
136 BOOST_CHECK_EQUAL( m2.empty(), true);
137 BOOST_CHECK_EQUAL( m2.is_dirty(), false );
138 BOOST_CHECK_EQUAL( m2.get_analysis_path().string(), fs::path().string());
141 BOOST_AUTO_TEST_CASE( mussa_load_full_path )
144 fs::path full_path(fs::path(EXAMPLE_DIR, fs::native) / "mck3test.mupa");
145 m1.load_mupa_file( full_path );
148 BOOST_CHECK( m1.size() > 0);
149 BOOST_CHECK_EQUAL( m1.get_window(), 30 );
150 BOOST_CHECK_EQUAL( m1.get_threshold(), 20);
151 BOOST_CHECK_EQUAL( m1.is_dirty(), true);
152 BOOST_CHECK_EQUAL( m1.get_analysis_path().string(), "");
155 // make sure we know that mupa files cannot be directories
156 BOOST_AUTO_TEST_CASE( mussa_mupa_is_file_not_directory )
158 fs::path curdir(".");
160 BOOST_CHECK_THROW(m1.load_mupa_file( curdir ), mussa_load_error );
163 BOOST_AUTO_TEST_CASE( mussa_load_analysis )
165 fs::path example_dir(EXAMPLE_DIR, fs::native);
167 m1.load_mupa_file( example_dir / "mck3test.mupa" );
171 fs::path analysis_path = fs::initial_path() / "mck3test_w30_t20";
172 m2.load( analysis_path );
174 BOOST_CHECK_EQUAL( m1.size(), m2.size() );
175 BOOST_CHECK_EQUAL( m1.get_window(), m2.get_window() );
176 BOOST_CHECK_EQUAL( m1.get_threshold(), m2.get_threshold() );
177 BOOST_CHECK_EQUAL( m2.get_analysis_path().string(), analysis_path.string());
180 BOOST_AUTO_TEST_CASE( mussa_load_motif )
182 string data = "AAGG 1.0 1.0 0.0\n"
183 "GGTT 0.0 0.1 1.0 1.0\n";
185 istringstream test_istream(data);
188 m1.append_sequence("AAAAGGGGTTTT");
189 m1.append_sequence("GGGCCCCTTCCAATT");
190 m1.load_motifs(test_istream);
192 BOOST_CHECK_EQUAL( m1.motifs().size(), 2);
193 for (Mussa::vector_sequence_type::const_iterator seq_i = m1.sequences().begin();
194 seq_i != m1.sequences().end();
197 BOOST_CHECK( (*seq_i)->motifs().size() > 0 );
201 BOOST_AUTO_TEST_CASE( mussa_load_broken_motif )
203 string data = "AAGG 1.0 1.0 0.0\n"
204 "GGTT 0.0 0.1 1.0 1.0\n"
205 "ZZCTA 0.1 0.0 1.0\n";
207 istringstream test_istream(data);
210 m1.append_sequence("AAAAGGGGTTTT");
211 m1.append_sequence("GGGCCCCTTCCAATT");
212 BOOST_CHECK_THROW(m1.load_motifs(test_istream), motif_load_error);
214 BOOST_CHECK_EQUAL( m1.motifs().size(), 0);
217 BOOST_AUTO_TEST_CASE( mussa_named_motif )
219 string data = "CCAATT cat 0.1 0.2 0.3\n";
220 istringstream test_istream(data);
223 m1.append_sequence("AAAAGGGGTTTT");
224 m1.append_sequence("GGGCCCCTTCCAATT");
225 m1.load_motifs(test_istream);
227 std::set<Sequence> motifs = m1.motifs();
228 BOOST_REQUIRE_EQUAL(motifs.size(), 1);
229 BOOST_CHECK_EQUAL(motifs.begin()->get_name(), "cat");
232 BOOST_AUTO_TEST_CASE( mussa_weirdly_spaced_named_motif )
234 string data = "CCAATT cat_meow123 0.1 0.2 0.3\n";
235 istringstream test_istream(data);
238 m1.append_sequence("AAAAGGGGTTTT");
239 m1.append_sequence("GGGCCCCTTCCAATT");
240 m1.load_motifs(test_istream);
242 std::set<Sequence> motifs = m1.motifs();
243 BOOST_REQUIRE_EQUAL(motifs.size(), 1);
244 BOOST_CHECK_EQUAL(motifs.begin()->get_name(), "cat_meow123");
247 BOOST_AUTO_TEST_CASE( mussa_name_quoted_motif )
249 string data = "CCAATT \"cat meow 123\" 0.1 0.2 0.3\n";
250 istringstream test_istream(data);
253 m1.append_sequence("AAAAGGGGTTTT");
254 m1.append_sequence("GGGCCCCTTCCAATT");
255 m1.load_motifs(test_istream);
257 std::set<Sequence> motifs = m1.motifs();
258 BOOST_REQUIRE_EQUAL(motifs.size(), 1);
259 BOOST_CHECK_EQUAL(motifs.begin()->get_name(), "cat meow 123");
262 BOOST_AUTO_TEST_CASE( mussa_name_embedded_quote_motif )
264 // pretty obviously this shouldn't work as " are our delimiter
265 // and i'm too lazy to add support for \ in the parser
266 string data = "ATA 0.5 0.5 0.5\n"
267 "CCAATT \"cat \"meow 123\" 0.1 0.2 0.3\n";
268 istringstream test_istream(data);
271 m1.append_sequence("AAAAGGGGTTTT");
272 m1.append_sequence("GGGCCCCTTCCAATT");
273 BOOST_CHECK_THROW( m1.load_motifs(test_istream), motif_load_error);
275 std::set<Sequence> motifs = m1.motifs();
276 BOOST_REQUIRE_EQUAL(motifs.size(), 0);
279 BOOST_AUTO_TEST_CASE( mussa_save_motif )
281 string data = "ATA 1 1 1 1\n"
282 "CAT \"my name\" 1 0 0.5 0.5\n";
283 istringstream data_istream(data);
286 m1.append_sequence("AAAAGGGGTTTT");
287 m1.append_sequence("GGGCCCCTTCCAATT");
288 m1.load_motifs(data_istream);
291 ostringstream save_ostream(save);
292 m1.save_motifs(save_ostream);
294 istringstream reloaded_istream(save_ostream.str());
296 m2.append_sequence("AAAAGGGGTTTT");
297 m2.append_sequence("GGGCCCCTTCCAATT");
298 m2.load_motifs(reloaded_istream);
300 BOOST_REQUIRE_EQUAL(m1.motifs().size(), m2.motifs().size());
301 Mussa::motif_set::const_iterator m1motif = m1.motifs().begin();
302 Mussa::motif_set::const_iterator m2motif = m2.motifs().begin();
304 m1motif != m1.motifs().end() and m2motif != m2.motifs().end();
305 ++m1motif, ++m2motif)
307 BOOST_CHECK_EQUAL(m1motif->get_sequence(), m2motif->get_sequence());
308 BOOST_CHECK_EQUAL(m1motif->get_name(), m2motif->get_name());
309 BOOST_CHECK_EQUAL(m1.colorMapper()->lookup("motif", m1motif->get_sequence()),
310 m2.colorMapper()->lookup("motif", m2motif->get_sequence()));
314 BOOST_AUTO_TEST_CASE( mussa_add_motif )
316 vector<Sequence> motifs;
317 motifs.push_back("AAGG");
318 vector<Color> colors;
319 colors.push_back(Color(1.0, 0.0, 0.0));
322 m1.append_sequence("AAAAGGGGTTTT");
323 m1.append_sequence("GGGCCCCTTGGTT");
324 m1.set_motifs(motifs, colors);
325 int first_size = m1.motifs().size();
326 BOOST_CHECK_EQUAL( first_size, 1 );
327 BOOST_REQUIRE(first_size > 0);
328 BOOST_CHECK_EQUAL(*(m1.motifs().begin()), motifs.front());
329 // make sure that our sequences have the right number of motifs
330 BOOST_CHECK_EQUAL(m1.sequences()[0]->motifs().size(), 1);
331 BOOST_CHECK_EQUAL(m1.sequences()[1]->motifs().size(), 1); // because of rc
333 // verify that setting the motif clears the arrays
334 m1.set_motifs(motifs, colors);
335 BOOST_CHECK_EQUAL( first_size, m1.motifs().size() );
336 // make sure that our sequences have the right number of motifs
337 BOOST_CHECK_EQUAL(m1.sequences()[0]->motifs().size(), 1);
338 BOOST_CHECK_EQUAL(m1.sequences()[1]->motifs().size(), 1);
340 // add a different motif
342 motifs.push_back("CCTTGG");
343 BOOST_CHECK_EQUAL(motifs.size(), 1);
344 m1.set_motifs(motifs, colors);
345 BOOST_CHECK_EQUAL(m1.motifs().size(), 1);
346 BOOST_REQUIRE(m1.motifs().size() > 0);
347 BOOST_CHECK_EQUAL(*(m1.motifs().begin()), motifs.front());
348 BOOST_CHECK_EQUAL(m1.sequences()[0]->motifs().size(), 0);
349 BOOST_CHECK_EQUAL(m1.sequences()[1]->motifs().size(), 1);
351 // try a motif that doesn't exist
353 motifs.push_back("CCTTGG");
354 BOOST_CHECK_EQUAL(motifs.size(), 1);
355 m1.set_motifs(motifs, colors);
356 BOOST_CHECK_EQUAL(m1.motifs().size(), 1);
357 BOOST_CHECK_EQUAL(m1.sequences()[0]->motifs().size(), 0);
358 BOOST_CHECK_EQUAL(m1.sequences()[1]->motifs().size(), 1);
363 two_way_local_align_test(const Mussa::vector_sequence_type &seqs,
364 const list<ConservedPath::path_type>& result,
365 const list<vector<bool> >& reversed)
367 map<char, vector <char> > m;
368 assign::insert(m)('A', assign::list_of('A')('T') )
369 ('T', assign::list_of('T')('A') )
370 ('G', assign::list_of('G')('C') )
371 ('C', assign::list_of('C')('G') );
372 list<vector<bool> >::const_iterator rc_i = reversed.begin();
374 for(list<ConservedPath::path_type>::const_iterator base_i = result.begin();
375 base_i != result.end();
378 // since the reverse compliment flag is relative to the first sequence
379 // the first one should always be false
380 BOOST_CHECK_EQUAL( (*rc_i)[0], false );
381 const int first_path_basepair_index = (*base_i)[0];
382 const int second_path_basepair_index = (*base_i)[1];
383 const char first_basepair = (*seqs[0])[first_path_basepair_index];
384 const char second_basepair = (*seqs[1])[second_path_basepair_index];
385 // get our index into our reverse compliment map m
386 const int second_compliment_index = (*rc_i)[1];
387 // lookup the forward or reverse compliment depending on our rc flag
388 const char complimented_second = m[second_basepair][second_compliment_index];
390 BOOST_CHECK_EQUAL( first_basepair, complimented_second) ;
394 BOOST_AUTO_TEST_CASE( two_way_local_alignment )
396 string s0("GCGCATAT");
397 string s1("AAAAAAAT");
401 analysis.append_sequence(s0);
402 analysis.append_sequence(s1);
403 analysis.set_threshold(3);
404 analysis.set_window(4);
406 NwayPaths npath = analysis.paths();
407 BOOST_REQUIRE_EQUAL( npath.pathz.size(), 2 );
409 list<ConservedPath::path_type> result;
410 list<vector<bool> > reversed;
411 list<ConservedPath>::iterator pathz_i = npath.pathz.begin();
413 list<ConservedPath> selected_paths;
414 selected_paths.push_back(*pathz_i);
415 analysis.createLocalAlignment(selected_paths.begin(),
416 selected_paths.end(),
420 two_way_local_align_test(analysis.sequences(), result, reversed);
425 selected_paths.clear();
426 selected_paths.push_back(*pathz_i);
427 analysis.createLocalAlignment(selected_paths.begin(),
428 selected_paths.end(),
431 two_way_local_align_test(analysis.sequences(), result, reversed);
434 BOOST_AUTO_TEST_CASE( three_way_local_alignment )
436 string s0("AGCAGGGAGGGTTTAAATGGCACCCAGCAGTTGGTGTGAGG");
437 string s1("AGCGGGAAGGGTTTAAATGGCACCGGGCAGTTGGCGTGAGG");
438 string s2("CAGCGCCGGGGTTTAAATGGCACCGAGCAGTTGGCGCAGGG");
441 analysis.append_sequence(s0);
442 analysis.append_sequence(s1);
443 analysis.append_sequence(s2);
444 analysis.set_threshold(23);
445 analysis.set_window(30);
447 NwayPaths npath = analysis.paths();
448 BOOST_CHECK_EQUAL( npath.refined_pathz.size(), 1 );
450 list<ConservedPath::path_type> result;
451 list<vector<bool> > reversed;
452 // grab 1 path (since there's only one)
453 list<ConservedPath>::iterator pathz_i = npath.pathz.begin();
454 list<ConservedPath> selected_paths;
455 selected_paths.push_back(*pathz_i);
456 analysis.createLocalAlignment(selected_paths.begin(),
457 selected_paths.end(),
461 for(std::list<ConservedPath::path_type>::iterator result_i = result.begin();
462 result_i != result.end();
465 ConservedPath::path_element first_element = *(result_i->begin());
466 for (ConservedPath::path_type::iterator element_i = result_i->begin();
467 element_i != result_i->end();
470 BOOST_CHECK_EQUAL( *element_i, first_element );
471 BOOST_CHECK_EQUAL( s0[*element_i], s1[*element_i] );
472 BOOST_CHECK_EQUAL( s1[*element_i], s2[*element_i] );
473 BOOST_CHECK_EQUAL( s0[*element_i], s2[*element_i] );
478 BOOST_AUTO_TEST_CASE( mussa_window_larger_than_sequence )
480 string s0("AGCAGGG");
481 string s1("CAGCGGG");
484 analysis.append_sequence(s0);
485 analysis.append_sequence(s1);
486 analysis.set_threshold(23);
487 analysis.set_window(30);
488 BOOST_CHECK_THROW(analysis.analyze(), seqcomp_error);
491 BOOST_AUTO_TEST_CASE( subanalysis )
493 Sequence s1("AATGAAGATTTTAATGCTTTAATTTTGTTTTGTAAACTTCGAATTTCCAAAATTTGAAA");
494 Sequence s2("AGGAGCAAGTTCGCTTCATCGAGAATTTTTAATTTTTAGTCAAATTTTCCAATGTCTGA");
497 analysis.append_sequence(s1);
498 analysis.append_sequence(s2);
499 analysis.set_threshold(8);
500 analysis.set_window(8);
503 NwayPaths perfect_path = analysis.paths();
504 int perfect_match_count = perfect_path.pathz.size();
506 Sequence sub1 = s1.subseq(2, s1.size()-4);
507 Sequence sub2 = s2.subseq(2, s2.size()-4);
509 subanalysis.append_sequence(sub1);
510 subanalysis.append_sequence(sub2);
511 subanalysis.set_threshold(7);
512 subanalysis.set_window(8);
513 subanalysis.analyze();
514 NwayPaths one_mismatch_path = subanalysis.paths();
515 int one_mismatch_count = one_mismatch_path.pathz.size();
517 BOOST_CHECK( perfect_match_count < one_mismatch_count );
520 BOOST_AUTO_TEST_CASE( dirty_flag )
523 BOOST_CHECK_EQUAL(m.is_dirty(), false);
525 BOOST_CHECK_EQUAL(m.is_dirty(), true);
528 BOOST_CHECK_EQUAL(m.is_dirty(), true);
531 BOOST_CHECK_EQUAL(m.is_dirty(), true);
533 m.set_soft_threshold(1);
534 BOOST_CHECK_EQUAL(m.is_dirty(), false);
536 m.append_sequence("AAGGCCTT");
537 BOOST_CHECK_EQUAL(m.is_dirty(), true);