1 #include <boost/test/auto_unit_test.hpp>
2 #include <boost/filesystem/path.hpp>
3 #include <boost/filesystem/operations.hpp>
4 namespace fs = boost::filesystem;
5 #include <boost/assign/list_of.hpp>
6 #include <boost/assign/list_inserter.hpp>
7 #include <boost/assign.hpp>
8 namespace assign = boost::assign;
14 #include "alg/mussa.hpp"
15 #include "mussa_exceptions.hpp"
19 //! can we initialize a mussa object?
20 BOOST_AUTO_TEST_CASE( mussa_simple )
23 BOOST_CHECK_EQUAL(m.empty(), true);
24 BOOST_CHECK_EQUAL(m.get_name(), "" );
25 BOOST_CHECK_EQUAL(m.get_window(), 0);
26 BOOST_CHECK_EQUAL(m.get_threshold(), 0);
27 BOOST_CHECK_EQUAL(m.get_analysis_mode(), Mussa::TransitiveNway);
28 m.set_name( "hello" );
29 BOOST_CHECK_EQUAL(m.get_name(), "hello" );
31 BOOST_CHECK_EQUAL(m.get_window(), 30);
33 BOOST_CHECK_EQUAL(m.get_threshold(), 21);
34 BOOST_CHECK_EQUAL(m.get_soft_threshold(), 21);
35 m.set_soft_threshold(19);
36 BOOST_CHECK_EQUAL(m.get_soft_threshold(), 21);
37 m.set_soft_threshold(35);
38 BOOST_CHECK_EQUAL(m.get_soft_threshold(), 30);
39 m.set_soft_threshold(25);
40 BOOST_CHECK_EQUAL(m.get_soft_threshold(), 25);
41 m.set_analysis_mode(Mussa::RadialNway);
42 BOOST_CHECK_EQUAL(m.get_analysis_mode(), Mussa::RadialNway);
43 // make sure our path is empty
44 BOOST_CHECK_EQUAL(m.get_analysis_path().string(), fs::path().string() );
47 BOOST_CHECK_EQUAL(m.get_name(), "" );
48 BOOST_CHECK_EQUAL(m.get_window(), 0);
49 BOOST_CHECK_EQUAL(m.get_threshold(), 0);
50 BOOST_CHECK_EQUAL(m.get_analysis_mode(), Mussa::TransitiveNway);
53 BOOST_AUTO_TEST_CASE( mussa_analysis_name )
56 m.set_analysis_mode( Mussa::TransitiveNway );
57 BOOST_CHECK_EQUAL( m.get_analysis_mode_name(), "Transitive" );
58 m.set_analysis_mode( Mussa::RadialNway );
59 BOOST_CHECK_EQUAL( m.get_analysis_mode_name(), "Radial" );
60 m.set_analysis_mode( Mussa::EntropyNway );
61 BOOST_CHECK_EQUAL( m.get_analysis_mode_name(), "Entropy" );
62 m.set_analysis_mode( Mussa::RecursiveNway);
63 BOOST_CHECK_EQUAL( m.get_analysis_mode_name(), "[deprecated] Recursive" );
66 BOOST_AUTO_TEST_CASE( mussa_sequences )
68 std::string s0("AAAANNNN");
69 std::string s1("GGGGNNNN");
70 std::string s2("TTTTNNNN");
73 BOOST_CHECK_EQUAL(analysis.empty(), true);
74 analysis.append_sequence(s0);
75 analysis.append_sequence(s1);
76 analysis.append_sequence(s2);
78 BOOST_CHECK_EQUAL( analysis.empty(), false);
79 BOOST_CHECK_EQUAL( analysis.sequences().size(), 3 );
80 BOOST_CHECK_EQUAL( *(analysis.sequences()[0]), s0);
81 BOOST_CHECK_EQUAL( *(analysis.sequences()[1]), s1);
82 BOOST_CHECK_EQUAL( *(analysis.sequences()[2]), s2);
85 // for some reason we can call nway once safely but it
86 // somehow changed things and caused a segfault
87 // fixed by adding a return statement in trans_path_search
88 BOOST_AUTO_TEST_CASE ( empty_mussa_set_threshold )
91 m.set_soft_threshold(15);
94 m.set_soft_threshold(25);
98 BOOST_AUTO_TEST_CASE( mussa_load_mupa )
100 fs::path mupa_path(EXAMPLE_DIR, fs::native);
101 fs::path result_path = fs::initial_path() / "mck3test_w30_t20";
102 mupa_path /= "mck3test.mupa";
105 m1.load_mupa_file( mupa_path );
107 m1.save( result_path );
108 BOOST_CHECK_EQUAL( m1.empty(), false);
109 BOOST_CHECK_EQUAL( m1.get_name(), std::string("mck3test") );
110 BOOST_CHECK( m1.size() > 0 );
111 BOOST_CHECK_EQUAL( m1.get_analysis_path().string(), result_path.string());
114 m2.load( result_path );
115 BOOST_CHECK_EQUAL( m2.empty(), false);
116 BOOST_CHECK_EQUAL( m2.get_name(), result_path.leaf() );
117 BOOST_CHECK_EQUAL( m1.size(), m2.size() );
118 BOOST_CHECK_EQUAL( result_path.string(), m2.get_analysis_path().string() );
122 BOOST_CHECK_EQUAL( m2.empty(), true);
123 BOOST_CHECK_EQUAL( m2.is_dirty(), false );
124 BOOST_CHECK_EQUAL( m2.get_analysis_path().string(), fs::path().string());
127 BOOST_AUTO_TEST_CASE( mussa_load_full_path )
130 fs::path full_path(fs::path(EXAMPLE_DIR, fs::native) / "mck3test.mupa");
131 m1.load_mupa_file( full_path );
134 BOOST_CHECK( m1.size() > 0);
135 BOOST_CHECK_EQUAL( m1.get_window(), 30 );
136 BOOST_CHECK_EQUAL( m1.get_threshold(), 20);
137 BOOST_CHECK_EQUAL( m1.is_dirty(), true);
138 BOOST_CHECK_EQUAL( m1.get_analysis_path().string(), "");
141 // make sure we know that mupa files cannot be directories
142 BOOST_AUTO_TEST_CASE( mussa_mupa_is_file_not_directory )
144 fs::path curdir(".");
146 BOOST_CHECK_THROW(m1.load_mupa_file( curdir ), mussa_load_error );
149 BOOST_AUTO_TEST_CASE( mussa_load_analysis )
151 fs::path example_dir(EXAMPLE_DIR, fs::native);
153 m1.load_mupa_file( example_dir / "mck3test.mupa" );
157 fs::path analysis_path = fs::initial_path() / "mck3test_w30_t20";
158 m2.load( analysis_path );
160 BOOST_CHECK_EQUAL( m1.size(), m2.size() );
161 BOOST_CHECK_EQUAL( m1.get_window(), m2.get_window() );
162 BOOST_CHECK_EQUAL( m1.get_threshold(), m2.get_threshold() );
163 BOOST_CHECK_EQUAL( m2.get_analysis_path().string(), analysis_path.string());
166 BOOST_AUTO_TEST_CASE( mussa_load_motif )
168 string data = "AAGG 1.0 1.0 0.0\n"
172 istringstream test_istream(data);
175 m1.append_sequence("AAAAGGGGTTTT");
176 m1.append_sequence("GGGCCCCTTCCAATT");
177 m1.load_motifs(test_istream);
179 for (Mussa::vector_sequence_type::const_iterator seq_i = m1.sequences().begin();
180 seq_i != m1.sequences().end();
183 BOOST_CHECK( (*seq_i)->motifs().size() > 0 );
187 BOOST_AUTO_TEST_CASE( mussa_named_motif )
189 string data = "CCAATT cat 0.1 0.2 0.3\n";
190 istringstream test_istream(data);
193 m1.append_sequence("AAAAGGGGTTTT");
194 m1.append_sequence("GGGCCCCTTCCAATT");
195 m1.load_motifs(test_istream);
197 std::set<Sequence> motifs = m1.motifs();
198 BOOST_REQUIRE_EQUAL(motifs.size(), 1);
199 BOOST_CHECK_EQUAL(motifs.begin()->get_name(), "cat");
202 BOOST_AUTO_TEST_CASE( mussa_add_motif )
204 vector<Sequence> motifs;
205 motifs.push_back("AAGG");
206 vector<Color> colors;
207 colors.push_back(Color(1.0, 0.0, 0.0));
210 m1.append_sequence("AAAAGGGGTTTT");
211 m1.append_sequence("GGGCCCCTTGGTT");
212 m1.set_motifs(motifs, colors);
213 int first_size = m1.motifs().size();
214 BOOST_CHECK_EQUAL( first_size, 1 );
215 BOOST_REQUIRE(first_size > 0);
216 BOOST_CHECK_EQUAL(*(m1.motifs().begin()), motifs.front());
217 // make sure that our sequences have the right number of motifs
218 BOOST_CHECK_EQUAL(m1.sequences()[0]->motifs().size(), 1);
219 BOOST_CHECK_EQUAL(m1.sequences()[1]->motifs().size(), 1); // because of rc
221 // verify that setting the motif clears the arrays
222 m1.set_motifs(motifs, colors);
223 BOOST_CHECK_EQUAL( first_size, m1.motifs().size() );
224 // make sure that our sequences have the right number of motifs
225 BOOST_CHECK_EQUAL(m1.sequences()[0]->motifs().size(), 1);
226 BOOST_CHECK_EQUAL(m1.sequences()[1]->motifs().size(), 1);
228 // add a different motif
230 motifs.push_back("CCTTGG");
231 BOOST_CHECK_EQUAL(motifs.size(), 1);
232 m1.set_motifs(motifs, colors);
233 BOOST_CHECK_EQUAL(m1.motifs().size(), 1);
234 BOOST_REQUIRE(m1.motifs().size() > 0);
235 BOOST_CHECK_EQUAL(*(m1.motifs().begin()), motifs.front());
236 BOOST_CHECK_EQUAL(m1.sequences()[0]->motifs().size(), 0);
237 BOOST_CHECK_EQUAL(m1.sequences()[1]->motifs().size(), 1);
239 // try a motif that doesn't exist
241 motifs.push_back("CCTTGG");
242 BOOST_CHECK_EQUAL(motifs.size(), 1);
243 m1.set_motifs(motifs, colors);
244 BOOST_CHECK_EQUAL(m1.motifs().size(), 1);
245 BOOST_CHECK_EQUAL(m1.sequences()[0]->motifs().size(), 0);
246 BOOST_CHECK_EQUAL(m1.sequences()[1]->motifs().size(), 1);
251 local_align_test(const Mussa::vector_sequence_type &seqs,
252 const list<ConservedPath::path_type>& result,
253 const list<vector<bool> >& reversed)
255 map<char, vector <char> > m;
256 assign::insert(m)('A', assign::list_of('A')('T') )
257 ('T', assign::list_of('T')('A') )
258 ('G', assign::list_of('G')('C') )
259 ('C', assign::list_of('C')('G') );
260 list<vector<bool> >::const_iterator rc_i = reversed.begin();
262 for(list<ConservedPath::path_type>::const_iterator base_i = result.begin();
263 base_i != result.end();
266 // since the reverse compliment flag is relative to the first sequence
267 // the first one should always be false
268 BOOST_CHECK_EQUAL( (*rc_i)[0], false );
269 const int first_path_basepair_index = (*base_i)[0];
270 const int second_path_basepair_index = (*base_i)[1];
271 const char first_basepair = (*seqs[0])[first_path_basepair_index];
272 const char second_basepair = (*seqs[1])[second_path_basepair_index];
273 // get our index into our reverse compliment map m
274 const int second_compliment_index = (*rc_i)[1];
275 // lookup the forward or reverse compliment depending on our rc flag
276 const char complimented_second = m[second_basepair][second_compliment_index];
278 BOOST_CHECK_EQUAL( first_basepair, complimented_second) ;
283 BOOST_AUTO_TEST_CASE( local_alignment )
285 string s0("GCGCATAT");
286 string s1("AAAAAAAT");
290 analysis.append_sequence(s0);
291 analysis.append_sequence(s1);
292 analysis.set_threshold(3);
293 analysis.set_window(4);
295 NwayPaths npath = analysis.paths();
296 list<ConservedPath::path_type> result;
297 list<vector<bool> > reversed;
298 list<ConservedPath>::iterator pathz_i = npath.pathz.begin();
300 list<ConservedPath> selected_paths;
301 selected_paths.push_back(*pathz_i);
302 analysis.createLocalAlignment(selected_paths.begin(),
303 selected_paths.end(),
307 local_align_test(analysis.sequences(), result, reversed);
312 selected_paths.clear();
313 selected_paths.push_back(*pathz_i);
314 analysis.createLocalAlignment(selected_paths.begin(),
315 selected_paths.end(),
318 local_align_test(analysis.sequences(), result, reversed);
323 BOOST_AUTO_TEST_CASE( subanalysis )
325 Sequence s1("AATGAAGATTTTAATGCTTTAATTTTGTTTTGTAAACTTCGAATTTCCAAAATTTGAAA");
326 Sequence s2("AGGAGCAAGTTCGCTTCATCGAGAATTTTTAATTTTTAGTCAAATTTTCCAATGTCTGA");
329 analysis.append_sequence(s1);
330 analysis.append_sequence(s2);
331 analysis.set_threshold(8);
332 analysis.set_window(8);
335 NwayPaths perfect_path = analysis.paths();
336 int perfect_match_count = perfect_path.pathz.size();
338 Sequence sub1 = s1.subseq(2, s1.size()-4);
339 Sequence sub2 = s2.subseq(2, s2.size()-4);
341 subanalysis.append_sequence(sub1);
342 subanalysis.append_sequence(sub2);
343 subanalysis.set_threshold(7);
344 subanalysis.set_window(8);
345 subanalysis.analyze();
346 NwayPaths one_mismatch_path = subanalysis.paths();
347 int one_mismatch_count = one_mismatch_path.pathz.size();
349 BOOST_CHECK( perfect_match_count < one_mismatch_count );
352 BOOST_AUTO_TEST_CASE( dirty_flag )
355 BOOST_CHECK_EQUAL(m.is_dirty(), false);
357 BOOST_CHECK_EQUAL(m.is_dirty(), true);
360 BOOST_CHECK_EQUAL(m.is_dirty(), true);
363 BOOST_CHECK_EQUAL(m.is_dirty(), true);
365 m.set_soft_threshold(1);
366 BOOST_CHECK_EQUAL(m.is_dirty(), false);
368 m.append_sequence("AAGGCCTT");
369 BOOST_CHECK_EQUAL(m.is_dirty(), true);