9bd8b6432d7e218883da1e13a0aee58e67638fb2
[mussa.git] / alg / test / test_nway.cpp
1 #define BOOST_AUTO_TEST_MAIN
2 #include <boost/test/auto_unit_test.hpp>
3 #include <boost/filesystem/path.hpp>
4 namespace fs = boost::filesystem;
5
6 #include <string>
7 #include <iostream>
8
9 #include "alg/mussa.hpp"
10 #include "alg/nway_paths.hpp"
11 #include "alg/sequence.hpp"
12
13 using namespace std;
14
15 //! there should be no matches
16 BOOST_AUTO_TEST_CASE( nway_null )
17 {
18   string s0("AAAANNNN");
19   string s1("GGGGNNNN");
20   string s2("TTTTNNNN");
21
22   Mussa analysis;
23   analysis.append_sequence(s0);
24   analysis.append_sequence(s1);
25   analysis.append_sequence(s2);
26   analysis.set_window(4);
27   analysis.set_threshold(3);
28   analysis.analyze();
29   NwayPaths npath = analysis.paths();
30   // we added 3 sequences, but none-matched
31   BOOST_CHECK_EQUAL( npath.sequence_count(), 0); 
32   BOOST_CHECK_EQUAL( npath.size(), 0 );
33   BOOST_CHECK_EQUAL( npath.path_size(), 0 );
34   BOOST_CHECK_EQUAL( npath.refined_path_size(), 0 );
35   // there should be no paths for these sequences
36   for (std::list<ConservedPath >::iterator pathz_i = npath.pathz.begin();
37        pathz_i != npath.pathz.end();
38        ++pathz_i)
39   {
40     BOOST_CHECK_EQUAL( pathz_i->size(), 0);
41   }
42 }
43
44 BOOST_AUTO_TEST_CASE( nway_test )
45 {
46   string s0("ATATGCGC");
47   string s1("GGGGGGGC");
48   Sequence seq1(s1);
49
50   Mussa analysis;
51   analysis.append_sequence(s0);
52   analysis.append_sequence(s1);
53   analysis.set_window(4);
54   analysis.set_threshold(3);
55   analysis.analyze();
56   NwayPaths npath = analysis.paths();
57   BOOST_CHECK_EQUAL( npath.sequence_count(), 2); 
58   BOOST_CHECK_EQUAL( npath.size(), 2 );
59   BOOST_CHECK_EQUAL( npath.path_size(), 2 );
60   BOOST_CHECK_EQUAL( npath.refined_path_size(), 2 );
61   for (std::list<ConservedPath >::iterator pathz_i = npath.pathz.begin();
62        pathz_i != npath.pathz.end();
63        ++pathz_i)
64   {
65     for( ConservedPath::iterator path_i = pathz_i->begin();
66          path_i != pathz_i->end();
67          ++path_i)
68     {      
69       BOOST_CHECK( *path_i == 4 || *path_i == -4);
70     }
71   }
72 }
73
74 BOOST_AUTO_TEST_CASE( nway_refine )
75 {
76   fs::path mupa_path( EXAMPLE_DIR, fs::native );
77   mupa_path /= "mck3test.mupa";
78   Mussa m1;
79   m1.load_mupa_file( mupa_path );
80   m1.analyze();
81   const NwayPaths& npath = m1.paths();
82   //BOOST_CHECK_EQUAL (npath.path_size(), npath.refined_path_size());
83   // FIXME: shouldn't these be equal to start with?
84   BOOST_CHECK(npath.path_size() > npath.refined_path_size());
85   size_t first_refined_size = npath.refined_path_size();
86   BOOST_CHECK( first_refined_size > 0 );
87
88   // we're using a window size 30 (threshold 20) example
89   m1.set_soft_threshold(22);
90   m1.nway();
91   BOOST_CHECK_EQUAL( npath.path_size(), npath.size() );
92   BOOST_CHECK( npath.path_size() > 0 );
93   BOOST_CHECK( npath.refined_path_size() > 0);
94   BOOST_CHECK( npath.refined_path_size() < first_refined_size);
95
96   m1.set_soft_threshold(20);
97   m1.nway();
98   BOOST_CHECK_EQUAL(npath.refined_path_size(), first_refined_size);
99 }
100
101 // The following data causes a crash...
102 // ticket:85 is the user report 
103 // ticket:83 provided the sample data
104 // ticket:64 was my version where I didn't have a consistent way of 
105 // duplicating.
106 BOOST_AUTO_TEST_CASE( nway_threshold_crash )
107 {
108   string seq1 = "CACTCCCTCGAAGCTGCTGT\
109 TCTCTTGTCTGTCTGTCTCTGTCTTGAAGCTCAGCCAAGAAACTTTCCCGTGTCACGCCT\
110 GCGTCCCACCGTGGGGCTCTCTTGGAGCACCCAGGGACACCCAGCGTGCAACAGCCACGG\
111 GAAGCCTTTCTGCCGCCCAGGCCCACAGGTCTCGAGACGCACATGCACGCCTGGGCGTGG\
112 CAGCCTCACAGGGAACACGGGACAGACGCCGGCGACGCGCAGACACACGGACACGCGGAA\
113 GCCAAGCACACTCTGGCGGGTCCCGCAAGGGACGCCGTGGAAGAAAGGAGCCTGTGGCAA\
114 CAGGCGGCCGAGCTGCCGAATTCAGTTGACACGAGGCACAGAAAACAAATATCAAAGATC\
115 TAATAATACAAAACAAACTTGATTAAAACTGGTGCTTAAAGTTTATTACCCACAACTCCA\
116 CAGTCTCTGTGTAAACCACTCGACTCATCTTGTAGCTTATTTTTTTTTAAAGAGGACGTT\
117 TTCTACGGCTGTGGCCCGCCTCTGTGAACCATAGCGGTGTGCGGCGGGGGGTCTGCACCC\
118 GGGTGGGGGACAGAGGGACCTTTAAAGAAAACAAAACTGGACAGAAACAGGAATGTGAGC\
119 TGGGGGAGCTGGCTTGAGTTTCTCAAAAGCCATCGGAAGATGCGAGTTTGTGCCTTTTTT\
120 TTTATTGCTCTGGTGGATTTTTGTGGCTGGGTTTTCTGAAGTCTGAGGAACAATGCCTTA\
121 AGAAAAAACAAACAGCAGGAATCGGTGGGACAGTTTCCTGTGGCCAGCCGAGCCTGGCAG\
122 TGCTGGCACCGCGAGCTGGCCTGACGCCTCAAGCACGGGCACCAGCCGTCATCTCCGGGG\
123 CCAGGGGCTGCAGCCCGGCGGTCCCTGTTTTGCTTTATTGCTGTTTAAGAAAAATGGAGG\
124 TAGTTCCAAAAAAGTGGCAAATCCCGTTGGAGGTTTTGAAGTCCAACAAATTTTAAACGA\
125 ATCCAAAGTGTTCTCACACGTCACATACGATTGAGCATCTCCATCTGGTCGTGAAGCATG\
126 TGGTAGGCACACTTGCAGTGTTACGATCGGAATGCTTTTTATTAAAAGCAAGTAGCATGA\
127 AGTATTGCTTAAATTTTAGGTATAAATAAATATATATATGTATAATATATATTCCAATGT\
128 ATTCCAAGCTAAGAAACTTACTTGATTCTTATGAAATCTTGATAAAATATTTATAATGCA\
129 TTTATAGAAAAAGTATATATATATATATAAAATGAATGCAGATTGCGAAGGTCCCTGCAA\
130 ATGGATGGCTTGTGAATTTGCTCTCAAGGTGCTTATGGAAAGGGATCCTGATTGATTGAA\
131 ATTCATGTTTTCTCAAGCTCCAGATTGGCTAGATTTCAGATCGCCAACACATTCGCCACT\
132 GGGCAACTACCCTACAAGTTTGTACTTTCATTTTAATTATTTTCTAACAGAACCGCTCCC\
133 GTCTCCAAGCCTTCATGCACATATGTACCTAATGAGTTTTTATAGCAAAGAATATAAATT\
134 TGCTGTTGATTTTTGTATGAATTTTTTCACAAAAAGATCCTGAATAAGCATTGTTTTATG\
135 AATTTTACATTTTTCCTCACCATTTAGCAATTTTCTGAATGGTAATAATGTCTAAATCTT\
136 TTTCCTTTCTGAATTCTTGCTTGTACATTTTTTTTTACCTTTCAAAGGTTTTTAATTATT\
137 TTTGTTTTTATTTTTGTACGATGAGTTTTCTGCAGCGTACAGAATTGTTGCTGTCAGATT\
138 CTATTTTCAGAAAGTGAGAGGAGGGACCGTAGGTCTTTTCGGAGTGACACCAACGATTGT\
139 GTCTTTCCTGGTCTGTCCTAGGAGCTGTATAAAGAAGCCCAGGGGCTCTTTTTAACTTTC\
140 AACACTAGTAGTATTACGAGGGGTGGTGTGTTTTTCCCCTCCGTGGCAAGGGCAGGGAGG\
141 GTTGCTTAGGATGCCCGGCCACCCTGGGAGGCTTGCCAGATGCCGGGGGCAGTCAGCATT\
142 AATGAAACTCATGTTTAAACTTCTCTGACCACATCGTCAGGATAGAATTCTAACTTGAGT\
143 TTTCCAAAGACCTTTTGAGCATGTCAGCAATGCATGGGGCACACGTGGGGCTCTTTACCC\
144 ACTTGGGTTTTTCCACTGCAGCCACGTGGCCAGCCCTGGATTTTGGAGCCTGTGGCTGCA\
145 AGGAACCCAGGGACCCTTGTTGCCTGGTGAACCTGCAGGGAGGGTATGATTGCCTGACCA\
146 GGACAGCCAGTCTTTACTCTTTTTCTCTTCAACAGTAACTGACAGTCACGTTTTACTGGT\
147 AACTTATTTTCCAGCACATGAAGCCACCAGTTTCATTCCAAAGTGTATATTGGGTTCAGA\
148 CTTGGGGGCAGAAGTTCAGACACACCGTGCTCAGGAGGGACCCAGAGCCGAGTTTCGGAG\
149 TTTGGTAAAGTTTACAGGGTAGCTTCTGAAATTAACTCAAACTTTTGACCAAATGAGTGC\
150 AGATTCTTGGATTCACTTGGTCACTGGGCTGCTGATGGTCAGCTCTGAGACAGTGGTTTG\
151 AGAGCAGGCAGAACGGTCTTGGGACTTGTTTGACTTTCCCCTCCCTGGTGGCCACTCTTT\
152 GCTCTGAAGCCCAGATTGGCAAGAGGAGCTGGTCCATTCCCCATTCATGGCACAGAGCAG\
153 TGGCAGGGCCCAGCTAGCAGGCTCTTCTGGCCTCCTTGGCCTCATTCTCTGCATAGCCCT\
154 CTGGGGATCCTGCCACCTGCCCTCTTACCCCGCCGTGGCTTATGGGGAGGAATGCATCAT\
155 CTCACTTTTTTTTTTTAAGCAGATGATGGGATAACATGGACTGCTCAGTGGCCAGGTTAT\
156 CAGTGGGGGGACTTAATTCTAATCTCATTCAAATGGAGACGCCCTCTGCAAAGGCCTGGC\
157 AGGGGGAGGCACGTTTCATCTGTCAGCTCACTCCAGCTTCACAAATGTGCTGAGAGCATT\
158 ACTGTGTAGCCTTTTCTTTGAAGACACACTCGGCTCTTCTCCACAGCAAGCGTCCAGGGC\
159 AGATGGCAGAGGATCTGCCTCGGCGTCTGCAGGCGGGACCACGTCAGGGAGGGTTCCTTC\
160 ATGTGTTCTCCCTGTGGGTCCTTGGACCTTTAGCCTTTTTCTTCCTTTGCAAAGGCCTTG\
161 GGGGCACTGGCTGGGAGTCAGCAAGCGAGCACTTTATATCCCTTTGAGGGAAACCCTGAT\
162 GACGCCACTGGGCCTCTTGGCGTCTGCCCTGCCCTCGCGGCTTCCCGCCGTGCCGCAGCG\
163 TGCCCACGTGCCCACGCCCCACCAGCAGGCGGCTGTCCCGGAGGCCGTGGCCCGCTGGGA\
164 CTGGCCGCCCCTCCCCAGCGTCCCAGGGCTCTGGTTCTGGAGGGCCACTTTGTCAAGGTG\
165 TTTCAGTTTTTCTTTACTTCTTTTGAAAATCTGTTTGCAAGGGGAAGGACCATTTCGTAA\
166 TGGTCTGACACAAAAGCAAGTTTGATTTTTGCAGCACTAGCAATGGACTTTGTTGTTTTT\
167 CTTTTTGATCAGAACATTCCTTCTTTACTGGTCACAGCCACGTGCTCATTCCATTCTTCT\
168 TTTTGTAGACTTTGGGCCCACGTGTTTTATGGGCATTGATACATATATAAATATATAGAT\
169 ATAAATATATATGAATATATTTTTTTAAGTTTCCTACACCTGGAGGTTGCATGGACTGTA\
170 CGACCGGCATGACTTTATATTGTATACAGATTTTGCACGCCAAACTCGGCAGCTTTGGGG\
171 AAGAAGAAAAATGCCTTTCTGTTCCCCTCTCATGACATTTGCAGATACAAAAGATGGAAA\
172 TTTTTCTGTAAAACAAAACCTTGAAGGAGAGGAGGGCGGGGAAGTTTGCGTCTTATTGAA\
173 CTTATTCTTAAGAAATTGTACTTTTTATTGTAAGAAAAATAAAAAGGACTACTTAAACAT\
174 TTGTCATATTAAGAAAAAAAGTTTATCTAGCACTTGTGACATACCAATAATAGAGTTTAT\
175 TGTATTTATGTGGAAACAGTGTTTTAGGGAAACTACTCAGAATTCACAGTGAACTGCCTG\
176 TCTCTCTCGAGTTGATTTGGAGGAATTTTGTTTTGTTTTGTTTTGTTTGTTTCCTTTTAT\
177 CTCCTTCCACGGGCCAGGCGAGCGCCGCCCGCCCTCACTGGCCTTGTGACGGTTTATTCT\
178 GATTGAGAACTGGGCGGACTCGAAAGAGTCCCCTTTTCCGCACAGCTGTGTTGACTTTTT\
179 AATTACTTTTAGGTGATGTATGGCTAAGATTTCACTTTAAGCAGTCGTGAACTGTGCGAG\
180 CACTGTGGTTTACAATTATACTTTGCATCGAAAGGAAACCATTTCTTCATTGTAACGAAG\
181 CTGAGCGTGTTCTTAGCTCGGCCTCACTTTGTCTCTGGCATTGATTAAAAGTCTGCTATT";
182   string seq2="ATCCCAGCACATGACAACACTTCAGAAGGG\
183 TCCCCCTGCTGACTGGAGAGCTGGGAATATGGCATTTGGACACTTCATTTGTAAATAGTG\
184 TACATTTTAAACATTGGCTCGAAACTTCAGAGATAAGTCATGGAGAGGACATTGGAGGGG\
185 AGAAATGCAGTTGCTGACTGGGAATTTAAGAATGTGAACTTCTCACTAGAATTGGTATGG\
186 AAAAGCAAAATACTGTAAATAAACTTTTTTTCTAACAATTTGCC";
187   string seq3="GGAGCTGGATGAATGAGAGGCCCCCAGATGCAGAGAGACTGGAGAGGGT\
188 GGGGAGGGGCCCAGCGGCCTTGGTGACAGGCCCAGGGTGGGAGGGGTCGGGGCCCCTGGA\
189 GGGGCAATGGGGAGGTGATGTCTTCTCTCTGCTCAGAGAGCAGGGACTAGGGTAGGACCC\
190 TCACCGCTGCGTCCAGCAGACACTGAACCAGAATTGGAAACGTGCTTGAAACAATCACAC\
191 AGGACACTTTTCTACATTGGTGCAAAATGGAATATTTTGTACATTTTTAAAATGTGATTT\
192 TTGTATATACTTGTATATGTATGCCAATTTGGTGCTTTTTGTAAAGGAACTTTTGTATAA\
193 TAATGCCTGGTCGTTGGGTGACCTGCGATTGTCAGAAAGAGGGGAAGGAAGCCAGGTTGA\
194 TACAGCTGCCCACTTCCTTTCCTGAGCAGGAGGATGGGGTAGCACTCACAGGGACGATGT\
195 GCTGTATTTCAGTGCCTATCCCAGACATACGGGGTGGTAACTGAGTTTGTGTTATATGTT\
196 GTTTTAATAAATGCACAATGCTCTCTTCCTGTTCTTC";
197
198   // now that we've got some data lets see if it crashes
199   Mussa m1;
200   m1.append_sequence(seq1);
201   m1.append_sequence(seq2);
202   m1.append_sequence(seq3);
203
204   m1.set_window(10);
205   m1.set_threshold(8);
206   m1.analyze();
207   m1.set_soft_threshold(10);
208   m1.nway();
209 }