Update mussa to build on ubuntu 10.04 with qt 4.6.2 +boost 1.40.0.1
[mussa.git] / alg / test / test_nway.cpp
1 #define BOOST_TEST_DYN_LINK
2 #define BOOST_TEST_MODULE test_nway
3 #include <boost/test/unit_test.hpp>
4
5 #include <boost/filesystem/path.hpp>
6 namespace fs = boost::filesystem;
7
8 #include <string>
9 #include <iostream>
10
11 #include "alg/mussa.hpp"
12 #include "alg/nway_paths.hpp"
13 #include "alg/sequence.hpp"
14
15 using namespace std;
16
17 //! there should be no matches
18 BOOST_AUTO_TEST_CASE( nway_null )
19 {
20   string s0("AAAANNNN");
21   string s1("GGGGNNNN");
22   string s2("TTTTNNNN");
23
24   Mussa analysis;
25   analysis.append_sequence(s0);
26   analysis.append_sequence(s1);
27   analysis.append_sequence(s2);
28   analysis.set_window(4);
29   analysis.set_threshold(3);
30   analysis.analyze();
31   NwayPaths npath = analysis.paths();
32   // we added 3 sequences, but none-matched
33   BOOST_CHECK_EQUAL( npath.sequence_count(), 0); 
34   BOOST_CHECK_EQUAL( npath.size(), 0 );
35   BOOST_CHECK_EQUAL( npath.path_size(), 0 );
36   BOOST_CHECK_EQUAL( npath.refined_path_size(), 0 );
37   // there should be no paths for these sequences
38   for (std::list<ConservedPath >::iterator pathz_i = npath.pathz.begin();
39        pathz_i != npath.pathz.end();
40        ++pathz_i)
41   {
42     BOOST_CHECK_EQUAL( pathz_i->size(), 0);
43   }
44 }
45
46 BOOST_AUTO_TEST_CASE( nway_test )
47 {
48   string s0("ATATGCGC");
49   string s1("GGGGGGGC");
50   Sequence seq1(s1);
51
52   Mussa analysis;
53   analysis.append_sequence(s0);
54   analysis.append_sequence(s1);
55   analysis.set_window(4);
56   analysis.set_threshold(3);
57   analysis.analyze();
58   NwayPaths npath = analysis.paths();
59   BOOST_CHECK_EQUAL( npath.sequence_count(), 2); 
60   BOOST_CHECK_EQUAL( npath.size(), 2 );
61   BOOST_CHECK_EQUAL( npath.path_size(), 2 );
62   BOOST_CHECK_EQUAL( npath.refined_path_size(), 2 );
63   for (std::list<ConservedPath >::iterator pathz_i = npath.pathz.begin();
64        pathz_i != npath.pathz.end();
65        ++pathz_i)
66   {
67     for( ConservedPath::iterator path_i = pathz_i->begin();
68          path_i != pathz_i->end();
69          ++path_i)
70     {      
71       BOOST_CHECK( *path_i == 4 || *path_i == -4);
72     }
73   }
74 }
75
76 BOOST_AUTO_TEST_CASE( nway_refine )
77 {
78   fs::path mupa_path( EXAMPLE_DIR, fs::native );
79   mupa_path /= "mck3test.mupa";
80   Mussa m1;
81   m1.load_mupa_file( mupa_path );
82   m1.analyze();
83   const NwayPaths& npath = m1.paths();
84   //BOOST_CHECK_EQUAL (npath.path_size(), npath.refined_path_size());
85   // FIXME: shouldn't these be equal to start with?
86   BOOST_CHECK(npath.path_size() > npath.refined_path_size());
87   size_t first_refined_size = npath.refined_path_size();
88   BOOST_CHECK( first_refined_size > 0 );
89
90   // we're using a window size 30 (threshold 20) example
91   m1.set_soft_threshold(22);
92   m1.nway();
93   BOOST_CHECK_EQUAL( npath.path_size(), npath.size() );
94   BOOST_CHECK( npath.path_size() > 0 );
95   BOOST_CHECK( npath.refined_path_size() > 0);
96   BOOST_CHECK( npath.refined_path_size() < first_refined_size);
97
98   m1.set_soft_threshold(20);
99   m1.nway();
100   BOOST_CHECK_EQUAL(npath.refined_path_size(), first_refined_size);
101 }
102
103 // The following data causes a crash...
104 // ticket:85 is the user report 
105 // ticket:83 provided the sample data
106 // ticket:64 was my version where I didn't have a consistent way of 
107 // duplicating.
108 BOOST_AUTO_TEST_CASE( nway_threshold_crash )
109 {
110   string seq1 = "CACTCCCTCGAAGCTGCTGT\
111 TCTCTTGTCTGTCTGTCTCTGTCTTGAAGCTCAGCCAAGAAACTTTCCCGTGTCACGCCT\
112 GCGTCCCACCGTGGGGCTCTCTTGGAGCACCCAGGGACACCCAGCGTGCAACAGCCACGG\
113 GAAGCCTTTCTGCCGCCCAGGCCCACAGGTCTCGAGACGCACATGCACGCCTGGGCGTGG\
114 CAGCCTCACAGGGAACACGGGACAGACGCCGGCGACGCGCAGACACACGGACACGCGGAA\
115 GCCAAGCACACTCTGGCGGGTCCCGCAAGGGACGCCGTGGAAGAAAGGAGCCTGTGGCAA\
116 CAGGCGGCCGAGCTGCCGAATTCAGTTGACACGAGGCACAGAAAACAAATATCAAAGATC\
117 TAATAATACAAAACAAACTTGATTAAAACTGGTGCTTAAAGTTTATTACCCACAACTCCA\
118 CAGTCTCTGTGTAAACCACTCGACTCATCTTGTAGCTTATTTTTTTTTAAAGAGGACGTT\
119 TTCTACGGCTGTGGCCCGCCTCTGTGAACCATAGCGGTGTGCGGCGGGGGGTCTGCACCC\
120 GGGTGGGGGACAGAGGGACCTTTAAAGAAAACAAAACTGGACAGAAACAGGAATGTGAGC\
121 TGGGGGAGCTGGCTTGAGTTTCTCAAAAGCCATCGGAAGATGCGAGTTTGTGCCTTTTTT\
122 TTTATTGCTCTGGTGGATTTTTGTGGCTGGGTTTTCTGAAGTCTGAGGAACAATGCCTTA\
123 AGAAAAAACAAACAGCAGGAATCGGTGGGACAGTTTCCTGTGGCCAGCCGAGCCTGGCAG\
124 TGCTGGCACCGCGAGCTGGCCTGACGCCTCAAGCACGGGCACCAGCCGTCATCTCCGGGG\
125 CCAGGGGCTGCAGCCCGGCGGTCCCTGTTTTGCTTTATTGCTGTTTAAGAAAAATGGAGG\
126 TAGTTCCAAAAAAGTGGCAAATCCCGTTGGAGGTTTTGAAGTCCAACAAATTTTAAACGA\
127 ATCCAAAGTGTTCTCACACGTCACATACGATTGAGCATCTCCATCTGGTCGTGAAGCATG\
128 TGGTAGGCACACTTGCAGTGTTACGATCGGAATGCTTTTTATTAAAAGCAAGTAGCATGA\
129 AGTATTGCTTAAATTTTAGGTATAAATAAATATATATATGTATAATATATATTCCAATGT\
130 ATTCCAAGCTAAGAAACTTACTTGATTCTTATGAAATCTTGATAAAATATTTATAATGCA\
131 TTTATAGAAAAAGTATATATATATATATAAAATGAATGCAGATTGCGAAGGTCCCTGCAA\
132 ATGGATGGCTTGTGAATTTGCTCTCAAGGTGCTTATGGAAAGGGATCCTGATTGATTGAA\
133 ATTCATGTTTTCTCAAGCTCCAGATTGGCTAGATTTCAGATCGCCAACACATTCGCCACT\
134 GGGCAACTACCCTACAAGTTTGTACTTTCATTTTAATTATTTTCTAACAGAACCGCTCCC\
135 GTCTCCAAGCCTTCATGCACATATGTACCTAATGAGTTTTTATAGCAAAGAATATAAATT\
136 TGCTGTTGATTTTTGTATGAATTTTTTCACAAAAAGATCCTGAATAAGCATTGTTTTATG\
137 AATTTTACATTTTTCCTCACCATTTAGCAATTTTCTGAATGGTAATAATGTCTAAATCTT\
138 TTTCCTTTCTGAATTCTTGCTTGTACATTTTTTTTTACCTTTCAAAGGTTTTTAATTATT\
139 TTTGTTTTTATTTTTGTACGATGAGTTTTCTGCAGCGTACAGAATTGTTGCTGTCAGATT\
140 CTATTTTCAGAAAGTGAGAGGAGGGACCGTAGGTCTTTTCGGAGTGACACCAACGATTGT\
141 GTCTTTCCTGGTCTGTCCTAGGAGCTGTATAAAGAAGCCCAGGGGCTCTTTTTAACTTTC\
142 AACACTAGTAGTATTACGAGGGGTGGTGTGTTTTTCCCCTCCGTGGCAAGGGCAGGGAGG\
143 GTTGCTTAGGATGCCCGGCCACCCTGGGAGGCTTGCCAGATGCCGGGGGCAGTCAGCATT\
144 AATGAAACTCATGTTTAAACTTCTCTGACCACATCGTCAGGATAGAATTCTAACTTGAGT\
145 TTTCCAAAGACCTTTTGAGCATGTCAGCAATGCATGGGGCACACGTGGGGCTCTTTACCC\
146 ACTTGGGTTTTTCCACTGCAGCCACGTGGCCAGCCCTGGATTTTGGAGCCTGTGGCTGCA\
147 AGGAACCCAGGGACCCTTGTTGCCTGGTGAACCTGCAGGGAGGGTATGATTGCCTGACCA\
148 GGACAGCCAGTCTTTACTCTTTTTCTCTTCAACAGTAACTGACAGTCACGTTTTACTGGT\
149 AACTTATTTTCCAGCACATGAAGCCACCAGTTTCATTCCAAAGTGTATATTGGGTTCAGA\
150 CTTGGGGGCAGAAGTTCAGACACACCGTGCTCAGGAGGGACCCAGAGCCGAGTTTCGGAG\
151 TTTGGTAAAGTTTACAGGGTAGCTTCTGAAATTAACTCAAACTTTTGACCAAATGAGTGC\
152 AGATTCTTGGATTCACTTGGTCACTGGGCTGCTGATGGTCAGCTCTGAGACAGTGGTTTG\
153 AGAGCAGGCAGAACGGTCTTGGGACTTGTTTGACTTTCCCCTCCCTGGTGGCCACTCTTT\
154 GCTCTGAAGCCCAGATTGGCAAGAGGAGCTGGTCCATTCCCCATTCATGGCACAGAGCAG\
155 TGGCAGGGCCCAGCTAGCAGGCTCTTCTGGCCTCCTTGGCCTCATTCTCTGCATAGCCCT\
156 CTGGGGATCCTGCCACCTGCCCTCTTACCCCGCCGTGGCTTATGGGGAGGAATGCATCAT\
157 CTCACTTTTTTTTTTTAAGCAGATGATGGGATAACATGGACTGCTCAGTGGCCAGGTTAT\
158 CAGTGGGGGGACTTAATTCTAATCTCATTCAAATGGAGACGCCCTCTGCAAAGGCCTGGC\
159 AGGGGGAGGCACGTTTCATCTGTCAGCTCACTCCAGCTTCACAAATGTGCTGAGAGCATT\
160 ACTGTGTAGCCTTTTCTTTGAAGACACACTCGGCTCTTCTCCACAGCAAGCGTCCAGGGC\
161 AGATGGCAGAGGATCTGCCTCGGCGTCTGCAGGCGGGACCACGTCAGGGAGGGTTCCTTC\
162 ATGTGTTCTCCCTGTGGGTCCTTGGACCTTTAGCCTTTTTCTTCCTTTGCAAAGGCCTTG\
163 GGGGCACTGGCTGGGAGTCAGCAAGCGAGCACTTTATATCCCTTTGAGGGAAACCCTGAT\
164 GACGCCACTGGGCCTCTTGGCGTCTGCCCTGCCCTCGCGGCTTCCCGCCGTGCCGCAGCG\
165 TGCCCACGTGCCCACGCCCCACCAGCAGGCGGCTGTCCCGGAGGCCGTGGCCCGCTGGGA\
166 CTGGCCGCCCCTCCCCAGCGTCCCAGGGCTCTGGTTCTGGAGGGCCACTTTGTCAAGGTG\
167 TTTCAGTTTTTCTTTACTTCTTTTGAAAATCTGTTTGCAAGGGGAAGGACCATTTCGTAA\
168 TGGTCTGACACAAAAGCAAGTTTGATTTTTGCAGCACTAGCAATGGACTTTGTTGTTTTT\
169 CTTTTTGATCAGAACATTCCTTCTTTACTGGTCACAGCCACGTGCTCATTCCATTCTTCT\
170 TTTTGTAGACTTTGGGCCCACGTGTTTTATGGGCATTGATACATATATAAATATATAGAT\
171 ATAAATATATATGAATATATTTTTTTAAGTTTCCTACACCTGGAGGTTGCATGGACTGTA\
172 CGACCGGCATGACTTTATATTGTATACAGATTTTGCACGCCAAACTCGGCAGCTTTGGGG\
173 AAGAAGAAAAATGCCTTTCTGTTCCCCTCTCATGACATTTGCAGATACAAAAGATGGAAA\
174 TTTTTCTGTAAAACAAAACCTTGAAGGAGAGGAGGGCGGGGAAGTTTGCGTCTTATTGAA\
175 CTTATTCTTAAGAAATTGTACTTTTTATTGTAAGAAAAATAAAAAGGACTACTTAAACAT\
176 TTGTCATATTAAGAAAAAAAGTTTATCTAGCACTTGTGACATACCAATAATAGAGTTTAT\
177 TGTATTTATGTGGAAACAGTGTTTTAGGGAAACTACTCAGAATTCACAGTGAACTGCCTG\
178 TCTCTCTCGAGTTGATTTGGAGGAATTTTGTTTTGTTTTGTTTTGTTTGTTTCCTTTTAT\
179 CTCCTTCCACGGGCCAGGCGAGCGCCGCCCGCCCTCACTGGCCTTGTGACGGTTTATTCT\
180 GATTGAGAACTGGGCGGACTCGAAAGAGTCCCCTTTTCCGCACAGCTGTGTTGACTTTTT\
181 AATTACTTTTAGGTGATGTATGGCTAAGATTTCACTTTAAGCAGTCGTGAACTGTGCGAG\
182 CACTGTGGTTTACAATTATACTTTGCATCGAAAGGAAACCATTTCTTCATTGTAACGAAG\
183 CTGAGCGTGTTCTTAGCTCGGCCTCACTTTGTCTCTGGCATTGATTAAAAGTCTGCTATT";
184   string seq2="ATCCCAGCACATGACAACACTTCAGAAGGG\
185 TCCCCCTGCTGACTGGAGAGCTGGGAATATGGCATTTGGACACTTCATTTGTAAATAGTG\
186 TACATTTTAAACATTGGCTCGAAACTTCAGAGATAAGTCATGGAGAGGACATTGGAGGGG\
187 AGAAATGCAGTTGCTGACTGGGAATTTAAGAATGTGAACTTCTCACTAGAATTGGTATGG\
188 AAAAGCAAAATACTGTAAATAAACTTTTTTTCTAACAATTTGCC";
189   string seq3="GGAGCTGGATGAATGAGAGGCCCCCAGATGCAGAGAGACTGGAGAGGGT\
190 GGGGAGGGGCCCAGCGGCCTTGGTGACAGGCCCAGGGTGGGAGGGGTCGGGGCCCCTGGA\
191 GGGGCAATGGGGAGGTGATGTCTTCTCTCTGCTCAGAGAGCAGGGACTAGGGTAGGACCC\
192 TCACCGCTGCGTCCAGCAGACACTGAACCAGAATTGGAAACGTGCTTGAAACAATCACAC\
193 AGGACACTTTTCTACATTGGTGCAAAATGGAATATTTTGTACATTTTTAAAATGTGATTT\
194 TTGTATATACTTGTATATGTATGCCAATTTGGTGCTTTTTGTAAAGGAACTTTTGTATAA\
195 TAATGCCTGGTCGTTGGGTGACCTGCGATTGTCAGAAAGAGGGGAAGGAAGCCAGGTTGA\
196 TACAGCTGCCCACTTCCTTTCCTGAGCAGGAGGATGGGGTAGCACTCACAGGGACGATGT\
197 GCTGTATTTCAGTGCCTATCCCAGACATACGGGGTGGTAACTGAGTTTGTGTTATATGTT\
198 GTTTTAATAAATGCACAATGCTCTCTTCCTGTTCTTC";
199
200   // now that we've got some data lets see if it crashes
201   Mussa m1;
202   m1.append_sequence(seq1);
203   m1.append_sequence(seq2);
204   m1.append_sequence(seq3);
205
206   m1.set_window(10);
207   m1.set_threshold(8);
208   m1.analyze();
209   m1.set_soft_threshold(10);
210   m1.nway();
211 }