Tell unittests to use native file convention for EXAMPLE_DIR
[mussa.git] / alg / test / test_nway.cpp
1 #include <boost/test/auto_unit_test.hpp>
2 #include <boost/filesystem/path.hpp>
3 namespace fs = boost::filesystem;
4
5 #include <string>
6 #include <iostream>
7
8 #include "alg/mussa.hpp"
9 #include "alg/nway_paths.hpp"
10 #include "alg/sequence.hpp"
11
12 using namespace std;
13
14 //! there should be no matches
15 BOOST_AUTO_TEST_CASE( nway_null )
16 {
17   string s0("AAAANNNN");
18   string s1("GGGGNNNN");
19   string s2("TTTTNNNN");
20
21   Mussa analysis;
22   analysis.append_sequence(s0);
23   analysis.append_sequence(s1);
24   analysis.append_sequence(s2);
25   analysis.set_window(4);
26   analysis.set_threshold(3);
27   analysis.analyze();
28   NwayPaths npath = analysis.paths();
29   // there should be no paths for these sequences
30   for (std::list<ConservedPath >::iterator pathz_i = npath.pathz.begin();
31        pathz_i != npath.pathz.end();
32        ++pathz_i)
33   {
34     BOOST_CHECK_EQUAL( pathz_i->size(), 0);
35   }
36 }
37
38 BOOST_AUTO_TEST_CASE( nway_test )
39 {
40   string s0("ATATGCGC");
41   string s1("GGGGGGGC");
42   Sequence seq1(s1);
43
44   Mussa analysis;
45   analysis.append_sequence(s0);
46   analysis.append_sequence(s1);
47   analysis.set_window(4);
48   analysis.set_threshold(3);
49   analysis.analyze();
50   NwayPaths npath = analysis.paths();
51   for (std::list<ConservedPath >::iterator pathz_i = npath.pathz.begin();
52        pathz_i != npath.pathz.end();
53        ++pathz_i)
54   {
55     for( ConservedPath::iterator path_i = pathz_i->begin();
56          path_i != pathz_i->end();
57          ++path_i)
58     {      
59       BOOST_CHECK( *path_i == 4 || *path_i == -4);
60     }
61   }
62 }
63
64 BOOST_AUTO_TEST_CASE( nway_refine )
65 {
66   fs::path mupa_path( EXAMPLE_DIR, fs::native );
67   mupa_path /= "mck3test.mupa";
68   Mussa m1;
69   m1.load_mupa_file( mupa_path );
70   m1.analyze();
71   const NwayPaths& npath = m1.paths();
72   BOOST_CHECK_EQUAL (npath.path_size(), npath.refined_path_size());
73   size_t first_refined_size = npath.refined_path_size();
74   BOOST_CHECK( first_refined_size > 0 );
75
76   // we're using a window size 30 (threshold 20) example
77   m1.set_soft_threshold(22);
78   m1.nway();
79   BOOST_CHECK( npath.refined_path_size() > 0);
80   BOOST_CHECK( npath.refined_path_size() < first_refined_size);
81
82   m1.set_soft_threshold(20);
83   m1.nway();
84   BOOST_CHECK_EQUAL(npath.refined_path_size(), first_refined_size);
85 }
86
87 // The following data causes a crash...
88 // ticket:85 is the user report 
89 // ticket:83 provided the sample data
90 // ticket:64 was my version where I didn't have a consistent way of 
91 // duplicating.
92 BOOST_AUTO_TEST_CASE( nway_threshold_crash )
93 {
94   string seq1 = "CACTCCCTCGAAGCTGCTGT\
95 TCTCTTGTCTGTCTGTCTCTGTCTTGAAGCTCAGCCAAGAAACTTTCCCGTGTCACGCCT\
96 GCGTCCCACCGTGGGGCTCTCTTGGAGCACCCAGGGACACCCAGCGTGCAACAGCCACGG\
97 GAAGCCTTTCTGCCGCCCAGGCCCACAGGTCTCGAGACGCACATGCACGCCTGGGCGTGG\
98 CAGCCTCACAGGGAACACGGGACAGACGCCGGCGACGCGCAGACACACGGACACGCGGAA\
99 GCCAAGCACACTCTGGCGGGTCCCGCAAGGGACGCCGTGGAAGAAAGGAGCCTGTGGCAA\
100 CAGGCGGCCGAGCTGCCGAATTCAGTTGACACGAGGCACAGAAAACAAATATCAAAGATC\
101 TAATAATACAAAACAAACTTGATTAAAACTGGTGCTTAAAGTTTATTACCCACAACTCCA\
102 CAGTCTCTGTGTAAACCACTCGACTCATCTTGTAGCTTATTTTTTTTTAAAGAGGACGTT\
103 TTCTACGGCTGTGGCCCGCCTCTGTGAACCATAGCGGTGTGCGGCGGGGGGTCTGCACCC\
104 GGGTGGGGGACAGAGGGACCTTTAAAGAAAACAAAACTGGACAGAAACAGGAATGTGAGC\
105 TGGGGGAGCTGGCTTGAGTTTCTCAAAAGCCATCGGAAGATGCGAGTTTGTGCCTTTTTT\
106 TTTATTGCTCTGGTGGATTTTTGTGGCTGGGTTTTCTGAAGTCTGAGGAACAATGCCTTA\
107 AGAAAAAACAAACAGCAGGAATCGGTGGGACAGTTTCCTGTGGCCAGCCGAGCCTGGCAG\
108 TGCTGGCACCGCGAGCTGGCCTGACGCCTCAAGCACGGGCACCAGCCGTCATCTCCGGGG\
109 CCAGGGGCTGCAGCCCGGCGGTCCCTGTTTTGCTTTATTGCTGTTTAAGAAAAATGGAGG\
110 TAGTTCCAAAAAAGTGGCAAATCCCGTTGGAGGTTTTGAAGTCCAACAAATTTTAAACGA\
111 ATCCAAAGTGTTCTCACACGTCACATACGATTGAGCATCTCCATCTGGTCGTGAAGCATG\
112 TGGTAGGCACACTTGCAGTGTTACGATCGGAATGCTTTTTATTAAAAGCAAGTAGCATGA\
113 AGTATTGCTTAAATTTTAGGTATAAATAAATATATATATGTATAATATATATTCCAATGT\
114 ATTCCAAGCTAAGAAACTTACTTGATTCTTATGAAATCTTGATAAAATATTTATAATGCA\
115 TTTATAGAAAAAGTATATATATATATATAAAATGAATGCAGATTGCGAAGGTCCCTGCAA\
116 ATGGATGGCTTGTGAATTTGCTCTCAAGGTGCTTATGGAAAGGGATCCTGATTGATTGAA\
117 ATTCATGTTTTCTCAAGCTCCAGATTGGCTAGATTTCAGATCGCCAACACATTCGCCACT\
118 GGGCAACTACCCTACAAGTTTGTACTTTCATTTTAATTATTTTCTAACAGAACCGCTCCC\
119 GTCTCCAAGCCTTCATGCACATATGTACCTAATGAGTTTTTATAGCAAAGAATATAAATT\
120 TGCTGTTGATTTTTGTATGAATTTTTTCACAAAAAGATCCTGAATAAGCATTGTTTTATG\
121 AATTTTACATTTTTCCTCACCATTTAGCAATTTTCTGAATGGTAATAATGTCTAAATCTT\
122 TTTCCTTTCTGAATTCTTGCTTGTACATTTTTTTTTACCTTTCAAAGGTTTTTAATTATT\
123 TTTGTTTTTATTTTTGTACGATGAGTTTTCTGCAGCGTACAGAATTGTTGCTGTCAGATT\
124 CTATTTTCAGAAAGTGAGAGGAGGGACCGTAGGTCTTTTCGGAGTGACACCAACGATTGT\
125 GTCTTTCCTGGTCTGTCCTAGGAGCTGTATAAAGAAGCCCAGGGGCTCTTTTTAACTTTC\
126 AACACTAGTAGTATTACGAGGGGTGGTGTGTTTTTCCCCTCCGTGGCAAGGGCAGGGAGG\
127 GTTGCTTAGGATGCCCGGCCACCCTGGGAGGCTTGCCAGATGCCGGGGGCAGTCAGCATT\
128 AATGAAACTCATGTTTAAACTTCTCTGACCACATCGTCAGGATAGAATTCTAACTTGAGT\
129 TTTCCAAAGACCTTTTGAGCATGTCAGCAATGCATGGGGCACACGTGGGGCTCTTTACCC\
130 ACTTGGGTTTTTCCACTGCAGCCACGTGGCCAGCCCTGGATTTTGGAGCCTGTGGCTGCA\
131 AGGAACCCAGGGACCCTTGTTGCCTGGTGAACCTGCAGGGAGGGTATGATTGCCTGACCA\
132 GGACAGCCAGTCTTTACTCTTTTTCTCTTCAACAGTAACTGACAGTCACGTTTTACTGGT\
133 AACTTATTTTCCAGCACATGAAGCCACCAGTTTCATTCCAAAGTGTATATTGGGTTCAGA\
134 CTTGGGGGCAGAAGTTCAGACACACCGTGCTCAGGAGGGACCCAGAGCCGAGTTTCGGAG\
135 TTTGGTAAAGTTTACAGGGTAGCTTCTGAAATTAACTCAAACTTTTGACCAAATGAGTGC\
136 AGATTCTTGGATTCACTTGGTCACTGGGCTGCTGATGGTCAGCTCTGAGACAGTGGTTTG\
137 AGAGCAGGCAGAACGGTCTTGGGACTTGTTTGACTTTCCCCTCCCTGGTGGCCACTCTTT\
138 GCTCTGAAGCCCAGATTGGCAAGAGGAGCTGGTCCATTCCCCATTCATGGCACAGAGCAG\
139 TGGCAGGGCCCAGCTAGCAGGCTCTTCTGGCCTCCTTGGCCTCATTCTCTGCATAGCCCT\
140 CTGGGGATCCTGCCACCTGCCCTCTTACCCCGCCGTGGCTTATGGGGAGGAATGCATCAT\
141 CTCACTTTTTTTTTTTAAGCAGATGATGGGATAACATGGACTGCTCAGTGGCCAGGTTAT\
142 CAGTGGGGGGACTTAATTCTAATCTCATTCAAATGGAGACGCCCTCTGCAAAGGCCTGGC\
143 AGGGGGAGGCACGTTTCATCTGTCAGCTCACTCCAGCTTCACAAATGTGCTGAGAGCATT\
144 ACTGTGTAGCCTTTTCTTTGAAGACACACTCGGCTCTTCTCCACAGCAAGCGTCCAGGGC\
145 AGATGGCAGAGGATCTGCCTCGGCGTCTGCAGGCGGGACCACGTCAGGGAGGGTTCCTTC\
146 ATGTGTTCTCCCTGTGGGTCCTTGGACCTTTAGCCTTTTTCTTCCTTTGCAAAGGCCTTG\
147 GGGGCACTGGCTGGGAGTCAGCAAGCGAGCACTTTATATCCCTTTGAGGGAAACCCTGAT\
148 GACGCCACTGGGCCTCTTGGCGTCTGCCCTGCCCTCGCGGCTTCCCGCCGTGCCGCAGCG\
149 TGCCCACGTGCCCACGCCCCACCAGCAGGCGGCTGTCCCGGAGGCCGTGGCCCGCTGGGA\
150 CTGGCCGCCCCTCCCCAGCGTCCCAGGGCTCTGGTTCTGGAGGGCCACTTTGTCAAGGTG\
151 TTTCAGTTTTTCTTTACTTCTTTTGAAAATCTGTTTGCAAGGGGAAGGACCATTTCGTAA\
152 TGGTCTGACACAAAAGCAAGTTTGATTTTTGCAGCACTAGCAATGGACTTTGTTGTTTTT\
153 CTTTTTGATCAGAACATTCCTTCTTTACTGGTCACAGCCACGTGCTCATTCCATTCTTCT\
154 TTTTGTAGACTTTGGGCCCACGTGTTTTATGGGCATTGATACATATATAAATATATAGAT\
155 ATAAATATATATGAATATATTTTTTTAAGTTTCCTACACCTGGAGGTTGCATGGACTGTA\
156 CGACCGGCATGACTTTATATTGTATACAGATTTTGCACGCCAAACTCGGCAGCTTTGGGG\
157 AAGAAGAAAAATGCCTTTCTGTTCCCCTCTCATGACATTTGCAGATACAAAAGATGGAAA\
158 TTTTTCTGTAAAACAAAACCTTGAAGGAGAGGAGGGCGGGGAAGTTTGCGTCTTATTGAA\
159 CTTATTCTTAAGAAATTGTACTTTTTATTGTAAGAAAAATAAAAAGGACTACTTAAACAT\
160 TTGTCATATTAAGAAAAAAAGTTTATCTAGCACTTGTGACATACCAATAATAGAGTTTAT\
161 TGTATTTATGTGGAAACAGTGTTTTAGGGAAACTACTCAGAATTCACAGTGAACTGCCTG\
162 TCTCTCTCGAGTTGATTTGGAGGAATTTTGTTTTGTTTTGTTTTGTTTGTTTCCTTTTAT\
163 CTCCTTCCACGGGCCAGGCGAGCGCCGCCCGCCCTCACTGGCCTTGTGACGGTTTATTCT\
164 GATTGAGAACTGGGCGGACTCGAAAGAGTCCCCTTTTCCGCACAGCTGTGTTGACTTTTT\
165 AATTACTTTTAGGTGATGTATGGCTAAGATTTCACTTTAAGCAGTCGTGAACTGTGCGAG\
166 CACTGTGGTTTACAATTATACTTTGCATCGAAAGGAAACCATTTCTTCATTGTAACGAAG\
167 CTGAGCGTGTTCTTAGCTCGGCCTCACTTTGTCTCTGGCATTGATTAAAAGTCTGCTATT";
168   string seq2="ATCCCAGCACATGACAACACTTCAGAAGGG\
169 TCCCCCTGCTGACTGGAGAGCTGGGAATATGGCATTTGGACACTTCATTTGTAAATAGTG\
170 TACATTTTAAACATTGGCTCGAAACTTCAGAGATAAGTCATGGAGAGGACATTGGAGGGG\
171 AGAAATGCAGTTGCTGACTGGGAATTTAAGAATGTGAACTTCTCACTAGAATTGGTATGG\
172 AAAAGCAAAATACTGTAAATAAACTTTTTTTCTAACAATTTGCC";
173   string seq3="GGAGCTGGATGAATGAGAGGCCCCCAGATGCAGAGAGACTGGAGAGGGT\
174 GGGGAGGGGCCCAGCGGCCTTGGTGACAGGCCCAGGGTGGGAGGGGTCGGGGCCCCTGGA\
175 GGGGCAATGGGGAGGTGATGTCTTCTCTCTGCTCAGAGAGCAGGGACTAGGGTAGGACCC\
176 TCACCGCTGCGTCCAGCAGACACTGAACCAGAATTGGAAACGTGCTTGAAACAATCACAC\
177 AGGACACTTTTCTACATTGGTGCAAAATGGAATATTTTGTACATTTTTAAAATGTGATTT\
178 TTGTATATACTTGTATATGTATGCCAATTTGGTGCTTTTTGTAAAGGAACTTTTGTATAA\
179 TAATGCCTGGTCGTTGGGTGACCTGCGATTGTCAGAAAGAGGGGAAGGAAGCCAGGTTGA\
180 TACAGCTGCCCACTTCCTTTCCTGAGCAGGAGGATGGGGTAGCACTCACAGGGACGATGT\
181 GCTGTATTTCAGTGCCTATCCCAGACATACGGGGTGGTAACTGAGTTTGTGTTATATGTT\
182 GTTTTAATAAATGCACAATGCTCTCTTCCTGTTCTTC";
183
184   // now that we've got some data lets see if it crashes
185   Mussa m1;
186   m1.append_sequence(seq1);
187   m1.append_sequence(seq2);
188   m1.append_sequence(seq3);
189
190   m1.set_window(10);
191   m1.set_threshold(8);
192   m1.analyze();
193   m1.set_soft_threshold(10);
194   m1.nway();
195 }