Update mussa to build on ubuntu 10.04 with qt 4.6.2 +boost 1.40.0.1
[mussa.git] / alg / test / test_nway.cpp
index d67753c0aeb3e01e40713d8426e5deb4d701306d..9c7d34233ffae87ff562ecdf35d7bef3f817586b 100644 (file)
@@ -1,4 +1,9 @@
-#include <boost/test/auto_unit_test.hpp>
+#define BOOST_TEST_DYN_LINK
+#define BOOST_TEST_MODULE test_nway
+#include <boost/test/unit_test.hpp>
+
+#include <boost/filesystem/path.hpp>
+namespace fs = boost::filesystem;
 
 #include <string>
 #include <iostream>
@@ -17,11 +22,18 @@ BOOST_AUTO_TEST_CASE( nway_null )
   string s2("TTTTNNNN");
 
   Mussa analysis;
-  analysis.add_a_seq(s0);
-  analysis.add_a_seq(s1);
-  analysis.add_a_seq(s2);
-  analysis.analyze(4,3);
+  analysis.append_sequence(s0);
+  analysis.append_sequence(s1);
+  analysis.append_sequence(s2);
+  analysis.set_window(4);
+  analysis.set_threshold(3);
+  analysis.analyze();
   NwayPaths npath = analysis.paths();
+  // we added 3 sequences, but none-matched
+  BOOST_CHECK_EQUAL( npath.sequence_count(), 0); 
+  BOOST_CHECK_EQUAL( npath.size(), 0 );
+  BOOST_CHECK_EQUAL( npath.path_size(), 0 );
+  BOOST_CHECK_EQUAL( npath.refined_path_size(), 0 );
   // there should be no paths for these sequences
   for (std::list<ConservedPath >::iterator pathz_i = npath.pathz.begin();
        pathz_i != npath.pathz.end();
@@ -38,10 +50,16 @@ BOOST_AUTO_TEST_CASE( nway_test )
   Sequence seq1(s1);
 
   Mussa analysis;
-  analysis.add_a_seq(s0);
-  analysis.add_a_seq(s1);
-  analysis.analyze(4,3);
+  analysis.append_sequence(s0);
+  analysis.append_sequence(s1);
+  analysis.set_window(4);
+  analysis.set_threshold(3);
+  analysis.analyze();
   NwayPaths npath = analysis.paths();
+  BOOST_CHECK_EQUAL( npath.sequence_count(), 2); 
+  BOOST_CHECK_EQUAL( npath.size(), 2 );
+  BOOST_CHECK_EQUAL( npath.path_size(), 2 );
+  BOOST_CHECK_EQUAL( npath.refined_path_size(), 2 );
   for (std::list<ConservedPath >::iterator pathz_i = npath.pathz.begin();
        pathz_i != npath.pathz.end();
        ++pathz_i)
@@ -55,4 +73,139 @@ BOOST_AUTO_TEST_CASE( nway_test )
   }
 }
 
+BOOST_AUTO_TEST_CASE( nway_refine )
+{
+  fs::path mupa_path( EXAMPLE_DIR, fs::native );
+  mupa_path /= "mck3test.mupa";
+  Mussa m1;
+  m1.load_mupa_file( mupa_path );
+  m1.analyze();
+  const NwayPaths& npath = m1.paths();
+  //BOOST_CHECK_EQUAL (npath.path_size(), npath.refined_path_size());
+  // FIXME: shouldn't these be equal to start with?
+  BOOST_CHECK(npath.path_size() > npath.refined_path_size());
+  size_t first_refined_size = npath.refined_path_size();
+  BOOST_CHECK( first_refined_size > 0 );
+
+  // we're using a window size 30 (threshold 20) example
+  m1.set_soft_threshold(22);
+  m1.nway();
+  BOOST_CHECK_EQUAL( npath.path_size(), npath.size() );
+  BOOST_CHECK( npath.path_size() > 0 );
+  BOOST_CHECK( npath.refined_path_size() > 0);
+  BOOST_CHECK( npath.refined_path_size() < first_refined_size);
+
+  m1.set_soft_threshold(20);
+  m1.nway();
+  BOOST_CHECK_EQUAL(npath.refined_path_size(), first_refined_size);
+}
 
+// The following data causes a crash...
+// ticket:85 is the user report 
+// ticket:83 provided the sample data
+// ticket:64 was my version where I didn't have a consistent way of 
+// duplicating.
+BOOST_AUTO_TEST_CASE( nway_threshold_crash )
+{
+  string seq1 = "CACTCCCTCGAAGCTGCTGT\
+TCTCTTGTCTGTCTGTCTCTGTCTTGAAGCTCAGCCAAGAAACTTTCCCGTGTCACGCCT\
+GCGTCCCACCGTGGGGCTCTCTTGGAGCACCCAGGGACACCCAGCGTGCAACAGCCACGG\
+GAAGCCTTTCTGCCGCCCAGGCCCACAGGTCTCGAGACGCACATGCACGCCTGGGCGTGG\
+CAGCCTCACAGGGAACACGGGACAGACGCCGGCGACGCGCAGACACACGGACACGCGGAA\
+GCCAAGCACACTCTGGCGGGTCCCGCAAGGGACGCCGTGGAAGAAAGGAGCCTGTGGCAA\
+CAGGCGGCCGAGCTGCCGAATTCAGTTGACACGAGGCACAGAAAACAAATATCAAAGATC\
+TAATAATACAAAACAAACTTGATTAAAACTGGTGCTTAAAGTTTATTACCCACAACTCCA\
+CAGTCTCTGTGTAAACCACTCGACTCATCTTGTAGCTTATTTTTTTTTAAAGAGGACGTT\
+TTCTACGGCTGTGGCCCGCCTCTGTGAACCATAGCGGTGTGCGGCGGGGGGTCTGCACCC\
+GGGTGGGGGACAGAGGGACCTTTAAAGAAAACAAAACTGGACAGAAACAGGAATGTGAGC\
+TGGGGGAGCTGGCTTGAGTTTCTCAAAAGCCATCGGAAGATGCGAGTTTGTGCCTTTTTT\
+TTTATTGCTCTGGTGGATTTTTGTGGCTGGGTTTTCTGAAGTCTGAGGAACAATGCCTTA\
+AGAAAAAACAAACAGCAGGAATCGGTGGGACAGTTTCCTGTGGCCAGCCGAGCCTGGCAG\
+TGCTGGCACCGCGAGCTGGCCTGACGCCTCAAGCACGGGCACCAGCCGTCATCTCCGGGG\
+CCAGGGGCTGCAGCCCGGCGGTCCCTGTTTTGCTTTATTGCTGTTTAAGAAAAATGGAGG\
+TAGTTCCAAAAAAGTGGCAAATCCCGTTGGAGGTTTTGAAGTCCAACAAATTTTAAACGA\
+ATCCAAAGTGTTCTCACACGTCACATACGATTGAGCATCTCCATCTGGTCGTGAAGCATG\
+TGGTAGGCACACTTGCAGTGTTACGATCGGAATGCTTTTTATTAAAAGCAAGTAGCATGA\
+AGTATTGCTTAAATTTTAGGTATAAATAAATATATATATGTATAATATATATTCCAATGT\
+ATTCCAAGCTAAGAAACTTACTTGATTCTTATGAAATCTTGATAAAATATTTATAATGCA\
+TTTATAGAAAAAGTATATATATATATATAAAATGAATGCAGATTGCGAAGGTCCCTGCAA\
+ATGGATGGCTTGTGAATTTGCTCTCAAGGTGCTTATGGAAAGGGATCCTGATTGATTGAA\
+ATTCATGTTTTCTCAAGCTCCAGATTGGCTAGATTTCAGATCGCCAACACATTCGCCACT\
+GGGCAACTACCCTACAAGTTTGTACTTTCATTTTAATTATTTTCTAACAGAACCGCTCCC\
+GTCTCCAAGCCTTCATGCACATATGTACCTAATGAGTTTTTATAGCAAAGAATATAAATT\
+TGCTGTTGATTTTTGTATGAATTTTTTCACAAAAAGATCCTGAATAAGCATTGTTTTATG\
+AATTTTACATTTTTCCTCACCATTTAGCAATTTTCTGAATGGTAATAATGTCTAAATCTT\
+TTTCCTTTCTGAATTCTTGCTTGTACATTTTTTTTTACCTTTCAAAGGTTTTTAATTATT\
+TTTGTTTTTATTTTTGTACGATGAGTTTTCTGCAGCGTACAGAATTGTTGCTGTCAGATT\
+CTATTTTCAGAAAGTGAGAGGAGGGACCGTAGGTCTTTTCGGAGTGACACCAACGATTGT\
+GTCTTTCCTGGTCTGTCCTAGGAGCTGTATAAAGAAGCCCAGGGGCTCTTTTTAACTTTC\
+AACACTAGTAGTATTACGAGGGGTGGTGTGTTTTTCCCCTCCGTGGCAAGGGCAGGGAGG\
+GTTGCTTAGGATGCCCGGCCACCCTGGGAGGCTTGCCAGATGCCGGGGGCAGTCAGCATT\
+AATGAAACTCATGTTTAAACTTCTCTGACCACATCGTCAGGATAGAATTCTAACTTGAGT\
+TTTCCAAAGACCTTTTGAGCATGTCAGCAATGCATGGGGCACACGTGGGGCTCTTTACCC\
+ACTTGGGTTTTTCCACTGCAGCCACGTGGCCAGCCCTGGATTTTGGAGCCTGTGGCTGCA\
+AGGAACCCAGGGACCCTTGTTGCCTGGTGAACCTGCAGGGAGGGTATGATTGCCTGACCA\
+GGACAGCCAGTCTTTACTCTTTTTCTCTTCAACAGTAACTGACAGTCACGTTTTACTGGT\
+AACTTATTTTCCAGCACATGAAGCCACCAGTTTCATTCCAAAGTGTATATTGGGTTCAGA\
+CTTGGGGGCAGAAGTTCAGACACACCGTGCTCAGGAGGGACCCAGAGCCGAGTTTCGGAG\
+TTTGGTAAAGTTTACAGGGTAGCTTCTGAAATTAACTCAAACTTTTGACCAAATGAGTGC\
+AGATTCTTGGATTCACTTGGTCACTGGGCTGCTGATGGTCAGCTCTGAGACAGTGGTTTG\
+AGAGCAGGCAGAACGGTCTTGGGACTTGTTTGACTTTCCCCTCCCTGGTGGCCACTCTTT\
+GCTCTGAAGCCCAGATTGGCAAGAGGAGCTGGTCCATTCCCCATTCATGGCACAGAGCAG\
+TGGCAGGGCCCAGCTAGCAGGCTCTTCTGGCCTCCTTGGCCTCATTCTCTGCATAGCCCT\
+CTGGGGATCCTGCCACCTGCCCTCTTACCCCGCCGTGGCTTATGGGGAGGAATGCATCAT\
+CTCACTTTTTTTTTTTAAGCAGATGATGGGATAACATGGACTGCTCAGTGGCCAGGTTAT\
+CAGTGGGGGGACTTAATTCTAATCTCATTCAAATGGAGACGCCCTCTGCAAAGGCCTGGC\
+AGGGGGAGGCACGTTTCATCTGTCAGCTCACTCCAGCTTCACAAATGTGCTGAGAGCATT\
+ACTGTGTAGCCTTTTCTTTGAAGACACACTCGGCTCTTCTCCACAGCAAGCGTCCAGGGC\
+AGATGGCAGAGGATCTGCCTCGGCGTCTGCAGGCGGGACCACGTCAGGGAGGGTTCCTTC\
+ATGTGTTCTCCCTGTGGGTCCTTGGACCTTTAGCCTTTTTCTTCCTTTGCAAAGGCCTTG\
+GGGGCACTGGCTGGGAGTCAGCAAGCGAGCACTTTATATCCCTTTGAGGGAAACCCTGAT\
+GACGCCACTGGGCCTCTTGGCGTCTGCCCTGCCCTCGCGGCTTCCCGCCGTGCCGCAGCG\
+TGCCCACGTGCCCACGCCCCACCAGCAGGCGGCTGTCCCGGAGGCCGTGGCCCGCTGGGA\
+CTGGCCGCCCCTCCCCAGCGTCCCAGGGCTCTGGTTCTGGAGGGCCACTTTGTCAAGGTG\
+TTTCAGTTTTTCTTTACTTCTTTTGAAAATCTGTTTGCAAGGGGAAGGACCATTTCGTAA\
+TGGTCTGACACAAAAGCAAGTTTGATTTTTGCAGCACTAGCAATGGACTTTGTTGTTTTT\
+CTTTTTGATCAGAACATTCCTTCTTTACTGGTCACAGCCACGTGCTCATTCCATTCTTCT\
+TTTTGTAGACTTTGGGCCCACGTGTTTTATGGGCATTGATACATATATAAATATATAGAT\
+ATAAATATATATGAATATATTTTTTTAAGTTTCCTACACCTGGAGGTTGCATGGACTGTA\
+CGACCGGCATGACTTTATATTGTATACAGATTTTGCACGCCAAACTCGGCAGCTTTGGGG\
+AAGAAGAAAAATGCCTTTCTGTTCCCCTCTCATGACATTTGCAGATACAAAAGATGGAAA\
+TTTTTCTGTAAAACAAAACCTTGAAGGAGAGGAGGGCGGGGAAGTTTGCGTCTTATTGAA\
+CTTATTCTTAAGAAATTGTACTTTTTATTGTAAGAAAAATAAAAAGGACTACTTAAACAT\
+TTGTCATATTAAGAAAAAAAGTTTATCTAGCACTTGTGACATACCAATAATAGAGTTTAT\
+TGTATTTATGTGGAAACAGTGTTTTAGGGAAACTACTCAGAATTCACAGTGAACTGCCTG\
+TCTCTCTCGAGTTGATTTGGAGGAATTTTGTTTTGTTTTGTTTTGTTTGTTTCCTTTTAT\
+CTCCTTCCACGGGCCAGGCGAGCGCCGCCCGCCCTCACTGGCCTTGTGACGGTTTATTCT\
+GATTGAGAACTGGGCGGACTCGAAAGAGTCCCCTTTTCCGCACAGCTGTGTTGACTTTTT\
+AATTACTTTTAGGTGATGTATGGCTAAGATTTCACTTTAAGCAGTCGTGAACTGTGCGAG\
+CACTGTGGTTTACAATTATACTTTGCATCGAAAGGAAACCATTTCTTCATTGTAACGAAG\
+CTGAGCGTGTTCTTAGCTCGGCCTCACTTTGTCTCTGGCATTGATTAAAAGTCTGCTATT";
+  string seq2="ATCCCAGCACATGACAACACTTCAGAAGGG\
+TCCCCCTGCTGACTGGAGAGCTGGGAATATGGCATTTGGACACTTCATTTGTAAATAGTG\
+TACATTTTAAACATTGGCTCGAAACTTCAGAGATAAGTCATGGAGAGGACATTGGAGGGG\
+AGAAATGCAGTTGCTGACTGGGAATTTAAGAATGTGAACTTCTCACTAGAATTGGTATGG\
+AAAAGCAAAATACTGTAAATAAACTTTTTTTCTAACAATTTGCC";
+  string seq3="GGAGCTGGATGAATGAGAGGCCCCCAGATGCAGAGAGACTGGAGAGGGT\
+GGGGAGGGGCCCAGCGGCCTTGGTGACAGGCCCAGGGTGGGAGGGGTCGGGGCCCCTGGA\
+GGGGCAATGGGGAGGTGATGTCTTCTCTCTGCTCAGAGAGCAGGGACTAGGGTAGGACCC\
+TCACCGCTGCGTCCAGCAGACACTGAACCAGAATTGGAAACGTGCTTGAAACAATCACAC\
+AGGACACTTTTCTACATTGGTGCAAAATGGAATATTTTGTACATTTTTAAAATGTGATTT\
+TTGTATATACTTGTATATGTATGCCAATTTGGTGCTTTTTGTAAAGGAACTTTTGTATAA\
+TAATGCCTGGTCGTTGGGTGACCTGCGATTGTCAGAAAGAGGGGAAGGAAGCCAGGTTGA\
+TACAGCTGCCCACTTCCTTTCCTGAGCAGGAGGATGGGGTAGCACTCACAGGGACGATGT\
+GCTGTATTTCAGTGCCTATCCCAGACATACGGGGTGGTAACTGAGTTTGTGTTATATGTT\
+GTTTTAATAAATGCACAATGCTCTCTTCCTGTTCTTC";
+
+  // now that we've got some data lets see if it crashes
+  Mussa m1;
+  m1.append_sequence(seq1);
+  m1.append_sequence(seq2);
+  m1.append_sequence(seq3);
+
+  m1.set_window(10);
+  m1.set_threshold(8);
+  m1.analyze();
+  m1.set_soft_threshold(10);
+  m1.nway();
+}