X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=mussa.git;a=blobdiff_plain;f=alg%2Ftest%2Ftest_mussa.cpp;h=5255fe26d7a309130c99b0369bbb8749fba7298a;hp=3a850367f1d9b109a1f23cde12ac3285a05b7081;hb=97498410e1fc5c39eac0282a6620b8fcb0f02ff3;hpb=fbd5136e8b95367e89b5c13478503276c17c7f33 diff --git a/alg/test/test_mussa.cpp b/alg/test/test_mussa.cpp index 3a85036..5255fe2 100644 --- a/alg/test/test_mussa.cpp +++ b/alg/test/test_mussa.cpp @@ -1,4 +1,8 @@ -#include +#define BOOST_TEST_DYN_LINK +#define BOOST_TEST_MODULE test_mussa +#include + +#include #include namespace fs = boost::filesystem; #include @@ -11,6 +15,7 @@ namespace assign = boost::assign; #include #include "alg/mussa.hpp" +#include "mussa_exceptions.hpp" using namespace std; @@ -18,6 +23,7 @@ using namespace std; BOOST_AUTO_TEST_CASE( mussa_simple ) { Mussa m; + BOOST_CHECK_EQUAL(m.empty(), true); BOOST_CHECK_EQUAL(m.get_name(), "" ); BOOST_CHECK_EQUAL(m.get_window(), 0); BOOST_CHECK_EQUAL(m.get_threshold(), 0); @@ -37,6 +43,8 @@ BOOST_AUTO_TEST_CASE( mussa_simple ) BOOST_CHECK_EQUAL(m.get_soft_threshold(), 25); m.set_analysis_mode(Mussa::RadialNway); BOOST_CHECK_EQUAL(m.get_analysis_mode(), Mussa::RadialNway); + // make sure our path is empty + BOOST_CHECK_EQUAL(m.get_analysis_path().string(), fs::path().string() ); m.clear(); BOOST_CHECK_EQUAL(m.get_name(), "" ); @@ -45,6 +53,20 @@ BOOST_AUTO_TEST_CASE( mussa_simple ) BOOST_CHECK_EQUAL(m.get_analysis_mode(), Mussa::TransitiveNway); } +BOOST_AUTO_TEST_CASE ( mussa_title ) +{ + Mussa m; + + BOOST_CHECK_EQUAL( m.get_title(), "Unnamed"); + string foo("foo"); + m.set_name(foo); + BOOST_CHECK_EQUAL( m.get_title(), foo); + string foopath_name("/my/silly/path"); + fs::path foopath(foopath_name); + m.set_analysis_path(foopath); + BOOST_CHECK_EQUAL( m.get_title().size(), 14); +} + BOOST_AUTO_TEST_CASE( mussa_analysis_name ) { Mussa m; @@ -65,10 +87,12 @@ BOOST_AUTO_TEST_CASE( mussa_sequences ) std::string s2("TTTTNNNN"); Mussa analysis; + BOOST_CHECK_EQUAL(analysis.empty(), true); analysis.append_sequence(s0); analysis.append_sequence(s1); analysis.append_sequence(s2); + BOOST_CHECK_EQUAL( analysis.empty(), false); BOOST_CHECK_EQUAL( analysis.sequences().size(), 3 ); BOOST_CHECK_EQUAL( *(analysis.sequences()[0]), s0); BOOST_CHECK_EQUAL( *(analysis.sequences()[1]), s1); @@ -88,6 +112,58 @@ BOOST_AUTO_TEST_CASE ( empty_mussa_set_threshold ) m.nway(); } +BOOST_AUTO_TEST_CASE( mussa_load_mupa_crlf ) +{ + fs::path example_path(EXAMPLE_DIR, fs::native); + fs::path seq_path(example_path / "seq" / "mouse_mck_pro.fa"); + fs::path annot_path(example_path / "mm_mck3test.annot"); + + std::string mupa( + "# hello\015\012" + "ANA_NAME load_mupa_crlf\015\012"); + mupa += "SEQUENCE " + seq_path.native_file_string() + "\015\012"; + mupa += "ANNOTATION " + annot_path.native_file_string() + "\015\012"; + + istringstream mupa_stream(mupa); + Mussa m; + fs::path base; + m.load_mupa_stream( mupa_stream, base ); + // Should run with no exceptions +} + +BOOST_AUTO_TEST_CASE( mussa_load_mupa_comment_character ) +{ + fs::path mupa_path(EXAMPLE_DIR, fs::native); + fs::path seq_path = fs::initial_path() / "seq" / "mouse_mck_pro.fa"; + fs::path annot_path = fs::initial_path() / "mm_mck3test.annot"; + + std::string mupa( + "# hello\015\012" + "ANA_NAME load_mupa_crlf\015\012"); + mupa += "#SEQUENCE " + seq_path.native_file_string() + "\015\012"; + mupa += "#ANNOTATION " + annot_path.native_file_string() + "\015\012"; + + istringstream mupa_stream(mupa); + Mussa m; + fs::path base; + m.load_mupa_stream( mupa_stream, base ); + // Should run with no exceptions +} + +BOOST_AUTO_TEST_CASE( mussa_load_mupa_exception ) +{ + std::string mupa( + "# hello\015\012" + "ANA_NAME load_mupa_crlf\015\012" + "mwahhaha I broke you!\n" + ); + + istringstream mupa_stream(mupa); + Mussa m; + fs::path base; + BOOST_CHECK_THROW(m.load_mupa_stream( mupa_stream, base ), mussa_load_error); +} + BOOST_AUTO_TEST_CASE( mussa_load_mupa ) { fs::path mupa_path(EXAMPLE_DIR, fs::native); @@ -98,13 +174,23 @@ BOOST_AUTO_TEST_CASE( mussa_load_mupa ) m1.load_mupa_file( mupa_path ); m1.analyze(); m1.save( result_path ); + BOOST_CHECK_EQUAL( m1.empty(), false); BOOST_CHECK_EQUAL( m1.get_name(), std::string("mck3test") ); BOOST_CHECK( m1.size() > 0 ); + BOOST_CHECK_EQUAL( m1.get_analysis_path().string(), result_path.string()); Mussa m2; m2.load( result_path ); + BOOST_CHECK_EQUAL( m2.empty(), false); BOOST_CHECK_EQUAL( m2.get_name(), result_path.leaf() ); BOOST_CHECK_EQUAL( m1.size(), m2.size() ); + BOOST_CHECK_EQUAL( result_path.string(), m2.get_analysis_path().string() ); + + // check clear a bit + m2.clear(); + BOOST_CHECK_EQUAL( m2.empty(), true); + BOOST_CHECK_EQUAL( m2.is_dirty(), false ); + BOOST_CHECK_EQUAL( m2.get_analysis_path().string(), fs::path().string()); } BOOST_AUTO_TEST_CASE( mussa_load_full_path ) @@ -117,6 +203,35 @@ BOOST_AUTO_TEST_CASE( mussa_load_full_path ) BOOST_CHECK( m1.size() > 0); BOOST_CHECK_EQUAL( m1.get_window(), 30 ); BOOST_CHECK_EQUAL( m1.get_threshold(), 20); + BOOST_CHECK_EQUAL( m1.is_dirty(), true); + BOOST_CHECK_EQUAL( m1.get_analysis_path().string(), ""); +} + +BOOST_AUTO_TEST_CASE( mussa_valid_motifs_in_new_analysis ) +{ + Mussa m1; + fs::path full_path(fs::path(EXAMPLE_DIR, fs::native) / "mck3test.mupa"); + m1.load_mupa_file( full_path ); + m1.analyze(); + // check motifs + BOOST_CHECK( m1.sequences().size() > 0 ); + BOOST_CHECK_EQUAL( m1.sequences()[0]->motifs().size(), 0 ); +} + +// make sure we know that mupa files cannot be directories +BOOST_AUTO_TEST_CASE( mussa_mupa_is_file_not_directory ) +{ + fs::path curdir("."); + Mussa m1; + BOOST_CHECK_THROW(m1.load_mupa_file( curdir ), mussa_load_error ); +} + +// catch error if annotation isn't a file +BOOST_AUTO_TEST_CASE( mussa_annotation_is_not_file ) +{ + Mussa m1; + fs::path full_path(fs::path(EXAMPLE_DIR, fs::native) / "directory.mupa"); + BOOST_CHECK_THROW( m1.load_mupa_file( full_path ), mussa_load_error ); } BOOST_AUTO_TEST_CASE( mussa_load_analysis ) @@ -127,26 +242,28 @@ BOOST_AUTO_TEST_CASE( mussa_load_analysis ) m1.analyze(); Mussa m2; - m2.load( fs::initial_path() / "mck3test_w30_t20"); + fs::path analysis_path = fs::initial_path() / "mck3test_w30_t20"; + m2.load( analysis_path ); BOOST_CHECK_EQUAL( m1.size(), m2.size() ); BOOST_CHECK_EQUAL( m1.get_window(), m2.get_window() ); BOOST_CHECK_EQUAL( m1.get_threshold(), m2.get_threshold() ); + BOOST_CHECK_EQUAL( m2.get_analysis_path().string(), analysis_path.string()); } BOOST_AUTO_TEST_CASE( mussa_load_motif ) { string data = "AAGG 1.0 1.0 0.0\n" - "GGTT 0.0 0.1 1.0\n" - "ZXY 2 1.9 0\n"; + "GGTT 0.0 0.1 1.0 1.0\n"; istringstream test_istream(data); Mussa m1; m1.append_sequence("AAAAGGGGTTTT"); - m1.append_sequence("GGGCCCCTTGGTT"); + m1.append_sequence("GGGCCCCTTCCAATT"); m1.load_motifs(test_istream); + BOOST_CHECK_EQUAL( m1.motifs().size(), 2); for (Mussa::vector_sequence_type::const_iterator seq_i = m1.sequences().begin(); seq_i != m1.sequences().end(); ++seq_i) @@ -155,6 +272,119 @@ BOOST_AUTO_TEST_CASE( mussa_load_motif ) } } +BOOST_AUTO_TEST_CASE( mussa_load_broken_motif ) +{ + string data = "AAGG 1.0 1.0 0.0\n" + "GGTT 0.0 0.1 1.0 1.0\n" + "ZZCTA 0.1 0.0 1.0\n"; + + istringstream test_istream(data); + + Mussa m1; + m1.append_sequence("AAAAGGGGTTTT"); + m1.append_sequence("GGGCCCCTTCCAATT"); + BOOST_CHECK_THROW(m1.load_motifs(test_istream), motif_load_error); + + BOOST_CHECK_EQUAL( m1.motifs().size(), 0); +} + +BOOST_AUTO_TEST_CASE( mussa_named_motif ) +{ + string data = "CCAATT cat 0.1 0.2 0.3\n"; + istringstream test_istream(data); + + Mussa m1; + m1.append_sequence("AAAAGGGGTTTT"); + m1.append_sequence("GGGCCCCTTCCAATT"); + m1.load_motifs(test_istream); + + std::set motifs = m1.motifs(); + BOOST_REQUIRE_EQUAL(motifs.size(), 1); + BOOST_CHECK_EQUAL(motifs.begin()->get_name(), "cat"); +} + +BOOST_AUTO_TEST_CASE( mussa_weirdly_spaced_named_motif ) +{ + string data = "CCAATT cat_meow123 0.1 0.2 0.3\n"; + istringstream test_istream(data); + + Mussa m1; + m1.append_sequence("AAAAGGGGTTTT"); + m1.append_sequence("GGGCCCCTTCCAATT"); + m1.load_motifs(test_istream); + + std::set motifs = m1.motifs(); + BOOST_REQUIRE_EQUAL(motifs.size(), 1); + BOOST_CHECK_EQUAL(motifs.begin()->get_name(), "cat_meow123"); +} + +BOOST_AUTO_TEST_CASE( mussa_name_quoted_motif ) +{ + string data = "CCAATT \"cat meow 123\" 0.1 0.2 0.3\n"; + istringstream test_istream(data); + + Mussa m1; + m1.append_sequence("AAAAGGGGTTTT"); + m1.append_sequence("GGGCCCCTTCCAATT"); + m1.load_motifs(test_istream); + + std::set motifs = m1.motifs(); + BOOST_REQUIRE_EQUAL(motifs.size(), 1); + BOOST_CHECK_EQUAL(motifs.begin()->get_name(), "cat meow 123"); +} + +BOOST_AUTO_TEST_CASE( mussa_name_embedded_quote_motif ) +{ + // pretty obviously this shouldn't work as " are our delimiter + // and i'm too lazy to add support for \ in the parser + string data = "ATA 0.5 0.5 0.5\n" + "CCAATT \"cat \"meow 123\" 0.1 0.2 0.3\n"; + istringstream test_istream(data); + + Mussa m1; + m1.append_sequence("AAAAGGGGTTTT"); + m1.append_sequence("GGGCCCCTTCCAATT"); + BOOST_CHECK_THROW( m1.load_motifs(test_istream), motif_load_error); + + std::set motifs = m1.motifs(); + BOOST_REQUIRE_EQUAL(motifs.size(), 0); +} + +BOOST_AUTO_TEST_CASE( mussa_save_motif ) +{ + string data = "ATA 1 1 1 1\n" + "CAT \"my name\" 1 0 0.5 0.5\n"; + istringstream data_istream(data); + + Mussa m1; + m1.append_sequence("AAAAGGGGTTTT"); + m1.append_sequence("GGGCCCCTTCCAATT"); + m1.load_motifs(data_istream); + + string save; + ostringstream save_ostream(save); + m1.save_motifs(save_ostream); + + istringstream reloaded_istream(save_ostream.str()); + Mussa m2; + m2.append_sequence("AAAAGGGGTTTT"); + m2.append_sequence("GGGCCCCTTCCAATT"); + m2.load_motifs(reloaded_istream); + + BOOST_REQUIRE_EQUAL(m1.motifs().size(), m2.motifs().size()); + Mussa::motif_set::const_iterator m1motif = m1.motifs().begin(); + Mussa::motif_set::const_iterator m2motif = m2.motifs().begin(); + for (; + m1motif != m1.motifs().end() and m2motif != m2.motifs().end(); + ++m1motif, ++m2motif) + { + BOOST_CHECK_EQUAL(m1motif->get_sequence(), m2motif->get_sequence()); + BOOST_CHECK_EQUAL(m1motif->get_name(), m2motif->get_name()); + BOOST_CHECK_EQUAL(m1.colorMapper()->lookup("motif", m1motif->get_sequence()), + m2.colorMapper()->lookup("motif", m2motif->get_sequence())); + } +} + BOOST_AUTO_TEST_CASE( mussa_add_motif ) { vector motifs; @@ -204,9 +434,9 @@ BOOST_AUTO_TEST_CASE( mussa_add_motif ) } static void -local_align_test(const Mussa::vector_sequence_type &seqs, - const list& result, - const list >& reversed) +two_way_local_align_test(const Mussa::vector_sequence_type &seqs, + const list& result, + const list >& reversed) { map > m; assign::insert(m)('A', assign::list_of('A')('T') ) @@ -234,9 +464,8 @@ local_align_test(const Mussa::vector_sequence_type &seqs, BOOST_CHECK_EQUAL( first_basepair, complimented_second) ; } } - -BOOST_AUTO_TEST_CASE( local_alignment ) +BOOST_AUTO_TEST_CASE( two_way_local_alignment ) { string s0("GCGCATAT"); string s1("AAAAAAAT"); @@ -249,6 +478,8 @@ BOOST_AUTO_TEST_CASE( local_alignment ) analysis.set_window(4); analysis.analyze(); NwayPaths npath = analysis.paths(); + BOOST_REQUIRE_EQUAL( npath.pathz.size(), 2 ); + list result; list > reversed; list::iterator pathz_i = npath.pathz.begin(); @@ -260,7 +491,7 @@ BOOST_AUTO_TEST_CASE( local_alignment ) result, reversed); - local_align_test(analysis.sequences(), result, reversed); + two_way_local_align_test(analysis.sequences(), result, reversed); ++pathz_i; result.clear(); @@ -271,9 +502,64 @@ BOOST_AUTO_TEST_CASE( local_alignment ) selected_paths.end(), result, reversed); - local_align_test(analysis.sequences(), result, reversed); + two_way_local_align_test(analysis.sequences(), result, reversed); +} +BOOST_AUTO_TEST_CASE( three_way_local_alignment ) +{ + string s0("AGCAGGGAGGGTTTAAATGGCACCCAGCAGTTGGTGTGAGG"); + string s1("AGCGGGAAGGGTTTAAATGGCACCGGGCAGTTGGCGTGAGG"); + string s2("CAGCGCCGGGGTTTAAATGGCACCGAGCAGTTGGCGCAGGG"); + + Mussa analysis; + analysis.append_sequence(s0); + analysis.append_sequence(s1); + analysis.append_sequence(s2); + analysis.set_threshold(23); + analysis.set_window(30); + analysis.analyze(); + NwayPaths npath = analysis.paths(); + BOOST_CHECK_EQUAL( npath.refined_pathz.size(), 1 ); + + list result; + list > reversed; + // grab 1 path (since there's only one) + list::iterator pathz_i = npath.pathz.begin(); + list selected_paths; + selected_paths.push_back(*pathz_i); + analysis.createLocalAlignment(selected_paths.begin(), + selected_paths.end(), + result, + reversed); + + for(std::list::iterator result_i = result.begin(); + result_i != result.end(); + ++result_i) + { + ConservedPath::path_element first_element = *(result_i->begin()); + for (ConservedPath::path_type::iterator element_i = result_i->begin(); + element_i != result_i->end(); + ++element_i) + { + BOOST_CHECK_EQUAL( *element_i, first_element ); + BOOST_CHECK_EQUAL( s0[*element_i], s1[*element_i] ); + BOOST_CHECK_EQUAL( s1[*element_i], s2[*element_i] ); + BOOST_CHECK_EQUAL( s0[*element_i], s2[*element_i] ); + } + } +} +BOOST_AUTO_TEST_CASE( mussa_window_larger_than_sequence ) +{ + string s0("AGCAGGG"); + string s1("CAGCGGG"); + + Mussa analysis; + analysis.append_sequence(s0); + analysis.append_sequence(s1); + analysis.set_threshold(23); + analysis.set_window(30); + BOOST_CHECK_THROW(analysis.analyze(), seqcomp_error); } BOOST_AUTO_TEST_CASE( subanalysis )