X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=mussa.git;a=blobdiff_plain;f=alg%2Ftest%2Ftest_sequence.cpp;h=82ff14be6c1ff40c6d9ce1155def83c0c5e4dda7;hp=921c5ef7aa2437a177b6f8094279ccca50d71441;hb=67888dae3b16b9d69aa846e393f11e7ff3633f16;hpb=5c3dc8c42679629c19ece07c0e63a53b1c69663f diff --git a/alg/test/test_sequence.cpp b/alg/test/test_sequence.cpp index 921c5ef..82ff14b 100644 --- a/alg/test/test_sequence.cpp +++ b/alg/test/test_sequence.cpp @@ -463,16 +463,16 @@ BOOST_AUTO_TEST_CASE( sequence_empty_reverse_iterator) BOOST_AUTO_TEST_CASE( annotation_load ) { string annot_data = "human\n" - "0 10 name type\n" - "10 20 myf7\n" - "20 30 myod\n" - "50\t55 anothername\n" - "60 50 backward\n" - ">ident3 asdf\n" + "0 10 name type\n" //0 + "10 20 myf7\n" //1 + "20 30 myod\n" //2 + "50\t55 anothername\n" //3 + "60 50 backward\n" //4 + ">ident3 asdf\n" //7 (as these are added last) "GCT\n" "gCTn\n" - "75\t90\tname2\ttype2\n" - "100 120 name-asdf type!@#$%\n" + "75\t90\tname2\ttype2\n" //5 + "100 120 name-asdf type!@#$%\n" //6 ; string s(100, 'A'); s += "GCTGCTAATT"; @@ -480,27 +480,37 @@ BOOST_AUTO_TEST_CASE( annotation_load ) //istringstream annot_stream(annot_data); seq.parse_annot(annot_data, 0, 0); - std::list annots_list = seq.annotations(); - std::vector annots(annots_list.begin(), annots_list.end()); + SeqSpanRefList annots_list(seq.annotations()); + std::vector annots(annots_list.begin(), annots_list.end()); BOOST_REQUIRE_EQUAL( annots.size(), 8); - BOOST_CHECK_EQUAL( annots[0].begin, 0 ); - BOOST_CHECK_EQUAL( annots[0].end, 10 ); - BOOST_CHECK_EQUAL( annots[0].type, "type"); - BOOST_CHECK_EQUAL( annots[0].name, "name"); - BOOST_CHECK_EQUAL( annots[1].name, "myf7"); - BOOST_CHECK_EQUAL( annots[2].name, "myod"); - BOOST_CHECK_EQUAL( annots[3].name, "anothername"); - BOOST_CHECK_EQUAL( annots[4].name, "backward"); - BOOST_CHECK_EQUAL( annots[5].name, "name2"); - BOOST_CHECK_EQUAL( annots[5].end, 90); - BOOST_CHECK_EQUAL( annots[6].begin, 100); - BOOST_CHECK_EQUAL( annots[6].end, 120); - BOOST_CHECK_EQUAL( annots[6].name, "name-asdf"); - BOOST_CHECK_EQUAL( annots[6].type, "type!@#$%"); + BOOST_CHECK_EQUAL( annots[0]->start(), 0 ); + BOOST_CHECK_EQUAL( annots[0]->stop(), 10 ); + BOOST_REQUIRE( annots[0]->annotations() ); + BOOST_CHECK_EQUAL( annots[0]->annotations()->get("type"), "type"); + BOOST_CHECK_EQUAL( annots[0]->annotations()->name(), "name"); + BOOST_REQUIRE( annots[1]->annotations() ); + BOOST_CHECK_EQUAL( annots[1]->annotations()->name(), "myf7"); + BOOST_REQUIRE( annots[2]->annotations() ); + BOOST_CHECK_EQUAL( annots[2]->annotations()->name(), "myod"); + BOOST_REQUIRE( annots[3]->annotations() ); + BOOST_CHECK_EQUAL( annots[3]->annotations()->name(), "anothername"); + BOOST_REQUIRE( annots[4]->annotations() ); + BOOST_CHECK_EQUAL( annots[4]->annotations()->name(), "backward"); + BOOST_REQUIRE( annots[5]->annotations() ); + BOOST_CHECK_EQUAL( annots[5]->annotations()->name(), "name2"); + BOOST_CHECK_EQUAL( annots[5]->start(), 75); + BOOST_CHECK_EQUAL( annots[5]->stop(), 90); + BOOST_CHECK_EQUAL( annots[6]->start(), 100); + BOOST_CHECK_EQUAL( annots[6]->stop(), 110); + BOOST_REQUIRE( annots[6]->annotations() ); + BOOST_CHECK_EQUAL( annots[6]->annotations()->name(), "name-asdf"); + BOOST_CHECK_EQUAL( annots[6]->annotations()->get("type"), "type!@#$%"); // sequence defined annotations will always be after the // absolute positions - BOOST_CHECK_EQUAL( annots[7].name, "ident3 asdf"); - BOOST_CHECK_EQUAL( annots[7].begin, 100); + BOOST_REQUIRE( annots[7]->annotations() ); + BOOST_CHECK_EQUAL( annots[7]->annotations()->name(), "ident3 asdf"); + BOOST_CHECK_EQUAL( annots[7]->start(), 100); + BOOST_CHECK_EQUAL( annots[7]->stop(), 107); //BOOST_CHECK_EQUAL( annots } @@ -545,7 +555,7 @@ BOOST_AUTO_TEST_CASE(annotation_ucsc_html_load) "GGTGGAGACGACCTGGACCCTAACTACGTGCTCAGCAGCCGCGTCCGCAC"; Sequence seq(s, reduced_dna_alphabet); seq.parse_annot(annot_data); - std::list annots = seq.annotations(); + SeqSpanRefList annots(seq.annotations()); BOOST_CHECK_EQUAL( annots.size(), 2); } @@ -568,12 +578,12 @@ BOOST_AUTO_TEST_CASE( annotation_load_no_species_name ) //istringstream annot_stream(annot_data); seq.parse_annot(annot_data, 0, 0); - std::list annots_list = seq.annotations(); - std::vector annots(annots_list.begin(), annots_list.end()); + SeqSpanRefList annots_list(seq.annotations()); + std::vector annots(annots_list.begin(), annots_list.end()); BOOST_REQUIRE_EQUAL( annots.size(), 8); - BOOST_CHECK_EQUAL( annots[0].begin, 0 ); - BOOST_CHECK_EQUAL( annots[0].end, 10 ); - BOOST_CHECK_EQUAL( annots[0].type, "type"); + BOOST_CHECK_EQUAL( annots[0]->start(), 0 ); + BOOST_CHECK_EQUAL( annots[0]->stop(), 10 ); + BOOST_CHECK_EQUAL( annots[0]->annotations()->get("type"), "type"); } // ticket:83 when you try to load a sequence from a file that doesn't @@ -742,12 +752,14 @@ BOOST_AUTO_TEST_CASE( sequence_motif_subseq) BOOST_AUTO_TEST_CASE( annot_test ) { - annot a(0, 10, "test", "thing"); - - BOOST_CHECK_EQUAL( a.begin, 0 ); - BOOST_CHECK_EQUAL( a.end, 10 ); - BOOST_CHECK_EQUAL( a.type, "test" ); - BOOST_CHECK_EQUAL( a.name, "thing" ); + Sequence s("AAAAAAAAAA"); + s.add_annotation("test", "thing", 0, 10); + SeqSpanRef a(s.annotations().front()); + + BOOST_CHECK_EQUAL( a->start(), 0 ); + BOOST_CHECK_EQUAL( a->stop(), 10 ); + BOOST_CHECK_EQUAL( a->annotations()->get("name"), "test" ); + BOOST_CHECK_EQUAL( a->annotations()->get("type"), "thing" ); motif m(10, "AAGGCC"); BOOST_CHECK_EQUAL( m.begin, 10 ); @@ -795,12 +807,13 @@ BOOST_AUTO_TEST_CASE( annotate_from_sequence ) } } BOOST_CHECK_EQUAL(seq.annotations().size(), count); - const std::list &a = seq.annotations(); - for (std::list::const_iterator annot_i = a.begin(); + const SeqSpanRefList& a = seq.annotations(); + for (SeqSpanRefList::const_iterator annot_i = a.begin(); annot_i != a.end(); ++annot_i) { - int count = annot_i->end - annot_i->begin ; + //FIXME: was I doing something here? + int count = (*annot_i)->stop() - (*annot_i)->start(); } } @@ -816,29 +829,36 @@ BOOST_AUTO_TEST_CASE( subseq_annotation_test ) "AGCTAAAACTTTGGAAACTTTAGATCCCAGACAGGTGGCTTTCTTGCAGT"); Sequence seq(s, reduced_dna_alphabet); - - seq.add_annotation(annot(0, 10, "0-10", "0-10")); - seq.add_annotation(annot(10, 20, "10-20", "10-20")); - seq.add_annotation(annot(0, 20, "0-20", "0-20")); - seq.add_annotation(annot(8, 12, "8-12", "8-12")); - seq.add_annotation(annot(100, 5000, "100-5000", "100-5000")); + seq.add_annotation("0-10", "0-10", 0, 10); + seq.add_annotation("10-20", "10-20", 10, 20); + seq.add_annotation("0-20", "0-20", 0, 20); + seq.add_annotation("8-12", "8-12", 8, 12); + seq.add_annotation("100-5000", "100-5000", 100, 5000); Sequence subseq = seq.subseq(5, 10); - const list annots = subseq.annotations(); - // generate some ground truth - list correct; - correct.push_back(annot(0, 5, "0-10", "0-10")); - correct.push_back(annot(5,10, "10-20", "10-20")); - correct.push_back(annot(0,10, "0-20", "0-20")); - correct.push_back(annot(3, 7, "8-12", "8-12")); - BOOST_REQUIRE_EQUAL( annots.size(), correct.size() ); - - list::iterator correct_i = correct.begin(); - list::const_iterator annot_i = annots.begin(); - for(; annot_i != annots.end(); ++annot_i, ++correct_i) - { - BOOST_CHECK( *annot_i == *correct_i ); - } + SeqSpanRefList annots_list = subseq.annotations(); + BOOST_REQUIRE_EQUAL( annots_list.size(), 4 ); + + std::vector annots(annots_list.begin(), annots_list.end()); + BOOST_CHECK_EQUAL( annots[0]->start(), 0); + BOOST_CHECK_EQUAL( annots[0]->size(), 5); + BOOST_REQUIRE( annots[0]->annotations() ); + BOOST_CHECK_EQUAL( annots[0]->annotations()->name(), "0-10"); + + BOOST_CHECK_EQUAL( annots[1]->start(), 5); + BOOST_CHECK_EQUAL( annots[1]->size(), 10); + BOOST_REQUIRE( annots[1]->annotations() ); + BOOST_CHECK_EQUAL( annots[1]->annotations()->name(), "10-20"); + + BOOST_CHECK_EQUAL( annots[2]->start(), 0); + BOOST_CHECK_EQUAL( annots[2]->size(), 10); + BOOST_REQUIRE( annots[2]->annotations() ); + BOOST_CHECK_EQUAL( annots[2]->annotations()->name(), "0-20"); + + BOOST_CHECK_EQUAL( annots[3]->start(), 3); + BOOST_CHECK_EQUAL( annots[3]->size(), 7); + BOOST_REQUIRE( annots[3]->annotations() ); + BOOST_CHECK_EQUAL( annots[3]->annotations()->name(), "8-12"); } BOOST_AUTO_TEST_CASE( motif_annotation_update ) @@ -856,9 +876,9 @@ BOOST_AUTO_TEST_CASE( motif_annotation_update ) // starting conditions BOOST_CHECK_EQUAL(seq.annotations().size(), 0); BOOST_CHECK_EQUAL(seq.motifs().size(), 0); - seq.add_annotation(annot(0, 10, "0-10", "0-10")); - seq.add_annotation(annot(10, 20, "10-20", "10-20")); - seq.add_annotation(annot(0, 20, "0-20", "0-20")); + seq.add_annotation("0-10", "0-10", 0, 10); + seq.add_annotation("10-20", "10-20", 10, 20); + seq.add_annotation("0-20", "0-20", 0, 20); BOOST_CHECK_EQUAL(seq.annotations().size(), 3); BOOST_CHECK_EQUAL(seq.motifs().size(), 0); seq.add_motif("CCGTCCC"); @@ -920,8 +940,7 @@ BOOST_AUTO_TEST_CASE( serialize_tree ) seq.set_species("ribbet"); seq.add_motif("AA"); seq.add_motif("GC"); - annot a1(6,7,"t","t"); - seq.add_annotation(a1); + seq.add_annotation("t", "t", 6, 7); std::ostringstream oss; // allocate/deallocate serialization components @@ -950,8 +969,7 @@ BOOST_AUTO_TEST_CASE( serialize_xml_sequence ) seq.set_species("ribbet"); seq.add_motif("AA"); seq.add_motif("GC"); - annot a1(6,7,"t","t"); - seq.add_annotation(a1); + seq.add_annotation("t", "t", 6, 7); std::ostringstream oss; // allocate/deallocate serialization components