// Gitweb blobdiff of alg/test/test_sequence.cpp. It replaces BOOST_AUTO_TEST_MAIN with BOOST_TEST_DYN_LINK / BOOST_TEST_MODULE, adds the update_annotations_seqref and sequence_no_trailing_newline test cases, switches subseq_annotation_test from start() to parentStart() (and the expected annots[1]->size() from 10 to 5), and opens a block comment before serialize_simple that is closed after serialize_xml_two.
// NOTE(review): line breaks inside the hunks look collapsed to spaces in this copy, and the #include targets and the std::vector element type are missing -- presumably lost during extraction; confirm against the repository before applying.
X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=mussa.git;a=blobdiff_plain;f=alg%2Ftest%2Ftest_sequence.cpp;h=aefa6ac6c7aa6ec2b68285d674622d89d82ee127;hp=82ff14be6c1ff40c6d9ce1155def83c0c5e4dda7;hb=97498410e1fc5c39eac0282a6620b8fcb0f02ff3;hpb=67888dae3b16b9d69aa846e393f11e7ff3633f16 diff --git a/alg/test/test_sequence.cpp b/alg/test/test_sequence.cpp index 82ff14b..aefa6ac 100644 --- a/alg/test/test_sequence.cpp +++ b/alg/test/test_sequence.cpp @@ -1,5 +1,7 @@ -#define BOOST_AUTO_TEST_MAIN -#include +#define BOOST_TEST_DYN_LINK +#define BOOST_TEST_MODULE +#include + #include #include namespace fs=boost::filesystem; @@ -586,6 +588,22 @@ BOOST_AUTO_TEST_CASE( annotation_load_no_species_name ) BOOST_CHECK_EQUAL( annots[0]->annotations()->get("type"), "type"); } +// when we take a subsequence (or do anything else that calls copy_children), +// the annotations need to be updated to point at the right parent +BOOST_AUTO_TEST_CASE( update_annotations_seqref ) +{ + Sequence s1("AAAAGGGG"); + s1.add_annotation("A", "A", 0, 4); + BOOST_CHECK_EQUAL(s1.annotations().size(), 1); + BOOST_CHECK_EQUAL(s1.seqspan(), s1.annotations().front()->parent() ); + + Sequence subseq1(s1.subseq(2,4)); + BOOST_CHECK_EQUAL(subseq1.annotations().size(), 1); + BOOST_CHECK_EQUAL(subseq1.annotations().front()->parentStart(), 0 ); + BOOST_CHECK_EQUAL(subseq1.annotations().front()->parentStop(), 2 ); + BOOST_CHECK_EQUAL(subseq1.seqspan(), subseq1.annotations().front()->parent() ); +} + // ticket:83 when you try to load a sequence from a file that doesn't // have fasta headers it crashes. BOOST_AUTO_TEST_CASE( sequence_past_end ) @@ -817,6 +835,48 @@ BOOST_AUTO_TEST_CASE( annotate_from_sequence ) } } +BOOST_AUTO_TEST_CASE( sequence_no_trailing_newline ) +{ + // sorry about the long string... 
+ string s = "AATTACACAAGGAATATAGGTAGTTTGAATAAAAATATCTTTAACAGCTTGGAGCTATTGAGACAGGAACACTTCCACGCACATGCACAGTTAAACAACTTGAGTGCAACACACAACATTGGCACTAAACGAGATTGAAGGGGGACTTTTTGTGTGTTTTTTTTTCTCTTTTCTTTTTTTGTTATAGTTACTTCAAGTAACACAGCTTGCTTCATATAAATAAGTTAAAACATCTATTTTTTTTCAAGACAAAGCCATTCAGGACAAAGAGATGAACAGAAAGCAGATCTACTTATACAGGCGCTATAATGGCAATAAACAGGCTCATGATTAAAAGATGAATTAGGGCAACGAGAACAGGGCTTCTTCACAGAAGGAACACAAGGGAGTTTCAGAAAGTCACCTTAGTACTGACACTACGCGGGATCCGCTAATACTGCTCAGTACTTTAAACGCTCAGATACTCAGGGACGGAAGGCCCCTCCTGCCGCGGCCATGCTCATGCTTTTCAGCTTATTATCTTTTTTCCACTTCATTCTCCGGTTTTGGAACCAGATTTTAATTTGTCTCTCGGAGAGGCAAAGAGCATGTGCTATTTCAATCCTCCTTCTGCGGGTCAGGTAACGGTTGAAGTGGAACTCCTTCTCCAGCTCCAGGGTCTGGTAGCGCGTGTAGGCCGTCCGGGCCCTTTTGCCTTCCGGGCCGCCTATGTTGTCTGCAATAGAAAAGTCAGCGGTTTAGCCACCAACTCCTGTCTTCCAAAGTCCGCCAGGGGGACAAGCTTGGGTCATGAGCAGGGAACCCAGGCGAAAAGCTCAACAAGTTCTGCCTACCAGCCCGCACACCCCTCCCGAATTTCCTTCTCTCTTCCTTTCTAGAAAGAAAACAATACGATTTGGACCCTGGGAACAATCTGCCCATCTGAGGCTGGGGCCGTGTCCCGGCGGACTCCGGCTTTCCCTGGCCCCTCTCCTGCCCCCTCCGCCCTGCCCCGGGCGCCCCGATCGGGAGGCACAGCCCTCCCAGGCTGCCCACCGCACAGAAACCCAGGAAGCAAGGCCCTTTCCTGAGCGCCCAAGTGGCCTTCGGGTCACCCTCCCTCAAAGTTCCAGCCCCGAGAGCCGCCTCCCGTTTCCAGCCTGCAGGGTTGGGGAGCCTGTTTTCTTTTTCTTCCCTTTCCTTCTCTCTCCCTCCTGCCCCCAAAATTCAGAATCCTGCAGGCTCTCGCCTCGATTCTTTCCCCCAAGCCCCTTTTCGGGGGCTGTAATTAGTAACGCTGTTTCCCCAGCGTAGCCCTCCTCATAAATTATCCGCCGTGACAAGCCCGATTCACGGCTGCTACAGCCATCCTCTACCTCTCTGCGCCTTGCTCGGCTGGCCTGACCCGGGAGCGCGTCCCAAGGCGTGGGGTTCCAGAGGGGTTTTTTGCTTCCTCCCCCTTCCAACGTCTAAACTGTCCCAGAGAACGCCCATTTCCCCCACTATTTGTGAGCGCAGGGTGCTCGCAAAGAAGAGGAGGAAGGAGGAAGGCAGGGGAGGGAGAACGGCAAGGAGAGCTCCGCAGGGCTGGGAGAAATGAGACCAAGAGAGACTGGGAGAGGGCGGCAGAGAAGAGAGGGGGGACCGAGAGCCGCGTCCCCGCGGTCGCGTGGATTTAGAAAAAGGCTGGCTTTACCATGACTTATGTGCAGCTTGCGCATCCAGGGGTAGATCTGGGGTTGGGCGGGCGGCGCCGGGCTCGGCTCGCTCTGCGCACTCGCCTGCTCGCTGCTGGCAGGGGCGTCCTCCTCGGCTCCGGACGCCGTGCCAACCCCCTCTCTGCTGCTGATGTGGGTGCTGCCGGCGTCGGCCGAGGCGCCGCTGGAGTTGCTTAGGGAGTTTTTCCCGCCGTGGTGGCTGTCGCTGCCGGGCGAGGGGGCCACGGCGGAGCAGGGCAGCGGATCGGGCTGAGGAGAGTGCGTGGACGTGGCCGGCTGGCTGTACCTG
GGCTCGGCGGGCGCCGCGCTGGCGCTGGCAGCGTAGCTGCGGGCGCGCTCTCCGGAGCCAAAGTGGCCGGAGCCCGAGCGGCCGACGCTGAGATCCATGCCATTGTAGCCGTAGCCGTACCTGCCGGAGTGCATGCTCGCCGAGTCCCTGAATTGCTCGCTCACGGAACTATGATCTCCATAATTATGCAACTGGTAGTCCGGGCCATTTGGATAGCGACCGCAAAATGAGTTTACAAAATAAGAGCTCATTTGTTTTTTGATATGTGTGCTTGATTTGTGGCTCGCGGTCGTTTGTGCGTCTATAGCACCCTT"; + std::string species = "HumanHXA5\n"; + std::string header0 = ">hg18_knownGene_NM_019102_0\n"; + std::string str0 = "GGGTGCTATAGACGCACAAACGACCGCGAGCCACAAATCAAGCACACATATCAAAAAACAAATGAGCTCTTATTTTGTAAACTCATTTTGCGGTCGCTATCCAAATGGCCCGGACTACCAGTTGCATAATTATGGAGATCATAGTTCCGTGAGCGAGCAATTCAGGGACTCGGCGAGCATGCACTCCGGCAGGTACGGCTACGGCTACAATGGCATGGATCTCAGCGTCGGCCGCTCGGGCTCCGGCCACTTTGGCTCCGGAGAGCGCGCCCGCAGCTACGCTGCCAGCGCCAGCGCGGCGCCCGCCGAGCCCAGGTACAGCCAGCCGGCCACGTCCACGCACTCTCCTCAGCCCGATCCGCTGCCCTGCTCCGCCGTGGCCCCCTCGCCCGGCAGCGACAGCCACCACGGCGGGAAAAACTCCCTAAGCAACTCCAGCGGCGCCTCGGCCGACGCCGGCAGCACCCACATCAGCAGCAGAGAGGGGGTTGGCACGGCGTCCGGAGCCGAGGAGGACGCCCCTGCCAGCAGCGAGCAGGCGAGTGCGCAGAGCGAGCCGAGCCCGGCGCCGCCCGCCCAACCCCAGATCTACCCCTGGATGCGCAAGCTGCACATAAGTCATG"; + std::string header1 = ">hg18_knownGene_NM_019102_1\n"; + std::string str1 = "ACAACATAGGCGGCCCGGAAGGCAAAAGGGCCCGGACGGCCTACACGCGCTACCAGACCCTGGAGCTGGAGAAGGAGTTCCACTTCAACCGTTACCTGACCCGCAGAAGGAGGATTGAAATAGCACATGCTCTTTGCCTCTCCGAGAGACAAATTAAAATCTGGTTCCAAAACCGGAGAATGAAGTGGAAAAAAGATAATAAGCTGAAAAGCATGAGCATGGCCGCGGCAGGAGGGGCCTTCCGTCCCTGAGTATCTGAGCGTTTAAAGTACTGAGCAGTATTAGCGGATCCCGCGTAGTGTCAGTACTAAGGTGACTTTCTGAAACTCCCTTGTGTTCCTTCTGTGAAGAAGCCCTGTTCTCGTTGCCCTAATTCATCTTTTAATCATGAGCCTGTTTATTGCCATTATAGCGCCTGTATAAGTAGATCTGCTTTCTGTTCATCTCTTTGTCCTGAATGGCTTTGTCTTGAAAAAAAATAGATGTTTTAACTTATTTATATGAAGCAAGCTGTGTTACTTGAAGTAACTATAACAAAAAAAGAAAAGAGAAAAAAAAACACACAAAAAGTCCCCCTTCAATCTCGTTTAGTGCCAATGTTGTGTGTTGCACTCAAGTTGTTTAACTGTGCATGTGCGTGGAAGTGTTCCTGTCTCAATAGCTCCAAGCTGTTAAAGATATTTTTATTCAAACTACCTATATTCCTTGT"; + stringstream annot; + annot << species + << header0 + << str0 << std::endl + << std::endl + << header1 + << str1; + // need to convert strings to 
+ sequences for reverse complementing + Sequence seq0(str0, reduced_dna_alphabet); + Sequence seq1(str1, reduced_dna_alphabet); + + Sequence annotated_seq(s, reduced_dna_alphabet); + annotated_seq.load_annot(annot, 0, 0); + + SeqSpanRefList annots_list = annotated_seq.annotations(); + // both sequences were found + BOOST_REQUIRE_EQUAL( annots_list.size(), 2 ); + + std::vector annots(annots_list.begin(), annots_list.end()); + // does each match equal the reverse complement of its query? + BOOST_CHECK_EQUAL( annots[0]->size(), seq0.size()); + BOOST_CHECK_EQUAL( annots[0]->sequence(), seq0.rev_comp() ); + // this should hopefully catch the case when my hack in + // sequence.cpp::push_back_seq::operator() is no longer needed. + // spirit (or my grammar) was duplicating the last char, and + // the hack removes the duplicate. But if whatever's causing + // the dup gets fixed, actual meaningful data will be removed. + // see mussa ticket:265 for more information + BOOST_CHECK_EQUAL( annots[1]->size(), seq1.size()); + BOOST_CHECK_EQUAL( annots[1]->sequence(), seq1.rev_comp() ); + +} + BOOST_AUTO_TEST_CASE( subseq_annotation_test ) { string s("CCGCCCCCCATCATCGCGGCTCTCCGAGAGTCCCGCGCCCCACTCCCGGC" @@ -840,22 +900,22 @@ BOOST_AUTO_TEST_CASE( subseq_annotation_test ) BOOST_REQUIRE_EQUAL( annots_list.size(), 4 ); std::vector annots(annots_list.begin(), annots_list.end()); - BOOST_CHECK_EQUAL( annots[0]->start(), 0); + BOOST_CHECK_EQUAL( annots[0]->parentStart(), 0); BOOST_CHECK_EQUAL( annots[0]->size(), 5); BOOST_REQUIRE( annots[0]->annotations() ); BOOST_CHECK_EQUAL( annots[0]->annotations()->name(), "0-10"); - BOOST_CHECK_EQUAL( annots[1]->start(), 5); - BOOST_CHECK_EQUAL( annots[1]->size(), 10); + BOOST_CHECK_EQUAL( annots[1]->parentStart(), 5); + BOOST_CHECK_EQUAL( annots[1]->size(), 5); BOOST_REQUIRE( annots[1]->annotations() ); BOOST_CHECK_EQUAL( annots[1]->annotations()->name(), "10-20"); - BOOST_CHECK_EQUAL( annots[2]->start(), 0); + BOOST_CHECK_EQUAL( annots[2]->parentStart(), 0); BOOST_CHECK_EQUAL( 
annots[2]->size(), 10); BOOST_REQUIRE( annots[2]->annotations() ); BOOST_CHECK_EQUAL( annots[2]->annotations()->name(), "0-20"); - BOOST_CHECK_EQUAL( annots[3]->start(), 3); + BOOST_CHECK_EQUAL( annots[3]->parentStart(), 3); BOOST_CHECK_EQUAL( annots[3]->size(), 7); BOOST_REQUIRE( annots[3]->annotations() ); BOOST_CHECK_EQUAL( annots[3]->annotations()->name(), "8-12"); @@ -909,7 +969,7 @@ BOOST_AUTO_TEST_CASE( get_name ) seq.set_fasta_header("fasta human"); BOOST_CHECK_EQUAL( seq.get_name(), "fasta human"); } - +/* BOOST_AUTO_TEST_CASE( serialize_simple ) { std::string seq_string = "AAGGCCTT"; @@ -1017,3 +1077,4 @@ BOOST_AUTO_TEST_CASE( serialize_xml_two ) // test if our pointers are the same BOOST_CHECK_EQUAL(seq1_loaded.data(), seq2_loaded.data()); } +*/