+BOOST_AUTO_TEST_CASE( sequence_no_trailing_newline )
+{
+ // Loads two FASTA-style annotation records, the last of which has no
+ // trailing newline, and checks that both are found in the target
+ // sequence as reverse complements.
+ //
+ // sorry about the long string... it is written as two adjacent literals
+ // because a string literal cannot contain a raw line break.
+ std::string s = "AATTACACAAGGAATATAGGTAGTTTGAATAAAAATATCTTTAACAGCTTGGAGCTATTGAGACAGGAACACTTCCACGCACATGCACAGTTAAACAACTTGAGTGCAACACACAACATTGGCACTAAACGAGATTGAAGGGGGACTTTTTGTGTGTTTTTTTTTCTCTTTTCTTTTTTTGTTATAGTTACTTCAAGTAACACAGCTTGCTTCATATAAATAAGTTAAAACATCTATTTTTTTTCAAGACAAAGCCATTCAGGACAAAGAGATGAACAGAAAGCAGATCTACTTATACAGGCGCTATAATGGCAATAAACAGGCTCATGATTAAAAGATGAATTAGGGCAACGAGAACAGGGCTTCTTCACAGAAGGAACACAAGGGAGTTTCAGAAAGTCACCTTAGTACTGACACTACGCGGGATCCGCTAATACTGCTCAGTACTTTAAACGCTCAGATACTCAGGGACGGAAGGCCCCTCCTGCCGCGGCCATGCTCATGCTTTTCAGCTTATTATCTTTTTTCCACTTCATTCTCCGGTTTTGGAACCAGATTTTAATTTGTCTCTCGGAGAGGCAAAGAGCATGTGCTATTTCAATCCTCCTTCTGCGGGTCAGGTAACGGTTGAAGTGGAACTCCTTCTCCAGCTCCAGGGTCTGGTAGCGCGTGTAGGCCGTCCGGGCCCTTTTGCCTTCCGGGCCGCCTATGTTGTCTGCAATAGAAAAGTCAGCGGTTTAGCCACCAACTCCTGTCTTCCAAAGTCCGCCAGGGGGACAAGCTTGGGTCATGAGCAGGGAACCCAGGCGAAAAGCTCAACAAGTTCTGCCTACCAGCCCGCACACCCCTCCCGAATTTCCTTCTCTCTTCCTTTCTAGAAAGAAAACAATACGATTTGGACCCTGGGAACAATCTGCCCATCTGAGGCTGGGGCCGTGTCCCGGCGGACTCCGGCTTTCCCTGGCCCCTCTCCTGCCCCCTCCGCCCTGCCCCGGGCGCCCCGATCGGGAGGCACAGCCCTCCCAGGCTGCCCACCGCACAGAAACCCAGGAAGCAAGGCCCTTTCCTGAGCGCCCAAGTGGCCTTCGGGTCACCCTCCCTCAAAGTTCCAGCCCCGAGAGCCGCCTCCCGTTTCCAGCCTGCAGGGTTGGGGAGCCTGTTTTCTTTTTCTTCCCTTTCCTTCTCTCTCCCTCCTGCCCCCAAAATTCAGAATCCTGCAGGCTCTCGCCTCGATTCTTTCCCCCAAGCCCCTTTTCGGGGGCTGTAATTAGTAACGCTGTTTCCCCAGCGTAGCCCTCCTCATAAATTATCCGCCGTGACAAGCCCGATTCACGGCTGCTACAGCCATCCTCTACCTCTCTGCGCCTTGCTCGGCTGGCCTGACCCGGGAGCGCGTCCCAAGGCGTGGGGTTCCAGAGGGGTTTTTTGCTTCCTCCCCCTTCCAACGTCTAAACTGTCCCAGAGAACGCCCATTTCCCCCACTATTTGTGAGCGCAGGGTGCTCGCAAAGAAGAGGAGGAAGGAGGAAGGCAGGGGAGGGAGAACGGCAAGGAGAGCTCCGCAGGGCTGGGAGAAATGAGACCAAGAGAGACTGGGAGAGGGCGGCAGAGAAGAGAGGGGGGACCGAGAGCCGCGTCCCCGCGGTCGCGTGGATTTAGAAAAAGGCTGGCTTTACCATGACTTATGTGCAGCTTGCGCATCCAGGGGTAGATCTGGGGTTGGGCGGGCGGCGCCGGGCTCGGCTCGCTCTGCGCACTCGCCTGCTCGCTGCTGGCAGGGGCGTCCTCCTCGGCTCCGGACGCCGTGCCAACCCCCTCTCTGCTGCTGATGTGGGTGCTGCCGGCGTCGGCCGAGGCGCCGCTGGAGTTGCTTAGGGAGTTTTTCCCGCCGTGGTGGCTGTCGCTGCCGGGCGAGGGGGCCACGGCGGAGCAGGGCAGCGGATCGGGCTGAGGAGAGTGCGTGGACGTGGCCGGCTGGCTGTACCTG"
+ "GGCTCGGCGGGCGCCGCGCTGGCGCTGGCAGCGTAGCTGCGGGCGCGCTCTCCGGAGCCAAAGTGGCCGGAGCCCGAGCGGCCGACGCTGAGATCCATGCCATTGTAGCCGTAGCCGTACCTGCCGGAGTGCATGCTCGCCGAGTCCCTGAATTGCTCGCTCACGGAACTATGATCTCCATAATTATGCAACTGGTAGTCCGGGCCATTTGGATAGCGACCGCAAAATGAGTTTACAAAATAAGAGCTCATTTGTTTTTTGATATGTGTGCTTGATTTGTGGCTCGCGGTCGTTTGTGCGTCTATAGCACCCTT";
+ std::string species = "HumanHXA5\n";
+ std::string header0 = ">hg18_knownGene_NM_019102_0\n";
+ std::string str0 = "GGGTGCTATAGACGCACAAACGACCGCGAGCCACAAATCAAGCACACATATCAAAAAACAAATGAGCTCTTATTTTGTAAACTCATTTTGCGGTCGCTATCCAAATGGCCCGGACTACCAGTTGCATAATTATGGAGATCATAGTTCCGTGAGCGAGCAATTCAGGGACTCGGCGAGCATGCACTCCGGCAGGTACGGCTACGGCTACAATGGCATGGATCTCAGCGTCGGCCGCTCGGGCTCCGGCCACTTTGGCTCCGGAGAGCGCGCCCGCAGCTACGCTGCCAGCGCCAGCGCGGCGCCCGCCGAGCCCAGGTACAGCCAGCCGGCCACGTCCACGCACTCTCCTCAGCCCGATCCGCTGCCCTGCTCCGCCGTGGCCCCCTCGCCCGGCAGCGACAGCCACCACGGCGGGAAAAACTCCCTAAGCAACTCCAGCGGCGCCTCGGCCGACGCCGGCAGCACCCACATCAGCAGCAGAGAGGGGGTTGGCACGGCGTCCGGAGCCGAGGAGGACGCCCCTGCCAGCAGCGAGCAGGCGAGTGCGCAGAGCGAGCCGAGCCCGGCGCCGCCCGCCCAACCCCAGATCTACCCCTGGATGCGCAAGCTGCACATAAGTCATG";
+ std::string header1 = ">hg18_knownGene_NM_019102_1\n";
+ std::string str1 = "ACAACATAGGCGGCCCGGAAGGCAAAAGGGCCCGGACGGCCTACACGCGCTACCAGACCCTGGAGCTGGAGAAGGAGTTCCACTTCAACCGTTACCTGACCCGCAGAAGGAGGATTGAAATAGCACATGCTCTTTGCCTCTCCGAGAGACAAATTAAAATCTGGTTCCAAAACCGGAGAATGAAGTGGAAAAAAGATAATAAGCTGAAAAGCATGAGCATGGCCGCGGCAGGAGGGGCCTTCCGTCCCTGAGTATCTGAGCGTTTAAAGTACTGAGCAGTATTAGCGGATCCCGCGTAGTGTCAGTACTAAGGTGACTTTCTGAAACTCCCTTGTGTTCCTTCTGTGAAGAAGCCCTGTTCTCGTTGCCCTAATTCATCTTTTAATCATGAGCCTGTTTATTGCCATTATAGCGCCTGTATAAGTAGATCTGCTTTCTGTTCATCTCTTTGTCCTGAATGGCTTTGTCTTGAAAAAAAATAGATGTTTTAACTTATTTATATGAAGCAAGCTGTGTTACTTGAAGTAACTATAACAAAAAAAGAAAAGAGAAAAAAAAACACACAAAAAGTCCCCCTTCAATCTCGTTTAGTGCCAATGTTGTGTGTTGCACTCAAGTTGTTTAACTGTGCATGTGCGTGGAAGTGTTCCTGTCTCAATAGCTCCAAGCTGTTAAAGATATTTTTATTCAAACTACCTATATTCCTTGT";
+ // assemble the annotation stream; note there is no newline after the
+ // final sequence, which is the case this test is named for
+ std::stringstream annot;
+ annot << species
+ << header0
+ << str0 << '\n'
+ << '\n'
+ << header1
+ << str1;
+ // need to convert strings to sequences for reverse complementing
+ Sequence seq0(str0, reduced_dna_alphabet);
+ Sequence seq1(str1, reduced_dna_alphabet);
+
+ Sequence annotated_seq(s, reduced_dna_alphabet);
+ annotated_seq.load_annot(annot, 0, 0);
+
+ SeqSpanRefList annots_list = annotated_seq.annotations();
+ // both sequences were found
+ BOOST_REQUIRE_EQUAL( annots_list.size(), 2 );
+
+ // copy into a vector so the annotations can be indexed
+ std::vector<SeqSpanRef> annots(annots_list.begin(), annots_list.end());
+ // are they the same sequence?
+ BOOST_CHECK_EQUAL( annots[0]->size(), seq0.size());
+ BOOST_CHECK_EQUAL( annots[0]->sequence(), seq0.rev_comp() );
+ // This should hopefully catch the case when my hack in
+ // sequence.cpp::push_back_seq::operator() is no longer needed.
+ // Spirit (or my grammar) was duplicating the last char, and the hack
+ // removes the duplicate. If whatever is causing the duplication gets
+ // fixed, the hack will start removing actual meaningful data.
+ // See mussa ticket:265 for more information.
+ BOOST_CHECK_EQUAL( annots[1]->size(), seq1.size());
+ BOOST_CHECK_EQUAL( annots[1]->sequence(), seq1.rev_comp() );
+
+}
+
+BOOST_AUTO_TEST_CASE( subseq_annotation_test )
+{
+ // Annotate a parent sequence, take a subsequence of it, and check which
+ // annotations the subsequence reports and where they sit inside it.
+ std::string bases = 
+ "CCGCCCCCCATCATCGCGGCTCTCCGAGAGTCCCGCGCCCCACTCCCGGC"
+ "ACCCACCTGACCGCGGGCGGCTCCGGCCCCGCTTCGCCCCACTGCGATCA"
+ "GTCGCGTCCCGCAGGCCAGGCACGCCCCGCCGCTCCCGCTGCGCCGGGCG"
+ "TCTGGGACCTCGGGCGGCTCCTCCGAGGGGCGGGGCAGCCGGGAGCCACG"
+ "CCCCCGCAGGTGAGCCGGCCACGCCCACCGCCCGTGGGAAGTTCAGCCTC"
+ "GGGGCTCCAGCCCCGCGGGAATGGCAGAACTTCGCACGCGGAACTGGTAA"
+ "CCTCCAGGACACCTCGAATCAGGGTGATTGTAGCGCAGGGGCCTTGGCCA"
+ "AGCTAAAACTTTGGAAACTTTAGATCCCAGACAGGTGGCTTTCTTGCAGT";
+ Sequence parent(bases, reduced_dna_alphabet);
+
+ // each annotation is named after the numbers it was created with
+ parent.add_annotation("0-10", "0-10", 0, 10);
+ parent.add_annotation("10-20", "10-20", 10, 20);
+ parent.add_annotation("0-20", "0-20", 0, 20);
+ parent.add_annotation("8-12", "8-12", 8, 12);
+ parent.add_annotation("100-5000", "100-5000", 100, 5000);
+
+ Sequence window(parent.subseq(5, 10));
+ SeqSpanRefList span_list(window.annotations());
+ // only four of the five annotations are expected on the subsequence
+ BOOST_REQUIRE_EQUAL( span_list.size(), 4 );
+
+ // a vector gives indexed access to the reported spans
+ std::vector<SeqSpanRef> spans(span_list.begin(), span_list.end());
+
+ // "0-10"
+ BOOST_CHECK_EQUAL( spans[0]->parentStart(), 0);
+ BOOST_CHECK_EQUAL( spans[0]->size(), 5);
+ BOOST_REQUIRE( spans[0]->annotations() );
+ BOOST_CHECK_EQUAL( spans[0]->annotations()->name(), "0-10");
+
+ // "10-20"
+ BOOST_CHECK_EQUAL( spans[1]->parentStart(), 5);
+ BOOST_CHECK_EQUAL( spans[1]->size(), 5);
+ BOOST_REQUIRE( spans[1]->annotations() );
+ BOOST_CHECK_EQUAL( spans[1]->annotations()->name(), "10-20");
+
+ // "0-20"
+ BOOST_CHECK_EQUAL( spans[2]->parentStart(), 0);
+ BOOST_CHECK_EQUAL( spans[2]->size(), 10);
+ BOOST_REQUIRE( spans[2]->annotations() );
+ BOOST_CHECK_EQUAL( spans[2]->annotations()->name(), "0-20");
+
+ // "8-12"
+ // NOTE(review): a size of 7 is not 12 - 8; it would fit the last
+ // argument of add_annotation being a length that is then cut off at
+ // the end of the subsequence -- confirm which meaning is intended.
+ BOOST_CHECK_EQUAL( spans[3]->parentStart(), 3);
+ BOOST_CHECK_EQUAL( spans[3]->size(), 7);
+ BOOST_REQUIRE( spans[3]->annotations() );
+ BOOST_CHECK_EQUAL( spans[3]->annotations()->name(), "8-12");
+}
+
+BOOST_AUTO_TEST_CASE( motif_annotation_update )
+{
+ // Tracks the annotation and motif counts of one sequence while
+ // annotations are added, a motif is added, and motifs are cleared.
+ std::string bases = 
+ "CCGTCCCCCATCATCGCGGCTCTCCGAGAGTCCCGCGCCCCACTCCCGGC"
+ "ACCCACCTGACCGCGGGCGGCTCCGGCCCCGCTTCGCCCCACTGCGATCA"
+ "GTCGCGTCCCGCAGGCCAGGCACGCCCCGCCGCTCCCGCTGCGCCGGGCG"
+ "TCTGGGACCTCGGGCGGCTCCTCCGAGGGGCGGGGCAGCCGGGAGCCACG"
+ "CCCCCGCAGGTGAGCCGGCCACGCCCACCGCCCGTGGGAAGTTCAGCCTC"
+ "GGGGCTCCAGCCCCGCGGGAATGGCAGAACTTCGCACGCGGAACTGGTAA"
+ "CCTCCAGGACACCTCGAATCAGGGTGATTGTAGCGCAGGGGCCTTGGCCA"
+ "AGCTAAAACTTTGGAAACTTTAGATCCCAGACAGGTGGCTTTCTTGCAGT";
+ Sequence target(bases, reduced_dna_alphabet);
+
+ // freshly built: nothing attached yet
+ BOOST_CHECK_EQUAL(target.annotations().size(), 0);
+ BOOST_CHECK_EQUAL(target.motifs().size(), 0);
+
+ // three annotations: annotation count goes up, motif count does not
+ target.add_annotation("0-10", "0-10", 0, 10);
+ target.add_annotation("10-20", "10-20", 10, 20);
+ target.add_annotation("0-20", "0-20", 0, 20);
+ BOOST_CHECK_EQUAL(target.annotations().size(), 3);
+ BOOST_CHECK_EQUAL(target.motifs().size(), 0);
+
+ // one motif: motif count goes up, annotation count is unchanged
+ target.add_motif("CCGTCCC");
+ BOOST_CHECK_EQUAL(target.annotations().size(), 3);
+ BOOST_CHECK_EQUAL(target.motifs().size(), 1);
+
+ // clearing motifs must leave the annotations in place
+ target.clear_motifs();
+ BOOST_CHECK_EQUAL(target.annotations().size(), 3);
+ BOOST_CHECK_EQUAL(target.motifs().size(), 0);
+}
+
+BOOST_AUTO_TEST_CASE( out_operator )
+{
+ // Writing a Sequence to a stream should produce the text it was built
+ // from.
+ string s("AAGGCCTT");
+ Sequence seq(s, reduced_dna_alphabet);
+
+ ostringstream buf;
+ // stream the Sequence, not the raw string: streaming s would compare
+ // the string with itself and never exercise the output operator
+ buf << seq;
+ BOOST_CHECK_EQUAL( s, buf.str() );
+}
+
+BOOST_AUTO_TEST_CASE( get_name )
+{
+ // get_name() is checked before any setter and after each setter call.
+ Sequence named_seq("AAGGCCTT", reduced_dna_alphabet);
+
+ // nothing set: the name is empty
+ BOOST_CHECK_EQUAL( named_seq.get_name(), "" );
+
+ // species set: the name is the species
+ named_seq.set_species("hooman"); // anyone remember tradewars?
+ BOOST_CHECK_EQUAL( named_seq.get_name(), "hooman");
+
+ // fasta header set as well: the name is now the header
+ named_seq.set_fasta_header("fasta human");
+ BOOST_CHECK_EQUAL( named_seq.get_name(), "fasta human");
+}
+/*
+BOOST_AUTO_TEST_CASE( serialize_simple )
+{
+ std::string seq_string = "AAGGCCTT";
+ Sequence seq(seq_string, reduced_dna_alphabet);
+ seq.set_species("ribbet");
+ std::ostringstream oss;
+ // allocate/deallocate serialization components
+ {
+ boost::archive::text_oarchive oarchive(oss);
+ const Sequence& const_seq(seq);
+ BOOST_CHECK_EQUAL(seq, const_seq);
+ oarchive << const_seq;
+ }
+ Sequence seq_loaded;
+ {
+ std::istringstream iss(oss.str());
+ boost::archive::text_iarchive iarchive(iss);
+ iarchive >> seq_loaded;
+ }
+ BOOST_CHECK_EQUAL(seq_loaded, seq);
+ BOOST_CHECK_EQUAL(seq.get_species(), "ribbet");
+}
+
+BOOST_AUTO_TEST_CASE( serialize_tree )
+{
+ std::string seq_string = "AAGGCCTT";
+ Sequence seq(seq_string, reduced_dna_alphabet);
+ seq.set_species("ribbet");
+ seq.add_motif("AA");
+ seq.add_motif("GC");
+ seq.add_annotation("t", "t", 6, 7);
+
+ std::ostringstream oss;
+ // allocate/deallocate serialization components
+ {
+ boost::archive::text_oarchive oarchive(oss);
+ const Sequence& const_seq(seq);
+ BOOST_CHECK_EQUAL(seq, const_seq);
+ oarchive << const_seq;
+ }
+
+ Sequence seq_loaded;
+ {
+ std::istringstream iss(oss.str());
+ boost::archive::text_iarchive iarchive(iss);
+ iarchive >> seq_loaded;
+ }
+ BOOST_CHECK_EQUAL(seq_loaded, seq);
+}
+
+// this writes out an "old" style annotated sequence
+// with annotations attached as "motifs" and "annots"
+BOOST_AUTO_TEST_CASE( serialize_xml_sequence )
+{
+ std::string seq_string = "AAGGCCTT";
+ Sequence seq(seq_string, reduced_dna_alphabet);
+ seq.set_species("ribbet");
+ seq.add_motif("AA");
+ seq.add_motif("GC");
+ seq.add_annotation("t", "t", 6, 7);
+
+ std::ostringstream oss;
+ // allocate/deallocate serialization components
+ {
+ boost::archive::xml_oarchive oarchive(oss);
+ const Sequence& const_seq(seq);
+ BOOST_CHECK_EQUAL(seq, const_seq);
+ oarchive << boost::serialization::make_nvp("root", const_seq);
+ }
+ Sequence seq_loaded;
+ {
+ std::istringstream iss(oss.str());
+ boost::archive::xml_iarchive iarchive(iss);
+ iarchive >> boost::serialization::make_nvp("root", seq_loaded);
+ }
+ BOOST_CHECK_EQUAL(seq_loaded, seq);
+}
+
+BOOST_AUTO_TEST_CASE( serialize_xml_two )
+{
+ std::string seq_string = "AAGGCCTT";
+ Sequence seq1(seq_string, reduced_dna_alphabet);
+ Sequence seq2(seq1);
+
+ std::ostringstream oss;
+ // allocate/deallocate serialization components
+ {
+ boost::archive::xml_oarchive oarchive(oss);
+ const Sequence& const_seq1(seq1);
+ const Sequence& const_seq2(seq2);
+ oarchive << boost::serialization::make_nvp("seq1", const_seq1);
+ oarchive << boost::serialization::make_nvp("seq2", const_seq2);
+ }
+ //std::cout << "xml: " << oss.str() << std::endl;
+ Sequence seq1_loaded;
+ Sequence seq2_loaded;
+ {
+ std::istringstream iss(oss.str());
+ boost::archive::xml_iarchive iarchive(iss);
+ iarchive >> boost::serialization::make_nvp("seq1", seq1_loaded);
+ iarchive >> boost::serialization::make_nvp("seq2", seq2_loaded);
+ }
+ BOOST_CHECK_EQUAL(seq1_loaded, seq1);
+ BOOST_CHECK_EQUAL(seq2_loaded, seq2);
+ // test if our pointers are the same
+ BOOST_CHECK_EQUAL(seq1_loaded.data(), seq2_loaded.data());
+}
+*/