From 9cd3cb5a74f50e17d160ff5a053dbc4d27aaef10 Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Wed, 17 May 2006 01:57:30 +0000 Subject: [PATCH] use spirit parser for reading annot file This basically duplicates Tristan's original annot parser using boost::spirit. That was more complicated than I thought, and required implementing a functor struct that stored data when the class was instantiated and then performed some action when operator() was called. Look at the comment at sequence.cpp:push_back_annot for more descriptions of how that worked. I improved the annotate_from_sequence unit test to not use hard-coded values. I have the start of a unit test for the annot parsing code. (currently it parses a string and then dumps out the result) Though I did run the gui using this code and annotations did show up in roughly the right places. --- alg/sequence.cpp | 388 ++++++++++++++++++++++--------------- alg/sequence.hpp | 7 +- alg/test/test_sequence.cpp | 73 +++++-- 3 files changed, 296 insertions(+), 172 deletions(-) diff --git a/alg/sequence.cpp b/alg/sequence.cpp index 53a13f0..d5c77c8 100644 --- a/alg/sequence.cpp +++ b/alg/sequence.cpp @@ -22,6 +22,12 @@ // ---------- sequence.cc ----------- // ---------------------------------------- #include +namespace fs = boost::filesystem; + +#include +#include +#include +namespace spirit = boost::spirit; #include "alg/sequence.hpp" #include "mussa_exceptions.hpp" @@ -30,9 +36,6 @@ #include #include -namespace fs = boost::filesystem; -using namespace std; - annot::annot() : start(0), end(0), @@ -64,7 +67,7 @@ Sequence::Sequence() motif_list.clear(); } -Sequence::Sequence(string seq) +Sequence::Sequence(std::string seq) { set_filtered_sequence(seq); } @@ -91,7 +94,7 @@ char Sequence::operator[](int index) const return sequence[index]; } -ostream& operator<<(ostream& out, const Sequence& seq) +std::ostream& operator<<(std::ostream& out, const Sequence& seq) { out << "Sequence(" << seq.get_seq() << ")"; return out; @@ 
-110,14 +113,14 @@ Sequence::load_fasta(fs::path file_path, int seq_num, int start_index, int end_index) { fs::fstream data_file; - string file_data_line; + std::string file_data_line; int header_counter = 0; bool read_seq = true; - string rev_comp; - string sequence_raw; - string seq_tmp; // holds sequence during basic filtering + std::string rev_comp; + std::string sequence_raw; + std::string seq_tmp; // holds sequence during basic filtering - data_file.open(file_path, ios::in); + data_file.open(file_path, std::ios::in); if (seq_num == 0) { throw mussa_load_error("fasta sequence number is 1 based (can't be 0)"); @@ -158,18 +161,18 @@ Sequence::load_fasta(fs::path file_path, int seq_num, // sequence filtering for upcasing agctn and convert non AGCTN to N set_filtered_sequence(sequence_raw, start_index, end_index-start_index); } else { - stringstream errormsg; + std::stringstream errormsg; errormsg << file_path.native_file_string() - << " did not have any fasta sequences" << endl; + << " did not have any fasta sequences" << std::endl; throw mussa_load_error(errormsg.str()); } data_file.close(); } } -void Sequence::set_filtered_sequence(const string &old_seq, - string::size_type start, - string::size_type count) +void Sequence::set_filtered_sequence(const std::string &old_seq, + std::string::size_type start, + std::string::size_type count) { char conversionTable[257]; @@ -200,7 +203,7 @@ void Sequence::set_filtered_sequence(const string &old_seq, conversionTable[(int)'c'] = 'C'; // finally, the actual conversion loop - for(string::size_type seq_index = 0; seq_index < count; seq_index++) + for(std::string::size_type seq_index = 0; seq_index < count; seq_index++) { sequence += conversionTable[ (int)old_seq[seq_index+start]]; } @@ -209,105 +212,178 @@ void Sequence::set_filtered_sequence(const string &old_seq, // this doesn't work properly under gcc 3.x ... 
it can't recognize toupper //transform(sequence.begin(), sequence.end(), sequence.begin(), toupper); - void Sequence::load_annot(fs::path file_path, int start_index, int end_index) { - fs::fstream data_file; - string file_data_line; - annot an_annot; - string::size_type space_split_i; - string annot_value; - list::iterator list_i; - string err_msg; - - - annots.clear(); - data_file.open(file_path, ios::in); - - if (!data_file) + fs::fstream data_stream(file_path, std::ios::in); + if (!data_stream) { throw mussa_load_error("Sequence File: " + file_path.string() + " not found"); } - // if file opened okay, read it - else + // so i should probably be passing the parse function some iterators + // but the annotations files are (currently) small, so i think i can + // get away with loading the whole file into memory + std::string data; + char c; + while(data_stream.good()) { + data_stream.get(c); + data.push_back(c); + } + data_stream.close(); + + parse_annot(data, start_index, end_index); +} + +/* If this works, yikes, this is some brain hurting code. + * + * what's going on is that when pb_annot is instantiated it stores references + * to begin, end, name, type, declared in the parse function, then + * when operator() is called it grabs values from those references + * and uses that to instantiate an annot object and append that to our + * annotation list. + * + * This weirdness is because the spirit library requires that actions + * conform to a specific prototype operator()(IteratorT, IteratorT) + * which doesn't provide any useful opportunity for me to actually + * grab the results of our parsing. + * + * so I instantiate this structure in order to have a place to grab + * my data from. 
+ */ + +struct push_back_annot { + std::list& annot_list; + int& begin; + int& end; + std::string& name; + std::string& type; + + push_back_annot(std::list& annot_list_, + int& begin_, + int& end_, + std::string& name_, + std::string& type_) + : annot_list(annot_list_), + begin(begin_), + end(end_), + name(name_), + type(type_) { - getline(data_file,file_data_line); - species = file_data_line; + } - // end_index = 0 means no end was specified, so cut to the end - if (end_index == 0) - end_index = sequence.length(); + void operator()(std::string::const_iterator, + std::string::const_iterator) const + { + annot_list.push_back(annot(begin, end, name, type)); + }; +}; - //cout << "START: " << start_index << " END: " << end_index << endl; - while ( !data_file.eof() ) +void +Sequence::parse_annot(std::string data, int start_index, int end_index) +{ + std::string species_name; + int start=0; + int end=0; + std::string name; + std::string type; + + + bool status = spirit::parse(data.begin(), data.end(), + //begin grammar + ( + (+(spirit::alpha_p))[spirit::assign_a(species_name)] >> + *((spirit::uint_p[spirit::assign_a(start)] >> + spirit::uint_p[spirit::assign_a(end)] >> + (*(spirit::alpha_p))[spirit::assign_a(name)] >> + (*(spirit::alpha_p))[spirit::assign_a(type)] + // to understand, read the comment above + // struct push_back_annot + )[push_back_annot(annots, start, end, name, type)]) + ), + //end grammar + spirit::space_p).full; +} + +/* +void +Sequence::load_annot(std::istream& data_stream, int start_index, int end_index) +{ + std::string file_data_line; + annot an_annot; + std::string::size_type space_split_i; + std::string annot_value; + std::list::iterator list_i; + std::string err_msg; + + annots.clear(); + + getline(data_stream,file_data_line); + species = file_data_line; + + // end_index = 0 means no end was specified, so cut to the end + if (end_index == 0) + end_index = sequence.length(); + + //std::cout << "START: " << start_index << " END: " << end_index << 
std::endl; + + while ( !data_stream.eof() ) + { + getline(data_stream,file_data_line); + if (file_data_line != "") { - getline(data_file,file_data_line); - if (file_data_line != "") + // need to get 4 values...almost same code 4 times... + // get annot start index + space_split_i = file_data_line.find(" "); + annot_value = file_data_line.substr(0,space_split_i); + an_annot.start = atoi (annot_value.c_str()); + file_data_line = file_data_line.substr(space_split_i+1); + // get annot end index + space_split_i = file_data_line.find(" "); + annot_value = file_data_line.substr(0,space_split_i); + an_annot.end = atoi (annot_value.c_str()); + file_data_line = file_data_line.substr(space_split_i+1); + + //std::cout << "seq, annots: " << an_annot.start << ", " << an_annot.end + // << std::endl; + + // get annot name + space_split_i = file_data_line.find(" "); + if (space_split_i == std::string::npos) // no entries for name & type + { + std::cout << "seq, annots - no name or type\n"; + an_annot.name = ""; + an_annot.type = ""; + } + else { - // need to get 4 values...almost same code 4 times... 
- // get annot start index - space_split_i = file_data_line.find(" "); - annot_value = file_data_line.substr(0,space_split_i); - an_annot.start = atoi (annot_value.c_str()); - file_data_line = file_data_line.substr(space_split_i+1); - // get annot end index - space_split_i = file_data_line.find(" "); annot_value = file_data_line.substr(0,space_split_i); - an_annot.end = atoi (annot_value.c_str()); + an_annot.name = annot_value; file_data_line = file_data_line.substr(space_split_i+1); - - //cout << "seq, annots: " << an_annot.start << ", " << an_annot.end - // << endl; - - // get annot name + // get annot type space_split_i = file_data_line.find(" "); - if (space_split_i == string::npos) // no entries for name & type - { - cout << "seq, annots - no name or type\n"; - an_annot.name = ""; + if (space_split_i == std::string::npos) // no entry for type an_annot.type = ""; - } else { annot_value = file_data_line.substr(0,space_split_i); - an_annot.name = annot_value; - file_data_line = file_data_line.substr(space_split_i+1); - // get annot type - space_split_i = file_data_line.find(" "); - if (space_split_i == string::npos) // no entry for type - an_annot.type = ""; - else - { - annot_value = file_data_line.substr(0,space_split_i); - an_annot.type = annot_value; - } + an_annot.type = annot_value; } + } - // add annot to list if it falls within the range of sequence specified - if ((start_index <= an_annot.start) && (end_index >= an_annot.end)) - { - an_annot.start -= start_index; - an_annot.end -= start_index; - annots.push_back(an_annot); - } - // else no (or bogus) annotations + // add annot to list if it falls within the range of sequence specified + if ((start_index <= an_annot.start) && (end_index >= an_annot.end)) + { + an_annot.start -= start_index; + an_annot.end -= start_index; + annots.push_back(an_annot); } + // else no (or bogus) annotations } - - data_file.close(); - /* - // debugging check - for(list_i = annots.begin(); list_i != annots.end(); ++list_i) - { 
- cout << (*list_i).start << "," << (*list_i).end << "\t"; - cout << (*list_i).name << "\t" << (*list_i).type << endl; - } - */ } } +*/ const std::string& Sequence::get_species() const { @@ -324,12 +400,12 @@ const std::list& Sequence::annotations() const return annots; } -string::size_type Sequence::length() const +std::string::size_type Sequence::length() const { return size(); } -string::size_type Sequence::size() const +std::string::size_type Sequence::size() const { return sequence.size(); } @@ -355,14 +431,14 @@ Sequence::const_iterator Sequence::end() const } -const string& +const std::string& Sequence::get_seq() const { return sequence; } -string +std::string Sequence::subseq(int start, int end) const { return sequence.substr(start, end); @@ -375,10 +451,10 @@ Sequence::c_seq() const return sequence.c_str(); } -string +std::string Sequence::rev_comp() const { - string rev_comp; + std::string rev_comp; char conversionTable[257]; int seq_i, table_i, len; @@ -416,14 +492,14 @@ Sequence::rev_comp() const } -const string& +const std::string& Sequence::get_header() const { return header; } /* //FIXME: i don't think this code is callable -string +std::string Sequence::sp_name() const { return species; @@ -431,14 +507,14 @@ Sequence::sp_name() const */ void -Sequence::set_seq(const string& a_seq) +Sequence::set_seq(const std::string& a_seq) { set_filtered_sequence(a_seq); } /* -string +std::string Sequence::species() { return species; @@ -456,28 +532,28 @@ Sequence::clear() void Sequence::save(fs::fstream &save_file) - //string save_file_path) + //std::string save_file_path) { //fstream save_file; - list::iterator annots_i; + std::list::iterator annots_i; // not sure why, or if i'm doing something wrong, but can't seem to pass // file pointers down to this method from the mussa control class // so each call to save a sequence appends to the file started by mussa_class - //save_file.open(save_file_path.c_str(), ios::app); + //save_file.open(save_file_path.c_str(), 
std::ios::app); - save_file << "" << endl; - save_file << sequence << endl; - save_file << "" << endl; + save_file << "" << std::endl; + save_file << sequence << std::endl; + save_file << "" << std::endl; - save_file << "" << endl; - save_file << species << endl; + save_file << "" << std::endl; + save_file << species << std::endl; for (annots_i = annots.begin(); annots_i != annots.end(); ++annots_i) { save_file << annots_i->start << " " << annots_i->end << " " ; - save_file << annots_i->name << " " << annots_i->type << endl; + save_file << annots_i->name << " " << annots_i->type << std::endl; } - save_file << "" << endl; + save_file << "" << std::endl; //save_file.close(); } @@ -485,14 +561,14 @@ void Sequence::load_museq(fs::path load_file_path, int seq_num) { fs::fstream load_file; - string file_data_line; + std::string file_data_line; int seq_counter; annot an_annot; - string::size_type space_split_i; - string annot_value; + std::string::size_type space_split_i; + std::string annot_value; annots.clear(); - load_file.open(load_file_path, ios::in); + load_file.open(load_file_path, std::ios::in); seq_counter = 0; // search for the seq_num-th sequence @@ -526,9 +602,9 @@ Sequence::load_museq(fs::path load_file_path, int seq_num) annot_value = file_data_line.substr(0,space_split_i); an_annot.end = atoi (annot_value.c_str()); - if (space_split_i == string::npos) // no entry for type or name + if (space_split_i == std::string::npos) // no entry for type or name { - cout << "seq, annots - no type or name\n"; + std::cout << "seq, annots - no type or name\n"; an_annot.type = ""; an_annot.name = ""; } @@ -538,9 +614,9 @@ Sequence::load_museq(fs::path load_file_path, int seq_num) space_split_i = file_data_line.find(" "); annot_value = file_data_line.substr(0,space_split_i); an_annot.type = annot_value; - if (space_split_i == string::npos) // no entry for name + if (space_split_i == std::string::npos) // no entry for name { - cout << "seq, annots - no name\n"; + std::cout << 
"seq, annots - no name\n"; an_annot.name = ""; } else // get annot name @@ -553,18 +629,18 @@ Sequence::load_museq(fs::path load_file_path, int seq_num) } annots.push_back(an_annot); // don't forget to actually add the annot } - //cout << "seq, annots: " << an_annot.start << ", " << an_annot.end - // << "-->" << an_annot.type << "::" << an_annot.name << endl; + //std::cout << "seq, annots: " << an_annot.start << ", " << an_annot.end + // << "-->" << an_annot.type << "::" << an_annot.name << std::endl; } } load_file.close(); } -string -Sequence::rc_motif(string a_motif) +std::string +Sequence::rc_motif(std::string a_motif) { - string rev_comp; + std::string rev_comp; char conversionTable[257]; int seq_i, table_i, len; @@ -575,7 +651,7 @@ Sequence::rc_motif(string a_motif) { conversionTable[table_i] = '~'; } - // add end of string character for printing out table for testing purposes + // add end of std::string character for printing out table for testing purposes conversionTable[256] = '\0'; // add in the characters for the bases we want to convert (IUPAC) @@ -598,21 +674,21 @@ Sequence::rc_motif(string a_motif) // finally, the actual conversion loop for(seq_i = len - 1; seq_i >= 0; seq_i--) { - //cout << "** i = " << seq_i << " bp = " << + //std::cout << "** i = " << seq_i << " bp = " << table_i = (int) a_motif[seq_i]; rev_comp += conversionTable[table_i]; } - //cout << "seq: " << a_motif << endl; - //cout << "rc: " << rev_comp << endl; + //std::cout << "seq: " << a_motif << std::endl; + //std::cout << "rc: " << rev_comp << std::endl; return rev_comp; } -string -Sequence::motif_normalize(string a_motif) +std::string +Sequence::motif_normalize(std::string a_motif) { - string valid_motif; + std::string valid_motif; int seq_i, len; len = a_motif.length(); @@ -653,21 +729,21 @@ Sequence::motif_normalize(string a_motif) else if ((a_motif[seq_i] == 'b') || (a_motif[seq_i] == 'B')) valid_motif += 'B'; else { - string msg = "Letter "; + std::string msg = "Letter "; msg += 
a_motif[seq_i]; msg += " is not a valid IUPAC symbol"; throw motif_normalize_error(msg); } } - //cout << "valid_motif is: " << valid_motif << endl; + //std::cout << "valid_motif is: " << valid_motif << std::endl; return valid_motif; } -void Sequence::add_motif(string a_motif) +void Sequence::add_motif(std::string a_motif) { - vector motif_starts = find_motif(a_motif); + std::vector motif_starts = find_motif(a_motif); - for(vector::iterator motif_start_i = motif_starts.begin(); + for(std::vector::iterator motif_start_i = motif_starts.begin(); motif_start_i != motif_starts.end(); ++motif_start_i) { @@ -680,26 +756,26 @@ void Sequence::clear_motifs() motif_list.clear(); } -const list& Sequence::motifs() const +const std::list& Sequence::motifs() const { return motif_list; } -vector -Sequence::find_motif(string a_motif) +std::vector +Sequence::find_motif(std::string a_motif) { - vector motif_match_starts; - string a_motif_rc; + std::vector motif_match_starts; + std::string a_motif_rc; motif_match_starts.clear(); - //cout << "motif is: " << a_motif << endl; + //std::cout << "motif is: " << a_motif << std::endl; a_motif = motif_normalize(a_motif); - //cout << "motif is: " << a_motif << endl; + //std::cout << "motif is: " << a_motif << std::endl; if (a_motif != "") { - //cout << "Sequence: none blank motif\n"; + //std::cout << "Sequence: none blank motif\n"; motif_scan(a_motif, &motif_match_starts); a_motif_rc = rc_motif(a_motif); @@ -711,30 +787,30 @@ Sequence::find_motif(string a_motif) } void -Sequence::motif_scan(string a_motif, vector * motif_match_starts) +Sequence::motif_scan(std::string a_motif, std::vector * motif_match_starts) { char * seq_c; - string::size_type seq_i; + std::string::size_type seq_i; int motif_i, motif_len; - // faster to loop thru the sequence as a old c string (ie char array) + // faster to loop thru the sequence as a old c std::string (ie char array) seq_c = (char*)sequence.c_str(); - //cout << "Sequence: motif, seq len = " << 
sequence.length() << endl; + //std::cout << "Sequence: motif, seq len = " << sequence.length() << std::endl; motif_len = a_motif.length(); - //cout << "motif_length: " << motif_len << endl; - //cout << "RAAARRRRR\n"; + //std::cout << "motif_length: " << motif_len << std::endl; + //std::cout << "RAAARRRRR\n"; motif_i = 0; - //cout << "motif: " << a_motif << endl; + //std::cout << "motif: " << a_motif << std::endl; - //cout << "Sequence: motif, length= " << length << endl; + //std::cout << "Sequence: motif, length= " << length << std::endl; seq_i = 0; while (seq_i < sequence.length()) { - //cout << seq_c[seq_i]; - //cout << seq_c[seq_i] << "?" << a_motif[motif_i] << ":" << motif_i << " "; + //std::cout << seq_c[seq_i]; + //std::cout << seq_c[seq_i] << "?" << a_motif[motif_i] << ":" << motif_i << " "; // this is pretty much a straight translation of Nora's python code // to match iupac letter codes if (a_motif[motif_i] =='N') @@ -785,7 +861,7 @@ Sequence::motif_scan(string a_motif, vector * motif_match_starts) // end Nora stuff, now we see if a match is found this pass if (motif_i == motif_len) { - //cout << "!!"; + //std::cout << "!!"; annot new_motif; motif_match_starts->push_back(seq_i - motif_len + 1); motif_i = 0; @@ -793,15 +869,15 @@ Sequence::motif_scan(string a_motif, vector * motif_match_starts) seq_i++; } - //cout << endl; + //std::cout << std::endl; } void Sequence::add_string_annotation(std::string a_seq, std::string name) { - vector seq_starts = find_motif(a_seq); + std::vector seq_starts = find_motif(a_seq); - for(vector::iterator seq_start_i = seq_starts.begin(); + for(std::vector::iterator seq_start_i = seq_starts.begin(); seq_start_i != seq_starts.end(); ++seq_start_i) { diff --git a/alg/sequence.hpp b/alg/sequence.hpp index dac29db..c3fb53f 100644 --- a/alg/sequence.hpp +++ b/alg/sequence.hpp @@ -89,9 +89,12 @@ class Sequence void load_fasta(const boost::filesystem::path file_path, int seq_num=1, int start_index=0, int end_index=0); //! 
load sequence annotations - /*! \throws mussa_load_error - */ + //! \throws mussa_load_error void load_annot(const boost::filesystem::path file_path, int start_index, int end_index); + //! load sequence annotations + //! \throws mussa_load_error + //void load_annot(std::istream& data_stream, int start_index, int end_index); + void parse_annot(std::string data, int start_index, int end_index); const std::list& annotations() const; const std::list& motifs() const; const std::string& get_species() const; diff --git a/alg/test/test_sequence.cpp b/alg/test/test_sequence.cpp index ae2726c..1e478fa 100644 --- a/alg/test/test_sequence.cpp +++ b/alg/test/test_sequence.cpp @@ -5,6 +5,7 @@ namespace fs=boost::filesystem; #include #include +#include #include "alg/sequence.hpp" #include "mussa_exceptions.hpp" @@ -63,6 +64,29 @@ BOOST_AUTO_TEST_CASE( sequence_load ) "5' flank"); } +BOOST_AUTO_TEST_CASE( annotation_load ) +{ + string annot_data = "human\n" + "0 10 name type\n" + "10 20 name\n" + "20 30\n" + "15 20 backward\n"; + string s('A',100); + s += "GCTGCT"; + Sequence seq(s); + + //istringstream annot_stream(annot_data); + seq.parse_annot(annot_data, 0, 0); + typedef std::list annot_list_t; + annot_list_t annots = seq.annotations(); + for(annot_list_t::iterator annot_i = annots.begin(); + annot_i != annots.end(); + ++annot_i) + { + std::cout << "start " << annot_i->start << endl; + } +} + // ticket:83 when you try to load a sequence from a file that doesn't // have fasta headers it crashes. 
BOOST_AUTO_TEST_CASE( sequence_past_end ) @@ -161,21 +185,42 @@ BOOST_AUTO_TEST_CASE( annot_test ) BOOST_AUTO_TEST_CASE( annotate_from_sequence ) { - Sequence s("CCGCCCCCCATCATCGCGGCTCTCCGAGAGTCCCGCGCCCCACTCCCGGC" - "ACCCACCTGACCGCGGGCGGCTCCGGCCCCGCTTCGCCCCACTGCGATCA" - "GTCGCGTCCCGCAGGCCAGGCACGCCCCGCCGCTCCCGCTGCGCCGGGCG" - "TCTGGGACCTCGGGCGGCTCCTCCGAGGGGCGGGGCAGCCGGGAGCCACG" - "CCCCCGCAGGTGAGCCGGCCACGCCCACCGCCCGTGGGAAGTTCAGCCTC" - "GGGGCTCCAGCCCCGCGGGAATGGCAGAACTTCGCACGCGGAACTGGTAA" - "CCTCCAGGACACCTCGAATCAGGGTGATTGTAGCGCAGGGGCCTTGGCCA" - "AGCTAAAACTTTGGAAACTTTAGATCCCAGACAGGTGGCTTTCTTGCAGT"); + string s("CCGCCCCCCATCATCGCGGCTCTCCGAGAGTCCCGCGCCCCACTCCCGGC" + "ACCCACCTGACCGCGGGCGGCTCCGGCCCCGCTTCGCCCCACTGCGATCA" + "GTCGCGTCCCGCAGGCCAGGCACGCCCCGCCGCTCCCGCTGCGCCGGGCG" + "TCTGGGACCTCGGGCGGCTCCTCCGAGGGGCGGGGCAGCCGGGAGCCACG" + "CCCCCGCAGGTGAGCCGGCCACGCCCACCGCCCGTGGGAAGTTCAGCCTC" + "GGGGCTCCAGCCCCGCGGGAATGGCAGAACTTCGCACGCGGAACTGGTAA" + "CCTCCAGGACACCTCGAATCAGGGTGATTGTAGCGCAGGGGCCTTGGCCA" + "AGCTAAAACTTTGGAAACTTTAGATCCCAGACAGGTGGCTTTCTTGCAGT"); + string gc("GCCCCC"); + string gga("GGACACCTC"); + Sequence seq(s); std::list query_list; - query_list.push_back(Sequence("GCCCCC")); - query_list.push_back(Sequence("GGACACCTC")); - - BOOST_CHECK_EQUAL( s.annotations().size(), 0 ); - s.find_sequences(query_list.begin(), query_list.end()); - BOOST_CHECK_EQUAL( s.annotations().size(), 4 ); + std::list string_list; + query_list.push_back(Sequence(gc)); + string_list.push_back(gc); + query_list.push_back(Sequence(gga)); + string_list.push_back(gga); + + BOOST_CHECK_EQUAL( seq.annotations().size(), 0 ); + seq.find_sequences(query_list.begin(), query_list.end()); + + int count = 0; + for(list::iterator string_i = string_list.begin(); + string_i != string_list.end(); + ++string_i) + { + string::size_type pos=0; + while(pos != string::npos) { + pos = s.find(*string_i, pos); + if (pos != string::npos) { + ++count; + ++pos; + } + } + } + BOOST_CHECK_EQUAL(seq.annotations().size(), count); } 
-- 2.30.2