From: Diane Trout Date: Wed, 17 May 2006 19:23:45 +0000 (+0000) Subject: finish implementing annotation by sequence X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=mussa.git;a=commitdiff_plain;h=ca1b7c45c3f77b3899a7a95986fe84f8ddd92337 finish implementing annotation by sequence I connected the previous find_sequence code with the new spirit annotation loading file. So now I can actually use (nearly exact) sequences for annotating. --- diff --git a/alg/sequence.cpp b/alg/sequence.cpp index f3f68a8..d7b4acd 100644 --- a/alg/sequence.cpp +++ b/alg/sequence.cpp @@ -275,7 +275,7 @@ struct push_back_annot { void operator()(std::string::const_iterator, std::string::const_iterator) const { - std::cout << "adding annot: " << begin << " " << end << " " << name << " " << type << std::endl; + //std::cout << "adding annot: " << begin << " " << end << " " << name << " " << type << std::endl; annot_list.push_back(annot(begin, end, name, type)); }; }; @@ -305,7 +305,7 @@ struct push_back_seq { { if (*seq_i != '\n') new_seq += *seq_i; } - std::cout << "adding seq: " << name << " " << new_seq << std::endl; + //std::cout << "adding seq: " << name << " " << new_seq << std::endl; Sequence s(new_seq); s.set_header(name); @@ -360,6 +360,9 @@ Sequence::parse_annot(std::string data, int start_index, int end_index) //end grammar ) /*, spirit::space_p*/).full; + + // go search for query sequences + find_sequences(query_seqs.begin(), query_seqs.end()); } /* @@ -937,6 +940,8 @@ void Sequence::add_string_annotation(std::string a_seq, std::string name) { std::vector seq_starts = find_motif(a_seq); + + //std::cout << "searching for " << a_seq << " found " << seq_starts.size() << std::endl; for(std::vector::iterator seq_start_i = seq_starts.begin(); seq_start_i != seq_starts.end(); diff --git a/alg/test/test_sequence.cpp b/alg/test/test_sequence.cpp index 660f4df..0036924 100644 --- a/alg/test/test_sequence.cpp +++ b/alg/test/test_sequence.cpp @@ -75,21 +75,31 @@ BOOST_AUTO_TEST_CASE( 
annotation_load ) ">ident3 asdf\n" "GCT\n" "GCTN\n" + "75\t90\tname2\ttype2\n" ; - string s('A',100); - s += "GCTGCT"; + string s(100, 'A'); + s += "GCTGCTAATT"; Sequence seq(s); //istringstream annot_stream(annot_data); seq.parse_annot(annot_data, 0, 0); std::list annots_list = seq.annotations(); std::vector annots(annots_list.begin(), annots_list.end()); - BOOST_REQUIRE_EQUAL( annots.size(), 5); + BOOST_REQUIRE_EQUAL( annots.size(), 7); BOOST_CHECK_EQUAL( annots[0].start, 0 ); BOOST_CHECK_EQUAL( annots[0].end, 10 ); - //BOOST_CHECK_EQUAL( annots[0].type, "type"); + BOOST_CHECK_EQUAL( annots[0].type, "type"); BOOST_CHECK_EQUAL( annots[0].name, "name"); - //BOOST_CHECK_EQUAL( annots[1].name, "myf7"); + BOOST_CHECK_EQUAL( annots[1].name, "myf7"); + BOOST_CHECK_EQUAL( annots[2].name, "myod"); + BOOST_CHECK_EQUAL( annots[3].name, "anothername"); + BOOST_CHECK_EQUAL( annots[4].name, "backward"); + BOOST_CHECK_EQUAL( annots[5].name, "name2"); + BOOST_CHECK_EQUAL( annots[5].end, 90); + // sequence defined annotations will always be after the + // absolute positions + BOOST_CHECK_EQUAL( annots[6].name, "ident3 asdf"); + BOOST_CHECK_EQUAL( annots[6].start, 100); //BOOST_CHECK_EQUAL( annots }