From b035f5a0b69e647fc0987643c5ebd65234bb7edf Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Wed, 17 May 2006 08:36:18 +0000 Subject: [PATCH] switch to a character based spirit parser by not letting spirit automatically deal with the spacing I was able to actually get parsing to work correctly. --- alg/sequence.cpp | 41 ++++++++++++++++++++++++++++---------- alg/test/test_sequence.cpp | 8 ++++---- 2 files changed, 35 insertions(+), 14 deletions(-) diff --git a/alg/sequence.cpp b/alg/sequence.cpp index f0cb6e5..f3f68a8 100644 --- a/alg/sequence.cpp +++ b/alg/sequence.cpp @@ -297,8 +297,17 @@ struct push_back_seq { void operator()(std::string::const_iterator, std::string::const_iterator) const { - std::cout << "adding seq: " << name << " " << seq << std::endl; - Sequence s(seq); + // filter out newlines from our sequence + std::string new_seq; + for(std::string::const_iterator seq_i = seq.begin(); + seq_i != seq.end(); + ++seq_i) + { + if (*seq_i != '\n') new_seq += *seq_i; + } + std::cout << "adding seq: " << name << " " << new_seq << std::endl; + + Sequence s(new_seq); s.set_header(name); seq_list.push_back(s); }; @@ -315,30 +324,42 @@ Sequence::parse_annot(std::string data, int start_index, int end_index) std::list query_seqs; bool status = spirit::parse(data.begin(), data.end(), - //begin grammar ( + //begin grammar (+(spirit::alpha_p))[spirit::assign_a(species)] >> + +(spirit::space_p) >> *( - // parse an absolute location name + ( // parse an absolute location name (spirit::uint_p[spirit::assign_a(start)] >> + +spirit::space_p >> spirit::uint_p[spirit::assign_a(end)] >> - (*(spirit::alpha_p))[spirit::assign_a(name)]/* >> - (*(spirit::alpha_p))[spirit::assign_a(type)]*/ + +spirit::space_p >> + (*(spirit::alpha_p|spirit::digit_p))[spirit::assign_a(name)] >> + // optional type + !( + +spirit::space_p >> + (*(spirit::alpha_p))[spirit::assign_a(type)] + ) // to understand how this group gets set // read the comment above struct push_back_annot )[push_back_annot(annots, start, end, type, name)] | (spirit::ch_p('>') >> - (*(spirit::alpha_p))[spirit::assign_a(name)] >> + (*(~spirit::chlit('\n')))[spirit::assign_a(name)] >> + +spirit::space_p >> (+(spirit::ch_p('A')| spirit::ch_p('G')| spirit::ch_p('C')| - spirit::ch_p('T'))[spirit::assign_a(seq)]) + spirit::ch_p('T')| + spirit::ch_p('N')| + spirit::ch_p('\n')))[spirit::assign_a(seq)] )[push_back_seq(query_seqs, name, seq)] + ) >> + *spirit::space_p ) - ), //end grammar - spirit::space_p).full; + ) /*, + spirit::space_p*/).full; } /* diff --git a/alg/test/test_sequence.cpp b/alg/test/test_sequence.cpp index c2dea4e..660f4df 100644 --- a/alg/test/test_sequence.cpp +++ b/alg/test/test_sequence.cpp @@ -67,14 +67,14 @@ BOOST_AUTO_TEST_CASE( sequence_load ) BOOST_AUTO_TEST_CASE( annotation_load ) { string annot_data = "human\n" - "0 10 name\n" // type\n" - "10 20 myf\n" + "0 10 name type\n" + "10 20 myf7\n" "20 30 myod\n" "50\t55 anothername\n" "60 50 backward\n" - ">ident\n" - "GCT\n" + ">ident3 asdf\n" "GCT\n" + "GCTN\n" ; string s('A',100); s += "GCTGCT"; -- 2.30.2