throw errors when spirit parsing fails
[mussa.git] / alg / sequence.cpp
index ae58e5c87ed2da52f47193c679a5377b41aa03fe..e1923c68d49e484ac769800c66fccff7952ef6a5 100644 (file)
@@ -345,17 +345,20 @@ struct push_back_annot {
   int& end;
   std::string& name;
   std::string& type;
+  int &parsed;
 
   push_back_annot(std::list<annot>& annot_list_, 
                   int& begin_, 
                   int& end_, 
                   std::string& name_, 
-                  std::string& type_) 
+                  std::string& type_,
+                  int &parsed_) 
   : annot_list(annot_list_), 
     begin(begin_),
     end(end_),
     name(name_),
-    type(type_)
+    type(type_),
+    parsed(parsed_)
   {
   }
 
@@ -364,6 +367,7 @@ struct push_back_annot {
   {
     //std::cout << "adding annot: " << begin << "|" << end << "|" << name << "|" << type << std::endl;
     annot_list.push_back(annot(begin, end, name, type));
+    ++parsed;
   };
 };
 
@@ -371,13 +375,16 @@ struct push_back_seq {
   std::list<Sequence>& seq_list;
   std::string& name;
   std::string& seq;
+  int &parsed;
 
   push_back_seq(std::list<Sequence>& seq_list_,
                 std::string& name_, 
-                std::string& seq_)
+                std::string& seq_,
+                int &parsed_)
   : seq_list(seq_list_), 
     name(name_),
-    seq(seq_)
+    seq(seq_),
+    parsed(parsed_)
   {
   }
 
@@ -397,10 +404,11 @@ struct push_back_seq {
     Sequence s(new_seq);
     s.set_fasta_header(name);
     seq_list.push_back(s);
+    ++parsed;
   };
 };
 
-bool
+void
 Sequence::parse_annot(std::string data, int start_index, int end_index)
 {
   int start=0;
@@ -408,62 +416,69 @@ Sequence::parse_annot(std::string data, int start_index, int end_index)
   std::string name;
   std::string type;
   std::string seq;
+  std::list<annot> parsed_annots;
   std::list<Sequence> query_seqs;
-
-  bool status = spirit::parse(data.begin(), data.end(),
-                (
-                 //begin grammar
-                   !(
-                      (
-                        spirit::alpha_p >> 
-                        +(spirit::graph_p)
-                      )[spirit::assign_a(species)] >> 
-                      +(spirit::space_p)
-                    ) >>
-                    *(
-                       ( // ignore html tags
-                         *(spirit::space_p) >>
-                         spirit::ch_p('<') >> 
-                         +(~spirit::ch_p('>')) >>
-                         spirit::ch_p('>') >>
-                         *(spirit::space_p)
-                       )
-                     |
-                      ( // parse an absolute location name
-                       (spirit::uint_p[spirit::assign_a(start)] >> 
-                        +spirit::space_p >>
-                        spirit::uint_p[spirit::assign_a(end)] >> 
-                        +spirit::space_p >>
-                        ( 
-                           spirit::alpha_p >> 
-                           *spirit::graph_p
-                        )[spirit::assign_a(name)] >> 
-                        // optional type
-                        !(
-                            +spirit::space_p >>
-                            (
-                              spirit::alpha_p >>
-                              *spirit::graph_p
-                            )[spirit::assign_a(type)]
-                        )
-                        // to understand how this group gets set
-                        // read the comment above struct push_back_annot
-                       )[push_back_annot(annots, start, end, type, name)]
-                     |
-                      ((spirit::ch_p('>')|spirit::str_p("&gt;")) >> 
-                         (*(spirit::print_p))[spirit::assign_a(name)] >>
-                         spirit::eol_p >> 
-                         (+(spirit::chset<>(Alphabet::nucleic_alphabet.c_str())))[spirit::assign_a(seq)]
-                       )[push_back_seq(query_seqs, name, seq)]
-                      ) >>
-                      *spirit::space_p
+  int parsed=1;
+
+  bool ok = spirit::parse(data.begin(), data.end(),
+              (
+               //begin grammar
+                 !(
+                    (
+                      spirit::alpha_p >> 
+                      +(spirit::graph_p)
+                    )[spirit::assign_a(species)] >> 
+                    +(spirit::space_p)
+                  ) >>
+                  *(
+                     ( // ignore html tags
+                       *(spirit::space_p) >>
+                       spirit::ch_p('<') >> 
+                       +(~spirit::ch_p('>')) >>
+                       spirit::ch_p('>') >>
+                       *(spirit::space_p)
                      )
-                //end grammar
-                )).full;
-                
+                   |
+                    ( // parse an absolute location name
+                     (spirit::uint_p[spirit::assign_a(start)] >> 
+                      +spirit::space_p >>
+                      spirit::uint_p[spirit::assign_a(end)] >> 
+                      +spirit::space_p >>
+                      ( 
+                         spirit::alpha_p >> 
+                         *spirit::graph_p
+                      )[spirit::assign_a(name)] >> 
+                      // optional type
+                      !(
+                          +spirit::space_p >>
+                          (
+                            spirit::alpha_p >>
+                            *spirit::graph_p
+                          )[spirit::assign_a(type)]
+                      )
+                      // to understand how this group gets set
+                      // read the comment above struct push_back_annot
+                     )[push_back_annot(parsed_annots, start, end, type, name, parsed)]
+                   |
+                    ((spirit::ch_p('>')|spirit::str_p("&gt;")) >> 
+                       (*(spirit::print_p))[spirit::assign_a(name)] >>
+                       spirit::eol_p >> 
+                       (+(spirit::chset<>(Alphabet::nucleic_alphabet.c_str())))[spirit::assign_a(seq)]
+                     )[push_back_seq(query_seqs, name, seq, parsed)]
+                    ) >>
+                    *spirit::space_p
+                   )
+              //end grammar
+              )).full;
+  if (not ok) {
+    std::stringstream msg;
+    msg << "Error parsing annotation #" << parsed;
+    throw annotation_load_error(msg.str());
+  }
+  // add newly parsed annotations to our sequence
+  std::copy(parsed_annots.begin(), parsed_annots.end(), std::back_inserter(annots));
   // go search for query sequences
   find_sequences(query_seqs.begin(), query_seqs.end());
-  return status;
 }
 
 void Sequence::add_annotation(const annot& a)