try to read fasta blocks in the annotation file
[mussa.git] / alg / sequence.cpp
index d5c77c87ea240e47d667f43f2c8e4776cf17b343..f0cb6e5178fbc19cdabbc9e7c37154ed4210e4fa 100644 (file)
@@ -220,6 +220,7 @@ Sequence::load_annot(fs::path file_path, int start_index, int end_index)
   {
     throw mussa_load_error("Sequence File: " + file_path.string() + " not found");
   }
+
   // so i should probably be passing the parse function some iterators
   // but the annotations files are (currently) small, so i think i can 
   // get away with loading the whole file into memory
@@ -274,32 +275,67 @@ struct push_back_annot {
   void operator()(std::string::const_iterator, 
                   std::string::const_iterator) const 
   {
+    std::cout << "adding annot: " << begin << " " << end << " " << name << " " << type << std::endl;
     annot_list.push_back(annot(begin, end, name, type));
   };
 };
 
+struct push_back_seq {  // functor: builds a Sequence from (name, seq) and appends it to seq_list
+  std::list<Sequence>& seq_list;  // destination list; referenced, not owned
+  std::string& name;  // header text, read each time operator() runs
+  std::string& seq;  // sequence text, read each time operator() runs
+  // All three members are references, so operator() sees the referents' current values.
+  push_back_seq(std::list<Sequence>& seq_list_,
+                std::string& name_, 
+                std::string& seq_)
+  : seq_list(seq_list_), 
+    name(name_),
+    seq(seq_)
+  {
+  }
+  // Ignores both iterator arguments; prints a trace line, then stores a new Sequence.
+  void operator()(std::string::const_iterator, 
+                  std::string::const_iterator) const  // const is fine: only the referent list changes
+  {
+    std::cout << "adding seq: " << name << " " << seq << std::endl;
+    Sequence s(seq);  // constructed from the current contents of seq
+    s.set_header(name);
+    seq_list.push_back(s);  // the list keeps its own copy of s
+  };
+};
 
 void
 Sequence::parse_annot(std::string data, int start_index, int end_index)
 {
-  std::string species_name;
   int start=0;
   int end=0;
   std::string name;
   std::string type;
-
+  std::string seq;
+  std::list<Sequence> query_seqs;
 
   bool status = spirit::parse(data.begin(), data.end(),
                 //begin grammar
                 (
-                (+(spirit::alpha_p))[spirit::assign_a(species_name)] >> 
-                    *((spirit::uint_p[spirit::assign_a(start)] >> 
-                      spirit::uint_p[spirit::assign_a(end)] >> 
-                      (*(spirit::alpha_p))[spirit::assign_a(name)] >> 
-                      (*(spirit::alpha_p))[spirit::assign_a(type)]
-                     // to understand, read the comment above 
-                     // struct push_back_annot
-                    )[push_back_annot(annots, start, end, name, type)])
+                (+(spirit::alpha_p))[spirit::assign_a(species)] >> 
+                    *(
+                       // parse an absolute location name
+                       (spirit::uint_p[spirit::assign_a(start)] >> 
+                        spirit::uint_p[spirit::assign_a(end)] >> 
+                        (*(spirit::alpha_p))[spirit::assign_a(name)]/* >> 
+                        (*(spirit::alpha_p))[spirit::assign_a(type)]*/
+                        // to understand how this group gets set
+                        // read the comment above struct push_back_annot
+                       )[push_back_annot(annots, start, end, type, name)]
+                     |
+                      (spirit::ch_p('>') >> 
+                       (*(spirit::alpha_p))[spirit::assign_a(name)] >>
+                       (+(spirit::ch_p('A')|
+                          spirit::ch_p('G')|
+                          spirit::ch_p('C')|
+                          spirit::ch_p('T'))[spirit::assign_a(seq)])
+                       )[push_back_seq(query_seqs, name, seq)]
+                     )
                 ),
                 //end grammar
                 spirit::space_p).full;
@@ -491,6 +527,10 @@ Sequence::rev_comp() const
   return rev_comp;
 }
 
+void Sequence::set_header(std::string &header_)  // assigns header_ to this object's header
+{  // NOTE(review): header_ is only read, yet is a non-const reference, so temporaries cannot be passed
+  header = header_;
+}
 
 const std::string&
 Sequence::get_header() const