{
throw mussa_load_error("Sequence File: " + file_path.string() + " not found");
}
+
// so i should probably be passing the parse function some iterators
// but the annotations files are (currently) small, so i think i can
// get away with loading the whole file into memory
void operator()(std::string::const_iterator,
std::string::const_iterator) const
{
+ std::cout << "adding annot: " << begin << " " << end << " " << name << " " << type << std::endl;
annot_list.push_back(annot(begin, end, name, type));
};
};
+// Parser semantic action: every time it is invoked it builds a Sequence
+// from the current contents of `seq`, sets that Sequence's header to the
+// current contents of `name`, and appends it to `seq_list`.
+// All three members are references, so the action always reads whatever
+// the referenced strings hold at the moment it fires.
+struct push_back_seq {
+ std::list<Sequence>& seq_list;
+ std::string& name;
+ std::string& seq;
+
+ // Bind the destination list and the two strings the action will read.
+ push_back_seq(std::list<Sequence>& destination,
+ std::string& header_text,
+ std::string& sequence_text)
+ : seq_list(destination),
+ name(header_text),
+ seq(sequence_text)
+ {
+ }
+
+ // The matched iterator range handed over by the parser is ignored;
+ // only the bound strings are used.
+ void operator()(std::string::const_iterator,
+ std::string::const_iterator) const
+ {
+ std::cout << "adding seq: " << name << " " << seq << std::endl;
+ Sequence parsed(seq);
+ parsed.set_header(name);
+ seq_list.push_back(parsed);
+ }
+};
void
Sequence::parse_annot(std::string data, int start_index, int end_index)
{
- std::string species_name;
int start=0;
int end=0;
std::string name;
std::string type;
-
+ std::string seq;
+ std::list<Sequence> query_seqs;
bool status = spirit::parse(data.begin(), data.end(),
//begin grammar
(
- (+(spirit::alpha_p))[spirit::assign_a(species_name)] >>
- *((spirit::uint_p[spirit::assign_a(start)] >>
- spirit::uint_p[spirit::assign_a(end)] >>
- (*(spirit::alpha_p))[spirit::assign_a(name)] >>
- (*(spirit::alpha_p))[spirit::assign_a(type)]
- // to understand, read the comment above
- // struct push_back_annot
- )[push_back_annot(annots, start, end, name, type)])
+ (+(spirit::alpha_p))[spirit::assign_a(species)] >>
+ *(
+ // parse an absolute location name
+ (spirit::uint_p[spirit::assign_a(start)] >>
+ spirit::uint_p[spirit::assign_a(end)] >>
+ (*(spirit::alpha_p))[spirit::assign_a(name)]/* >>
+ (*(spirit::alpha_p))[spirit::assign_a(type)]*/
+ // to understand how this group gets set
+ // read the comment above struct push_back_annot
+ )[push_back_annot(annots, start, end, type, name)]
+ |
+ (spirit::ch_p('>') >>
+ (*(spirit::alpha_p))[spirit::assign_a(name)] >>
+ (+(spirit::ch_p('A')|
+ spirit::ch_p('G')|
+ spirit::ch_p('C')|
+ spirit::ch_p('T'))[spirit::assign_a(seq)])
+ )[push_back_seq(query_seqs, name, seq)]
+ )
),
//end grammar
spirit::space_p).full;
return rev_comp;
}
+// Assign header_ to this sequence's header member.
+// The argument is taken by non-const reference but is only read here.
+void Sequence::set_header(std::string &header_)
+{
+ this->header = header_;
+}
const std::string&
Sequence::get_header() const
BOOST_AUTO_TEST_CASE( annotation_load )
{
+ // Annotation text fed to Sequence::parse_annot: a leading name ("human"),
+ // five "start end name" records (one tab-separated, one with start > end)
+ // and one ">ident" record followed by two lines of bases.
string annot_data = "human\n"
- "0 10 name type\n"
- "10 20 name\n"
- "20 30\n"
- "15 20 backward\n";
+ "0 10 name\n" // type\n"
+ "10 20 myf\n"
+ "20 30 myod\n"
+ "50\t55 anothername\n"
+ "60 50 backward\n"
+ ">ident\n"
+ "GCT\n"
+ "GCT\n"
+ ;
- string s('A',100);
+ // std::string's fill constructor is (count, char). The previous
+ // ('A', 100) built 65 copies of 'd' (on ASCII) instead of 100 'A's.
+ string s(100, 'A');
s += "GCTGCT";
Sequence seq(s);
//istringstream annot_stream(annot_data);
seq.parse_annot(annot_data, 0, 0);
- typedef std::list<annot> annot_list_t;
- annot_list_t annots = seq.annotations();
- for(annot_list_t::iterator annot_i = annots.begin();
- annot_i != annots.end();
- ++annot_i)
- {
- std::cout << "start " << annot_i->start << endl;
- }
+ // Copy the annotation list into a vector so entries can be indexed.
+ std::list<annot> annots_list = seq.annotations();
+ std::vector<annot> annots(annots_list.begin(), annots_list.end());
+ // REQUIRE aborts the test on a size mismatch so the indexed checks
+ // below never read past the end of the vector.
+ BOOST_REQUIRE_EQUAL( annots.size(), 5);
+ BOOST_CHECK_EQUAL( annots[0].start, 0 );
+ BOOST_CHECK_EQUAL( annots[0].end, 10 );
+ //BOOST_CHECK_EQUAL( annots[0].type, "type");
+ BOOST_CHECK_EQUAL( annots[0].name, "name");
+ //BOOST_CHECK_EQUAL( annots[1].name, "myf7");
+
+ //BOOST_CHECK_EQUAL( annots
}
// ticket:83 when you try to load a sequence from a file that doesn't