};
};
-void
+bool
Sequence::parse_annot(std::string data, int start_index, int end_index)
{
int start=0;
+(spirit::space_p)
) >>
*(
+ ( // ignore html tags
+ *(spirit::space_p) >>
+ spirit::ch_p('<') >>
+ +(~spirit::ch_p('>')) >>
+ spirit::ch_p('>') >>
+ *(spirit::space_p)
+ )
+ |
( // parse an absolute location name
(spirit::uint_p[spirit::assign_a(start)] >>
+spirit::space_p >>
// read the comment above struct push_back_annot
)[push_back_annot(annots, start, end, type, name)]
|
- (spirit::ch_p('>') >>
+ ((spirit::ch_p('>')|spirit::str_p(">")) >>
(*(spirit::print_p))[spirit::assign_a(name)] >>
spirit::eol_p >>
(+(spirit::chset<>(iupac_alphabet)))[spirit::assign_a(seq)]
// go search for query sequences
find_sequences(query_seqs.begin(), query_seqs.end());
+ return status;
}
void Sequence::add_annotation(const annot& a)
//! load sequence annotations
//! \throws mussa_load_error
void load_annot(std::fstream& data_stream, int start_index, int end_index);
- void parse_annot(std::string data, int start_index, int end_index);
+ bool parse_annot(std::string data, int start_index=0, int end_index=0);
//! add an annotation to our list of annotations
void add_annotation(const annot& a);
const std::list<annot>& annotations() const;
//BOOST_CHECK_EQUAL( annots
}
+
+BOOST_AUTO_TEST_CASE(annotation_ucsc_html_load)
+{
+ // Trimmed-down copy of the HTML page UCSC returns for a sequence request
+ // (most of the sequence text and the repeated FASTA blocks were removed to
+ // keep the example short); parse_annot is expected to yield two annotations.
+ string annot_data = "\n"
+ "<PRE>\n"
+ ">hg17_knownGene_NM_001824_0 range=chr19:50517919-50517974 5'pad=0 3'pad=0 revComp=TRUE strand=- repeatMasking=none\n"
+ "GGGTCAGTGTCACCTCCAGGATACAGACAG\n"
+ ">hg17_knownGene_NM_001824_3 range=chr19:50510563-50510695 5'pad=0 3'pad=0 revComp=TRUE strand=- repeatMasking=none\n"
+ "GGTGGAGACGACCTGGACCCTAACTACGT\n"
+ "</PRE>\n"
+ "\n"
+ "</BODY>\n"
+ "</HTML>\n"
+ ;
+
+ string s =
+ "TGGGTCAGTGTCACCTCCAGGATACAGACAGCCCCCCTTCAGCCCAGCCCAGCCAG"
+ "AAAAA"
+ "GGTGGAGACGACCTGGACCCTAACTACGTGCTCAGCAGCCGCGTCCGCAC";
+ Sequence seq(s);
+ seq.parse_annot(annot_data);
+ std::list<annot> annots = seq.annotations();
+ BOOST_CHECK_EQUAL( annots.size(), 2);
+}
+
BOOST_AUTO_TEST_CASE( annotation_load_no_species_name )
{
string annot_data = "0 10 name type\n"