// Helper functor to append created motifs to our Mussa analysis
struct push_back_motif {
- std::set<Sequence>& motif_set;
+ Mussa::motif_set& motifs;
boost::shared_ptr<AnnotationColors> color_mapper;
std::string& seq_string;
std::string& name;
float& green;
float& blue;
float& alpha;
+ int& parsed;
- push_back_motif(std::set<Sequence>& motif_set_,
+ push_back_motif(Mussa::motif_set& motifs_,
boost::shared_ptr<AnnotationColors> color_mapper_,
std::string& seq_,
std::string& name_,
- float &red_, float &green_, float &blue_, float &alpha_)
- : motif_set(motif_set_),
+ float &red_, float &green_, float &blue_, float &alpha_,
+ int &parsed_)
+ : motifs(motifs_),
color_mapper(color_mapper_),
seq_string(seq_),
name(name_),
red(red_),
green(green_),
blue(blue_),
- alpha(alpha_)
+ alpha(alpha_),
+ parsed(parsed_)
{
}
// just attach colors directly to the motif.
Color c(red, green, blue);
color_mapper->appendInstanceColor("motif", seq.c_str(), c);
- motif_set.insert(seq);
+ motifs.insert(seq);
+ ++parsed;
};
};
+//! load a list of motifs from the file at filename
+/*! Opens the file and hands the stream to load_motifs(std::istream&).
+ * \throws motif_load_error if the file cannot be opened, or if the
+ *         stream overload fails to parse its contents
+ */
+void Mussa::load_motifs(fs::path filename)
+{
+ fs::ifstream f;
+ f.open(filename, ifstream::in);
+ // a failed open leaves the stream in a failed state; report it here rather
+ // than handing an unreadable stream to the parser
+ if (not f) {
+   throw motif_load_error("Unable to open motif file: " + filename.string());
+ }
+ load_motifs(f);
+}
+
// I mostly split the ifstream out so I can use a stringstream to test it.
void Mussa::load_motifs(std::istream &in)
{
float green = 0.0;
float blue = 0.0;
float alpha = 1.0;
+ int parsed = 1;
// slurp our data into a string
std::streamsize bytes_read = 1;
spirit::real_p[spirit::assign_a(red)] >> +spirit::space_p >>
spirit::real_p[spirit::assign_a(green)] >> +spirit::space_p >>
spirit::real_p[spirit::assign_a(blue)] >> +spirit::space_p
- )[push_back_motif(motif_sequences, color_mapper, seq, name, red, green, blue, alpha)]
+ )[push_back_motif(motif_sequences, color_mapper, seq, name, red, green, blue, alpha, parsed)]
)).full;
if (not ok) {
- std::clog << "Error parsing motif stream " << std::endl;
+ stringstream msg;
+ msg << "Error parsing motif #" << parsed;
+ // erase our potentially broken motif list
+ motif_sequences.clear();
+ throw motif_load_error(msg.str());
}
update_sequences_motifs();
}
-void Mussa::load_motifs(fs::path filename)
-{
- fs::ifstream f;
- f.open(filename, ifstream::in);
- load_motifs(f);
-}
-
void Mussa::update_sequences_motifs()
{
// once we've loaded all the motifs from the file,
void progress(const std::string& description, int cur, int max);
public:
+ typedef std::set<Sequence> motif_set;
enum analysis_modes { TransitiveNway, RadialNway, EntropyNway,
RecursiveNway };
*/
void set_motifs(const std::vector<Sequence>& motifs,
const std::vector<Color>& colors);
- //! load motifs from an ifstream
/*! The file should look something like
* <sequence> <red> <green> <blue>
* where sequence is a string of IUPAC symbols
* and red,green,blue are a white space separated list of floats
* in the range [0.0, 1.0]
*/
- void load_motifs(std::istream &);
//! load a list of motifs from a file named filename
void load_motifs(boost::filesystem::path filename);
+ //! load motifs from an input stream
+ /*! \sa Mussa::load_motifs(boost::filesystem::path)
+ */
+ void load_motifs(std::istream &);
//! return our motifs;
- const std::set<Sequence>& motifs() const;
+ const motif_set& motifs() const;
//! return color mapper
boost::shared_ptr<AnnotationColors> colorMapper();
NwayPaths the_paths;
//! motif list
- std::set<Sequence> motif_sequences;
+ motif_set motif_sequences;
//! color manager
boost::shared_ptr<AnnotationColors> color_mapper;
//! path to our analysis
int& end;
std::string& name;
std::string& type;
+ int &parsed;
push_back_annot(std::list<annot>& annot_list_,
int& begin_,
int& end_,
std::string& name_,
- std::string& type_)
+ std::string& type_,
+ int &parsed_)
: annot_list(annot_list_),
begin(begin_),
end(end_),
name(name_),
- type(type_)
+ type(type_),
+ parsed(parsed_)
{
}
{
//std::cout << "adding annot: " << begin << "|" << end << "|" << name << "|" << type << std::endl;
annot_list.push_back(annot(begin, end, name, type));
+ ++parsed;
};
};
std::list<Sequence>& seq_list;
std::string& name;
std::string& seq;
+ int &parsed;
push_back_seq(std::list<Sequence>& seq_list_,
std::string& name_,
- std::string& seq_)
+ std::string& seq_,
+ int &parsed_)
: seq_list(seq_list_),
name(name_),
- seq(seq_)
+ seq(seq_),
+ parsed(parsed_)
{
}
Sequence s(new_seq);
s.set_fasta_header(name);
seq_list.push_back(s);
+ ++parsed;
};
};
-bool
+void
Sequence::parse_annot(std::string data, int start_index, int end_index)
{
int start=0;
std::string name;
std::string type;
std::string seq;
+ std::list<annot> parsed_annots;
std::list<Sequence> query_seqs;
-
- bool status = spirit::parse(data.begin(), data.end(),
- (
- //begin grammar
- !(
- (
- spirit::alpha_p >>
- +(spirit::graph_p)
- )[spirit::assign_a(species)] >>
- +(spirit::space_p)
- ) >>
- *(
- ( // ignore html tags
- *(spirit::space_p) >>
- spirit::ch_p('<') >>
- +(~spirit::ch_p('>')) >>
- spirit::ch_p('>') >>
- *(spirit::space_p)
- )
- |
- ( // parse an absolute location name
- (spirit::uint_p[spirit::assign_a(start)] >>
- +spirit::space_p >>
- spirit::uint_p[spirit::assign_a(end)] >>
- +spirit::space_p >>
- (
- spirit::alpha_p >>
- *spirit::graph_p
- )[spirit::assign_a(name)] >>
- // optional type
- !(
- +spirit::space_p >>
- (
- spirit::alpha_p >>
- *spirit::graph_p
- )[spirit::assign_a(type)]
- )
- // to understand how this group gets set
- // read the comment above struct push_back_annot
- )[push_back_annot(annots, start, end, type, name)]
- |
- ((spirit::ch_p('>')|spirit::str_p(">")) >>
- (*(spirit::print_p))[spirit::assign_a(name)] >>
- spirit::eol_p >>
- (+(spirit::chset<>(Alphabet::nucleic_alphabet.c_str())))[spirit::assign_a(seq)]
- )[push_back_seq(query_seqs, name, seq)]
- ) >>
- *spirit::space_p
+ int parsed=1;
+
+ bool ok = spirit::parse(data.begin(), data.end(),
+ (
+ //begin grammar
+ !(
+ (
+ spirit::alpha_p >>
+ +(spirit::graph_p)
+ )[spirit::assign_a(species)] >>
+ +(spirit::space_p)
+ ) >>
+ *(
+ ( // ignore html tags
+ *(spirit::space_p) >>
+ spirit::ch_p('<') >>
+ +(~spirit::ch_p('>')) >>
+ spirit::ch_p('>') >>
+ *(spirit::space_p)
)
- //end grammar
- )).full;
-
+ |
+ ( // parse an absolute location name
+ (spirit::uint_p[spirit::assign_a(start)] >>
+ +spirit::space_p >>
+ spirit::uint_p[spirit::assign_a(end)] >>
+ +spirit::space_p >>
+ (
+ spirit::alpha_p >>
+ *spirit::graph_p
+ )[spirit::assign_a(name)] >>
+ // optional type
+ !(
+ +spirit::space_p >>
+ (
+ spirit::alpha_p >>
+ *spirit::graph_p
+ )[spirit::assign_a(type)]
+ )
+ // to understand how this group gets set
+ // read the comment above struct push_back_annot
+ )[push_back_annot(parsed_annots, start, end, type, name, parsed)]
+ |
+ ((spirit::ch_p('>')|spirit::str_p(">")) >>
+ (*(spirit::print_p))[spirit::assign_a(name)] >>
+ spirit::eol_p >>
+ (+(spirit::chset<>(Alphabet::nucleic_alphabet.c_str())))[spirit::assign_a(seq)]
+ )[push_back_seq(query_seqs, name, seq, parsed)]
+ ) >>
+ *spirit::space_p
+ )
+ //end grammar
+ )).full;
+ if (not ok) {
+ std::stringstream msg;
+ msg << "Error parsing annotation #" << parsed;
+ throw annotation_load_error(msg.str());
+ }
+ // add newly parsed annotations to our sequence
+ std::copy(parsed_annots.begin(), parsed_annots.end(), std::back_inserter(annots));
// go search for query sequences
find_sequences(query_seqs.begin(), query_seqs.end());
- return status;
}
void Sequence::add_annotation(const annot& a)
//! load sequence annotations
//! \throws mussa_load_error
void load_annot(std::fstream& data_stream, int start_index, int end_index);
- bool parse_annot(std::string data, int start_index=0, int end_index=0);
+ //! parse annotation file
+ /*! \throws annotation_load_error
+ */
+ void parse_annot(std::string data, int start_index=0, int end_index=0);
//! add an annotation to our list of annotations
void add_annotation(const annot& a);
const std::list<annot>& annotations() const;
BOOST_AUTO_TEST_CASE( mussa_load_motif )
{
string data = "AAGG 1.0 1.0 0.0\n"
- "GGTT 0.0 0.1 1.0\n"
- "ZXY 2 1.9 0\n";
+ "GGTT 0.0 0.1 1.0\n";
istringstream test_istream(data);
m1.append_sequence("GGGCCCCTTCCAATT");
m1.load_motifs(test_istream);
+ BOOST_CHECK_EQUAL( m1.motifs().size(), 2);
for (Mussa::vector_sequence_type::const_iterator seq_i = m1.sequences().begin();
seq_i != m1.sequences().end();
++seq_i)
}
}
+BOOST_AUTO_TEST_CASE( mussa_load_broken_motif ) // malformed motif data must throw motif_load_error and leave no motifs behind
+{
+ string data = "AAGG 1.0 1.0 0.0\n"
+ "GGTT 0.0 0.1 1.0 1.0\n"
+ "ZZCTA 0.1 0.0 1.0\n"; // NOTE(review): which of these lines the motif grammar rejects is not visible from here - confirm
+
+ istringstream test_istream(data); // feed the data through the std::istream overload of load_motifs
+
+ Mussa m1;
+ m1.append_sequence("AAAAGGGGTTTT");
+ m1.append_sequence("GGGCCCCTTCCAATT");
+ BOOST_CHECK_THROW(m1.load_motifs(test_istream), motif_load_error); // the failure must surface as an exception
+
+ BOOST_CHECK_EQUAL( m1.motifs().size(), 0); // load_motifs clears motif_sequences before throwing, so nothing may remain
+}
+
BOOST_AUTO_TEST_CASE( mussa_named_motif )
{
string data = "CCAATT cat 0.1 0.2 0.3\n";
//BOOST_CHECK_EQUAL( annots
}
+BOOST_AUTO_TEST_CASE( annotation_broken_load ) // malformed annotation text must throw annotation_load_error and add no annotations
+{
+ string annot_data = "human\n"
+ "0 10 name type\n"
+ "blah60 50 backward\n" // does not begin with a number, unlike the entry above - presumably the line that breaks the parse; confirm
+ ">ident3 asdf\n"
+ "GCT\n"
+ "gCTn\n"
+ ;
+ string s(100, 'A');
+ s += "GCTGCTAATT";
+ Sequence seq(s, Sequence::reduced_dna_alphabet); // sequence the annotations would be attached to
+
+ BOOST_CHECK_THROW(seq.parse_annot(annot_data, 0, 0), annotation_load_error);
+ BOOST_CHECK_EQUAL(seq.annotations().size(), 0); // parse_annot copies parsed annotations into the sequence only after the whole parse succeeds
+ }
BOOST_AUTO_TEST_CASE(annotation_ucsc_html_load)
{
explicit sequence_invalid_load_error(const std::string& msg) :
sequence_load_error(msg) {};
};
+
+//! Error loading sequence annotation; Sequence::parse_annot throws this when its grammar does not accept the whole annotation text
+class annotation_load_error : public sequence_load_error // so handlers for sequence_load_error also catch it
+{
+public:
+ explicit annotation_load_error(const std::string& msg) : // msg is passed unchanged to the base class
+ sequence_load_error(msg) {};
+};
+
//! failure running analysis
class mussa_analysis_error : public mussa_error
{
mussa_error(msg) {};
};
+//! couldn't normalize a motif
+/*
class motif_normalize_error : public mussa_error
{
public:
explicit motif_normalize_error(const std::string& msg) :
mussa_error(msg) {};
};
+*/
+
+//! something went wrong loading a motif; Mussa::load_motifs(std::istream&) throws this when the motif grammar does not consume the whole input
+class motif_load_error : public mussa_load_error // so handlers for mussa_load_error also catch it
+{
+public:
+ explicit motif_load_error(const std::string& msg) : // msg is passed unchanged to the base class
+ mussa_load_error(msg) {};
+};
//! ConservedPath::nextTo had two paths that weren't the same size
class conserved_path_size_mismatch : public mussa_error