From 81354ba90b6b670d361263bb3ff70bef67f83d9f Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Wed, 11 Oct 2006 20:59:30 +0000 Subject: [PATCH] throw errors when spirit parsing fails ticket:126 this patch adds code to throw errors (and give some rough idea where the error was) when the spirit parser fails. (Currently either loading motifs, or loading sequence annotations). Also I added a typedef to shorten std::set to motif_set --- alg/mussa.cpp | 39 ++++++----- alg/mussa.hpp | 11 ++-- alg/sequence.cpp | 129 +++++++++++++++++++++---------------- alg/sequence.hpp | 5 +- alg/test/test_mussa.cpp | 20 +++++- alg/test/test_sequence.cpp | 16 +++++ mussa_exceptions.hpp | 20 ++++++ 7 files changed, 161 insertions(+), 79 deletions(-) diff --git a/alg/mussa.cpp b/alg/mussa.cpp index 50991c1..634cb6d 100644 --- a/alg/mussa.cpp +++ b/alg/mussa.cpp @@ -764,7 +764,7 @@ void Mussa::set_motifs(const vector& motifs, // Helper functor to append created motifs to our Mussa analysis struct push_back_motif { - std::set& motif_set; + Mussa::motif_set& motifs; boost::shared_ptr color_mapper; std::string& seq_string; std::string& name; @@ -772,20 +772,23 @@ struct push_back_motif { float& green; float& blue; float& alpha; + int& parsed; - push_back_motif(std::set& motif_set_, + push_back_motif(Mussa::motif_set& motifs_, boost::shared_ptr color_mapper_, std::string& seq_, std::string& name_, - float &red_, float &green_, float &blue_, float &alpha_) - : motif_set(motif_set_), + float &red_, float &green_, float &blue_, float &alpha_, + int &parsed_) + : motifs(motifs_), color_mapper(color_mapper_), seq_string(seq_), name(name_), red(red_), green(green_), blue(blue_), - alpha(alpha_) + alpha(alpha_), + parsed(parsed_) { } @@ -801,10 +804,18 @@ struct push_back_motif { // just attach colors directly to the motif. 
Color c(red, green, blue); color_mapper->appendInstanceColor("motif", seq.c_str(), c); - motif_set.insert(seq); + motifs.insert(seq); + ++parsed; }; }; +void Mussa::load_motifs(fs::path filename) +{ + fs::ifstream f; + f.open(filename, ifstream::in); + load_motifs(f); +} + // I mostly split the ifstream out so I can use a stringstream to test it. void Mussa::load_motifs(std::istream &in) { @@ -816,6 +827,7 @@ void Mussa::load_motifs(std::istream &in) float green = 0.0; float blue = 0.0; float alpha = 1.0; + int parsed = 1; // slurp our data into a string std::streamsize bytes_read = 1; @@ -840,21 +852,18 @@ void Mussa::load_motifs(std::istream &in) spirit::real_p[spirit::assign_a(red)] >> +spirit::space_p >> spirit::real_p[spirit::assign_a(green)] >> +spirit::space_p >> spirit::real_p[spirit::assign_a(blue)] >> +spirit::space_p - )[push_back_motif(motif_sequences, color_mapper, seq, name, red, green, blue, alpha)] + )[push_back_motif(motif_sequences, color_mapper, seq, name, red, green, blue, alpha, parsed)] )).full; if (not ok) { - std::clog << "Error parsing motif stream " << std::endl; + stringstream msg; + msg << "Error parsing motif #" << parsed; + // erase our potentially broken motif list + motif_sequences.clear(); + throw motif_load_error(msg.str()); } update_sequences_motifs(); } -void Mussa::load_motifs(fs::path filename) -{ - fs::ifstream f; - f.open(filename, ifstream::in); - load_motifs(f); -} - void Mussa::update_sequences_motifs() { // once we've loaded all the motifs from the file, diff --git a/alg/mussa.hpp b/alg/mussa.hpp index cc80607..64f4649 100644 --- a/alg/mussa.hpp +++ b/alg/mussa.hpp @@ -40,6 +40,7 @@ signals: void progress(const std::string& description, int cur, int max); public: + typedef std::set motif_set; enum analysis_modes { TransitiveNway, RadialNway, EntropyNway, RecursiveNway }; @@ -168,18 +169,20 @@ public: */ void set_motifs(const std::vector& motifs, const std::vector& colors); - //! load motifs from an ifstream /*! 
The file should look something like * * where sequence is a string of IUPAC symbols * and red,green,blue are a white space separated list of floats * in the range [0.0, 1.0] */ - void load_motifs(std::istream &); //! load a list of motifs from a file named filename void load_motifs(boost::filesystem::path filename); + //! load motifs from an istream + /*! \sa Mussa::load_motifs(boost::filesystem::path) + */ + void load_motifs(std::istream &); //! return our motifs; - const std::set& motifs() const; + const motif_set& motifs() const; //! return color mapper boost::shared_ptr colorMapper(); @@ -214,7 +217,7 @@ public: NwayPaths the_paths; //! motif list - std::set motif_sequences; + motif_set motif_sequences; //! color manager boost::shared_ptr color_mapper; //! path to our analysis diff --git a/alg/sequence.cpp b/alg/sequence.cpp index ae58e5c..e1923c6 100644 --- a/alg/sequence.cpp +++ b/alg/sequence.cpp @@ -345,17 +345,20 @@ struct push_back_annot { int& end; std::string& name; std::string& type; + int &parsed; push_back_annot(std::list& annot_list_, int& begin_, int& end_, std::string& name_, - std::string& type_) + std::string& type_, + int &parsed_) : annot_list(annot_list_), begin(begin_), end(end_), name(name_), - type(type_) + type(type_), + parsed(parsed_) { } @@ -364,6 +367,7 @@ struct push_back_annot { { //std::cout << "adding annot: " << begin << "|" << end << "|" << name << "|" << type << std::endl; annot_list.push_back(annot(begin, end, name, type)); + ++parsed; }; }; @@ -371,13 +375,16 @@ struct push_back_seq { std::list& seq_list; std::string& name; std::string& seq; + int &parsed; push_back_seq(std::list& seq_list_, std::string& name_, - std::string& seq_) + std::string& seq_, + int &parsed_) : seq_list(seq_list_), name(name_), - seq(seq_) + seq(seq_), + parsed(parsed_) { } @@ -397,10 +404,11 @@ struct push_back_seq { Sequence s(new_seq); s.set_fasta_header(name); seq_list.push_back(s); + ++parsed; }; }; -bool +void Sequence::parse_annot(std::string 
data, int start_index, int end_index) { int start=0; @@ -408,62 +416,69 @@ Sequence::parse_annot(std::string data, int start_index, int end_index) std::string name; std::string type; std::string seq; + std::list parsed_annots; std::list query_seqs; - - bool status = spirit::parse(data.begin(), data.end(), - ( - //begin grammar - !( - ( - spirit::alpha_p >> - +(spirit::graph_p) - )[spirit::assign_a(species)] >> - +(spirit::space_p) - ) >> - *( - ( // ignore html tags - *(spirit::space_p) >> - spirit::ch_p('<') >> - +(~spirit::ch_p('>')) >> - spirit::ch_p('>') >> - *(spirit::space_p) - ) - | - ( // parse an absolute location name - (spirit::uint_p[spirit::assign_a(start)] >> - +spirit::space_p >> - spirit::uint_p[spirit::assign_a(end)] >> - +spirit::space_p >> - ( - spirit::alpha_p >> - *spirit::graph_p - )[spirit::assign_a(name)] >> - // optional type - !( - +spirit::space_p >> - ( - spirit::alpha_p >> - *spirit::graph_p - )[spirit::assign_a(type)] - ) - // to understand how this group gets set - // read the comment above struct push_back_annot - )[push_back_annot(annots, start, end, type, name)] - | - ((spirit::ch_p('>')|spirit::str_p(">")) >> - (*(spirit::print_p))[spirit::assign_a(name)] >> - spirit::eol_p >> - (+(spirit::chset<>(Alphabet::nucleic_alphabet.c_str())))[spirit::assign_a(seq)] - )[push_back_seq(query_seqs, name, seq)] - ) >> - *spirit::space_p + int parsed=1; + + bool ok = spirit::parse(data.begin(), data.end(), + ( + //begin grammar + !( + ( + spirit::alpha_p >> + +(spirit::graph_p) + )[spirit::assign_a(species)] >> + +(spirit::space_p) + ) >> + *( + ( // ignore html tags + *(spirit::space_p) >> + spirit::ch_p('<') >> + +(~spirit::ch_p('>')) >> + spirit::ch_p('>') >> + *(spirit::space_p) ) - //end grammar - )).full; - + | + ( // parse an absolute location name + (spirit::uint_p[spirit::assign_a(start)] >> + +spirit::space_p >> + spirit::uint_p[spirit::assign_a(end)] >> + +spirit::space_p >> + ( + spirit::alpha_p >> + *spirit::graph_p + 
)[spirit::assign_a(name)] >> + // optional type + !( + +spirit::space_p >> + ( + spirit::alpha_p >> + *spirit::graph_p + )[spirit::assign_a(type)] + ) + // to understand how this group gets set + // read the comment above struct push_back_annot + )[push_back_annot(parsed_annots, start, end, type, name, parsed)] + | + ((spirit::ch_p('>')|spirit::str_p(">")) >> + (*(spirit::print_p))[spirit::assign_a(name)] >> + spirit::eol_p >> + (+(spirit::chset<>(Alphabet::nucleic_alphabet.c_str())))[spirit::assign_a(seq)] + )[push_back_seq(query_seqs, name, seq, parsed)] + ) >> + *spirit::space_p + ) + //end grammar + )).full; + if (not ok) { + std::stringstream msg; + msg << "Error parsing annotation #" << parsed; + throw annotation_load_error(msg.str()); + } + // add newly parsed annotations to our sequence + std::copy(parsed_annots.begin(), parsed_annots.end(), std::back_inserter(annots)); // go seearch for query sequences find_sequences(query_seqs.begin(), query_seqs.end()); - return status; } void Sequence::add_annotation(const annot& a) diff --git a/alg/sequence.hpp b/alg/sequence.hpp index dc9d594..20bf018 100644 --- a/alg/sequence.hpp +++ b/alg/sequence.hpp @@ -218,7 +218,10 @@ public: //! load sequence annotations //! \throws mussa_load_error void load_annot(std::fstream& data_stream, int start_index, int end_index); - bool parse_annot(std::string data, int start_index=0, int end_index=0); + //! parse annotation file + /*! \throws annotation_load_error + */ + void parse_annot(std::string data, int start_index=0, int end_index=0); //! 
add an annotation to our list of annotations void add_annotation(const annot& a); const std::list& annotations() const; diff --git a/alg/test/test_mussa.cpp b/alg/test/test_mussa.cpp index dd68250..74f3c20 100644 --- a/alg/test/test_mussa.cpp +++ b/alg/test/test_mussa.cpp @@ -180,8 +180,7 @@ BOOST_AUTO_TEST_CASE( mussa_load_analysis ) BOOST_AUTO_TEST_CASE( mussa_load_motif ) { string data = "AAGG 1.0 1.0 0.0\n" - "GGTT 0.0 0.1 1.0\n" - "ZXY 2 1.9 0\n"; + "GGTT 0.0 0.1 1.0\n"; istringstream test_istream(data); @@ -190,6 +189,7 @@ BOOST_AUTO_TEST_CASE( mussa_load_motif ) m1.append_sequence("GGGCCCCTTCCAATT"); m1.load_motifs(test_istream); + BOOST_CHECK_EQUAL( m1.motifs().size(), 2); for (Mussa::vector_sequence_type::const_iterator seq_i = m1.sequences().begin(); seq_i != m1.sequences().end(); ++seq_i) @@ -198,6 +198,22 @@ BOOST_AUTO_TEST_CASE( mussa_load_motif ) } } +BOOST_AUTO_TEST_CASE( mussa_load_broken_motif ) +{ + string data = "AAGG 1.0 1.0 0.0\n" + "GGTT 0.0 0.1 1.0 1.0\n" + "ZZCTA 0.1 0.0 1.0\n"; + + istringstream test_istream(data); + + Mussa m1; + m1.append_sequence("AAAAGGGGTTTT"); + m1.append_sequence("GGGCCCCTTCCAATT"); + BOOST_CHECK_THROW(m1.load_motifs(test_istream), motif_load_error); + + BOOST_CHECK_EQUAL( m1.motifs().size(), 0); +} + BOOST_AUTO_TEST_CASE( mussa_named_motif ) { string data = "CCAATT cat 0.1 0.2 0.3\n"; diff --git a/alg/test/test_sequence.cpp b/alg/test/test_sequence.cpp index d4b2821..f9634ce 100644 --- a/alg/test/test_sequence.cpp +++ b/alg/test/test_sequence.cpp @@ -325,6 +325,22 @@ BOOST_AUTO_TEST_CASE( annotation_load ) //BOOST_CHECK_EQUAL( annots } +BOOST_AUTO_TEST_CASE( annotation_broken_load ) +{ + string annot_data = "human\n" + "0 10 name type\n" + "blah60 50 backward\n" + ">ident3 asdf\n" + "GCT\n" + "gCTn\n" + ; + string s(100, 'A'); + s += "GCTGCTAATT"; + Sequence seq(s, Sequence::reduced_dna_alphabet); + + BOOST_CHECK_THROW(seq.parse_annot(annot_data, 0, 0), annotation_load_error); + 
BOOST_CHECK_EQUAL(seq.annotations().size(), 0); + } BOOST_AUTO_TEST_CASE(annotation_ucsc_html_load) { diff --git a/mussa_exceptions.hpp b/mussa_exceptions.hpp index 065347a..a78dd4a 100644 --- a/mussa_exceptions.hpp +++ b/mussa_exceptions.hpp @@ -62,6 +62,15 @@ public: explicit sequence_invalid_load_error(const std::string& msg) : sequence_load_error(msg) {}; }; + +//! Error loading sequence annotation +class annotation_load_error : public sequence_load_error +{ +public: + explicit annotation_load_error(const std::string& msg) : + sequence_load_error(msg) {}; +}; + //! failure running analysis class mussa_analysis_error : public mussa_error { @@ -70,12 +79,23 @@ public: mussa_error(msg) {}; }; +//! couldn't normalize a motif +/* class motif_normalize_error : public mussa_error { public: explicit motif_normalize_error(const std::string& msg) : mussa_error(msg) {}; }; +*/ + +//! something went wrong loading a motif +class motif_load_error : public mussa_load_error +{ +public: + explicit motif_load_error(const std::string& msg) : + mussa_load_error(msg) {}; +}; //! ConservedPath::nextTo had two paths that weren't the same size class conserved_path_size_mismatch : public mussa_error -- 2.30.2