From 3814f060a887355f15c923d3b3d1a9b322779832 Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Thu, 14 Sep 2006 01:33:19 +0000 Subject: [PATCH] Implement an optional name in the motif parser ticket:116 Using the spirit parser, the motif file now takes a sequence, an optional name, and the RGB float colors. The downside is that I don't currently know how to get spirit to report parse errors. --- alg/mussa.cpp | 116 ++++++++++++++++++++++++++-------------- alg/test/test_mussa.cpp | 17 +++++- 2 files changed, 93 insertions(+), 40 deletions(-) diff --git a/alg/mussa.cpp b/alg/mussa.cpp index cc5825e..ccba221 100644 --- a/alg/mussa.cpp +++ b/alg/mussa.cpp @@ -16,6 +16,12 @@ #include namespace fs = boost::filesystem; +#include +#include +#include +#include +namespace spirit = boost::spirit; + #include #include @@ -737,54 +743,86 @@ void Mussa::set_motifs(const vector& motifs, update_sequences_motifs(); } + +// Helper functor to append created motifs to our Mussa analysis +struct push_back_motif { + std::set& motif_set; + boost::shared_ptr color_mapper; + std::string& seq_string; + std::string& name; + float& red; + float& green; + float& blue; + + push_back_motif(std::set& motif_set_, + boost::shared_ptr color_mapper_, + std::string& seq_, + std::string& name_, + float red_, float green_, float blue_) + : motif_set(motif_set_), + color_mapper(color_mapper_), + seq_string(seq_), + name(name_), + red(red_), + green(green_), + blue(blue_) + { + } + + void operator()(std::string::const_iterator, + std::string::const_iterator) const + { + //std::cout << "motif: " << seq_string << "/" << name << endl; + + Sequence seq(seq_string); + // shouldn't we have a better field than "fasta header" and speices? + seq.set_fasta_header(name); + // we need to clear the name in case the next motif doesn't have one. + name.clear(); + // be nice if glsequence was a subclass of sequence so we could + // just attach colors directly to the motif.
+ Color c(red, green, blue); + color_mapper->appendInstanceColor("motif", seq.c_str(), c); + motif_set.insert(seq); + }; +}; + // I mostly split the ifstream out so I can use a stringstream to test it. void Mussa::load_motifs(std::istream &in) { + std::string data; + const char *alphabet = Sequence::nucleic_iupac_alphabet.c_str(); string seq; + string name; float red; float green; float blue; - while(in.good()) - { - in >> seq >> red >> green >> blue; - // if we couldn't read this line 'cause we're like at the end of the file - // try to exit the loop - if (!in.good()) - break; - try { - seq = Sequence::motif_normalize(seq); - } catch(motif_normalize_error e) { - clog << "unable to parse " << seq << " skipping" << endl; - clog << e.what() << endl; - continue; - } - if (red < 0.0 or red > 1.0) { - clog << "invalid red value " << red << ". must be in range [0..1]" - << endl; - continue; - } - if (green < 0.0 or green > 1.0) { - clog << "invalid green value " << green << ". must be in range [0..1]" - << endl; - continue; - } - if (blue < 0.0 or blue > 1.0) { - clog << "invalid blue value " << blue << ". 
must be in range [0..1]" - << endl; - continue; - } - if (motif_sequences.find(seq) == motif_sequences.end()) { - // sequence wasn't found - motif_sequences.insert(seq); - Color c(red, green, blue); - color_mapper->appendInstanceColor("motif", seq, c); - } else { - clog << "sequence " << seq << " was already defined skipping" - << endl; - continue; - } + // slurp our data into a string + std::streamsize bytes_read = 1; + while (in.good() and bytes_read) { + const std::streamsize bufsiz=512; + char buf[bufsiz]; + bytes_read = in.readsome(buf, bufsiz); + data.append(buf, buf+bytes_read); } + // parse our string + bool status = spirit::parse(data.begin(), data.end(), + *( + ( + ( + (+spirit::chset<>(alphabet))[spirit::assign_a(seq)] >> + +spirit::space_p + ) >> + !( + (spirit::alpha_p >> *spirit::alnum_p)[spirit::assign_a(name)] + >> +spirit::space_p + ) >> + spirit::real_p[spirit::assign_a(red)] >> +spirit::space_p >> + spirit::real_p[spirit::assign_a(green)] >> +spirit::space_p >> + spirit::real_p[spirit::assign_a(blue)] >> +spirit::space_p + )[push_back_motif(motif_sequences, color_mapper, seq, name, red, green, blue)] + )).full; update_sequences_motifs(); } diff --git a/alg/test/test_mussa.cpp b/alg/test/test_mussa.cpp index 4b88c24..bebb38d 100644 --- a/alg/test/test_mussa.cpp +++ b/alg/test/test_mussa.cpp @@ -173,7 +173,7 @@ BOOST_AUTO_TEST_CASE( mussa_load_motif ) Mussa m1; m1.append_sequence("AAAAGGGGTTTT"); - m1.append_sequence("GGGCCCCTTGGTT"); + m1.append_sequence("GGGCCCCTTCCAATT"); m1.load_motifs(test_istream); for (Mussa::vector_sequence_type::const_iterator seq_i = m1.sequences().begin(); @@ -184,6 +184,21 @@ BOOST_AUTO_TEST_CASE( mussa_load_motif ) } } +BOOST_AUTO_TEST_CASE( mussa_named_motif ) +{ + string data = "CCAATT cat 0.1 0.2 0.3\n"; + istringstream test_istream(data); + + Mussa m1; + m1.append_sequence("AAAAGGGGTTTT"); + m1.append_sequence("GGGCCCCTTCCAATT"); + m1.load_motifs(test_istream); + + std::set motifs = m1.motifs(); + 
BOOST_REQUIRE_EQUAL(motifs.size(), 1); + BOOST_CHECK_EQUAL(motifs.begin()->get_name(), "cat"); +} + BOOST_AUTO_TEST_CASE( mussa_add_motif ) { vector motifs; -- 2.30.2