#include <boost/filesystem/fstream.hpp>
namespace fs = boost::filesystem;
+#include <boost/spirit/core.hpp>
+#include <boost/spirit/actor/push_back_actor.hpp>
+#include <boost/spirit/iterator/file_iterator.hpp>
+#include <boost/spirit/utility/chset.hpp>
+namespace spirit = boost::spirit;
+
#include <iostream>
#include <sstream>
update_sequences_motifs();
}
+
+// Helper functor (spirit semantic action) that appends each parsed motif
+// to our Mussa analysis.
+//
+// The grammar in Mussa::load_motifs stores the pieces of a motif record in
+// caller-owned variables via assign_a; this action fires once per complete
+// record and reads those variables. Every field it reads is therefore held
+// by reference: the values only exist at the time the action runs, not at
+// the time the functor is constructed.
+struct push_back_motif {
+  std::set<Sequence>& motif_set;                      // destination for motifs
+  boost::shared_ptr<AnnotationColors> color_mapper;   // receives motif colors
+  std::string& seq_string;                            // last parsed sequence
+  std::string& name;                                  // last parsed name (may be empty)
+  float& red;
+  float& green;
+  float& blue;
+
+  // The color components must be taken by reference. Taking them by value
+  // would bind the reference members to constructor parameters that die as
+  // soon as the constructor returns, leaving operator() to read dangling
+  // references.
+  push_back_motif(std::set<Sequence>& motif_set_,
+                  boost::shared_ptr<AnnotationColors> color_mapper_,
+                  std::string& seq_,
+                  std::string& name_,
+                  float& red_, float& green_, float& blue_)
+    : motif_set(motif_set_),
+      color_mapper(color_mapper_),
+      seq_string(seq_),
+      name(name_),
+      red(red_),
+      green(green_),
+      blue(blue_)
+  {
+  }
+
+  // Called by the parser with the matched range (unused); builds a Sequence
+  // from the currently stored fields, registers its color and inserts it
+  // into motif_set.
+  void operator()(std::string::const_iterator,
+                  std::string::const_iterator) const
+  {
+    //std::cout << "motif: " << seq_string << "/" << name << endl;
+
+    Sequence seq(seq_string);
+    // shouldn't we have a better field than "fasta header" and species?
+    seq.set_fasta_header(name);
+    // we need to clear the name in case the next motif doesn't have one.
+    name.clear();
+    // be nice if glsequence was a subclass of sequence so we could
+    // just attach colors directly to the motif.
+    Color c(red, green, blue);
+    color_mapper->appendInstanceColor("motif", seq.c_str(), c);
+    motif_set.insert(seq);
+  }
+};
+
// I mostly split the ifstream out so I can use a stringstream to test it.
+//
+// Reads the whole of `in` into memory and parses it as a list of motif
+// records of the form
+//
+//   <motif> [<name>] <red> <green> <blue>
+//
+// where <motif> is one or more characters of the nucleic IUPAC alphabet,
+// the optional <name> is a letter followed by letters/digits, and the three
+// color components are real numbers. Every field, including the last one,
+// must be followed by at least one whitespace character. Each complete
+// record is handed to push_back_motif, which adds it to motif_sequences and
+// registers its color with color_mapper.
+//
+// A message is written to std::clog when the input could not be parsed in
+// full. Once the input is exhausted the stream is left with eofbit/failbit
+// set.
void Mussa::load_motifs(std::istream &in)
{
+  std::string data;
+  const char *alphabet = Sequence::nucleic_iupac_alphabet.c_str();
string seq;
+  string name;
float red;
float green;
float blue;
- while(in.good())
- {
- in >> seq >> red >> green >> blue;
- // if we couldn't read this line 'cause we're like at the end of the file
- // try to exit the loop
- if (!in.good())
- break;
- try {
- seq = Sequence::motif_normalize(seq);
- } catch(motif_normalize_error e) {
- clog << "unable to parse " << seq << " skipping" << endl;
- clog << e.what() << endl;
- continue;
- }
- if (red < 0.0 or red > 1.0) {
- clog << "invalid red value " << red << ". must be in range [0..1]"
- << endl;
- continue;
- }
- if (green < 0.0 or green > 1.0) {
- clog << "invalid green value " << green << ". must be in range [0..1]"
- << endl;
- continue;
- }
- if (blue < 0.0 or blue > 1.0) {
- clog << "invalid blue value " << blue << ". must be in range [0..1]"
- << endl;
- continue;
- }
- if (motif_sequences.find(seq) == motif_sequences.end()) {
- // sequence wasn't found
- motif_sequences.insert(seq);
- Color c(red, green, blue);
- color_mapper->appendInstanceColor("motif", seq, c);
- } else {
- clog << "sequence " << seq << " was already defined skipping"
- << endl;
- continue;
- }
+  // slurp our data into a string
+  //
+  // read() + gcount() is used instead of readsome(): readsome() only hands
+  // back characters that are already buffered, which for a freshly opened
+  // file stream is implementation-defined and may be none at all.
+  std::streamsize bytes_read = 1;
+  while (in.good() and bytes_read) {
+    const std::streamsize bufsiz=512;
+    char buf[bufsiz];
+    in.read(buf, bufsiz);
+    // a short final read sets eofbit/failbit but still reports what it got
+    bytes_read = in.gcount();
+    data.append(buf, buf+bytes_read);
}
+  // parse our string
+  bool status = spirit::parse(data.begin(), data.end(),
+    *(
+      (
+        (
+          (+spirit::chset<>(alphabet))[spirit::assign_a(seq)] >>
+          +spirit::space_p
+        ) >>
+        !(
+          (spirit::alpha_p >> *spirit::alnum_p)[spirit::assign_a(name)]
+          >> +spirit::space_p
+        ) >>
+        spirit::real_p[spirit::assign_a(red)] >> +spirit::space_p >>
+        spirit::real_p[spirit::assign_a(green)] >> +spirit::space_p >>
+        spirit::real_p[spirit::assign_a(blue)] >> +spirit::space_p
+      )[push_back_motif(motif_sequences, color_mapper, seq, name, red, green, blue)]
+    )).full;
+  // Records parsed before the failure point have already been added; make
+  // the partial load visible instead of silently dropping the rest.
+  if (!status) {
+    std::clog << "unable to parse the complete motif list; "
+              << "motifs after the first malformed entry were skipped"
+              << std::endl;
+  }
update_sequences_motifs();
}
Mussa m1;
m1.append_sequence("AAAAGGGGTTTT");
- m1.append_sequence("GGGCCCCTTGGTT");
+ m1.append_sequence("GGGCCCCTTCCAATT");
m1.load_motifs(test_istream);
for (Mussa::vector_sequence_type::const_iterator seq_i = m1.sequences().begin();
}
}
+// A motif record may carry an optional name between the sequence and its
+// color values; load one such record and check the name on the stored motif.
+BOOST_AUTO_TEST_CASE( mussa_named_motif )
+{
+  std::istringstream motif_stream("CCAATT cat 0.1 0.2 0.3\n");
+
+  Mussa analysis;
+  analysis.append_sequence("AAAAGGGGTTTT");
+  analysis.append_sequence("GGGCCCCTTCCAATT");
+  analysis.load_motifs(motif_stream);
+
+  std::set<Sequence> loaded = analysis.motifs();
+  // REQUIRE: dereferencing begin() below is only valid with one entry.
+  BOOST_REQUIRE_EQUAL(loaded.size(), 1);
+  std::set<Sequence>::const_iterator first = loaded.begin();
+  BOOST_CHECK_EQUAL(first->get_name(), "cat");
+}
+
BOOST_AUTO_TEST_CASE( mussa_add_motif )
{
vector<Sequence> motifs;