From: Diane Trout Date: Wed, 11 Oct 2006 21:37:48 +0000 (+0000) Subject: add Mussa::save_motifs() X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=mussa.git;a=commitdiff_plain;h=c760a5d3839b241f2f94437731050622b197bf03 add Mussa::save_motifs() necessary for ticket:133 This adds the low level motif saving code to Mussa. To make things work a bit more reliably, the parse code can understand the alpha channel now (so I can save hidden motifs), and can parse a motif name that is quoted like "foo asdf". I added that as I expect users will put spaces in their motif names. However, things will probably break if someone puts a " in a motif name; see ticket:149 --- diff --git a/alg/color.cpp b/alg/color.cpp index cbcc59e..64eb782 100644 --- a/alg/color.cpp +++ b/alg/color.cpp @@ -90,9 +90,6 @@ bool operator==(const Color &x, const Color &y) std::ostream &operator<<(std::ostream &out, const Color &c) { - out << "Color(" << c.r() << ", " - << c.g() << ", " - << c.b() << ", " - << c.a() << ")"; + out << c.r() << " " << c.g() << " " << c.b() << " " << c.a(); return out; } diff --git a/alg/mussa.cpp b/alg/mussa.cpp index 634cb6d..18539b8 100644 --- a/alg/mussa.cpp +++ b/alg/mussa.cpp @@ -802,8 +802,9 @@ struct push_back_motif { name.clear(); // be nice if glsequence was a subclass of sequence so we could // just attach colors directly to the motif. 
- Color c(red, green, blue); + Color c(red, green, blue, alpha); color_mapper->appendInstanceColor("motif", seq.c_str(), c); + alpha = 1.0; motifs.insert(seq); ++parsed; }; @@ -839,19 +840,29 @@ void Mussa::load_motifs(std::istream &in) } // parse our string bool ok = spirit::parse(data.begin(), data.end(), - *( + *( ( ( (+spirit::chset<>(alphabet))[spirit::assign_a(seq)] >> +spirit::space_p ) >> !( - (spirit::alpha_p >> *spirit::graph_p)[spirit::assign_a(name)] - >> +spirit::space_p + ( + // names can either be letter followed by non-space characters + (spirit::alpha_p >> *spirit::graph_p)[spirit::assign_a(name)] + | + // or a quoted string + ( + spirit::ch_p('"') >> + (+(~spirit::ch_p('"')))[spirit::assign_a(name)] >> + spirit::ch_p('"') + ) + ) >> +spirit::space_p ) >> spirit::real_p[spirit::assign_a(red)] >> +spirit::space_p >> spirit::real_p[spirit::assign_a(green)] >> +spirit::space_p >> - spirit::real_p[spirit::assign_a(blue)] >> +spirit::space_p + spirit::real_p[spirit::assign_a(blue)] >> +spirit::space_p >> + !(spirit::real_p[spirit::assign_a(alpha)] >> +spirit::space_p) )[push_back_motif(motif_sequences, color_mapper, seq, name, red, green, blue, alpha, parsed)] )).full; if (not ok) { @@ -864,6 +875,28 @@ void Mussa::load_motifs(std::istream &in) update_sequences_motifs(); } +void Mussa::save_motifs(fs::path filename) +{ + fs::ofstream out_stream; + out_stream.open(filename, ofstream::out); + save_motifs(out_stream); +} + +void Mussa::save_motifs(std::ostream& out) +{ + for(motif_set::iterator motif_i = motif_sequences.begin(); + motif_i != motif_sequences.end(); + ++motif_i) + { + out << motif_i->get_sequence() << " "; + if (motif_i->get_name().size() > 0) { + out << "\"" << motif_i->get_name() << "\" "; + } + out << color_mapper->lookup("motif", motif_i->get_sequence()); + out << std::endl; + } +} + void Mussa::update_sequences_motifs() { // once we've loaded all the motifs from the file, diff --git a/alg/mussa.hpp b/alg/mussa.hpp index 64f4649..27cd239 
100644 --- a/alg/mussa.hpp +++ b/alg/mussa.hpp @@ -181,6 +181,10 @@ public: /*! \sa Mussa::load_motifs(boost::filesystem::path) */ void load_motifs(std::istream &); + //! save motif list to the specified filename + void save_motifs(boost::filesystem::path filename); + //! save motif list to an ostream + void save_motifs(std::ostream &); //! return our motifs; const motif_set& motifs() const; diff --git a/alg/test/test_mussa.cpp b/alg/test/test_mussa.cpp index 74f3c20..cfa8735 100644 --- a/alg/test/test_mussa.cpp +++ b/alg/test/test_mussa.cpp @@ -180,7 +180,7 @@ BOOST_AUTO_TEST_CASE( mussa_load_analysis ) BOOST_AUTO_TEST_CASE( mussa_load_motif ) { string data = "AAGG 1.0 1.0 0.0\n" - "GGTT 0.0 0.1 1.0\n"; + "GGTT 0.0 0.1 1.0 1.0\n"; istringstream test_istream(data); @@ -243,6 +243,74 @@ BOOST_AUTO_TEST_CASE( mussa_weirdly_spaced_named_motif ) BOOST_REQUIRE_EQUAL(motifs.size(), 1); BOOST_CHECK_EQUAL(motifs.begin()->get_name(), "cat_meow123"); } + +BOOST_AUTO_TEST_CASE( mussa_name_quoted_motif ) +{ + string data = "CCAATT \"cat meow 123\" 0.1 0.2 0.3\n"; + istringstream test_istream(data); + + Mussa m1; + m1.append_sequence("AAAAGGGGTTTT"); + m1.append_sequence("GGGCCCCTTCCAATT"); + m1.load_motifs(test_istream); + + std::set motifs = m1.motifs(); + BOOST_REQUIRE_EQUAL(motifs.size(), 1); + BOOST_CHECK_EQUAL(motifs.begin()->get_name(), "cat meow 123"); +} + +BOOST_AUTO_TEST_CASE( mussa_name_embedded_quote_motif ) +{ + // pretty obviously this shouldn't work as " are our delimiter + // and i'm too lazy to add support for \ in the parser + string data = "ATA 0.5 0.5 0.5\n" + "CCAATT \"cat \"meow 123\" 0.1 0.2 0.3\n"; + istringstream test_istream(data); + + Mussa m1; + m1.append_sequence("AAAAGGGGTTTT"); + m1.append_sequence("GGGCCCCTTCCAATT"); + BOOST_CHECK_THROW( m1.load_motifs(test_istream), motif_load_error); + + std::set motifs = m1.motifs(); + BOOST_REQUIRE_EQUAL(motifs.size(), 0); +} + +BOOST_AUTO_TEST_CASE( mussa_save_motif ) +{ + string data = "ATA 1 1 1 1\n" + 
"CAT \"my name\" 1 0 0.5 0.5\n"; + istringstream data_istream(data); + + Mussa m1; + m1.append_sequence("AAAAGGGGTTTT"); + m1.append_sequence("GGGCCCCTTCCAATT"); + m1.load_motifs(data_istream); + + string save; + ostringstream save_ostream(save); + m1.save_motifs(save_ostream); + + istringstream reloaded_istream(save_ostream.str()); + Mussa m2; + m2.append_sequence("AAAAGGGGTTTT"); + m2.append_sequence("GGGCCCCTTCCAATT"); + m2.load_motifs(reloaded_istream); + + BOOST_REQUIRE_EQUAL(m1.motifs().size(), m2.motifs().size()); + Mussa::motif_set::const_iterator m1motif = m1.motifs().begin(); + Mussa::motif_set::const_iterator m2motif = m2.motifs().begin(); + for (; + m1motif != m1.motifs().end() and m2motif != m2.motifs().end(); + ++m1motif, ++m2motif) + { + BOOST_CHECK_EQUAL(m1motif->get_sequence(), m2motif->get_sequence()); + BOOST_CHECK_EQUAL(m1motif->get_name(), m2motif->get_name()); + BOOST_CHECK_EQUAL(m1.colorMapper()->lookup("motif", m1motif->get_sequence()), + m2.colorMapper()->lookup("motif", m2motif->get_sequence())); + } +} + BOOST_AUTO_TEST_CASE( mussa_add_motif ) { vector motifs;