fix motif loader under release build
authorDiane Trout <diane@caltech.edu>
Thu, 19 Oct 2006 23:27:07 +0000 (23:27 +0000)
committerDiane Trout <diane@caltech.edu>
Thu, 19 Oct 2006 23:27:07 +0000 (23:27 +0000)
tciet:163
It turns out that allocating a static class under a release build with a
static char might not be so reliable. This showed up as two problems.
The first was that the test_algorithm unit test would fail.

The second was that the motif parser wouldn't work. In my efforts to
diagnose this I ended up moving the motif file parser to its own file
and splitting the spirit semantic actions up into multiple functors.
(functors remind me of anonymous classes in Java: useful, but so annoying
syntactically).

Eventually I solved the problem by making the const char *alphabets
available directly from the Alphabet class, as I couldn't figure out
a way to get the static function returning a reference to a static local
variable to work (according to the C++ FAQ, such a variable should be
allocated when the function is first called and not before).

alg/CMakeLists.txt
alg/alphabet.cpp
alg/alphabet.hpp
alg/motif_parser.cpp [new file with mode: 0644]
alg/motif_parser.hpp [new file with mode: 0644]
alg/mussa.cpp
alg/sequence.cpp
alg/test/test_alphabet.cpp

index e77e1d89fcf007b9028fc99343a2e047c9356364..dcf9dcade486c34bd794d8eb15a8f6cc9be22311 100644 (file)
@@ -19,6 +19,7 @@ SET(SOURCES alphabet.cpp
             glseqbrowser.cpp 
             glsequence.cpp 
             mussa.cpp 
+            motif_parser.cpp 
             nway_entropy.cpp
             nway_other.cpp 
             nway_paths.cpp
index bd29dd2480f47ace4caa84e24decba9549e936cd..114674f2379b50e366484778dfc8535db9a9340d 100644 (file)
@@ -3,17 +3,36 @@
 // some standard dna alphabets 
 // Include nl (\012), and cr (\015) to make sequence parsing eol 
 // convention independent.
-const Alphabet Alphabet::reduced_dna_alphabet("AaCcGgTtNn\012\015");
-const Alphabet Alphabet::reduced_rna_alphabet("AaCcGgUuNn\012\015");
-const Alphabet Alphabet::reduced_nucleic_alphabet("AaCcGgTtUuNn\012\015");
+const char *Alphabet::reduced_dna_cstr = "AaCcGgTtNn\012\015";
+const char *Alphabet::reduced_rna_cstr = "AaCcGgUuNn\012\015";
+const char *Alphabet::reduced_nucleic_cstr = "AaCcGgTtUuNn\012\015";
 //! this is the general iupac alphabet for nucleotides
-const Alphabet Alphabet::nucleic_alphabet(
-  "AaCcGgTtUuRrYyMmKkSsWwBbDdHhVvNn-~.?\012\015"
-);
+const char *Alphabet::nucleic_cstr =
+  "AaCcGgTtUuRrYyMmKkSsWwBbDdHhVvNn-~.?\012\015";
 //! the protein alphabet
-const Alphabet Alphabet::protein_alphabet(
-  "AaCcDdEeFfGgHhIiKkLlMmNnPpQqRrSsTtVvWwYy\012\015"
-);
+const char *Alphabet::protein_cstr = 
+  "AaCcDdEeFfGgHhIiKkLlMmNnPpQqRrSsTtVvWwYy\012\015";
+
+const Alphabet& Alphabet::reduced_dna_alphabet() {
+  static Alphabet *a = new Alphabet(reduced_dna_cstr);
+  return *a;
+}
+const Alphabet& Alphabet::reduced_rna_alphabet() {
+  static Alphabet *a = new Alphabet(reduced_rna_cstr);
+  return *a;
+}
+const Alphabet& Alphabet::reduced_nucleic_alphabet() {
+  static Alphabet *a = new Alphabet(reduced_nucleic_cstr);
+  return *a;
+}
+const Alphabet& Alphabet::nucleic_alphabet() {
+  static Alphabet *a = new Alphabet(nucleic_cstr);
+  return *a;
+}
+const Alphabet& Alphabet::protein_alphabet() {
+  static Alphabet *a = new Alphabet(protein_cstr);
+  return *a;
+}
 
 Alphabet::Alphabet(const char *a) :
   alphabet(a)
@@ -28,11 +47,6 @@ void Alphabet::assign(const Alphabet& a)
   alphabet_set.insert(alphabet.begin(), alphabet.end());
 }
 
-const char *Alphabet::c_str() const
-{
-  alphabet.c_str();
-}
-
 bool Alphabet::exists(const char c) const
 {
   return (alphabet_set.find(c) != alphabet_set.end());
index e7d35c944fe5e5847f05676beebb5a1cdf3ec4d2..399125478b57d514d4475bfff65685de8877fa68 100644 (file)
@@ -15,23 +15,26 @@ friend class Sequence;
 public:
   typedef std::string::const_iterator const_iterator;
 
-  //! return reference to the characters in our alphabet  
-  const char *c_str() const;
   //! case-insensitive test to check a character for existence in our alphabet
   bool exists(const char) const;
   
   // note, if you want to define an alphabet for a sequence, you probably want 
   // to update the enumeration in Sequence, and Sequence::get_sequence
   //! The standard DNA alphabet, with unique, and unknown characters
-  static const Alphabet reduced_dna_alphabet;
+  static const char *reduced_dna_cstr;
+  static const Alphabet &reduced_dna_alphabet();
   //! The standard RNA alphabet, with unique, and unknown characters
-  static const Alphabet reduced_rna_alphabet;
+  static const char *reduced_rna_cstr;
+  static const Alphabet &reduced_rna_alphabet();
   //! The standard DNA/RNA alphabet, with unique, and unknown characters
-  static const Alphabet reduced_nucleic_alphabet;
+  static const char *reduced_nucleic_cstr;
+  static const Alphabet &reduced_nucleic_alphabet();
   //! this is the general IUPAC alphabet for nucleotides
-  static const Alphabet nucleic_alphabet;
+  static const char *nucleic_cstr;
+  static const Alphabet &nucleic_alphabet();
   //! the protein alphabet
-  static const Alphabet protein_alphabet;
+  static const char *protein_cstr;  
+  static const Alphabet &protein_alphabet(); 
     
 private:
   //! what are allowable symbols in our alphabet
diff --git a/alg/motif_parser.cpp b/alg/motif_parser.cpp
new file mode 100644 (file)
index 0000000..633e905
--- /dev/null
@@ -0,0 +1,132 @@
+#include "mussa_exceptions.hpp"
+#include "alg/alphabet.hpp"
+#include "alg/motif_parser.hpp"
+
+#include <boost/spirit/core.hpp>
+#include <boost/spirit/actor/push_back_actor.hpp>
+#include <boost/spirit/iterator/file_iterator.hpp>
+#include <boost/spirit/utility/chset.hpp>
+namespace spirit = boost::spirit;
+
+#include <sstream>
+#include <stdexcept>
+
+motif_parser::push_channel::push_channel( motif_parser::ParsedMotifs *parsed_ ) :
+  parsed(parsed_)
+{
+}
+  
+void motif_parser::push_channel::operator()(float f) const
+{
+  parsed->channels.push_back(f);
+}
+
+motif_parser::push_sequence::push_sequence( ParsedMotifs *parsed_) :
+  parsed(parsed_)
+{
+}
+  
+template<typename Iterator>
+void motif_parser::push_sequence::operator()(
+  Iterator start,
+  Iterator end) const
+{
+  std::copy(start, end, std::back_inserter(parsed->sequence));   
+}
+
+motif_parser::push_name::push_name( ParsedMotifs *parsed_) :
+  parsed(parsed_)
+{
+}
+
+template<typename Iterator>
+void motif_parser::push_name::operator()(
+  Iterator start,
+  Iterator end) const
+{
+  std::copy(start, end, std::back_inserter(parsed->name));   
+}
+
+motif_parser::push_motif::push_motif( ParsedMotifs *parsed_) :
+  parsed(parsed_) {}
+    
+template<typename Iterator>
+void motif_parser::push_motif::operator()(
+  Iterator start,
+  Iterator end) const
+{
+  float red, green, blue, alpha;
+  Sequence seq(parsed->sequence, Sequence::nucleic_alphabet);
+  seq.set_fasta_header(parsed->name);
+  
+  alpha = 1.0;
+  switch (parsed->channels.size()) {
+    case 4:
+      alpha = parsed->channels[3];
+      // note fall through.
+  case 3:
+    red = parsed->channels[0];
+    green = parsed->channels[1];
+    blue = parsed->channels[2];
+    break;        
+  default:
+    throw std::runtime_error("wrong number of channels");
+    break;
+  }
+  Color c(red, green, blue, alpha);
+  parsed->color_mapper->appendInstanceColor("motif", seq.c_str(), c);
+  parsed->motifs.insert(seq);
+  
+  parsed->sequence.clear();
+  parsed->name.clear();
+  parsed->channels.clear();
+}
+
+motif_parser::ParsedMotifs::ParsedMotifs(
+  Mussa::motif_set& motifs_, 
+  boost::shared_ptr<AnnotationColors> color_mapper_) :
+  motifs(motifs_),
+  color_mapper(color_mapper_)
+{
+}
+
+void motif_parser::ParsedMotifs::parse(const std::string &data)
+{
+  const char *alphabet = Alphabet::nucleic_cstr;
+
+  // parse our string
+  spirit::parse_info<std::string::const_iterator> result;
+  result = spirit::parse(data.begin(), data.end(),
+     *(
+       ( 
+        (
+         (+spirit::chset<>(alphabet))[motif_parser::push_sequence(this)] >> 
+         +spirit::space_p
+        ) >>
+        !(
+          (
+            // names can either be letter followed by non-space characters
+            (spirit::alpha_p >> *spirit::graph_p)[motif_parser::push_name(this)]
+            |
+            // or a quoted string
+            (
+             spirit::ch_p('"') >> 
+               (+(~spirit::ch_p('"')))[motif_parser::push_name(this)] >>
+             spirit::ch_p('"')
+            )
+          ) >> +spirit::space_p
+        ) >>
+        spirit::real_p[motif_parser::push_channel(this)] >> +spirit::space_p >>
+        spirit::real_p[motif_parser::push_channel(this)] >> +spirit::space_p >>
+        spirit::real_p[motif_parser::push_channel(this)] >> +spirit::space_p >>
+        !(spirit::real_p[motif_parser::push_channel(this)] >> +spirit::space_p)
+       )[push_motif(this)]
+     ));
+  if (not result.full) {
+    std::stringstream msg;
+    msg << "Error at character " << result.length; 
+    // erase our potentially broken motif list
+    motifs.clear();
+    throw motif_load_error(msg.str());
+  }
+}
\ No newline at end of file
diff --git a/alg/motif_parser.hpp b/alg/motif_parser.hpp
new file mode 100644 (file)
index 0000000..e667c17
--- /dev/null
@@ -0,0 +1,63 @@
+#ifndef MOTIF_PARSER_HPP_
+#define MOTIF_PARSER_HPP_
+
+#include <string>
+#include <vector>
+
+#include <boost/shared_ptr.hpp>
+
+#include "alg/mussa.hpp"
+#include "alg/annotation_colors.hpp"
+
+namespace motif_parser {
+  // Helper functor to append created motifs to our Mussa analysis
+  class ParsedMotifs {
+  public:
+    ParsedMotifs(Mussa::motif_set& motifs_, 
+                 boost::shared_ptr<AnnotationColors> color_mapper_);
+                 
+    void parse(const std::string &data);             
+  private:
+    friend struct push_name;
+    friend struct push_sequence;
+    friend struct push_channel;
+    friend struct push_motif;
+    
+    std::string sequence;
+    std::string name;
+    std::vector<float> channels;
+
+    Mussa::motif_set& motifs;
+    boost::shared_ptr<AnnotationColors> color_mapper;
+  };
+  
+  struct push_name {
+    push_name(ParsedMotifs *p);
+    template<typename Iterator>
+    void operator()(Iterator, Iterator) const;
+  private:
+    ParsedMotifs *parsed;
+  };
+  struct push_sequence {
+    push_sequence(ParsedMotifs *p);
+    template<typename Iterator>
+    void operator()(Iterator, Iterator) const;
+  private:
+    ParsedMotifs *parsed;
+  };
+  struct push_channel {
+    push_channel(ParsedMotifs *p);
+    void operator()(float) const;
+  private:
+    ParsedMotifs *parsed;
+  };
+  struct push_motif {
+    push_motif(ParsedMotifs *p);
+    template<typename Iterator>
+    void operator()(Iterator, Iterator) const;
+  private:
+    ParsedMotifs *parsed;
+  };
+};
+
+#endif /*MOTIF_PARSER_HPP_*/
index 19591914e50beeb933cc196d550b11f6d5847ff7..6978ef24356f0775ead016f6deabb8aabe9bbca1 100644 (file)
 #include <boost/filesystem/fstream.hpp>
 namespace fs = boost::filesystem;
 
-#include <boost/spirit/core.hpp>
-#include <boost/spirit/actor/push_back_actor.hpp>
-#include <boost/spirit/iterator/file_iterator.hpp>
-#include <boost/spirit/utility/chset.hpp>
-namespace spirit = boost::spirit;
-
 #include <iostream>
 #include <sstream>
 
 #include "mussa_exceptions.hpp"
-#include "alg/mussa.hpp"
 #include "alg/flp.hpp"
+#include "alg/mussa.hpp"
+#include "alg/motif_parser.hpp"
 
 using namespace std;
 
@@ -775,73 +770,18 @@ void Mussa::set_motifs(const vector<Sequence>& motifs,
   update_sequences_motifs();
 }
 
-// Helper functor to append created motifs to our Mussa analysis
-struct push_back_motif {
-  Mussa::motif_set& motifs;
-  boost::shared_ptr<AnnotationColors> color_mapper;
-  std::string& seq_string;
-  std::string& name;
-  float& red;
-  float& green;
-  float& blue;
-  float& alpha;
-  int& parsed;
-
-  push_back_motif(Mussa::motif_set& motifs_,
-                  boost::shared_ptr<AnnotationColors> color_mapper_,
-                  std::string& seq_, 
-                  std::string& name_,
-                  float &red_, float &green_, float &blue_, float &alpha_,
-                  int &parsed_)
-    : motifs(motifs_),
-      color_mapper(color_mapper_),
-      seq_string(seq_),
-      name(name_),
-      red(red_),
-      green(green_),
-      blue(blue_),
-      alpha(alpha_),
-      parsed(parsed_)
-  {
-  }
-
-  void operator()(std::string::const_iterator, 
-                  std::string::const_iterator) const 
-  {
-    Sequence seq(seq_string, Sequence::nucleic_alphabet);
-    // shouldn't we have a better field than "fasta header" and speices?
-    seq.set_fasta_header(name);
-    // we need to clear the name in case the next motif doesn't have one.
-    name.clear();
-    // be nice if glsequence was a subclass of sequence so we could
-    // just attach colors directly to the motif.
-    Color c(red, green, blue, alpha);
-    color_mapper->appendInstanceColor("motif", seq.c_str(), c);
-    alpha = 1.0;
-    motifs.insert(seq);
-    ++parsed;
-  };
-};
-
 void Mussa::load_motifs(fs::path filename)
 {
   fs::ifstream f;
   f.open(filename, ifstream::in);
   load_motifs(f);
 }
-
-// I mostly split the ifstream out so I can use a stringstream to test it.
+  
 void Mussa::load_motifs(std::istream &in)
 {
   std::string data;
-  const char *alphabet = Alphabet::nucleic_alphabet.c_str();
-  string seq;
-  string name;
-  float red = 1.0;
-  float green = 0.0;
-  float blue = 0.0;
-  float alpha = 1.0;
-  int parsed = 1;
+  const char *alphabet = Alphabet::nucleic_cstr;
+  motif_parser::ParsedMotifs parsed_motifs(motif_sequences, color_mapper);
 
   // slurp our data into a string
   std::streamsize bytes_read = 1;
@@ -851,40 +791,7 @@ void Mussa::load_motifs(std::istream &in)
     bytes_read = in.readsome(buf, bufsiz);
     data.append(buf, buf+bytes_read);
   }
-  // parse our string
-  bool ok = spirit::parse(data.begin(), data.end(),
-     *(
-       ( 
-        (
-         (+spirit::chset<>(alphabet))[spirit::assign_a(seq)] >> 
-         +spirit::space_p
-        ) >>
-        !(
-          (
-            // names can either be letter followed by non-space characters
-            (spirit::alpha_p >> *spirit::graph_p)[spirit::assign_a(name)]
-            |
-            // or a quoted string
-            (
-             spirit::ch_p('"') >> 
-               (+(~spirit::ch_p('"')))[spirit::assign_a(name)] >>
-             spirit::ch_p('"')
-            )
-          ) >> +spirit::space_p
-        ) >>
-        spirit::real_p[spirit::assign_a(red)] >> +spirit::space_p >>
-        spirit::real_p[spirit::assign_a(green)] >> +spirit::space_p >>
-        spirit::real_p[spirit::assign_a(blue)] >> +spirit::space_p >>
-        !(spirit::real_p[spirit::assign_a(alpha)] >> +spirit::space_p)
-       )[push_back_motif(motif_sequences, color_mapper, seq, name, red, green, blue, alpha, parsed)]
-     )).full;
-  if (not ok) {
-    stringstream msg;
-    msg << "Error parsing motif #" << parsed;
-    // erase our potentially broken motif list
-    motif_sequences.clear();
-    throw motif_load_error(msg.str());
-  }
+  parsed_motifs.parse(data);
   update_sequences_motifs();
 }
 
index 9abbbcde8b6285e7c6f01d6d3746023b833a7a74..270cc4e9ed5900db1eed1f7c4e37978fa47b9409 100644 (file)
@@ -463,7 +463,7 @@ Sequence::parse_annot(std::string data, int start_index, int end_index)
                     ((spirit::ch_p('>')|spirit::str_p("&gt;")) >> 
                        (*(spirit::print_p))[spirit::assign_a(name)] >>
                        spirit::eol_p >> 
-                       (+(spirit::chset<>(Alphabet::nucleic_alphabet.c_str())))[spirit::assign_a(seq)]
+                       (+(spirit::chset<>(Alphabet::nucleic_cstr)))[spirit::assign_a(seq)]
                      )[push_back_seq(query_seqs, name, seq, parsed)]
                     ) >>
                     *spirit::space_p
@@ -633,15 +633,15 @@ const Alphabet& Sequence::get_alphabet(alphabet_ref alpha) const
 {
   switch (alpha) {
     case reduced_dna_alphabet:
-      return Alphabet::reduced_dna_alphabet;
+      return Alphabet::reduced_dna_alphabet();
     case reduced_rna_alphabet:
-      return Alphabet::reduced_rna_alphabet;
+      return Alphabet::reduced_rna_alphabet();
     case reduced_nucleic_alphabet:
-      return Alphabet::reduced_nucleic_alphabet;
+      return Alphabet::reduced_nucleic_alphabet();
     case nucleic_alphabet:
-      return Alphabet::nucleic_alphabet;
+      return Alphabet::nucleic_alphabet();
     case protein_alphabet:
-      return Alphabet::protein_alphabet;    
+      return Alphabet::protein_alphabet();    
     default:
       throw std::runtime_error("unrecognized alphabet type");
       break;
index bfe6e3e3e63e447405a42b0061d8ab88c5d98f6b..076acea7d0f54d94bd01f42abd4f4a7c8af46985 100644 (file)
 
 BOOST_AUTO_TEST_CASE( alphabet_simple )
 {
-  const Alphabet &a = Alphabet::reduced_dna_alphabet;
+  Alphabet a(Alphabet::reduced_dna_alphabet());
   // exists is case insensitive
   BOOST_CHECK_EQUAL( a.exists('a'), true);
   BOOST_CHECK_EQUAL( a.exists('A'), true);
   BOOST_CHECK_EQUAL( a.exists('Q'), false);
   BOOST_CHECK_EQUAL( a.exists('q'), false);
   
-  BOOST_CHECK_EQUAL( a.c_str(), "AaCcGgTtNn\012\015"); // copied from alphabet.cpp
+  // copied from alphabet.cpp
+  BOOST_CHECK_EQUAL( Alphabet::reduced_dna_cstr, "AaCcGgTtNn\012\015");
 }