put basepair alphabets into the Sequence class
author Diane Trout <diane@caltech.edu>
Tue, 29 Aug 2006 22:12:31 +0000 (22:12 +0000)
committer Diane Trout <diane@caltech.edu>
Tue, 29 Aug 2006 22:12:31 +0000 (22:12 +0000)
Thanks to Google, I found the proper way to initialize a static const string.
I also added the protein 1-letter alphabet. At some point it should be possible
to initialize different alphabets for a sequence.

alg/sequence.cpp
alg/sequence.hpp

index 0d1594f686a26fc98ab21c234851ffae524439f4..f6065971eec8ace25f503554670d0828722d0d90 100644 (file)
@@ -37,15 +37,6 @@ namespace spirit = boost::spirit;
 #include <iostream>
 #include <sstream>
 
-// some standard dna alphabets 
-// \012 = nl
-// \015 = cr
-// this should make our sequence parsing end-of-line convention 
-// independent
-static const char* dna_alphabet = "AaCcGgTtNn\012\015";
-static const char* rna_alphabet = "AaCcGgNnUu\012\015";
-static const char* iupac_alphabet = "AaCcGgTtUuRrYyMmKkSsWwBbDdHhVvNn\012\015";
-
 annot::annot() 
  : begin(0),
    end(0),
@@ -84,6 +75,13 @@ motif::~motif()
 {
 }
 
+const std::string Sequence::dna_alphabet("AaCcGgTtNn\012\015");
+const std::string Sequence::rna_alphabet("AaCcGgNnUu\012\015");
+  //! this is the general iupac alphabet for nucleotides
+const std::string Sequence::nucleic_iupac_alphabet("AaCcGgTtUuRrYyMmKkSsWwBbDdHhVvNn\012\015");
+  //! the protein alphabet
+const std::string Sequence::protein_alphabet("AaCcDdEeFfGgHhIiKkLlMmNnPpQqRrSsTtVvWwYy\012\015");
+
 Sequence::Sequence()
   :  seq(""),
      header(""),
@@ -444,7 +442,7 @@ Sequence::parse_annot(std::string data, int start_index, int end_index)
                       ((spirit::ch_p('>')|spirit::str_p("&gt;")) >> 
                          (*(spirit::print_p))[spirit::assign_a(name)] >>
                          spirit::eol_p >> 
-                         (+(spirit::chset<>(iupac_alphabet)))[spirit::assign_a(seq)]
+                         (+(spirit::chset<>(nucleic_iupac_alphabet.c_str())))[spirit::assign_a(seq)]
                        )[push_back_seq(query_seqs, name, seq)]
                       ) >>
                       *spirit::space_p
index 67f6c9f84dad1ae36212c79ff444f529d810c972..0e0e2e6b2c8e42d2c91bdbbbb3f46af762f9329b 100644 (file)
@@ -93,6 +93,17 @@ public:
   typedef std::string::size_type size_type;
   static const size_type npos = std::string::npos;
 
+  // some standard dna alphabets 
+  // Include nl (\012), and cr (\015) to make sequence parsing eol 
+  // convention independent.
+
+  static const std::string dna_alphabet;
+  static const std::string rna_alphabet;
+  //! this is the general iupac alphabet for nucleotides
+  static const std::string nucleic_iupac_alphabet;
+  //! the protein alphabet
+  static const std::string protein_alphabet;
+
   Sequence();
   ~Sequence();
   Sequence(const char* seq);