put basepair alphabets into the Sequence class
[mussa.git] / alg / sequence.cpp
index 0d1594f686a26fc98ab21c234851ffae524439f4..f6065971eec8ace25f503554670d0828722d0d90 100644 (file)
@@ -37,15 +37,6 @@ namespace spirit = boost::spirit;
 #include <iostream>
 #include <sstream>
 
-// some standard dna alphabets 
-// \012 = nl
-// \015 = cr
-// this should make our sequence parsing end-of-line convention 
-// independent
-static const char* dna_alphabet = "AaCcGgTtNn\012\015";
-static const char* rna_alphabet = "AaCcGgNnUu\012\015";
-static const char* iupac_alphabet = "AaCcGgTtUuRrYyMmKkSsWwBbDdHhVvNn\012\015";
-
 annot::annot() 
  : begin(0),
    end(0),
@@ -84,6 +75,13 @@ motif::~motif()
 {
 }
 
+const std::string Sequence::dna_alphabet("AaCcGgTtNn\012\015");  // A, C, G, T, N in both cases; \012 = nl, \015 = cr
+const std::string Sequence::rna_alphabet("AaCcGgNnUu\012\015");  // A, C, G, N, U in both cases, plus nl/cr
+  //! general IUPAC nucleotide alphabet; nl/cr are included (as in every alphabet here) so that sequence parsing is intended to be independent of the end-of-line convention
+const std::string Sequence::nucleic_iupac_alphabet("AaCcGgTtUuRrYyMmKkSsWwBbDdHhVvNn\012\015");
+  //! protein alphabet: 20 one-letter codes in upper and lower case, plus nl/cr
+const std::string Sequence::protein_alphabet("AaCcDdEeFfGgHhIiKkLlMmNnPpQqRrSsTtVvWwYy\012\015");
+
 Sequence::Sequence()
   :  seq(""),
      header(""),
@@ -444,7 +442,7 @@ Sequence::parse_annot(std::string data, int start_index, int end_index)
                       ((spirit::ch_p('>')|spirit::str_p("&gt;")) >> 
                          (*(spirit::print_p))[spirit::assign_a(name)] >>
                          spirit::eol_p >> 
-                         (+(spirit::chset<>(iupac_alphabet)))[spirit::assign_a(seq)]
+                         (+(spirit::chset<>(nucleic_iupac_alphabet.c_str())))[spirit::assign_a(seq)]
                        )[push_back_seq(query_seqs, name, seq)]
                       ) >>
                       *spirit::space_p