move strand into seqspan

[mussa.git] / alg / alphabet.hpp
diff --git a/alg/alphabet.hpp b/alg/alphabet.hpp

index 513ea767b11081a9ea30c27d877f2c7d44f92306..7652ddfb08b27daa428a58eed9e691c7fb7bcb57 100644 (file)
--- a/alg/alphabet.hpp
+++ b/alg/alphabet.hpp
@@ -7,12 +7,16 @@
  #include <boost/serialization/utility.hpp>
  #include <boost/serialization/version.hpp>
  
+#include <boost/shared_ptr.hpp>
+
  #include <set>
  #include <ostream>
  
  //! this is a helper class for sequence
-enum AlphabetRef { reduced_dna_alphabet, reduced_rna_alphabet, reduced_nucleic_alphabet, 
-                   nucleic_alphabet, protein_alphabet, empty_alphabet=255 };
+enum AlphabetRef { reduced_dna_alphabet,     dna_alphabet, 
+                   reduced_rna_alphabet,     rna_alphabet,
+                   reduced_nucleic_alphabet, nucleic_alphabet, 
+                   protein_alphabet, empty_alphabet=255 };
                     
  class Alphabet {
  friend class Sequence;
@@ -28,19 +32,39 @@ public:
    
    //! return an alphabet given an AlphabetRef enumeration
    static const Alphabet &get_alphabet(AlphabetRef);
+  //! return a map used to reverse complement symbols from a nucleic alphabet
+  std::string create_complement_map(const std::string &) const;
+  //! return the complement map
+  std::string get_complement_map() const { return complement_map; }
+  
+  //! return a pointer to a reverse complemented string
+  boost::shared_ptr<std::string> reverse_complement(const std::string &) const;
+  
    // note, if you want to define an alphabet for a sequence, you probably want 
    // to update the enumeration in Sequence, and Sequence::get_sequence
    //! The standard DNA alphabet, with unique, and unknown characters
    static const char *reduced_dna_cstr;
+  static const char *reduced_dna_reverse_cstr;
    static const Alphabet &reduced_dna_alphabet();
    //! The standard RNA alphabet, with unique, and unknown characters
    static const char *reduced_rna_cstr;
+  static const char *reduced_rna_reverse_cstr;
    static const Alphabet &reduced_rna_alphabet();
-  //! The standard DNA/RNA alphabet, with unique, and unknown characters
+  //! The full IUPAC DNA alphabet, with unique, and unknown characters
+  static const char *dna_cstr;
+  static const char *dna_reverse_cstr;
+  static const Alphabet &dna_alphabet();
+  //! the full IUPAC RNA alphabet
+  static const char *rna_cstr;
+  static const char *rna_reverse_cstr;
+  static const Alphabet &rna_alphabet();
+  //! reduced (DNA/RNA) nucleic alphabet
    static const char *reduced_nucleic_cstr;
+  static const char *reduced_nucleic_reverse_cstr;
    static const Alphabet &reduced_nucleic_alphabet();
-  //! this is the general IUPAC alphabet for nucleotides
+  //! the full IUPAC (DNA/RNA) nucleic alphabet
    static const char *nucleic_cstr;
+  static const char *nucleic_reverse_cstr;
    static const Alphabet &nucleic_alphabet();
    //! the protein alphabet
    static const char *protein_cstr;  
@@ -52,11 +76,12 @@ public:
  private:
    //! what are allowable symbols in our alphabet
    std::string alphabet;
+  std::string complement_map;
    //! internal variable to make exists() faster
    std::set<std::string::value_type> alphabet_set;
    
    //! some necessary string api access
-  Alphabet(const char *a);
+  Alphabet(const char *a, const char *reverse_a);
    //! allow sequence to copy one alphabet to another (needed when unserializing) 
    void assign(const Alphabet& a);
    const_iterator begin() const { return alphabet.begin(); }
@@ -67,6 +92,7 @@ private:
    template<class Archive>
    void serialize(Archive& ar, const unsigned int /*version*/) {
      ar & BOOST_SERIALIZATION_NVP(alphabet);
+    ar & BOOST_SERIALIZATION_NVP(complement_map); // NOTE(review): version is ignored here; archives saved before complement_map existed may fail to load -- confirm
      alphabet_set.clear();
      alphabet_set.insert(alphabet.begin(), alphabet.end());
    }