X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=mussa.git;a=blobdiff_plain;f=alg%2Falphabet.hpp;h=513ea767b11081a9ea30c27d877f2c7d44f92306;hp=399125478b57d514d4475bfff65685de8877fa68;hb=f1724abab87d2e5b160620b10cb81eabf56aadeb;hpb=7d4fbcb6060a60a8ea25ca1303fcaaaf8574f24a

diff --git a/alg/alphabet.hpp b/alg/alphabet.hpp
index 3991254..513ea76 100644
--- a/alg/alphabet.hpp
+++ b/alg/alphabet.hpp
@@ -8,16 +8,26 @@
 #include <boost/serialization/version.hpp>
 
 #include <set>
+#include <ostream>
 
 //! this is a helper class for sequence
+enum AlphabetRef { reduced_dna_alphabet, reduced_rna_alphabet, reduced_nucleic_alphabet, 
+                   nucleic_alphabet, protein_alphabet, empty_alphabet=255 };
+// AlphabetRef (above) is the selector type taken by Alphabet::get_alphabet(AlphabetRef).
 class Alphabet {
 friend class Sequence;
 public:
   typedef std::string::const_iterator const_iterator;
-
+  // the available alphabet types are enumerated by AlphabetRef (as in python corebio)
+  
+  friend bool operator==(const Alphabet&, const Alphabet&);
+  friend std::ostream& operator<<(std::ostream&, const Alphabet&);
+     
   //! case-insensitive test to check a character for existence in our alphabet
   bool exists(const char) const;
   
+  //! return an alphabet given an AlphabetRef enumeration
+  static const Alphabet &get_alphabet(AlphabetRef);
   // note, if you want to define an alphabet for a sequence, you probably want 
   // to update the enumeration in Sequence, and Sequence::get_sequence
   //! The standard DNA alphabet, with unique, and unknown characters
@@ -35,6 +45,9 @@ public:
   //! the protein alphabet
   static const char *protein_cstr;  
   static const Alphabet &protein_alphabet(); 
+  
+  static const char *empty_cstr;
+  static const Alphabet &empty_alphabet();
     
 private:
   //! what are allowable symbols in our alphabet