X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=mussa.git;a=blobdiff_plain;f=alg%2Falphabet.hpp;h=513ea767b11081a9ea30c27d877f2c7d44f92306;hp=399125478b57d514d4475bfff65685de8877fa68;hb=f1724abab87d2e5b160620b10cb81eabf56aadeb;hpb=7d4fbcb6060a60a8ea25ca1303fcaaaf8574f24a diff --git a/alg/alphabet.hpp b/alg/alphabet.hpp index 3991254..513ea76 100644 --- a/alg/alphabet.hpp +++ b/alg/alphabet.hpp @@ -8,16 +8,26 @@ #include #include +#include //! this is a helper class for sequence +enum AlphabetRef { reduced_dna_alphabet, reduced_rna_alphabet, reduced_nucleic_alphabet, + nucleic_alphabet, protein_alphabet, empty_alphabet=255 }; + class Alphabet { friend class Sequence; public: typedef std::string::const_iterator const_iterator; - + //! define the various alphabet types (as python corebio) + + friend bool operator==(const Alphabet&, const Alphabet&); + friend std::ostream& operator<<(std::ostream&, const Alphabet&); + //! case-insensitive test to check a character for existence in our alphabet bool exists(const char) const; + //! return an alphabet given an AlphabetRef enumeration + static const Alphabet &get_alphabet(AlphabetRef); // note, if you want to define an alphabet for a sequence, you probably want // to update the enumeration in Sequence, and Sequence::get_sequence //! The standard DNA alphabet, with unique, and unknown characters @@ -35,6 +45,9 @@ public: //! the protein alphabet static const char *protein_cstr; static const Alphabet &protein_alphabet(); + + static const char *empty_cstr; + static const Alphabet &empty_alphabet(); private: //! what are allowable symbols in our alphabet