4 #include <boost/serialization/export.hpp>
5 #include <boost/serialization/nvp.hpp>
6 #include <boost/serialization/string.hpp>
7 #include <boost/serialization/utility.hpp>
8 #include <boost/serialization/version.hpp>
10 #include <boost/shared_ptr.hpp>
15 //! this is a helper class for sequence
// Identifiers for the predefined alphabets; get_alphabet() takes one of these.
enum AlphabetRef {
  reduced_dna_alphabet = 0,
  dna_alphabet = 1,
  reduced_rna_alphabet = 2,
  rna_alphabet = 3,
  reduced_nucleic_alphabet = 4,
  nucleic_alphabet = 5,
  protein_alphabet = 6,
  // deliberately pinned to 255 rather than continuing the sequence
  empty_alphabet = 255
};
22 friend class Sequence;
24 typedef std::string::const_iterator const_iterator;
25 //! define the various alphabet types (as python corebio)
27 friend bool operator==(const Alphabet&, const Alphabet&);
28 friend std::ostream& operator<<(std::ostream&, const Alphabet&);
30 //! case-insensitive test to check a character for existence in our alphabet
31 bool exists(const char) const;
33 //! return an alphabet given an AlphabetRef enumeration
34 static const Alphabet &get_alphabet(AlphabetRef);
//! return a map used to reverse complement the symbols of a nucleic alphabet
36 std::string create_complement_map(const std::string &) const;
//! return the complement map
38 std::string get_complement_map() const { return complement_map; }
40 //! return a pointer to a reverse complemented string
41 boost::shared_ptr<std::string> reverse_complement(const std::string &) const;
43 // note, if you want to define an alphabet for a sequence, you probably want
44 // to update the enumeration in Sequence, and Sequence::get_sequence
45 //! The standard DNA alphabet, with unique, and unknown characters
46 static const char *reduced_dna_cstr;
47 static const char *reduced_dna_reverse_cstr;
48 static const Alphabet &reduced_dna_alphabet();
49 //! The standard RNA alphabet, with unique, and unknown characters
50 static const char *reduced_rna_cstr;
51 static const char *reduced_rna_reverse_cstr;
52 static const Alphabet &reduced_rna_alphabet();
53 //! The full IUPAC DNA alphabet, with unique, and unknown characters
54 static const char *dna_cstr;
55 static const char *dna_reverse_cstr;
56 static const Alphabet &dna_alphabet();
57 //! the full IUPAC RNA alphabet
58 static const char *rna_cstr;
59 static const char *rna_reverse_cstr;
60 static const Alphabet &rna_alphabet();
//! reduced (DNA/RNA) nucleic alphabet
62 static const char *reduced_nucleic_cstr;
63 static const char *reduced_nucleic_reverse_cstr;
64 static const Alphabet &reduced_nucleic_alphabet();
//! full (DNA/RNA) nucleic alphabet
66 static const char *nucleic_cstr;
67 static const char *nucleic_reverse_cstr;
68 static const Alphabet &nucleic_alphabet();
69 //! the protein alphabet
70 static const char *protein_cstr;
71 static const Alphabet &protein_alphabet();
73 static const char *empty_cstr;
74 static const Alphabet &empty_alphabet();
77 //! what are allowable symbols in our alphabet
79 std::string complement_map;
80 //! internal variable to make exists() faster
81 std::set<std::string::value_type> alphabet_set;
83 //! some necessary string api access
84 Alphabet(const char *a, const char *reverse_a);
85 //! allow sequence to copy one alphabet to another (needed when unserializing)
86 void assign(const Alphabet& a);
87 const_iterator begin() const { return alphabet.begin(); }
88 const_iterator end() const { return alphabet.end(); }
91 friend class boost::serialization::access;
92 template<class Archive>
93 void serialize(Archive& ar, const unsigned int /*version*/) {
94 ar & BOOST_SERIALIZATION_NVP(alphabet);
95 ar & BOOST_SERIALIZATION_NVP(complement_map);
97 alphabet_set.insert(alphabet.begin(), alphabet.end());