#include <boost/serialization/utility.hpp>
#include <boost/serialization/version.hpp>
+#include <boost/shared_ptr.hpp>
+
#include <set>
#include <ostream>
//! this is a helper class for sequence
-enum AlphabetRef { reduced_dna_alphabet, reduced_rna_alphabet, reduced_nucleic_alphabet,
- nucleic_alphabet, protein_alphabet, empty_alphabet=255 };
+enum AlphabetRef { reduced_dna_alphabet, dna_alphabet, // reduced / full DNA
+ reduced_rna_alphabet, rna_alphabet, // reduced / full RNA
+ reduced_nucleic_alphabet, nucleic_alphabet, // reduced / full combined DNA+RNA
+ protein_alphabet, empty_alphabet=255 }; // NOTE(review): inserting dna_alphabet and rna_alphabet shifts the implicit value of every later enumerator except empty_alphabet (e.g. protein_alphabet 4 -> 6); confirm nothing persisted relies on the old numbers
class Alphabet {
friend class Sequence;
//! return an alphabet given an AlphabetRef enumeration
static const Alphabet &get_alphabet(AlphabetRef);
+ //! return a map used to reverse complement the symbols of a nucleic alphabet
+ std::string create_complement_map(const std::string &) const;
+ //! return a copy of this alphabet's complement map
+ std::string get_complement_map() const { return complement_map; }
+
+ //! return a shared pointer to the reverse complement of the given string
+ boost::shared_ptr<std::string> reverse_complement(const std::string &) const;
+
// note, if you want to define an alphabet for a sequence, you probably want
// to update the enumeration in Sequence, and Sequence::get_sequence
//! The standard DNA alphabet, with unique, and unknown characters
static const char *reduced_dna_cstr;
+ static const char *reduced_dna_reverse_cstr; // presumably the complement symbols paired with reduced_dna_cstr -- TODO confirm against the definition
static const Alphabet &reduced_dna_alphabet();
//! The standard RNA alphabet, with unique, and unknown characters
static const char *reduced_rna_cstr;
+ static const char *reduced_rna_reverse_cstr; // presumably the complement symbols paired with reduced_rna_cstr -- TODO confirm against the definition
static const Alphabet &reduced_rna_alphabet();
- //! The standard DNA/RNA alphabet, with unique, and unknown characters
+ //! the full IUPAC DNA alphabet, with unique and unknown characters
+ static const char *dna_cstr;
+ static const char *dna_reverse_cstr; // presumably the complement symbols paired with dna_cstr -- TODO confirm against the definition
+ static const Alphabet &dna_alphabet();
+ //! the full IUPAC RNA alphabet
+ static const char *rna_cstr;
+ static const char *rna_reverse_cstr; // presumably the complement symbols paired with rna_cstr -- TODO confirm against the definition
+ static const Alphabet &rna_alphabet();
+ //! the reduced (DNA/RNA) nucleic alphabet
static const char *reduced_nucleic_cstr;
+ static const char *reduced_nucleic_reverse_cstr; // presumably the complement symbols paired with reduced_nucleic_cstr -- TODO confirm against the definition
static const Alphabet &reduced_nucleic_alphabet();
- //! this is the general IUPAC alphabet for nucleotides
+ //! the general IUPAC (DNA/RNA) nucleic alphabet
static const char *nucleic_cstr;
+ static const char *nucleic_reverse_cstr; // presumably the complement symbols paired with nucleic_cstr -- TODO confirm against the definition
static const Alphabet &nucleic_alphabet();
//! the protein alphabet
static const char *protein_cstr;
private:
//! what are allowable symbols in our alphabet
std::string alphabet;
+ std::string complement_map; //!< complement map for this alphabet; exposed by get_complement_map() and archived by serialize()
//! internal variable to make exists() faster
std::set<std::string::value_type> alphabet_set;
//! some necessary string api access
- Alphabet(const char *a);
+ Alphabet(const char *a, const char *reverse_a); // a: the alphabet symbols; reverse_a: presumably the matching complement symbols (cf. the *_reverse_cstr members) -- TODO confirm against the definition
//! allow sequence to copy one alphabet to another (needed when unserializing)
void assign(const Alphabet& a);
const_iterator begin() const { return alphabet.begin(); }
template<class Archive>
void serialize(Archive& ar, const unsigned int /*version*/) {
ar & BOOST_SERIALIZATION_NVP(alphabet);
+ ar & BOOST_SERIALIZATION_NVP(complement_map); // NOTE(review): new archive field, but the version argument is ignored, so archives written before this field existed do not contain it -- confirm older archives never need to be loaded, or gate this on the class version
alphabet_set.clear();
alphabet_set.insert(alphabet.begin(), alphabet.end());
}