+#include <stdexcept>
+
+#include "mussa_exceptions.hpp"
#include "alg/alphabet.hpp"
// some standard dna alphabets
// Include nl (\012), and cr (\015) to make sequence parsing eol
// convention independent.
-const Alphabet Alphabet::reduced_dna_alphabet("AaCcGgTtNn\012\015");
-const Alphabet Alphabet::reduced_rna_alphabet("AaCcGgUuNn\012\015");
-const Alphabet Alphabet::reduced_nucleic_alphabet("AaCcGgTtUuNn\012\015");
+const char *Alphabet::reduced_dna_cstr = "AaCcGgTtNn\012\015";
+const char *Alphabet::reduced_dna_reverse_cstr = "TtGgCcAaNn\012\015";
+const char *Alphabet::reduced_rna_cstr = "AaCcGgUuNn\012\015";
+const char *Alphabet::reduced_rna_reverse_cstr = "UuGgCcAaNn\012\015";
+const char *Alphabet::reduced_nucleic_cstr = "AaCcGgTtUuNn\012\015";
+const char *Alphabet::reduced_nucleic_reverse_cstr = "TtGgCcAaAaNn\012\015";
//! this is the general iupac alphabet for nucleotides
-const Alphabet Alphabet::nucleic_alphabet(
- "AaCcGgTtUuRrYyMmKkSsWwBbDdHhVvNn-~.?\012\015"
-);
+const char *Alphabet::dna_cstr =
+ "AaCcGgTtRrYySsWwKkMmBbDdHhVvNnXx-~.?\012\015";
+const char *Alphabet::dna_reverse_cstr =
+ "TtGgCcAaYyRrSsWwMmKkVvHhDdBbNnXx-~.?\012\015";
+const char *Alphabet::rna_cstr =
+ "AaCcGgUuRrYySsWwKkMmBbDdHhVvNnXx-~.?\012\015";
+const char *Alphabet::rna_reverse_cstr =
+ "UuGgCcAaYyRrSsWwMmKkVvHhDdBbNnXx-~.?\012\015";
+const char *Alphabet::nucleic_cstr =
+ "AaCcGgTtUuRrYySsWwKkMmBbDdHhVvNnXx-~.?\012\015";
+const char *Alphabet::nucleic_reverse_cstr =
+ "TtGgCcAaAaYyRrSsWwMmKkVvHhDdBbNnXx-~.?\012\015";
//! the protein alphabet
-const Alphabet Alphabet::protein_alphabet(
- "AaCcDdEeFfGgHhIiKkLlMmNnPpQqRrSsTtVvWwYy\012\015"
-);
+const char *Alphabet::protein_cstr =
+ "AaCcDdEeFfGgHhIiKkLlMmNnPpQqRrSsTtVvWwYy\012\015";
+const char *Alphabet::empty_cstr = "";
+
+const Alphabet& Alphabet::reduced_dna_alphabet() {
+ static Alphabet *a = new Alphabet(reduced_dna_cstr, reduced_dna_reverse_cstr);
+ return *a;
+}
+const Alphabet& Alphabet::reduced_rna_alphabet() {
+ static Alphabet *a = new Alphabet(reduced_rna_cstr, reduced_rna_reverse_cstr);
+ return *a;
+}
+const Alphabet& Alphabet::reduced_nucleic_alphabet() {
+ static Alphabet *a = new Alphabet(reduced_nucleic_cstr, reduced_nucleic_reverse_cstr);
+ return *a;
+}
+const Alphabet& Alphabet::dna_alphabet() {
+ static Alphabet *a = new Alphabet(dna_cstr, dna_reverse_cstr);
+ return *a;
+}
+const Alphabet& Alphabet::rna_alphabet() {
+ static Alphabet *a = new Alphabet(rna_cstr, rna_reverse_cstr);
+ return *a;
+}
+const Alphabet& Alphabet::nucleic_alphabet() {
+ static Alphabet *a = new Alphabet(nucleic_cstr, nucleic_reverse_cstr);
+ return *a;
+}
+const Alphabet& Alphabet::protein_alphabet() {
+ static Alphabet *a = new Alphabet(protein_cstr, protein_cstr);
+ return *a;
+}
+const Alphabet& Alphabet::empty_alphabet() {
+ static Alphabet *a = new Alphabet(empty_cstr, empty_cstr);
+ return *a;
+}
-Alphabet::Alphabet(const char *a) :
- alphabet(a)
+Alphabet::Alphabet(const char *a, const char *reverse_alphabet) :
+ alphabet(a),
+ complement_map(create_complement_map(reverse_alphabet))
{
alphabet_set.insert(alphabet.begin(), alphabet.end());
}
void Alphabet::assign(const Alphabet& a)
{
alphabet = a.alphabet;
+ complement_map = a.complement_map;
alphabet_set.clear();
alphabet_set.insert(alphabet.begin(), alphabet.end());
}
-const char *Alphabet::c_str() const
+bool operator==(const Alphabet& x, const Alphabet& y)
{
- alphabet.c_str();
+ return x.alphabet == y.alphabet;
+}
+
+std::ostream& operator<<(std::ostream& out, const Alphabet& a)
+{
+ out << a.alphabet;
}
bool Alphabet::exists(const char c) const
{
return (alphabet_set.find(c) != alphabet_set.end());
-}
\ No newline at end of file
+}
+
+const Alphabet& Alphabet::get_alphabet(AlphabetRef alpha)
+{
+ switch (alpha) {
+ case ::reduced_dna_alphabet:
+ return Alphabet::reduced_dna_alphabet();
+ case ::reduced_rna_alphabet:
+ return Alphabet::reduced_rna_alphabet();
+ case ::reduced_nucleic_alphabet:
+ return Alphabet::reduced_nucleic_alphabet();
+ case ::dna_alphabet:
+ return Alphabet::dna_alphabet();
+ case ::rna_alphabet:
+ return Alphabet::rna_alphabet();
+ case ::nucleic_alphabet:
+ return Alphabet::nucleic_alphabet();
+ case ::protein_alphabet:
+ return Alphabet::protein_alphabet();
+ case ::empty_alphabet:
+ return Alphabet::empty_alphabet();
+ default:
+ throw std::runtime_error("unrecognized alphabet type");
+ break;
+ }
+}
+
+std::string Alphabet::create_complement_map(const std::string& ra) const
+{
+ if (ra.size() == 0) {
+ // reverse complement is undefined
+ throw mussa_error("no reverse complement");
+ }
+ std::string rc_map(256,'~');
+
+ for(int i = 0; alphabet[i] and ra[i]; ++i) {
+ rc_map[ alphabet[i] ] = ra[i];
+ }
+ return rc_map;
+}
+
+boost::shared_ptr<std::string> Alphabet::reverse_complement(const std::string &seq) const
+{
+ boost::shared_ptr<std::string> new_seq(new std::string());
+
+ new_seq->reserve(seq.size());
+
+ for(std::string::const_reverse_iterator seq_i = seq.rbegin();
+ seq_i != seq.rend();
+ ++seq_i)
+ {
+ new_seq->append(1, complement_map[ *seq_i ]);
+ }
+ return new_seq;
+}