3 #include "mussa_exceptions.hpp"
4 #include "alg/alphabet.hpp"
6 // some standard dna alphabets
7 // Include nl (\012), and cr (\015) to make sequence parsing eol
8 // convention independent.
9 const char *Alphabet::reduced_dna_cstr = "AaCcGgTtNn\012\015";
10 const char *Alphabet::reduced_dna_reverse_cstr = "TtGgCcAaNn\012\015";
11 const char *Alphabet::reduced_rna_cstr = "AaCcGgUuNn\012\015";
12 const char *Alphabet::reduced_rna_reverse_cstr = "UuGgCcAaNn\012\015";
13 const char *Alphabet::reduced_nucleic_cstr = "AaCcGgTtUuNn\012\015";
14 const char *Alphabet::reduced_nucleic_reverse_cstr = "TtGgCcAaAaNn\012\015";
15 //! this is the general iupac alphabet for nucleotides
16 const char *Alphabet::dna_cstr =
17 "AaCcGgTtRrYySsWwKkMmBbDdHhVvNnXx-~.?\012\015";
18 const char *Alphabet::dna_reverse_cstr =
19 "TtGgCcAaYyRrSsWwMmKkVvHhDdBbNnXx-~.?\012\015";
20 const char *Alphabet::rna_cstr =
21 "AaCcGgUuRrYySsWwKkMmBbDdHhVvNnXx-~.?\012\015";
22 const char *Alphabet::rna_reverse_cstr =
23 "UuGgCcAaYyRrSsWwMmKkVvHhDdBbNnXx-~.?\012\015";
24 const char *Alphabet::nucleic_cstr =
25 "AaCcGgTtUuRrYySsWwKkMmBbDdHhVvNnXx-~.?\012\015";
26 const char *Alphabet::nucleic_reverse_cstr =
27 "TtGgCcAaAaYyRrSsWwMmKkVvHhDdBbNnXx-~.?\012\015";
28 //! the protein alphabet
29 const char *Alphabet::protein_cstr =
30 "AaCcDdEeFfGgHhIiKkLlMmNnPpQqRrSsTtVvWwYy\012\015";
31 const char *Alphabet::empty_cstr = "";
33 const Alphabet& Alphabet::reduced_dna_alphabet() {
34 static Alphabet *a = new Alphabet(reduced_dna_cstr, reduced_dna_reverse_cstr);
37 const Alphabet& Alphabet::reduced_rna_alphabet() {
38 static Alphabet *a = new Alphabet(reduced_rna_cstr, reduced_rna_reverse_cstr);
41 const Alphabet& Alphabet::reduced_nucleic_alphabet() {
42 static Alphabet *a = new Alphabet(reduced_nucleic_cstr, reduced_nucleic_reverse_cstr);
45 const Alphabet& Alphabet::dna_alphabet() {
46 static Alphabet *a = new Alphabet(dna_cstr, dna_reverse_cstr);
49 const Alphabet& Alphabet::rna_alphabet() {
50 static Alphabet *a = new Alphabet(rna_cstr, rna_reverse_cstr);
53 const Alphabet& Alphabet::nucleic_alphabet() {
54 static Alphabet *a = new Alphabet(nucleic_cstr, nucleic_reverse_cstr);
57 const Alphabet& Alphabet::protein_alphabet() {
58 static Alphabet *a = new Alphabet(protein_cstr, protein_cstr);
61 const Alphabet& Alphabet::empty_alphabet() {
62 static Alphabet *a = new Alphabet(empty_cstr, empty_cstr);
66 Alphabet::Alphabet(const char *a, const char *reverse_alphabet) :
68 complement_map(create_complement_map(reverse_alphabet))
70 alphabet_set.insert(alphabet.begin(), alphabet.end());
73 void Alphabet::assign(const Alphabet& a)
75 alphabet = a.alphabet;
76 complement_map = a.complement_map;
78 alphabet_set.insert(alphabet.begin(), alphabet.end());
81 bool operator==(const Alphabet& x, const Alphabet& y)
83 return x.alphabet == y.alphabet;
86 std::ostream& operator<<(std::ostream& out, const Alphabet& a)
91 bool Alphabet::exists(const char c) const
93 return (alphabet_set.find(c) != alphabet_set.end());
96 const Alphabet& Alphabet::get_alphabet(AlphabetRef alpha)
99 case ::reduced_dna_alphabet:
100 return Alphabet::reduced_dna_alphabet();
101 case ::reduced_rna_alphabet:
102 return Alphabet::reduced_rna_alphabet();
103 case ::reduced_nucleic_alphabet:
104 return Alphabet::reduced_nucleic_alphabet();
106 return Alphabet::dna_alphabet();
108 return Alphabet::rna_alphabet();
109 case ::nucleic_alphabet:
110 return Alphabet::nucleic_alphabet();
111 case ::protein_alphabet:
112 return Alphabet::protein_alphabet();
113 case ::empty_alphabet:
114 return Alphabet::empty_alphabet();
116 throw std::runtime_error("unrecognized alphabet type");
121 std::string Alphabet::create_complement_map(const std::string& ra) const
123 if (ra.size() == 0) {
124 // reverse complement is undefined
125 throw mussa_error("no reverse complement");
127 std::string rc_map(256,'~');
129 for(int i = 0; alphabet[i] and ra[i]; ++i) {
130 rc_map[ alphabet[i] ] = ra[i];
135 boost::shared_ptr<std::string> Alphabet::reverse_complement(const std::string &seq) const
137 boost::shared_ptr<std::string> new_seq(new std::string());
139 new_seq->reserve(seq.size());
141 for(std::string::const_reverse_iterator seq_i = seq.rbegin();
145 new_seq->append(1, complement_map[ *seq_i ]);