#include <stdexcept>
+#include "mussa_exceptions.hpp"
#include "alg/alphabet.hpp"
// some standard dna alphabets
// Include nl (\012), and cr (\015) to make sequence parsing eol
// convention independent.
const char *Alphabet::reduced_dna_cstr = "AaCcGgTtNn\012\015";
+const char *Alphabet::reduced_dna_reverse_cstr = "TtGgCcAaNn\012\015";
const char *Alphabet::reduced_rna_cstr = "AaCcGgUuNn\012\015";
+const char *Alphabet::reduced_rna_reverse_cstr = "UuGgCcAaNn\012\015";
const char *Alphabet::reduced_nucleic_cstr = "AaCcGgTtUuNn\012\015";
+const char *Alphabet::reduced_nucleic_reverse_cstr = "TtGgCcAaAaNn\012\015";
//! this is the general iupac alphabet for nucleotides
+const char *Alphabet::dna_cstr =
+ "AaCcGgTtRrYySsWwKkMmBbDdHhVvNnXx-~.?\012\015";
+const char *Alphabet::dna_reverse_cstr =
+ "TtGgCcAaYyRrSsWwMmKkVvHhDdBbNnXx-~.?\012\015";
+const char *Alphabet::rna_cstr =
+ "AaCcGgUuRrYySsWwKkMmBbDdHhVvNnXx-~.?\012\015";
+const char *Alphabet::rna_reverse_cstr =
+ "UuGgCcAaYyRrSsWwMmKkVvHhDdBbNnXx-~.?\012\015";
const char *Alphabet::nucleic_cstr =
- "AaCcGgTtUuRrYyMmKkSsWwBbDdHhVvNn-~.?\012\015";
+ "AaCcGgTtUuRrYySsWwKkMmBbDdHhVvNnXx-~.?\012\015";
+const char *Alphabet::nucleic_reverse_cstr =
+ "TtGgCcAaAaYyRrSsWwMmKkVvHhDdBbNnXx-~.?\012\015";
//! the protein alphabet
const char *Alphabet::protein_cstr =
"AaCcDdEeFfGgHhIiKkLlMmNnPpQqRrSsTtVvWwYy\012\015";
const char *Alphabet::empty_cstr = "";
const Alphabet& Alphabet::reduced_dna_alphabet() {
- static Alphabet *a = new Alphabet(reduced_dna_cstr);
+ static Alphabet *a = new Alphabet(reduced_dna_cstr, reduced_dna_reverse_cstr);
return *a;
}
const Alphabet& Alphabet::reduced_rna_alphabet() {
- static Alphabet *a = new Alphabet(reduced_rna_cstr);
+ static Alphabet *a = new Alphabet(reduced_rna_cstr, reduced_rna_reverse_cstr);
return *a;
}
const Alphabet& Alphabet::reduced_nucleic_alphabet() {
- static Alphabet *a = new Alphabet(reduced_nucleic_cstr);
+ static Alphabet *a = new Alphabet(reduced_nucleic_cstr, reduced_nucleic_reverse_cstr);
+ return *a;
+}
+const Alphabet& Alphabet::dna_alphabet() {
+ static Alphabet *a = new Alphabet(dna_cstr, dna_reverse_cstr);
+ return *a;
+}
+const Alphabet& Alphabet::rna_alphabet() {
+ static Alphabet *a = new Alphabet(rna_cstr, rna_reverse_cstr);
return *a;
}
const Alphabet& Alphabet::nucleic_alphabet() {
- static Alphabet *a = new Alphabet(nucleic_cstr);
+ static Alphabet *a = new Alphabet(nucleic_cstr, nucleic_reverse_cstr);
return *a;
}
const Alphabet& Alphabet::protein_alphabet() {
- static Alphabet *a = new Alphabet(protein_cstr);
+ static Alphabet *a = new Alphabet(protein_cstr, protein_cstr);
return *a;
}
const Alphabet& Alphabet::empty_alphabet() {
- static Alphabet *a = new Alphabet(empty_cstr);
+ static Alphabet *a = new Alphabet(empty_cstr, empty_cstr);
return *a;
}
-Alphabet::Alphabet(const char *a) :
- alphabet(a)
+Alphabet::Alphabet(const char *a, const char *reverse_alphabet) :
+ alphabet(a),
+ complement_map(create_complement_map(reverse_alphabet))
{
alphabet_set.insert(alphabet.begin(), alphabet.end());
}
void Alphabet::assign(const Alphabet& a)
{
alphabet = a.alphabet;
+ complement_map = a.complement_map;
alphabet_set.clear();
alphabet_set.insert(alphabet.begin(), alphabet.end());
}
return Alphabet::reduced_rna_alphabet();
case ::reduced_nucleic_alphabet:
return Alphabet::reduced_nucleic_alphabet();
+ case ::dna_alphabet:
+ return Alphabet::dna_alphabet();
+ case ::rna_alphabet:
+ return Alphabet::rna_alphabet();
case ::nucleic_alphabet:
return Alphabet::nucleic_alphabet();
case ::protein_alphabet:
- return Alphabet::protein_alphabet();
+ return Alphabet::protein_alphabet();
+ case ::empty_alphabet:
+ return Alphabet::empty_alphabet();
default:
throw std::runtime_error("unrecognized alphabet type");
break;
}
+}
+
+std::string Alphabet::create_complement_map(const std::string& ra) const
+{
+ if (ra.size() == 0) {
+ // reverse complement is undefined
+ throw mussa_error("no reverse complement");
+ }
+ std::string rc_map(256,'~');
+
+ for(int i = 0; alphabet[i] and ra[i]; ++i) {
+ rc_map[ alphabet[i] ] = ra[i];
+ }
+ return rc_map;
+}
+
+boost::shared_ptr<std::string> Alphabet::reverse_complement(const std::string &seq) const
+{
+ boost::shared_ptr<std::string> new_seq(new std::string());
+
+ new_seq->reserve(seq.size());
+
+ for(std::string::const_reverse_iterator seq_i = seq.rbegin();
+ seq_i != seq.rend();
+ ++seq_i)
+ {
+ new_seq->append(1, complement_map[ *seq_i ]);
+ }
+ return new_seq;
}
\ No newline at end of file