}
-Sequence::Sequence(alphabet_ref alphabet_)
- : seq(new SeqSpan("")),
- alphabet(alphabet_),
+Sequence::Sequence(AlphabetRef alphabet)
+ : seq(new SeqSpan("", alphabet)),
strand(UnknownStrand)
{
}
{
}
-Sequence::Sequence(const char *seq, alphabet_ref alphabet_)
- : alphabet(alphabet_),
- strand(UnknownStrand),
+Sequence::Sequence(const char *seq, AlphabetRef alphabet_)
+ : strand(UnknownStrand),
header(""),
species("")
{
- set_filtered_sequence(seq, alphabet);
+ set_filtered_sequence(seq, alphabet_);
}
-Sequence::Sequence(const std::string& seq, alphabet_ref alphabet_)
- : alphabet(alphabet_),
- strand(UnknownStrand),
+Sequence::Sequence(const std::string& seq, AlphabetRef alphabet_)
+ : strand(UnknownStrand),
header(""),
species("")
{
- set_filtered_sequence(seq, alphabet);
+ set_filtered_sequence(seq, alphabet_);
}
Sequence::Sequence(const Sequence& o)
: seq(o.seq),
- alphabet(o.alphabet),
strand(o.strand),
header(o.header),
species(o.species),
Sequence::Sequence(const Sequence* o)
: seq(o->seq),
- alphabet(o->alphabet),
strand(o->strand),
header(o->header),
species(o->species),
{
}
-Sequence::Sequence(const SeqSpanRef& seq_ref, alphabet_ref alphabet_)
+Sequence::Sequence(const SeqSpanRef& seq_ref)
: seq(seq_ref),
- alphabet(alphabet),
strand(UnknownStrand),
header(""),
species("")
{
if (this != &s) {
seq = s.seq;
- alphabet = s.alphabet;
strand = s.strand;
header = s.header;
species = s.species;
void Sequence::load_fasta(fs::path file_path, int seq_num, int start_index, int end_index)
{
- load_fasta(file_path, alphabet, seq_num, start_index, end_index);
+ load_fasta(file_path, reduced_nucleic_alphabet, seq_num, start_index, end_index);
}
//! load a fasta file into a sequence
-void Sequence::load_fasta(fs::path file_path, alphabet_ref a,
+void Sequence::load_fasta(fs::path file_path, AlphabetRef a,
int seq_num, int start_index, int end_index)
{
fs::fstream data_file;
void Sequence::load_fasta(std::istream& file,
int seq_num, int start_index, int end_index)
{
- load_fasta(file, alphabet, seq_num, start_index, end_index);
+ load_fasta(file, reduced_nucleic_alphabet, seq_num, start_index, end_index);
}
void
-Sequence::load_fasta(std::istream& data_file, alphabet_ref a,
+Sequence::load_fasta(std::istream& data_file, AlphabetRef a,
int seq_num,
int start_index, int end_index)
{
std::string rev_comp;
std::string sequence_raw;
std::string seq_tmp; // holds sequence during basic filtering
- const Alphabet &alpha = get_alphabet(a);
+ const Alphabet &alpha = get_alphabet();
if (seq_num == 0) {
throw mussa_load_error("fasta sequence number is 1 based (can't be 0)");
}
void Sequence::set_filtered_sequence(const std::string &in_seq,
- alphabet_ref alphabet_,
+ AlphabetRef alphabet_,
size_type start,
size_type count,
strand_type strand_)
{
- alphabet = alphabet_;
if ( count == npos)
count = in_seq.size() - start;
std::string new_seq;
new_seq.reserve(count);
// finally, the actual conversion loop
- const Alphabet& alpha_impl = get_alphabet(); // go get one of our actual alphabets
+ const Alphabet& alpha_impl = Alphabet::get_alphabet(alphabet_); // go get one of our actual alphabets
std::string::const_iterator seq_i = in_seq.begin()+start;
for(size_type i = 0; i != count; ++i, ++seq_i)
{
new_seq.append(1, 'N');
}
}
- SeqSpanRef new_seq_ref(new SeqSpan(new_seq));
+ SeqSpanRef new_seq_ref(new SeqSpan(new_seq, alphabet_));
seq = new_seq_ref;
strand = strand_;
}
return new_seq;
}
-std::string Sequence::create_reverse_map() const
-{
- std::string rc_map(256, '~');
- // if we're rna, use U instead of T
- // we might want to add an "is_rna" to sequence at somepoint
- char TU = (alphabet == reduced_rna_alphabet) ? 'U' : 'T';
- char tu = (alphabet == reduced_rna_alphabet) ? 'u' : 't';
- rc_map['A'] = TU ; rc_map['a'] = tu ;
- rc_map['T'] = 'A'; rc_map['t'] = 'a';
- rc_map['U'] = 'A'; rc_map['u'] = 'a';
- rc_map['G'] = 'C'; rc_map['g'] = 'c';
- rc_map['C'] = 'G'; rc_map['c'] = 'g';
- rc_map['M'] = 'K'; rc_map['m'] = 'k';
- rc_map['R'] = 'Y'; rc_map['r'] = 'y';
- rc_map['W'] = 'W'; rc_map['w'] = 'w';
- rc_map['S'] = 'S'; rc_map['s'] = 's';
- rc_map['Y'] = 'R'; rc_map['y'] = 'r';
- rc_map['K'] = 'M'; rc_map['k'] = 'm';
- rc_map['V'] = 'B'; rc_map['v'] = 'b';
- rc_map['H'] = 'D'; rc_map['h'] = 'd';
- rc_map['D'] = 'H'; rc_map['d'] = 'h';
- rc_map['B'] = 'V'; rc_map['b'] = 'v';
- rc_map['N'] = 'N'; rc_map['n'] = 'n';
- rc_map['X'] = 'X'; rc_map['x'] = 'x';
- rc_map['?'] = '?';
- rc_map['.'] = '.';
- rc_map['-'] = '-';
- rc_map['~'] = '~'; // not really needed, but perhaps it's clearer.
- return rc_map;
-}
+// FIXME: This needs to be moved into SeqSpan
Sequence Sequence::rev_comp() const
{
std::string rev_comp;
rev_comp.reserve(length());
- std::string rc_map = create_reverse_map();
+ std::string rc_map = seq->seq->create_reverse_complement_map();
// reverse and convert
Sequence::const_reverse_iterator seq_i;
{
rev_comp.append(1, rc_map[*seq_i]);
}
- return Sequence(rev_comp, alphabet);
+ return Sequence(rev_comp, seq->seq->get_alphabet_ref());
+}
+
+const Alphabet& Sequence::get_alphabet() const
+{
+ return (seq) ? seq->get_alphabet() : Alphabet::empty_alphabet();
}
void Sequence::set_fasta_header(std::string header_)
return "";
}
-const Alphabet& Sequence::get_alphabet() const
-{
- return get_alphabet(alphabet);
-}
-
-const Alphabet& Sequence::get_alphabet(alphabet_ref alpha) const
-{
- switch (alpha) {
- case reduced_dna_alphabet:
- return Alphabet::reduced_dna_alphabet();
- case reduced_rna_alphabet:
- return Alphabet::reduced_rna_alphabet();
- case reduced_nucleic_alphabet:
- return Alphabet::reduced_nucleic_alphabet();
- case nucleic_alphabet:
- return Alphabet::nucleic_alphabet();
- case protein_alphabet:
- return Alphabet::protein_alphabet();
- default:
- throw std::runtime_error("unrecognized alphabet type");
- break;
- }
-}
-
-void Sequence::set_sequence(const std::string& s, alphabet_ref a)
+void Sequence::set_sequence(const std::string& s, AlphabetRef a)
{
set_filtered_sequence(s, a);
}