}
-Sequence::Sequence(alphabet_ref alphabet_)
- : seq(new SeqSpan("")),
- alphabet(alphabet_),
- strand(UnknownStrand)
+Sequence::Sequence(AlphabetRef alphabet)
+ : seq(new SeqSpan("", alphabet, SeqSpan::PlusStrand))
{
}
{
}
-Sequence::Sequence(const char *seq, alphabet_ref alphabet_)
- : alphabet(alphabet_),
- strand(UnknownStrand),
- header(""),
+Sequence::Sequence(const char *seq, AlphabetRef alphabet_, SeqSpan::strand_type strand_)
+ : header(""),
species("")
{
- set_filtered_sequence(seq, alphabet);
+ set_filtered_sequence(seq, alphabet_, 0, npos, strand_);
}
-Sequence::Sequence(const std::string& seq, alphabet_ref alphabet_)
- : alphabet(alphabet_),
- strand(UnknownStrand),
- header(""),
+Sequence::Sequence(const std::string& seq,
+ AlphabetRef alphabet_,
+ SeqSpan::strand_type strand_)
+ : header(""),
species("")
{
- set_filtered_sequence(seq, alphabet);
+ set_filtered_sequence(seq, alphabet_, 0, seq.size(), strand_);
}
Sequence::Sequence(const Sequence& o)
: seq(o.seq),
- alphabet(o.alphabet),
- strand(o.strand),
header(o.header),
species(o.species),
annots(o.annots),
Sequence::Sequence(const Sequence* o)
: seq(o->seq),
- alphabet(o->alphabet),
- strand(o->strand),
header(o->header),
species(o->species),
annots(o->annots),
{
}
-Sequence::Sequence(const SeqSpanRef& seq_ref, alphabet_ref alphabet_)
+Sequence::Sequence(const SeqSpanRef& seq_ref)
: seq(seq_ref),
- alphabet(alphabet),
- strand(UnknownStrand),
header(""),
species("")
{
{
if (this != &s) {
seq = s.seq;
- alphabet = s.alphabet;
- strand = s.strand;
header = s.header;
species = s.species;
annots = s.annots;
void Sequence::load_fasta(fs::path file_path, int seq_num, int start_index, int end_index)
{
- load_fasta(file_path, alphabet, seq_num, start_index, end_index);
+ load_fasta(file_path, reduced_nucleic_alphabet, seq_num, start_index, end_index);
}
//! load a fasta file into a sequence
-void Sequence::load_fasta(fs::path file_path, alphabet_ref a,
+void Sequence::load_fasta(fs::path file_path, AlphabetRef a,
int seq_num, int start_index, int end_index)
{
fs::fstream data_file;
void Sequence::load_fasta(std::istream& file,
int seq_num, int start_index, int end_index)
{
- load_fasta(file, alphabet, seq_num, start_index, end_index);
+ load_fasta(file, reduced_nucleic_alphabet, seq_num, start_index, end_index);
}
void
-Sequence::load_fasta(std::istream& data_file, alphabet_ref a,
+Sequence::load_fasta(std::istream& data_file, AlphabetRef a,
int seq_num,
int start_index, int end_index)
{
std::string rev_comp;
std::string sequence_raw;
std::string seq_tmp; // holds sequence during basic filtering
- const Alphabet &alpha = get_alphabet(a);
+ const Alphabet &alpha = Alphabet::get_alphabet(a);
if (seq_num == 0) {
throw mussa_load_error("fasta sequence number is 1 based (can't be 0)");
std::string msg("The selected sequence appears to be empty");
throw sequence_empty_error(msg);
}
- set_filtered_sequence(sequence_raw, a, start_index, end_index-start_index);
+ set_filtered_sequence(sequence_raw, a, start_index, end_index-start_index, SeqSpan::PlusStrand);
} else {
std::string errormsg("There were no fasta sequences");
throw sequence_empty_file_error(errormsg);
}
void Sequence::set_filtered_sequence(const std::string &in_seq,
- alphabet_ref alphabet_,
+ AlphabetRef alphabet_,
size_type start,
size_type count,
- strand_type strand_)
+ SeqSpan::strand_type strand_)
{
- alphabet = alphabet_;
if ( count == npos)
count = in_seq.size() - start;
std::string new_seq;
new_seq.reserve(count);
// finally, the actual conversion loop
- const Alphabet& alpha_impl = get_alphabet(); // go get one of our actual alphabets
+ const Alphabet& alpha_impl = Alphabet::get_alphabet(alphabet_); // go get one of our actual alphabets
std::string::const_iterator seq_i = in_seq.begin()+start;
for(size_type i = 0; i != count; ++i, ++seq_i)
{
new_seq.append(1, 'N');
}
}
- SeqSpanRef new_seq_ref(new SeqSpan(new_seq));
+ SeqSpanRef new_seq_ref(new SeqSpan(new_seq, alphabet_, strand_));
seq = new_seq_ref;
- strand = strand_;
}
void
}
Sequence
-Sequence::subseq(size_type start, size_type count) const
+Sequence::subseq(size_type start, size_type count, SeqSpan::strand_type strand) const
{
+ // FIXME: should i really allow a subsequence of an empty sequence?
if (!seq) {
Sequence new_seq;
return new_seq;
}
Sequence new_seq = *this;
- new_seq.seq = seq->subseq(start, count);
+ new_seq.seq = seq->subseq(start, count, strand);
copy_children(new_seq, start, count);
return new_seq;
}
-std::string Sequence::create_reverse_map() const
+
+// FIXME: This needs to be moved into SeqSpan
+Sequence Sequence::rev_comp() const
{
- std::string rc_map(256, '~');
- // if we're rna, use U instead of T
- // we might want to add an "is_rna" to sequence at somepoint
- char TU = (alphabet == reduced_rna_alphabet) ? 'U' : 'T';
- char tu = (alphabet == reduced_rna_alphabet) ? 'u' : 't';
- rc_map['A'] = TU ; rc_map['a'] = tu ;
- rc_map['T'] = 'A'; rc_map['t'] = 'a';
- rc_map['U'] = 'A'; rc_map['u'] = 'a';
- rc_map['G'] = 'C'; rc_map['g'] = 'c';
- rc_map['C'] = 'G'; rc_map['c'] = 'g';
- rc_map['M'] = 'K'; rc_map['m'] = 'k';
- rc_map['R'] = 'Y'; rc_map['r'] = 'y';
- rc_map['W'] = 'W'; rc_map['w'] = 'w';
- rc_map['S'] = 'S'; rc_map['s'] = 's';
- rc_map['Y'] = 'R'; rc_map['y'] = 'r';
- rc_map['K'] = 'M'; rc_map['k'] = 'm';
- rc_map['V'] = 'B'; rc_map['v'] = 'b';
- rc_map['H'] = 'D'; rc_map['h'] = 'd';
- rc_map['D'] = 'H'; rc_map['d'] = 'h';
- rc_map['B'] = 'V'; rc_map['b'] = 'v';
- rc_map['N'] = 'N'; rc_map['n'] = 'n';
- rc_map['X'] = 'X'; rc_map['x'] = 'x';
- rc_map['?'] = '?';
- rc_map['.'] = '.';
- rc_map['-'] = '-';
- rc_map['~'] = '~'; // not really needed, but perhaps it's clearer.
- return rc_map;
+ // a reverse complement is the whole opposite strand
+ return subseq(0, npos, SeqSpan::OppositeStrand);
}
-Sequence Sequence::rev_comp() const
+const Alphabet& Sequence::get_alphabet() const
{
- std::string rev_comp;
- rev_comp.reserve(length());
-
- std::string rc_map = create_reverse_map();
-
- // reverse and convert
- Sequence::const_reverse_iterator seq_i;
- Sequence::const_reverse_iterator seq_end = rend();
- for(seq_i = rbegin();
- seq_i != seq_end;
- ++seq_i)
- {
- rev_comp.append(1, rc_map[*seq_i]);
- }
- return Sequence(rev_comp, alphabet);
+ return (seq) ? seq->get_alphabet() : Alphabet::empty_alphabet();
}
void Sequence::set_fasta_header(std::string header_)
return "";
}
-const Alphabet& Sequence::get_alphabet() const
-{
- return get_alphabet(alphabet);
-}
-
-const Alphabet& Sequence::get_alphabet(alphabet_ref alpha) const
-{
- switch (alpha) {
- case reduced_dna_alphabet:
- return Alphabet::reduced_dna_alphabet();
- case reduced_rna_alphabet:
- return Alphabet::reduced_rna_alphabet();
- case reduced_nucleic_alphabet:
- return Alphabet::reduced_nucleic_alphabet();
- case nucleic_alphabet:
- return Alphabet::nucleic_alphabet();
- case protein_alphabet:
- return Alphabet::protein_alphabet();
- default:
- throw std::runtime_error("unrecognized alphabet type");
- break;
- }
-}
-
-void Sequence::set_sequence(const std::string& s, alphabet_ref a)
+void Sequence::set_sequence(const std::string& s, AlphabetRef a)
{
- set_filtered_sequence(s, a);
+ set_filtered_sequence(s, a, 0, s.size(), SeqSpan::PlusStrand);
}
std::string Sequence::get_sequence() const
Sequence::clear()
{
seq.reset();
- strand = UnknownStrand;
header.clear();
species.clear();
annots.clear();
}
getline(load_file, file_data_line);
// looks like the sequence is written as a single line
- set_filtered_sequence(file_data_line, reduced_dna_alphabet);
+ set_filtered_sequence(file_data_line, reduced_dna_alphabet, 0, file_data_line.size(), SeqSpan::PlusStrand);
getline(load_file, file_data_line);
getline(load_file, file_data_line);
if (file_data_line == "<Annotations>")