const std::string Sequence::protein_alphabet("AaCcDdEeFfGgHhIiKkLlMmNnPpQqRrSsTtVvWwYy\012\015");
Sequence::Sequence()
+ : seq_start(0),
+ seq_count(0)
{
}
}
Sequence::Sequence(const char *seq)
- : header(""),
+ : seq_start(0),
+ seq_count(0),
+ header(""),
species("")
{
set_filtered_sequence(seq);
}
Sequence::Sequence(const std::string& seq)
- : header(""),
+ : seq_start(0),
+ seq_count(0),
+ header(""),
species("")
{
set_filtered_sequence(seq);
Sequence::Sequence(const Sequence& o)
: seq(o.seq),
+ seq_start(o.seq_start),
+ seq_count(o.seq_count),
header(o.header),
species(o.species),
annots(o.annots),
Sequence &Sequence::operator=(const Sequence& s)
{
if (this != &s) {
- //sequence = s.sequence;
seq = s.seq;
+ seq_start = s.seq_start;
+ seq_count = s.seq_count;
header = s.header;
species = s.species;
annots = s.annots;
bool read_seq = true;
std::string rev_comp;
std::string sequence_raw;
- std::string seq_tmp; // holds sequence during basic filtering
+ std::string seq_tmp; // holds sequence during basic filtering
if (seq_num == 0) {
throw mussa_load_error("fasta sequence number is 1 based (can't be 0)");
new_seq->append(1, conversionTable[ (int)old_seq[seq_index+start]]);
}
seq = new_seq;
+ seq_start = 0;
+ seq_count = count;
}
void
*spirit::space_p
)
//end grammar
- ) /*,
- spirit::space_p*/).full;
+ )).full;
// go seearch for query sequences
find_sequences(query_seqs.begin(), query_seqs.end());
if ( count == npos || start+count > size()) {
count = size()-start;
}
- Sequence new_seq(seq->substr(start, count));
- new_seq.set_fasta_header(get_fasta_header());
- new_seq.set_species(get_species());
+ Sequence new_seq(*this);
+ new_seq.seq_start = start;
+ new_seq.seq_count = count;
new_seq.motif_list = motif_list;
+ new_seq.annots.clear();
// attempt to copy & reannotate position based annotations
int end = start+count;
// finally, the actual conversion loop
for(seq_i = len - 1; seq_i >= 0; seq_i--)
{
- table_i = (int) seq->at(seq_i);
+ table_i = (int) at(seq_i);
rev_comp += conversionTable[table_i];
}
Sequence::const_reference Sequence::at(Sequence::size_type i) const
{
if (!seq) throw std::out_of_range("empty sequence");
- return seq->at(i);
+ return seq->at(i+seq_start);
}
void
Sequence::clear()
{
seq.reset();
+ seq_start = 0;
+ seq_count = 0;
header.clear();
species.clear();
annots.clear();
const char *Sequence::c_str() const
{
if (seq)
- return seq->c_str();
+ return seq->c_str()+seq_start;
else
return 0;
}
Sequence::const_iterator Sequence::begin() const
{
- if (seq)
- return seq->begin();
+ if (seq and seq_count != 0)
+ return seq->begin()+seq_start;
else
return Sequence::const_iterator(0);
}
Sequence::const_iterator Sequence::end() const
{
- if (seq)
- return seq->end();
- else
+ if (seq and seq_count != 0) {
+ return seq->begin() + seq_start + seq_count;
+ } else {
return Sequence::const_iterator(0);
+ }
}
bool Sequence::empty() const
{
- if (seq)
- return seq->empty();
- else
- return true;
+ return (seq_count == 0) ? true : false;
}
Sequence::size_type Sequence::size() const
{
- if (seq)
- return seq->size();
- else
- return 0;
+ return seq_count;
}
Sequence::size_type Sequence::length() const
{
Sequence::const_iterator x_i = x.begin();
Sequence::const_iterator y_i = y.begin();
-
+ // for sequences there's some computation associated with computing .end
+ // so lets cache it.
+ Sequence::const_iterator xend = x.end();
+ Sequence::const_iterator yend = y.end();
while(1) {
- if( x_i == x.end() and y_i == y.end() ) {
+ if( x_i == xend and y_i == yend ) {
return false;
- } else if ( x_i == x.end() ) {
+ } else if ( x_i == xend ) {
return true;
- } else if ( y_i == y.end() ) {
+ } else if ( y_i == yend ) {
return false;
} else if ( (*x_i) < (*y_i)) {
return true;
// we know they can't be equal. (and we need to do this
// to prevent dereferencing an empty pointer)
return false;
- } else if ( *(x.seq) == *(y.seq)) {
- // and x.annots == y.annots and x.motif_list == y.motif_list) {
- return true;
- } else {
+ } else if (x.seq_count != y.seq_count) {
+ // if they're of different lenghts, they're not equal
return false;
}
+ Sequence::const_iterator xseq_i = x.begin();
+ Sequence::const_iterator yseq_i = y.begin();
+ // since the length of the two sequences is equal, we only need to
+ // test one.
+ for(; xseq_i != x.end(); ++xseq_i, ++yseq_i) {
+ if (*xseq_i != *yseq_i) {
+ return false;
+ }
+ }
+ return true;
}