: seq(o.seq),
seq_start(o.seq_start),
seq_count(o.seq_count),
- parent(o.parent)
+ parent(o.parent),
+ rc_seq(o.rc_seq)
{
}
: seq(p->seq),
seq_start(p->seq_start),
seq_count(p->seq_count),
- parent(p->parent)
+ parent(p->parent),
+ rc_seq(p->rc_seq)
{
}
SeqSpan::SeqSpan(const std::string &seq_,
- AlphabetRef a)
- : seq(new SeqString(seq_, a)),
- seq_start(0),
+ AlphabetRef a,
+ strand_type strand_)
+ : seq_start(0),
seq_count(seq_.length()),
parent()
{
+ switch (strand_) {
+ case PlusStrand:
+ case SingleStrand:
+ // do nothing
+ seq_strand = strand_;
+ seq.reset(new SeqString(seq_, a));
+ break;
+ default:
+ throw sequence_invalid_strand(
+ "Can only initialize a Plus or Single strand from a string"
+ );
+ break;
+ }
}
-SeqSpan::SeqSpan(const SeqSpanRef parent_, size_type start_, size_type count_)
+SeqSpan::SeqSpan(const SeqSpanRef parent_,
+ size_type start_,
+ size_type count_,
+ strand_type strand_)
: seq(parent_->seq),
seq_start(parent_->seq_start + start_),
parent(parent_)
seq_count = parent_->seq_count;
else
seq_count = count_;
+
+ // blech this strand logic is too long
+ switch(strand_) {
+ case UnknownStrand:
+ case PlusStrand:
+ case MinusStrand:
+ case BothStrand:
+ seq_strand = strand_;
+ break;
+ case SameStrand:
+ if (parent) {
+ seq_strand = parent->strand();
+ } else {
+ throw sequence_invalid_strand("SameStrand is undefined with no parent");
+ }
+ break;
+ case OppositeStrand:
+ if (parent) {
+ switch (parent->strand()) {
+ case PlusStrand:
+ seq_strand = MinusStrand;
+ break;
+ case MinusStrand:
+ seq_strand = PlusStrand;
+ break;
+ case UnknownStrand:
+ case BothStrand:
+ seq_strand = parent->strand();
+ break;
+ case SingleStrand:
+ throw sequence_invalid_strand(
+ "OppositeStrand is undefined with SingleStrands"
+ );
+ break;
+ default:
+ throw sequence_invalid_strand("Parent has an invalid strand type");
+ }
+ } else {
+ throw sequence_invalid_strand("SameStrand is undefined with no parent");
+ }
+ break;
+ case SingleStrand:
+ if (parent) {
+ if (parent->strand() == SingleStrand) {
+ seq_strand = SingleStrand;
+ } else {
+ throw sequence_invalid_strand("Can't change single strandedness");
+ }
+ } else {
+ seq_strand = SingleStrand;
+ }
+ break;
+ default:
+ throw sequence_invalid_strand("unrecognized strand identifier");
+ break;
+ }
+
+ // Ack the complexity increases!
+ // If our parent is on the minus strand, we need to adjust the start
+ // and count to look like we're selecting from the right side of the
+ // parent sequence (AKA the start of the minus strand)
+ if (parent and parent->strand() == MinusStrand) {
+ seq_start = parent->start() + parent->size() - (start_ + seq_count);
+ }
}
//////
seq_start = s.seq_start;
seq_count = s.seq_count;
parent = s.parent;
+ rc_seq = s.rc_seq;
}
return *this;
}
}
}
*/
-#include <iostream>
+
bool operator==(const SeqSpan& a, const SeqSpan& b)
{
if (SeqSpan::isFamily(a, b)) {
// can do fast comparison
if (a.seq_start == b.seq_start and a.seq_count == b.seq_count) {
- return true;
+ // unknown strands match anything
+ if (a.seq_strand == SeqSpan::UnknownStrand or
+ b.seq_strand == SeqSpan::UnknownStrand) {
+ return true;
+ } else {
+ // otherwise our stranded-ness needs to match
+ return (a.seq_strand == b.seq_strand);
+ }
} else {
return false;
}
- }
+ }
+ // if we're not part of the same seqspan heirarchy we're never equal
+ // the string equality is handled off in Sequence
return false;
}
SeqSpan::const_reference SeqSpan::at(SeqSpan::size_type i) const
{
if (!seq) throw std::out_of_range("empty sequence");
- return seq->at(i+seq_start);
+ if (seq_strand == PlusStrand) {
+ return seq->at(seq_start+i);
+ } else {
+ return seq->rc_map[seq->at((seq_start+seq_count-1)-i)];
+ }
}
const char *SeqSpan::data() const
SeqSpan::const_iterator SeqSpan::begin() const
{
- if (seq and seq_count != 0)
- return seq->begin()+seq_start;
- else
- return SeqSpan::const_iterator(0);
+ if (seq and seq_count != 0) {
+ if (seq_strand != MinusStrand) {
+ return seq->begin()+seq_start;
+ } else {
+ if (not rc_seq) {
+ initialize_rc_seq();
+ }
+ return rc_seq->begin();
+ }
+ }
+ return SeqSpan::const_iterator(0);
}
SeqSpan::const_iterator SeqSpan::end() const
{
if (seq and seq_count != 0) {
- return seq->begin() + seq_start + seq_count;
- } else {
- return SeqSpan::const_iterator(0);
- }
+ if (seq_strand != MinusStrand) {
+ return seq->begin() + seq_start + seq_count;
+ } else {
+ if (not rc_seq) {
+ initialize_rc_seq();
+ }
+ return rc_seq->end();
+ }
+ }
+ return SeqSpan::const_iterator(0);
}
SeqSpan::const_reverse_iterator SeqSpan::rbegin() const
{
- if (seq and seq_count != 0)
- return seq->rbegin()+(seq->size()-(seq_start+seq_count));
- else
- return SeqSpan::const_reverse_iterator();
+ if (seq and seq_count != 0) {
+ if (seq_strand != MinusStrand) {
+ return seq->rbegin()+(seq->size()-(seq_start+seq_count));
+ } else {
+ if (not rc_seq) {
+ initialize_rc_seq();
+ }
+ return rc_seq->rbegin();
+ }
+ }
+ return SeqSpan::const_reverse_iterator();
}
SeqSpan::const_reverse_iterator SeqSpan::rend() const
{
if (seq and seq_count != 0) {
- return rbegin() + seq_count;
- } else {
- return SeqSpan::const_reverse_iterator();
+ if (seq_strand != MinusStrand) {
+ return rbegin() + seq_count;
+ } else {
+ if (not rc_seq) {
+ initialize_rc_seq();
+ }
+ return rc_seq->rend();
+ }
}
+ return SeqSpan::const_reverse_iterator();
}
bool SeqSpan::empty() const
typedef std::set<std::string::value_type> sequence_set;
sequence_set match_set;
- for(const_iterator query_item = query.begin();
+ for(std::string::const_iterator query_item = query.begin();
query_item != query.end();
++query_item)
{
setStop(parent->start() + v);
}
-SeqSpanRef SeqSpan::subseq(size_type start, size_type count)
+SeqSpanRef SeqSpan::subseq(size_type start, size_type count, strand_type strand)
{
count = std::min<size_type>(count, seq_count - start);
- SeqSpanRef new_span(new SeqSpan(this->shared_from_this(), start, count));
+ SeqSpanRef new_span(new SeqSpan(this->shared_from_this(), start, count, strand));
return new_span;
}
std::string SeqSpan::sequence() const
{
if (seq) {
- return seq->substr(seq_start, seq_count);
+ if (seq_strand != MinusStrand)
+ return seq->substr(seq_start, seq_count);
+ else {
+ initialize_rc_seq();
+ return *rc_seq;
+ }
} else {
return std::string();
}
{
return a.seq.get() == b.seq.get();
}
+
+void SeqSpan::initialize_rc_seq() const
+{
+ std::string new_rc_seq;
+ std::string rc_map(seq->get_alphabet().get_complement_map());
+
+ new_rc_seq.reserve(size());
+ size_type i = (seq_start + seq_count);
+ while( i != seq_start ) {
+ --i;
+ new_rc_seq.append(1, rc_map[ seq->at(i) ]);
+ }
+
+ SeqStringRef new_rc_ref(new SeqString(new_rc_seq, seq->get_alphabet_ref()));
+ const_cast<SeqStringRef *>(&rc_seq)->swap(new_rc_ref);
+}