5 #include "seq_span.hpp"
6 #include "mussa_exceptions.hpp"
8 SeqSpan::SeqSpan(const SeqSpan &o)
10 seq_start(o.seq_start),
11 seq_count(o.seq_count),
17 SeqSpan::SeqSpan(const SeqSpan *p)
19 seq_start(p->seq_start),
20 seq_count(p->seq_count),
26 SeqSpan::SeqSpan(const std::string &seq_,
30 seq_count(seq_.length()),
38 seq.reset(new SeqString(seq_, a));
41 throw sequence_invalid_strand(
42 "Can only initialize a Plus or Single strand from a string"
48 SeqSpan::SeqSpan(const SeqSpanRef parent_,
53 seq_start(parent_->seq_start + start_),
57 seq_count = parent_->seq_count;
61 // blech this strand logic is too long
71 seq_strand = parent->strand();
73 throw sequence_invalid_strand("SameStrand is undefined with no parent");
78 switch (parent->strand()) {
80 seq_strand = MinusStrand;
83 seq_strand = PlusStrand;
87 seq_strand = parent->strand();
90 throw sequence_invalid_strand(
91 "OppositeStrand is undefined with SingleStrands"
95 throw sequence_invalid_strand("Parent has an invalid strand type");
98 throw sequence_invalid_strand("SameStrand is undefined with no parent");
103 if (parent->strand() == SingleStrand) {
104 seq_strand = SingleStrand;
106 throw sequence_invalid_strand("Can't change single strandedness");
109 seq_strand = SingleStrand;
113 throw sequence_invalid_strand("unrecognized strand identifier");
117 // Ack the complexity increases!
118 // If our parent is on the minus strand, we need to adjust the start
119 // and count to look like we're selecting from the right side of the
120 // parent sequence (AKA the start of the minus strand)
121 if (parent and parent->strand() == MinusStrand) {
122 seq_start = parent->start() + parent->size() - (start_ + seq_count);
128 SeqSpan &SeqSpan::operator=(const SeqSpan& s)
132 seq_start = s.seq_start;
133 seq_count = s.seq_count;
140 std::ostream& operator<<(std::ostream& out, const SeqSpan& s)
145 /* Not implemented yet
146 //! compare two spans
147 //! \throws sequence_invalid_comparison
148 friend bool operator<(const SeqSpan&, const SeqSpan&);
149 bool operator<(const SeqSpan& a, const SeqSpan& b)
151 // are we subcomponents of the same sequence region?
152 if (a.seq.get() == b.seq.get()) {
153 if (a.seq_start < b.seq_start)
158 throw mussa_error("can only compare two spans from the same sequence");
163 bool operator==(const SeqSpan& a, const SeqSpan& b)
165 if (SeqSpan::isFamily(a, b)) {
166 // can do fast comparison
167 if (a.seq_start == b.seq_start and a.seq_count == b.seq_count) {
168 // unknown strands match anything
169 if (a.seq_strand == SeqSpan::UnknownStrand or
170 b.seq_strand == SeqSpan::UnknownStrand) {
173 // otherwise our stranded-ness needs to match
174 return (a.seq_strand == b.seq_strand);
180 // if we're not part of the same seqspan heirarchy we're never equal
181 // the string equality is handled off in Sequence
185 bool operator!=(const SeqSpan& a, const SeqSpan& b)
190 SeqSpan::const_reference SeqSpan::operator[](SeqSpan::size_type i) const
195 SeqSpan::const_reference SeqSpan::at(SeqSpan::size_type i) const
197 if (!seq) throw std::out_of_range("empty sequence");
198 if (seq_strand == PlusStrand) {
199 return seq->at(seq_start+i);
201 return seq->rc_map[seq->at((seq_start+seq_count-1)-i)];
205 const char *SeqSpan::data() const
208 return seq->c_str()+seq_start;
213 SeqSpan::const_iterator SeqSpan::begin() const
215 if (seq and seq_count != 0) {
216 if (seq_strand != MinusStrand) {
217 return seq->begin()+seq_start;
222 return rc_seq->begin();
225 return SeqSpan::const_iterator(0);
228 SeqSpan::const_iterator SeqSpan::end() const
230 if (seq and seq_count != 0) {
231 if (seq_strand != MinusStrand) {
232 return seq->begin() + seq_start + seq_count;
237 return rc_seq->end();
240 return SeqSpan::const_iterator(0);
243 SeqSpan::const_reverse_iterator SeqSpan::rbegin() const
245 if (seq and seq_count != 0) {
246 if (seq_strand != MinusStrand) {
247 return seq->rbegin()+(seq->size()-(seq_start+seq_count));
252 return rc_seq->rbegin();
255 return SeqSpan::const_reverse_iterator();
258 SeqSpan::const_reverse_iterator SeqSpan::rend() const
260 if (seq and seq_count != 0) {
261 if (seq_strand != MinusStrand) {
262 return rbegin() + seq_count;
267 return rc_seq->rend();
270 return SeqSpan::const_reverse_iterator();
273 bool SeqSpan::empty() const
275 return (seq_count == 0) ? true : false;
278 SeqSpan::size_type SeqSpan::find_first_not_of(
279 const std::string& query,
280 SeqSpan::size_type index) const
282 typedef std::set<std::string::value_type> sequence_set;
283 sequence_set match_set;
285 for(std::string::const_iterator query_item = query.begin();
286 query_item != query.end();
289 match_set.insert(*query_item);
291 for(const_iterator base = begin();
295 if(match_set.find(*base) == match_set.end()) {
299 return SeqSpan::npos;
302 void SeqSpan::setStart(SeqSpan::size_type v)
306 throw mussa_error("can't set Start > Stop");
308 seq_count += seq_start - v;
312 void SeqSpan::setStop(SeqSpan::size_type v)
315 // negative sized sequences are bad
316 throw mussa_error("can't set Stop < Start");
318 seq_count = std::min<size_type>(v - seq_start, parentSize()-parentStart());
321 SeqSpan::size_type SeqSpan::parentStart() const
327 return start() - parent->start();
331 void SeqSpan::setParentStart(SeqSpan::size_type v)
333 setStart(parent->start() + v);
336 SeqSpan::size_type SeqSpan::parentStop() const
342 return stop() - parent->start();
346 void SeqSpan::setParentStop(SeqSpan::size_type v)
348 setStop(parent->start() + v);
351 SeqSpanRef SeqSpan::subseq(size_type start, size_type count, strand_type strand)
353 count = std::min<size_type>(count, seq_count - start);
355 SeqSpanRef new_span(new SeqSpan(this->shared_from_this(), start, count, strand));
359 std::string SeqSpan::sequence() const
362 if (seq_strand != MinusStrand)
363 return seq->substr(seq_start, seq_count);
369 return std::string();
373 bool SeqSpan::isFamily(const SeqSpan& a, const SeqSpan& b)
375 return a.seq.get() == b.seq.get();
378 void SeqSpan::initialize_rc_seq() const
380 std::string new_rc_seq;
381 std::string rc_map(seq->get_alphabet().get_complement_map());
383 new_rc_seq.reserve(size());
384 size_type i = (seq_start + seq_count);
385 while( i != seq_start ) {
387 new_rc_seq.append(1, rc_map[ seq->at(i) ]);
390 SeqStringRef new_rc_ref(new SeqString(new_rc_seq, seq->get_alphabet_ref()));
391 const_cast<SeqStringRef *>(&rc_seq)->swap(new_rc_ref);