5 #include "seq_span.hpp"
6 #include "mussa_exceptions.hpp"
8 SeqSpan::SeqSpan(const SeqSpan &o)
10 seq_start(o.seq_start),
11 seq_count(o.seq_count),
17 SeqSpan::SeqSpan(const SeqSpan *p)
19 seq_start(p->seq_start),
20 seq_count(p->seq_count),
26 SeqSpan::SeqSpan(const std::string &seq_,
30 seq_count(seq_.length()),
38 seq.reset(new SeqString(seq_, a));
41 throw sequence_invalid_strand(
42 "Can only initialize a Plus or Single strand from a string"
48 SeqSpan::SeqSpan(const SeqSpanRef parent_,
53 seq_start(parent_->seq_start + start_),
57 seq_count = parent_->seq_count;
61 // blech this strand logic is too long
71 seq_strand = parent->strand();
73 throw sequence_invalid_strand("SameStrand is undefined with no parent");
78 switch (parent->strand()) {
80 seq_strand = MinusStrand;
83 seq_strand = PlusStrand;
87 seq_strand = parent->strand();
90 throw sequence_invalid_strand(
91 "OppositeStrand is undefined with SingleStrands"
95 throw sequence_invalid_strand("Parent has an invalid strand type");
98 throw sequence_invalid_strand("SameStrand is undefined with no parent");
103 if (parent->strand() == SingleStrand) {
104 seq_strand = SingleStrand;
106 throw sequence_invalid_strand("Can't change single strandedness");
109 seq_strand = SingleStrand;
113 throw sequence_invalid_strand("unrecognized strand identifier");
120 SeqSpan &SeqSpan::operator=(const SeqSpan& s)
124 seq_start = s.seq_start;
125 seq_count = s.seq_count;
132 std::ostream& operator<<(std::ostream& out, const SeqSpan& s)
137 /* Not implemented yet
138 //! compare two spans
139 //! \throws sequence_invalid_comparison
140 friend bool operator<(const SeqSpan&, const SeqSpan&);
141 bool operator<(const SeqSpan& a, const SeqSpan& b)
143 // are we subcomponents of the same sequence region?
144 if (a.seq.get() == b.seq.get()) {
145 if (a.seq_start < b.seq_start)
150 throw mussa_error("can only compare two spans from the same sequence");
155 bool operator==(const SeqSpan& a, const SeqSpan& b)
157 if (SeqSpan::isFamily(a, b)) {
158 // can do fast comparison
159 if (a.seq_start == b.seq_start and a.seq_count == b.seq_count) {
160 // unknown strands match anything
161 if (a.seq_strand == SeqSpan::UnknownStrand or
162 b.seq_strand == SeqSpan::UnknownStrand) {
165 // otherwise our stranded-ness needs to match
166 return (a.seq_strand == b.seq_strand);
172 // if we're not part of the same seqspan heirarchy we're never equal
173 // the string equality is handled off in Sequence
177 bool operator!=(const SeqSpan& a, const SeqSpan& b)
182 SeqSpan::const_reference SeqSpan::operator[](SeqSpan::size_type i) const
187 SeqSpan::const_reference SeqSpan::at(SeqSpan::size_type i) const
189 if (!seq) throw std::out_of_range("empty sequence");
190 if (seq_strand == PlusStrand) {
191 return seq->at(seq_start+i);
193 return seq->rc_map[seq->at((seq_start+seq_count-1)-i)];
197 const char *SeqSpan::data() const
200 return seq->c_str()+seq_start;
205 SeqSpan::const_iterator SeqSpan::begin() const
207 if (seq and seq_count != 0) {
208 if (seq_strand != MinusStrand) {
209 return seq->begin()+seq_start;
214 return rc_seq->begin();
217 return SeqSpan::const_iterator(0);
220 SeqSpan::const_iterator SeqSpan::end() const
222 if (seq and seq_count != 0) {
223 if (seq_strand != MinusStrand) {
224 return seq->begin() + seq_start + seq_count;
229 return rc_seq->end();
232 return SeqSpan::const_iterator(0);
235 SeqSpan::const_reverse_iterator SeqSpan::rbegin() const
237 if (seq and seq_count != 0) {
238 if (seq_strand != MinusStrand) {
239 return seq->rbegin()+(seq->size()-(seq_start+seq_count));
244 return rc_seq->rbegin();
247 return SeqSpan::const_reverse_iterator();
250 SeqSpan::const_reverse_iterator SeqSpan::rend() const
252 if (seq and seq_count != 0) {
253 if (seq_strand != MinusStrand) {
254 return rbegin() + seq_count;
259 return rc_seq->rend();
262 return SeqSpan::const_reverse_iterator();
265 bool SeqSpan::empty() const
267 return (seq_count == 0) ? true : false;
270 SeqSpan::size_type SeqSpan::find_first_not_of(
271 const std::string& query,
272 SeqSpan::size_type index) const
274 typedef std::set<std::string::value_type> sequence_set;
275 sequence_set match_set;
277 for(std::string::const_iterator query_item = query.begin();
278 query_item != query.end();
281 match_set.insert(*query_item);
283 for(const_iterator base = begin();
287 if(match_set.find(*base) == match_set.end()) {
291 return SeqSpan::npos;
294 void SeqSpan::setStart(SeqSpan::size_type v)
298 throw mussa_error("can't set Start > Stop");
300 seq_count += seq_start - v;
304 void SeqSpan::setStop(SeqSpan::size_type v)
307 // negative sized sequences are bad
308 throw mussa_error("can't set Stop < Start");
310 seq_count = std::min<size_type>(v - seq_start, parentSize()-parentStart());
313 SeqSpan::size_type SeqSpan::parentStart() const
319 return start() - parent->start();
323 void SeqSpan::setParentStart(SeqSpan::size_type v)
325 setStart(parent->start() + v);
328 SeqSpan::size_type SeqSpan::parentStop() const
334 return stop() - parent->start();
338 void SeqSpan::setParentStop(SeqSpan::size_type v)
340 setStop(parent->start() + v);
343 SeqSpanRef SeqSpan::subseq(size_type start, size_type count, strand_type strand)
345 count = std::min<size_type>(count, seq_count - start);
347 SeqSpanRef new_span(new SeqSpan(this->shared_from_this(), start, count, strand));
351 std::string SeqSpan::sequence() const
354 if (seq_strand != MinusStrand)
355 return seq->substr(seq_start, seq_count);
361 return std::string();
365 bool SeqSpan::isFamily(const SeqSpan& a, const SeqSpan& b)
367 return a.seq.get() == b.seq.get();
370 void SeqSpan::initialize_rc_seq() const
372 std::string new_rc_seq;
373 std::string rc_map(seq->get_alphabet().get_complement_map());
375 new_rc_seq.reserve(size());
376 size_type i = (seq_start + seq_count);
377 while( i != seq_start ) {
379 new_rc_seq.append(1, rc_map[ seq->at(i) ]);
382 SeqStringRef new_rc_ref(new SeqString(new_rc_seq, seq->get_alphabet_ref()));
383 const_cast<SeqStringRef *>(&rc_seq)->swap(new_rc_ref);