5 #include "seq_span.hpp"
6 #include "mussa_exceptions.hpp"
8 SeqSpan::SeqSpan(const SeqSpan &o)
10 seq_start(o.seq_start),
11 seq_count(o.seq_count),
12 parent_seq(o.parent_seq),
14 seq_annotations(o.seq_annotations),
15 seq_drawable(o.seq_drawable)
19 SeqSpan::SeqSpan(const SeqSpan *p)
21 seq_start(p->seq_start),
22 seq_count(p->seq_count),
23 parent_seq(p->parent_seq),
25 seq_annotations(p->seq_annotations),
26 seq_drawable(p->seq_drawable)
30 SeqSpan::SeqSpan(const std::string &seq_,
34 seq_count(seq_.length()),
42 seq.reset(new SeqString(seq_, a));
45 throw sequence_invalid_strand(
46 "Can only initialize a Plus or Single strand from a string"
52 SeqSpan::SeqSpan(const SeqSpanRef parent_,
57 seq_start(parent_->seq_start + start_),
61 seq_count = parent_->seq_count;
65 // blech this strand logic is too long
75 seq_strand = parent_seq->strand();
77 throw sequence_invalid_strand("SameStrand is undefined with no parent");
82 switch (parent_seq->strand()) {
84 seq_strand = MinusStrand;
87 seq_strand = PlusStrand;
91 seq_strand = parent_seq->strand();
94 throw sequence_invalid_strand(
95 "OppositeStrand is undefined with SingleStrands"
99 throw sequence_invalid_strand("Parent has an invalid strand type");
102 throw sequence_invalid_strand("SameStrand is undefined with no parent");
107 if (parent_seq->strand() == SingleStrand) {
108 seq_strand = SingleStrand;
110 throw sequence_invalid_strand("Can't change single strandedness");
113 seq_strand = SingleStrand;
117 throw sequence_invalid_strand("unrecognized strand identifier");
121 // Ack the complexity increases!
122 // If our parent is on the minus strand, we need to adjust the start
123 // and count to look like we're selecting from the right side of the
124 // parent sequence (AKA the start of the minus strand)
125 if (parent_seq and parent_seq->strand() == MinusStrand) {
126 seq_start = parent_seq->start() + parent_seq->size() - (start_ + seq_count);
132 SeqSpan &SeqSpan::operator=(const SeqSpan& s)
136 seq_start = s.seq_start;
137 seq_count = s.seq_count;
138 parent_seq = s.parent_seq;
140 //seq_annotations.reset(s.seq_annotations);
141 //seq_drawable.reset(s.seq_drawable);
146 std::ostream& operator<<(std::ostream& out, const SeqSpan& s)
151 /* Not implemented yet
152 //! compare two spans
153 //! \throws sequence_invalid_comparison
154 friend bool operator<(const SeqSpan&, const SeqSpan&);
155 bool operator<(const SeqSpan& a, const SeqSpan& b)
157 // are we subcomponents of the same sequence region?
158 if (a.seq.get() == b.seq.get()) {
159 if (a.seq_start < b.seq_start)
164 throw mussa_error("can only compare two spans from the same sequence");
169 bool operator==(const SeqSpan& a, const SeqSpan& b)
171 if (SeqSpan::isFamily(a, b)) {
172 // can do fast comparison
173 if (a.seq_start == b.seq_start and a.seq_count == b.seq_count) {
174 // unknown strands match anything
175 if (a.seq_strand == SeqSpan::UnknownStrand or
176 b.seq_strand == SeqSpan::UnknownStrand) {
179 // otherwise our stranded-ness needs to match
180 return (a.seq_strand == b.seq_strand);
186 // if we're not part of the same seqspan heirarchy we're never equal
187 // the string equality is handled off in Sequence
191 bool operator!=(const SeqSpan& a, const SeqSpan& b)
196 SeqSpan::const_reference SeqSpan::operator[](SeqSpan::size_type i) const
201 SeqSpan::const_reference SeqSpan::at(SeqSpan::size_type i) const
203 if (!seq) throw std::out_of_range("empty sequence");
204 if (seq_strand == PlusStrand) {
205 return seq->at(seq_start+i);
207 return seq->rc_map[seq->at((seq_start+seq_count-1)-i)];
211 const char *SeqSpan::data() const
214 return seq->c_str()+seq_start;
219 SeqSpan::const_iterator SeqSpan::begin() const
221 if (seq and seq_count != 0) {
222 if (seq_strand != MinusStrand) {
223 return seq->begin()+seq_start;
228 return rc_seq->begin();
231 return SeqSpan::const_iterator(0);
234 SeqSpan::const_iterator SeqSpan::end() const
236 if (seq and seq_count != 0) {
237 if (seq_strand != MinusStrand) {
238 return seq->begin() + seq_start + seq_count;
243 return rc_seq->end();
246 return SeqSpan::const_iterator(0);
249 SeqSpan::const_reverse_iterator SeqSpan::rbegin() const
251 if (seq and seq_count != 0) {
252 if (seq_strand != MinusStrand) {
253 return seq->rbegin()+(seq->size()-(seq_start+seq_count));
258 return rc_seq->rbegin();
261 return SeqSpan::const_reverse_iterator();
264 SeqSpan::const_reverse_iterator SeqSpan::rend() const
266 if (seq and seq_count != 0) {
267 if (seq_strand != MinusStrand) {
268 return rbegin() + seq_count;
273 return rc_seq->rend();
276 return SeqSpan::const_reverse_iterator();
279 bool SeqSpan::empty() const
281 return (seq_count == 0) ? true : false;
284 SeqSpan::size_type SeqSpan::find_first_not_of(
285 const std::string& query,
286 SeqSpan::size_type index) const
288 typedef std::set<std::string::value_type> sequence_set;
289 sequence_set match_set;
291 for(std::string::const_iterator query_item = query.begin();
292 query_item != query.end();
295 match_set.insert(*query_item);
297 for(const_iterator base = begin();
301 if(match_set.find(*base) == match_set.end()) {
305 return SeqSpan::npos;
308 void SeqSpan::setStart(SeqSpan::size_type v)
312 throw mussa_error("can't set Start > Stop");
314 seq_count += seq_start - v;
318 void SeqSpan::setStop(SeqSpan::size_type v)
321 // negative sized sequences are bad
322 throw mussa_error("can't set Stop < Start");
324 seq_count = std::min<size_type>(v - seq_start, parentSize()-parentStart());
327 SeqSpan::size_type SeqSpan::parentStart() const
333 return start() - parent_seq->start();
337 void SeqSpan::setParentStart(SeqSpan::size_type v)
339 setStart(parent_seq->start() + v);
342 SeqSpan::size_type SeqSpan::parentStop() const
348 return stop() - parent_seq->start();
352 void SeqSpan::setParentStop(SeqSpan::size_type v)
354 setStop(parent_seq->start() + v);
357 SeqSpanRef SeqSpan::subseq(size_type start, size_type count, strand_type strand)
359 count = std::min<size_type>(count, seq_count - start);
361 SeqSpanRef new_span(new SeqSpan(this->shared_from_this(), start, count, strand));
365 std::string SeqSpan::sequence() const
368 if (seq_strand != MinusStrand)
369 return seq->substr(seq_start, seq_count);
375 return std::string();
379 bool SeqSpan::isFamily(const SeqSpan& a, const SeqSpan& b)
381 return a.seq.get() == b.seq.get();
384 void SeqSpan::initialize_rc_seq() const
386 std::string new_rc_seq;
387 std::string rc_map(seq->get_alphabet().get_complement_map());
389 new_rc_seq.reserve(size());
390 size_type i = (seq_start + seq_count);
391 while( i != seq_start ) {
393 new_rc_seq.append(1, rc_map[ seq->at(i) ]);
396 SeqStringRef new_rc_ref(new SeqString(new_rc_seq, seq->get_alphabet_ref()));
397 const_cast<SeqStringRef *>(&rc_seq)->swap(new_rc_ref);