nway_other.cpp
nway_paths.cpp
parse_options.cpp
+ seq_span.cpp
sequence.cpp
sequence_location.cpp )
ADD_LIBRARY( mussa_core STATIC ${SOURCES} ${MOC_SOURCES})
INCLUDE_DIRECTORIES(${OPENGL_INCLUDE_DIR}
${BOOST_INCLUDE_DIR}
- ${QT_INCLUDES})
+ ${QT_INCLUDES}
+ ${CMAKE_CURRENT_SOURCE_DIR})
SET_TARGET_PROPERTIES(
mussa_core PROPERTIES
int start_i, seq1_i, seq2_i, win_i; // loop variables
int matches; // number of matches in to a window
int i2_offset;
- const char *seq1 = sseq1.c_str();
- const char *seq2 = sseq2.c_str();
+ const char *seq1 = sseq1.data();
+ const char *seq2 = sseq2.data();
validate_sequence(sseq1);
validate_sequence(sseq2);
void GlSeqBrowser::push_sequence(const Sequence& s)
{
- boost::shared_ptr<Sequence> seq_copy(new Sequence(s));
- GlSequence gs(seq_copy, color_mapper);
+ GlSequence gs(s, color_mapper);
push_sequence(gs);
}
-
void GlSeqBrowser::push_sequence(boost::shared_ptr<Sequence> s)
{
- boost::shared_ptr<GlSequence> gs(new GlSequence(s, color_mapper));
+ boost::shared_ptr<GlSequence> gs(new GlSequence(*s, color_mapper));
push_sequence(gs);
}
//! copy sequence from selected track using formating function
template<class Item>
size_t GlSeqBrowser::copySelectedTracks(std::list<Item>& result,
- Item (*formatter)(boost::shared_ptr<Sequence> s,
- int left,
- int right))
+ Item (*formatter)(const Sequence& s, int left, int right))
{
size_t base_pairs_copied = 0;
result.clear();
<< endl;
} else {
// we should be safe
- boost::shared_ptr<Sequence> seq = track_container[track_index]->sequence();
+ Sequence seq(*track_container[track_index]);
result.push_back(formatter(seq, track_i->left, track_i->right));
base_pairs_copied += max(track_i->right-track_i->left, 0);
}
{
std::list<std::string> result;
struct AsFasta {
- static string formatter(boost::shared_ptr<Sequence> seq,
- int left,
- int right)
+ static string formatter(const Sequence& seq, int left, int right)
{
stringstream s;
- s << ">" << seq->get_fasta_header()
+ s << ">" << seq.get_fasta_header()
<< "|" << "subregion=" << left << "-" << right+1
<< std::endl
- << seq->subseq(left, right-left+1) << std::endl;
+ << seq.subseq(left, right-left+1) << std::endl;
return s.str();
}
};
size_t GlSeqBrowser::copySelectedTracksAsSequences(std::list<Sequence>& result)
{
struct AsSequence {
- static Sequence formatter(boost::shared_ptr<Sequence> seq,
+ static Sequence formatter(const Sequence& seq,
int left,
int right)
{
- return seq->subseq(left, right-left+1);
+ return seq.subseq(left, right-left+1);
}
};
return copySelectedTracks(result, AsSequence::formatter);
std::list<SequenceLocation>& result)
{
struct AsSeqLocation {
- static SequenceLocation formatter(boost::shared_ptr<Sequence> seq,
+ static SequenceLocation formatter(const Sequence& seq,
int left,
int right)
{
{
std::list<string> result;
struct AsString {
- static string formatter(boost::shared_ptr<Sequence> seq,
+ static string formatter(const Sequence& seq,
int left,
int right)
{
stringstream s;
- s << seq->subseq(left, right-left+1);
+ s << seq.subseq(left, right-left+1);
return s.str();
}
};
//! copy sequence from selected track using formating function
template<class Item>
size_t copySelectedTracks(std::list<Item>& result,
- Item (*format_track)(boost::shared_ptr<Sequence> s,
- int left,
- int right));
+ Item (*format_track)(const Sequence& s, int left, int right));
//! copy sequence from selected tracks as FASTA sequences
/*! \return number of base pairs copied
*/
#include <stdexcept>
using namespace std;
-GlSequence::GlSequence(boost::shared_ptr<Sequence> s,
+GlSequence::GlSequence(const Sequence &s,
boost::shared_ptr<AnnotationColors> cm)
- : seq(s),
+ : Sequence(s),
seq_x(0.0),
seq_y(0.0),
seq_z(1.0),
seq_height(12.0),
color_mapper(cm),
- drawColor(0.0, 0.0, 0.0),
+ drawColor(new Color(0.0, 0.0, 0.0)),
char_pix_per_world_unit(2.5)
{
}
GlSequence::GlSequence(const GlSequence &s)
- : seq(s.seq),
+ : Sequence(s),
seq_x(s.seq_x),
seq_y(s.seq_y),
seq_z(s.seq_z),
{
}
+//! Build a GlSequence from a pointer to an existing one, copying the
+//! sequence part through Sequence's pointer constructor and then every
+//! display setting of the source track.
+GlSequence::GlSequence(const GlSequence *other)
+  : Sequence(other),
+    seq_x(other->seq_x),
+    seq_y(other->seq_y),
+    seq_z(other->seq_z),
+    seq_height(other->seq_height),
+    color_mapper(other->color_mapper),
+    drawColor(other->drawColor),
+    char_pix_per_world_unit(other->char_pix_per_world_unit)
+{
+}
+
GlSequence &GlSequence::operator=(const GlSequence & s)
{
if (this != &s) {
- seq = s.seq;
+ Sequence::operator=(s);
seq_x = s.seq_x;
seq_y = s.seq_y;
seq_z = s.seq_z;
return *this;
}
-boost::shared_ptr<Sequence> GlSequence::sequence()
-{
- return seq;
-}
-
void GlSequence::setX(GLfloat value)
{
seq_x = value;
GLfloat GlSequence::size() const
{
- return seq->size();
+ return Sequence::size();
}
Sequence::size_type GlSequence::leftbase(GLfloat left) const
left = ceil(left - seq_x);
if (left < 0)
return 0;
- else if (left > seq->size() )
- return seq->size();
+ else if (left > Sequence::size() )
+ return Sequence::size();
else
return (Sequence::size_type)left;
}
Sequence::size_type GlSequence::rightbase(GLfloat right) const
{
right = floor(right) - seq_x;
- if (right > seq->size())
- return seq->size();
+ if (right > Sequence::size())
+ return Sequence::size();
else if ( right < 0)
return 0;
else
return (Sequence::size_type)right;
}
-Sequence::const_iterator GlSequence::sequence_begin() const
-{
- return seq->begin();
-}
-
-Sequence::const_iterator GlSequence::sequence_end() const
-{
- return seq->end();
-}
-
Sequence::const_iterator
-GlSequence::sequence_begin(GLfloat left, GLfloat right) const
+GlSequence::region_begin(GLfloat left, GLfloat right) const
{
- if ( leftbase(left) > seq->size() or left > right )
- return seq->end();
+ if ( leftbase(left) > Sequence::size() or left > right )
+ return Sequence::end();
else
- return seq->begin() + leftbase(left);
+ return Sequence::begin() + leftbase(left);
}
Sequence::const_iterator
-GlSequence::sequence_end(GLfloat left, GLfloat right) const
+GlSequence::region_end(GLfloat left, GLfloat right) const
{
- if ( rightbase(right) > seq->size() or left > right )
- return seq->end();
+ if ( rightbase(right) > Sequence::size() or left > right )
+ return Sequence::end();
else
- return seq->begin() + rightbase(right);
+ return Sequence::begin() + rightbase(right);
}
+//! Return the region [start, start+count) as a GlSequence that keeps this
+//! track's display settings.
+//! A track whose sequence has been cleared yields a plain copy of itself.
+GlSequence GlSequence::subseq(size_type start, size_type count) const
+{
+  GlSequence new_seq(*this);
+  // seq is reset by Sequence::clear(), so there may be nothing to slice
+  if (!seq) {
+    return new_seq;
+  }
+  new_seq.seq = seq->subseq(start, count);
+  // count may be npos or run past the end; copy_children computes
+  // start+count, so give it the length of the span actually produced
+  copy_children(new_seq, start, new_seq.seq->size());
+
+  return new_seq;
+}
//! set default track draw color
-void GlSequence::setColor(Color &c)
+void GlSequence::setColor(boost::shared_ptr<Color> &c)
{
drawColor = c;
}
//! get default track draw color
-Color GlSequence::color()
+boost::shared_ptr<Color> GlSequence::color()
{
return drawColor;
}
return viewport[3]; // grab the viewport width
}
-GLfloat GlSequence::get_pixel_width(GLfloat left, GLfloat right) const
+GLfloat GlSequence::pixelWidth(GLfloat left, GLfloat right) const
{
- return get_pixel_width(left, right, get_viewport_width_in_pixels());
+ return pixelWidth(left, right, get_viewport_width_in_pixels());
}
GLfloat
-GlSequence::get_pixel_width(GLfloat left, GLfloat right, int vp_width) const
+GlSequence::pixelWidth(GLfloat left, GLfloat right, int vp_width) const
{
return round((right-left)/vp_width);
}
GLfloat left, GLfloat right,
GLfloat height, GLfloat z) const
{
- GLfloat pixel_width = get_pixel_width(world_left, world_right);
+ GLfloat pixel_width = pixelWidth(world_left, world_right);
GLfloat offset = height/2.0;
GLfloat top = seq_y + offset;
GLfloat bottom = seq_y - offset;
void GlSequence::draw_track(GLfloat left, GLfloat right) const
{
- glColor3fv(drawColor.get());
+ glColor3fv(drawColor->get());
// draw main sequence track
- draw_box(left, right, seq_x, seq_x+seq->size(), seq_height, 0.0);
+ draw_box(left, right, seq_x, seq_x+Sequence::size(), seq_height, 0.0);
}
void GlSequence::draw_annotations(GLfloat left, GLfloat right) const
{
// draw annotations
GLfloat annotation_z = seq_z + 10.0;
- const std::list<annot>& annots = seq->annotations();
- const std::list<motif>& motifs = seq->motifs();
+ const std::list<annot>& annots = Sequence::annotations();
+ const std::list<motif>& motifs = Sequence::motifs();
for (std::list<annot>::const_iterator annot_itor = annots.begin();
annot_itor != annots.end();
++annot_itor)
// FIXME: basically this needs to be greater than the number of annotations
const GLfloat z = 30;
glLineWidth(1);
- glColor3fv(drawColor.get());
+ glColor3fv(drawColor->get());
- Sequence::const_iterator seq_itor = sequence_begin(left, right);
- Sequence::const_iterator seq_end = sequence_end(left, right);
+ Sequence::const_iterator seq_itor = region_begin(left, right);
+ Sequence::const_iterator seq_end = region_end(left, right);
Sequence::size_type basepair = 0;
const float bp_per_world = 1.0; //( world coord )
const float glyph_x_scale = 0.125; // unit = ( world coord / glyph coord )
assert(seq_end - seq_itor >= 0);
while(seq_itor != seq_end)
{
- assert ( basepair < seq->size() );
+ assert ( basepair < Sequence::size() );
glPushMatrix();
glTranslatef( seq_x+leftbase(left) + basepair + glyph_margin, seq_y, 1.0 );
glScalef(glyph_x_scale, 1.0, 1.0);
/*! The idea is this will keep track of the location of where the sequence
* is being rendered, and handle displaying annotations on that track
*/
-class GlSequence
+class GlSequence : public Sequence
{
public:
- GlSequence(boost::shared_ptr<Sequence> s,
+ GlSequence(const Sequence & s,
boost::shared_ptr<AnnotationColors> cm);
GlSequence(const GlSequence & s);
+ GlSequence(const GlSequence *);
GlSequence &operator=(const GlSequence &s);
//! draw a track
*/
void draw(GLfloat left, GLfloat right) const;
- boost::shared_ptr<Sequence> sequence();
//! set our starting x (horizontal) coordinate
void setX(GLfloat);
//! get our starting x (horizontal) coordinate
//! done mostly so all the iterator logic continues to work correctly.
Sequence::size_type rightbase(GLfloat right) const;
- //! return iterator to the start of the stored sequence
- Sequence::const_iterator sequence_begin() const;
- //! return iterator to the end of the stored sequence
- Sequence::const_iterator sequence_end() const;
//! provide an iterator to the sequence starting at world coordinate left
- Sequence::const_iterator sequence_begin(GLfloat left, GLfloat right) const;
+ Sequence::const_iterator region_begin(GLfloat left, GLfloat right) const;
//! provide an iterator to the sequence ending at world coordinate right
- Sequence::const_iterator sequence_end(GLfloat left, GLfloat right) const;
+ Sequence::const_iterator region_end(GLfloat left, GLfloat right) const;
+ //! return a subsequence as a GlSequence (instead of a Sequence subsequence)
+ GlSequence subseq(size_type start, size_type count) const;
+
//! set track color
- void setColor(Color &);
- Color color();
+ void setColor(boost::shared_ptr<Color> &);
+ boost::shared_ptr<Color> color();
//! how big is a pixel in world coordinats
-  GLfloat get_pixel_width(GLfloat, GLfloat) const;
+  GLfloat pixelWidth(GLfloat, GLfloat) const;
//! how big is a pixel in world coordinats (specifying viewport size)
- GLfloat get_pixel_width(GLfloat, GLfloat, int) const;
+ GLfloat pixelWidth(GLfloat, GLfloat, int) const;
//! are we close enough that it would make sense to view the base pairs?
bool is_sequence_renderable(GLfloat left, GLfloat right) const;
friend bool operator==(const GlSequence &left, const GlSequence &right);
protected:
- boost::shared_ptr<Sequence> seq;
GLfloat seq_x;
GLfloat seq_y;
GLfloat seq_z;
GLfloat seq_height;
boost::shared_ptr<AnnotationColors> color_mapper;
- Color drawColor;
+ boost::shared_ptr<Color> drawColor;
const GLfloat char_pix_per_world_unit;
//! Return the pixel width of the opengl viewport.
break;
}
Color c(red, green, blue, alpha);
- parsed->color_mapper->appendInstanceColor("motif", seq.c_str(), c);
+ std::string motif_subseq(seq.begin(), seq.end());
+ parsed->color_mapper->appendInstanceColor("motif", motif_subseq, c);
parsed->motifs.insert(seq);
parsed->sequence.clear();
--- /dev/null
+#ifndef SEQ_HPP_
+#define SEQ_HPP_
+
+#include <string>
+
+#include <boost/serialization/base_object.hpp>
+#include <boost/serialization/export.hpp>
+#include <boost/serialization/list.hpp>
+#include <boost/serialization/nvp.hpp>
+#include <boost/serialization/string.hpp>
+#include <boost/serialization/shared_ptr.hpp>
+#include <boost/serialization/utility.hpp>
+#include <boost/serialization/version.hpp>
+#include <boost/serialization/vector.hpp>
+
+#include <boost/shared_ptr.hpp>
+
+//! These classes provide for the internal implementation for the Sequence class
+
+/** std::string subclass that holds the bases of a sequence.
+ *
+ * It exists because the shared_ptr serialization templates need the
+ * serialization support to be declared in-class.
+ */
+class SeqString : public std::string
+{
+public:
+  typedef std::string::difference_type difference_type;
+  typedef std::string::iterator iterator;
+  typedef std::string::reverse_iterator reverse_iterator;
+  typedef std::string::const_iterator const_iterator;
+  typedef std::string::const_reverse_iterator const_reverse_iterator;
+  typedef std::string::reference reference;
+  typedef std::string::const_reference const_reference;
+  typedef std::string::size_type size_type;
+  typedef std::string::value_type value_type;
+  static const size_type npos = std::string::npos;
+
+  //! create an empty sequence string
+  SeqString() : std::string() {}
+  //! create a sequence string holding a copy of s
+  SeqString(const std::string &s) : std::string(s) {}
+
+private:
+  friend class boost::serialization::access;
+  //! archive the string base object under the name "bases"
+  template<class Archive>
+  void serialize(Archive& ar, const unsigned int /*version*/) {
+    ar & boost::serialization::make_nvp(
+           "bases",
+           boost::serialization::base_object<std::string>(*this));
+  }
+};
+typedef boost::shared_ptr<SeqString> SeqStringRef;
+
+#endif /*SEQ_HPP_*/
--- /dev/null
+#include <stdexcept>
+#include <set>
+#include <algorithm>
+
+#include "seq_span.hpp"
+#include "mussa_exceptions.hpp"
+
+//! Copy constructor: the copy refers to the same sequence container and
+//! parent span as the original and covers the same start/count.
+SeqSpan::SeqSpan(const SeqSpan &other)
+  : seq(other.seq),
+    seq_start(other.seq_start),
+    seq_count(other.seq_count),
+    parent(other.parent)
+{
+}
+
+//! Copy a span through a pointer; source must not be null since it is
+//! dereferenced without a check.
+SeqSpan::SeqSpan(const SeqSpan *source)
+  : seq(source->seq),
+    seq_start(source->seq_start),
+    seq_count(source->seq_count),
+    parent(source->parent)
+{
+}
+
+//! Create a root span: a new SeqString holding a copy of bases, covered
+//! in full (start 0, count bases.length()) and with no parent.
+SeqSpan::SeqSpan(const std::string &bases)
+  : seq(new SeqString(bases)),
+    seq_start(0),
+    seq_count(bases.length()),
+    parent()
+{
+}
+
+//! Create a child span of parent_ that shares its sequence container.
+//! \param parent_ span this one is cut from (must not be null)
+//! \param start_  offset of the first base, relative to parent_
+//! \param count_  number of bases; npos means "through the end of parent_"
+SeqSpan::SeqSpan(const SeqSpanRef parent_, size_type start_, size_type count_)
+  : seq(parent_->seq),
+    seq_start(parent_->seq_start + start_),
+    seq_count(count_),
+    parent(parent_)
+{
+  if (count_ == npos) {
+    // only the bases after start_ remain; taking the parent's whole count
+    // would run past the parent's end whenever start_ > 0
+    const size_type parent_count = parent_->seq_count;
+    seq_count = (start_ < parent_count) ? parent_count - start_ : 0;
+  }
+}
+
+//////
+// Helpful operators
+//! Assignment: make this span refer to the same container, range and
+//! parent as rhs. Self-assignment is a no-op.
+SeqSpan &SeqSpan::operator=(const SeqSpan& rhs)
+{
+  if (this == &rhs) {
+    return *this;
+  }
+  seq = rhs.seq;
+  seq_start = rhs.seq_start;
+  seq_count = rhs.seq_count;
+  parent = rhs.parent;
+  return *this;
+}
+
+//! Write the bases covered by the span to out.
+//! \return out, so that insertions can be chained
+std::ostream& operator<<(std::ostream& out, const SeqSpan& s)
+{
+  out << s.sequence();
+  // flowing off the end of a non-void function is undefined behaviour
+  return out;
+}
+
+/* Not implemented yet
+//! compare two spans
+//! \throws sequence_invalid_comparison
+friend bool operator<(const SeqSpan&, const SeqSpan&);
+bool operator<(const SeqSpan& a, const SeqSpan& b)
+{
+ // are we subcomponents of the same sequence region?
+ if (a.seq.get() == b.seq.get()) {
+ if (a.seq_start < b.seq_start)
+ return true;
+ else
+ return false;
+ } else {
+ throw mussa_error("can only compare two spans from the same sequence");
+ }
+}
+*/
+#include <iostream>
+//! Two spans are equal when they belong to the same sequence container
+//! and cover the same start and count. Spans over different containers
+//! always compare unequal here; their contents are not inspected.
+bool operator==(const SeqSpan& a, const SeqSpan& b)
+{
+  if (!SeqSpan::isFamily(a, b)) {
+    return false;
+  }
+  // same container, so comparing the coordinates is sufficient
+  return a.seq_start == b.seq_start and a.seq_count == b.seq_count;
+}
+
+//! Inequality, defined as the negation of operator==.
+bool operator!=(const SeqSpan& a, const SeqSpan& b)
+{
+  const bool same = (a == b);
+  return !same;
+}
+
+//! Element access; forwards to at(), so it is checked the same way.
+SeqSpan::const_reference SeqSpan::operator[](SeqSpan::size_type i) const
+{
+  return this->at(i);
+}
+
+//! Return the base at offset i from the start of the span.
+//! \throws std::out_of_range if the span has no sequence container; the
+//!         container's own at() does the bounds check otherwise
+SeqSpan::const_reference SeqSpan::at(SeqSpan::size_type i) const
+{
+  if (!seq) {
+    throw std::out_of_range("empty sequence");
+  }
+  // translate the span-relative offset into a container index
+  const size_type container_index = i + seq_start;
+  return seq->at(container_index);
+}
+
+//! Return a pointer to the first base of the span inside the container's
+//! character buffer, or 0 when there is no container.
+const char *SeqSpan::data() const
+{
+  if (!seq) {
+    return 0;
+  }
+  return seq->c_str() + seq_start;
+}
+
+//! Iterator to the first base of the span.
+//! An empty span returns a value-initialized iterator, the same value
+//! end() returns, so begin() == end() holds.
+SeqSpan::const_iterator SeqSpan::begin() const
+{
+  if (seq and seq_count != 0)
+    return seq->begin() + seq_start;
+  else
+    // value-initialize like rbegin()/rend() do; building a string iterator
+    // from the literal 0 is not portable across standard libraries
+    return SeqSpan::const_iterator();
+}
+
+//! Iterator one past the last base of the span.
+//! An empty span returns a value-initialized iterator, the same value
+//! begin() returns, so begin() == end() holds.
+SeqSpan::const_iterator SeqSpan::end() const
+{
+  if (seq and seq_count != 0) {
+    return seq->begin() + seq_start + seq_count;
+  } else {
+    // value-initialize like rbegin()/rend() do; building a string iterator
+    // from the literal 0 is not portable across standard libraries
+    return SeqSpan::const_iterator();
+  }
+}
+
+//! Reverse iterator positioned on the last base of the span; a default
+//! constructed reverse iterator when the span is empty.
+SeqSpan::const_reverse_iterator SeqSpan::rbegin() const
+{
+  if (!seq or seq_count == 0) {
+    return SeqSpan::const_reverse_iterator();
+  }
+  // bases of the container that lie after our stop() must be skipped
+  const size_type trailing = seq->size() - (seq_start + seq_count);
+  return seq->rbegin() + trailing;
+}
+
+//! Reverse end iterator: rbegin() advanced by the span length; a default
+//! constructed reverse iterator when the span is empty.
+SeqSpan::const_reverse_iterator SeqSpan::rend() const
+{
+  if (!seq or seq_count == 0) {
+    return SeqSpan::const_reverse_iterator();
+  }
+  return rbegin() + seq_count;
+}
+
+//! True when the span covers no bases.
+bool SeqSpan::empty() const
+{
+  return seq_count == 0;
+}
+
+//! Find the first base, at or after index, that does not occur in query.
+//! \param query characters that are considered acceptable
+//! \param index span-relative offset at which to start searching
+//! \return span-relative offset of the first base not in query, or npos
+//!         if there is none (including when index is past the end)
+SeqSpan::size_type SeqSpan::find_first_not_of(
+  const std::string& query,
+  SeqSpan::size_type index) const
+{
+  // nothing to inspect at or beyond the end of the span
+  if (index >= size()) {
+    return SeqSpan::npos;
+  }
+  const const_iterator first = begin();
+  const const_iterator last = end();
+  for (const_iterator base = first + index; base != last; ++base) {
+    if (query.find(*base) == std::string::npos) {
+      return base - first;
+    }
+  }
+  return SeqSpan::npos;
+}
+
+//! Move the start of the span to v (relative to the root sequence) while
+//! keeping stop() where it is.
+//! \throws mussa_error if v lies beyond the current stop()
+void SeqSpan::setStart(SeqSpan::size_type v)
+{
+  const size_type fixed_stop = stop();
+  if (v > fixed_stop) {
+    throw mussa_error("can't set Start > Stop");
+  }
+  // the stop stays put, so the length is whatever lies between v and it
+  seq_count = fixed_stop - v;
+  seq_start = v;
+}
+
+//! Move the stop of the span to v (relative to the root sequence).
+//! The resulting length is capped at parentSize() - parentStart().
+//! \throws mussa_error if v lies before the current start()
+void SeqSpan::setStop(SeqSpan::size_type v)
+{
+  if (v < start()) {
+    // negative sized sequences are bad
+    throw mussa_error("can't set Stop < Start");
+  }
+  const size_type requested = v - seq_start;
+  const size_type limit = parentSize() - parentStart();
+  seq_count = std::min<size_type>(requested, limit);
+}
+
+//! Start position relative to the parent span, or relative to the root
+//! sequence when this span has no parent.
+SeqSpan::size_type SeqSpan::parentStart() const
+{
+  if (parent) {
+    return start() - parent->start();
+  }
+  // no parent
+  return start();
+}
+
+//! Move the start of the span to offset v within the parent span.
+//! Without a parent, v is taken relative to the root sequence, which is
+//! the same convention parentStart() uses when reporting the position.
+//! \throws mussa_error (from setStart) if the new start is past stop()
+void SeqSpan::setParentStart(SeqSpan::size_type v)
+{
+  if (!parent) {
+    // root spans have no parent to dereference
+    setStart(v);
+  } else {
+    setStart(parent->start() + v);
+  }
+}
+
+//! Stop position relative to the parent span, or relative to the root
+//! sequence when this span has no parent.
+SeqSpan::size_type SeqSpan::parentStop() const
+{
+  if (parent) {
+    return stop() - parent->start();
+  }
+  // no parent
+  return stop();
+}
+
+//! Move the stop of the span to offset v within the parent span.
+//! Without a parent, v is taken relative to the root sequence, which is
+//! the same convention parentStop() uses when reporting the position.
+//! \throws mussa_error (from setStop) if the new stop is before start()
+void SeqSpan::setParentStop(SeqSpan::size_type v)
+{
+  if (!parent) {
+    // root spans have no parent to dereference
+    setStop(v);
+  } else {
+    setStop(parent->start() + v);
+  }
+}
+
+//! Return a child span covering count bases beginning at offset start.
+//! Both values are clamped to this span, so a start past the end yields
+//! an empty span positioned at the end.
+//! The span must be owned by a shared_ptr, since shared_from_this() is
+//! used to record it as the parent.
+SeqSpanRef SeqSpan::subseq(size_type start, size_type count)
+{
+  // clamp start first so that seq_count - start cannot wrap around
+  start = std::min<size_type>(start, seq_count);
+  count = std::min<size_type>(count, seq_count - start);
+
+  SeqSpanRef new_span(new SeqSpan(this->shared_from_this(), start, count));
+  return new_span;
+}
+
+//! Return a copy of the bases covered by the span; an empty string when
+//! there is no sequence container.
+std::string SeqSpan::sequence() const
+{
+  if (!seq) {
+    return std::string();
+  }
+  return seq->substr(seq_start, seq_count);
+}
+
+//! True when both spans refer to the very same sequence container object
+//! (pointer identity, not content equality).
+bool SeqSpan::isFamily(const SeqSpan& a, const SeqSpan& b)
+{
+  const SeqString *a_container = a.seq.get();
+  const SeqString *b_container = b.seq.get();
+  return a_container == b_container;
+}
--- /dev/null
+#ifndef SEQ_SPAN_HPP_
+#define SEQ_SPAN_HPP_
+
+#include <string>
+
+#include <boost/serialization/base_object.hpp>
+#include <boost/serialization/export.hpp>
+#include <boost/serialization/list.hpp>
+#include <boost/serialization/nvp.hpp>
+#include <boost/serialization/string.hpp>
+#include <boost/serialization/shared_ptr.hpp>
+#include <boost/serialization/utility.hpp>
+#include <boost/serialization/version.hpp>
+#include <boost/serialization/vector.hpp>
+
+#include <boost/shared_ptr.hpp>
+#include <boost/enable_shared_from_this.hpp>
+
+//! These classes provide for the internal implementation for the Sequence class
+#include "seq.hpp"
+
+class SeqSpan;
+typedef boost::shared_ptr<SeqSpan> SeqSpanRef;
+
+//! Track what segment of a sequence we're looking at.
+/*! A span is a (start, count) window onto a shared SeqString. Child
+ *  spans created with subseq() share the container and remember the
+ *  span they were cut from.
+ */
+class SeqSpan : public boost::enable_shared_from_this<SeqSpan> {
+public:
+  typedef SeqString::difference_type difference_type;
+  typedef SeqString::iterator iterator;
+  typedef SeqString::reverse_iterator reverse_iterator;
+  typedef SeqString::const_iterator const_iterator;
+  typedef SeqString::const_reverse_iterator const_reverse_iterator;
+  typedef SeqString::reference reference;
+  typedef SeqString::const_reference const_reference;
+  typedef SeqString::size_type size_type;
+  typedef SeqString::value_type value_type;
+  static const size_type npos = SeqString::npos;
+
+public:
+  //! copy a span, sharing its container and parent
+  SeqSpan(const SeqSpan &);
+  //! copy a span through a (non-null) pointer
+  SeqSpan(const SeqSpan *);
+  //! create a root span holding a copy of the given string
+  explicit SeqSpan(const std::string &);
+  //! create a child span of the given parent
+  SeqSpan(const SeqSpanRef, size_type start=0, size_type count=npos);
+
+  //! assignment
+  SeqSpan& operator=(const SeqSpan&);
+  //! output
+  friend std::ostream& operator<<(std::ostream&, const SeqSpan&);
+  //! equality
+  friend bool operator==(const SeqSpan&, const SeqSpan&);
+  friend bool operator!=(const SeqSpan&, const SeqSpan&);
+
+  //! \defgroup string_operators
+  //! @{
+  //! retrieve element at specific position
+  const_reference at(size_type n) const;
+  //! retrieve element at specific location
+  const_reference operator[](SeqSpan::size_type i) const;
+  //! return c pointer to the sequence data
+  const char *data() const;
+  //! forward iterator
+  const_iterator begin() const;
+  //! last iterator
+  const_iterator end() const;
+  //! is our sequence empty?
+  bool empty() const;
+  //! find first
+  size_type find_first_not_of(const std::string&, size_type index=0) const;
+  //! how many base pairs are there in our sequence
+  size_type size() const { return seq_count; }
+  //! alias for size (used by string)
+  size_type length() const { return size(); }
+  //! reverse iterator
+  const_reverse_iterator rbegin() const;
+  //! reverse end iterator
+  const_reverse_iterator rend() const;
+  //! @}
+
+  //! start position relative to root sequence
+  size_type start() const { return seq_start; }
+  //! set position relative to root sequence
+  void setStart(size_type);
+  //! one past the last position relative to the root sequence
+  size_type stop() const { return seq_start + seq_count; }
+  //! set one past the last position relative to the root sequence.
+  void setStop(size_type);
+
+  //! get start position relative to the parent sequence
+  size_type parentStart() const;
+  //! set start position relative to parent sequence
+  void setParentStart(size_type);
+  //! get stop position relative to the parent sequence
+  size_type parentStop() const;
+  //! set stop position relative to parent sequence
+  void setParentStop(size_type);
+  //! size of the parent span, or our own size when there is no parent
+  size_type parentSize() const { return (parent) ? parent->size() : size(); }
+
+
+  //! return a subsequence, copying over any appropriate annotation
+  SeqSpanRef subseq(size_type start=0, size_type count = std::string::npos);
+  //! get sequence
+  std::string sequence() const;
+  //! are both sequences derived from the same sequence tree?
+  static bool isFamily(const SeqSpan& a, const SeqSpan& b);
+
+  friend class Sequence;
+private:
+  //! do not statically initialize, only create with new
+  //! (start and count are zeroed so that size() and stop() are well
+  //! defined before the members are filled in)
+  SeqSpan() : seq_start(0), seq_count(0) {}
+protected:
+  //! keep a reference to the sequence container
+  SeqStringRef seq;
+  //! where our start location is in the full sequence
+  size_type seq_start;
+  //! how big we are
+  size_type seq_count;
+  // Do I need to track the strand here?
+
+  //! keep a reference to who our parent span is
+  SeqSpanRef parent;
+
+  // boost::serialization support
+  friend class boost::serialization::access;
+  template<class Archive>
+  void serialize(Archive& ar, const unsigned int /*version*/) {
+    ar & BOOST_SERIALIZATION_NVP(seq);
+    ar & BOOST_SERIALIZATION_NVP(seq_start);
+    ar & BOOST_SERIALIZATION_NVP(seq_count);
+    ar & BOOST_SERIALIZATION_NVP(parent);
+  }
+};
+#endif /*SEQ_SPAN_HPP_*/
Sequence::Sequence(alphabet_ref alphabet_)
- : parent(0),
+ : seq(new SeqSpan("")),
alphabet(alphabet_),
- seq_start(0),
- seq_count(0),
strand(UnknownStrand)
{
}
}
Sequence::Sequence(const char *seq, alphabet_ref alphabet_)
- : parent(0),
- alphabet(alphabet_),
- seq_start(0),
- seq_count(0),
+ : alphabet(alphabet_),
strand(UnknownStrand),
header(""),
species("")
}
Sequence::Sequence(const std::string& seq, alphabet_ref alphabet_)
- : parent(0),
- alphabet(alphabet_),
- seq_start(0),
- seq_count(0),
+ : alphabet(alphabet_),
strand(UnknownStrand),
header(""),
species("")
}
Sequence::Sequence(const Sequence& o)
- : parent(o.parent),
- seq(o.seq),
+ : seq(o.seq),
alphabet(o.alphabet),
- seq_start(o.seq_start),
- seq_count(o.seq_count),
strand(o.strand),
header(o.header),
species(o.species),
{
}
+//! Copy a Sequence through a pointer: shares the span reference and
+//! copies alphabet, strand, header, species, annotations and motifs.
+//! other is dereferenced without a null check.
+Sequence::Sequence(const Sequence* other)
+  : seq(other->seq),
+    alphabet(other->alphabet),
+    strand(other->strand),
+    header(other->header),
+    species(other->species),
+    annots(other->annots),
+    motif_list(other->motif_list)
+{
+}
+
+//! Wrap an existing span in a Sequence with the given alphabet, unknown
+//! strand and empty header/species.
+Sequence::Sequence(const SeqSpanRef& seq_ref, alphabet_ref alphabet_)
+  : seq(seq_ref),
+    // take the constructor argument; alphabet(alphabet) would initialize
+    // the member from its own indeterminate value
+    alphabet(alphabet_),
+    strand(UnknownStrand),
+    header(""),
+    species("")
+{
+}
+
Sequence &Sequence::operator=(const Sequence& s)
{
if (this != &s) {
- parent = s.parent;
seq = s.seq;
alphabet = s.alphabet;
- seq_start = s.seq_start;
- seq_count = s.seq_count;
strand = s.strand;
header = s.header;
species = s.species;
alphabet = alphabet_;
if ( count == npos)
count = in_seq.size() - start;
- boost::shared_ptr<seq_string> new_seq(new seq_string);
- new_seq->reserve(count);
+ std::string new_seq;
+ new_seq.reserve(count);
// finally, the actual conversion loop
const Alphabet& alpha_impl = get_alphabet(); // go get one of our actual alphabets
for(size_type i = 0; i != count; ++i, ++seq_i)
{
if (alpha_impl.exists(*seq_i)) {
- new_seq->append(1, toupper(*seq_i));
+ new_seq.append(1, toupper(*seq_i));
} else {
- new_seq->append(1, 'N');
+ new_seq.append(1, 'N');
}
}
- parent = 0;
- seq = new_seq;
- seq_start = 0;
- seq_count = count;
+ SeqSpanRef new_seq_ref(new SeqSpan(new_seq));
+ seq = new_seq_ref;
strand = strand_;
}
return annots;
}
-Sequence
-Sequence::subseq(int start, int count)
+void Sequence::copy_children(Sequence &new_seq, size_type start, size_type count) const
{
- if (!seq) {
- Sequence new_seq;
- return new_seq;
- }
-
- // there might be an off by one error with start+count > size()
- if ( count == npos || start+count > size()) {
- count = size()-start;
- }
- Sequence new_seq(*this);
- new_seq.parent = this;
- new_seq.seq_start = seq_start+start;
- new_seq.seq_count = count;
-
new_seq.motif_list = motif_list;
new_seq.annots.clear();
- // attempt to copy & reannotate position based annotations
- int end = start+count;
for(std::list<annot>::const_iterator annot_i = annots.begin();
annot_i != annots.end();
++annot_i)
{
- int annot_begin= annot_i->begin;
- int annot_end = annot_i->end;
+ size_type annot_begin= annot_i->begin;
+ size_type annot_end = annot_i->end;
- if (annot_begin < end) {
+ if (annot_begin < start+count) {
if (annot_begin >= start) {
annot_begin -= start;
} else {
annot_begin = 0;
}
- if (annot_end < end) {
+ if (annot_end < start+count) {
annot_end -= start;
} else {
annot_end = count;
new_seq.annots.push_back(new_annot);
}
}
+
+}
+//! Return the region [start, start+count) as a new Sequence that shares
+//! the underlying span tree; copy_children fills in its motifs and
+//! annotations. An empty (null) sequence yields a default Sequence.
+Sequence
+Sequence::subseq(size_type start, size_type count) const
+{
+  if (!seq) {
+    Sequence new_seq;
+    return new_seq;
+  }
+
+  Sequence new_seq = *this;
+  new_seq.seq = seq->subseq(start, count);
+  // count may be npos or run past the end; copy_children computes
+  // start+count, so give it the length of the span actually produced
+  copy_children(new_seq, start, new_seq.seq->size());
+
return new_seq;
}
std::string Sequence::get_sequence() const
{
- if (seq)
- return *seq;
- else
- return std::string();
+  // seq is reset by clear(), so an empty Sequence must not dereference it
+  if (seq)
+    return seq->sequence();
+  else
+    return std::string();
}
Sequence::const_reference Sequence::operator[](Sequence::size_type i) const
return at(i);
}
-Sequence::const_reference Sequence::at(Sequence::size_type i) const
-{
- if (!seq) throw std::out_of_range("empty sequence");
- return seq->at(i+seq_start);
-}
-
void
Sequence::clear()
{
- parent = 0;
seq.reset();
- seq_start = 0;
- seq_count = 0;
strand = UnknownStrand;
header.clear();
species.clear();
motif_list.clear();
}
-const char *Sequence::c_str() const
-{
- if (seq)
- return seq->c_str()+seq_start;
- else
- return 0;
-}
-
-Sequence::const_iterator Sequence::begin() const
-{
- if (seq and seq_count != 0)
- return seq->begin()+seq_start;
- else
- return Sequence::const_iterator(0);
-}
-
-Sequence::const_iterator Sequence::end() const
-{
- if (seq and seq_count != 0) {
- return seq->begin() + seq_start + seq_count;
- } else {
- return Sequence::const_iterator(0);
- }
-}
-
-Sequence::const_reverse_iterator Sequence::rbegin() const
-{
- if (seq and seq_count != 0)
- return seq->rbegin()+(seq->size()-(seq_start+seq_count));
- else
- return Sequence::const_reverse_iterator();
-}
-
-Sequence::const_reverse_iterator Sequence::rend() const
-{
- if (seq and seq_count != 0) {
- return rbegin() + seq_count;
- } else {
- return Sequence::const_reverse_iterator();
- }
-}
-
-bool Sequence::empty() const
-{
- return (seq_count == 0) ? true : false;
-}
-
-Sequence::size_type Sequence::find_first_not_of(
- const std::string& query,
- Sequence::size_type index)
-{
- typedef std::set<std::string::value_type> sequence_set;
- sequence_set match_set;
-
- for(const_iterator query_item = query.begin();
- query_item != query.end();
- ++query_item)
- {
- match_set.insert(*query_item);
- }
- for(const_iterator base = begin();
- base != end();
- ++base)
- {
- if(match_set.find(*base) == match_set.end()) {
- return base-begin();
- }
- }
- return Sequence::npos;
-}
-
-Sequence::size_type Sequence::start() const
-{
- if (parent)
- return seq_start - parent->start();
- else
- return seq_start;
-}
-
-Sequence::size_type Sequence::stop() const
-{
- return start() + seq_count;
-}
-
-Sequence::size_type Sequence::size() const
-{
- return seq_count;
-}
-
-Sequence::size_type Sequence::length() const
-{
- return size();
-}
-
void
Sequence::save(fs::fstream &save_file)
{
// this is pretty much a straight translation of Nora's python code
// to match iupac letter codes
motif_char = toupper(a_motif[motif_i]);
- seq_char = toupper(seq->at(seq_start+seq_i));
+ seq_char = toupper(seq->at(seq_i));
if (motif_char =='N')
motif_i++;
else if (motif_char == seq_char)
std::ostream& operator<<(std::ostream& out, const Sequence& s)
{
- for(Sequence::const_iterator s_i = s.begin(); s_i != s.end(); ++s_i) {
- out << *s_i;
+ if (s.seq) {
+ for(Sequence::const_iterator s_i = s.begin(); s_i != s.end(); ++s_i) {
+ out << *s_i;
+ }
}
return out;
}
}
}
-bool operator==(const Sequence& x, const Sequence& y)
+template <typename Iter1, typename Iter2>
+static
+bool sequence_insensitive_equality(Iter1 abegin, Iter1 aend, Iter2 bbegin, Iter2 bend)
{
- if (x.empty() and y.empty()) {
- // if there's no sequence in either sequence structure, they're equal
+ Iter1 aseq_i = abegin;
+ Iter2 bseq_i = bbegin;
+ if (aend-abegin == bend-bbegin) {
+ // since the length of the two sequences is equal, we only need to
+ // test one.
+ for(; aseq_i != aend; ++aseq_i, ++bseq_i) {
+ if (toupper(*aseq_i) != toupper(*bseq_i)) {
+ return false;
+ }
+ }
return true;
- } else if (x.empty() or y.empty()) {
- // if we fail the first test, and we discover one is empty,
- // we know they can't be equal. (and we need to do this
- // to prevent dereferencing an empty pointer)
- return false;
- } else if (x.seq_count != y.seq_count) {
- // if they're of different lenghts, they're not equal
+ } else {
return false;
}
- Sequence::const_iterator xseq_i = x.begin();
- Sequence::const_iterator yseq_i = y.begin();
- // since the length of the two sequences is equal, we only need to
- // test one.
- for(; xseq_i != x.end(); ++xseq_i, ++yseq_i) {
- if (toupper(*xseq_i) != toupper(*yseq_i)) {
- return false;
+}
+
+//! Sequence equality. Spans from the same SeqSpan tree are compared by
+//! position; otherwise the bases are compared case-insensitively. If
+//! either sequence has no span, the two are equal when their sizes match.
+bool operator==(const Sequence& x, const Sequence& y)
+{
+  if (x.seq and y.seq) {
+    // both x and y are defined
+    // dereference explicitly: handing the SeqSpanRefs to isFamily would
+    // build temporary child spans through SeqSpan's converting constructor
+    if (SeqSpan::isFamily(*(x.seq), *(y.seq))) {
+      // both are part of the same SeqSpan tree
+      return *(x.seq) == *(y.seq);
+    } else {
+      // we'll have to do a real comparison
+      return sequence_insensitive_equality<SeqSpan::const_iterator, SeqSpan::const_iterator>(
+               x.begin(), x.end(),
+               y.begin(), y.end()
+             );
}
+  } else {
+    // true if they're both empty (with either a null SeqSpanRef or
+    // a zero length string
+    return (x.size() == y.size());
}
- return true;
}
bool operator!=(const Sequence& x, const Sequence& y)
{
return not operator==(x, y);
}
+
#include <boost/serialization/vector.hpp>
#include <boost/shared_ptr.hpp>
+#include <boost/enable_shared_from_this.hpp>
#include <iostream>
-#include "alg/alphabet.hpp"
+#include "alphabet.hpp"
+#include "seq.hpp"
+#include "seq_span.hpp"
// Sequence data class
};
BOOST_CLASS_EXPORT(motif);
-//! the only purpose of this class is that the shared_ptr template
-//! functions need the serialization support to be in-class.
-class seq_string : public std::string
-{
-public:
- typedef std::string::iterator iterator;
- typedef std::string::reverse_iterator reverse_iterator;
- typedef std::string::const_iterator const_iterator;
- typedef std::string::const_reverse_iterator const_reverse_iterator;
-private:
- friend class boost::serialization::access;
- template<class Archive>
- void serialize(Archive& ar, const unsigned int /*version*/) {
- //ar & BOOST_SERIALIZATION_BASE_OBJECT_NVP(std::string);
- ar & boost::serialization::make_nvp("bases",
- boost::serialization::base_object<std::string>(*this)
- );
- }
-};
//! sequence track for mussa.
class Sequence
{
public:
- typedef std::string::value_type value_type;
- typedef std::string::difference_type difference_type;
- typedef std::string::iterator iterator;
- typedef std::string::reverse_iterator reverse_iterator;
- typedef std::string::const_iterator const_iterator;
- typedef std::string::const_reverse_iterator const_reverse_iterator;
- typedef std::string::reference reference;
- typedef std::string::const_reference const_reference;
- typedef std::string::size_type size_type;
- static const size_type npos = std::string::npos;
+ typedef SeqString::value_type value_type;
+ typedef SeqString::difference_type difference_type;
+ typedef SeqString::iterator iterator;
+ typedef SeqString::reverse_iterator reverse_iterator;
+ typedef SeqString::const_iterator const_iterator;
+ typedef SeqString::const_reverse_iterator const_reverse_iterator;
+ typedef SeqString::reference reference;
+ typedef SeqString::const_reference const_reference;
+ typedef SeqString::size_type size_type;
+ static const size_type npos = SeqString::npos;
enum strand_type { UnknownStrand, PlusStrand, MinusStrand, BothStrand };
enum alphabet_ref { reduced_dna_alphabet, reduced_rna_alphabet, reduced_nucleic_alphabet,
nucleic_alphabet, protein_alphabet };
Sequence(const char* seq, alphabet_ref a = reduced_nucleic_alphabet);
Sequence(const std::string& seq, alphabet_ref a = reduced_nucleic_alphabet);
Sequence(const Sequence& seq);
+ Sequence(const Sequence *);
+ Sequence(const SeqSpanRef&, alphabet_ref a = reduced_nucleic_alphabet);
~Sequence();
//! assignment to constant sequences
Sequence &operator=(const Sequence&);
strand_type strand=UnknownStrand);
//! retrive element at specific position
- const_reference at(size_type n) const;
+ const_reference at(size_type i) const { return seq->at(i); }
//! clear the sequence and its annotations
void clear();
- //! return c pointer to the sequence data
- const char *c_str() const;
+ //! return a c pointer to the sequence data; the data is NOT null terminated
+ const char *data() const { return seq->data(); }
//! forward iterator
- const_iterator begin() const;
+ const_iterator begin() const { return seq->begin(); }
//! last iterator
- const_iterator end() const;
+ const_iterator end() const { return seq->end(); }
//! is our sequence empty?
- bool empty() const;
+ bool empty() const { return (seq) ? seq->empty() : true ; }
//! find first
- size_type find_first_not_of(const std::string&, size_type index=0);
+ size_type find_first_not_of(const std::string& q, size_type index=0) { return seq->find_first_not_of(q, index); }
//! how many base pairs are there in our sequence
- size_type size() const;
+ size_type size() const { return (seq) ? seq->size() : 0; }
//! alias for size (used by string)
- size_type length() const;
+ size_type length() const { return size(); }
//! reverse iterator
- const_reverse_iterator rbegin() const;
+ const_reverse_iterator rbegin() const { return seq->rbegin(); }
//! reverse end iterator
- const_reverse_iterator rend() const;
+ const_reverse_iterator rend() const { return seq->rend(); }
//! is our sequence empty?
//! start position relative to "base" sequence
- size_type start() const;
+ size_type start() const { return seq->parentStart(); }
//! one past the last position relative to "base" sequence
- size_type stop() const;
+ size_type stop() const { return seq->parentStop(); }
//! return a subsequence, copying over any appropriate annotation
- Sequence subseq(int start=0, int count = std::string::npos);
+ Sequence subseq(size_type start=0, size_type count = npos) const;
//! reverse a character
std::string create_reverse_map() const;
//! return a reverse compliment (this needs to be improved?)
std::list<Sequence>::iterator end);
void save(boost::filesystem::fstream &save_file);
- void load_museq(boost::filesystem::path load_file_path, int seq_num);
+ void load_museq(boost::filesystem::path load_file_path, int seq_num);
-private:
- //! parent sequence
- Sequence *parent;
- //! hold a shared pointer to our sequence string
- boost::shared_ptr<seq_string> seq;
+protected:
+ SeqSpanRef seq;
//! which alphabet we're using
alphabet_ref alphabet;
- //! start offset into the sequence
- size_type seq_start;
- //! number of basepairs of the shared sequence we represent
- size_type seq_count;
//! strand orientation
strand_type strand;
//! fasta header
//! a seperate list for motifs since we're currently not saving them
std::list<motif> motif_list;
+ //! copy over all our annotation children
+ void copy_children(Sequence &, size_type start, size_type count) const;
+
void motif_scan(const Sequence& a_motif, std::vector<int> * motif_match_starts) const;
std::string rc_motif(std::string a_motif) const;
//! look for a string sequence type and and it to an annotation list
friend class boost::serialization::access;
template<class Archive>
void serialize(Archive& ar, const unsigned int /*version*/) {
- ar & BOOST_SERIALIZATION_NVP(parent);
ar & BOOST_SERIALIZATION_NVP(seq);
ar & BOOST_SERIALIZATION_NVP(alphabet);
- ar & BOOST_SERIALIZATION_NVP(seq_start);
- ar & BOOST_SERIALIZATION_NVP(seq_count);
ar & BOOST_SERIALIZATION_NVP(strand);
ar & BOOST_SERIALIZATION_NVP(header);
ar & BOOST_SERIALIZATION_NVP(species);
}
};
BOOST_CLASS_EXPORT(Sequence);
-
#endif
#include <boost/shared_ptr.hpp>
#include "alg/sequence.hpp"
+// FIXME: Deprecate! SeqSpan and/or Sequence should replace sequence location
//! convenience structure for holding selected track segments
class SequenceLocation {
public:
MAKE_ALG_UNITTEST( test_mussa )
MAKE_ALG_UNITTEST( test_nway )
MAKE_ALG_UNITTEST( test_sequence )
+MAKE_ALG_UNITTEST( test_seq_span )
MAKE_ALG_UNITTEST( test_sequence_location )
IF(USE_PAIRCOMP)
string s0("AAGGCCTT");
string s1("TTGGCCAA");
string s2("GATTACAA");
- boost::shared_ptr<Sequence> seq0(new Sequence(s0));
- boost::shared_ptr<Sequence> seq1(new Sequence(s1));
- boost::shared_ptr<Sequence> seq2(new Sequence(s2));
+ Sequence seq0(s0);
+ Sequence seq1(s1);
+ Sequence seq2(s2);
boost::shared_ptr<AnnotationColors> cm(new AnnotationColors);
GlSequence glseq0(seq0, cm);
GlSequence glseq1(seq1, cm);
// I don't trust my operator= hack so lets make sure it works.
string s0("AAGGCCTT");
string s1("TTGGCCAA");
- boost::shared_ptr<Sequence> seq0(new Sequence(s0));
- boost::shared_ptr<Sequence> seq1(new Sequence(s1));
+ Sequence seq0(s0);
+ Sequence seq1(s1);
GlSequence glseq0(seq0, cm);
- BOOST_CHECK (*glseq0.sequence() == s0);
+ BOOST_CHECK (glseq0.get_sequence() == s0);
GlSequence glseq1(seq1, cm);
GlSequence glseq_copy0(glseq0);
- BOOST_CHECK(glseq_copy0.sequence() == glseq0.sequence());
- BOOST_CHECK( glseq_copy0.sequence() == glseq0.sequence());
+ BOOST_CHECK(glseq_copy0.get_sequence() == glseq0.get_sequence());
+ BOOST_CHECK( glseq_copy0.get_sequence() == glseq0.get_sequence());
glseq0 = glseq1;
- BOOST_CHECK( *glseq0.sequence() == s1 );
+ BOOST_CHECK( glseq0.get_sequence() == s1 );
}
BOOST_AUTO_TEST_CASE( glsequence_color )
{
boost::shared_ptr<AnnotationColors> cm(new AnnotationColors);
- Color black(0.0, 0.0, 0.0, 1.0);
- Color c(0.1, 0.2, 0.3, 0.4);
- boost::shared_ptr<Sequence> seq(new Sequence("AAGGCCTT"));
+ boost::shared_ptr<Color> black(new Color(0.0, 0.0, 0.0, 1.0));
+ boost::shared_ptr<Color> c(new Color(0.1, 0.2, 0.3, 0.4));
+ Sequence seq("AAGGCCTT");
GlSequence s(seq, cm);
- BOOST_CHECK_EQUAL(s.color(), black );
+ BOOST_CHECK_EQUAL(*s.color(), *black );
s.setColor( c );
- BOOST_CHECK_EQUAL( s.color(), c );
+ BOOST_CHECK_EQUAL( *(s.color()), *c );
}
BOOST_AUTO_TEST_CASE( glsequence_renderable )
{
boost::shared_ptr<AnnotationColors> cm(new AnnotationColors);
- boost::shared_ptr<Sequence> seq(new Sequence("AAGGCCTT"));
+ Sequence seq("AAGGCCTT");
GlSequence s(seq, cm);
// way more base pairs than viewport pixel width
boost::shared_ptr<AnnotationColors> cm(new AnnotationColors);
string seq_string("AAGGCCTTNNAAGGCCTTNNAAGGCCTTNN");
string::size_type seqlen = seq_string.size();
- boost::shared_ptr<Sequence> seq(new Sequence(seq_string));
+ Sequence seq(seq_string);
GlSequence glseq(seq, cm);
- BOOST_CHECK( glseq.sequence_begin(0, 50) == seq->begin() );
- // always make sure we return seq->end() regardless of how much extra
+ BOOST_CHECK( glseq.region_begin(0, 50) == seq.begin() );
+ // always make sure we return seq.end() regardless of how much extra
// is asked for
- BOOST_CHECK( glseq.sequence_end(0, seqlen+10) == seq->end() );
+ BOOST_CHECK( glseq.region_end(0, seqlen+10) == seq.end() );
// do we get the right end pointer?
- BOOST_CHECK( glseq.sequence_end(0, 5) == seq->begin()+5 );
+ BOOST_CHECK( glseq.region_end(0, 5) == seq.begin()+5 );
// when we request far too much sequence what do we get?
- BOOST_CHECK( glseq.sequence_begin(seqlen+10, seqlen+20) == seq->end() );
- BOOST_CHECK( glseq.sequence_end(seqlen+10, seqlen+20) == seq->end() );
+ BOOST_CHECK( glseq.region_begin(seqlen+10, seqlen+20) == seq.end() );
+ BOOST_CHECK( glseq.region_end(seqlen+10, seqlen+20) == seq.end() );
- // we cant ask for reversed sequences with sequence_begin/end
- BOOST_CHECK( glseq.sequence_begin(10, 5) == seq->end() );
- BOOST_CHECK( glseq.sequence_end(10, 5) == seq->end() );
+ // we can't ask for reversed sequences with region_begin/end
+ BOOST_CHECK( glseq.region_begin(10, 5) == seq.end() );
+ BOOST_CHECK( glseq.region_end(10, 5) == seq.end() );
Sequence::const_iterator seq_itor;
// if we as for an empty segment? start and end should equal
- seq_itor = glseq.sequence_begin(10, 10);
- BOOST_CHECK( seq_itor == glseq.sequence_end(10, 10) );
+ seq_itor = glseq.region_begin(10, 10);
+ BOOST_CHECK( seq_itor == glseq.region_end(10, 10) );
// reuse seq_itor downhere
string::const_iterator str_itor;
- for(str_itor = seq->begin(),
- seq_itor = glseq.sequence_begin();
- str_itor != seq->end() and
- seq_itor != glseq.sequence_end();
+ for(str_itor = seq.begin(),
+ seq_itor = glseq.begin();
+ str_itor != seq.end() and
+ seq_itor != glseq.end();
++str_itor, ++seq_itor)
{
BOOST_CHECK_EQUAL( *str_itor, *seq_itor );
{
boost::shared_ptr<AnnotationColors> cm(new AnnotationColors);
std::string seq_string = "AAGGCCTT";
- boost::shared_ptr<Sequence> seq(new Sequence(seq_string));
+ Sequence seq(seq_string);
GlSequence glseq(seq, cm);
BOOST_CHECK_EQUAL( glseq.leftbase( -50.0 ), 0 );
{
boost::shared_ptr<AnnotationColors> cm(new AnnotationColors);
std::string seq_string = "AAGGCCTTAAGGCCTT";
- boost::shared_ptr<Sequence> seq(new Sequence(seq_string));
+ Sequence seq(seq_string);
GlSequence glseq(seq, cm);
glseq.setX(-5);
{
boost::shared_ptr<AnnotationColors> cm(new AnnotationColors);
std::string seq_string = "AAGGCCTTAAGGCCTT";
- boost::shared_ptr<Sequence> seq(new Sequence(seq_string));
+ Sequence seq(seq_string);
GlSequence glseq(seq, cm);
Sequence::const_iterator seq_begin_i;
Sequence::const_iterator seq_end_i;
- BOOST_CHECK(glseq.sequence_begin(5, -5) == seq->end());
- BOOST_CHECK(glseq.sequence_begin(0, 20) == seq->begin());
- BOOST_CHECK(glseq.sequence_begin(10,20) == seq->begin()+10);
+ BOOST_CHECK(glseq.region_begin(5, -5) == seq.end());
+ BOOST_CHECK(glseq.region_begin(0, 20) == seq.begin());
+ BOOST_CHECK(glseq.region_begin(10,20) == seq.begin()+10);
- BOOST_CHECK(glseq.sequence_end(5, -5) == seq->end());
- BOOST_CHECK(glseq.sequence_end(0, 20) == seq->end());
- BOOST_CHECK(glseq.sequence_end(0, 10) == seq->begin()+10);
+ BOOST_CHECK(glseq.region_end(5, -5) == seq.end());
+ BOOST_CHECK(glseq.region_end(0, 20) == seq.end());
+ BOOST_CHECK(glseq.region_end(0, 10) == seq.begin()+10);
glseq.setX(-5);
- BOOST_CHECK(glseq.sequence_begin(0, 10) == seq->begin()+5);
- BOOST_CHECK(glseq.sequence_end(0, 15) == seq->end());
+ BOOST_CHECK(glseq.region_begin(0, 10) == seq.begin()+5);
+ BOOST_CHECK(glseq.region_end(0, 15) == seq.end());
}
+BOOST_AUTO_TEST_CASE( glsequence_subseq )
+{
+ boost::shared_ptr<AnnotationColors> cm(new AnnotationColors);
+ boost::shared_ptr<Color> c(new Color(1.0, 0.5, 0.5));
+
+ GlSequence seq("AAGGCCTT", cm);
+ seq.setColor(c);
+
+ GlSequence subseq = seq.subseq(4,2);
+ BOOST_CHECK_EQUAL(subseq.get_sequence(), "CC");
+ BOOST_CHECK_EQUAL(subseq.color(), c);
+}
+
+/*
BOOST_AUTO_TEST_CASE ( shared_ptr_test )
{
boost::shared_ptr<AnnotationColors> cm(new AnnotationColors);
// I don't trust my operator= hack so lets make sure it works.
string s0("AAGGCCTT");
string s1("TTGGCCAA");
- boost::shared_ptr<Sequence> seq0(new Sequence(s0));
+ Sequence seq0(s0);
BOOST_CHECK_EQUAL( seq0.use_count(), 1 );
- boost::shared_ptr<Sequence> seq1(new Sequence(s1));
+ Sequence seq1(s1);
BOOST_CHECK_EQUAL( seq0.use_count(), 1 );
// make a block to test deallocation
BOOST_CHECK_EQUAL(seq0.use_count(), 1);
BOOST_CHECK_EQUAL(seq1.use_count(), 1);
}
-
-
+*/
\ No newline at end of file
--- /dev/null
+#define BOOST_AUTO_TEST_MAIN
+#include <boost/test/auto_unit_test.hpp>
+
+#include "seq_span.hpp"
+#include "mussa_exceptions.hpp"
+
+#include <stdlib.h>
+
+BOOST_AUTO_TEST_CASE( seqspan_from_string )
+{
+ std::string str1("AAGGCCTT");
+ SeqSpanRef span1(new SeqSpan(str1));
+ BOOST_CHECK_EQUAL(span1->length(), str1.length());
+ BOOST_CHECK_EQUAL(span1->sequence(), str1);
+}
+
+BOOST_AUTO_TEST_CASE( seqspan_from_seqspan )
+{
+ std::string str1("AAGGCCTT");
+ SeqSpanRef span1(new SeqSpan(str1));
+ SeqSpanRef span2(new SeqSpan(span1));
+ SeqSpanRef span3(new SeqSpan(span1));
+
+ BOOST_CHECK_EQUAL(span1->length(), str1.length());
+ BOOST_CHECK_EQUAL(span1->sequence(), str1);
+ BOOST_CHECK_EQUAL(span1->length(), span2->length());
+ BOOST_CHECK_EQUAL(span2->sequence(), str1);
+
+ BOOST_CHECK_EQUAL(span1->length(), span3->length());
+ BOOST_CHECK_EQUAL(span3->sequence(), str1);
+}
+
+BOOST_AUTO_TEST_CASE( seqspan_equality )
+{
+ std::string str1("AAGGCCTT");
+ std::string str2("AACCGGTT");
+ std::string str3("AACCGGTT");
+ SeqSpanRef span1(new SeqSpan(str1));
+ SeqSpanRef span1copy(new SeqSpan(span1));
+ SeqSpanRef span2(new SeqSpan(str2));
+ SeqSpanRef span3(new SeqSpan(str3));
+
+ // a span constructed from another span compares equal to it
+ BOOST_CHECK_EQUAL(*span1, *span1copy);
+ BOOST_CHECK(*span1 != *span3);
+ // str2 and str3 hold identical text, yet spans built from separate
+ // source strings are expected to be unequal and not in one family.
+ // NOTE(review): presumably SeqSpan equality only holds within a single
+ // SeqSpan tree -- confirm against SeqSpan::operator==.
+ BOOST_CHECK(*span2 != *span3);
+ BOOST_CHECK_EQUAL(SeqSpan::isFamily(*span2, *span3), false);
+}
+
+BOOST_AUTO_TEST_CASE( seqspan_find_first_not_of )
+{
+ std::string str1("AAAAT");
+ SeqSpan seq1(str1);
+ BOOST_CHECK_EQUAL(seq1.find_first_not_of("A"), str1.find_first_not_of("A"));
+
+ std::string str2("AATTGGCC");
+ SeqSpan seq2(str2);
+ BOOST_CHECK_EQUAL(seq2.find_first_not_of("qwer"), str2.find_first_not_of("qwer"));
+}
+
+BOOST_AUTO_TEST_CASE( seqspan_at )
+{
+ std::string str1("AAGGCCTT");
+ SeqSpanRef seq1(new SeqSpan(str1));
+ SeqSpanRef seq2(new SeqSpan(seq1, 2, 2));
+
+ BOOST_CHECK_EQUAL( seq1->at(0), str1.at(0) );
+ BOOST_CHECK_EQUAL( seq1->at(2), seq2->at(0) );
+ BOOST_CHECK_EQUAL( str1[2], seq2->at(0) );
+ BOOST_CHECK_EQUAL( (*seq1)[0], seq1->at(0) );
+ BOOST_CHECK_EQUAL( (*seq1)[2], (*seq2)[0] );
+}
+
+// data() of a sub-span should expose the same bytes as the matching
+// std::string substring.
+BOOST_AUTO_TEST_CASE( seqspan_data )
+{
+ std::string str1("AAGGCCTT");
+ SeqSpanRef seq1(new SeqSpan(str1));
+ SeqSpanRef seq2(new SeqSpan(seq1, 3, 2));
+
+ BOOST_REQUIRE_EQUAL( str1.length(), seq1->length());
+ BOOST_CHECK_EQUAL( str1.data(), seq1->data() );
+ std::string str1sub = str1.substr(3,2);
+ BOOST_REQUIRE_EQUAL( seq2->size(), str1sub.size() );
+ BOOST_REQUIRE_EQUAL( seq2->length(), str1sub.length() );
+ // use an unsigned index so the comparison with size() does not mix
+ // signed and unsigned values
+ for (size_t i = 0; i != seq2->size(); ++i) {
+ BOOST_CHECK_EQUAL( seq2->data()[i], str1sub.data()[i] );
+ }
+}
+
+BOOST_AUTO_TEST_CASE( seqspan_begin_end )
+{
+ std::string str1("AAGGCCTT");
+ SeqSpanRef seq1(new SeqSpan(str1));
+ SeqSpanRef seq2(new SeqSpan(seq1, 2, 2));
+
+ BOOST_CHECK(seq1->begin() + 2 == seq2->begin());
+
+ std::string::const_iterator str1_i = str1.begin();
+ SeqSpan::const_iterator seq1_i = seq1->begin();
+ for(; not ((str1_i == str1.end()) or (seq1_i == seq1->end())); ++str1_i, ++seq1_i) {
+ BOOST_CHECK_EQUAL( *str1_i, *seq1_i );
+ }
+}
+
+BOOST_AUTO_TEST_CASE( seqspan_rbegin_rend )
+{
+ std::string str1("AAGGCCTT");
+ SeqSpanRef seq1(new SeqSpan(str1));
+
+ std::string::const_reverse_iterator str1_i = str1.rbegin();
+ SeqSpan::const_reverse_iterator seq1_i = seq1->rbegin();
+ for(; seq1_i != seq1->rend(); ++str1_i, ++seq1_i) {
+ BOOST_CHECK_EQUAL( *str1_i, *seq1_i );
+ }
+}
+
+BOOST_AUTO_TEST_CASE( seqspan_empty_start_stop )
+{
+ SeqSpanRef s1(new SeqSpan(""));
+ BOOST_CHECK_EQUAL( s1->start(), 0 );
+ BOOST_CHECK_EQUAL( s1->stop(), 0 );
+
+ BOOST_CHECK_EQUAL( s1->parentStart(), 0 );
+ BOOST_CHECK_EQUAL( s1->parentStop(), 0 );
+
+ BOOST_CHECK_EQUAL( s1->size(), 0 );
+}
+
+BOOST_AUTO_TEST_CASE( seqspan_global_start_stop )
+{
+ std::string seq_string("AAGGCCTT");
+ SeqSpanRef s1(new SeqSpan(seq_string));
+ BOOST_CHECK_EQUAL( s1->start(), 0 );
+ BOOST_CHECK_EQUAL( s1->stop(), seq_string.size() );
+
+ std::string s2seq_string = seq_string.substr(2,3);
+ SeqSpanRef s2 = s1->subseq(2,3);
+ BOOST_CHECK_EQUAL( s2->start(), 2);
+ BOOST_CHECK_EQUAL( s2->stop(), 2+3);
+ BOOST_CHECK_EQUAL( s2->size(), 3);
+ BOOST_CHECK_EQUAL( s2->sequence(), s2seq_string);
+
+ std::string s3seq_string = s2seq_string.substr(1,1);
+ SeqSpanRef s3 = s2->subseq(1,1);
+ BOOST_CHECK_EQUAL( s3->start(), 2+1 );
+ BOOST_CHECK_EQUAL( s3->stop(), 2+1+1);
+ BOOST_CHECK_EQUAL( s3->size(), 1);
+ BOOST_CHECK_EQUAL( s3->sequence(), s3seq_string);
+}
+
+// requesting more bases than the parent holds: subseq(4,8) on an
+// 8 base sequence is expected to stop at the parent's end.
+BOOST_AUTO_TEST_CASE( seqspan_global_to_large )
+{
+ std::string seq_string("AAGGCCTT");
+ SeqSpanRef s1(new SeqSpan(seq_string));
+ BOOST_CHECK_EQUAL( s1->start(), 0 );
+ BOOST_CHECK_EQUAL( s1->stop(), seq_string.size() );
+
+ SeqSpanRef s2 = s1->subseq(4,8);
+ BOOST_CHECK_EQUAL( s2->start(), 4);
+ BOOST_CHECK_EQUAL( s2->size(), 4);
+ BOOST_CHECK_EQUAL( s2->stop(), 8);
+}
+
+BOOST_AUTO_TEST_CASE( seqspan_parent_start_stop )
+{
+ std::string seq_string("AAGGCCTT");
+ SeqSpanRef s1(new SeqSpan(seq_string));
+ BOOST_CHECK_EQUAL( s1->parentStart(), 0 );
+ BOOST_CHECK_EQUAL( s1->parentStop(), seq_string.size() );
+
+ std::string s2seq_string = seq_string.substr(2,3);
+ SeqSpanRef s2 = s1->subseq(2,3);
+ BOOST_CHECK_EQUAL( s2->parentStart(), 2);
+ BOOST_CHECK_EQUAL( s2->parentStop(), 2+3);
+ BOOST_CHECK_EQUAL( s2->size(), 3);
+ BOOST_CHECK_EQUAL( s2->sequence(), s2seq_string);
+
+ std::string s3seq_string = s2seq_string.substr(1,1);
+ SeqSpanRef s3 = s2->subseq(1,1);
+ BOOST_CHECK_EQUAL( s3->parentStart(), 1 );
+ BOOST_CHECK_EQUAL( s3->parentStop(), 1+1);
+ BOOST_CHECK_EQUAL( s3->size(), 1);
+ BOOST_CHECK_EQUAL( s3->sequence(), s3seq_string);
+}
+
+BOOST_AUTO_TEST_CASE( seqspan_global_mutable_start_stop )
+{
+ std::string seq_string("AAGGCCTT");
+ SeqSpanRef s1(new SeqSpan(seq_string));
+
+ std::string s2seq_string = seq_string.substr(2,3);
+ SeqSpanRef s2 = s1->subseq(2,3);
+ BOOST_CHECK_EQUAL( s2->start(), 2);
+ BOOST_CHECK_EQUAL( s2->stop(), 2+3);
+ BOOST_CHECK_EQUAL( s2->size(), 3);
+ BOOST_CHECK_EQUAL( s2->sequence(), s2seq_string);
+
+ std::string s3seq_string = s2seq_string.substr(1,1);
+ SeqSpanRef s3 = s2->subseq(1,1);
+ // Check root location
+ BOOST_CHECK_EQUAL( s3->start(), 2+1 );
+ BOOST_CHECK_EQUAL( s3->stop(), 2+1+1);
+ BOOST_CHECK_EQUAL( s3->size(), 1);
+ // Check parent location
+ BOOST_CHECK_EQUAL( s3->parentStart(), 1 );
+ BOOST_CHECK_EQUAL( s3->parentStop(), 1+1);
+ BOOST_CHECK_EQUAL( s3->sequence(), s3seq_string);
+
+ // Extend s2 to the left
+ s2->setStart(1);
+ BOOST_CHECK_EQUAL( s2->start(), 1);
+ BOOST_CHECK_EQUAL( s2->stop(), 1+1+3);
+ BOOST_CHECK_EQUAL( s2->size(), 4);
+
+ // Child sequence should have the same global location
+ BOOST_CHECK_EQUAL( s3->start(), 2+1 );
+ BOOST_CHECK_EQUAL( s3->stop(), 2+1+1);
+ BOOST_CHECK_EQUAL( s3->size(), 1);
+ // Child sequence should now have different parent location
+ BOOST_CHECK_EQUAL( s3->parentStart(), 2 );
+ BOOST_CHECK_EQUAL( s3->parentStop(), 2+1);
+ BOOST_CHECK_EQUAL( s3->size(), 1);
+}
+
+BOOST_AUTO_TEST_CASE( seqspan_parent_mutable_start_stop )
+{
+ std::string seq_string("AAGGCCTT");
+ SeqSpanRef s1(new SeqSpan(seq_string));
+
+ std::string s2seq_string = seq_string.substr(3,3);
+ SeqSpanRef s2 = s1->subseq(3,3);
+ BOOST_CHECK_EQUAL( s2->start(), 3);
+ BOOST_CHECK_EQUAL( s2->stop(), 3+3);
+ BOOST_CHECK_EQUAL( s2->size(), 3);
+ BOOST_CHECK_EQUAL( s2->sequence(), s2seq_string);
+
+ std::string s3seq_string = s2seq_string.substr(1,1);
+ SeqSpanRef s3 = s2->subseq(1,1);
+ // Check root location
+ BOOST_CHECK_EQUAL( s3->start(), 3+1 );
+ BOOST_CHECK_EQUAL( s3->stop(), 3+1+1);
+ BOOST_CHECK_EQUAL( s3->size(), 1);
+ // Check parent location
+ BOOST_CHECK_EQUAL( s3->parentStart(), 1 );
+ BOOST_CHECK_EQUAL( s3->parentStop(), 1+1);
+ BOOST_CHECK_EQUAL( s3->sequence(), s3seq_string);
+
+ // s2 should now be equivalent to s1->subseq(1,5)
+ s2->setParentStart(1);
+ BOOST_CHECK_EQUAL( s2->start(), 1);
+ BOOST_CHECK_EQUAL( s2->stop(), 3+3);
+ BOOST_CHECK_EQUAL( s2->size(), 5);
+
+ // Child sequence should have the same global location
+ BOOST_CHECK_EQUAL( s3->start(), 3+1 );
+ BOOST_CHECK_EQUAL( s3->stop(), 3+1+1);
+ BOOST_CHECK_EQUAL( s3->size(), 1);
+ // Child sequence should now have different parent location
+ BOOST_CHECK_EQUAL( s3->parentStart(), 1+2);
+ BOOST_CHECK_EQUAL( s3->parentStop(), 1+2+1);
+}
+
+// what happens if we set a stop past our actual end
+BOOST_AUTO_TEST_CASE( seqspan_stop_past_end )
+{
+ std::string seq_string("AAGGCCTT");
+ SeqSpanRef s1(new SeqSpan(seq_string));
+ // s2 covers [3,6) of s1; s3 covers [1,2) of s2
+ SeqSpanRef s2 = s1->subseq(3,3);
+ SeqSpanRef s3 = s2->subseq(1,1);
+
+ // should be limited by our parent sequence
+ s3->setStop(8);
+ BOOST_CHECK_EQUAL( s3->size(), 2);
+
+ // s2's own parent is the full 8 base sequence, so it can grow to the end
+ s2->setStop(8);
+ BOOST_CHECK_EQUAL( s2->size(), 5);
+
+ // once the parent has grown, the child can be extended further
+ s3->setStop(8);
+ BOOST_CHECK_EQUAL( s3->size(), 4);
+}
\ No newline at end of file
BOOST_CHECK_EQUAL(s.get_sequence(), std::string() );
}
+BOOST_AUTO_TEST_CASE( sequence_from_string )
+{
+ std::string str1("AAAT");
+ Sequence seq1(str1);
+ BOOST_CHECK_EQUAL(seq1.get_sequence(), str1);
+}
+
BOOST_AUTO_TEST_CASE( sequence_find_first_not_of )
{
std::string str1("AAAAT");
BOOST_CHECK_EQUAL(seq1_loaded, seq1);
BOOST_CHECK_EQUAL(seq2_loaded, seq2);
// test if our pointers are the same
- BOOST_CHECK_EQUAL(seq1_loaded.c_str(), seq2_loaded.c_str());
+ BOOST_CHECK_EQUAL(seq1_loaded.data(), seq2_loaded.data());
}
sequence_load_error(msg) {};
};
+//! Incomprehensible comparison
+class sequence_invalid_comparison : public mussa_error
+{
+public:
+ explicit sequence_invalid_comparison(const std::string& msg) :
+ mussa_error(msg) {};
+};
+
//! Error loading sequence annotation
class annotation_load_error : public sequence_load_error
{
void export_glsequence()
{
- class_<GlSequence>("GlSequence",
+ class_<GlSequence, bases<Sequence> >("GlSequence",
init<
- boost::shared_ptr<Sequence>,
+ const Sequence &,
boost::shared_ptr<AnnotationColors>
>())
.def(init<GlSequence &>())
.def("draw", &GlSequence::draw)
- .def("sequence", &GlSequence::sequence )
.add_property("x", &GlSequence::x, &GlSequence::setX)
.add_property("y", &GlSequence::y, &GlSequence::setY)
.add_property("size", &GlSequence::size)
export_annotation_colors();
export_conserved_path();
export_flps();
+ export_sequence();
export_glsequence();
export_mussa();
export_nway_paths();
- export_sequence();
//export_mussa_window();
}
seq_i != sequences.end();
++seq_i)
{
- boost::shared_ptr<GlSequence> gs(new GlSequence(*seq_i, cm));
+ // Blech *(*seq_i) is dereferencing the shared_ptr stored in the iterator.
+ boost::shared_ptr<GlSequence> gs(new GlSequence(*(*seq_i), cm));
converted_sequences.push_back(gs);
browser.push_sequence(gs);
}
{
if (glseq != glsequence_) {
glsequence_ = glseq;
- setName(glsequence_->sequence()->get_species());
- setLength(glsequence_->sequence()->length());
+ setName(glsequence_->get_species());
+ setLength(glsequence_->size());
emit glsequenceChanged(glsequence_);
}
}
{
std::string std_name_ = name_.toStdString();
- if (std_name_ != glsequence_->sequence()->get_species()) {
- glsequence_->sequence()->set_species(std_name_);
+ if (std_name_ != glsequence_->get_species()) {
+ glsequence_->set_species(std_name_);
emit nameChanged(name_);
}
std::string SequenceDescription::name() const
{
if (glsequence_)
- return glsequence_->sequence()->get_species();
+ return glsequence_->get_species();
else
return string("");
}
return 0;
if (glsequence_)
- return glsequence_->sequence()->size();
+ return glsequence_->size();
else
return 0;
}
if (role == Qt::DisplayRole) {
if (index.column() == 0 ) {
model_item glseq = sequences[index.row()];
- std::string name(glseq->sequence()->get_name());
+ std::string name(glseq->get_name());
if (name.size() == 0) {
return QString(tr("Unnamed Sequence"));
} else {
private slots:
void testSimple() {
- boost::shared_ptr<Sequence> seq1(new Sequence("AAGGCCTT"));
- seq1->set_species("foo");
+ // build the sequence directly from the literal: a `new Sequence(...)`
+ // passed to the Sequence(const Sequence*) constructor is never deleted
+ // (that constructor presumably only copies from the pointer).
+ Sequence seq1("AAGGCCTT");
+ seq1.set_species("foo");
boost::shared_ptr<AnnotationColors> cm(new AnnotationColors);
+ // this is now a copy of the original sequence... which
+ // means changes to the shared GlSequence won't do anything to Sequence
boost::shared_ptr<GlSequence> glseq1(new GlSequence(seq1, cm));
SequenceDescription sd(glseq1, 0);
QVERIFY(sd.glsequence() == glseq1);
- QVERIFY(sd.glsequence()->sequence()->get_species() == seq1->get_species());
+ QVERIFY(sd.glsequence()->get_species() == glseq1->get_species());
sd.setName(std::string("bar"));
- QVERIFY(sd.glsequence()->sequence()->get_species() == seq1->get_species());
- QVERIFY(seq1->get_species() == "bar");
+ QVERIFY(sd.glsequence()->get_species() == glseq1->get_species());
+ QVERIFY(seq1.get_species() != "bar");
+ QVERIFY(glseq1->get_species() == "bar");
}
void testDeletedPointer() {
SequenceDescription sd;
seq1->find_motif(m);
seq1->set_species("foo");
boost::shared_ptr<AnnotationColors> cm(new AnnotationColors);
- boost::shared_ptr<GlSequence> glseq1(new GlSequence(seq1, cm));
+ boost::shared_ptr<GlSequence> glseq1(new GlSequence(*seq1, cm));
sd.setGlSequence(glseq1);
}