X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=mussa.git;a=blobdiff_plain;f=alg%2Fseq_span.hpp;h=5a2da1268d3a62cf807f323d7e39bb450fea5841;hp=8d1991360df0bdcebee6e83cab7b926686630a9a;hb=75496e2c562d728af983c347527270eba360c6ee;hpb=b9755e1974201ff513c66b0fd684bde330c6fff6 diff --git a/alg/seq_span.hpp b/alg/seq_span.hpp index 8d19913..5a2da12 100644 --- a/alg/seq_span.hpp +++ b/alg/seq_span.hpp @@ -35,14 +35,34 @@ public: typedef SeqString::size_type size_type; typedef SeqString::value_type value_type; static const size_type npos = SeqString::npos; + //! Define strand types + /**! + * Unknown strand is treated as "either" strand + * Plus refers to the initially created strand + * Minus is the opposite strand + * Both is for any feature that applies to "both" strands + * (which may not actually be useful) + * Same strand is only used when creating a subsequence + * and implies the subsequence has the same orientation as the parent + * Opposite is only used for creating a subsequence + * and implies the subsequence has the opposite orientation as the parent + * Single indicates that this is single stranded and there can't be + * an opposite strand. + */ + enum strand_type { UnknownStrand, MinusStrand, PlusStrand, + BothStrand, SameStrand, OppositeStrand, SingleStrand }; public: SeqSpan(const SeqSpan &); SeqSpan(const SeqSpan *); explicit SeqSpan(const std::string &, - AlphabetRef = reduced_nucleic_alphabet + AlphabetRef a = dna_alphabet, + strand_type strand=PlusStrand ); - SeqSpan(const SeqSpanRef, size_type start=0, size_type count=npos); + SeqSpan(const SeqSpanRef, + size_type start=0, + size_type count=npos, + strand_type strand=SameStrand); //! assignment SeqSpan& operator=(const SeqSpan&); @@ -87,6 +107,7 @@ public: size_type stop() const { return seq_start + seq_count; } //! set one past the last position relative to the root sequence. void setStop(size_type); + strand_type strand() const { return seq_strand; } //! get start position relative to the parent sequence size_type parentStart() const; @@ -100,12 +121,17 @@ public: //! return a subsequence, copying over any appropriate annotation - SeqSpanRef subseq(size_type start=0, size_type count = std::string::npos); + SeqSpanRef subseq(size_type start=0, + size_type count = std::string::npos, + strand_type = PlusStrand); //! get sequence std::string sequence() const; //! are both sequences derived from the same sequence tree? static bool isFamily(const SeqSpan& a, const SeqSpan& b); + //! fill in our rc_seq variable + void initialize_rc_seq() const; + friend class Sequence; private: //! do not statically initialize, only create with new @@ -117,11 +143,15 @@ protected: size_type seq_start; //! how big we ware size_type seq_count; - // Do I need to track the strand here? + //! strand orientation + strand_type seq_strand; //! keep a reference to who our parent span is SeqSpanRef parent; + //! hold a reverse complement version of our sequence if needed + SeqStringRef rc_seq; + // boost::serialization support friend class boost::serialization::access; template @@ -129,6 +159,7 @@ protected: ar & BOOST_SERIALIZATION_NVP(seq); ar & BOOST_SERIALIZATION_NVP(seq_start); ar & BOOST_SERIALIZATION_NVP(seq_count); + ar & BOOST_SERIALIZATION_NVP(seq_strand); ar & BOOST_SERIALIZATION_NVP(parent); } };