From: Diane Trout Date: Fri, 15 Sep 2006 00:31:35 +0000 (+0000) Subject: use an enum instead of a bool for strandedness X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=mussa.git;a=commitdiff_plain;h=717b1762156bde77ee5c14a83ca4e103c8c20896 use an enum instead of a bool for strandedness I wanted to track more than +/- strand, I wanted to know if an annotation was "unknown" or perhaps applied to both strands. --- diff --git a/alg/sequence.cpp b/alg/sequence.cpp index 0b7fb0e..f1c7a71 100644 --- a/alg/sequence.cpp +++ b/alg/sequence.cpp @@ -84,7 +84,8 @@ const std::string Sequence::protein_alphabet("AaCcDdEeFfGgHhIiKkLlMmNnPpQqRrSsTt Sequence::Sequence() : seq_start(0), - seq_count(0) + seq_count(0), + strand(UnknownStrand) { } @@ -95,6 +96,7 @@ Sequence::~Sequence() Sequence::Sequence(const char *seq) : seq_start(0), seq_count(0), + strand(UnknownStrand), header(""), species("") { @@ -104,6 +106,7 @@ Sequence::Sequence(const char *seq) Sequence::Sequence(const std::string& seq) : seq_start(0), seq_count(0), + strand(UnknownStrand), header(""), species("") { @@ -114,6 +117,7 @@ Sequence::Sequence(const Sequence& o) : seq(o.seq), seq_start(o.seq_start), seq_count(o.seq_count), + strand(o.strand), header(o.header), species(o.species), annots(o.annots), @@ -127,6 +131,7 @@ Sequence &Sequence::operator=(const Sequence& s) seq = s.seq; seq_start = s.seq_start; seq_count = s.seq_count; + strand = s.strand; header = s.header; species = s.species; annots = s.annots; @@ -242,12 +247,13 @@ Sequence::load_fasta(std::iostream& data_file, int seq_num, } void Sequence::set_filtered_sequence(const std::string &old_seq, - std::string::size_type start, - std::string::size_type count) + size_type start, + size_type count, + strand_type strand_) { char conversionTable[257]; - if ( count == 0) + if ( count == npos) count = old_seq.size() - start; boost::shared_ptr new_seq(new seq_string); new_seq->reserve(count); @@ -281,6 +287,7 @@ void Sequence::set_filtered_sequence(const std::string &old_seq, seq = new_seq; seq_start = 0; seq_count = count; + strand = strand_; } void @@ -609,6 +616,7 @@ Sequence::clear() seq.reset(); seq_start = 0; seq_count = 0; + strand = UnknownStrand; header.clear(); species.clear(); annots.clear(); diff --git a/alg/sequence.hpp b/alg/sequence.hpp index aa1460f..6ad8320 100644 --- a/alg/sequence.hpp +++ b/alg/sequence.hpp @@ -109,6 +109,7 @@ public: typedef std::string::const_reference const_reference; typedef std::string::size_type size_type; static const size_type npos = std::string::npos; + enum strand_type { UnknownStrand, PlusStrand, MinusStrand, BothStrand }; // some standard dna alphabets // Include nl (\012), and cr (\015) to make sequence parsing eol @@ -136,8 +137,9 @@ public: //! set sequence to a (sub)string containing nothing but AGCTN void set_filtered_sequence(const std::string& seq, - std::string::size_type start=0, - std::string::size_type count=0); + size_type start=0, + size_type count=npos, + strand_type strand=UnknownStrand); //! retrive element at specific position const_reference at(size_type n) const; @@ -158,7 +160,7 @@ public: //! return a subsequence, copying over any appropriate annotation Sequence subseq(int start=0, int count = std::string::npos) const; - //! return a reverse compliment + //! return a reverse compliment (this needs to be improved?) std::string rev_comp() const; //! set sequence (filtered) @@ -229,6 +231,8 @@ private: size_type seq_start; //! number of basepairs of the shared sequence we represent size_type seq_count; + //! strand orientation + strand_type strand; //! fasta header std::string header; //! species name @@ -251,6 +255,7 @@ private: ar & BOOST_SERIALIZATION_NVP(seq); ar & BOOST_SERIALIZATION_NVP(seq_start); ar & BOOST_SERIALIZATION_NVP(seq_count); + ar & BOOST_SERIALIZATION_NVP(strand); ar & BOOST_SERIALIZATION_NVP(header); ar & BOOST_SERIALIZATION_NVP(species); ar & BOOST_SERIALIZATION_NVP(annots);