use an enum instead of a bool for strandedness
author Diane Trout <diane@caltech.edu>
Fri, 15 Sep 2006 00:31:35 +0000 (00:31 +0000)
committer Diane Trout <diane@caltech.edu>
Fri, 15 Sep 2006 00:31:35 +0000 (00:31 +0000)
I wanted to track more than just the +/- strand: I also wanted to know whether
an annotation was "unknown" or perhaps applied to both strands.

alg/sequence.cpp
alg/sequence.hpp

index 0b7fb0e7511a3d9a4cb99e5deb2f0a71a3a90da7..f1c7a7132ab75d6cd0cc12a9ec5974cc764d0d29 100644 (file)
@@ -84,7 +84,8 @@ const std::string Sequence::protein_alphabet("AaCcDdEeFfGgHhIiKkLlMmNnPpQqRrSsTt
 
 Sequence::Sequence()
   : seq_start(0),
-    seq_count(0)
+    seq_count(0),
+    strand(UnknownStrand)
 {
 }
 
@@ -95,6 +96,7 @@ Sequence::~Sequence()
 Sequence::Sequence(const char *seq)
   : seq_start(0),
     seq_count(0),
+    strand(UnknownStrand),
     header(""),
     species("")
 {
@@ -104,6 +106,7 @@ Sequence::Sequence(const char *seq)
 Sequence::Sequence(const std::string& seq) 
   : seq_start(0),
     seq_count(0),
+    strand(UnknownStrand),
     header(""),
     species("")
 {
@@ -114,6 +117,7 @@ Sequence::Sequence(const Sequence& o)
   : seq(o.seq),
     seq_start(o.seq_start),
     seq_count(o.seq_count),
+    strand(o.strand),
     header(o.header),
     species(o.species),
     annots(o.annots),
@@ -127,6 +131,7 @@ Sequence &Sequence::operator=(const Sequence& s)
     seq = s.seq;
     seq_start = s.seq_start;
     seq_count = s.seq_count;
+    strand = s.strand;
     header = s.header;
     species = s.species;
     annots = s.annots;
@@ -242,12 +247,13 @@ Sequence::load_fasta(std::iostream& data_file, int seq_num,
 }
 
 void Sequence::set_filtered_sequence(const std::string &old_seq, 
-                                     std::string::size_type start,
-                                     std::string::size_type count)
+                                     size_type start,
+                                     size_type count,
+                                     strand_type strand_)
 {
   char conversionTable[257];
 
-  if ( count == 0)
+  if ( count == npos)
     count = old_seq.size() - start;
   boost::shared_ptr<seq_string> new_seq(new seq_string);
   new_seq->reserve(count);
@@ -281,6 +287,7 @@ void Sequence::set_filtered_sequence(const std::string &old_seq,
   seq = new_seq;
   seq_start = 0;
   seq_count = count;
+  strand = strand_;
 }
 
 void
@@ -609,6 +616,7 @@ Sequence::clear()
   seq.reset();
   seq_start = 0;
   seq_count = 0;
+  strand = UnknownStrand;
   header.clear();
   species.clear();
   annots.clear();
index aa1460fcb1152201ab527b2c032a6b53b497a77a..6ad83206b994f9b8e51d3da2b30063984b3a5055 100644 (file)
@@ -109,6 +109,7 @@ public:
   typedef std::string::const_reference const_reference;
   typedef std::string::size_type size_type;
   static const size_type npos = std::string::npos;
+  enum strand_type { UnknownStrand, PlusStrand, MinusStrand, BothStrand };
 
   // some standard dna alphabets 
   // Include nl (\012), and cr (\015) to make sequence parsing eol 
@@ -136,8 +137,9 @@ public:
 
   //! set sequence to a (sub)string containing nothing but AGCTN
   void set_filtered_sequence(const std::string& seq, 
-                            std::string::size_type start=0, 
-                            std::string::size_type count=0);
+                            size_type start=0,
+                            size_type count=npos,
+                             strand_type strand=UnknownStrand);
 
   //! retrive element at specific position
   const_reference at(size_type n) const;
@@ -158,7 +160,7 @@ public:
 
   //! return a subsequence, copying over any appropriate annotation
   Sequence subseq(int start=0, int count = std::string::npos) const;
-  //! return a reverse compliment
+  //! return a reverse compliment (this needs to be improved?)
   std::string rev_comp() const;
 
   //! set sequence (filtered)
@@ -229,6 +231,8 @@ private:
   size_type seq_start;
   //! number of basepairs of the shared sequence we represent
   size_type seq_count;
+  //! strand orientation
+  strand_type strand;
   //! fasta header
   std::string header;
   //! species name
@@ -251,6 +255,7 @@ private:
     ar & BOOST_SERIALIZATION_NVP(seq);
     ar & BOOST_SERIALIZATION_NVP(seq_start);
     ar & BOOST_SERIALIZATION_NVP(seq_count);
+    ar & BOOST_SERIALIZATION_NVP(strand);
     ar & BOOST_SERIALIZATION_NVP(header);
     ar & BOOST_SERIALIZATION_NVP(species);
     ar & BOOST_SERIALIZATION_NVP(annots);