Move alphabet type into SeqString
authorDiane Trout <diane@caltech.edu>
Wed, 21 Mar 2007 03:55:12 +0000 (03:55 +0000)
committerDiane Trout <diane@caltech.edu>
Wed, 21 Mar 2007 03:55:12 +0000 (03:55 +0000)
This is another large patch.
This moves alphabet_ref from Sequence to SeqString.

I still need to deal with a better way of handling reverse complementing
especially given my attempts to keep everything in reference counted
structures.

Also this patch and the previous:
Store Sequence sequence location in a shared_ptr class
have broken the display of motifs in mussa.

(I used to be using shared_ptr<Sequences> for everything,
but now since there's a couple of seperate objects updating
the motifs in one doesn't really help so much for the other.

so warning, these two patches and the next few that I make
are going to be unstable.
**END OF DESCRIPTION***

Place the long patch description above the ***END OF DESCRIPTION*** marker.
The first line of this file will be the patch name.

This patch contains the following changes:

M ./alg/CMakeLists.txt +1
M ./alg/alphabet.cpp +35
M ./alg/alphabet.hpp -1 +14
M ./alg/motif_parser.cpp -3 +4
A ./alg/seq.cpp
M ./alg/seq.hpp -3 +17
M ./alg/seq_span.cpp -6 +3
M ./alg/seq_span.hpp -1 +4
M ./alg/sequence.cpp -82 +26
M ./alg/sequence.hpp -15 +10
M ./alg/test/CMakeLists.txt +1
M ./alg/test/test_alphabet.cpp +5
A ./alg/test/test_seq.cpp
M ./alg/test/test_seq_span.cpp +9
M ./alg/test/test_sequence.cpp -43 +43
M ./qui/motif_editor/MotifElement.cpp -2 +2

16 files changed:
alg/CMakeLists.txt
alg/alphabet.cpp
alg/alphabet.hpp
alg/motif_parser.cpp
alg/seq.cpp [new file with mode: 0644]
alg/seq.hpp
alg/seq_span.cpp
alg/seq_span.hpp
alg/sequence.cpp
alg/sequence.hpp
alg/test/CMakeLists.txt
alg/test/test_alphabet.cpp
alg/test/test_seq.cpp [new file with mode: 0644]
alg/test/test_seq_span.cpp
alg/test/test_sequence.cpp
qui/motif_editor/MotifElement.cpp

index 0a7e8e7773523286368f9ab586c687cb4fb7602f..13448bca3cf5baab7e05b4e157a960d504d256c9 100644 (file)
@@ -24,6 +24,7 @@ SET(SOURCES alphabet.cpp
             nway_other.cpp 
             nway_paths.cpp
             parse_options.cpp 
             nway_other.cpp 
             nway_paths.cpp
             parse_options.cpp 
+            seq.cpp
             seq_span.cpp
             sequence.cpp
             sequence_location.cpp )
             seq_span.cpp
             sequence.cpp
             sequence_location.cpp )
index 3e4e5c83ca1455f5bc2e66bef6fc11b1c1712f41..56919199b6c7ccf30c05a6471b95187c6075f150 100644 (file)
@@ -1,3 +1,5 @@
+#include <stdexcept>
+
 #include "alg/alphabet.hpp"
 
 // some standard dna alphabets 
 #include "alg/alphabet.hpp"
 
 // some standard dna alphabets 
@@ -12,6 +14,7 @@ const char *Alphabet::nucleic_cstr =
 //! the protein alphabet
 const char *Alphabet::protein_cstr = 
   "AaCcDdEeFfGgHhIiKkLlMmNnPpQqRrSsTtVvWwYy\012\015";
 //! the protein alphabet
 const char *Alphabet::protein_cstr = 
   "AaCcDdEeFfGgHhIiKkLlMmNnPpQqRrSsTtVvWwYy\012\015";
+const char *Alphabet::empty_cstr = "";
 
 const Alphabet& Alphabet::reduced_dna_alphabet() {
   static Alphabet *a = new Alphabet(reduced_dna_cstr);
 
 const Alphabet& Alphabet::reduced_dna_alphabet() {
   static Alphabet *a = new Alphabet(reduced_dna_cstr);
@@ -33,6 +36,10 @@ const Alphabet& Alphabet::protein_alphabet() {
   static Alphabet *a = new Alphabet(protein_cstr);
   return *a;
 }
   static Alphabet *a = new Alphabet(protein_cstr);
   return *a;
 }
+const Alphabet& Alphabet::empty_alphabet() {
+  static Alphabet *a = new Alphabet(empty_cstr);
+  return *a;
+}
 
 Alphabet::Alphabet(const char *a) :
   alphabet(a)
 
 Alphabet::Alphabet(const char *a) :
   alphabet(a)
@@ -47,7 +54,36 @@ void Alphabet::assign(const Alphabet& a)
   alphabet_set.insert(alphabet.begin(), alphabet.end());
 }
 
   alphabet_set.insert(alphabet.begin(), alphabet.end());
 }
 
+bool operator==(const Alphabet& x, const Alphabet& y)
+{
+  return x.alphabet == y.alphabet; 
+}
+
+std::ostream& operator<<(std::ostream& out, const Alphabet& a)
+{
+  out << a.alphabet;
+}
+
 bool Alphabet::exists(const char c) const
 {
   return (alphabet_set.find(c) != alphabet_set.end());
 }
 bool Alphabet::exists(const char c) const
 {
   return (alphabet_set.find(c) != alphabet_set.end());
 }
+
+const Alphabet& Alphabet::get_alphabet(AlphabetRef alpha)
+{
+  switch (alpha) {
+    case ::reduced_dna_alphabet:
+      return Alphabet::reduced_dna_alphabet();
+    case ::reduced_rna_alphabet:
+      return Alphabet::reduced_rna_alphabet();
+    case ::reduced_nucleic_alphabet:
+      return Alphabet::reduced_nucleic_alphabet();
+    case ::nucleic_alphabet:
+      return Alphabet::nucleic_alphabet();
+    case ::protein_alphabet:
+      return Alphabet::protein_alphabet();    
+    default:
+      throw std::runtime_error("unrecognized alphabet type");
+      break;
+  }
+}
\ No newline at end of file
index 399125478b57d514d4475bfff65685de8877fa68..513ea767b11081a9ea30c27d877f2c7d44f92306 100644 (file)
@@ -8,16 +8,26 @@
 #include <boost/serialization/version.hpp>
 
 #include <set>
 #include <boost/serialization/version.hpp>
 
 #include <set>
+#include <ostream>
 
 //! this is a helper class for sequence
 
 //! this is a helper class for sequence
+enum AlphabetRef { reduced_dna_alphabet, reduced_rna_alphabet, reduced_nucleic_alphabet, 
+                   nucleic_alphabet, protein_alphabet, empty_alphabet=255 };
+                   
 class Alphabet {
 friend class Sequence;
 public:
   typedef std::string::const_iterator const_iterator;
 class Alphabet {
 friend class Sequence;
 public:
   typedef std::string::const_iterator const_iterator;
-
+  //! define the various alphabet types (as python corebio)
+  
+  friend bool operator==(const Alphabet&, const Alphabet&);
+  friend std::ostream& operator<<(std::ostream&, const Alphabet&);
+     
   //! case-insensitive test to check a character for existence in our alphabet
   bool exists(const char) const;
   
   //! case-insensitive test to check a character for existence in our alphabet
   bool exists(const char) const;
   
+  //! return an alphabet given an AlphabetRef enumeration
+  static const Alphabet &get_alphabet(AlphabetRef);
   // note, if you want to define an alphabet for a sequence, you probably want 
   // to update the enumeration in Sequence, and Sequence::get_sequence
   //! The standard DNA alphabet, with unique, and unknown characters
   // note, if you want to define an alphabet for a sequence, you probably want 
   // to update the enumeration in Sequence, and Sequence::get_sequence
   //! The standard DNA alphabet, with unique, and unknown characters
@@ -35,6 +45,9 @@ public:
   //! the protein alphabet
   static const char *protein_cstr;  
   static const Alphabet &protein_alphabet(); 
   //! the protein alphabet
   static const char *protein_cstr;  
   static const Alphabet &protein_alphabet(); 
+  
+  static const char *empty_cstr;
+  static const Alphabet &empty_alphabet();
     
 private:
   //! what are allowable symbols in our alphabet
     
 private:
   //! what are allowable symbols in our alphabet
index d05f15bbd55410ec1b2e76c94ce5114b6cb85a68..c59c2873e160d697ab46d2b08510d1d985f905cf 100644 (file)
@@ -1,6 +1,7 @@
 #include "mussa_exceptions.hpp"
 #include "mussa_exceptions.hpp"
-#include "alg/alphabet.hpp"
-#include "alg/motif_parser.hpp"
+#include "alphabet.hpp"
+#include "motif_parser.hpp"
+#include "seq.hpp"
 
 #include <boost/spirit/core.hpp>
 #include <boost/spirit/actor/push_back_actor.hpp>
 
 #include <boost/spirit/core.hpp>
 #include <boost/spirit/actor/push_back_actor.hpp>
@@ -56,7 +57,7 @@ void motif_parser::push_motif::operator()(
   Iterator end) const
 {
   float red, green, blue, alpha;
   Iterator end) const
 {
   float red, green, blue, alpha;
-  Sequence seq(parsed->sequence, Sequence::nucleic_alphabet);
+  Sequence seq(parsed->sequence, nucleic_alphabet);
   seq.set_fasta_header(parsed->name);
   
   alpha = 1.0;
   seq.set_fasta_header(parsed->name);
   
   alpha = 1.0;
diff --git a/alg/seq.cpp b/alg/seq.cpp
new file mode 100644 (file)
index 0000000..41b99d4
--- /dev/null
@@ -0,0 +1,40 @@
+#include <stdexcept>
+
+#include "seq.hpp"
+
+const Alphabet& SeqString::get_alphabet() const
+{
+  return Alphabet::get_alphabet(alphabet);
+}
+
+std::string SeqString::create_reverse_complement_map() const 
+{
+  std::string rc_map(256, '~');
+  // if we're rna, use U instead of T
+  // we might want to add an "is_rna" to sequence at somepoint
+  char TU = (alphabet == reduced_rna_alphabet) ? 'U' : 'T';
+  char tu = (alphabet == reduced_rna_alphabet) ? 'u' : 't';
+  rc_map['A'] = TU ; rc_map['a'] = tu ;
+  rc_map['T'] = 'A'; rc_map['t'] = 'a';
+  rc_map['U'] = 'A'; rc_map['u'] = 'a';
+  rc_map['G'] = 'C'; rc_map['g'] = 'c';
+  rc_map['C'] = 'G'; rc_map['c'] = 'g';
+  rc_map['M'] = 'K'; rc_map['m'] = 'k';
+  rc_map['R'] = 'Y'; rc_map['r'] = 'y';
+  rc_map['W'] = 'W'; rc_map['w'] = 'w';
+  rc_map['S'] = 'S'; rc_map['s'] = 's';
+  rc_map['Y'] = 'R'; rc_map['y'] = 'r';
+  rc_map['K'] = 'M'; rc_map['k'] = 'm';
+  rc_map['V'] = 'B'; rc_map['v'] = 'b';
+  rc_map['H'] = 'D'; rc_map['h'] = 'd';
+  rc_map['D'] = 'H'; rc_map['d'] = 'h';
+  rc_map['B'] = 'V'; rc_map['b'] = 'v';  
+  rc_map['N'] = 'N'; rc_map['n'] = 'n';
+  rc_map['X'] = 'X'; rc_map['x'] = 'x';
+  rc_map['?'] = '?'; 
+  rc_map['.'] = '.'; 
+  rc_map['-'] = '-'; 
+  rc_map['~'] = '~'; // not really needed, but perhaps it's clearer.
+   
+  return rc_map;
+}
index b102a00767b7356849abebbe2d235812cad03186..5ac42a2bb220ec58630ec4c1138ca6c9ee8c422b 100644 (file)
 
 #include <boost/shared_ptr.hpp>
 
 
 #include <boost/shared_ptr.hpp>
 
+#include "alphabet.hpp"
 //! These classes provide for the internal implementation for the Sequence class
 
 /** the only purpose of this class is that the shared_ptr template 
  * functions need the serialization support to be in-class.
  */
 //! These classes provide for the internal implementation for the Sequence class
 
 /** the only purpose of this class is that the shared_ptr template 
  * functions need the serialization support to be in-class.
  */
+
 class SeqString : public std::string
 {
 public:
 class SeqString : public std::string
 {
 public:
@@ -34,17 +36,29 @@ public:
   typedef std::string::value_type value_type;
   static const size_type npos = std::string::npos;
   
   typedef std::string::value_type value_type;
   static const size_type npos = std::string::npos;
   
-  SeqString() : std::string() {};
-  SeqString(const std::string &s) : std::string(s) {};
+  SeqString(AlphabetRef a=reduced_nucleic_alphabet) : 
+    std::string(),
+    alphabet(a) 
+  {}
+  SeqString(const std::string &s, AlphabetRef a=reduced_nucleic_alphabet) : 
+    std::string(s),
+    alphabet(a)
+  {}
+
+  AlphabetRef get_alphabet_ref() { return alphabet; }
+  const Alphabet& get_alphabet() const;
   
   
+  std::string create_reverse_complement_map() const;
 private:
 private:
+  AlphabetRef alphabet;
   friend class boost::serialization::access;
   template<class Archive>
   void serialize(Archive& ar, const unsigned int /*version*/) {
     //ar & BOOST_SERIALIZATION_BASE_OBJECT_NVP(std::string);
     ar & boost::serialization::make_nvp("bases",
   friend class boost::serialization::access;
   template<class Archive>
   void serialize(Archive& ar, const unsigned int /*version*/) {
     //ar & BOOST_SERIALIZATION_BASE_OBJECT_NVP(std::string);
     ar & boost::serialization::make_nvp("bases",
-     boost::serialization::base_object<std::string>(*this)
+      boost::serialization::base_object<std::string>(*this)
     );
     );
+    ar & BOOST_SERIALIZATION_NVP(alphabet);
   }
 };
 typedef boost::shared_ptr<SeqString> SeqStringRef;
   }
 };
 typedef boost::shared_ptr<SeqString> SeqStringRef;
index 3382bd8fd4288133207a2e4af1afeb96a922a689..2870f0f077f112f57cc2c60857926e2974af1f4b 100644 (file)
@@ -21,8 +21,9 @@ SeqSpan::SeqSpan(const SeqSpan *p)
 {
 }
 
 {
 }
 
-SeqSpan::SeqSpan(const std::string &seq_)
-  : seq(new SeqString(seq_)),
+SeqSpan::SeqSpan(const std::string &seq_,
+                 AlphabetRef a)
+  : seq(new SeqString(seq_, a)),
     seq_start(0),
     seq_count(seq_.length()),
     parent()
     seq_start(0),
     seq_count(seq_.length()),
     parent()
@@ -79,10 +80,6 @@ bool operator<(const SeqSpan& a, const SeqSpan& b)
 bool operator==(const SeqSpan& a, const SeqSpan& b)
 {
   if (SeqSpan::isFamily(a, b)) {
 bool operator==(const SeqSpan& a, const SeqSpan& b)
 {
   if (SeqSpan::isFamily(a, b)) {
-    std::cout << "  " << a.seq_start 
-              <<  " " << b.seq_start
-              <<  " " << a.seq_count
-              <<  " " << b.seq_count << std::endl;
     // can do fast comparison
     if (a.seq_start == b.seq_start and a.seq_count == b.seq_count) {
       return true;
     // can do fast comparison
     if (a.seq_start == b.seq_start and a.seq_count == b.seq_count) {
       return true;
index ec94c2717cf8d074933261f7d61863497b663b4d..8d1991360df0bdcebee6e83cab7b926686630a9a 100644 (file)
@@ -39,7 +39,9 @@ public:
 public:
   SeqSpan(const SeqSpan &);
   SeqSpan(const SeqSpan *);
 public:
   SeqSpan(const SeqSpan &);
   SeqSpan(const SeqSpan *);
-  explicit SeqSpan(const std::string &);
+  explicit SeqSpan(const std::string &, 
+               AlphabetRef = reduced_nucleic_alphabet
+           );
   SeqSpan(const SeqSpanRef, size_type start=0, size_type count=npos);
 
   //! assignment
   SeqSpan(const SeqSpanRef, size_type start=0, size_type count=npos);
 
   //! assignment
@@ -50,6 +52,7 @@ public:
   friend bool operator==(const SeqSpan&, const SeqSpan&);
   friend bool operator!=(const SeqSpan&, const SeqSpan&);
     
   friend bool operator==(const SeqSpan&, const SeqSpan&);
   friend bool operator!=(const SeqSpan&, const SeqSpan&);
     
+  const Alphabet& get_alphabet() const { return seq->get_alphabet(); }
   //! \defgroup string_operators
   //! @{
   //! retrive element at specific position
   //! \defgroup string_operators
   //! @{
   //! retrive element at specific position
index 91656abd029628b327e8adf3e62ff26df85b9f72..47cca2a7dfdfb46c831491ee276f20ec5dd6f4a0 100644 (file)
@@ -79,9 +79,8 @@ motif::~motif()
 }
 
 
 }
 
 
-Sequence::Sequence(alphabet_ref alphabet_)
-  : seq(new SeqSpan("")),
-    alphabet(alphabet_),
+Sequence::Sequence(AlphabetRef alphabet)
+  : seq(new SeqSpan("", alphabet)),
     strand(UnknownStrand)
 {
 }
     strand(UnknownStrand)
 {
 }
@@ -90,27 +89,24 @@ Sequence::~Sequence()
 {
 }
 
 {
 }
 
-Sequence::Sequence(const char *seq, alphabet_ref alphabet_)
-  : alphabet(alphabet_),
-    strand(UnknownStrand),
+Sequence::Sequence(const char *seq, AlphabetRef alphabet_)
+  : strand(UnknownStrand),
     header(""),
     species("")
 {
     header(""),
     species("")
 {
-  set_filtered_sequence(seq, alphabet);
+  set_filtered_sequence(seq, alphabet_);
 }
 
 }
 
-Sequence::Sequence(const std::string& seq, alphabet_ref alphabet_) 
-  : alphabet(alphabet_),
-    strand(UnknownStrand),
+Sequence::Sequence(const std::string& seq, AlphabetRef alphabet_) 
+  : strand(UnknownStrand),
     header(""),
     species("")
 {
     header(""),
     species("")
 {
-  set_filtered_sequence(seq, alphabet);
+  set_filtered_sequence(seq, alphabet_);
 }
 
 Sequence::Sequence(const Sequence& o)
   : seq(o.seq),
 }
 
 Sequence::Sequence(const Sequence& o)
   : seq(o.seq),
-    alphabet(o.alphabet),
     strand(o.strand),
     header(o.header),
     species(o.species),
     strand(o.strand),
     header(o.header),
     species(o.species),
@@ -121,7 +117,6 @@ Sequence::Sequence(const Sequence& o)
 
 Sequence::Sequence(const Sequence* o)
   : seq(o->seq),
 
 Sequence::Sequence(const Sequence* o)
   : seq(o->seq),
-    alphabet(o->alphabet),
     strand(o->strand),
     header(o->header),
     species(o->species),
     strand(o->strand),
     header(o->header),
     species(o->species),
@@ -130,9 +125,8 @@ Sequence::Sequence(const Sequence* o)
 {
 }
 
 {
 }
 
-Sequence::Sequence(const SeqSpanRef& seq_ref, alphabet_ref alphabet_)
+Sequence::Sequence(const SeqSpanRef& seq_ref)
   : seq(seq_ref),
   : seq(seq_ref),
-    alphabet(alphabet),
     strand(UnknownStrand),
     header(""),
     species("")
     strand(UnknownStrand),
     header(""),
     species("")
@@ -143,7 +137,6 @@ Sequence &Sequence::operator=(const Sequence& s)
 {
   if (this != &s) {
     seq = s.seq;
 {
   if (this != &s) {
     seq = s.seq;
-    alphabet = s.alphabet;
     strand = s.strand;
     header = s.header;
     species = s.species;
     strand = s.strand;
     header = s.header;
     species = s.species;
@@ -171,11 +164,11 @@ static void multiplatform_getline(std::istream& in, std::string& line)
 
 void Sequence::load_fasta(fs::path file_path, int seq_num, int start_index, int end_index)
 {
 
 void Sequence::load_fasta(fs::path file_path, int seq_num, int start_index, int end_index)
 {
-  load_fasta(file_path, alphabet, seq_num, start_index, end_index);
+  load_fasta(file_path, reduced_nucleic_alphabet, seq_num, start_index, end_index);
 }
 
 //! load a fasta file into a sequence
 }
 
 //! load a fasta file into a sequence
-void Sequence::load_fasta(fs::path file_path, alphabet_ref a, 
+void Sequence::load_fasta(fs::path file_path, AlphabetRef a, 
                           int seq_num, int start_index, int end_index)
 {
   fs::fstream data_file;
                           int seq_num, int start_index, int end_index)
 {
   fs::fstream data_file;
@@ -212,11 +205,11 @@ void Sequence::load_fasta(fs::path file_path, alphabet_ref a,
 void Sequence::load_fasta(std::istream& file, 
                           int seq_num, int start_index, int end_index)
 {
 void Sequence::load_fasta(std::istream& file, 
                           int seq_num, int start_index, int end_index)
 {
-  load_fasta(file, alphabet, seq_num, start_index, end_index);
+  load_fasta(file, reduced_nucleic_alphabet, seq_num, start_index, end_index);
 }
 
 void
 }
 
 void
-Sequence::load_fasta(std::istream& data_file, alphabet_ref a, 
+Sequence::load_fasta(std::istream& data_file, AlphabetRef a, 
                      int seq_num, 
                      int start_index, int end_index)
 {
                      int seq_num, 
                      int start_index, int end_index)
 {
@@ -227,7 +220,7 @@ Sequence::load_fasta(std::istream& data_file, alphabet_ref a,
   std::string rev_comp;
   std::string sequence_raw;
   std::string seq_tmp;      // holds sequence during basic filtering
   std::string rev_comp;
   std::string sequence_raw;
   std::string seq_tmp;      // holds sequence during basic filtering
-  const Alphabet &alpha = get_alphabet(a);
+  const Alphabet &alpha = get_alphabet();
 
   if (seq_num == 0) {
     throw mussa_load_error("fasta sequence number is 1 based (can't be 0)");
 
   if (seq_num == 0) {
     throw mussa_load_error("fasta sequence number is 1 based (can't be 0)");
@@ -288,19 +281,18 @@ Sequence::load_fasta(std::istream& data_file, alphabet_ref a,
 }
 
 void Sequence::set_filtered_sequence(const std::string &in_seq,
 }
 
 void Sequence::set_filtered_sequence(const std::string &in_seq,
-                                     alphabet_ref alphabet_,
+                                     AlphabetRef alphabet_,
                                      size_type start,
                                      size_type count,
                                      strand_type strand_)
 {
                                      size_type start,
                                      size_type count,
                                      strand_type strand_)
 {
-  alphabet = alphabet_;
   if ( count == npos)
     count = in_seq.size() - start;
   std::string new_seq;
   new_seq.reserve(count);
 
   // finally, the actual conversion loop
   if ( count == npos)
     count = in_seq.size() - start;
   std::string new_seq;
   new_seq.reserve(count);
 
   // finally, the actual conversion loop
-  const Alphabet& alpha_impl = get_alphabet(); // go get one of our actual alphabets
+  const Alphabet& alpha_impl = Alphabet::get_alphabet(alphabet_); // go get one of our actual alphabets
   std::string::const_iterator seq_i = in_seq.begin()+start;
   for(size_type i = 0; i != count; ++i, ++seq_i)
   {
   std::string::const_iterator seq_i = in_seq.begin()+start;
   for(size_type i = 0; i != count; ++i, ++seq_i)
   {
@@ -310,7 +302,7 @@ void Sequence::set_filtered_sequence(const std::string &in_seq,
       new_seq.append(1, 'N');
     }
   }
       new_seq.append(1, 'N');
     }
   }
-  SeqSpanRef new_seq_ref(new SeqSpan(new_seq)); 
+  SeqSpanRef new_seq_ref(new SeqSpan(new_seq, alphabet_)); 
   seq = new_seq_ref;
   strand = strand_;
 }
   seq = new_seq_ref;
   strand = strand_;
 }
@@ -570,43 +562,14 @@ Sequence::subseq(size_type start, size_type count) const
   return new_seq;
 }
 
   return new_seq;
 }
 
-std::string Sequence::create_reverse_map() const 
-{
-  std::string rc_map(256, '~');
-  // if we're rna, use U instead of T
-  // we might want to add an "is_rna" to sequence at somepoint
-  char TU = (alphabet == reduced_rna_alphabet) ? 'U' : 'T';
-  char tu = (alphabet == reduced_rna_alphabet) ? 'u' : 't';
-  rc_map['A'] = TU ; rc_map['a'] = tu ;
-  rc_map['T'] = 'A'; rc_map['t'] = 'a';
-  rc_map['U'] = 'A'; rc_map['u'] = 'a';
-  rc_map['G'] = 'C'; rc_map['g'] = 'c';
-  rc_map['C'] = 'G'; rc_map['c'] = 'g';
-  rc_map['M'] = 'K'; rc_map['m'] = 'k';
-  rc_map['R'] = 'Y'; rc_map['r'] = 'y';
-  rc_map['W'] = 'W'; rc_map['w'] = 'w';
-  rc_map['S'] = 'S'; rc_map['s'] = 's';
-  rc_map['Y'] = 'R'; rc_map['y'] = 'r';
-  rc_map['K'] = 'M'; rc_map['k'] = 'm';
-  rc_map['V'] = 'B'; rc_map['v'] = 'b';
-  rc_map['H'] = 'D'; rc_map['h'] = 'd';
-  rc_map['D'] = 'H'; rc_map['d'] = 'h';
-  rc_map['B'] = 'V'; rc_map['b'] = 'v';  
-  rc_map['N'] = 'N'; rc_map['n'] = 'n';
-  rc_map['X'] = 'X'; rc_map['x'] = 'x';
-  rc_map['?'] = '?'; 
-  rc_map['.'] = '.'; 
-  rc_map['-'] = '-'; 
-  rc_map['~'] = '~'; // not really needed, but perhaps it's clearer. 
-  return rc_map;
-}
 
 
+// FIXME: This needs to be moved into SeqSpan
 Sequence Sequence::rev_comp() const
 {
   std::string rev_comp;
   rev_comp.reserve(length());
   
 Sequence Sequence::rev_comp() const
 {
   std::string rev_comp;
   rev_comp.reserve(length());
   
-  std::string rc_map = create_reverse_map();
+  std::string rc_map = seq->seq->create_reverse_complement_map();
 
   // reverse and convert
   Sequence::const_reverse_iterator seq_i;
 
   // reverse and convert
   Sequence::const_reverse_iterator seq_i;
@@ -617,7 +580,12 @@ Sequence Sequence::rev_comp() const
   {
     rev_comp.append(1, rc_map[*seq_i]);
   }
   {
     rev_comp.append(1, rc_map[*seq_i]);
   }
-  return Sequence(rev_comp, alphabet); 
+  return Sequence(rev_comp, seq->seq->get_alphabet_ref()); 
+}
+
+const Alphabet& Sequence::get_alphabet() const
+{
+  return (seq) ? seq->get_alphabet() : Alphabet::empty_alphabet(); 
 }
 
 void Sequence::set_fasta_header(std::string header_)
 }
 
 void Sequence::set_fasta_header(std::string header_)
@@ -653,31 +621,7 @@ Sequence::get_name() const
     return "";
 }
 
     return "";
 }
 
-const Alphabet& Sequence::get_alphabet() const
-{
-  return get_alphabet(alphabet);
-}
-
-const Alphabet& Sequence::get_alphabet(alphabet_ref alpha) const
-{
-  switch (alpha) {
-    case reduced_dna_alphabet:
-      return Alphabet::reduced_dna_alphabet();
-    case reduced_rna_alphabet:
-      return Alphabet::reduced_rna_alphabet();
-    case reduced_nucleic_alphabet:
-      return Alphabet::reduced_nucleic_alphabet();
-    case nucleic_alphabet:
-      return Alphabet::nucleic_alphabet();
-    case protein_alphabet:
-      return Alphabet::protein_alphabet();    
-    default:
-      throw std::runtime_error("unrecognized alphabet type");
-      break;
-  }
-}
-
-void Sequence::set_sequence(const std::string& s, alphabet_ref a) 
+void Sequence::set_sequence(const std::string& s, AlphabetRef a) 
 {
   set_filtered_sequence(s, a);
 }
 {
   set_filtered_sequence(s, a);
 }
index 09c53c4bf3a1a911e793add0a8730335285b437a..4b93ef132d81b93e924e153965407b32585c7851 100644 (file)
@@ -104,15 +104,15 @@ public:
   typedef SeqString::size_type size_type;
   static const size_type npos = SeqString::npos;
   enum strand_type { UnknownStrand, PlusStrand, MinusStrand, BothStrand };
   typedef SeqString::size_type size_type;
   static const size_type npos = SeqString::npos;
   enum strand_type { UnknownStrand, PlusStrand, MinusStrand, BothStrand };
-  enum alphabet_ref { reduced_dna_alphabet, reduced_rna_alphabet, reduced_nucleic_alphabet, 
-                      nucleic_alphabet, protein_alphabet };
                       
                       
-  Sequence(alphabet_ref a = reduced_nucleic_alphabet);
-  Sequence(const char* seq, alphabet_ref a = reduced_nucleic_alphabet);
-  Sequence(const std::string& seq, alphabet_ref a = reduced_nucleic_alphabet);
+  Sequence(AlphabetRef a = reduced_nucleic_alphabet);
+  Sequence(const char* seq, 
+           AlphabetRef a = reduced_nucleic_alphabet);
+  Sequence(const std::string& seq,
+           AlphabetRef a = reduced_nucleic_alphabet);
   Sequence(const Sequence& seq);
   Sequence(const Sequence *);
   Sequence(const Sequence& seq);
   Sequence(const Sequence *);
-  Sequence(const SeqSpanRef&, alphabet_ref a = reduced_nucleic_alphabet); 
+  Sequence(const SeqSpanRef&); 
   ~Sequence();
   //! assignment to constant sequences
   Sequence &operator=(const Sequence&);
   ~Sequence();
   //! assignment to constant sequences
   Sequence &operator=(const Sequence&);
@@ -125,7 +125,7 @@ public:
 
   //! set sequence to a (sub)string containing nothing but AGCTN
   void set_filtered_sequence(const std::string& seq,
 
   //! set sequence to a (sub)string containing nothing but AGCTN
   void set_filtered_sequence(const std::string& seq,
-                             alphabet_ref a, 
+                             AlphabetRef a, 
                              size_type start=0,
                              size_type count=npos,
                              strand_type strand=UnknownStrand);
                              size_type start=0,
                              size_type count=npos,
                              strand_type strand=UnknownStrand);
@@ -166,7 +166,7 @@ public:
   Sequence rev_comp() const;
 
   //! set sequence (filtered)
   Sequence rev_comp() const;
 
   //! set sequence (filtered)
-  void set_sequence(const std::string &, alphabet_ref);
+  void set_sequence(const std::string &, AlphabetRef);
   //! get sequence
   std::string get_sequence() const;
   //! set species name
   //! get sequence
   std::string get_sequence() const;
   //! set species name
@@ -181,8 +181,6 @@ public:
   std::string get_name() const;
   //! return a reference to whichever alphabet we're currently representing
   const Alphabet& get_alphabet() const; 
   std::string get_name() const;
   //! return a reference to whichever alphabet we're currently representing
   const Alphabet& get_alphabet() const; 
-  //! return a reference to whichever alphabet we're currently representing
-  const Alphabet& get_alphabet(alphabet_ref) const; 
   
   //! load sequence from fasta file using the sequences current alphabet
   void load_fasta(const boost::filesystem::path file_path, int seq_num=1,
   
   //! load sequence from fasta file using the sequences current alphabet
   void load_fasta(const boost::filesystem::path file_path, int seq_num=1,
@@ -192,7 +190,7 @@ public:
   //! \throw sequence_empty_error
   //! \throw sequence_empty_file_error
   void load_fasta(const boost::filesystem::path file_path, 
   //! \throw sequence_empty_error
   //! \throw sequence_empty_file_error
   void load_fasta(const boost::filesystem::path file_path, 
-                  alphabet_ref a, 
+                  AlphabetRef a, 
                   int seq_num=1, 
                              int start_index=0, int end_index=0);
   void load_fasta(std::istream& file, 
                   int seq_num=1, 
                              int start_index=0, int end_index=0);
   void load_fasta(std::istream& file, 
@@ -202,7 +200,7 @@ public:
   //! \throw sequence_empty_error
   //! \throw sequence_empty_file_error
   void load_fasta(std::istream& file, 
   //! \throw sequence_empty_error
   //! \throw sequence_empty_file_error
   void load_fasta(std::istream& file, 
-                  alphabet_ref a,
+                  AlphabetRef a,
                   int seq_num=1, 
                              int start_index=0, int end_index=0);
   //! load sequence annotations
                   int seq_num=1, 
                              int start_index=0, int end_index=0);
   //! load sequence annotations
@@ -236,8 +234,6 @@ public:
   
 protected:  
   SeqSpanRef seq;
   
 protected:  
   SeqSpanRef seq;
-  //! which alphabet we're using
-  alphabet_ref alphabet;
   //! strand orientation
   strand_type strand;
   //! fasta header
   //! strand orientation
   strand_type strand;
   //! fasta header
@@ -263,7 +259,6 @@ protected:
   template<class Archive>
   void serialize(Archive& ar, const unsigned int /*version*/) {
     ar & BOOST_SERIALIZATION_NVP(seq);
   template<class Archive>
   void serialize(Archive& ar, const unsigned int /*version*/) {
     ar & BOOST_SERIALIZATION_NVP(seq);
-    ar & BOOST_SERIALIZATION_NVP(alphabet);
     ar & BOOST_SERIALIZATION_NVP(strand);
     ar & BOOST_SERIALIZATION_NVP(header);
     ar & BOOST_SERIALIZATION_NVP(species);
     ar & BOOST_SERIALIZATION_NVP(strand);
     ar & BOOST_SERIALIZATION_NVP(header);
     ar & BOOST_SERIALIZATION_NVP(species);
index 433c63de67dc5d6d03edec820cf61a0ceb055f09..c3f8d09b5d7c63280b673f9ba30603ccf5e6aa6e 100644 (file)
@@ -51,6 +51,7 @@ MAKE_ALG_UNITTEST( test_glseqbrowser )
 MAKE_ALG_UNITTEST( test_glsequence )
 MAKE_ALG_UNITTEST( test_mussa )
 MAKE_ALG_UNITTEST( test_nway )
 MAKE_ALG_UNITTEST( test_glsequence )
 MAKE_ALG_UNITTEST( test_mussa )
 MAKE_ALG_UNITTEST( test_nway )
+MAKE_ALG_UNITTEST( test_seq )
 MAKE_ALG_UNITTEST( test_sequence )
 MAKE_ALG_UNITTEST( test_seq_span )
 MAKE_ALG_UNITTEST( test_sequence_location )
 MAKE_ALG_UNITTEST( test_sequence )
 MAKE_ALG_UNITTEST( test_seq_span )
 MAKE_ALG_UNITTEST( test_sequence_location )
index 7758556beb1ee587dca9ac2e09a553cc8450ba49..5f7d9f3ae03f8bab8b6a82e235b515881f92001a 100644 (file)
@@ -21,3 +21,9 @@ BOOST_AUTO_TEST_CASE( alphabet_simple )
   // copied from alphabet.cpp
   BOOST_CHECK_EQUAL( Alphabet::reduced_dna_cstr, "AaCcGgTtNn\012\015");
 }
   // copied from alphabet.cpp
   BOOST_CHECK_EQUAL( Alphabet::reduced_dna_cstr, "AaCcGgTtNn\012\015");
 }
+
+BOOST_AUTO_TEST_CASE( alphabet_equality)
+{
+  Alphabet a(Alphabet::reduced_dna_alphabet());
+  BOOST_CHECK_EQUAL( a, Alphabet::reduced_dna_alphabet() );
+}
\ No newline at end of file
diff --git a/alg/test/test_seq.cpp b/alg/test/test_seq.cpp
new file mode 100644 (file)
index 0000000..a016cb2
--- /dev/null
@@ -0,0 +1,31 @@
+#define BOOST_AUTO_TEST_MAIN
+#include <boost/test/auto_unit_test.hpp>
+
+#include "seq.hpp"
+
+BOOST_AUTO_TEST_CASE( seqstring_default_alphabet )
+{
+  SeqString s;
+  BOOST_CHECK_EQUAL(s.get_alphabet_ref(), reduced_nucleic_alphabet);
+  BOOST_CHECK_EQUAL(s.get_alphabet(), Alphabet::reduced_nucleic_alphabet());
+  BOOST_CHECK_EQUAL(s.size(), 0);
+}
+
+BOOST_AUTO_TEST_CASE( seqstring_string )
+{
+  SeqString s("AGCT");
+
+  BOOST_CHECK_EQUAL(s.get_alphabet_ref(), reduced_nucleic_alphabet);
+  BOOST_CHECK_EQUAL(s.get_alphabet(), Alphabet::reduced_nucleic_alphabet());
+  BOOST_CHECK_EQUAL(s.size(), 4);  
+}
+
+// such an exciting unit test, making sure that a=b; a==b
+BOOST_AUTO_TEST_CASE( seqstring_string_alphabet )
+{
+  SeqString s("AGCT", nucleic_alphabet);
+
+  BOOST_CHECK_EQUAL(s.get_alphabet_ref(), nucleic_alphabet);
+  BOOST_CHECK_EQUAL(s.get_alphabet(), Alphabet::nucleic_alphabet());
+  BOOST_CHECK_EQUAL(s.size(), 4);  
+}
\ No newline at end of file
index 9b98f2133d64ba0507d9881e9c75435232f0c0ad..90e3d00928fa09ee921cd67157d2bae98cac47a3 100644 (file)
@@ -14,6 +14,15 @@ BOOST_AUTO_TEST_CASE( seqspan_from_string )
   BOOST_CHECK_EQUAL(span1->sequence(), str1);
 }
 
   BOOST_CHECK_EQUAL(span1->sequence(), str1);
 }
 
+BOOST_AUTO_TEST_CASE( seqspan_from_string_with_alphabet )
+{
+  std::string str1("AAGGCCUU");
+  SeqSpanRef span1(new SeqSpan(str1, reduced_rna_alphabet));
+  BOOST_CHECK_EQUAL(span1->length(), str1.length());
+  BOOST_CHECK_EQUAL(span1->sequence(), str1);
+  BOOST_CHECK_EQUAL(span1->get_alphabet(), Alphabet::reduced_rna_alphabet());
+}
+
 BOOST_AUTO_TEST_CASE( seqspan_from_seqspan )
 {
   std::string str1("AAGGCCTT");
 BOOST_AUTO_TEST_CASE( seqspan_from_seqspan )
 {
   std::string str1("AAGGCCTT");
index 3847252a80884fa06786afe18feb7cf7f387a1e4..6da21f166449c348c7b38c83f5dde803b7d01f54 100644 (file)
@@ -86,10 +86,10 @@ BOOST_AUTO_TEST_CASE( sequence_eol_conventions )
 BOOST_AUTO_TEST_CASE( sequence_filter )
 {
   const char *core_seq = "AATTGGCC";
 BOOST_AUTO_TEST_CASE( sequence_filter )
 {
   const char *core_seq = "AATTGGCC";
-  Sequence s1(core_seq, Sequence::reduced_dna_alphabet);
+  Sequence s1(core_seq, reduced_dna_alphabet);
   BOOST_CHECK_EQUAL(s1, core_seq);
 
   BOOST_CHECK_EQUAL(s1, core_seq);
 
-  Sequence s2("aattggcc", Sequence::reduced_dna_alphabet);
+  Sequence s2("aattggcc", reduced_dna_alphabet);
   BOOST_CHECK_EQUAL(s2, "AATTGGCC");
   BOOST_CHECK_EQUAL(s2.rev_comp(), "GGCCAATT");
   BOOST_CHECK_EQUAL(s2.rev_comp(), "ggccaatt"); // we should be case insensitive now
   BOOST_CHECK_EQUAL(s2, "AATTGGCC");
   BOOST_CHECK_EQUAL(s2.rev_comp(), "GGCCAATT");
   BOOST_CHECK_EQUAL(s2.rev_comp(), "ggccaatt"); // we should be case insensitive now
@@ -97,15 +97,15 @@ BOOST_AUTO_TEST_CASE( sequence_filter )
   //We're currently forcing sequences to uppercase
   BOOST_CHECK_EQUAL(s2.get_sequence(), "AATTGGCC"); 
 
   //We're currently forcing sequences to uppercase
   BOOST_CHECK_EQUAL(s2.get_sequence(), "AATTGGCC"); 
 
-  Sequence s3("asdfg", Sequence::reduced_dna_alphabet);
+  Sequence s3("asdfg", reduced_dna_alphabet);
   BOOST_CHECK_EQUAL(s3, "ANNNG");
   BOOST_CHECK_EQUAL(s3.subseq(0,2), "AN");
 
   BOOST_CHECK_EQUAL(s3, "ANNNG");
   BOOST_CHECK_EQUAL(s3.subseq(0,2), "AN");
 
-  s3.set_filtered_sequence("AAGGCCTT", Sequence::reduced_dna_alphabet, 0, 2); 
+  s3.set_filtered_sequence("AAGGCCTT", reduced_dna_alphabet, 0, 2); 
   BOOST_CHECK_EQUAL(s3, "AA");
   BOOST_CHECK_EQUAL(s3, "AA");
-  s3.set_filtered_sequence("AAGGCCTT", Sequence::reduced_dna_alphabet, 2, 2);
+  s3.set_filtered_sequence("AAGGCCTT", reduced_dna_alphabet, 2, 2);
   BOOST_CHECK_EQUAL( s3, "GG");
   BOOST_CHECK_EQUAL( s3, "GG");
-  s3.set_filtered_sequence("AAGGCCTT", Sequence::reduced_dna_alphabet, 4);
+  s3.set_filtered_sequence("AAGGCCTT", reduced_dna_alphabet, 4);
   BOOST_CHECK_EQUAL( s3, "CCTT");
 
   s3 = "AAGGFF";
   BOOST_CHECK_EQUAL( s3, "CCTT");
 
   s3 = "AAGGFF";
@@ -115,12 +115,12 @@ BOOST_AUTO_TEST_CASE( sequence_filter )
 BOOST_AUTO_TEST_CASE( sequence_nucleic_alphabet )
 {
   std::string agct("AGCT");
 BOOST_AUTO_TEST_CASE( sequence_nucleic_alphabet )
 {
   std::string agct("AGCT");
-  Sequence seq(agct, Sequence::nucleic_alphabet);
+  Sequence seq(agct, nucleic_alphabet);
   BOOST_CHECK_EQUAL(seq.size(), agct.size());
   BOOST_CHECK_EQUAL(seq.get_sequence(), agct);
   
   std::string bdv("BDv");
   BOOST_CHECK_EQUAL(seq.size(), agct.size());
   BOOST_CHECK_EQUAL(seq.get_sequence(), agct);
   
   std::string bdv("BDv");
-  Sequence seq_bdv(bdv, Sequence::nucleic_alphabet);
+  Sequence seq_bdv(bdv, nucleic_alphabet);
   BOOST_CHECK_EQUAL(seq_bdv.size(), bdv.size());
   // forcing sequence to upper case
   BOOST_CHECK_EQUAL(seq_bdv.get_sequence(), 
   BOOST_CHECK_EQUAL(seq_bdv.size(), bdv.size());
   // forcing sequence to upper case
   BOOST_CHECK_EQUAL(seq_bdv.get_sequence(), 
@@ -148,7 +148,7 @@ BOOST_AUTO_TEST_CASE( sequence_default_alphabet )
 
 BOOST_AUTO_TEST_CASE( subseq_names )
 {
 
 BOOST_AUTO_TEST_CASE( subseq_names )
 {
-  Sequence s1("AAGGCCTT", Sequence::reduced_dna_alphabet);
+  Sequence s1("AAGGCCTT", reduced_dna_alphabet);
   s1.set_species("species");
   s1.set_fasta_header("a fasta header");
   Sequence s2 = s1.subseq(2,2);
   s1.set_species("species");
   s1.set_fasta_header("a fasta header");
   Sequence s2 = s1.subseq(2,2);
@@ -164,7 +164,7 @@ BOOST_AUTO_TEST_CASE( sequence_start_stop )
   BOOST_CHECK_EQUAL( s1.stop(), 0 );
 
   std::string seq_string("AAGGCCTT");
   BOOST_CHECK_EQUAL( s1.stop(), 0 );
 
   std::string seq_string("AAGGCCTT");
-  Sequence s2(seq_string, Sequence::reduced_dna_alphabet);
+  Sequence s2(seq_string, reduced_dna_alphabet);
   BOOST_CHECK_EQUAL( s2.start(), 0 );
   BOOST_CHECK_EQUAL( s2.stop(), seq_string.size() );
 
   BOOST_CHECK_EQUAL( s2.start(), 0 );
   BOOST_CHECK_EQUAL( s2.stop(), seq_string.size() );
 
@@ -189,7 +189,7 @@ BOOST_AUTO_TEST_CASE( sequence_load )
   fs::path seq_path(fs::path(EXAMPLE_DIR, fs::native)/ "seq");
   seq_path /=  "human_mck_pro.fa";
   Sequence s;
   fs::path seq_path(fs::path(EXAMPLE_DIR, fs::native)/ "seq");
   seq_path /=  "human_mck_pro.fa";
   Sequence s;
-  s.load_fasta(seq_path, Sequence::reduced_dna_alphabet);
+  s.load_fasta(seq_path, reduced_dna_alphabet);
   BOOST_CHECK_EQUAL(s.subseq(0, 5), "GGATC"); // first few chars of fasta file
   BOOST_CHECK_EQUAL(s.subseq(2, 3), "ATC");
   BOOST_CHECK_EQUAL(s.get_fasta_header(), "gi|180579|gb|M21487.1|HUMCKMM1 Human "
   BOOST_CHECK_EQUAL(s.subseq(0, 5), "GGATC"); // first few chars of fasta file
   BOOST_CHECK_EQUAL(s.subseq(2, 3), "ATC");
   BOOST_CHECK_EQUAL(s.get_fasta_header(), "gi|180579|gb|M21487.1|HUMCKMM1 Human "
@@ -255,15 +255,15 @@ BOOST_AUTO_TEST_CASE( sequence_load_dna_reduced )
   std::stringstream garbage_fasta(garbage_string);
   
   Sequence s;
   std::stringstream garbage_fasta(garbage_string);
   
   Sequence s;
-  s.load_fasta(reduced_dna_fasta, Sequence::reduced_dna_alphabet);
+  s.load_fasta(reduced_dna_fasta, reduced_dna_alphabet);
   BOOST_CHECK_THROW(s.load_fasta(invalid_dna_fasta, 
   BOOST_CHECK_THROW(s.load_fasta(invalid_dna_fasta, 
-                                 Sequence::reduced_dna_alphabet),
+                                 reduced_dna_alphabet),
                     sequence_invalid_load_error);
   BOOST_CHECK_THROW(s.load_fasta(reduced_rna_fasta, 
                     sequence_invalid_load_error);
   BOOST_CHECK_THROW(s.load_fasta(reduced_rna_fasta, 
-                                 Sequence::reduced_dna_alphabet),
+                                 reduced_dna_alphabet),
                     sequence_invalid_load_error);
   BOOST_CHECK_THROW(s.load_fasta(garbage_fasta, 
                     sequence_invalid_load_error);
   BOOST_CHECK_THROW(s.load_fasta(garbage_fasta, 
-                                 Sequence::reduced_dna_alphabet),
+                                 reduced_dna_alphabet),
                     sequence_invalid_load_error);
 
 }
                     sequence_invalid_load_error);
 
 }
@@ -280,15 +280,15 @@ BOOST_AUTO_TEST_CASE( sequence_load_rna_reduced )
   std::stringstream garbage_fasta(garbage_string);
   
   Sequence s;
   std::stringstream garbage_fasta(garbage_string);
   
   Sequence s;
-  s.load_fasta(reduced_rna_fasta, Sequence::reduced_rna_alphabet);
+  s.load_fasta(reduced_rna_fasta, reduced_rna_alphabet);
   BOOST_CHECK_THROW(s.load_fasta(invalid_rna_fasta, 
   BOOST_CHECK_THROW(s.load_fasta(invalid_rna_fasta, 
-                                 Sequence::reduced_rna_alphabet),
+                                 reduced_rna_alphabet),
                     sequence_invalid_load_error);
   BOOST_CHECK_THROW(s.load_fasta(reduced_dna_fasta, 
                     sequence_invalid_load_error);
   BOOST_CHECK_THROW(s.load_fasta(reduced_dna_fasta, 
-                                 Sequence::reduced_rna_alphabet),
+                                 reduced_rna_alphabet),
                     sequence_invalid_load_error);
   BOOST_CHECK_THROW(s.load_fasta(garbage_fasta, 
                     sequence_invalid_load_error);
   BOOST_CHECK_THROW(s.load_fasta(garbage_fasta, 
-                                 Sequence::reduced_rna_alphabet),
+                                 reduced_rna_alphabet),
                     sequence_invalid_load_error);
 }
 
                     sequence_invalid_load_error);
 }
 
@@ -310,11 +310,11 @@ BOOST_AUTO_TEST_CASE( sequence_load_fasta_default )
   // there's two copies of reduced_rna_fasta because i didn't feel like
   // figuring out how to properly reset the read pointer in a stringstream
   s.load_fasta(reduced_rna_fasta1);
   // there's two copies of reduced_rna_fasta because i didn't feel like
   // figuring out how to properly reset the read pointer in a stringstream
   s.load_fasta(reduced_rna_fasta1);
-  specific.load_fasta(reduced_rna_fasta2, Sequence::reduced_nucleic_alphabet);
+  specific.load_fasta(reduced_rna_fasta2, reduced_nucleic_alphabet);
   BOOST_CHECK_EQUAL(s, specific);
   
   s.load_fasta(reduced_dna_fasta1);
   BOOST_CHECK_EQUAL(s, specific);
   
   s.load_fasta(reduced_dna_fasta1);
-  specific.load_fasta(reduced_dna_fasta2, Sequence::reduced_nucleic_alphabet);
+  specific.load_fasta(reduced_dna_fasta2, reduced_nucleic_alphabet);
   BOOST_CHECK_EQUAL(s, specific);
   
   BOOST_CHECK_THROW(s.load_fasta(invalid_nucleotide_fasta), 
   BOOST_CHECK_EQUAL(s, specific);
   
   BOOST_CHECK_THROW(s.load_fasta(invalid_nucleotide_fasta), 
@@ -352,7 +352,7 @@ BOOST_AUTO_TEST_CASE( sequence_load_multiple_sequences_one_fasta )
 BOOST_AUTO_TEST_CASE( sequence_reverse_complement )
 {
   std::string iupac_symbols("AaCcGgTtRrYySsWwKkMmBbDdHhVvNn-~.?");
 BOOST_AUTO_TEST_CASE( sequence_reverse_complement )
 {
   std::string iupac_symbols("AaCcGgTtRrYySsWwKkMmBbDdHhVvNn-~.?");
-  Sequence seq(iupac_symbols, Sequence::nucleic_alphabet);
+  Sequence seq(iupac_symbols, nucleic_alphabet);
   Sequence seqr = seq.rev_comp();
   
   BOOST_CHECK( seq != seqr );
   Sequence seqr = seq.rev_comp();
   
   BOOST_CHECK( seq != seqr );
@@ -365,7 +365,7 @@ BOOST_AUTO_TEST_CASE( sequence_reverse_complement )
 BOOST_AUTO_TEST_CASE( sequence_reverse_complement_dna )
 {
   std::string dna_str("AGCTN");
 BOOST_AUTO_TEST_CASE( sequence_reverse_complement_dna )
 {
   std::string dna_str("AGCTN");
-  Sequence dna_seq(dna_str, Sequence::reduced_dna_alphabet);
+  Sequence dna_seq(dna_str, reduced_dna_alphabet);
   BOOST_CHECK_EQUAL(dna_seq.rev_comp(), "NAGCT");  
   BOOST_CHECK_EQUAL(dna_seq, dna_seq.rev_comp().rev_comp());
 }
   BOOST_CHECK_EQUAL(dna_seq.rev_comp(), "NAGCT");  
   BOOST_CHECK_EQUAL(dna_seq, dna_seq.rev_comp().rev_comp());
 }
@@ -373,7 +373,7 @@ BOOST_AUTO_TEST_CASE( sequence_reverse_complement_dna )
 BOOST_AUTO_TEST_CASE( sequence_reverse_complement_rna )
 {
   std::string rna_str("AGCUN");
 BOOST_AUTO_TEST_CASE( sequence_reverse_complement_rna )
 {
   std::string rna_str("AGCUN");
-  Sequence rna_seq(rna_str, Sequence::reduced_rna_alphabet);
+  Sequence rna_seq(rna_str, reduced_rna_alphabet);
   BOOST_CHECK_EQUAL(rna_seq.rev_comp(), "NAGCU");  
   BOOST_CHECK_EQUAL(rna_seq, rna_seq.rev_comp().rev_comp());
 }
   BOOST_CHECK_EQUAL(rna_seq.rev_comp(), "NAGCU");  
   BOOST_CHECK_EQUAL(rna_seq, rna_seq.rev_comp().rev_comp());
 }
@@ -381,7 +381,7 @@ BOOST_AUTO_TEST_CASE( sequence_reverse_complement_rna )
 BOOST_AUTO_TEST_CASE( sequence_reverse_complement_subseq )
 {
   std::string dna_str("AAAAAAAAAAGGGGGGGGGGG");
 BOOST_AUTO_TEST_CASE( sequence_reverse_complement_subseq )
 {
   std::string dna_str("AAAAAAAAAAGGGGGGGGGGG");
-  Sequence seq(dna_str, Sequence::reduced_dna_alphabet);
+  Sequence seq(dna_str, reduced_dna_alphabet);
   Sequence subseq = seq.subseq(8,4);
   BOOST_CHECK_EQUAL( subseq, "AAGG");
   Sequence rev_subseq = subseq.rev_comp();
   Sequence subseq = seq.subseq(8,4);
   BOOST_CHECK_EQUAL( subseq, "AAGG");
   Sequence rev_subseq = subseq.rev_comp();
@@ -444,7 +444,7 @@ BOOST_AUTO_TEST_CASE( annotation_load )
                       ;
   string s(100, 'A');
   s += "GCTGCTAATT";
                       ;
   string s(100, 'A');
   s += "GCTGCTAATT";
-  Sequence seq(s, Sequence::reduced_dna_alphabet);
+  Sequence seq(s, reduced_dna_alphabet);
                      
   //istringstream annot_stream(annot_data);
   seq.parse_annot(annot_data, 0, 0);
                      
   //istringstream annot_stream(annot_data);
   seq.parse_annot(annot_data, 0, 0);
@@ -484,7 +484,7 @@ BOOST_AUTO_TEST_CASE( annotation_broken_load )
                       ;
   string s(100, 'A');
   s += "GCTGCTAATT";
                       ;
   string s(100, 'A');
   s += "GCTGCTAATT";
-  Sequence seq(s, Sequence::reduced_dna_alphabet);
+  Sequence seq(s, reduced_dna_alphabet);
                      
   BOOST_CHECK_THROW(seq.parse_annot(annot_data, 0, 0), annotation_load_error);
   BOOST_CHECK_EQUAL(seq.annotations().size(), 0);
                      
   BOOST_CHECK_THROW(seq.parse_annot(annot_data, 0, 0), annotation_load_error);
   BOOST_CHECK_EQUAL(seq.annotations().size(), 0);
@@ -511,7 +511,7 @@ BOOST_AUTO_TEST_CASE(annotation_ucsc_html_load)
     "TGGGTCAGTGTCACCTCCAGGATACAGACAGCCCCCCTTCAGCCCAGCCCAGCCAG"
     "AAAAA"
     "GGTGGAGACGACCTGGACCCTAACTACGTGCTCAGCAGCCGCGTCCGCAC";
     "TGGGTCAGTGTCACCTCCAGGATACAGACAGCCCCCCTTCAGCCCAGCCCAGCCAG"
     "AAAAA"
     "GGTGGAGACGACCTGGACCCTAACTACGTGCTCAGCAGCCGCGTCCGCAC";
-  Sequence seq(s, Sequence::reduced_dna_alphabet);
+  Sequence seq(s, reduced_dna_alphabet);
   seq.parse_annot(annot_data);
   std::list<annot> annots = seq.annotations();
   BOOST_CHECK_EQUAL( annots.size(), 2);
   seq.parse_annot(annot_data);
   std::list<annot> annots = seq.annotations();
   BOOST_CHECK_EQUAL( annots.size(), 2);
@@ -532,7 +532,7 @@ BOOST_AUTO_TEST_CASE( annotation_load_no_species_name )
                       ;
   string s(100, 'A');
   s += "GCTGCTAATT";
                       ;
   string s(100, 'A');
   s += "GCTGCTAATT";
-  Sequence seq(s, Sequence::reduced_dna_alphabet);
+  Sequence seq(s, reduced_dna_alphabet);
                      
   //istringstream annot_stream(annot_data);
   seq.parse_annot(annot_data, 0, 0);
                      
   //istringstream annot_stream(annot_data);
   seq.parse_annot(annot_data, 0, 0);
@@ -583,12 +583,12 @@ BOOST_AUTO_TEST_CASE ( sequence_size )
 
 BOOST_AUTO_TEST_CASE( sequence_empty_equality )
 {
 
 BOOST_AUTO_TEST_CASE( sequence_empty_equality )
 {
-  Sequence szero("", Sequence::reduced_dna_alphabet);
+  Sequence szero("", reduced_dna_alphabet);
   BOOST_CHECK_EQUAL(szero.empty(), true);
   BOOST_CHECK_EQUAL(szero, szero);
   BOOST_CHECK_EQUAL(szero, "");
 
   BOOST_CHECK_EQUAL(szero.empty(), true);
   BOOST_CHECK_EQUAL(szero, szero);
   BOOST_CHECK_EQUAL(szero, "");
 
-  Sequence sclear("AGCT", Sequence::reduced_dna_alphabet);
+  Sequence sclear("AGCT", reduced_dna_alphabet);
   sclear.clear();
   BOOST_CHECK_EQUAL(sclear.empty(), true);
   BOOST_CHECK_EQUAL(sclear, sclear);
   sclear.clear();
   BOOST_CHECK_EQUAL(sclear.empty(), true);
   BOOST_CHECK_EQUAL(sclear, sclear);
@@ -599,7 +599,7 @@ BOOST_AUTO_TEST_CASE( sequence_empty_equality )
 BOOST_AUTO_TEST_CASE ( sequence_iterators )
 {
   std::string seq_string = "AAGGCCTTNNTATA";
 BOOST_AUTO_TEST_CASE ( sequence_iterators )
 {
   std::string seq_string = "AAGGCCTTNNTATA";
-  Sequence s(seq_string, Sequence::reduced_dna_alphabet);
+  Sequence s(seq_string, reduced_dna_alphabet);
   const Sequence cs(s);
   std::string::size_type count = 0;
 
   const Sequence cs(s);
   std::string::size_type count = 0;
 
@@ -628,7 +628,7 @@ BOOST_AUTO_TEST_CASE( sequence_motifs )
 {
   string m("AAAA");
   string bogus("AATTGGAA");
 {
   string m("AAAA");
   string bogus("AATTGGAA");
-  Sequence s1("AAAAGGGGCCCCTTTT", Sequence::reduced_dna_alphabet);
+  Sequence s1("AAAAGGGGCCCCTTTT", reduced_dna_alphabet);
 
   list<motif>::const_iterator motif_i = s1.motifs().begin();
   list<motif>::const_iterator motif_end = s1.motifs().end();
 
   list<motif>::const_iterator motif_i = s1.motifs().begin();
   list<motif>::const_iterator motif_end = s1.motifs().end();
@@ -683,7 +683,7 @@ BOOST_AUTO_TEST_CASE( sequence_motif_subseq)
   // only search the subsequence ticket:199
   string aaaa("AAAA");
   string cccc("CCCC");
   // only search the subsequence ticket:199
   string aaaa("AAAA");
   string cccc("CCCC");
-  Sequence s1("AAAANCCCC", Sequence::reduced_dna_alphabet);
+  Sequence s1("AAAANCCCC", reduced_dna_alphabet);
 
   // this shouldn't show up
   s1.add_motif(cccc);
 
   // this shouldn't show up
   s1.add_motif(cccc);
@@ -736,7 +736,7 @@ BOOST_AUTO_TEST_CASE( annotate_from_sequence )
            "AGCTAAAACTTTGGAAACTTTAGATCCCAGACAGGTGGCTTTCTTGCAGT");
   string gc("GCCCCC");
   string gga("GGACACCTC");
            "AGCTAAAACTTTGGAAACTTTAGATCCCAGACAGGTGGCTTTCTTGCAGT");
   string gc("GCCCCC");
   string gga("GGACACCTC");
-  Sequence seq(s, Sequence::reduced_dna_alphabet);
+  Sequence seq(s, reduced_dna_alphabet);
 
   std::list<Sequence> query_list;
   std::list<string> string_list;
 
   std::list<Sequence> query_list;
   std::list<string> string_list;
@@ -782,7 +782,7 @@ BOOST_AUTO_TEST_CASE( subseq_annotation_test )
            "GGGGCTCCAGCCCCGCGGGAATGGCAGAACTTCGCACGCGGAACTGGTAA"
            "CCTCCAGGACACCTCGAATCAGGGTGATTGTAGCGCAGGGGCCTTGGCCA"
            "AGCTAAAACTTTGGAAACTTTAGATCCCAGACAGGTGGCTTTCTTGCAGT");
            "GGGGCTCCAGCCCCGCGGGAATGGCAGAACTTCGCACGCGGAACTGGTAA"
            "CCTCCAGGACACCTCGAATCAGGGTGATTGTAGCGCAGGGGCCTTGGCCA"
            "AGCTAAAACTTTGGAAACTTTAGATCCCAGACAGGTGGCTTTCTTGCAGT");
-  Sequence seq(s, Sequence::reduced_dna_alphabet);
+  Sequence seq(s, reduced_dna_alphabet);
 
 
   seq.add_annotation(annot(0, 10, "0-10", "0-10"));
 
 
   seq.add_annotation(annot(0, 10, "0-10", "0-10"));
@@ -819,7 +819,7 @@ BOOST_AUTO_TEST_CASE( motif_annotation_update )
            "GGGGCTCCAGCCCCGCGGGAATGGCAGAACTTCGCACGCGGAACTGGTAA"
            "CCTCCAGGACACCTCGAATCAGGGTGATTGTAGCGCAGGGGCCTTGGCCA"
            "AGCTAAAACTTTGGAAACTTTAGATCCCAGACAGGTGGCTTTCTTGCAGT");
            "GGGGCTCCAGCCCCGCGGGAATGGCAGAACTTCGCACGCGGAACTGGTAA"
            "CCTCCAGGACACCTCGAATCAGGGTGATTGTAGCGCAGGGGCCTTGGCCA"
            "AGCTAAAACTTTGGAAACTTTAGATCCCAGACAGGTGGCTTTCTTGCAGT");
-  Sequence seq(s, Sequence::reduced_dna_alphabet);
+  Sequence seq(s, reduced_dna_alphabet);
 
   // starting conditions
   BOOST_CHECK_EQUAL(seq.annotations().size(), 0);
 
   // starting conditions
   BOOST_CHECK_EQUAL(seq.annotations().size(), 0);
@@ -840,7 +840,7 @@ BOOST_AUTO_TEST_CASE( motif_annotation_update )
 BOOST_AUTO_TEST_CASE( out_operator )
 {
   string s("AAGGCCTT");
 BOOST_AUTO_TEST_CASE( out_operator )
 {
   string s("AAGGCCTT");
-  Sequence seq(s, Sequence::reduced_dna_alphabet);
+  Sequence seq(s, reduced_dna_alphabet);
 
   ostringstream buf;
   buf << s;
 
   ostringstream buf;
   buf << s;
@@ -849,7 +849,7 @@ BOOST_AUTO_TEST_CASE( out_operator )
 
 BOOST_AUTO_TEST_CASE( get_name )
 {
 
 BOOST_AUTO_TEST_CASE( get_name )
 {
-  Sequence seq("AAGGCCTT", Sequence::reduced_dna_alphabet);
+  Sequence seq("AAGGCCTT", reduced_dna_alphabet);
 
   BOOST_CHECK_EQUAL( seq.get_name(), "" );
   seq.set_species("hooman"); // anyone remember tradewars?
 
   BOOST_CHECK_EQUAL( seq.get_name(), "" );
   seq.set_species("hooman"); // anyone remember tradewars?
@@ -861,7 +861,7 @@ BOOST_AUTO_TEST_CASE( get_name )
 BOOST_AUTO_TEST_CASE( serialize_simple )
 {
   std::string seq_string = "AAGGCCTT";
 BOOST_AUTO_TEST_CASE( serialize_simple )
 {
   std::string seq_string = "AAGGCCTT";
-  Sequence seq(seq_string, Sequence::reduced_dna_alphabet);
+  Sequence seq(seq_string, reduced_dna_alphabet);
   seq.set_species("ribbet");
   std::ostringstream oss;
   // allocate/deallocate serialization components
   seq.set_species("ribbet");
   std::ostringstream oss;
   // allocate/deallocate serialization components
@@ -884,7 +884,7 @@ BOOST_AUTO_TEST_CASE( serialize_simple )
 BOOST_AUTO_TEST_CASE( serialize_tree )
 {
   std::string seq_string = "AAGGCCTT";
 BOOST_AUTO_TEST_CASE( serialize_tree )
 {
   std::string seq_string = "AAGGCCTT";
-  Sequence seq(seq_string, Sequence::reduced_dna_alphabet);
+  Sequence seq(seq_string, reduced_dna_alphabet);
   seq.set_species("ribbet");
   seq.add_motif("AA");
   seq.add_motif("GC");
   seq.set_species("ribbet");
   seq.add_motif("AA");
   seq.add_motif("GC");
@@ -914,7 +914,7 @@ BOOST_AUTO_TEST_CASE( serialize_tree )
 BOOST_AUTO_TEST_CASE( serialize_xml_sequence )
 {
   std::string seq_string = "AAGGCCTT";
 BOOST_AUTO_TEST_CASE( serialize_xml_sequence )
 {
   std::string seq_string = "AAGGCCTT";
-  Sequence seq(seq_string, Sequence::reduced_dna_alphabet);
+  Sequence seq(seq_string, reduced_dna_alphabet);
   seq.set_species("ribbet");
   seq.add_motif("AA");
   seq.add_motif("GC");
   seq.set_species("ribbet");
   seq.add_motif("AA");
   seq.add_motif("GC");
@@ -941,7 +941,7 @@ BOOST_AUTO_TEST_CASE( serialize_xml_sequence )
 BOOST_AUTO_TEST_CASE( serialize_xml_two )
 {
   std::string seq_string = "AAGGCCTT";
 BOOST_AUTO_TEST_CASE( serialize_xml_two )
 {
   std::string seq_string = "AAGGCCTT";
-  Sequence seq1(seq_string, Sequence::reduced_dna_alphabet);
+  Sequence seq1(seq_string, reduced_dna_alphabet);
   Sequence seq2(seq1);
 
   std::ostringstream oss;
   Sequence seq2(seq1);
 
   std::ostringstream oss;
index 89308c72858d3ece99caf974a204c79519e7ca9a..e847a245a1f90a8981159a152da28f19f74cdc34 100644 (file)
@@ -6,7 +6,7 @@
 MotifElement::MotifElement() :
   enabled(true),
   color(Color(1.0,0.0,0.0,1.0)),
 MotifElement::MotifElement() :
   enabled(true),
   color(Color(1.0,0.0,0.0,1.0)),
-  motif(Sequence::nucleic_alphabet)
+  motif(nucleic_alphabet)
 {
 }
 
 {
 }
 
@@ -77,7 +77,7 @@ void MotifElement::setSequence(const Sequence& seq)
 //! set sequence text
 void MotifElement::setSequence(const std::string& seq_text)
 {
 //! set sequence text
 void MotifElement::setSequence(const std::string& seq_text)
 {
-  motif.set_sequence(seq_text, Sequence::nucleic_alphabet);
+  motif.set_sequence(seq_text, nucleic_alphabet);
 }
 
 QString MotifElement::getSequenceText() const
 }
 
 QString MotifElement::getSequenceText() const