This is another large patch.
This moves alphabet_ref from Sequence to SeqString.
I still need to deal with a better way of handling reverse complementing
especially given my attempts to keep everything in reference counted
structures.
Also this patch and the previous:
Store Sequence sequence location in a shared_ptr class
have broken the display of motifs in mussa.
(I used to be using shared_ptr<Sequences> for everything,
but now since there's a couple of seperate objects updating
the motifs in one doesn't really help so much for the other.
so warning, these two patches and the next few that I make
are going to be unstable.
**END OF DESCRIPTION***
Place the long patch description above the ***END OF DESCRIPTION*** marker.
The first line of this file will be the patch name.
This patch contains the following changes:
M ./alg/CMakeLists.txt +1
M ./alg/alphabet.cpp +35
M ./alg/alphabet.hpp -1 +14
M ./alg/motif_parser.cpp -3 +4
A ./alg/seq.cpp
M ./alg/seq.hpp -3 +17
M ./alg/seq_span.cpp -6 +3
M ./alg/seq_span.hpp -1 +4
M ./alg/sequence.cpp -82 +26
M ./alg/sequence.hpp -15 +10
M ./alg/test/CMakeLists.txt +1
M ./alg/test/test_alphabet.cpp +5
A ./alg/test/test_seq.cpp
M ./alg/test/test_seq_span.cpp +9
M ./alg/test/test_sequence.cpp -43 +43
M ./qui/motif_editor/MotifElement.cpp -2 +2
nway_other.cpp
nway_paths.cpp
parse_options.cpp
+ seq.cpp
seq_span.cpp
sequence.cpp
sequence_location.cpp )
+#include <stdexcept>
+
#include "alg/alphabet.hpp"
// some standard dna alphabets
//! the protein alphabet
const char *Alphabet::protein_cstr =
"AaCcDdEeFfGgHhIiKkLlMmNnPpQqRrSsTtVvWwYy\012\015";
+const char *Alphabet::empty_cstr = "";
const Alphabet& Alphabet::reduced_dna_alphabet() {
static Alphabet *a = new Alphabet(reduced_dna_cstr);
static Alphabet *a = new Alphabet(protein_cstr);
return *a;
}
+const Alphabet& Alphabet::empty_alphabet() {
+ static Alphabet *a = new Alphabet(empty_cstr);
+ return *a;
+}
Alphabet::Alphabet(const char *a) :
alphabet(a)
alphabet_set.insert(alphabet.begin(), alphabet.end());
}
+bool operator==(const Alphabet& x, const Alphabet& y)
+{
+ return x.alphabet == y.alphabet;
+}
+
+std::ostream& operator<<(std::ostream& out, const Alphabet& a)
+{
+ out << a.alphabet;
+}
+
bool Alphabet::exists(const char c) const
{
return (alphabet_set.find(c) != alphabet_set.end());
}
+
+const Alphabet& Alphabet::get_alphabet(AlphabetRef alpha)
+{
+ switch (alpha) {
+ case ::reduced_dna_alphabet:
+ return Alphabet::reduced_dna_alphabet();
+ case ::reduced_rna_alphabet:
+ return Alphabet::reduced_rna_alphabet();
+ case ::reduced_nucleic_alphabet:
+ return Alphabet::reduced_nucleic_alphabet();
+ case ::nucleic_alphabet:
+ return Alphabet::nucleic_alphabet();
+ case ::protein_alphabet:
+ return Alphabet::protein_alphabet();
+ default:
+ throw std::runtime_error("unrecognized alphabet type");
+ break;
+ }
+}
\ No newline at end of file
#include <boost/serialization/version.hpp>
#include <set>
+#include <ostream>
//! this is a helper class for sequence
+enum AlphabetRef { reduced_dna_alphabet, reduced_rna_alphabet, reduced_nucleic_alphabet,
+ nucleic_alphabet, protein_alphabet, empty_alphabet=255 };
+
class Alphabet {
friend class Sequence;
public:
typedef std::string::const_iterator const_iterator;
-
+ //! define the various alphabet types (as python corebio)
+
+ friend bool operator==(const Alphabet&, const Alphabet&);
+ friend std::ostream& operator<<(std::ostream&, const Alphabet&);
+
//! case-insensitive test to check a character for existence in our alphabet
bool exists(const char) const;
+ //! return an alphabet given an AlphabetRef enumeration
+ static const Alphabet &get_alphabet(AlphabetRef);
// note, if you want to define an alphabet for a sequence, you probably want
// to update the enumeration in Sequence, and Sequence::get_sequence
//! The standard DNA alphabet, with unique, and unknown characters
//! the protein alphabet
static const char *protein_cstr;
static const Alphabet &protein_alphabet();
+
+ static const char *empty_cstr;
+ static const Alphabet &empty_alphabet();
private:
//! what are allowable symbols in our alphabet
#include "mussa_exceptions.hpp"
-#include "alg/alphabet.hpp"
-#include "alg/motif_parser.hpp"
+#include "alphabet.hpp"
+#include "motif_parser.hpp"
+#include "seq.hpp"
#include <boost/spirit/core.hpp>
#include <boost/spirit/actor/push_back_actor.hpp>
Iterator end) const
{
float red, green, blue, alpha;
- Sequence seq(parsed->sequence, Sequence::nucleic_alphabet);
+ Sequence seq(parsed->sequence, nucleic_alphabet);
seq.set_fasta_header(parsed->name);
alpha = 1.0;
--- /dev/null
+#include <stdexcept>
+
+#include "seq.hpp"
+
+const Alphabet& SeqString::get_alphabet() const
+{
+ return Alphabet::get_alphabet(alphabet);
+}
+
+std::string SeqString::create_reverse_complement_map() const
+{
+ std::string rc_map(256, '~');
+ // if we're rna, use U instead of T
+ // we might want to add an "is_rna" to sequence at somepoint
+ char TU = (alphabet == reduced_rna_alphabet) ? 'U' : 'T';
+ char tu = (alphabet == reduced_rna_alphabet) ? 'u' : 't';
+ rc_map['A'] = TU ; rc_map['a'] = tu ;
+ rc_map['T'] = 'A'; rc_map['t'] = 'a';
+ rc_map['U'] = 'A'; rc_map['u'] = 'a';
+ rc_map['G'] = 'C'; rc_map['g'] = 'c';
+ rc_map['C'] = 'G'; rc_map['c'] = 'g';
+ rc_map['M'] = 'K'; rc_map['m'] = 'k';
+ rc_map['R'] = 'Y'; rc_map['r'] = 'y';
+ rc_map['W'] = 'W'; rc_map['w'] = 'w';
+ rc_map['S'] = 'S'; rc_map['s'] = 's';
+ rc_map['Y'] = 'R'; rc_map['y'] = 'r';
+ rc_map['K'] = 'M'; rc_map['k'] = 'm';
+ rc_map['V'] = 'B'; rc_map['v'] = 'b';
+ rc_map['H'] = 'D'; rc_map['h'] = 'd';
+ rc_map['D'] = 'H'; rc_map['d'] = 'h';
+ rc_map['B'] = 'V'; rc_map['b'] = 'v';
+ rc_map['N'] = 'N'; rc_map['n'] = 'n';
+ rc_map['X'] = 'X'; rc_map['x'] = 'x';
+ rc_map['?'] = '?';
+ rc_map['.'] = '.';
+ rc_map['-'] = '-';
+ rc_map['~'] = '~'; // not really needed, but perhaps it's clearer.
+
+ return rc_map;
+}
#include <boost/shared_ptr.hpp>
+#include "alphabet.hpp"
//! These classes provide for the internal implementation for the Sequence class
/** the only purpose of this class is that the shared_ptr template
* functions need the serialization support to be in-class.
*/
+
class SeqString : public std::string
{
public:
typedef std::string::value_type value_type;
static const size_type npos = std::string::npos;
- SeqString() : std::string() {};
- SeqString(const std::string &s) : std::string(s) {};
+ SeqString(AlphabetRef a=reduced_nucleic_alphabet) :
+ std::string(),
+ alphabet(a)
+ {}
+ SeqString(const std::string &s, AlphabetRef a=reduced_nucleic_alphabet) :
+ std::string(s),
+ alphabet(a)
+ {}
+
+ AlphabetRef get_alphabet_ref() { return alphabet; }
+ const Alphabet& get_alphabet() const;
+ std::string create_reverse_complement_map() const;
private:
+ AlphabetRef alphabet;
friend class boost::serialization::access;
template<class Archive>
void serialize(Archive& ar, const unsigned int /*version*/) {
//ar & BOOST_SERIALIZATION_BASE_OBJECT_NVP(std::string);
ar & boost::serialization::make_nvp("bases",
- boost::serialization::base_object<std::string>(*this)
+ boost::serialization::base_object<std::string>(*this)
);
+ ar & BOOST_SERIALIZATION_NVP(alphabet);
}
};
typedef boost::shared_ptr<SeqString> SeqStringRef;
{
}
-SeqSpan::SeqSpan(const std::string &seq_)
- : seq(new SeqString(seq_)),
+SeqSpan::SeqSpan(const std::string &seq_,
+ AlphabetRef a)
+ : seq(new SeqString(seq_, a)),
seq_start(0),
seq_count(seq_.length()),
parent()
bool operator==(const SeqSpan& a, const SeqSpan& b)
{
if (SeqSpan::isFamily(a, b)) {
- std::cout << " " << a.seq_start
- << " " << b.seq_start
- << " " << a.seq_count
- << " " << b.seq_count << std::endl;
// can do fast comparison
if (a.seq_start == b.seq_start and a.seq_count == b.seq_count) {
return true;
public:
SeqSpan(const SeqSpan &);
SeqSpan(const SeqSpan *);
- explicit SeqSpan(const std::string &);
+ explicit SeqSpan(const std::string &,
+ AlphabetRef = reduced_nucleic_alphabet
+ );
SeqSpan(const SeqSpanRef, size_type start=0, size_type count=npos);
//! assignment
friend bool operator==(const SeqSpan&, const SeqSpan&);
friend bool operator!=(const SeqSpan&, const SeqSpan&);
+ const Alphabet& get_alphabet() const { return seq->get_alphabet(); }
//! \defgroup string_operators
//! @{
//! retrive element at specific position
}
-Sequence::Sequence(alphabet_ref alphabet_)
- : seq(new SeqSpan("")),
- alphabet(alphabet_),
+Sequence::Sequence(AlphabetRef alphabet)
+ : seq(new SeqSpan("", alphabet)),
strand(UnknownStrand)
{
}
{
}
-Sequence::Sequence(const char *seq, alphabet_ref alphabet_)
- : alphabet(alphabet_),
- strand(UnknownStrand),
+Sequence::Sequence(const char *seq, AlphabetRef alphabet_)
+ : strand(UnknownStrand),
header(""),
species("")
{
- set_filtered_sequence(seq, alphabet);
+ set_filtered_sequence(seq, alphabet_);
}
-Sequence::Sequence(const std::string& seq, alphabet_ref alphabet_)
- : alphabet(alphabet_),
- strand(UnknownStrand),
+Sequence::Sequence(const std::string& seq, AlphabetRef alphabet_)
+ : strand(UnknownStrand),
header(""),
species("")
{
- set_filtered_sequence(seq, alphabet);
+ set_filtered_sequence(seq, alphabet_);
}
Sequence::Sequence(const Sequence& o)
: seq(o.seq),
- alphabet(o.alphabet),
strand(o.strand),
header(o.header),
species(o.species),
Sequence::Sequence(const Sequence* o)
: seq(o->seq),
- alphabet(o->alphabet),
strand(o->strand),
header(o->header),
species(o->species),
{
}
-Sequence::Sequence(const SeqSpanRef& seq_ref, alphabet_ref alphabet_)
+Sequence::Sequence(const SeqSpanRef& seq_ref)
: seq(seq_ref),
- alphabet(alphabet),
strand(UnknownStrand),
header(""),
species("")
{
if (this != &s) {
seq = s.seq;
- alphabet = s.alphabet;
strand = s.strand;
header = s.header;
species = s.species;
void Sequence::load_fasta(fs::path file_path, int seq_num, int start_index, int end_index)
{
- load_fasta(file_path, alphabet, seq_num, start_index, end_index);
+ load_fasta(file_path, reduced_nucleic_alphabet, seq_num, start_index, end_index);
}
//! load a fasta file into a sequence
-void Sequence::load_fasta(fs::path file_path, alphabet_ref a,
+void Sequence::load_fasta(fs::path file_path, AlphabetRef a,
int seq_num, int start_index, int end_index)
{
fs::fstream data_file;
void Sequence::load_fasta(std::istream& file,
int seq_num, int start_index, int end_index)
{
- load_fasta(file, alphabet, seq_num, start_index, end_index);
+ load_fasta(file, reduced_nucleic_alphabet, seq_num, start_index, end_index);
}
void
-Sequence::load_fasta(std::istream& data_file, alphabet_ref a,
+Sequence::load_fasta(std::istream& data_file, AlphabetRef a,
int seq_num,
int start_index, int end_index)
{
std::string rev_comp;
std::string sequence_raw;
std::string seq_tmp; // holds sequence during basic filtering
- const Alphabet &alpha = get_alphabet(a);
+ const Alphabet &alpha = get_alphabet();
if (seq_num == 0) {
throw mussa_load_error("fasta sequence number is 1 based (can't be 0)");
}
void Sequence::set_filtered_sequence(const std::string &in_seq,
- alphabet_ref alphabet_,
+ AlphabetRef alphabet_,
size_type start,
size_type count,
strand_type strand_)
{
- alphabet = alphabet_;
if ( count == npos)
count = in_seq.size() - start;
std::string new_seq;
new_seq.reserve(count);
// finally, the actual conversion loop
- const Alphabet& alpha_impl = get_alphabet(); // go get one of our actual alphabets
+ const Alphabet& alpha_impl = Alphabet::get_alphabet(alphabet_); // go get one of our actual alphabets
std::string::const_iterator seq_i = in_seq.begin()+start;
for(size_type i = 0; i != count; ++i, ++seq_i)
{
new_seq.append(1, 'N');
}
}
- SeqSpanRef new_seq_ref(new SeqSpan(new_seq));
+ SeqSpanRef new_seq_ref(new SeqSpan(new_seq, alphabet_));
seq = new_seq_ref;
strand = strand_;
}
return new_seq;
}
-std::string Sequence::create_reverse_map() const
-{
- std::string rc_map(256, '~');
- // if we're rna, use U instead of T
- // we might want to add an "is_rna" to sequence at somepoint
- char TU = (alphabet == reduced_rna_alphabet) ? 'U' : 'T';
- char tu = (alphabet == reduced_rna_alphabet) ? 'u' : 't';
- rc_map['A'] = TU ; rc_map['a'] = tu ;
- rc_map['T'] = 'A'; rc_map['t'] = 'a';
- rc_map['U'] = 'A'; rc_map['u'] = 'a';
- rc_map['G'] = 'C'; rc_map['g'] = 'c';
- rc_map['C'] = 'G'; rc_map['c'] = 'g';
- rc_map['M'] = 'K'; rc_map['m'] = 'k';
- rc_map['R'] = 'Y'; rc_map['r'] = 'y';
- rc_map['W'] = 'W'; rc_map['w'] = 'w';
- rc_map['S'] = 'S'; rc_map['s'] = 's';
- rc_map['Y'] = 'R'; rc_map['y'] = 'r';
- rc_map['K'] = 'M'; rc_map['k'] = 'm';
- rc_map['V'] = 'B'; rc_map['v'] = 'b';
- rc_map['H'] = 'D'; rc_map['h'] = 'd';
- rc_map['D'] = 'H'; rc_map['d'] = 'h';
- rc_map['B'] = 'V'; rc_map['b'] = 'v';
- rc_map['N'] = 'N'; rc_map['n'] = 'n';
- rc_map['X'] = 'X'; rc_map['x'] = 'x';
- rc_map['?'] = '?';
- rc_map['.'] = '.';
- rc_map['-'] = '-';
- rc_map['~'] = '~'; // not really needed, but perhaps it's clearer.
- return rc_map;
-}
+// FIXME: This needs to be moved into SeqSpan
Sequence Sequence::rev_comp() const
{
std::string rev_comp;
rev_comp.reserve(length());
- std::string rc_map = create_reverse_map();
+ std::string rc_map = seq->seq->create_reverse_complement_map();
// reverse and convert
Sequence::const_reverse_iterator seq_i;
{
rev_comp.append(1, rc_map[*seq_i]);
}
- return Sequence(rev_comp, alphabet);
+ return Sequence(rev_comp, seq->seq->get_alphabet_ref());
+}
+
+const Alphabet& Sequence::get_alphabet() const
+{
+ return (seq) ? seq->get_alphabet() : Alphabet::empty_alphabet();
}
void Sequence::set_fasta_header(std::string header_)
return "";
}
-const Alphabet& Sequence::get_alphabet() const
-{
- return get_alphabet(alphabet);
-}
-
-const Alphabet& Sequence::get_alphabet(alphabet_ref alpha) const
-{
- switch (alpha) {
- case reduced_dna_alphabet:
- return Alphabet::reduced_dna_alphabet();
- case reduced_rna_alphabet:
- return Alphabet::reduced_rna_alphabet();
- case reduced_nucleic_alphabet:
- return Alphabet::reduced_nucleic_alphabet();
- case nucleic_alphabet:
- return Alphabet::nucleic_alphabet();
- case protein_alphabet:
- return Alphabet::protein_alphabet();
- default:
- throw std::runtime_error("unrecognized alphabet type");
- break;
- }
-}
-
-void Sequence::set_sequence(const std::string& s, alphabet_ref a)
+void Sequence::set_sequence(const std::string& s, AlphabetRef a)
{
set_filtered_sequence(s, a);
}
typedef SeqString::size_type size_type;
static const size_type npos = SeqString::npos;
enum strand_type { UnknownStrand, PlusStrand, MinusStrand, BothStrand };
- enum alphabet_ref { reduced_dna_alphabet, reduced_rna_alphabet, reduced_nucleic_alphabet,
- nucleic_alphabet, protein_alphabet };
- Sequence(alphabet_ref a = reduced_nucleic_alphabet);
- Sequence(const char* seq, alphabet_ref a = reduced_nucleic_alphabet);
- Sequence(const std::string& seq, alphabet_ref a = reduced_nucleic_alphabet);
+ Sequence(AlphabetRef a = reduced_nucleic_alphabet);
+ Sequence(const char* seq,
+ AlphabetRef a = reduced_nucleic_alphabet);
+ Sequence(const std::string& seq,
+ AlphabetRef a = reduced_nucleic_alphabet);
Sequence(const Sequence& seq);
Sequence(const Sequence *);
- Sequence(const SeqSpanRef&, alphabet_ref a = reduced_nucleic_alphabet);
+ Sequence(const SeqSpanRef&);
~Sequence();
//! assignment to constant sequences
Sequence &operator=(const Sequence&);
//! set sequence to a (sub)string containing nothing but AGCTN
void set_filtered_sequence(const std::string& seq,
- alphabet_ref a,
+ AlphabetRef a,
size_type start=0,
size_type count=npos,
strand_type strand=UnknownStrand);
Sequence rev_comp() const;
//! set sequence (filtered)
- void set_sequence(const std::string &, alphabet_ref);
+ void set_sequence(const std::string &, AlphabetRef);
//! get sequence
std::string get_sequence() const;
//! set species name
std::string get_name() const;
//! return a reference to whichever alphabet we're currently representing
const Alphabet& get_alphabet() const;
- //! return a reference to whichever alphabet we're currently representing
- const Alphabet& get_alphabet(alphabet_ref) const;
//! load sequence from fasta file using the sequences current alphabet
void load_fasta(const boost::filesystem::path file_path, int seq_num=1,
//! \throw sequence_empty_error
//! \throw sequence_empty_file_error
void load_fasta(const boost::filesystem::path file_path,
- alphabet_ref a,
+ AlphabetRef a,
int seq_num=1,
int start_index=0, int end_index=0);
void load_fasta(std::istream& file,
//! \throw sequence_empty_error
//! \throw sequence_empty_file_error
void load_fasta(std::istream& file,
- alphabet_ref a,
+ AlphabetRef a,
int seq_num=1,
int start_index=0, int end_index=0);
//! load sequence annotations
protected:
SeqSpanRef seq;
- //! which alphabet we're using
- alphabet_ref alphabet;
//! strand orientation
strand_type strand;
//! fasta header
template<class Archive>
void serialize(Archive& ar, const unsigned int /*version*/) {
ar & BOOST_SERIALIZATION_NVP(seq);
- ar & BOOST_SERIALIZATION_NVP(alphabet);
ar & BOOST_SERIALIZATION_NVP(strand);
ar & BOOST_SERIALIZATION_NVP(header);
ar & BOOST_SERIALIZATION_NVP(species);
MAKE_ALG_UNITTEST( test_glsequence )
MAKE_ALG_UNITTEST( test_mussa )
MAKE_ALG_UNITTEST( test_nway )
+MAKE_ALG_UNITTEST( test_seq )
MAKE_ALG_UNITTEST( test_sequence )
MAKE_ALG_UNITTEST( test_seq_span )
MAKE_ALG_UNITTEST( test_sequence_location )
// copied from alphabet.cpp
BOOST_CHECK_EQUAL( Alphabet::reduced_dna_cstr, "AaCcGgTtNn\012\015");
}
+
+BOOST_AUTO_TEST_CASE( alphabet_equality)
+{
+ Alphabet a(Alphabet::reduced_dna_alphabet());
+ BOOST_CHECK_EQUAL( a, Alphabet::reduced_dna_alphabet() );
+}
\ No newline at end of file
--- /dev/null
+#define BOOST_AUTO_TEST_MAIN
+#include <boost/test/auto_unit_test.hpp>
+
+#include "seq.hpp"
+
+BOOST_AUTO_TEST_CASE( seqstring_default_alphabet )
+{
+ SeqString s;
+ BOOST_CHECK_EQUAL(s.get_alphabet_ref(), reduced_nucleic_alphabet);
+ BOOST_CHECK_EQUAL(s.get_alphabet(), Alphabet::reduced_nucleic_alphabet());
+ BOOST_CHECK_EQUAL(s.size(), 0);
+}
+
+BOOST_AUTO_TEST_CASE( seqstring_string )
+{
+ SeqString s("AGCT");
+
+ BOOST_CHECK_EQUAL(s.get_alphabet_ref(), reduced_nucleic_alphabet);
+ BOOST_CHECK_EQUAL(s.get_alphabet(), Alphabet::reduced_nucleic_alphabet());
+ BOOST_CHECK_EQUAL(s.size(), 4);
+}
+
+// such an exciting unit test, making sure that a=b; a==b
+BOOST_AUTO_TEST_CASE( seqstring_string_alphabet )
+{
+ SeqString s("AGCT", nucleic_alphabet);
+
+ BOOST_CHECK_EQUAL(s.get_alphabet_ref(), nucleic_alphabet);
+ BOOST_CHECK_EQUAL(s.get_alphabet(), Alphabet::nucleic_alphabet());
+ BOOST_CHECK_EQUAL(s.size(), 4);
+}
\ No newline at end of file
BOOST_CHECK_EQUAL(span1->sequence(), str1);
}
+BOOST_AUTO_TEST_CASE( seqspan_from_string_with_alphabet )
+{
+ std::string str1("AAGGCCUU");
+ SeqSpanRef span1(new SeqSpan(str1, reduced_rna_alphabet));
+ BOOST_CHECK_EQUAL(span1->length(), str1.length());
+ BOOST_CHECK_EQUAL(span1->sequence(), str1);
+ BOOST_CHECK_EQUAL(span1->get_alphabet(), Alphabet::reduced_rna_alphabet());
+}
+
BOOST_AUTO_TEST_CASE( seqspan_from_seqspan )
{
std::string str1("AAGGCCTT");
BOOST_AUTO_TEST_CASE( sequence_filter )
{
const char *core_seq = "AATTGGCC";
- Sequence s1(core_seq, Sequence::reduced_dna_alphabet);
+ Sequence s1(core_seq, reduced_dna_alphabet);
BOOST_CHECK_EQUAL(s1, core_seq);
- Sequence s2("aattggcc", Sequence::reduced_dna_alphabet);
+ Sequence s2("aattggcc", reduced_dna_alphabet);
BOOST_CHECK_EQUAL(s2, "AATTGGCC");
BOOST_CHECK_EQUAL(s2.rev_comp(), "GGCCAATT");
BOOST_CHECK_EQUAL(s2.rev_comp(), "ggccaatt"); // we should be case insensitive now
//We're currently forcing sequences to uppercase
BOOST_CHECK_EQUAL(s2.get_sequence(), "AATTGGCC");
- Sequence s3("asdfg", Sequence::reduced_dna_alphabet);
+ Sequence s3("asdfg", reduced_dna_alphabet);
BOOST_CHECK_EQUAL(s3, "ANNNG");
BOOST_CHECK_EQUAL(s3.subseq(0,2), "AN");
- s3.set_filtered_sequence("AAGGCCTT", Sequence::reduced_dna_alphabet, 0, 2);
+ s3.set_filtered_sequence("AAGGCCTT", reduced_dna_alphabet, 0, 2);
BOOST_CHECK_EQUAL(s3, "AA");
- s3.set_filtered_sequence("AAGGCCTT", Sequence::reduced_dna_alphabet, 2, 2);
+ s3.set_filtered_sequence("AAGGCCTT", reduced_dna_alphabet, 2, 2);
BOOST_CHECK_EQUAL( s3, "GG");
- s3.set_filtered_sequence("AAGGCCTT", Sequence::reduced_dna_alphabet, 4);
+ s3.set_filtered_sequence("AAGGCCTT", reduced_dna_alphabet, 4);
BOOST_CHECK_EQUAL( s3, "CCTT");
s3 = "AAGGFF";
BOOST_AUTO_TEST_CASE( sequence_nucleic_alphabet )
{
std::string agct("AGCT");
- Sequence seq(agct, Sequence::nucleic_alphabet);
+ Sequence seq(agct, nucleic_alphabet);
BOOST_CHECK_EQUAL(seq.size(), agct.size());
BOOST_CHECK_EQUAL(seq.get_sequence(), agct);
std::string bdv("BDv");
- Sequence seq_bdv(bdv, Sequence::nucleic_alphabet);
+ Sequence seq_bdv(bdv, nucleic_alphabet);
BOOST_CHECK_EQUAL(seq_bdv.size(), bdv.size());
// forcing sequence to upper case
BOOST_CHECK_EQUAL(seq_bdv.get_sequence(),
BOOST_AUTO_TEST_CASE( subseq_names )
{
- Sequence s1("AAGGCCTT", Sequence::reduced_dna_alphabet);
+ Sequence s1("AAGGCCTT", reduced_dna_alphabet);
s1.set_species("species");
s1.set_fasta_header("a fasta header");
Sequence s2 = s1.subseq(2,2);
BOOST_CHECK_EQUAL( s1.stop(), 0 );
std::string seq_string("AAGGCCTT");
- Sequence s2(seq_string, Sequence::reduced_dna_alphabet);
+ Sequence s2(seq_string, reduced_dna_alphabet);
BOOST_CHECK_EQUAL( s2.start(), 0 );
BOOST_CHECK_EQUAL( s2.stop(), seq_string.size() );
fs::path seq_path(fs::path(EXAMPLE_DIR, fs::native)/ "seq");
seq_path /= "human_mck_pro.fa";
Sequence s;
- s.load_fasta(seq_path, Sequence::reduced_dna_alphabet);
+ s.load_fasta(seq_path, reduced_dna_alphabet);
BOOST_CHECK_EQUAL(s.subseq(0, 5), "GGATC"); // first few chars of fasta file
BOOST_CHECK_EQUAL(s.subseq(2, 3), "ATC");
BOOST_CHECK_EQUAL(s.get_fasta_header(), "gi|180579|gb|M21487.1|HUMCKMM1 Human "
std::stringstream garbage_fasta(garbage_string);
Sequence s;
- s.load_fasta(reduced_dna_fasta, Sequence::reduced_dna_alphabet);
+ s.load_fasta(reduced_dna_fasta, reduced_dna_alphabet);
BOOST_CHECK_THROW(s.load_fasta(invalid_dna_fasta,
- Sequence::reduced_dna_alphabet),
+ reduced_dna_alphabet),
sequence_invalid_load_error);
BOOST_CHECK_THROW(s.load_fasta(reduced_rna_fasta,
- Sequence::reduced_dna_alphabet),
+ reduced_dna_alphabet),
sequence_invalid_load_error);
BOOST_CHECK_THROW(s.load_fasta(garbage_fasta,
- Sequence::reduced_dna_alphabet),
+ reduced_dna_alphabet),
sequence_invalid_load_error);
}
std::stringstream garbage_fasta(garbage_string);
Sequence s;
- s.load_fasta(reduced_rna_fasta, Sequence::reduced_rna_alphabet);
+ s.load_fasta(reduced_rna_fasta, reduced_rna_alphabet);
BOOST_CHECK_THROW(s.load_fasta(invalid_rna_fasta,
- Sequence::reduced_rna_alphabet),
+ reduced_rna_alphabet),
sequence_invalid_load_error);
BOOST_CHECK_THROW(s.load_fasta(reduced_dna_fasta,
- Sequence::reduced_rna_alphabet),
+ reduced_rna_alphabet),
sequence_invalid_load_error);
BOOST_CHECK_THROW(s.load_fasta(garbage_fasta,
- Sequence::reduced_rna_alphabet),
+ reduced_rna_alphabet),
sequence_invalid_load_error);
}
// there's two copies of reduced_rna_fasta because i didn't feel like
// figuring out how to properly reset the read pointer in a stringstream
s.load_fasta(reduced_rna_fasta1);
- specific.load_fasta(reduced_rna_fasta2, Sequence::reduced_nucleic_alphabet);
+ specific.load_fasta(reduced_rna_fasta2, reduced_nucleic_alphabet);
BOOST_CHECK_EQUAL(s, specific);
s.load_fasta(reduced_dna_fasta1);
- specific.load_fasta(reduced_dna_fasta2, Sequence::reduced_nucleic_alphabet);
+ specific.load_fasta(reduced_dna_fasta2, reduced_nucleic_alphabet);
BOOST_CHECK_EQUAL(s, specific);
BOOST_CHECK_THROW(s.load_fasta(invalid_nucleotide_fasta),
BOOST_AUTO_TEST_CASE( sequence_reverse_complement )
{
std::string iupac_symbols("AaCcGgTtRrYySsWwKkMmBbDdHhVvNn-~.?");
- Sequence seq(iupac_symbols, Sequence::nucleic_alphabet);
+ Sequence seq(iupac_symbols, nucleic_alphabet);
Sequence seqr = seq.rev_comp();
BOOST_CHECK( seq != seqr );
BOOST_AUTO_TEST_CASE( sequence_reverse_complement_dna )
{
std::string dna_str("AGCTN");
- Sequence dna_seq(dna_str, Sequence::reduced_dna_alphabet);
+ Sequence dna_seq(dna_str, reduced_dna_alphabet);
BOOST_CHECK_EQUAL(dna_seq.rev_comp(), "NAGCT");
BOOST_CHECK_EQUAL(dna_seq, dna_seq.rev_comp().rev_comp());
}
BOOST_AUTO_TEST_CASE( sequence_reverse_complement_rna )
{
std::string rna_str("AGCUN");
- Sequence rna_seq(rna_str, Sequence::reduced_rna_alphabet);
+ Sequence rna_seq(rna_str, reduced_rna_alphabet);
BOOST_CHECK_EQUAL(rna_seq.rev_comp(), "NAGCU");
BOOST_CHECK_EQUAL(rna_seq, rna_seq.rev_comp().rev_comp());
}
BOOST_AUTO_TEST_CASE( sequence_reverse_complement_subseq )
{
std::string dna_str("AAAAAAAAAAGGGGGGGGGGG");
- Sequence seq(dna_str, Sequence::reduced_dna_alphabet);
+ Sequence seq(dna_str, reduced_dna_alphabet);
Sequence subseq = seq.subseq(8,4);
BOOST_CHECK_EQUAL( subseq, "AAGG");
Sequence rev_subseq = subseq.rev_comp();
;
string s(100, 'A');
s += "GCTGCTAATT";
- Sequence seq(s, Sequence::reduced_dna_alphabet);
+ Sequence seq(s, reduced_dna_alphabet);
//istringstream annot_stream(annot_data);
seq.parse_annot(annot_data, 0, 0);
;
string s(100, 'A');
s += "GCTGCTAATT";
- Sequence seq(s, Sequence::reduced_dna_alphabet);
+ Sequence seq(s, reduced_dna_alphabet);
BOOST_CHECK_THROW(seq.parse_annot(annot_data, 0, 0), annotation_load_error);
BOOST_CHECK_EQUAL(seq.annotations().size(), 0);
"TGGGTCAGTGTCACCTCCAGGATACAGACAGCCCCCCTTCAGCCCAGCCCAGCCAG"
"AAAAA"
"GGTGGAGACGACCTGGACCCTAACTACGTGCTCAGCAGCCGCGTCCGCAC";
- Sequence seq(s, Sequence::reduced_dna_alphabet);
+ Sequence seq(s, reduced_dna_alphabet);
seq.parse_annot(annot_data);
std::list<annot> annots = seq.annotations();
BOOST_CHECK_EQUAL( annots.size(), 2);
;
string s(100, 'A');
s += "GCTGCTAATT";
- Sequence seq(s, Sequence::reduced_dna_alphabet);
+ Sequence seq(s, reduced_dna_alphabet);
//istringstream annot_stream(annot_data);
seq.parse_annot(annot_data, 0, 0);
BOOST_AUTO_TEST_CASE( sequence_empty_equality )
{
- Sequence szero("", Sequence::reduced_dna_alphabet);
+ Sequence szero("", reduced_dna_alphabet);
BOOST_CHECK_EQUAL(szero.empty(), true);
BOOST_CHECK_EQUAL(szero, szero);
BOOST_CHECK_EQUAL(szero, "");
- Sequence sclear("AGCT", Sequence::reduced_dna_alphabet);
+ Sequence sclear("AGCT", reduced_dna_alphabet);
sclear.clear();
BOOST_CHECK_EQUAL(sclear.empty(), true);
BOOST_CHECK_EQUAL(sclear, sclear);
BOOST_AUTO_TEST_CASE ( sequence_iterators )
{
std::string seq_string = "AAGGCCTTNNTATA";
- Sequence s(seq_string, Sequence::reduced_dna_alphabet);
+ Sequence s(seq_string, reduced_dna_alphabet);
const Sequence cs(s);
std::string::size_type count = 0;
{
string m("AAAA");
string bogus("AATTGGAA");
- Sequence s1("AAAAGGGGCCCCTTTT", Sequence::reduced_dna_alphabet);
+ Sequence s1("AAAAGGGGCCCCTTTT", reduced_dna_alphabet);
list<motif>::const_iterator motif_i = s1.motifs().begin();
list<motif>::const_iterator motif_end = s1.motifs().end();
// only search the subsequence ticket:199
string aaaa("AAAA");
string cccc("CCCC");
- Sequence s1("AAAANCCCC", Sequence::reduced_dna_alphabet);
+ Sequence s1("AAAANCCCC", reduced_dna_alphabet);
// this shouldn't show up
s1.add_motif(cccc);
"AGCTAAAACTTTGGAAACTTTAGATCCCAGACAGGTGGCTTTCTTGCAGT");
string gc("GCCCCC");
string gga("GGACACCTC");
- Sequence seq(s, Sequence::reduced_dna_alphabet);
+ Sequence seq(s, reduced_dna_alphabet);
std::list<Sequence> query_list;
std::list<string> string_list;
"GGGGCTCCAGCCCCGCGGGAATGGCAGAACTTCGCACGCGGAACTGGTAA"
"CCTCCAGGACACCTCGAATCAGGGTGATTGTAGCGCAGGGGCCTTGGCCA"
"AGCTAAAACTTTGGAAACTTTAGATCCCAGACAGGTGGCTTTCTTGCAGT");
- Sequence seq(s, Sequence::reduced_dna_alphabet);
+ Sequence seq(s, reduced_dna_alphabet);
seq.add_annotation(annot(0, 10, "0-10", "0-10"));
"GGGGCTCCAGCCCCGCGGGAATGGCAGAACTTCGCACGCGGAACTGGTAA"
"CCTCCAGGACACCTCGAATCAGGGTGATTGTAGCGCAGGGGCCTTGGCCA"
"AGCTAAAACTTTGGAAACTTTAGATCCCAGACAGGTGGCTTTCTTGCAGT");
- Sequence seq(s, Sequence::reduced_dna_alphabet);
+ Sequence seq(s, reduced_dna_alphabet);
// starting conditions
BOOST_CHECK_EQUAL(seq.annotations().size(), 0);
BOOST_AUTO_TEST_CASE( out_operator )
{
string s("AAGGCCTT");
- Sequence seq(s, Sequence::reduced_dna_alphabet);
+ Sequence seq(s, reduced_dna_alphabet);
ostringstream buf;
buf << s;
BOOST_AUTO_TEST_CASE( get_name )
{
- Sequence seq("AAGGCCTT", Sequence::reduced_dna_alphabet);
+ Sequence seq("AAGGCCTT", reduced_dna_alphabet);
BOOST_CHECK_EQUAL( seq.get_name(), "" );
seq.set_species("hooman"); // anyone remember tradewars?
BOOST_AUTO_TEST_CASE( serialize_simple )
{
std::string seq_string = "AAGGCCTT";
- Sequence seq(seq_string, Sequence::reduced_dna_alphabet);
+ Sequence seq(seq_string, reduced_dna_alphabet);
seq.set_species("ribbet");
std::ostringstream oss;
// allocate/deallocate serialization components
BOOST_AUTO_TEST_CASE( serialize_tree )
{
std::string seq_string = "AAGGCCTT";
- Sequence seq(seq_string, Sequence::reduced_dna_alphabet);
+ Sequence seq(seq_string, reduced_dna_alphabet);
seq.set_species("ribbet");
seq.add_motif("AA");
seq.add_motif("GC");
BOOST_AUTO_TEST_CASE( serialize_xml_sequence )
{
std::string seq_string = "AAGGCCTT";
- Sequence seq(seq_string, Sequence::reduced_dna_alphabet);
+ Sequence seq(seq_string, reduced_dna_alphabet);
seq.set_species("ribbet");
seq.add_motif("AA");
seq.add_motif("GC");
BOOST_AUTO_TEST_CASE( serialize_xml_two )
{
std::string seq_string = "AAGGCCTT";
- Sequence seq1(seq_string, Sequence::reduced_dna_alphabet);
+ Sequence seq1(seq_string, reduced_dna_alphabet);
Sequence seq2(seq1);
std::ostringstream oss;
MotifElement::MotifElement() :
enabled(true),
color(Color(1.0,0.0,0.0,1.0)),
- motif(Sequence::nucleic_alphabet)
+ motif(nucleic_alphabet)
{
}
//! set sequence text
void MotifElement::setSequence(const std::string& seq_text)
{
- motif.set_sequence(seq_text, Sequence::nucleic_alphabet);
+ motif.set_sequence(seq_text, nucleic_alphabet);
}
QString MotifElement::getSequenceText() const