#include <vector>
#include <iostream>
+class Sequence;
//! FLP = Fixed Length Pairs (Data)
/*!
* vector of linked lists of the match type struct
void setup(int win_size, int hard_thres);
//! compare two sequences and store the list of results in all_matches
- void seqcomp(std::string seq1, std::string seq2, bool is_RC);
+ void seqcomp(const Sequence& seq1, const Sequence& seq2, bool is_RC);
//bool FLPs::match_less(match *match1, match *match2);
//void FLPs::sort();
//! Return all the matches for a particular window?
#include "mussa_exceptions.hpp"
#include "alg/flp.hpp"
+#include "alg/sequence.hpp"
#include <cassert>
#include <sstream>
using namespace std;
void
-FLPs::seqcomp(string sseq1, string sseq2, bool is_RC)
+FLPs::seqcomp(const Sequence& sseq1, const Sequence& sseq2, bool is_RC)
{
int start_i, seq1_i, seq2_i, win_i; // loop variables
int matches; // number of matches in to a window
int i2_offset;
- char * seq1, * seq2;
+ Sequence::const_iterator seq1 = sseq1.begin();
+ Sequence::const_iterator seq2 = sseq2.begin();
+
int seq1_win_num = sseq1.size() - window_size + 1;
int seq2_win_num = sseq2.size() - window_size + 1;
alloc_matches(sseq1.size());
throw mussa_error(msg.str());
}
- seq1 = (char *) sseq1.c_str();
- seq2 = (char *) sseq2.c_str();
-
if (is_RC)
i2_offset = window_size - sseq2.size();
else
}
else if (ana_mode == EntropyNway)
{
- vector<string> some_Seqs;
+ vector<Sequence> some_Seqs;
//unlike other methods, entropy needs to look at the sequence at this stage
some_Seqs.clear();
for(vector<Sequence>::size_type i = 0; i < the_seqs.size(); i++)
void Mussa::add_motif(const Sequence& motif, const Color& color)
{
motif_sequences.insert(motif);
- color_mapper->appendInstanceColor("motif", motif, color);
+ color_mapper->appendInstanceColor("motif", motif.get_sequence(), color);
}
void Mussa::set_motifs(const vector<Sequence>& motifs,
void
-NwayPaths::setup_ent(double new_entropy_thres, vector<string> some_seqs)
+NwayPaths::setup_ent(double new_entropy_thres, vector<Sequence> some_seqs)
{
ent_thres = new_entropy_thres;
c_sequences.clear();
for (vector<string>::size_type i = 0; i < some_seqs.size(); i++)
- c_sequences.push_back((char *)some_seqs[i].c_str());
+ c_sequences.push_back((char *)some_seqs[i].get_sequence().c_str());
}
#include "alg/conserved_path.hpp"
#include "alg/flp.hpp"
+#include "alg/sequence.hpp"
class NwayPaths : public QObject
{
//! setup an nway comparison, initialize # of species, window size,
//! threshold
void setup(int w, int t);
- void setup_ent(double new_entropy_thres, std::vector<std::string> some_Seqs);
+ void setup_ent(double new_entropy_thres, std::vector<Sequence> some_Seqs);
//! clear out our path
void clear();
//! set the score that a match must exceed inorder to be recorded as a path
}
Sequence::Sequence()
- : std::string(),
- header(""),
- species("")
+ : seq(""),
+ header(""),
+ species("")
{
}
}
Sequence::Sequence(const Sequence& o)
- : std::string(o),
+ : seq(o.seq),
header(o.header),
species(o.species),
annots(o.annots),
{
if (this != &s) {
//sequence = s.sequence;
- assign(s);
+ seq = s.seq;
header = s.header;
species = s.species;
annots = s.annots;
return *this;
}
+/*
+Sequence::operator std::string()
+{
+ std::string s(seq.begin(), seq.end());
+ return s;
+}
+
+Sequence::operator std::string() const
+{
+ std::string s(seq.begin(), seq.end());
+ return s;
+}
+*/
+
static void multiplatform_getline(std::istream& in, std::string& line)
{
line.clear();
if ( count == 0)
count = old_seq.size() - start;
- std::string::clear();
- reserve(count);
+ seq.clear();
+ seq.reserve(count);
// Make a conversion table
// finally, the actual conversion loop
for(std::string::size_type seq_index = 0; seq_index < count; seq_index++)
{
- append(1, conversionTable[ (int)old_seq[seq_index+start]]);
+ seq.append(1, conversionTable[ (int)old_seq[seq_index+start]]);
}
}
- // this doesn't work properly under gcc 3.x ... it can't recognize toupper
- //transform(sequence.begin(), sequence.end(), sequence.begin(), toupper);
-
void
Sequence::load_annot(fs::path file_path, int start_index, int end_index)
{
if ( count == npos || start+count > size()) {
count = size()-start;
}
- Sequence new_seq(std::string::substr(start, count));
+ Sequence new_seq(seq.substr(start, count));
new_seq.set_fasta_header(get_fasta_header());
new_seq.set_species(get_species());
// finally, the actual conversion loop
for(seq_i = len - 1; seq_i >= 0; seq_i--)
{
- table_i = (int) at(seq_i);
+ table_i = (int) seq.at(seq_i);
rev_comp += conversionTable[table_i];
}
return "";
}
+void Sequence::set_sequence(const std::string& s)
+{
+ set_filtered_sequence(s);
+}
+
+std::string Sequence::get_sequence() const
+{
+ return seq;
+}
+
+Sequence::const_reference Sequence::operator[](Sequence::size_type i) const
+{
+ return seq[i];
+}
+
+Sequence::const_reference Sequence::at(Sequence::size_type n) const
+{
+ return seq[n];
+}
+
void
Sequence::clear()
{
- std::string::clear();
- header = "";
- species = "";
+ seq.clear();
+ header.clear();
+ species.clear();
annots.clear();
+ motif_list.clear();
+}
+
+Sequence::iterator Sequence::begin()
+{
+ return seq.begin();
+}
+
+Sequence::const_iterator Sequence::begin() const
+{
+ return seq.begin();
+}
+
+Sequence::iterator Sequence::end()
+{
+ return seq.end();
}
+Sequence::const_iterator Sequence::end() const
+{
+ return seq.end();
+}
+
+bool Sequence::empty() const
+{
+ return seq.empty();
+}
+
+Sequence::size_type Sequence::size() const
+{
+ return seq.size();
+}
+
+Sequence::size_type Sequence::length() const
+{
+ return size();
+}
void
Sequence::save(fs::fstream &save_file)
//std::string save_file_path)
seq_counter++;
}
getline(load_file, file_data_line);
- assign(file_data_line);
+ seq.assign(file_data_line);
getline(load_file, file_data_line);
getline(load_file, file_data_line);
if (file_data_line == "<Annotations>")
}
std::string
-Sequence::rc_motif(std::string a_motif)
+Sequence::rc_motif(std::string a_motif) const
{
std::string rev_comp;
char conversionTable[257];
}
std::string
-Sequence::motif_normalize(std::string a_motif)
+Sequence::motif_normalize(const std::string& a_motif)
{
std::string valid_motif;
int seq_i, len;
motif_start_i != motif_starts.end();
++motif_start_i)
{
- motif_list.push_back(motif(*motif_start_i, a_motif));
+ motif_list.push_back(motif(*motif_start_i, a_motif.get_sequence()));
}
}
}
std::vector<int>
-Sequence::find_motif(std::string a_motif)
+Sequence::find_motif(const std::string& a_motif) const
{
std::vector<int> motif_match_starts;
- std::string a_motif_rc;
+ std::string norm_motif_rc;
motif_match_starts.clear();
//std::cout << "motif is: " << a_motif << std::endl;
- a_motif = motif_normalize(a_motif);
+ std::string norm_motif = motif_normalize(a_motif);
//std::cout << "motif is: " << a_motif << std::endl;
- if (a_motif.size() > 0)
+ if (norm_motif.size() > 0)
{
//std::cout << "Sequence: none blank motif\n";
- motif_scan(a_motif, &motif_match_starts);
+ motif_scan(norm_motif, &motif_match_starts);
- a_motif_rc = rc_motif(a_motif);
+ norm_motif_rc = rc_motif(a_motif);
// make sure not to do search again if it is a palindrome
- if (a_motif_rc != a_motif) {
- motif_scan(a_motif_rc, &motif_match_starts);
+ if (norm_motif_rc != norm_motif) {
+ motif_scan(norm_motif_rc, &motif_match_starts);
}
}
return motif_match_starts;
}
+std::vector<int>
+Sequence::find_motif(const Sequence& a_motif) const
+{
+ return find_motif(a_motif.get_sequence());
+}
+
void
-Sequence::motif_scan(std::string a_motif, std::vector<int> * motif_match_starts)
+Sequence::motif_scan(std::string a_motif, std::vector<int> * motif_match_starts) const
{
- const char * seq_c;
+ std::string::const_iterator seq_c = seq.begin();
std::string::size_type seq_i;
int motif_i, motif_len;
- // faster to loop thru the sequence as a old c std::string (ie char array)
- seq_c = c_str();
//std::cout << "Sequence: motif, seq len = " << sequence.length() << std::endl;
motif_len = a_motif.length();
std::list<Sequence>::iterator end)
{
while (start != end) {
- add_string_annotation(*start, start->get_fasta_header());
+ add_string_annotation(start->get_sequence(), start->get_fasta_header());
++start;
}
}
+std::ostream& operator<<(std::ostream& out, const Sequence& s)
+{
+ for(Sequence::const_iterator s_i = s.begin(); s_i != s.end(); ++s_i) {
+ out << *s_i;
+ }
+ return out;
+}
+
+bool operator<(const Sequence& x, const Sequence& y)
+{
+ Sequence::const_iterator x_i = x.begin();
+ Sequence::const_iterator y_i = y.begin();
+
+ while(1) {
+ if( x_i == x.end() and y_i == y.end() ) {
+ return false;
+ } else if ( x_i == x.end() ) {
+ return true;
+ } else if ( y_i == y.end() ) {
+ return false;
+ } else if ( (*x_i) < (*y_i)) {
+ return true;
+ } else if ( (*x_i) > (*y_i) ) {
+ return false;
+ } else {
+ ++x_i;
+ ++y_i;
+ }
+ }
+}
+
+bool operator==(const Sequence& x, const Sequence& y)
+{
+ if (x.seq == y.seq and x.annots == y.annots and x.motif_list == y.motif_list) {
+ return true;
+ } else {
+ return false;
+ }
+}
BOOST_CLASS_EXPORT(motif);
//! sequence track for mussa.
-class Sequence : public std::string
+class Sequence
{
- private:
- std::string header;
- std::string species;
-
- std::list<annot> annots;
- //! a seperate list for motifs since we're currently not saving them
- std::list<motif> motif_list;
-
- void motif_scan(std::string a_motif, std::vector<int> * motif_match_starts);
- std::string rc_motif(std::string a_motif);
- //! look for a string sequence type and and it to an annotation list
- void add_string_annotation(std::string a_seq, std::string name);
-
- // boost::serialization support
- friend class boost::serialization::access;
- template<class Archive>
- void serialize(Archive& ar, const unsigned int /*version*/) {
- ar & boost::serialization::make_nvp(
- "seq_text",
- boost::serialization::base_object<std::string >(*this)
- );
- ar & BOOST_SERIALIZATION_NVP(header);
- ar & BOOST_SERIALIZATION_NVP(species);
- ar & BOOST_SERIALIZATION_NVP(annots);
- ar & BOOST_SERIALIZATION_NVP(motif_list);
- }
-
- public:
- Sequence();
- ~Sequence();
- Sequence(const char* seq);
- Sequence(const std::string& seq);
- Sequence(const Sequence& seq);
- //! assignment to constant sequences
- Sequence &operator=(const Sequence&);
-
- //! set sequence to a (sub)string containing nothing but AGCTN
- void set_filtered_sequence(const std::string& seq,
- std::string::size_type start=0,
- std::string::size_type count=0);
-
- //! load sequence AGCT from fasta file
- //! \throw mussa_load_error
- //! \throw sequence_empty_error
- //! \throw sequence_empty_file_error
- void load_fasta(const boost::filesystem::path file_path, int seq_num=1,
- int start_index=0, int end_index=0);
- //! load sequence from stream
- //! \throw mussa_load_error
- //! \throw sequence_empty_error
- //! \throw sequence_empty_file_error
- void load_fasta(std::iostream& file, int seq_num=1,
- int start_index=0, int end_index=0);
- //! load sequence annotations
- //! \throws mussa_load_error
- void load_annot(const boost::filesystem::path file_path, int start_index, int end_index);
- //! load sequence annotations
- //! \throws mussa_load_error
- void load_annot(std::fstream& data_stream, int start_index, int end_index);
- bool parse_annot(std::string data, int start_index=0, int end_index=0);
- //! add an annotation to our list of annotations
- void add_annotation(const annot& a);
- const std::list<annot>& annotations() const;
- const std::list<motif>& motifs() const;
-
- //! return a subsequence, copying over any appropriate annotation
- Sequence subseq(int start=0, int count = std::string::npos) const;
- //! return a reverse compliment
- std::string rev_comp() const;
-
- //! clear the sequence and its annotations
- void clear();
-
- //! set species name
- void set_species(const std::string &);
- //! get species name
- std::string get_species() const;
- //! set the fasta header
- void set_fasta_header(std::string header);
- //! get the fasta header
- std::string get_fasta_header() const;
- //! get name (will return the first non-empty, of fasta_header, species)
- std::string get_name() const;
-
- //! add a motif to our list of motifs
- //! \throws motif_normalize_error if there's something wrong with a_motif
- void add_motif(const Sequence& a_motif);
- //! clear our list of found motifs
- void clear_motifs();
- //! search a sequence for a_motif
- //! \throws motif_normalize_error if there's something wrong with a_motif
- std::vector<int> find_motif(std::string a_motif);
- //! convert IUPAC symbols to upperase
- //! \throws motif_normalize_error if there is an invalid symbol
- static std::string motif_normalize(std::string a_motif);
-
- //! annotate the current sequence with other sequences
- void find_sequences(std::list<Sequence>::iterator start,
- std::list<Sequence>::iterator end);
-
- void save(boost::filesystem::fstream &save_file);
- void load_museq(boost::filesystem::path load_file_path, int seq_num);
-
+public:
+ typedef std::string::difference_type difference_type;
+ typedef std::string::iterator iterator;
+ typedef std::string::const_iterator const_iterator;
+ typedef std::string::reference reference;
+ typedef std::string::const_reference const_reference;
+ typedef std::string::size_type size_type;
+ static const size_type npos = std::string::npos;
+
+ Sequence();
+ ~Sequence();
+ Sequence(const char* seq);
+ Sequence(const std::string& seq);
+ Sequence(const Sequence& seq);
+ //! assignment to constant sequences
+ Sequence &operator=(const Sequence&);
+
+ // dangerous since they create large copies
+ //operator std::string();
+ //operator std::string() const;
+
+ friend std::ostream& operator<<(std::ostream&, const Sequence&);
+ friend bool operator<(const Sequence&, const Sequence&);
+ friend bool operator==(const Sequence&, const Sequence&);
+ const_reference operator[](size_type) const;
+
+ //! set sequence to a (sub)string containing nothing but AGCTN
+ void set_filtered_sequence(const std::string& seq,
+ std::string::size_type start=0,
+ std::string::size_type count=0);
+
+ //! retrive element at specific position
+ const_reference at(size_type n) const;
+ //! clear the sequence and its annotations
+ void clear();
+ //! forward iterator
+ iterator begin();
+ const_iterator begin() const;
+ iterator end();
+ const_iterator end() const;
+ //! is our sequence empty?
+ bool empty() const;
+ //! how many base pairs are there in our sequence
+ size_type size() const;
+ //! alias for size (used by string)
+ size_type length() const;
+
+ //! return a subsequence, copying over any appropriate annotation
+ Sequence subseq(int start=0, int count = std::string::npos) const;
+ //! return a reverse compliment
+ std::string rev_comp() const;
+
+ //! set sequence (filtered)
+ void set_sequence(const std::string &);
+ //! get sequence
+ std::string get_sequence() const;
+ //! set species name
+ void set_species(const std::string &);
+ //! get species name
+ std::string get_species() const;
+ //! set the fasta header
+ void set_fasta_header(std::string header);
+ //! get the fasta header
+ std::string get_fasta_header() const;
+ //! get name (will return the first non-empty, of fasta_header, species)
+ std::string get_name() const;
+
+ //! load sequence AGCT from fasta file
+ //! \throw mussa_load_error
+ //! \throw sequence_empty_error
+ //! \throw sequence_empty_file_error
+ void load_fasta(const boost::filesystem::path file_path, int seq_num=1,
+ int start_index=0, int end_index=0);
+ //! load sequence from stream
+ //! \throw mussa_load_error
+ //! \throw sequence_empty_error
+ //! \throw sequence_empty_file_error
+ void load_fasta(std::iostream& file, int seq_num=1,
+ int start_index=0, int end_index=0);
+ //! load sequence annotations
+ //! \throws mussa_load_error
+ void load_annot(const boost::filesystem::path file_path, int start_index, int end_index);
+ //! load sequence annotations
+ //! \throws mussa_load_error
+ void load_annot(std::fstream& data_stream, int start_index, int end_index);
+ bool parse_annot(std::string data, int start_index=0, int end_index=0);
+ //! add an annotation to our list of annotations
+ void add_annotation(const annot& a);
+ const std::list<annot>& annotations() const;
+ const std::list<motif>& motifs() const;
+
+ //! add a motif to our list of motifs
+ //! \throws motif_normalize_error if there's something wrong with a_motif
+ void add_motif(const Sequence& a_motif);
+ //! clear our list of found motifs
+ void clear_motifs();
+ //! search a sequence for a_motif
+ //! \throws motif_normalize_error if there's something wrong with a_motif
+ std::vector<int> find_motif(const std::string& a_motif) const;
+ //! search a sequence for a_motif
+ //! \throws motif_normalize_error if there's something wrong with a_motif
+ std::vector<int> find_motif(const Sequence& a_motif) const;
+ //! convert IUPAC symbols to upperase
+ //! \throws motif_normalize_error if there is an invalid symbol
+ static std::string motif_normalize(const std::string& a_motif);
+
+ //! annotate the current sequence with other sequences
+ void find_sequences(std::list<Sequence>::iterator start,
+ std::list<Sequence>::iterator end);
+
+ void save(boost::filesystem::fstream &save_file);
+ void load_museq(boost::filesystem::path load_file_path, int seq_num);
+
+private:
+ std::string seq;
+ std::string header;
+ std::string species;
+
+ std::list<annot> annots;
+ //! a seperate list for motifs since we're currently not saving them
+ std::list<motif> motif_list;
+
+ void motif_scan(std::string a_motif, std::vector<int> * motif_match_starts) const;
+ std::string rc_motif(std::string a_motif) const;
+ //! look for a string sequence type and and it to an annotation list
+ void add_string_annotation(std::string a_seq, std::string name);
+
+ // boost::serialization support
+ friend class boost::serialization::access;
+ template<class Archive>
+ void serialize(Archive& ar, const unsigned int /*version*/) {
+ ar & BOOST_SERIALIZATION_NVP(seq);
+ ar & BOOST_SERIALIZATION_NVP(header);
+ ar & BOOST_SERIALIZATION_NVP(species);
+ ar & BOOST_SERIALIZATION_NVP(annots);
+ ar & BOOST_SERIALIZATION_NVP(motif_list);
+ }
};
BOOST_CLASS_EXPORT(Sequence);
BOOST_CHECK_EQUAL(s2, "AATTGGCC");
BOOST_CHECK_EQUAL(s2.rev_comp(), "GGCCAATT");
BOOST_CHECK_EQUAL(s2.size(), s2.size());
- BOOST_CHECK_EQUAL(s2.c_str(), core_seq);
+ BOOST_CHECK_EQUAL(s2.get_sequence(), core_seq);
Sequence s3("asdfg");
BOOST_CHECK_EQUAL(s3, "ANNNG");
#include <string>
#include "alg/glsequence.hpp"
-PyObject* seq_to_string(const Sequence& s) {
- ::PyString_FromStringAndSize(s.data(), s.size());
-}
-
void export_sequence()
{
class_<Sequence>("Sequence")
.def(init<std::string>())
- .def("__str__", &seq_to_string, "cast to string")
- .def("__repr__", &seq_to_string, "display as string")
+ .def("__str__", &Sequence::get_sequence, "cast to string")
+ .def("__repr__", &Sequence::get_sequence, "display as string")
.def("size", &Sequence::size, "return the length of the sequence")
.def("__len__", &Sequence::size, "return the length of the sequence")
.def("clear", &Sequence::clear, "clear the sequence and its annotations")
{
assert (m != 0);
const set<Sequence> &motif = analysis->motifs();
- vector<string> motif_seq(motif.begin(), motif.end());
+ vector<Sequence> motif_seq(motif.begin(), motif.end());
connect(&applyButton, SIGNAL(clicked()), this, SLOT(updateMotifs()));
layout.addWidget(&applyButton);
{
MotifDetail *detail = new MotifDetail;
if (i < motif_seq.size()) {
- detail->setMotif(motif_seq[i]);
- detail->setColor(analysis->colorMapper()->lookup("motif", motif_seq[i]));
+ detail->setMotif(motif_seq[i].get_sequence());
+ detail->setColor(analysis->colorMapper()->lookup("motif", motif_seq[i].get_sequence()));
}
motif_details.push_back(detail);
layout.addWidget(detail);