}
}
-//! copy sequence from selected tracks as plain sequences
-void GlSeqBrowser::copySelectedTracksAsString(std::string& copy_buffer)
+//! copy sequence from selected tracks as FASTA sequences
+void GlSeqBrowser::copySelectedTracksAsFasta(std::string& copy_buffer)
{
- struct AsString {
+ struct AsFasta {
static string formatter(const Sequence& seq, int left, int right)
{
stringstream s;
- s << seq.subseq(left, right-left+1) << std::endl;
+ s << ">" << seq.get_header()
+ << "|" << "subregion=" << left << "-" << right+1
+ << std::endl
+ << seq.subseq(left, right-left+1) << std::endl;
return s.str();
}
};
-
- copySelectedTracks(copy_buffer, AsString::formatter);
+ copySelectedTracks(copy_buffer, AsFasta::formatter);
}
-//! copy sequence from selected tracks as FASTA sequences
-void GlSeqBrowser::copySelectedTracksAsFasta(std::string& copy_buffer)
+//! copy sequence from selected tracks as new sequences
+/*
+void GlSeqBrowser::copySelectedTracksAsSequence(std::string& copy_buffer)
{
- struct AsFasta {
+ struct AsSequence {
static string formatter(const Sequence& seq, int left, int right)
{
stringstream s;
};
copySelectedTracks(copy_buffer, AsFasta::formatter);
}
+*/
+
+//! copy sequence from selected tracks as plain sequences
+void GlSeqBrowser::copySelectedTracksAsString(std::string& copy_buffer)
+{
+ struct AsString {
+ static string formatter(const Sequence& seq, int left, int right)
+ {
+ stringstream s;
+ s << seq.subseq(left, right-left+1) << std::endl;
+ return s.str();
+ }
+ };
+
+ copySelectedTracks(copy_buffer, AsString::formatter);
+}
void GlSeqBrowser::centerOnPath(const vector<int>& paths)
{
void
Mussa::add_a_seq(string a_seq)
{
- Sequence aSeq;
-
- aSeq.set_seq(a_seq);
+ Sequence aSeq(a_seq);
the_seqs.push_back(aSeq);
}
{
//cout << "seqcomping: " << i << " v. " << i2 << endl;
all_comps[i][i2].setup(window, threshold);
- all_comps[i][i2].seqcomp(the_seqs[i].get_seq(), the_seqs[i2].get_seq(), false);
- all_comps[i][i2].seqcomp(the_seqs[i].get_seq(),the_seqs[i2].rev_comp(),true);
+ all_comps[i][i2].seqcomp(the_seqs[i], the_seqs[i2], false);
+ all_comps[i][i2].seqcomp(the_seqs[i], the_seqs[i2].rev_comp(),true);
++seqcomps_done;
emit progress("seqcomp", seqcomps_done, seqcomps_todo);
}
some_Seqs.clear();
for(vector<Sequence>::size_type i = 0; i < the_seqs.size(); i++)
{
- some_Seqs.push_back(the_seqs[i].get_seq());
+ some_Seqs.push_back(the_seqs[i]);
}
the_paths.setup_ent(ent_thres, some_Seqs); // ent analysis extra setup
save_file.open(analysis_name, ios::out);
for(vector<Sequence>::size_type i = 0; i < the_seqs.size(); i++)
- save_file << the_seqs[i].get_seq() << endl;
+ save_file << the_seqs[i] << endl;
save_file << window << endl;
save_file.close();
for(i = 0; i < seq_num; i++)
{
getline(save_file, file_data_line);
- a_seq.set_seq(file_data_line);
+ a_seq = file_data_line;
the_seqs.push_back(a_seq);
}
{
}
+annot::~annot()
+{
+}
+
+bool operator==(const annot& left, const annot& right)
+{
+ return ((left.start == right.start) and
+ (left.end == right.end) and
+ (left.type == right.type) and
+ (left.name == right.name));
+}
+
motif::motif(int start, std::string motif)
: annot(start, start+motif.size(), "motif", motif),
sequence(motif)
{
}
+motif::~motif()
+{
+}
+
Sequence::Sequence()
- : sequence(""),
+ : std::string(),
header(""),
species("")
{
motif_list.clear();
}
-Sequence::Sequence(std::string seq)
- : header(""),
+Sequence::~Sequence()
+{
+}
+
+Sequence::Sequence(const std::string& seq)
+ : std::string(),
+ header(""),
species("")
{
set_filtered_sequence(seq);
}
+Sequence::Sequence(const Sequence& o)
+ : std::string(o),
+ header(o.header),
+ species(o.species),
+ annots(o.annots),
+ motif_list(o.motif_list)
+{
+}
+
Sequence &Sequence::operator=(const Sequence& s)
{
if (this != &s) {
- sequence = s.sequence;
+ //sequence = s.sequence;
+ assign(s);
header = s.header;
species = s.species;
annots = s.annots;
return *this;
}
-char Sequence::operator[](int index) const
-{
- return sequence[index];
-}
-
-std::ostream& operator<<(std::ostream& out, const Sequence& seq)
-{
- out << "Sequence(" << seq.get_seq() << ")";
- return out;
-}
-
static void multiplatform_getline(std::istream& in, std::string& line)
{
line.clear();
if (c=='\012' or c == '\015') {
in.get();
}
-
}
//! load a fasta file into a sequence
if ( count == 0)
count = old_seq.size() - start;
- sequence.clear();
- sequence.reserve(count);
+ std::string::clear();
+ reserve(count);
// Make a conversion table
// finally, the actual conversion loop
for(std::string::size_type seq_index = 0; seq_index < count; seq_index++)
{
- sequence += conversionTable[ (int)old_seq[seq_index+start]];
+ append(1, conversionTable[ (int)old_seq[seq_index+start]]);
}
}
find_sequences(query_seqs.begin(), query_seqs.end());
}
-/*
-void
-Sequence::load_annot(std::istream& data_stream, int start_index, int end_index)
-{
- std::string file_data_line;
- annot an_annot;
- std::string::size_type space_split_i;
- std::string annot_value;
- std::list<annot>::iterator list_i;
- std::string err_msg;
-
- annots.clear();
-
- getline(data_stream,file_data_line);
- species = file_data_line;
-
- // end_index = 0 means no end was specified, so cut to the end
- if (end_index == 0)
- end_index = sequence.length();
-
- //std::cout << "START: " << start_index << " END: " << end_index << std::endl;
-
- while ( !data_stream.eof() )
- {
- getline(data_stream,file_data_line);
- if (file_data_line != "")
- {
- // need to get 4 values...almost same code 4 times...
- // get annot start index
- space_split_i = file_data_line.find(" ");
- annot_value = file_data_line.substr(0,space_split_i);
- an_annot.start = atoi (annot_value.c_str());
- file_data_line = file_data_line.substr(space_split_i+1);
- // get annot end index
- space_split_i = file_data_line.find(" ");
- annot_value = file_data_line.substr(0,space_split_i);
- an_annot.end = atoi (annot_value.c_str());
- file_data_line = file_data_line.substr(space_split_i+1);
-
- //std::cout << "seq, annots: " << an_annot.start << ", " << an_annot.end
- // << std::endl;
-
- // get annot name
- space_split_i = file_data_line.find(" ");
- if (space_split_i == std::string::npos) // no entries for name & type
- {
- std::cout << "seq, annots - no name or type\n";
- an_annot.name = "";
- an_annot.type = "";
- }
- else
- {
- annot_value = file_data_line.substr(0,space_split_i);
- an_annot.name = annot_value;
- file_data_line = file_data_line.substr(space_split_i+1);
- // get annot type
- space_split_i = file_data_line.find(" ");
- if (space_split_i == std::string::npos) // no entry for type
- an_annot.type = "";
- else
- {
- annot_value = file_data_line.substr(0,space_split_i);
- an_annot.type = annot_value;
- }
- }
-
-
- // add annot to list if it falls within the range of sequence specified
- if ((start_index <= an_annot.start) && (end_index >= an_annot.end))
- {
- an_annot.start -= start_index;
- an_annot.end -= start_index;
- annots.push_back(an_annot);
- }
- // else no (or bogus) annotations
- }
- }
-}
-*/
-
const std::string& Sequence::get_species() const
{
return species;
}
-bool Sequence::empty() const
+void Sequence::add_annotation(const annot& a)
{
- return (size() == 0);
+ annots.push_back(a);
}
const std::list<annot>& Sequence::annotations() const
return annots;
}
-std::string::size_type Sequence::length() const
-{
- return size();
-}
-
-std::string::size_type Sequence::size() const
-{
- return sequence.size();
-}
-
-Sequence::iterator Sequence::begin()
-{
- return sequence.begin();
-}
-
-Sequence::const_iterator Sequence::begin() const
-{
- return sequence.begin();
-}
-
-Sequence::iterator Sequence::end()
-{
- return sequence.end();
-}
-
-Sequence::const_iterator Sequence::end() const
+Sequence
+Sequence::subseq(int start, int count) const
{
- return sequence.end();
-}
-
+ // there might be an off by one error with start+count > size()
+ if ( count == npos || start+count > size()) {
+ count = size()-start;
+ }
+ Sequence new_seq(std::string::substr(start, count));
+ new_seq.set_header(get_header());
+ //new_seq.set_species(get_species());
-const std::string&
-Sequence::get_seq() const
-{
- return sequence;
-}
+ new_seq.motif_list = motif_list;
+ // attempt to copy & reannotate position based annotations
+ int end = start+count;
+ for(std::list<annot>::const_iterator annot_i = annots.begin();
+ annot_i != annots.end();
+ ++annot_i)
+ {
+ int annot_start = annot_i->start;
+ int annot_end = annot_i->end;
+
+ if (annot_start < end) {
+ if (annot_start >=start) {
+ annot_start -= start;
+ } else {
+ annot_start = 0;
+ }
-std::string
-Sequence::subseq(int start, int count) const
-{
- return sequence.substr(start, count);
-}
+ if (annot_end < end) {
+ annot_end -= start;
+ } else {
+ annot_end = count;
+ }
+ annot new_annot(annot_start, annot_end, annot_i->type, annot_i->name);
+ new_seq.annots.push_back(new_annot);
+ }
+ }
-const char *
-Sequence::c_seq() const
-{
- return sequence.c_str();
+ return new_seq;
}
std::string
char conversionTable[257];
int seq_i, table_i, len;
- len = sequence.length();
+ len = length();
rev_comp.reserve(len);
// make a conversion table
// init all parts of conversion table to '~' character
// finally, the actual conversion loop
for(seq_i = len - 1; seq_i >= 0; seq_i--)
{
- table_i = (int) sequence[seq_i];
+ table_i = (int) at(seq_i);
rev_comp += conversionTable[table_i];
}
}
*/
-void
-Sequence::set_seq(const std::string& a_seq)
-{
- set_filtered_sequence(a_seq);
-}
-
-
/*
std::string
Sequence::species()
void
Sequence::clear()
{
- sequence = "";
+ std::string::clear();
header = "";
species = "";
annots.clear();
//save_file.open(save_file_path.c_str(), std::ios::app);
save_file << "<Sequence>" << std::endl;
- save_file << sequence << std::endl;
+ save_file << *this << std::endl;
save_file << "</Sequence>" << std::endl;
save_file << "<Annotations>" << std::endl;
seq_counter++;
}
getline(load_file, file_data_line);
- sequence = file_data_line;
+ assign(file_data_line);
getline(load_file, file_data_line);
getline(load_file, file_data_line);
if (file_data_line == "<Annotations>")
void
Sequence::motif_scan(std::string a_motif, std::vector<int> * motif_match_starts)
{
- char * seq_c;
+ const char * seq_c;
std::string::size_type seq_i;
int motif_i, motif_len;
// faster to loop thru the sequence as a old c std::string (ie char array)
- seq_c = (char*)sequence.c_str();
+ seq_c = c_str();
//std::cout << "Sequence: motif, seq len = " << sequence.length() << std::endl;
motif_len = a_motif.length();
//std::cout << "Sequence: motif, length= " << length << std::endl;
seq_i = 0;
- while (seq_i < sequence.length())
+ while (seq_i < length())
{
//std::cout << seq_c[seq_i];
//std::cout << seq_c[seq_i] << "?" << a_motif[motif_i] << ":" << motif_i << " ";
std::list<Sequence>::iterator end)
{
while (start != end) {
- add_string_annotation(start->get_seq(), start->get_header());
+ add_string_annotation(*start, start->get_header());
++start;
}
}
{
annot();
annot(int start, int end, std::string type, std::string name);
+ ~annot();
int start;
int end;
std::string type;
std::string name;
+
+ friend bool operator==(const annot& left, const annot& right);
};
/* The way that motifs are found currently doesn't really
//! this constructor is for when we're adding motifs to our annotations
motif(int start, std::string motif);
+ ~motif();
};
//! sequence track for mussa.
-class Sequence
+class Sequence : public std::string
{
private:
- std::string sequence;
std::string header;
std::string species;
void add_string_annotation(std::string a_seq, std::string name);
public:
- typedef std::string::iterator iterator;
- typedef std::string::const_iterator const_iterator;
- typedef std::string::size_type size_type;
-
Sequence();
- Sequence(std::string seq);
+ ~Sequence();
+ Sequence(const std::string& seq);
+ Sequence(const Sequence& seq);
//! assignment to constant sequences
Sequence &operator=(const Sequence&);
Sequence &operator=(const std::string &);
- char operator[](int) const;
- friend std::ostream& operator<<(std::ostream& out, const Sequence& seq);
//! set sequence to a (sub)string containing nothing but AGCTN
void set_filtered_sequence(const std::string& seq,
//! \throws mussa_load_error
void load_annot(std::fstream& data_stream, int start_index, int end_index);
void parse_annot(std::string data, int start_index, int end_index);
+ //! add an annotation to our list of annotations
+ void add_annotation(const annot& a);
const std::list<annot>& annotations() const;
const std::list<motif>& motifs() const;
const std::string& get_species() const;
- // simple access functions
- void set_seq(const std::string& a_seq);
- const std::string& get_seq() const;
- const char *c_seq() const;
- std::string subseq(int start, int count) const;
+ //! return a subsequence, copying over any appropriate annotation
+ Sequence subseq(int start=0, int count = std::string::npos) const;
+ //! return a reverse compliment
std::string rev_comp() const;
- // string-like functions
- iterator begin();
- const_iterator begin() const;
- iterator end();
- const_iterator end() const;
- bool empty() const;
- //! alias for size, (included as this is much like a string)
- std::string::size_type length() const;
- //! the number of base pairs in this sequence
- std::string::size_type size() const;
+ //! clear the sequence and its annotations
void clear();
void set_header(std::string header);
Sequence s1("AAAATTTT");
Sequence s2("AACAGGGG");
f1.setup(4,3);
- f1.seqcomp(s1.get_seq(), s2.get_seq(), false);
- f1.seqcomp(s1.get_seq(), s2.rev_comp(), true);
+ f1.seqcomp(s1, s2, false);
+ f1.seqcomp(s1, s2.rev_comp(), true);
FLPs f2;
f2.setup(4,3);
- f2.seqcomp(s1.get_seq(), s2.rev_comp(), true);
- f2.seqcomp(s1.get_seq(), s2.get_seq(), false);
+ f2.seqcomp(s1, s2.rev_comp(), true);
+ f2.seqcomp(s1, s2, false);
// The order of doing the reverse compliment search shouldn't matter
// when we're using exactly the same sequence
Sequence seq1(s1);
GlSequence glseq0(seq0, cm);
- BOOST_CHECK (glseq0.sequence().get_seq() == s0);
+ BOOST_CHECK (glseq0.sequence() == s0);
GlSequence glseq1(seq1, cm);
GlSequence glseq_copy0(glseq0);
- BOOST_CHECK(glseq_copy0.sequence().get_seq() == glseq0.sequence().get_seq());
+ BOOST_CHECK(glseq_copy0.sequence() == glseq0.sequence());
BOOST_CHECK( &(glseq_copy0.sequence()) == &(glseq0.sequence()));
glseq0 = glseq1;
- BOOST_CHECK( glseq0.sequence().get_seq() == s1 );
+ BOOST_CHECK( glseq0.sequence() == s1 );
}
BOOST_AUTO_TEST_CASE( glsequence_color )
analysis.add_a_seq(s2);
BOOST_CHECK_EQUAL( analysis.sequences().size(), 3 );
- BOOST_CHECK_EQUAL( analysis.sequences()[0].get_seq(), s0);
- BOOST_CHECK_EQUAL( analysis.sequences()[1].get_seq(), s1);
- BOOST_CHECK_EQUAL( analysis.sequences()[2].get_seq(), s2);
+ BOOST_CHECK_EQUAL( analysis.sequences()[0], s0);
+ BOOST_CHECK_EQUAL( analysis.sequences()[1], s1);
+ BOOST_CHECK_EQUAL( analysis.sequences()[2], s2);
}
// for some reason we can call nway once safely but it
//! Do simple operations work correctly?
BOOST_AUTO_TEST_CASE( sequence_filter )
{
- Sequence s1("AATTGGCC");
- BOOST_CHECK_EQUAL(s1.get_seq(), "AATTGGCC");
+ const char *core_seq = "AATTGGCC";
+ Sequence s1(core_seq);
+ BOOST_CHECK_EQUAL(s1, core_seq);
Sequence s2("aattggcc");
- BOOST_CHECK_EQUAL(s2.get_seq(), "AATTGGCC");
+ BOOST_CHECK_EQUAL(s2, "AATTGGCC");
BOOST_CHECK_EQUAL(s2.rev_comp(), "GGCCAATT");
- BOOST_CHECK_EQUAL(s2.size(), s2.get_seq().size());
- BOOST_CHECK_EQUAL(s2.c_seq(), s2.get_seq().c_str());
+ BOOST_CHECK_EQUAL(s2.size(), s2.size());
+ BOOST_CHECK_EQUAL(s2.c_str(), core_seq);
Sequence s3("asdfg");
- BOOST_CHECK_EQUAL(s3.get_seq(), "ANNNG");
+ BOOST_CHECK_EQUAL(s3, "ANNNG");
BOOST_CHECK_EQUAL(s3.subseq(0,2), "AN");
s3.set_filtered_sequence("AAGGCCTT", 0, 2);
- BOOST_CHECK_EQUAL(s3.get_seq(), "AA");
+ BOOST_CHECK_EQUAL(s3, "AA");
s3.set_filtered_sequence("AAGGCCTT", 2, 2);
- BOOST_CHECK_EQUAL( s3.get_seq(), "GG");
+ BOOST_CHECK_EQUAL( s3, "GG");
s3.set_filtered_sequence("AAGGCCTT", 4);
- BOOST_CHECK_EQUAL( s3.get_seq(), "CCTT");
+ BOOST_CHECK_EQUAL( s3, "CCTT");
s3.clear();
- BOOST_CHECK_EQUAL(s3.get_seq(), "");
+ BOOST_CHECK_EQUAL(s3, "");
- s3.set_seq("AAGGFF");
- BOOST_CHECK_EQUAL(s3.get_seq(), "AAGGNN");
+ s3 = "AAGGFF";
+ BOOST_CHECK_EQUAL(s3, "AAGGNN");
}
//! Can we load data from a file
BOOST_CHECK_EQUAL(seq.annotations().size(), count);
}
+BOOST_AUTO_TEST_CASE( subseq_annotation_test )
+{
+ string s("CCGCCCCCCATCATCGCGGCTCTCCGAGAGTCCCGCGCCCCACTCCCGGC"
+ "ACCCACCTGACCGCGGGCGGCTCCGGCCCCGCTTCGCCCCACTGCGATCA"
+ "GTCGCGTCCCGCAGGCCAGGCACGCCCCGCCGCTCCCGCTGCGCCGGGCG"
+ "TCTGGGACCTCGGGCGGCTCCTCCGAGGGGCGGGGCAGCCGGGAGCCACG"
+ "CCCCCGCAGGTGAGCCGGCCACGCCCACCGCCCGTGGGAAGTTCAGCCTC"
+ "GGGGCTCCAGCCCCGCGGGAATGGCAGAACTTCGCACGCGGAACTGGTAA"
+ "CCTCCAGGACACCTCGAATCAGGGTGATTGTAGCGCAGGGGCCTTGGCCA"
+ "AGCTAAAACTTTGGAAACTTTAGATCCCAGACAGGTGGCTTTCTTGCAGT");
+ Sequence seq(s);
+
+
+ seq.add_annotation(annot(0, 10, "0-10", "0-10"));
+ seq.add_annotation(annot(10, 20, "10-20", "10-20"));
+ seq.add_annotation(annot(0, 20, "0-20", "0-20"));
+ seq.add_annotation(annot(8, 12, "8-12", "8-12"));
+ seq.add_annotation(annot(100, 5000, "100-5000", "100-5000"));
+
+ Sequence subseq = seq.subseq(5, 10);
+ const list<annot> annots = subseq.annotations();
+ // generate some ground truth
+ list<annot> correct;
+ correct.push_back(annot(0, 5, "0-10", "0-10"));
+ correct.push_back(annot(5,10, "10-20", "10-20"));
+ correct.push_back(annot(0,10, "0-20", "0-20"));
+ correct.push_back(annot(3, 7, "8-12", "8-12"));
+ BOOST_REQUIRE_EQUAL( annots.size(), correct.size() );
+
+ list<annot>::iterator correct_i = correct.begin();
+ list<annot>::const_iterator annot_i = annots.begin();
+ for(; annot_i != annots.end(); ++annot_i, ++correct_i)
+ {
+ BOOST_CHECK( *annot_i == *correct_i );
+ }
+}
+
+BOOST_AUTO_TEST_CASE( out_operator )
+{
+ string s("AAGGCCTT");
+ Sequence seq(s);
+
+ ostringstream buf;
+ buf << s;
+ BOOST_CHECK_EQUAL( s, buf.str() );
+}
+
{
class_<Sequence>("Sequence")
.def(init<std::string>())
- .def("__str__", &Sequence::get_seq, return_value_policy<return_by_value>())
+ //.def("__str__", &Sequence::get_seq, return_value_policy<return_by_value>())
.def("size", &Sequence::size)
.def("__len__", &Sequence::size)
.add_property("header", &Sequence::get_header, &Sequence::set_header)