From 67888dae3b16b9d69aa846e393f11e7ff3633f16 Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Fri, 6 Apr 2007 00:23:50 +0000 Subject: [PATCH] remove annot class in favor of SeqSpan It's not quite as clean as I might like as my metadata Annotations class is annoying to access in C++. Also, to support our "legacy" mussa annotation file format, I added a couple of annotation-creating helper functions. Since I didn't want to also change the motif class at the same time, I just moved the members of annot into motif. At some point I'll finish getting rid of motif. --- alg/annotation_colors.cpp | 5 - alg/annotation_colors.hpp | 1 - alg/glsequence.cpp | 6 +- alg/seq_span.hpp | 3 + alg/sequence.cpp | 180 +++++++++++++++++------------ alg/sequence.hpp | 59 ++++------ alg/test/test_annotation_color.cpp | 5 +- alg/test/test_sequence.cpp | 156 ++++++++++++++----------- py/annot.cpp | 14 +-- py/sequence.cpp | 9 +- py/test/TestSequence.py | 8 +- 11 files changed, 239 insertions(+), 207 deletions(-) diff --git a/alg/annotation_colors.cpp b/alg/annotation_colors.cpp index a84d459..230206a 100644 --- a/alg/annotation_colors.cpp +++ b/alg/annotation_colors.cpp @@ -77,11 +77,6 @@ void AnnotationColors::erase(const string &type, root_map.cm[type].cm.erase(instance); } -Color AnnotationColors::lookup(const annot &a) const -{ - return lookup(a.type, a.name); -} - Color AnnotationColors::lookup(const string &type, const string &instance) const { // Yeah, there's probably a nicer looking recursive solution diff --git a/alg/annotation_colors.hpp b/alg/annotation_colors.hpp index 50c738b..26b56c9 100644 --- a/alg/annotation_colors.hpp +++ b/alg/annotation_colors.hpp @@ -60,7 +60,6 @@ public: void erase(const std::string &type, const std::string& instance); //! lookup an annotation color - Color lookup(const annot &) const; Color lookup(const std::string &, const std::string &) const; private: // nested maps, with default? 
diff --git a/alg/glsequence.cpp b/alg/glsequence.cpp index eb6cfe9..340a9ad 100644 --- a/alg/glsequence.cpp +++ b/alg/glsequence.cpp @@ -256,14 +256,14 @@ void GlSequence::draw_annotations(GLfloat left, GLfloat right) const { // draw annotations GLfloat annotation_z = z() + 10.0; - const std::list& annots = Sequence::annotations(); + const SeqSpanRefList& annots = Sequence::annotations(); const MotifList& motifs = Sequence::motifs(); - for (std::list::const_iterator annot_itor = annots.begin(); + for (SeqSpanRefList::const_iterator annot_itor = annots.begin(); annot_itor != annots.end(); ++annot_itor) { glColor3f(0.0, 0.8, 0.0); - draw_box(left, right, x()+annot_itor->begin, x()+annot_itor->end, + draw_box(left, right, x()+(*annot_itor)->start(), x()+(*annot_itor)->stop(), height(), annotation_z); } // if motifs? diff --git a/alg/seq_span.hpp b/alg/seq_span.hpp index f5e5748..1752a7a 100644 --- a/alg/seq_span.hpp +++ b/alg/seq_span.hpp @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -24,6 +25,8 @@ class SeqSpan; typedef boost::shared_ptr SeqSpanRef; +typedef std::list SeqSpanRefList; +typedef boost::shared_ptr SeqSpanRefListRef; //! 
Track what segment of a sequence we're looking at class SeqSpan : public boost::enable_shared_from_this { diff --git a/alg/sequence.cpp b/alg/sequence.cpp index 6cb4e2c..55ff5bf 100644 --- a/alg/sequence.cpp +++ b/alg/sequence.cpp @@ -40,27 +40,7 @@ namespace spirit = boost::spirit; #include #include -annot::annot() - : begin(0), - end(0), - type(""), - name("") -{ -} - -annot::annot(int begin, int end, std::string type, std::string name) - : begin(begin), - end(end), - type(type), - name(name) -{ -} - -annot::~annot() -{ -} - -bool operator==(const annot& left, const annot& right) +bool operator==(const motif& left, const motif& right) { return ((left.begin== right.begin) and (left.end == right.end) and @@ -68,8 +48,20 @@ bool operator==(const annot& left, const annot& right) (left.name == right.name)); } -motif::motif(int begin, std::string motif) - : annot(begin, begin+motif.size(), "motif", motif), +motif::motif() + : begin(0), + end(0), + type("motif"), + name(""), + sequence("") +{ +} + +motif::motif(int begin_, std::string motif) + : begin(begin_), + end(begin_+motif.size()), + type("motif"), + name(motif), sequence(motif) { } @@ -78,9 +70,9 @@ motif::~motif() { } - Sequence::Sequence(AlphabetRef alphabet) - : seq(new SeqSpan("", alphabet, SeqSpan::PlusStrand)), + : seq(new SeqSpan("", alphabet, SeqSpan::PlusStrand)), + annotation_list(new SeqSpanRefList), motif_list(new MotifList) { } @@ -92,6 +84,7 @@ Sequence::~Sequence() Sequence::Sequence(const char *seq, AlphabetRef alphabet_, SeqSpan::strand_type strand_) : header(""), species(""), + annotation_list(new SeqSpanRefList), motif_list(new MotifList) { set_filtered_sequence(seq, alphabet_, 0, npos, strand_); @@ -102,6 +95,7 @@ Sequence::Sequence(const std::string& seq, SeqSpan::strand_type strand_) : header(""), species(""), + annotation_list(new SeqSpanRefList), motif_list(new MotifList) { set_filtered_sequence(seq, alphabet_, 0, seq.size(), strand_); @@ -111,7 +105,7 @@ Sequence::Sequence(const Sequence& 
o) : seq(o.seq), header(o.header), species(o.species), - annots(o.annots), + annotation_list(o.annotation_list), motif_list(o.motif_list) { } @@ -120,7 +114,7 @@ Sequence::Sequence(const Sequence* o) : seq(o->seq), header(o->header), species(o->species), - annots(o->annots), + annotation_list(o->annotation_list), motif_list(o->motif_list) { } @@ -129,7 +123,7 @@ Sequence::Sequence(const SequenceRef o) : seq(new SeqSpan(o->seq)), header(o->header), species(o->species), - annots(o->annots), + annotation_list(o->annotation_list), motif_list(o->motif_list) { } @@ -138,6 +132,7 @@ Sequence::Sequence(const SeqSpanRef& seq_ref) : seq(seq_ref), header(""), species(""), + annotation_list(new SeqSpanRefList), motif_list(new MotifList) { } @@ -148,7 +143,7 @@ Sequence &Sequence::operator=(const Sequence& s) seq = s.seq; header = s.header; species = s.species; - annots = s.annots; + annotation_list = s.annotation_list; motif_list = s.motif_list; } return *this; @@ -371,20 +366,23 @@ Sequence::load_annot(fs::path file_path, int start_index, int end_index) */ struct push_back_annot { - std::list& annot_list; + Sequence* parent; + SeqSpanRefListRef children; int& begin; int& end; std::string& name; std::string& type; int &parsed; - push_back_annot(std::list& annot_list_, + push_back_annot(Sequence* parent_seq, + SeqSpanRefListRef children_list, int& begin_, int& end_, std::string& name_, std::string& type_, int &parsed_) - : annot_list(annot_list_), + : parent(parent_seq), + children(children_list), begin(begin_), end(end_), name(name_), @@ -396,8 +394,7 @@ struct push_back_annot { void operator()(std::string::const_iterator, std::string::const_iterator) const { - //std::cout << "adding annot: " << begin << "|" << end << "|" << name << "|" << type << std::endl; - annot_list.push_back(annot(begin, end, name, type)); + children->push_back(parent->make_annotation(name, type, begin, end)); ++parsed; }; }; @@ -446,8 +443,8 @@ Sequence::parse_annot(std::string data, int start_index, 
int end_index) int end=0; std::string name; std::string type; - std::string seq; - std::list parsed_annots; + std::string seqstr; + SeqSpanRefListRef parsed_annots(new SeqSpanRefList); std::list query_seqs; int parsed=0; @@ -489,13 +486,13 @@ Sequence::parse_annot(std::string data, int start_index, int end_index) ) // to understand how this group gets set // read the comment above struct push_back_annot - )[push_back_annot(parsed_annots, start, end, type, name, parsed)] + )[push_back_annot(this, parsed_annots, start, end, name, type, parsed)] | ((spirit::ch_p('>')|spirit::str_p(">")) >> (*(spirit::print_p))[spirit::assign_a(name)] >> spirit::eol_p >> - (+(spirit::chset<>(Alphabet::nucleic_cstr)))[spirit::assign_a(seq)] - )[push_back_seq(query_seqs, name, seq, parsed)] + (+(spirit::chset<>(Alphabet::nucleic_cstr)))[spirit::assign_a(seqstr)] + )[push_back_seq(query_seqs, name, seqstr, parsed)] ) >> *spirit::space_p ) @@ -506,33 +503,56 @@ Sequence::parse_annot(std::string data, int start_index, int end_index) msg << "Error parsing annotation #" << parsed; throw annotation_load_error(msg.str()); } + // If everything loaded correctly add the sequences to our annotation list // add newly parsed annotations to our sequence - std::copy(parsed_annots.begin(), parsed_annots.end(), std::back_inserter(annots)); - // go seearch for query sequences + std::copy(parsed_annots->begin(), parsed_annots->end(), std::back_inserter(*annotation_list)); + // go search for query sequences find_sequences(query_seqs.begin(), query_seqs.end()); } -void Sequence::add_annotation(const annot& a) +void Sequence::add_annotation(const SeqSpanRef a) { - annots.push_back(a); + annotation_list->push_back(a); } -const std::list& Sequence::annotations() const +void Sequence::add_annotation(std::string name, std::string type, size_type start, size_type stop) { - return annots; + add_annotation(make_annotation(name, type, start, stop)); +} + +SeqSpanRef +Sequence::make_annotation(std::string name, 
std::string type, size_type start, size_type stop) const +{ + // we want things to be in the positive direction + if (stop < start) { + size_type tmp = start; + start = stop; + stop = tmp; + } + size_type count = stop - start; + SeqSpanRef new_annot(seq->subseq(start, count, SeqSpan::UnknownStrand)); + AnnotationsRef metadata(new Annotations(name)); + metadata->set("type", type); + new_annot->setAnnotations(metadata); + return new_annot; +} + +const SeqSpanRefList& Sequence::annotations() const +{ + return *annotation_list; } void Sequence::copy_children(Sequence &new_seq, size_type start, size_type count) const { new_seq.motif_list = motif_list; - new_seq.annots.clear(); + new_seq.annotation_list.reset(new SeqSpanRefList); - for(std::list::const_iterator annot_i = annots.begin(); - annot_i != annots.end(); + for(SeqSpanRefList::const_iterator annot_i = annotation_list->begin(); + annot_i != annotation_list->end(); ++annot_i) { - size_type annot_begin= annot_i->begin; - size_type annot_end = annot_i->end; + size_type annot_begin= (*annot_i)->start(); + size_type annot_end = (*annot_i)->stop(); if (annot_begin < start+count) { if (annot_begin >= start) { @@ -547,8 +567,9 @@ void Sequence::copy_children(Sequence &new_seq, size_type start, size_type count annot_end = count; } - annot new_annot(annot_begin, annot_end, annot_i->type, annot_i->name); - new_seq.annots.push_back(new_annot); + SeqSpanRef new_annot(seq->subseq(annot_begin, annot_end)); + new_annot->setAnnotations((*annot_i)->annotations()); + new_seq.annotation_list->push_back(new_annot); } } } @@ -562,7 +583,7 @@ Sequence::subseq(size_type start, size_type count, SeqSpan::strand_type strand) return new_seq; } - Sequence new_seq = *this; + Sequence new_seq(*this); new_seq.seq = seq->subseq(start, count, strand); if (seq->annotations()) { AnnotationsRef a(new Annotations(*(seq->annotations()))); @@ -640,15 +661,18 @@ Sequence::clear() seq.reset(); header.clear(); species.clear(); - annots.clear(); + 
annotation_list.reset(new SeqSpanRefList); motif_list.reset(new MotifList); } void Sequence::save(fs::fstream &save_file) { + std::string type("type"); + std::string empty_str(""); //fstream save_file; - std::list::iterator annots_i; + SeqSpanRefList::iterator annots_i; + AnnotationsRef metadata; // not sure why, or if i'm doing something wrong, but can't seem to pass // file pointers down to this method from the mussa control class @@ -661,10 +685,14 @@ Sequence::save(fs::fstream &save_file) save_file << "" << std::endl; save_file << species << std::endl; - for (annots_i = annots.begin(); annots_i != annots.end(); ++annots_i) + for (annots_i = annotation_list->begin(); + annots_i != annotation_list->end(); + ++annots_i) { - save_file << annots_i->begin << " " << annots_i->end << " " ; - save_file << annots_i->name << " " << annots_i->type << std::endl; + metadata = (*annots_i)->annotations(); + save_file << (*annots_i)->parentStart() << " " << (*annots_i)->parentStop() << " " ; + save_file << metadata->name() << " " + << metadata->getdefault(type, empty_str) << std::endl; } save_file << "" << std::endl; //save_file.close(); @@ -676,11 +704,17 @@ Sequence::load_museq(fs::path load_file_path, int seq_num) fs::fstream load_file; std::string file_data_line; int seq_counter; - annot an_annot; + //annot an_annot; + int annot_begin; + int annot_end; + std::string annot_name; + std::string annot_type; + std::string::size_type space_split_i; std::string annot_value; - annots.clear(); + annotation_list.reset(new SeqSpanRefList); + load_file.open(load_file_path, std::ios::in); seq_counter = 0; @@ -709,39 +743,41 @@ Sequence::load_museq(fs::path load_file_path, int seq_num) // get annot start index space_split_i = file_data_line.find(" "); annot_value = file_data_line.substr(0,space_split_i); - an_annot.begin = atoi (annot_value.c_str()); + annot_begin = atoi (annot_value.c_str()); file_data_line = file_data_line.substr(space_split_i+1); // get annot end index space_split_i = 
file_data_line.find(" "); annot_value = file_data_line.substr(0,space_split_i); - an_annot.end = atoi (annot_value.c_str()); + annot_end = atoi (annot_value.c_str()); if (space_split_i == std::string::npos) // no entry for type or name { std::cout << "seq, annots - no type or name\n"; - an_annot.type = ""; - an_annot.name = ""; + annot_name = ""; + annot_type = ""; } else // else get annot type { file_data_line = file_data_line.substr(space_split_i+1); space_split_i = file_data_line.find(" "); annot_value = file_data_line.substr(0,space_split_i); - an_annot.type = annot_value; + //an_annot.type = annot_value; + annot_type = annot_value; if (space_split_i == std::string::npos) // no entry for name { std::cout << "seq, annots - no name\n"; - an_annot.name = ""; + annot_name = ""; } else // get annot name { file_data_line = file_data_line.substr(space_split_i+1); space_split_i = file_data_line.find(" "); annot_value = file_data_line.substr(0,space_split_i); - an_annot.type = annot_value; + // this seems like its wrong? 
+ annot_type = annot_value; } } - annots.push_back(an_annot); // don't forget to actually add the annot + add_annotation(annot_name, annot_type, annot_begin, annot_end); } //std::cout << "seq, annots: " << an_annot.start << ", " << an_annot.end // << "-->" << an_annot.type << "::" << an_annot.name << std::endl; @@ -856,7 +892,6 @@ Sequence::motif_scan(const Sequence& a_motif, std::vector * motif_match_sta // end Nora stuff, now we see if a match is found this pass if (motif_i == motif_len) { - annot new_motif; motif_match_starts->push_back(seq_i - motif_len + 1); motif_i = 0; } @@ -871,16 +906,11 @@ void Sequence::add_string_annotation(std::string a_seq, { std::vector seq_starts = find_motif(a_seq); - //std::cout << "searching for " << a_seq << " found " << seq_starts.size() << std::endl; - for(std::vector::iterator seq_start_i = seq_starts.begin(); seq_start_i != seq_starts.end(); ++seq_start_i) { - annots.push_back(annot(*seq_start_i, - *seq_start_i+a_seq.size(), - "", - name)); + add_annotation(name, "", *seq_start_i, *seq_start_i+a_seq.size()); } } diff --git a/alg/sequence.hpp b/alg/sequence.hpp index 9c7ddd4..c2de433 100644 --- a/alg/sequence.hpp +++ b/alg/sequence.hpp @@ -38,21 +38,27 @@ // Sequence data class -//! Attach annotation information to a sequence track -struct annot +/* The way that motifs are found currently doesn't really + * indicate that the match was a reverse compliment + */ +struct motif { - annot(); - annot(int begin, int end, std::string type, std::string name); - ~annot(); - + motif(); + //motif(int begin, int end, std::string type, std::string name); + //! 
this constructor is for when we're adding motifs to our annotations + motif(int begin, std::string motif); + ~motif(); + int begin; int end; std::string type; std::string name; + std::string sequence; + + friend bool operator==(const motif& left, const motif& right); - friend bool operator==(const annot& left, const annot& right); -private: // boost::serialization support +private: friend class boost::serialization::access; template void serialize(Archive& ar, const unsigned int /*version*/) { @@ -60,29 +66,6 @@ private: ar & BOOST_SERIALIZATION_NVP(end); ar & BOOST_SERIALIZATION_NVP(type); ar & BOOST_SERIALIZATION_NVP(name); - } -}; -BOOST_CLASS_EXPORT(annot); - - -/* The way that motifs are found currently doesn't really - * indicate that the match was a reverse compliment - */ -struct motif : public annot -{ - std::string sequence; - - motif() : annot(), sequence("") {}; - //! this constructor is for when we're adding motifs to our annotations - motif(int begin, std::string motif); - ~motif(); - - // boost::serialization support -private: - friend class boost::serialization::access; - template - void serialize(Archive& ar, const unsigned int /*version*/) { - ar & BOOST_SERIALIZATION_BASE_OBJECT_NVP(annot); ar & BOOST_SERIALIZATION_NVP(sequence); } }; @@ -226,8 +209,12 @@ public: */ void parse_annot(std::string data, int start_index=0, int end_index=0); //! add an annotation to our list of annotations - void add_annotation(const annot& a); - const std::list& annotations() const; + void add_annotation(const SeqSpanRef a); + //! add an annotation using tristan's mussa file paramenters + void add_annotation(std::string name, std::string type, size_type start, size_type stop); + //! create an initialized annotation with the "standard" types. + SeqSpanRef make_annotation(std::string name, std::string type, size_type start, size_type stop) const; + const SeqSpanRefList& annotations() const; const MotifList& motifs() const; //! 
add a motif to our list of motifs @@ -251,8 +238,8 @@ protected: //! species name std::string species; - //! store our oldstyle annotations - std::list annots; + //! store annotation regions + SeqSpanRefListRef annotation_list; //! a seperate list for motifs since we're currently not saving them MotifListRef motif_list; @@ -271,7 +258,7 @@ protected: ar & BOOST_SERIALIZATION_NVP(seq); ar & BOOST_SERIALIZATION_NVP(header); ar & BOOST_SERIALIZATION_NVP(species); - ar & BOOST_SERIALIZATION_NVP(annots); + ar & BOOST_SERIALIZATION_NVP(annotation_list); ar & BOOST_SERIALIZATION_NVP(motif_list); } }; diff --git a/alg/test/test_annotation_color.cpp b/alg/test/test_annotation_color.cpp index 7d2a885..aa06060 100644 --- a/alg/test/test_annotation_color.cpp +++ b/alg/test/test_annotation_color.cpp @@ -26,6 +26,8 @@ BOOST_AUTO_TEST_CASE( simple_annot_colors ) BOOST_CHECK_EQUAL( ac.lookup("bleem", "foo"), black ); BOOST_CHECK_EQUAL( ac.lookup("venchent", "a"), white ); + /* + // Removed as we're phasing out the annot and annotation colors classes annot a; a.begin = 30; a.end = 45; @@ -37,7 +39,8 @@ BOOST_AUTO_TEST_CASE( simple_annot_colors ) BOOST_CHECK_EQUAL( ac.lookup(a), black ); a.type = "venchent"; BOOST_CHECK_EQUAL( ac.lookup(a), white ); - + */ + ac.clear(); BOOST_CHECK_EQUAL( ac.lookup("bleem", "a"), black); } diff --git a/alg/test/test_sequence.cpp b/alg/test/test_sequence.cpp index 921c5ef..82ff14b 100644 --- a/alg/test/test_sequence.cpp +++ b/alg/test/test_sequence.cpp @@ -463,16 +463,16 @@ BOOST_AUTO_TEST_CASE( sequence_empty_reverse_iterator) BOOST_AUTO_TEST_CASE( annotation_load ) { string annot_data = "human\n" - "0 10 name type\n" - "10 20 myf7\n" - "20 30 myod\n" - "50\t55 anothername\n" - "60 50 backward\n" - ">ident3 asdf\n" + "0 10 name type\n" //0 + "10 20 myf7\n" //1 + "20 30 myod\n" //2 + "50\t55 anothername\n" //3 + "60 50 backward\n" //4 + ">ident3 asdf\n" //7 (as these are added last) "GCT\n" "gCTn\n" - "75\t90\tname2\ttype2\n" - "100 120 name-asdf 
type!@#$%\n" + "75\t90\tname2\ttype2\n" //5 + "100 120 name-asdf type!@#$%\n" //6 ; string s(100, 'A'); s += "GCTGCTAATT"; @@ -480,27 +480,37 @@ BOOST_AUTO_TEST_CASE( annotation_load ) //istringstream annot_stream(annot_data); seq.parse_annot(annot_data, 0, 0); - std::list annots_list = seq.annotations(); - std::vector annots(annots_list.begin(), annots_list.end()); + SeqSpanRefList annots_list(seq.annotations()); + std::vector annots(annots_list.begin(), annots_list.end()); BOOST_REQUIRE_EQUAL( annots.size(), 8); - BOOST_CHECK_EQUAL( annots[0].begin, 0 ); - BOOST_CHECK_EQUAL( annots[0].end, 10 ); - BOOST_CHECK_EQUAL( annots[0].type, "type"); - BOOST_CHECK_EQUAL( annots[0].name, "name"); - BOOST_CHECK_EQUAL( annots[1].name, "myf7"); - BOOST_CHECK_EQUAL( annots[2].name, "myod"); - BOOST_CHECK_EQUAL( annots[3].name, "anothername"); - BOOST_CHECK_EQUAL( annots[4].name, "backward"); - BOOST_CHECK_EQUAL( annots[5].name, "name2"); - BOOST_CHECK_EQUAL( annots[5].end, 90); - BOOST_CHECK_EQUAL( annots[6].begin, 100); - BOOST_CHECK_EQUAL( annots[6].end, 120); - BOOST_CHECK_EQUAL( annots[6].name, "name-asdf"); - BOOST_CHECK_EQUAL( annots[6].type, "type!@#$%"); + BOOST_CHECK_EQUAL( annots[0]->start(), 0 ); + BOOST_CHECK_EQUAL( annots[0]->stop(), 10 ); + BOOST_REQUIRE( annots[0]->annotations() ); + BOOST_CHECK_EQUAL( annots[0]->annotations()->get("type"), "type"); + BOOST_CHECK_EQUAL( annots[0]->annotations()->name(), "name"); + BOOST_REQUIRE( annots[1]->annotations() ); + BOOST_CHECK_EQUAL( annots[1]->annotations()->name(), "myf7"); + BOOST_REQUIRE( annots[2]->annotations() ); + BOOST_CHECK_EQUAL( annots[2]->annotations()->name(), "myod"); + BOOST_REQUIRE( annots[3]->annotations() ); + BOOST_CHECK_EQUAL( annots[3]->annotations()->name(), "anothername"); + BOOST_REQUIRE( annots[4]->annotations() ); + BOOST_CHECK_EQUAL( annots[4]->annotations()->name(), "backward"); + BOOST_REQUIRE( annots[5]->annotations() ); + BOOST_CHECK_EQUAL( annots[5]->annotations()->name(), "name2"); + 
BOOST_CHECK_EQUAL( annots[5]->start(), 75); + BOOST_CHECK_EQUAL( annots[5]->stop(), 90); + BOOST_CHECK_EQUAL( annots[6]->start(), 100); + BOOST_CHECK_EQUAL( annots[6]->stop(), 110); + BOOST_REQUIRE( annots[6]->annotations() ); + BOOST_CHECK_EQUAL( annots[6]->annotations()->name(), "name-asdf"); + BOOST_CHECK_EQUAL( annots[6]->annotations()->get("type"), "type!@#$%"); // sequence defined annotations will always be after the // absolute positions - BOOST_CHECK_EQUAL( annots[7].name, "ident3 asdf"); - BOOST_CHECK_EQUAL( annots[7].begin, 100); + BOOST_REQUIRE( annots[7]->annotations() ); + BOOST_CHECK_EQUAL( annots[7]->annotations()->name(), "ident3 asdf"); + BOOST_CHECK_EQUAL( annots[7]->start(), 100); + BOOST_CHECK_EQUAL( annots[7]->stop(), 107); //BOOST_CHECK_EQUAL( annots } @@ -545,7 +555,7 @@ BOOST_AUTO_TEST_CASE(annotation_ucsc_html_load) "GGTGGAGACGACCTGGACCCTAACTACGTGCTCAGCAGCCGCGTCCGCAC"; Sequence seq(s, reduced_dna_alphabet); seq.parse_annot(annot_data); - std::list annots = seq.annotations(); + SeqSpanRefList annots(seq.annotations()); BOOST_CHECK_EQUAL( annots.size(), 2); } @@ -568,12 +578,12 @@ BOOST_AUTO_TEST_CASE( annotation_load_no_species_name ) //istringstream annot_stream(annot_data); seq.parse_annot(annot_data, 0, 0); - std::list annots_list = seq.annotations(); - std::vector annots(annots_list.begin(), annots_list.end()); + SeqSpanRefList annots_list(seq.annotations()); + std::vector annots(annots_list.begin(), annots_list.end()); BOOST_REQUIRE_EQUAL( annots.size(), 8); - BOOST_CHECK_EQUAL( annots[0].begin, 0 ); - BOOST_CHECK_EQUAL( annots[0].end, 10 ); - BOOST_CHECK_EQUAL( annots[0].type, "type"); + BOOST_CHECK_EQUAL( annots[0]->start(), 0 ); + BOOST_CHECK_EQUAL( annots[0]->stop(), 10 ); + BOOST_CHECK_EQUAL( annots[0]->annotations()->get("type"), "type"); } // ticket:83 when you try to load a sequence from a file that doesn't @@ -742,12 +752,14 @@ BOOST_AUTO_TEST_CASE( sequence_motif_subseq) BOOST_AUTO_TEST_CASE( annot_test ) { - annot a(0, 10, 
"test", "thing"); - - BOOST_CHECK_EQUAL( a.begin, 0 ); - BOOST_CHECK_EQUAL( a.end, 10 ); - BOOST_CHECK_EQUAL( a.type, "test" ); - BOOST_CHECK_EQUAL( a.name, "thing" ); + Sequence s("AAAAAAAAAA"); + s.add_annotation("test", "thing", 0, 10); + SeqSpanRef a(s.annotations().front()); + + BOOST_CHECK_EQUAL( a->start(), 0 ); + BOOST_CHECK_EQUAL( a->stop(), 10 ); + BOOST_CHECK_EQUAL( a->annotations()->get("name"), "test" ); + BOOST_CHECK_EQUAL( a->annotations()->get("type"), "thing" ); motif m(10, "AAGGCC"); BOOST_CHECK_EQUAL( m.begin, 10 ); @@ -795,12 +807,13 @@ BOOST_AUTO_TEST_CASE( annotate_from_sequence ) } } BOOST_CHECK_EQUAL(seq.annotations().size(), count); - const std::list &a = seq.annotations(); - for (std::list::const_iterator annot_i = a.begin(); + const SeqSpanRefList& a = seq.annotations(); + for (SeqSpanRefList::const_iterator annot_i = a.begin(); annot_i != a.end(); ++annot_i) { - int count = annot_i->end - annot_i->begin ; + //FIXME: was I doing something here? + int count = (*annot_i)->stop() - (*annot_i)->start(); } } @@ -816,29 +829,36 @@ BOOST_AUTO_TEST_CASE( subseq_annotation_test ) "AGCTAAAACTTTGGAAACTTTAGATCCCAGACAGGTGGCTTTCTTGCAGT"); Sequence seq(s, reduced_dna_alphabet); - - seq.add_annotation(annot(0, 10, "0-10", "0-10")); - seq.add_annotation(annot(10, 20, "10-20", "10-20")); - seq.add_annotation(annot(0, 20, "0-20", "0-20")); - seq.add_annotation(annot(8, 12, "8-12", "8-12")); - seq.add_annotation(annot(100, 5000, "100-5000", "100-5000")); + seq.add_annotation("0-10", "0-10", 0, 10); + seq.add_annotation("10-20", "10-20", 10, 20); + seq.add_annotation("0-20", "0-20", 0, 20); + seq.add_annotation("8-12", "8-12", 8, 12); + seq.add_annotation("100-5000", "100-5000", 100, 5000); Sequence subseq = seq.subseq(5, 10); - const list annots = subseq.annotations(); - // generate some ground truth - list correct; - correct.push_back(annot(0, 5, "0-10", "0-10")); - correct.push_back(annot(5,10, "10-20", "10-20")); - correct.push_back(annot(0,10, "0-20", 
"0-20")); - correct.push_back(annot(3, 7, "8-12", "8-12")); - BOOST_REQUIRE_EQUAL( annots.size(), correct.size() ); - - list::iterator correct_i = correct.begin(); - list::const_iterator annot_i = annots.begin(); - for(; annot_i != annots.end(); ++annot_i, ++correct_i) - { - BOOST_CHECK( *annot_i == *correct_i ); - } + SeqSpanRefList annots_list = subseq.annotations(); + BOOST_REQUIRE_EQUAL( annots_list.size(), 4 ); + + std::vector annots(annots_list.begin(), annots_list.end()); + BOOST_CHECK_EQUAL( annots[0]->start(), 0); + BOOST_CHECK_EQUAL( annots[0]->size(), 5); + BOOST_REQUIRE( annots[0]->annotations() ); + BOOST_CHECK_EQUAL( annots[0]->annotations()->name(), "0-10"); + + BOOST_CHECK_EQUAL( annots[1]->start(), 5); + BOOST_CHECK_EQUAL( annots[1]->size(), 10); + BOOST_REQUIRE( annots[1]->annotations() ); + BOOST_CHECK_EQUAL( annots[1]->annotations()->name(), "10-20"); + + BOOST_CHECK_EQUAL( annots[2]->start(), 0); + BOOST_CHECK_EQUAL( annots[2]->size(), 10); + BOOST_REQUIRE( annots[2]->annotations() ); + BOOST_CHECK_EQUAL( annots[2]->annotations()->name(), "0-20"); + + BOOST_CHECK_EQUAL( annots[3]->start(), 3); + BOOST_CHECK_EQUAL( annots[3]->size(), 7); + BOOST_REQUIRE( annots[3]->annotations() ); + BOOST_CHECK_EQUAL( annots[3]->annotations()->name(), "8-12"); } BOOST_AUTO_TEST_CASE( motif_annotation_update ) @@ -856,9 +876,9 @@ BOOST_AUTO_TEST_CASE( motif_annotation_update ) // starting conditions BOOST_CHECK_EQUAL(seq.annotations().size(), 0); BOOST_CHECK_EQUAL(seq.motifs().size(), 0); - seq.add_annotation(annot(0, 10, "0-10", "0-10")); - seq.add_annotation(annot(10, 20, "10-20", "10-20")); - seq.add_annotation(annot(0, 20, "0-20", "0-20")); + seq.add_annotation("0-10", "0-10", 0, 10); + seq.add_annotation("10-20", "10-20", 10, 20); + seq.add_annotation("0-20", "0-20", 0, 20); BOOST_CHECK_EQUAL(seq.annotations().size(), 3); BOOST_CHECK_EQUAL(seq.motifs().size(), 0); seq.add_motif("CCGTCCC"); @@ -920,8 +940,7 @@ BOOST_AUTO_TEST_CASE( serialize_tree ) 
seq.set_species("ribbet"); seq.add_motif("AA"); seq.add_motif("GC"); - annot a1(6,7,"t","t"); - seq.add_annotation(a1); + seq.add_annotation("t", "t", 6, 7); std::ostringstream oss; // allocate/deallocate serialization components @@ -950,8 +969,7 @@ BOOST_AUTO_TEST_CASE( serialize_xml_sequence ) seq.set_species("ribbet"); seq.add_motif("AA"); seq.add_motif("GC"); - annot a1(6,7,"t","t"); - seq.add_annotation(a1); + seq.add_annotation("t", "t", 6, 7); std::ostringstream oss; // allocate/deallocate serialization components diff --git a/py/annot.cpp b/py/annot.cpp index 7b06fb8..72e1aed 100644 --- a/py/annot.cpp +++ b/py/annot.cpp @@ -5,15 +5,11 @@ using namespace boost::python; void export_annot() { - class_("annot") - .def(init()) - .def_readwrite("begin", &annot::begin) - .def_readwrite("end", &annot::end) - .def_readwrite("type", &annot::type) - .def_readwrite("name", &annot::name) - ; - - class_ >("motif", init()) + class_("motif", init()) + .def_readwrite("begin", &motif::begin) + .def_readwrite("end", &motif::end) + .def_readwrite("type", &motif::type) + .def_readwrite("name", &motif::name) .def_readwrite("sequence", &motif::sequence) ; } diff --git a/py/sequence.cpp b/py/sequence.cpp index fbee90d..4d650c8 100644 --- a/py/sequence.cpp +++ b/py/sequence.cpp @@ -17,13 +17,20 @@ namespace fs = boost::filesystem; void export_sequence() { void (Sequence::*load_fasta_piii)(const fs::path, int, int, int) = &Sequence::load_fasta; + void (Sequence::*add_annotation_ssii)( + std::string, + std::string, + Sequence::size_type, + Sequence::size_type + ) = &Sequence::add_annotation; class_("Sequence") .def(init()) .def("__len__", &Sequence::size, "return the length of the sequence") .def("__repr__", &Sequence::get_sequence, "display as string") .def("__str__", &Sequence::get_sequence, "cast to string") - .def("add_annotation", &Sequence::add_annotation, "append an annotation") + .def("add_annotation", add_annotation_ssii, + "create an annotation from name, type, start, 
stop") //.def("annotations", &Sequence::annotations, "return list of annotations") .def("add_motif", &Sequence::add_motif, "add a motif sequenence") .def("clear", &Sequence::clear, "clear the sequence and its annotations") diff --git a/py/test/TestSequence.py b/py/test/TestSequence.py index 5fcc362..80fc3e6 100644 --- a/py/test/TestSequence.py +++ b/py/test/TestSequence.py @@ -29,14 +29,8 @@ class TestSequence(unittest.TestCase): self.failUnless(s.species == species) def testAnnotations(self): - annot = mussa.annot() - annot.begin = 0 - annot.end = 10 - annot.name = "foo" - annot.type = "utr" - seq = mussa.Sequence("AAGGCCTTAATTGGCCTT") - seq.add_annotation(annot) + seq.add_annotation("foo", "utr", 0, 10) def notestFile(self): # remove no prefix once we have sequence loading from a stream -- 2.30.2