root_map.cm[type].cm.erase(instance);
}
-Color AnnotationColors::lookup(const annot &a) const
-{
- return lookup(a.type, a.name);
-}
-
Color AnnotationColors::lookup(const string &type, const string &instance) const
{
// Yeah, there's probably a nicer looking recursive solution
void erase(const std::string &type, const std::string& instance);
//! lookup an annotation color
- Color lookup(const annot &) const;
Color lookup(const std::string &, const std::string &) const;
private:
// nested maps, with default?
{
// draw annotations
GLfloat annotation_z = z() + 10.0;
- const std::list<annot>& annots = Sequence::annotations();
+ const SeqSpanRefList& annots = Sequence::annotations();
const MotifList& motifs = Sequence::motifs();
- for (std::list<annot>::const_iterator annot_itor = annots.begin();
+ for (SeqSpanRefList::const_iterator annot_itor = annots.begin();
annot_itor != annots.end();
++annot_itor)
{
glColor3f(0.0, 0.8, 0.0);
- draw_box(left, right, x()+annot_itor->begin, x()+annot_itor->end,
+ draw_box(left, right, x()+(*annot_itor)->start(), x()+(*annot_itor)->stop(),
height(), annotation_z);
}
// if motifs?
#include <string>
#include <map>
+#include <list>
#include <boost/serialization/base_object.hpp>
#include <boost/serialization/export.hpp>
class SeqSpan;
typedef boost::shared_ptr<SeqSpan> SeqSpanRef;
+typedef std::list<SeqSpanRef> SeqSpanRefList;
+typedef boost::shared_ptr<SeqSpanRefList> SeqSpanRefListRef;
//! Track what segment of a sequence we're looking at
class SeqSpan : public boost::enable_shared_from_this<SeqSpan> {
#include <sstream>
#include <set>
-annot::annot()
- : begin(0),
- end(0),
- type(""),
- name("")
-{
-}
-
-annot::annot(int begin, int end, std::string type, std::string name)
- : begin(begin),
- end(end),
- type(type),
- name(name)
-{
-}
-
-annot::~annot()
-{
-}
-
-bool operator==(const annot& left, const annot& right)
+bool operator==(const motif& left, const motif& right) // equality is decided by begin, end and name only; the type and sequence members are not compared
{
return ((left.begin== right.begin) and
(left.end == right.end) and
(left.name == right.name));
}
-motif::motif(int begin, std::string motif)
- : annot(begin, begin+motif.size(), "motif", motif),
+motif::motif()
+ : begin(0),
+ end(0),
+ type("motif"),
+ name(""),
+ sequence("")
+{
+}
+
+motif::motif(int begin_, std::string motif)
+ : begin(begin_),
+ end(begin_+motif.size()),
+ type("motif"),
+ name(motif),
sequence(motif)
{
}
{
}
-
Sequence::Sequence(AlphabetRef alphabet)
- : seq(new SeqSpan("", alphabet, SeqSpan::PlusStrand)),
+ : seq(new SeqSpan("", alphabet, SeqSpan::PlusStrand)),
+ annotation_list(new SeqSpanRefList),
motif_list(new MotifList)
{
}
Sequence::Sequence(const char *seq, AlphabetRef alphabet_, SeqSpan::strand_type strand_)
: header(""),
species(""),
+ annotation_list(new SeqSpanRefList),
motif_list(new MotifList)
{
set_filtered_sequence(seq, alphabet_, 0, npos, strand_);
SeqSpan::strand_type strand_)
: header(""),
species(""),
+ annotation_list(new SeqSpanRefList),
motif_list(new MotifList)
{
set_filtered_sequence(seq, alphabet_, 0, seq.size(), strand_);
: seq(o.seq),
header(o.header),
species(o.species),
- annots(o.annots),
+ annotation_list(o.annotation_list),
motif_list(o.motif_list)
{
}
: seq(o->seq),
header(o->header),
species(o->species),
- annots(o->annots),
+ annotation_list(o->annotation_list),
motif_list(o->motif_list)
{
}
: seq(new SeqSpan(o->seq)),
header(o->header),
species(o->species),
- annots(o->annots),
+ annotation_list(o->annotation_list),
motif_list(o->motif_list)
{
}
: seq(seq_ref),
header(""),
species(""),
+ annotation_list(new SeqSpanRefList),
motif_list(new MotifList)
{
}
seq = s.seq;
header = s.header;
species = s.species;
- annots = s.annots;
+ annotation_list = s.annotation_list;
motif_list = s.motif_list;
}
return *this;
*/
struct push_back_annot {
- std::list<annot>& annot_list;
+ Sequence* parent;
+ SeqSpanRefListRef children;
int& begin;
int& end;
std::string& name;
std::string& type;
int &parsed;
- push_back_annot(std::list<annot>& annot_list_,
+ push_back_annot(Sequence* parent_seq,
+ SeqSpanRefListRef children_list,
int& begin_,
int& end_,
std::string& name_,
std::string& type_,
int &parsed_)
- : annot_list(annot_list_),
+ : parent(parent_seq),
+ children(children_list),
begin(begin_),
end(end_),
name(name_),
void operator()(std::string::const_iterator,
std::string::const_iterator) const
{
- //std::cout << "adding annot: " << begin << "|" << end << "|" << name << "|" << type << std::endl;
- annot_list.push_back(annot(begin, end, name, type));
+ children->push_back(parent->make_annotation(name, type, begin, end));
++parsed;
};
};
int end=0;
std::string name;
std::string type;
- std::string seq;
- std::list<annot> parsed_annots;
+ std::string seqstr;
+ SeqSpanRefListRef parsed_annots(new SeqSpanRefList);
std::list<Sequence> query_seqs;
int parsed=0;
)
// to understand how this group gets set
// read the comment above struct push_back_annot
- )[push_back_annot(parsed_annots, start, end, type, name, parsed)]
+ )[push_back_annot(this, parsed_annots, start, end, name, type, parsed)]
|
((spirit::ch_p('>')|spirit::str_p(">")) >>
(*(spirit::print_p))[spirit::assign_a(name)] >>
spirit::eol_p >>
- (+(spirit::chset<>(Alphabet::nucleic_cstr)))[spirit::assign_a(seq)]
- )[push_back_seq(query_seqs, name, seq, parsed)]
+ (+(spirit::chset<>(Alphabet::nucleic_cstr)))[spirit::assign_a(seqstr)]
+ )[push_back_seq(query_seqs, name, seqstr, parsed)]
) >>
*spirit::space_p
)
msg << "Error parsing annotation #" << parsed;
throw annotation_load_error(msg.str());
}
+ // If everything parsed correctly, commit the parsed annotations to our annotation list
// add newly parsed annotations to our sequence
- std::copy(parsed_annots.begin(), parsed_annots.end(), std::back_inserter(annots));
- // go seearch for query sequences
+ std::copy(parsed_annots->begin(), parsed_annots->end(), std::back_inserter(*annotation_list));
+ // go search for query sequences
find_sequences(query_seqs.begin(), query_seqs.end());
}
-void Sequence::add_annotation(const annot& a)
+void Sequence::add_annotation(const SeqSpanRef a)
{
- annots.push_back(a);
+ annotation_list->push_back(a);
}
-const std::list<annot>& Sequence::annotations() const
+void Sequence::add_annotation(std::string name, std::string type, size_type start, size_type stop)
{
- return annots;
+ add_annotation(make_annotation(name, type, start, stop));
+}
+
+SeqSpanRef
+Sequence::make_annotation(std::string name, std::string type, size_type start, size_type stop) const
+{ // builds a new annotation span over this sequence and returns it; nothing is appended to annotation_list here
+ // we want things to be in the positive direction: a reversed pair is swapped so that start <= stop
+ if (stop < start) {
+ size_type tmp = start;
+ start = stop;
+ stop = tmp;
+ }
+ size_type count = stop - start; // length of the normalized span
+ SeqSpanRef new_annot(seq->subseq(start, count, SeqSpan::UnknownStrand)); // subseq is called as (start, count, strand) on this sequence's SeqSpan
+ AnnotationsRef metadata(new Annotations(name)); // the metadata object is constructed from the annotation name
+ metadata->set("type", type); // the annotation type is stored under the "type" key
+ new_annot->setAnnotations(metadata);
+ return new_annot;
+}
+
+const SeqSpanRefList& Sequence::annotations() const
+{
+ return *annotation_list;
}
void Sequence::copy_children(Sequence &new_seq, size_type start, size_type count) const
{
new_seq.motif_list = motif_list;
- new_seq.annots.clear();
+ new_seq.annotation_list.reset(new SeqSpanRefList); // point new_seq at a fresh, empty annotation list
- for(std::list<annot>::const_iterator annot_i = annots.begin();
- annot_i != annots.end();
+ for(SeqSpanRefList::const_iterator annot_i = annotation_list->begin();
+ annot_i != annotation_list->end();
++annot_i)
{
- size_type annot_begin= annot_i->begin;
- size_type annot_end = annot_i->end;
+ size_type annot_begin= (*annot_i)->start();
+ size_type annot_end = (*annot_i)->stop();
if (annot_begin < start+count) {
if (annot_begin >= start) {
annot_end = count;
}
- annot new_annot(annot_begin, annot_end, annot_i->type, annot_i->name);
- new_seq.annots.push_back(new_annot);
+ SeqSpanRef new_annot(seq->subseq(annot_begin, annot_end)); // NOTE(review): Sequence::make_annotation calls subseq(start, count, strand); here annot_end (an end position) sits in the count slot -- confirm this is intended
+ new_annot->setAnnotations((*annot_i)->annotations()); // hands over the source annotation's metadata handle as-is; no explicit copy is made here
+ new_seq.annotation_list->push_back(new_annot);
}
}
}
return new_seq;
}
- Sequence new_seq = *this;
+ Sequence new_seq(*this);
new_seq.seq = seq->subseq(start, count, strand);
if (seq->annotations()) {
AnnotationsRef a(new Annotations(*(seq->annotations())));
seq.reset();
header.clear();
species.clear();
- annots.clear();
+ annotation_list.reset(new SeqSpanRefList);
motif_list.reset(new MotifList);
}
void
Sequence::save(fs::fstream &save_file)
{
+ std::string type("type");
+ std::string empty_str("");
//fstream save_file;
- std::list<annot>::iterator annots_i;
+ SeqSpanRefList::iterator annots_i;
+ AnnotationsRef metadata;
// not sure why, or if i'm doing something wrong, but can't seem to pass
// file pointers down to this method from the mussa control class
save_file << "<Annotations>" << std::endl;
save_file << species << std::endl;
- for (annots_i = annots.begin(); annots_i != annots.end(); ++annots_i)
+ for (annots_i = annotation_list->begin();
+ annots_i != annotation_list->end();
+ ++annots_i)
{
- save_file << annots_i->begin << " " << annots_i->end << " " ;
- save_file << annots_i->name << " " << annots_i->type << std::endl;
+ metadata = (*annots_i)->annotations();
+ save_file << (*annots_i)->parentStart() << " " << (*annots_i)->parentStop() << " " ;
+ save_file << metadata->name() << " "
+ << metadata->getdefault(type, empty_str) << std::endl;
}
save_file << "</Annotations>" << std::endl;
//save_file.close();
fs::fstream load_file;
std::string file_data_line;
int seq_counter;
- annot an_annot;
+ //annot an_annot;
+ int annot_begin;
+ int annot_end;
+ std::string annot_name;
+ std::string annot_type;
+
std::string::size_type space_split_i;
std::string annot_value;
- annots.clear();
+ annotation_list.reset(new SeqSpanRefList);
+
load_file.open(load_file_path, std::ios::in);
seq_counter = 0;
// get annot start index
space_split_i = file_data_line.find(" ");
annot_value = file_data_line.substr(0,space_split_i);
- an_annot.begin = atoi (annot_value.c_str());
+ annot_begin = atoi (annot_value.c_str());
file_data_line = file_data_line.substr(space_split_i+1);
// get annot end index
space_split_i = file_data_line.find(" ");
annot_value = file_data_line.substr(0,space_split_i);
- an_annot.end = atoi (annot_value.c_str());
+ annot_end = atoi (annot_value.c_str());
if (space_split_i == std::string::npos) // no entry for type or name
{
std::cout << "seq, annots - no type or name\n";
- an_annot.type = "";
- an_annot.name = "";
+ annot_name = "";
+ annot_type = "";
}
else // else get annot type
{
file_data_line = file_data_line.substr(space_split_i+1);
space_split_i = file_data_line.find(" ");
annot_value = file_data_line.substr(0,space_split_i);
- an_annot.type = annot_value;
+ //an_annot.type = annot_value;
+ annot_type = annot_value;
if (space_split_i == std::string::npos) // no entry for name
{
std::cout << "seq, annots - no name\n";
- an_annot.name = "";
+ annot_name = "";
}
else // get annot name
{
file_data_line = file_data_line.substr(space_split_i+1);
space_split_i = file_data_line.find(" ");
annot_value = file_data_line.substr(0,space_split_i);
- an_annot.type = annot_value;
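+ // FIXME: this branch is labelled "get annot name" but stores the value in annot_type and never assigns annot_name; the code being replaced did the same (an_annot.type) -- confirm which field is intended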
+ annot_type = annot_value;
}
}
- annots.push_back(an_annot); // don't forget to actually add the annot
+ add_annotation(annot_name, annot_type, annot_begin, annot_end);
}
//std::cout << "seq, annots: " << an_annot.start << ", " << an_annot.end
// << "-->" << an_annot.type << "::" << an_annot.name << std::endl;
// end Nora stuff, now we see if a match is found this pass
if (motif_i == motif_len)
{
- annot new_motif;
motif_match_starts->push_back(seq_i - motif_len + 1);
motif_i = 0;
}
{
std::vector<int> seq_starts = find_motif(a_seq);
- //std::cout << "searching for " << a_seq << " found " << seq_starts.size() << std::endl;
-
for(std::vector<int>::iterator seq_start_i = seq_starts.begin();
seq_start_i != seq_starts.end();
++seq_start_i)
{
- annots.push_back(annot(*seq_start_i,
- *seq_start_i+a_seq.size(),
- "",
- name));
+ add_annotation(name, "", *seq_start_i, *seq_start_i+a_seq.size());
}
}
// Sequence data class
-//! Attach annotation information to a sequence track
-struct annot
+/* The way that motifs are found currently doesn't really
+ * indicate that the match was a reverse complement
+ */
+struct motif
{
- annot();
- annot(int begin, int end, std::string type, std::string name);
- ~annot();
-
+ motif();
+ //motif(int begin, int end, std::string type, std::string name);
+ //! this constructor is for when we're adding motifs to our annotations
+ motif(int begin, std::string motif);
+ ~motif();
+
int begin;
int end;
std::string type;
std::string name;
+ std::string sequence;
+
+ friend bool operator==(const motif& left, const motif& right);
- friend bool operator==(const annot& left, const annot& right);
-private:
// boost::serialization support
+private:
friend class boost::serialization::access;
template<class Archive>
void serialize(Archive& ar, const unsigned int /*version*/) {
ar & BOOST_SERIALIZATION_NVP(end);
ar & BOOST_SERIALIZATION_NVP(type);
ar & BOOST_SERIALIZATION_NVP(name);
- }
-};
-BOOST_CLASS_EXPORT(annot);
-
-
-/* The way that motifs are found currently doesn't really
- * indicate that the match was a reverse compliment
- */
-struct motif : public annot
-{
- std::string sequence;
-
- motif() : annot(), sequence("") {};
- //! this constructor is for when we're adding motifs to our annotations
- motif(int begin, std::string motif);
- ~motif();
-
- // boost::serialization support
-private:
- friend class boost::serialization::access;
- template<class Archive>
- void serialize(Archive& ar, const unsigned int /*version*/) {
- ar & BOOST_SERIALIZATION_BASE_OBJECT_NVP(annot);
ar & BOOST_SERIALIZATION_NVP(sequence);
}
};
*/
void parse_annot(std::string data, int start_index=0, int end_index=0);
//! add an annotation to our list of annotations
- void add_annotation(const annot& a);
- const std::list<annot>& annotations() const;
+ void add_annotation(const SeqSpanRef a);
+ //! add an annotation using tristan's mussa file parameters
+ void add_annotation(std::string name, std::string type, size_type start, size_type stop);
+ //! create an initialized annotation with the "standard" types.
+ SeqSpanRef make_annotation(std::string name, std::string type, size_type start, size_type stop) const;
+ const SeqSpanRefList& annotations() const;
const MotifList& motifs() const;
//! add a motif to our list of motifs
//! species name
std::string species;
- //! store our oldstyle annotations
- std::list<annot> annots;
+ //! store annotation regions
+ SeqSpanRefListRef annotation_list;
//! a seperate list for motifs since we're currently not saving them
MotifListRef motif_list;
ar & BOOST_SERIALIZATION_NVP(seq);
ar & BOOST_SERIALIZATION_NVP(header);
ar & BOOST_SERIALIZATION_NVP(species);
- ar & BOOST_SERIALIZATION_NVP(annots);
+ ar & BOOST_SERIALIZATION_NVP(annotation_list);
ar & BOOST_SERIALIZATION_NVP(motif_list);
}
};
BOOST_CHECK_EQUAL( ac.lookup("bleem", "foo"), black );
BOOST_CHECK_EQUAL( ac.lookup("venchent", "a"), white );
+ /*
+ // Removed as we're phasing out the annot and annotation colors classes
annot a;
a.begin = 30;
a.end = 45;
BOOST_CHECK_EQUAL( ac.lookup(a), black );
a.type = "venchent";
BOOST_CHECK_EQUAL( ac.lookup(a), white );
-
+ */
+
ac.clear();
BOOST_CHECK_EQUAL( ac.lookup("bleem", "a"), black);
}
BOOST_AUTO_TEST_CASE( annotation_load )
{
string annot_data = "human\n"
- "0 10 name type\n"
- "10 20 myf7\n"
- "20 30 myod\n"
- "50\t55 anothername\n"
- "60 50 backward\n"
- ">ident3 asdf\n"
+ "0 10 name type\n" //0
+ "10 20 myf7\n" //1
+ "20 30 myod\n" //2
+ "50\t55 anothername\n" //3
+ "60 50 backward\n" //4
+ ">ident3 asdf\n" //7 (as these are added last)
"GCT\n"
"gCTn\n"
- "75\t90\tname2\ttype2\n"
- "100 120 name-asdf type!@#$%\n"
+ "75\t90\tname2\ttype2\n" //5
+ "100 120 name-asdf type!@#$%\n" //6
;
string s(100, 'A');
s += "GCTGCTAATT";
//istringstream annot_stream(annot_data);
seq.parse_annot(annot_data, 0, 0);
- std::list<annot> annots_list = seq.annotations();
- std::vector<annot> annots(annots_list.begin(), annots_list.end());
+ SeqSpanRefList annots_list(seq.annotations());
+ std::vector<SeqSpanRef> annots(annots_list.begin(), annots_list.end());
BOOST_REQUIRE_EQUAL( annots.size(), 8);
- BOOST_CHECK_EQUAL( annots[0].begin, 0 );
- BOOST_CHECK_EQUAL( annots[0].end, 10 );
- BOOST_CHECK_EQUAL( annots[0].type, "type");
- BOOST_CHECK_EQUAL( annots[0].name, "name");
- BOOST_CHECK_EQUAL( annots[1].name, "myf7");
- BOOST_CHECK_EQUAL( annots[2].name, "myod");
- BOOST_CHECK_EQUAL( annots[3].name, "anothername");
- BOOST_CHECK_EQUAL( annots[4].name, "backward");
- BOOST_CHECK_EQUAL( annots[5].name, "name2");
- BOOST_CHECK_EQUAL( annots[5].end, 90);
- BOOST_CHECK_EQUAL( annots[6].begin, 100);
- BOOST_CHECK_EQUAL( annots[6].end, 120);
- BOOST_CHECK_EQUAL( annots[6].name, "name-asdf");
- BOOST_CHECK_EQUAL( annots[6].type, "type!@#$%");
+ BOOST_CHECK_EQUAL( annots[0]->start(), 0 );
+ BOOST_CHECK_EQUAL( annots[0]->stop(), 10 );
+ BOOST_REQUIRE( annots[0]->annotations() );
+ BOOST_CHECK_EQUAL( annots[0]->annotations()->get("type"), "type");
+ BOOST_CHECK_EQUAL( annots[0]->annotations()->name(), "name");
+ BOOST_REQUIRE( annots[1]->annotations() );
+ BOOST_CHECK_EQUAL( annots[1]->annotations()->name(), "myf7");
+ BOOST_REQUIRE( annots[2]->annotations() );
+ BOOST_CHECK_EQUAL( annots[2]->annotations()->name(), "myod");
+ BOOST_REQUIRE( annots[3]->annotations() );
+ BOOST_CHECK_EQUAL( annots[3]->annotations()->name(), "anothername");
+ BOOST_REQUIRE( annots[4]->annotations() );
+ BOOST_CHECK_EQUAL( annots[4]->annotations()->name(), "backward");
+ BOOST_REQUIRE( annots[5]->annotations() );
+ BOOST_CHECK_EQUAL( annots[5]->annotations()->name(), "name2");
+ BOOST_CHECK_EQUAL( annots[5]->start(), 75);
+ BOOST_CHECK_EQUAL( annots[5]->stop(), 90);
+ BOOST_CHECK_EQUAL( annots[6]->start(), 100);
+ BOOST_CHECK_EQUAL( annots[6]->stop(), 110);
+ BOOST_REQUIRE( annots[6]->annotations() );
+ BOOST_CHECK_EQUAL( annots[6]->annotations()->name(), "name-asdf");
+ BOOST_CHECK_EQUAL( annots[6]->annotations()->get("type"), "type!@#$%");
// sequence defined annotations will always be after the
// absolute positions
- BOOST_CHECK_EQUAL( annots[7].name, "ident3 asdf");
- BOOST_CHECK_EQUAL( annots[7].begin, 100);
+ BOOST_REQUIRE( annots[7]->annotations() );
+ BOOST_CHECK_EQUAL( annots[7]->annotations()->name(), "ident3 asdf");
+ BOOST_CHECK_EQUAL( annots[7]->start(), 100);
+ BOOST_CHECK_EQUAL( annots[7]->stop(), 107);
//BOOST_CHECK_EQUAL( annots
}
"GGTGGAGACGACCTGGACCCTAACTACGTGCTCAGCAGCCGCGTCCGCAC";
Sequence seq(s, reduced_dna_alphabet);
seq.parse_annot(annot_data);
- std::list<annot> annots = seq.annotations();
+ SeqSpanRefList annots(seq.annotations());
BOOST_CHECK_EQUAL( annots.size(), 2);
}
//istringstream annot_stream(annot_data);
seq.parse_annot(annot_data, 0, 0);
- std::list<annot> annots_list = seq.annotations();
- std::vector<annot> annots(annots_list.begin(), annots_list.end());
+ SeqSpanRefList annots_list(seq.annotations());
+ std::vector<SeqSpanRef> annots(annots_list.begin(), annots_list.end());
BOOST_REQUIRE_EQUAL( annots.size(), 8);
- BOOST_CHECK_EQUAL( annots[0].begin, 0 );
- BOOST_CHECK_EQUAL( annots[0].end, 10 );
- BOOST_CHECK_EQUAL( annots[0].type, "type");
+ BOOST_CHECK_EQUAL( annots[0]->start(), 0 );
+ BOOST_CHECK_EQUAL( annots[0]->stop(), 10 );
+ BOOST_CHECK_EQUAL( annots[0]->annotations()->get("type"), "type");
}
// ticket:83 when you try to load a sequence from a file that doesn't
BOOST_AUTO_TEST_CASE( annot_test )
{
- annot a(0, 10, "test", "thing");
-
- BOOST_CHECK_EQUAL( a.begin, 0 );
- BOOST_CHECK_EQUAL( a.end, 10 );
- BOOST_CHECK_EQUAL( a.type, "test" );
- BOOST_CHECK_EQUAL( a.name, "thing" );
+ Sequence s("AAAAAAAAAA");
+ s.add_annotation("test", "thing", 0, 10);
+ SeqSpanRef a(s.annotations().front());
+
+ BOOST_CHECK_EQUAL( a->start(), 0 );
+ BOOST_CHECK_EQUAL( a->stop(), 10 );
+ BOOST_CHECK_EQUAL( a->annotations()->get("name"), "test" );
+ BOOST_CHECK_EQUAL( a->annotations()->get("type"), "thing" );
motif m(10, "AAGGCC");
BOOST_CHECK_EQUAL( m.begin, 10 );
}
}
BOOST_CHECK_EQUAL(seq.annotations().size(), count);
- const std::list<annot> &a = seq.annotations();
- for (std::list<annot>::const_iterator annot_i = a.begin();
+ const SeqSpanRefList& a = seq.annotations();
+ for (SeqSpanRefList::const_iterator annot_i = a.begin();
annot_i != a.end();
++annot_i)
{
- int count = annot_i->end - annot_i->begin ;
+ //FIXME: was I doing something here?
+ int count = (*annot_i)->stop() - (*annot_i)->start();
}
}
"AGCTAAAACTTTGGAAACTTTAGATCCCAGACAGGTGGCTTTCTTGCAGT");
Sequence seq(s, reduced_dna_alphabet);
-
- seq.add_annotation(annot(0, 10, "0-10", "0-10"));
- seq.add_annotation(annot(10, 20, "10-20", "10-20"));
- seq.add_annotation(annot(0, 20, "0-20", "0-20"));
- seq.add_annotation(annot(8, 12, "8-12", "8-12"));
- seq.add_annotation(annot(100, 5000, "100-5000", "100-5000"));
+ seq.add_annotation("0-10", "0-10", 0, 10);
+ seq.add_annotation("10-20", "10-20", 10, 20);
+ seq.add_annotation("0-20", "0-20", 0, 20);
+ seq.add_annotation("8-12", "8-12", 8, 12);
+ seq.add_annotation("100-5000", "100-5000", 100, 5000);
Sequence subseq = seq.subseq(5, 10);
- const list<annot> annots = subseq.annotations();
- // generate some ground truth
- list<annot> correct;
- correct.push_back(annot(0, 5, "0-10", "0-10"));
- correct.push_back(annot(5,10, "10-20", "10-20"));
- correct.push_back(annot(0,10, "0-20", "0-20"));
- correct.push_back(annot(3, 7, "8-12", "8-12"));
- BOOST_REQUIRE_EQUAL( annots.size(), correct.size() );
-
- list<annot>::iterator correct_i = correct.begin();
- list<annot>::const_iterator annot_i = annots.begin();
- for(; annot_i != annots.end(); ++annot_i, ++correct_i)
- {
- BOOST_CHECK( *annot_i == *correct_i );
- }
+ SeqSpanRefList annots_list = subseq.annotations();
+ BOOST_REQUIRE_EQUAL( annots_list.size(), 4 );
+
+ std::vector<SeqSpanRef> annots(annots_list.begin(), annots_list.end());
+ BOOST_CHECK_EQUAL( annots[0]->start(), 0);
+ BOOST_CHECK_EQUAL( annots[0]->size(), 5);
+ BOOST_REQUIRE( annots[0]->annotations() );
+ BOOST_CHECK_EQUAL( annots[0]->annotations()->name(), "0-10");
+
+ BOOST_CHECK_EQUAL( annots[1]->start(), 5);
+ BOOST_CHECK_EQUAL( annots[1]->size(), 10);
+ BOOST_REQUIRE( annots[1]->annotations() );
+ BOOST_CHECK_EQUAL( annots[1]->annotations()->name(), "10-20");
+
+ BOOST_CHECK_EQUAL( annots[2]->start(), 0);
+ BOOST_CHECK_EQUAL( annots[2]->size(), 10);
+ BOOST_REQUIRE( annots[2]->annotations() );
+ BOOST_CHECK_EQUAL( annots[2]->annotations()->name(), "0-20");
+
+ BOOST_CHECK_EQUAL( annots[3]->start(), 3);
+ BOOST_CHECK_EQUAL( annots[3]->size(), 7);
+ BOOST_REQUIRE( annots[3]->annotations() );
+ BOOST_CHECK_EQUAL( annots[3]->annotations()->name(), "8-12");
}
BOOST_AUTO_TEST_CASE( motif_annotation_update )
// starting conditions
BOOST_CHECK_EQUAL(seq.annotations().size(), 0);
BOOST_CHECK_EQUAL(seq.motifs().size(), 0);
- seq.add_annotation(annot(0, 10, "0-10", "0-10"));
- seq.add_annotation(annot(10, 20, "10-20", "10-20"));
- seq.add_annotation(annot(0, 20, "0-20", "0-20"));
+ seq.add_annotation("0-10", "0-10", 0, 10);
+ seq.add_annotation("10-20", "10-20", 10, 20);
+ seq.add_annotation("0-20", "0-20", 0, 20);
BOOST_CHECK_EQUAL(seq.annotations().size(), 3);
BOOST_CHECK_EQUAL(seq.motifs().size(), 0);
seq.add_motif("CCGTCCC");
seq.set_species("ribbet");
seq.add_motif("AA");
seq.add_motif("GC");
- annot a1(6,7,"t","t");
- seq.add_annotation(a1);
+ seq.add_annotation("t", "t", 6, 7);
std::ostringstream oss;
// allocate/deallocate serialization components
seq.set_species("ribbet");
seq.add_motif("AA");
seq.add_motif("GC");
- annot a1(6,7,"t","t");
- seq.add_annotation(a1);
+ seq.add_annotation("t", "t", 6, 7);
std::ostringstream oss;
// allocate/deallocate serialization components
void export_annot()
{
- class_<annot>("annot")
- .def(init<int, int, std::string, std::string>())
- .def_readwrite("begin", &annot::begin)
- .def_readwrite("end", &annot::end)
- .def_readwrite("type", &annot::type)
- .def_readwrite("name", &annot::name)
- ;
-
- class_<motif, bases<annot> >("motif", init<int, std::string>())
+ class_<motif>("motif", init<int, std::string>()) // motif is exported without a base class; begin/end/type/name are bound directly on it below
+ .def_readwrite("begin", &motif::begin)
+ .def_readwrite("end", &motif::end)
+ .def_readwrite("type", &motif::type)
+ .def_readwrite("name", &motif::name)
.def_readwrite("sequence", &motif::sequence)
;
}
void export_sequence()
{
void (Sequence::*load_fasta_piii)(const fs::path, int, int, int) = &Sequence::load_fasta;
+ void (Sequence::*add_annotation_ssii)(
+ std::string,
+ std::string,
+ Sequence::size_type,
+ Sequence::size_type
+ ) = &Sequence::add_annotation;
class_<Sequence>("Sequence")
.def(init<std::string>())
.def("__len__", &Sequence::size, "return the length of the sequence")
.def("__repr__", &Sequence::get_sequence, "display as string")
.def("__str__", &Sequence::get_sequence, "cast to string")
- .def("add_annotation", &Sequence::add_annotation, "append an annotation")
+ .def("add_annotation", add_annotation_ssii,
+ "create an annotation from name, type, start, stop")
//.def("annotations", &Sequence::annotations, "return list of annotations")
.def("add_motif", &Sequence::add_motif, "add a motif sequenence")
.def("clear", &Sequence::clear, "clear the sequence and its annotations")
self.failUnless(s.species == species)
def testAnnotations(self):
- annot = mussa.annot()
- annot.begin = 0
- annot.end = 10
- annot.name = "foo"
- annot.type = "utr"
-
seq = mussa.Sequence("AAGGCCTTAATTGGCCTT")
- seq.add_annotation(annot)
+ seq.add_annotation("foo", "utr", 0, 10)
def notestFile(self):
# remove no prefix once we have sequence loading from a stream