From 2dcf1431198e9c736db8734f615f3e2e0ee35b75 Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Tue, 14 Mar 2006 03:26:31 +0000 Subject: [PATCH] attach motifs to a sequence object this attaches a list of motfis (derived from annot class) to a sequence, ideally there'd be only one list, but as I'm under a time pressure its a bit easier to keep the lists seperate. (As this way I don't have to go deal with fixing the file IO until later) --- alg/sequence.cpp | 50 +++++++++++++++++++++++++++++++++--- alg/sequence.hpp | 21 ++++++++++++++- alg/test/test_sequence.cpp | 52 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 119 insertions(+), 4 deletions(-) diff --git a/alg/sequence.cpp b/alg/sequence.cpp index b05349e..5c7f069 100644 --- a/alg/sequence.cpp +++ b/alg/sequence.cpp @@ -30,8 +30,35 @@ using namespace std; +annot::annot() + : start(0), + end(0), + type(""), + name("") +{ +} + +annot::annot(int start, int end, std::string type, std::string name) + : start(start), + end(end), + type(type), + name(name) +{ +} + +motif::motif(int start, std::string motif) + : annot(start, start+motif.size(), "motif", motif), + sequence(motif) +{ +} + Sequence::Sequence() + : sequence(""), + header(""), + species("") { + annots.clear(); + motif_list.clear(); } Sequence::Sequence(string seq) @@ -266,7 +293,7 @@ bool Sequence::empty() const return (size() == 0); } -const std::list Sequence::annotations() const +const std::list& Sequence::annotations() const { return annots; } @@ -606,6 +633,23 @@ Sequence::motif_validate(string a_motif) return valid_motif; } +void Sequence::add_motif(string a_motif) +{ + vector motif_starts = find_motif(a_motif); + + + for(vector::iterator motif_start_i = motif_starts.begin(); + motif_start_i != motif_starts.end(); + ++motif_start_i) + { + motif_list.push_back(motif(*motif_start_i, a_motif)); + } +} + +const list& Sequence::motifs() const +{ + return motif_list; +} vector Sequence::find_motif(string a_motif) @@ -613,7 +657,6 @@ Sequence::find_motif(string a_motif) vector motif_match_starts; string a_motif_rc; - motif_match_starts.clear(); //cout << "motif is: " << a_motif << endl; @@ -710,12 +753,13 @@ Sequence::motif_scan(string a_motif, vector * motif_match_starts) if (motif_i == motif_len) { //cout << "!!"; + annot new_motif; motif_match_starts->push_back(seq_i - motif_len + 1); motif_i = 0; } seq_i++; } - cout << endl; + //cout << endl; } diff --git a/alg/sequence.hpp b/alg/sequence.hpp index 39313be..9b52497 100644 --- a/alg/sequence.hpp +++ b/alg/sequence.hpp @@ -25,10 +25,24 @@ //! Attach annotation information to a sequence track struct annot { + annot(); + annot(int start, int end, std::string type, std::string name); + int start, end; std::string name, type; }; +/* The way that motifs are found currently doesn't really + * indicate that the match was a reverse compliment + */ +struct motif : public annot +{ + std::string sequence; + + //! this constructor is for when we're adding motifs to our annotations + motif(int start, std::string motif); +}; + //! sequence track for mussa. class Sequence { @@ -40,6 +54,8 @@ class Sequence std::string species; std::list annots; + //! a seperate list for motifs since we're currently not saving them + std::list motif_list; void motif_scan(std::string a_motif, std::vector * motif_match_starts); std::string rc_motif(std::string a_motif); @@ -69,7 +85,8 @@ class Sequence /*! \throws mussa_load_error */ void load_annot(const std::string file_path, int start_index, int end_index); - const std::list annotations() const; + const std::list& annotations() const; + const std::list& motifs() const; // simple access functions void set_seq(const std::string& a_seq); @@ -91,6 +108,8 @@ class Sequence void clear(); const std::string& get_header() const; + //! add a motif to our list of motifs + void add_motif(std::string a_motif); std::vector find_motif(std::string a_motif); void save(std::fstream &save_file); void load_museq(std::string load_file_path, int seq_num); diff --git a/alg/test/test_sequence.cpp b/alg/test/test_sequence.cpp index 6b06b46..0a5cab2 100644 --- a/alg/test/test_sequence.cpp +++ b/alg/test/test_sequence.cpp @@ -1,8 +1,12 @@ #include +#include +#include #include "alg/sequence.hpp" #include "mussa_exceptions.hpp" +using namespace std; + //! when we try to load a missing file, do we get an error? BOOST_AUTO_TEST_CASE( sequence_load_exception ) { @@ -88,3 +92,51 @@ BOOST_AUTO_TEST_CASE ( sequence_iterators ) BOOST_CHECK_EQUAL( s.size(), count ); BOOST_CHECK_EQUAL( cs.size(), count ); } + +BOOST_AUTO_TEST_CASE( sequence_motifs ) +{ + string m("AAAA"); + string bogus("AATTGGAA"); + Sequence s1("AAAAGGGGCCCCTTTT"); + + list::const_iterator motif_i = s1.motifs().begin(); + list::const_iterator motif_end = s1.motifs().end(); + + // do our iterators work? + BOOST_CHECK( motif_i == s1.motifs().begin() ); + BOOST_CHECK( motif_end == s1.motifs().end() ); + BOOST_CHECK( motif_i == motif_end ); + + + s1.add_motif(bogus); + BOOST_CHECK( s1.motifs().begin() == s1.motifs().end() ); + s1.add_motif(m); + BOOST_CHECK( s1.motifs().begin() != s1.motifs().end() ); + BOOST_CHECK_EQUAL( s1.motifs().size(), 2 ); + + for(motif_i = s1.motifs().begin(); + motif_i != s1.motifs().end(); + ++motif_i) + { + BOOST_CHECK_EQUAL( motif_i->type, "motif" ); + BOOST_CHECK_EQUAL( motif_i->name, m); + BOOST_CHECK_EQUAL( motif_i->sequence, m); + } + +} + +BOOST_AUTO_TEST_CASE( annot_test ) +{ + annot a(0, 10, "test", "thing"); + + BOOST_CHECK_EQUAL( a.start, 0 ); + BOOST_CHECK_EQUAL( a.end, 10 ); + BOOST_CHECK_EQUAL( a.type, "test" ); + BOOST_CHECK_EQUAL( a.name, "thing" ); + + motif m(10, "AAGGCC"); + BOOST_CHECK_EQUAL( m.start, 10 ); + BOOST_CHECK_EQUAL( m.type, "motif" ); + BOOST_CHECK_EQUAL( m.name, "AAGGCC" ); + BOOST_CHECK_EQUAL( m.end, 10+6 ); +} -- 2.30.2