From: Diane Trout Date: Tue, 16 May 2006 00:53:06 +0000 (+0000) Subject: add annotations by sequence string X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=mussa.git;a=commitdiff_plain;h=f25185c3fe073c83f711342a5eb0f9afca88613b add annotations by sequence string Ok so this is a rather limited search system as it is just using the motif searching code, but adding things to the annot list instead of the motif list. (and it handles looking at a list of sequences). Ideally there should be some better way of specifying which algorithm to use but I'm really tired today and not coding well. --- diff --git a/alg/sequence.cpp b/alg/sequence.cpp index b15c4f5..53a13f0 100644 --- a/alg/sequence.cpp +++ b/alg/sequence.cpp @@ -796,3 +796,28 @@ Sequence::motif_scan(string a_motif, vector * motif_match_starts) //cout << endl; } +void Sequence::add_string_annotation(std::string a_seq, + std::string name) +{ + vector seq_starts = find_motif(a_seq); + + for(vector::iterator seq_start_i = seq_starts.begin(); + seq_start_i != seq_starts.end(); + ++seq_start_i) + { + annots.push_back(annot(*seq_start_i, + *seq_start_i+a_seq.size(), + "", + name)); + } +} + +void Sequence::find_sequences(std::list<Sequence>::iterator start, + std::list<Sequence>::iterator end) +{ + while (start != end) { + add_string_annotation(start->get_seq(), start->get_header()); + ++start; + } +} + diff --git a/alg/sequence.hpp b/alg/sequence.hpp index 06dd3fa..dac29db 100644 --- a/alg/sequence.hpp +++ b/alg/sequence.hpp @@ -62,6 +62,9 @@ class Sequence void motif_scan(std::string a_motif, std::vector * motif_match_starts); std::string rc_motif(std::string a_motif); + //! look for a string sequence type and and it to an annotation list + void add_string_annotation(std::string a_seq, std::string name); + public: typedef std::string::iterator iterator; typedef std::string::const_iterator const_iterator; @@ -114,19 +117,21 @@ class Sequence const std::string& get_header() const; //! add a motif to our list of motifs - /*! 
\throws motif_normalize_error if there's something wrong with a_motif - */ + //! \throws motif_normalize_error if there's something wrong with a_motif void add_motif(std::string a_motif); //! clear our list of found motifs void clear_motifs(); //! search a sequence for a_motif - /*! \throws motif_normalize_error if there's something wrong with a_motif - */ + //! \throws motif_normalize_error if there's something wrong with a_motif std::vector find_motif(std::string a_motif); //! convert IUPAC symbols to upperase - /*! \throws motif_normalize_error if there is an invalid symbol - */ + //! \throws motif_normalize_error if there is an invalid symbol static std::string motif_normalize(std::string a_motif); + + //! annotate the current sequence with other sequences + void find_sequences(std::list<Sequence>::iterator start, + std::list<Sequence>::iterator end); + void save(boost::filesystem::fstream &save_file); void load_museq(boost::filesystem::path load_file_path, int seq_num); }; diff --git a/alg/test/test_sequence.cpp b/alg/test/test_sequence.cpp index 09c621e..ae2726c 100644 --- a/alg/test/test_sequence.cpp +++ b/alg/test/test_sequence.cpp @@ -158,3 +158,24 @@ BOOST_AUTO_TEST_CASE( annot_test ) BOOST_CHECK_EQUAL( m.name, "AAGGCC" ); BOOST_CHECK_EQUAL( m.end, 10+6 ); } + +BOOST_AUTO_TEST_CASE( annotate_from_sequence ) +{ + Sequence s("CCGCCCCCCATCATCGCGGCTCTCCGAGAGTCCCGCGCCCCACTCCCGGC" + "ACCCACCTGACCGCGGGCGGCTCCGGCCCCGCTTCGCCCCACTGCGATCA" + "GTCGCGTCCCGCAGGCCAGGCACGCCCCGCCGCTCCCGCTGCGCCGGGCG" + "TCTGGGACCTCGGGCGGCTCCTCCGAGGGGCGGGGCAGCCGGGAGCCACG" + "CCCCCGCAGGTGAGCCGGCCACGCCCACCGCCCGTGGGAAGTTCAGCCTC" + "GGGGCTCCAGCCCCGCGGGAATGGCAGAACTTCGCACGCGGAACTGGTAA" + "CCTCCAGGACACCTCGAATCAGGGTGATTGTAGCGCAGGGGCCTTGGCCA" + "AGCTAAAACTTTGGAAACTTTAGATCCCAGACAGGTGGCTTTCTTGCAGT"); + + std::list<Sequence> query_list; + query_list.push_back(Sequence("GCCCCC")); + query_list.push_back(Sequence("GGACACCTC")); + + BOOST_CHECK_EQUAL( s.annotations().size(), 0 ); + s.find_sequences(query_list.begin(), 
query_list.end()); + BOOST_CHECK_EQUAL( s.annotations().size(), 4 ); +} +