add annotations by sequence string
author Diane Trout <diane@caltech.edu>
Tue, 16 May 2006 00:53:06 +0000 (00:53 +0000)
committer Diane Trout <diane@caltech.edu>
Tue, 16 May 2006 00:53:06 +0000 (00:53 +0000)
OK, so this is a rather limited search system: it just reuses the
motif-searching code, but adds the matches to the annot list instead of the
motif list (and it handles looking for a whole list of sequences).

Ideally there should be a better way of specifying which algorithm to use,
but I'm really tired today and not coding well.

alg/sequence.cpp
alg/sequence.hpp
alg/test/test_sequence.cpp

index b15c4f5fc946ccecc13064f962ded0c2b32ca708..53a13f0b266ddde77b716a55e7b991a4a1aac02a 100644 (file)
@@ -796,3 +796,28 @@ Sequence::motif_scan(string a_motif, vector<int> * motif_match_starts)
   //cout << endl;
 }
 
+void Sequence::add_string_annotation(std::string a_seq, 
+                                     std::string name)
+{
+  vector<int> seq_starts = find_motif(a_seq);
+
+  for(vector<int>::iterator seq_start_i = seq_starts.begin();
+      seq_start_i != seq_starts.end();
+      ++seq_start_i)
+  {
+    annots.push_back(annot(*seq_start_i, 
+                           *seq_start_i+a_seq.size(),
+                           "",
+                           name));
+  }
+}
+
+//! annotate this sequence with each sequence in the range [start, end)
+/*! Every query is searched for via add_string_annotation(), using the
+ *  query's get_seq() as the search string and its get_header() as the
+ *  annotation name.
+ */
+void Sequence::find_sequences(std::list<Sequence>::iterator start, 
+                              std::list<Sequence>::iterator end)
+{
+  for (std::list<Sequence>::iterator query = start; query != end; ++query)
+  {
+    add_string_annotation(query->get_seq(), query->get_header());
+  }
+}
+
index 06dd3fad5125560a37cd5d26c5c6bc08ad98118d..dac29db641de6fae3a28d50f0b3476e3fc3e1b1b 100644 (file)
@@ -62,6 +62,9 @@ class Sequence
 
     void motif_scan(std::string a_motif, std::vector<int> * motif_match_starts);
     std::string rc_motif(std::string a_motif);
+    //! look for a string sequence type and add it to an annotation list
+    void add_string_annotation(std::string a_seq, std::string name);
+
   public:
     typedef std::string::iterator iterator;
     typedef std::string::const_iterator const_iterator;
@@ -114,19 +117,21 @@ class Sequence
 
     const std::string& get_header() const;
     //! add a motif to our list of motifs
-    /*! \throws motif_normalize_error if there's something wrong with a_motif
-     */
+    //! \throws motif_normalize_error if there's something wrong with a_motif
     void add_motif(std::string a_motif);
     //! clear our list of found motifs
     void clear_motifs();
     //! search a sequence for a_motif
-    /*! \throws motif_normalize_error if there's something wrong with a_motif
-     */
+    //! \throws motif_normalize_error if there's something wrong with a_motif
     std::vector<int> find_motif(std::string a_motif);
     //! convert IUPAC symbols to uppercase
-    /*! \throws motif_normalize_error if there is an invalid symbol
-     */
+    //! \throws motif_normalize_error if there is an invalid symbol
     static std::string motif_normalize(std::string a_motif);
+
+    //! annotate the current sequence with other sequences
+    void find_sequences(std::list<Sequence>::iterator start, 
+                        std::list<Sequence>::iterator end);
+
     void save(boost::filesystem::fstream &save_file);
     void load_museq(boost::filesystem::path load_file_path, int seq_num); 
 };
index 09c621e937874d673a2c66dcae3df69f817ae59e..ae2726cc0d0321d40a82829151426308db557e7c 100644 (file)
@@ -158,3 +158,24 @@ BOOST_AUTO_TEST_CASE( annot_test )
   BOOST_CHECK_EQUAL( m.name, "AAGGCC" );
   BOOST_CHECK_EQUAL( m.end,  10+6 );
 }
+
+BOOST_AUTO_TEST_CASE( annotate_from_sequence )
+{
+  Sequence s("CCGCCCCCCATCATCGCGGCTCTCCGAGAGTCCCGCGCCCCACTCCCGGC"
+             "ACCCACCTGACCGCGGGCGGCTCCGGCCCCGCTTCGCCCCACTGCGATCA"
+             "GTCGCGTCCCGCAGGCCAGGCACGCCCCGCCGCTCCCGCTGCGCCGGGCG"
+             "TCTGGGACCTCGGGCGGCTCCTCCGAGGGGCGGGGCAGCCGGGAGCCACG"
+             "CCCCCGCAGGTGAGCCGGCCACGCCCACCGCCCGTGGGAAGTTCAGCCTC"
+             "GGGGCTCCAGCCCCGCGGGAATGGCAGAACTTCGCACGCGGAACTGGTAA"
+             "CCTCCAGGACACCTCGAATCAGGGTGATTGTAGCGCAGGGGCCTTGGCCA"
+             "AGCTAAAACTTTGGAAACTTTAGATCCCAGACAGGTGGCTTTCTTGCAGT");
+
+  std::list<Sequence> query_list;
+  query_list.push_back(Sequence("GCCCCC"));
+  query_list.push_back(Sequence("GGACACCTC"));
+
+  BOOST_CHECK_EQUAL( s.annotations().size(), 0 );
+  s.find_sequences(query_list.begin(), query_list.end());
+  BOOST_CHECK_EQUAL( s.annotations().size(), 4 );
+}
+