ticket:62 fix local alignment
authorDiane Trout <diane@caltech.edu>
Sat, 20 May 2006 01:16:26 +0000 (01:16 +0000)
committerDiane Trout <diane@caltech.edu>
Sat, 20 May 2006 01:16:26 +0000 (01:16 +0000)
This appears to fix the problem where the local alignment was being
miscomputed. In implementing this I collapsed ExtendedConservedPath into
ConservedPath, and I finished seperating computing the local alignment
from drawing said alignment. Once I seperated the two and could actually
call the core algorithm I was able to add some (simple) unittesting of
the local alignment code, and use some much simpler drawing code.

14 files changed:
alg/conserved_path.cpp
alg/conserved_path.hpp
alg/mussa.cpp
alg/mussa.hpp
alg/nway_entropy.cpp
alg/nway_other.cpp
alg/nway_paths.cpp
alg/nway_paths.hpp
alg/test/test_conserved_path.cpp
alg/test/test_mussa.cpp
py/conserved_path.cpp
qui/MussaAlignedWindow.cpp
qui/MussaAlignedWindow.hpp
qui/MussaWindow.cpp

index a3778daae38edec6b5cc3b21139d56b3b4b80151..c67b4540d986b53940f52550c3e3a1c30c695ec5 100644 (file)
@@ -6,25 +6,29 @@ using namespace std;
 /////////////////////
 
 ConservedPath::ConservedPath()
-  : score(0.0),
+  : window_size(0),
+    score(0.0),
     track_indexes()
 {
 }
 
 ConservedPath::ConservedPath(const ConservedPath::ConservedPath& other)
-  : score(other.score),
+  : window_size(other.window_size),
+    score(other.score),
     track_indexes(other.track_indexes)
 {
 }
 
-ConservedPath::ConservedPath(double s, ConservedPath::path_type path)
-  : score(s),
+ConservedPath::ConservedPath(size_t win_size, double s, ConservedPath::path_type path)
+  : window_size(win_size),
+    score(s),
     track_indexes(path)
 {
 }
 
 void ConservedPath::clear()
 {
+  window_size = 0;
   score = 0.0;
   track_indexes.clear();
 }
@@ -52,7 +56,8 @@ bool operator!=(const ConservedPath::ConservedPath& a,
 
 std::ostream& operator<<(std::ostream& out, const ConservedPath& path)
 {
-  out << "<path score=" << path.score 
+  out << "<path win_size" << path.window_size 
+      << " score=" << path.score 
       << " len=" << path.track_indexes.size();
   if (path.track_indexes.size() > 0)
   {
@@ -150,7 +155,7 @@ std::vector<ConservedPath::path_element> ConservedPath::normalizedIndexes() cons
        ++this_itor)
   {
     if (*this_itor < 0) {
-      paths.push_back(-*this_itor);
+      paths.push_back(-(*this_itor)-window_size);
     } else {
       paths.push_back(*this_itor);
     }
@@ -158,51 +163,7 @@ std::vector<ConservedPath::path_element> ConservedPath::normalizedIndexes() cons
   return paths;
 }
 
-
-/////////////////////
-ExtendedConservedPath::ExtendedConservedPath()
-  : ConservedPath(),
-    window_size(0)
-{
-}
-
-ExtendedConservedPath::ExtendedConservedPath(const ExtendedConservedPath& other)
-  : ConservedPath(other), 
-    window_size(other.window_size)
-{
-}
-
-ExtendedConservedPath::ExtendedConservedPath(int win_size, ConservedPath other)
-  : ConservedPath(other), 
-    window_size(win_size)
-{
-}
-
-ExtendedConservedPath::ExtendedConservedPath(int win_size, 
-                                             double s, 
-                                             ConservedPath::path_type p)
-  : ConservedPath(s, p),
-    window_size(win_size)
-{
-}
-
-std::vector<ConservedPath::path_element> ExtendedConservedPath::normalizedIndexes() const
-{
-  vector<path_element> paths;
-  for (ConservedPath::const_iterator this_itor = begin(); 
-       this_itor != end(); 
-       ++this_itor)
-  {
-    if (*this_itor < 0) {
-      paths.push_back(-(*this_itor) - window_size);
-    } else {
-      paths.push_back((*this_itor));
-    }
-  }
-  return paths;
-}
-
-ExtendedConservedPath& ExtendedConservedPath::extend(int growth)
+ConservedPath& ConservedPath::extend(int growth)
 {
   window_size += growth;
   
@@ -219,10 +180,3 @@ ExtendedConservedPath& ExtendedConservedPath::extend(int growth)
   return *this;
 }
 
-std::ostream& operator<<(std::ostream& out, const ExtendedConservedPath& path)
-{
-  out << "<extended_path size=" << path.window_size;
-  out << (ConservedPath&)path;
-  return out;
-}
-
index 6217474c9bd4a9dade4988ae6fedee6eef741bb4..05d23c91f5205c171156ed27b2328509f1092aeb 100644 (file)
@@ -13,7 +13,7 @@ struct ConservedPath
 
   ConservedPath();
   ConservedPath(const ConservedPath& );
-  ConservedPath(double score, path_type path);
+  ConservedPath(size_t window_size, double score, path_type path);
 
   //! reset our path to be empty (including setting threshold to 0)
   void clear();
@@ -40,30 +40,15 @@ struct ConservedPath
   std::vector<bool> reverseComplimented() const;
   //! return the list of indexes normalized (to the left)
   std::vector<path_element> normalizedIndexes() const;
+  //! extend our current path
+  //! (aka increment our window size  by growth)
+  ConservedPath& extend(int growth=1);
 
+  //! window size (really should always be positive
+  size_t window_size;
   //! either number of conserved bases or average entropy
   double score;
   //! offsets into each of our sequences representing the start of our window
   std::vector<path_element> track_indexes;
 };
-
-struct ExtendedConservedPath : public ConservedPath
-{
-  ExtendedConservedPath();
-  ExtendedConservedPath(const ExtendedConservedPath& other);
-  ExtendedConservedPath(int win_size, ConservedPath conserved_path);
-  ExtendedConservedPath(int win_size, double score, std::vector<int> path);
-
-  std::vector<ConservedPath::path_element> ExtendedConservedPath::normalizedIndexes() const;
-  //! extend our current path
-  /*! (aka increment our window size  by growth)
-   */
-  ExtendedConservedPath& extend(int growth=1);
-
-  // output some useful information
-  friend std::ostream& operator<<(std::ostream&, const ExtendedConservedPath&);
-
-  //! size of extended window
-  int window_size;
-};
 #endif
index e432b58e8392cea350dad20cb3da9908eb61e534..768dcdd85ca5653ef621f6413a8344275b7064d8 100644 (file)
@@ -152,6 +152,93 @@ const NwayPaths& Mussa::paths() const
   return the_paths;
 }
 
+//template <class IteratorT>
+//void Mussa::createLocalAlignment(IteratorT begin, IteratorT end)
+void Mussa::createLocalAlignment(std::list<ConservedPath>::iterator begin, 
+                                 std::list<ConservedPath>::iterator end, 
+                                 std::list<ConservedPath::path_type>& result,
+                                 std::list<std::vector<bool> >& reversed)
+{
+  const vector<Sequence>& raw_seq = the_seqs;
+  ConservedPath::path_type aligned_path;
+  size_t i2, i3;
+  int x_start, x_end;
+  int window_length, win_i;
+  int rc_1 = 0; 
+  int rc_2 = 0;
+  vector<bool> rc_list;
+  bool full_match;
+  vector<bool> matched;
+  int align_counter;
+
+  align_counter = 0;
+  for(std::list<ConservedPath>::iterator pathz_i=begin; pathz_i != end; ++pathz_i)
+  {
+    ConservedPath& a_path = *pathz_i;
+    window_length = a_path.window_size;
+    // determine which parts of the path are RC relative to first species
+    rc_list = a_path.reverseComplimented();
+    
+    // loop over each bp in the conserved region for all sequences
+    for(win_i = 0; win_i < window_length; win_i++)
+    {
+      aligned_path.clear();
+      // determine which exact base pairs match between the sequences
+      full_match = true;
+      for(i2 = 0; i2 < a_path.size()-1; i2++)
+      {
+        // assume not rc as most likely, adjust below
+        rc_1 = 0;
+        rc_2 = 0;
+        // no matter the case, any RC node needs adjustments
+        if (a_path[i2] < 0)
+          rc_1 = window_length-1;
+        if (a_path[i2+1] < 0)
+          rc_2 = window_length-1;        
+         
+        x_start = (abs(a_path[i2]-rc_1+win_i));
+        x_end =   (abs(a_path[i2+1]-rc_2+win_i));
+        
+        // RC case handling
+        // ugh, and xor...only want rc coloring if just one of the nodes is rc
+        // if both nodes are rc, then they are 'normal' relative to each other
+        if((rc_list[i2] || rc_list[i2+1] )&&!(rc_list[i2] && rc_list[i2+1]))
+        { //the hideous rc matching logic - not complex, but annoying
+          if(!(( (raw_seq[i2][x_start]=='A')&&(raw_seq[i2+1][x_end]=='T')) ||
+                ((raw_seq[i2][x_start]=='T')&&(raw_seq[i2+1][x_end]=='A')) ||
+                ((raw_seq[i2][x_start]=='G')&&(raw_seq[i2+1][x_end]=='C')) ||
+                ((raw_seq[i2][x_start]=='C')&&(raw_seq[i2+1][x_end]=='G'))) ) 
+          {
+            full_match = false;
+          } else {
+            aligned_path.push_back(x_start);
+          }
+        }
+        else
+        { // forward match
+          if (!( (raw_seq[i2][x_start] == raw_seq[i2+1][x_end]) &&
+                (raw_seq[i2][x_start] != 'N') &&
+                (raw_seq[i2+1][x_end] != 'N') ) ) {
+            full_match = false;
+          } else {
+            aligned_path.push_back(x_start);
+          }
+        }
+      }
+      // grab the last part of our path, assuming we matched
+      if (full_match)
+        aligned_path.push_back(x_end);
+
+      if (aligned_path.size() > 0) {
+        result.push_back(aligned_path);
+        reversed.push_back(rc_list);
+      }
+    }
+    align_counter++;
+  }
+}
+
+
 // takes a string and sets it as the next seq 
 void
 Mussa::add_a_seq(string a_seq)
index 0a724e7904450899175eeec336dfe9e154e4bec2..4fae7de68b8f90be4f34f98a420ffe97319c389b 100644 (file)
@@ -81,6 +81,13 @@ class Mussa
     //! return the refined paths found by the nway analysis.
     const NwayPaths& paths() const;
 
+    //! given selected_paths, and view_paths, compute per base pair matches
+    //template <class IteratorT>
+    void createLocalAlignment(std::list<ConservedPath>::iterator begin, 
+                              std::list<ConservedPath>::iterator end,
+                              std::list<ConservedPath::path_type>& result,
+                              std::list<std::vector<bool> >& reversed);
+
     //! run seqcomp and the nway filtering algorithm.
     /*!analyze will run seqcomp and then the nway algorithm
      * on whatever sequences have been loaded into this mussa instance.
index 13a42c5b6ccd4b1e6357b952e9673d2f17e95fb6..1d9870a4aa0ed3cf05926e5c8e908f68d566c86e 100644 (file)
@@ -196,7 +196,7 @@ NwayPaths::entropy_path_search(vector<vector<FLPs> > all_comparisons)
         // check entropy <---------------------------------------------------
         avg_entropy = path_entropy(path);
         if (avg_entropy <= ent_thres)
-          pathz.push_back(ConservedPath(avg_entropy, path));
+          pathz.push_back(ConservedPath(win_size, avg_entropy, path));
         
         // now advance the right iterator
         not_advanced = true;
index 19aaf0d268279bb981ae5d7730d352cb2ab3f4db..70b5bd9e35f143d946cacf716ab5a54b465821d5 100644 (file)
@@ -163,7 +163,7 @@ NwayPaths::radiate_path_search(vector<vector<FLPs> > all_comparisons)
         // add path that each species iterator is pointing to
         set_path_to_cur_sp_itor_track(path, win_i, sp_itor_begin);
        
-        pathz.push_back(ConservedPath(soft_thres, path));
+        pathz.push_back(ConservedPath(win_size, soft_thres, path));
 
         // now advance the right iterator
         still_paths = advance_sp_itor_track(sp_itor_begin, 
@@ -241,7 +241,7 @@ NwayPaths::trans_path_search(vector<vector<FLPs> > all_comparisons)
 
         // if the path is transitive, save the path
         if (is_transitive_path(path, all_comparisons, soft_thres)) {
-          ConservedPath new_path(soft_thres, path);
+          ConservedPath new_path(win_size, soft_thres, path);
           pathz.push_back(new_path);
         }
 
index c76e89d441410f9cd89bb0e9e0a5458bca7bdfe7..f71b0283bb84db0e87981ecf358a6972795ae46e 100644 (file)
@@ -62,7 +62,7 @@ void
 NwayPaths::simple_refine()
 {
   // ext_path remembers the first window set in an extending path
-  ExtendedConservedPath ext_path, new_path;
+  ConservedPath ext_path, new_path;
   list<ConservedPath>::iterator cur_path, next_path;
   list<ConservedPath>::iterator pathz_i;
   int win_ext_len = 0;
@@ -76,7 +76,7 @@ NwayPaths::simple_refine()
   // only try to extend when pathz isn't empty.
   if (pathz_i != pathz.end())
   {
-    ext_path = ExtendedConservedPath( win_size, *pathz_i);
+    ext_path = *pathz_i;
 
     while(pathz_i != pathz.end())
     {
@@ -108,7 +108,7 @@ NwayPaths::simple_refine()
         if (not end) {
           // reset stuff
           win_ext_len = 0;
-          ext_path = ExtendedConservedPath( win_size, *next_path);
+          ext_path = *next_path;
         }
       }
     }
@@ -120,7 +120,7 @@ NwayPaths::simple_refine()
 void
 NwayPaths::add_path(int threshold, vector<int>& loaded_path)
 {
-  pathz.push_back(ConservedPath(threshold, loaded_path));
+  pathz.push_back(ConservedPath(threshold, 0.0, loaded_path));
 }
 
 void
@@ -134,7 +134,7 @@ void
 NwayPaths::save(fs::path save_file_path)
 {
   fs::fstream save_file;
-  list<ExtendedConservedPath >::iterator path_i, paths_end;
+  list<ConservedPath >::iterator path_i, paths_end;
 
   save_file.open(save_file_path, ios::out);
 
@@ -151,7 +151,7 @@ NwayPaths::save(fs::path save_file_path)
   //paths_end = pathz.end();
   while (path_i != paths_end)
   {
-    ExtendedConservedPath& a_path = *path_i;
+    ConservedPath& a_path = *path_i;
     //cout << a_path.size() << endl;
     //first entry is the window length of the windows in the path
     save_file << a_path.window_size << ":";
@@ -253,9 +253,9 @@ NwayPaths::load(fs::path load_file_path)
           file_data_line = file_data_line.substr(comma_split_i+1);
         }
         assert (loaded_path.size() == species_num );
-        refined_pathz.push_back(ExtendedConservedPath(atoi(path_width.c_str()), 
-                                                      threshold, 
-                                                      loaded_path));
+        refined_pathz.push_back(ConservedPath(atoi(path_width.c_str()), 
+                                                   threshold, 
+                                                   loaded_path));
       }
       getline(load_file,file_data_line);
     }
index acbef47475a70a62b36e3879946b3bf284fef690..a25cfcebeaf751e3b6669a8d14228dfa440807f3 100644 (file)
@@ -28,7 +28,7 @@ class NwayPaths
   friend class SeqView;
   protected:
     int threshold;
-    int win_size;
+    size_t win_size;
     int soft_thres;
 
     double ent_thres;
@@ -75,13 +75,13 @@ class NwayPaths
     std::list<ConservedPath>::iterator pbegin() { return pathz.begin() ; }
     std::list<ConservedPath>::iterator pend() { return pathz.end() ; }
     size_t path_size() const { return refined_pathz.size(); }
-    std::list<ExtendedConservedPath>::iterator rpbegin() { return refined_pathz.begin() ; }
-    std::list<ExtendedConservedPath>::iterator rpend() { return refined_pathz.end() ; }
+    std::list<ConservedPath>::iterator rpbegin() { return refined_pathz.begin() ; }
+    std::list<ConservedPath>::iterator rpend() { return refined_pathz.end() ; }
     size_t refined_path_size() const { return refined_pathz.size(); }
 
     // these probably shouldn't be public, but lets start 
     // simple
     std::list<ConservedPath> pathz;
-    std::list<ExtendedConservedPath > refined_pathz;
+    std::list<ConservedPath > refined_pathz;
 };
 #endif
index 7050a73a11bfc5b4e625d4cc76c22fcda51ac5a5..f8d670c31fd62a81faf084bc997709e110c2701c 100644 (file)
@@ -22,7 +22,7 @@ BOOST_AUTO_TEST_CASE ( conserved_path_simple )
   cp1.track_indexes = path;
   BOOST_CHECK_EQUAL( cp1.size(), path.size() );
 
-  ConservedPath cp2(18, path);
+  ConservedPath cp2(20, 18, path);
   BOOST_CHECK_EQUAL (cp1, cp2);
   BOOST_CHECK_EQUAL( cp2.size(), path.size() );
 
@@ -69,7 +69,7 @@ BOOST_AUTO_TEST_CASE ( conserved_path_reverse_compliment )
   vector<int> path;
   path += 3,4,-5,1; // magic from boost assign
 
-  ConservedPath cp1(10, path);
+  ConservedPath cp1(20, 10, path);
   vector<bool> reversed = cp1.reverseComplimented();
 
   BOOST_CHECK_EQUAL( path.size(), reversed.size());
@@ -88,24 +88,24 @@ BOOST_AUTO_TEST_CASE ( extended_conserved_path_simple )
   vector<int> path;
   path += 3,4,5,1; // magic from boost assign
 
-  ExtendedConservedPath ecp1;
+  ConservedPath ecp1;
   ecp1.window_size = 30;
   ecp1.score = 18;
   ecp1.track_indexes = path;
   BOOST_CHECK_EQUAL ( ecp1.size(), path.size() );
 
-  ExtendedConservedPath ecp2(30, 18.0, path);
+  ConservedPath ecp2(30, 18.0, path);
   BOOST_CHECK_EQUAL ( ecp1, ecp2 );
   BOOST_CHECK_EQUAL ( ecp2.size(), path.size() );
 
-  ExtendedConservedPath ecp_ne(35, 20.0, path);
+  ConservedPath ecp_ne(35, 20.0, path);
   BOOST_CHECK(ecp2 != ecp_ne);
 
   ConservedPath cp1;
   cp1.score = 18;
   cp1.track_indexes = path;
 
-  ExtendedConservedPath ecp3(30, cp1);
+  ConservedPath ecp3(cp1);
   BOOST_CHECK_EQUAL( ecp2, ecp3 );
   BOOST_CHECK_EQUAL( ecp3.size(), path.size() );
 }
@@ -123,10 +123,10 @@ BOOST_AUTO_TEST_CASE ( extended_conserved_path_growth )
   path_not_next += 4,5,6,5; 
   const int window_size = 4;
 
-  ExtendedConservedPath ecp_base(window_size, 20, path_base);
-  ConservedPath cp_next(25, path_next);
-  ConservedPath cp_next_reversed(25, path_next_reversed);
-  ConservedPath cp_not_next(22, path_not_next);
+  ConservedPath ecp_base(window_size, 20, path_base);
+  ConservedPath cp_next(window_size, 25, path_next);
+  ConservedPath cp_next_reversed(window_size, 25, path_next_reversed);
+  ConservedPath cp_not_next(window_size, 22, path_not_next);
 
   BOOST_CHECK(ecp_base.nextTo(cp_next));
   BOOST_CHECK(!ecp_base.nextTo(cp_not_next));
@@ -145,7 +145,7 @@ BOOST_AUTO_TEST_CASE ( extended_conserved_normalized )
   vector<int> normalized_path;
   normalized_path += 3,4, 5,9;
 
-  ExtendedConservedPath ecp1;
+  ConservedPath ecp1;
   ecp1.window_size = 10;
   ecp1.score = 18;
   ecp1.track_indexes = path;
index da17b226bc510bca0f96799222eec4dc36518dfa..f215816efd396bc8925ebc9e242b375ff8da0873 100644 (file)
@@ -1,6 +1,10 @@
 #include <boost/test/auto_unit_test.hpp>
 #include <boost/filesystem/operations.hpp>
 namespace fs = boost::filesystem;
+#include <boost/assign/list_of.hpp>
+#include <boost/assign/list_inserter.hpp>
+#include <boost/assign.hpp>
+namespace assign = boost::assign;
 
 #include <string>
 #include <sstream>
@@ -160,3 +164,76 @@ BOOST_AUTO_TEST_CASE( mussa_add_motif )
   m1.add_motifs(motifs, colors);
   BOOST_CHECK_EQUAL( first_size, m1.motifs().size() );
 }
+
+static void 
+local_align_test(const vector<Sequence> &seqs, 
+                 const list<ConservedPath::path_type>& result,
+                 const list<vector<bool> >& reversed)
+{
+  map<char, vector <char> >  m;
+  assign::insert(m)('A', assign::list_of('A')('T') )
+                   ('T', assign::list_of('T')('A') )
+                   ('G', assign::list_of('G')('C') )
+                   ('C', assign::list_of('C')('G') );
+  list<vector<bool> >::const_iterator rc_i = reversed.begin();
+
+  for(list<ConservedPath::path_type>::const_iterator base_i = result.begin();
+      base_i != result.end();
+      ++base_i, ++rc_i)
+  {
+    // since the reverse compliment flag is relative to the first sequence
+    // the first one should always be false
+    BOOST_CHECK_EQUAL( (*rc_i)[0], false );
+    const int first_path_basepair_index = (*base_i)[0];
+    const int second_path_basepair_index = (*base_i)[1];
+    const char first_basepair = seqs[0][first_path_basepair_index];
+    const char second_basepair = seqs[1][second_path_basepair_index];
+    // get our index into our reverse compliment map m
+    const int second_compliment_index = (*rc_i)[1];
+    // lookup the forward or reverse compliment depending on our rc flag
+    const char complimented_second = m[second_basepair][second_compliment_index];
+   
+    BOOST_CHECK_EQUAL( first_basepair, complimented_second) ;
+  }
+}
+
+                 
+BOOST_AUTO_TEST_CASE( local_alignment )
+{
+  string s0("GCGCATAT");
+  string s1("AAAAAAAT");
+  Sequence seq1(s1);
+
+  Mussa analysis;
+  analysis.add_a_seq(s0);
+  analysis.add_a_seq(s1);
+  analysis.analyze(4,3);
+  NwayPaths npath = analysis.paths();
+  list<ConservedPath::path_type> result;
+  list<vector<bool> > reversed;
+  list<ConservedPath>::iterator pathz_i = npath.pathz.begin();
+
+  list<ConservedPath> selected_paths;
+  selected_paths.push_back(*pathz_i);
+  analysis.createLocalAlignment(selected_paths.begin(), 
+                                selected_paths.end(),
+                                result,
+                                reversed);
+
+  local_align_test(analysis.sequences(), result, reversed);
+
+  ++pathz_i;
+  result.clear();
+  reversed.clear();
+  selected_paths.clear();
+  selected_paths.push_back(*pathz_i);
+  analysis.createLocalAlignment(selected_paths.begin(), 
+                                selected_paths.end(),
+                                result,
+                                reversed);
+  local_align_test(analysis.sequences(), result, reversed);
+
+
+}
+
+
index 65a5d061011194482734f430ab51835906ee0b43..0ab7a55f8b1cd9370d9a9f9e6d43c4b9d06e09c2 100644 (file)
@@ -13,14 +13,11 @@ void export_conserved_path()
   ;
 
   class_<ConservedPath>("ConservedPath")
+    .def_readwrite("window_size", &ConservedPath::window_size)
     .def_readwrite("score", &ConservedPath::score)
     .def_readwrite("track_indexes", &ConservedPath::track_indexes)
     //.add_property("track_indexes", //iterator<std::vector<int> >())
     //    range(&ConservedPath::track_indexes.begin(),
     //                                 &ConservedPath::track_indexes.end()))
   ;
-
-  class_<ExtendedConservedPath, bases<ConservedPath> >("ExtendedConservedPath")
-    .def_readwrite("window_size", &ExtendedConservedPath::window_size)
-  ;
 }
index a65a9555167ef4da175ddb7f99eb55e7715226b2..d113f48f65daab996d9ba959fa30b8de6697cb7a 100644 (file)
@@ -58,8 +58,8 @@ void MussaAlignedWindow::setSelectedPaths(Mussa &m, const set<int>& sel_paths)
 {
   // sets are sorted
   set<int>::iterator sel_i = sel_paths.begin();
-  list<ExtendedConservedPath>::const_iterator path_i = m.paths().refined_pathz.begin();
-  list<ExtendedConservedPath>::const_iterator path_end = m.paths().refined_pathz.end();
+  list<ConservedPath>::const_iterator path_i = m.paths().refined_pathz.begin();
+  list<ConservedPath>::const_iterator path_end = m.paths().refined_pathz.end();
   size_t path_size = m.paths().refined_pathz.size();
   size_t pathid=0;
 
@@ -91,7 +91,7 @@ void MussaAlignedWindow::setupMenus()
   pick_actions.clear();
   view_actions.clear();
 
-  for(vector<ExtendedConservedPath >::iterator pathz_i=selected_paths.begin(); 
+  for(vector<ConservedPath >::iterator pathz_i=selected_paths.begin(); 
       pathz_i != selected_paths.end(); 
       ++pathz_i)
   {
@@ -144,110 +144,31 @@ void MussaAlignedWindow::update()
 
 void MussaAlignedWindow::computeMatchLines()
 {
-  const vector<Sequence>& raw_seq = analysis.sequences();
-  vector<int> aligned_path;
-  size_t i2, i3;
-  int x_start, x_end;
-  int window_length, win_i;
-  int rc_1 = 0; 
-  int rc_2 = 0;
-  vector<bool> rc_list;
-  bool full_match;
-  vector<bool> matched;
-  int align_counter;
-
   browser.clear_links();
-  align_counter = 0;
-  for(vector<ExtendedConservedPath >::iterator pathz_i=selected_paths.begin(); 
-      pathz_i != selected_paths.end(); 
-      ++pathz_i)
+  
+  // filter out conserved paths
+  list<ConservedPath> filtered_paths;
+  vector<ConservedPath>::iterator path_i = selected_paths.begin();
+  list<ConservedPath::path_type> result;
+  list<vector<bool> > reversed;
+
+  for(vector<ConservedPath>::size_type count = 0; 
+      count != selected_paths.size();
+      ++count, ++path_i)
   {
-    if (view_paths[align_counter])
-    {
-      ExtendedConservedPath& a_path = *pathz_i;
-      window_length = a_path.window_size;
-      // determine which parts of the path are RC relative to first species
-      rc_list = a_path.reverseComplimented();
-      
-      // loop over each bp in the conserved region for all sequences
-      for(win_i = 0; win_i < window_length; win_i++)
-      {
-        aligned_path.clear();
-        // determine which exact base pairs match between the sequences
-        full_match = true;
-        for(i2 = 0; i2 < a_path.size()-1; i2++)
-        {
-          // assume not rc as most likely, adjust below
-          rc_1 = 0;
-          rc_2 = 0;
-          // no matter the case, any RC node needs adjustments
-          if (a_path[i2] < 0)
-            rc_1 = window_length-1;
-          if (a_path[i2+1] < 0)
-            rc_2 = window_length-1;        
-           
-          x_start = (abs(a_path[i2]-rc_1+win_i));
-          x_end =   (abs(a_path[i2+1]-rc_2+win_i));
-          
-          // RC case handling
-          // ugh, and xor...only want rc coloring if just one of the nodes is rc
-          // if both nodes are rc, then they are 'normal' relative to each other
-          if((rc_list[i2] || rc_list[i2+1] )&&!(rc_list[i2] && rc_list[i2+1]))
-          { //the hideous rc matching logic - not complex, but annoying
-            if(!(( (raw_seq[i2][x_start]=='A')&&(raw_seq[i2+1][x_end]=='T')) ||
-                  ((raw_seq[i2][x_start]=='T')&&(raw_seq[i2+1][x_end]=='A')) ||
-                  ((raw_seq[i2][x_start]=='G')&&(raw_seq[i2+1][x_end]=='C')) ||
-                  ((raw_seq[i2][x_start]=='C')&&(raw_seq[i2+1][x_end]=='G'))) )
-              full_match = false;
-          }
-          else
-          {
-            if (!( (raw_seq[i2][x_start] == raw_seq[i2+1][x_end]) &&
-                  (raw_seq[i2][x_start] != 'N') &&
-                  (raw_seq[i2+1][x_end] != 'N') ) )
-              full_match = false;
-          }
-        }
-        
-        // draw for matches stretching across all sequences
-        if (full_match)
-        {
-          // now can draw the line for each bp in this window that matches
-          // grrr, need to ask if anyone cares if I switch the seq 
-          // top-bot order...
-          i3 = 0;
-          //y_loc = y_min + 5;
-          for(i2 = 0; i2 < a_path.size()-1; i2++)
-          {
-            // assume not rc as most likely, adjust below
-            rc_1 = 0;
-            rc_2 = 0;
-            // no matter the case, any RC node needs adjustments
-            if (a_path[i2] < 0)
-            {
-              rc_1 = window_length;        
-            }
-            if (a_path[i2] < 0)
-            {
-              rc_2 = window_length;        
-            }
-            
-            // maybe shouldn't recalc these, but store values from first loop
-            x_start = (abs((int) (a_path[i2]-rc_1+win_i)));
-            x_end =   (abs((int) (a_path[i2+1]-rc_2+win_i)));
-            aligned_path.push_back(x_start);
-            // if we're on the last time through the loop, save x_end too
-            if (i2 == a_path.size()-2) {
-              aligned_path.push_back(x_end);
-            }
-          }
-        }
-        if (aligned_path.size() > 0) {
-          browser.link(aligned_path, rc_list,1);
-        }
-      }
-    }
-    align_counter++;
+    if (view_paths[count]) 
+      filtered_paths.push_back(*path_i);
+  }
+  analysis.createLocalAlignment(filtered_paths.begin(), 
+                                filtered_paths.end(),
+                                result, 
+                                reversed);
+
+  list<ConservedPath::path_type>::const_iterator result_i = result.begin();
+  list<vector<bool> >::const_iterator reversed_i = reversed.begin();
+  for(int i = 0; i != result.size(); ++i, ++result_i, ++reversed_i)
+  {
+    // make 1 base long links
+    browser.link(*result_i, *reversed_i, 1);
   }
 }
-
index eb8be9867fce62f1c4a5b7abdd9924680dae1ce4..ec426fbd8d555135d9fd9ccfae64d1d4d742c24a 100644 (file)
@@ -38,7 +38,7 @@ protected:
   Mussa& analysis;
   //std::vector<Sequence> sequences;
   //const std::set<int>& selected_paths;
-  std::vector<ExtendedConservedPath> selected_paths;
+  std::vector<ConservedPath> selected_paths;
   std::vector<bool> view_paths;
   SequenceBrowserWidget browser;
   QMenu pick_align_menu;
index 0c86b7d7cbe7be2322bec17850eb2d1aea8813dc..b32dcfa8bf4b43ebd273515ae1c46aaef8170263 100644 (file)
@@ -434,7 +434,7 @@ void MussaWindow::updateLinks()
   bool reversed = false;
   const NwayPaths& nway = analysis->paths();
 
-  typedef list<ExtendedConservedPath> conserved_paths;
+  typedef list<ConservedPath> conserved_paths;
   typedef conserved_paths::const_iterator const_conserved_paths_itor;
   for(const_conserved_paths_itor path_itor = nway.refined_pathz.begin();
       path_itor != nway.refined_pathz.end();