ticket:62 fix local alignment
[mussa.git] / alg / mussa.cpp
index e432b58e8392cea350dad20cb3da9908eb61e534..768dcdd85ca5653ef621f6413a8344275b7064d8 100644 (file)
@@ -152,6 +152,93 @@ const NwayPaths& Mussa::paths() const
   return the_paths;
 }
 
+//template <class IteratorT>
+//void Mussa::createLocalAlignment(IteratorT begin, IteratorT end)
+void Mussa::createLocalAlignment(std::list<ConservedPath>::iterator begin, 
+                                 std::list<ConservedPath>::iterator end, 
+                                 std::list<ConservedPath::path_type>& result,
+                                 std::list<std::vector<bool> >& reversed)
+{
+  const vector<Sequence>& raw_seq = the_seqs;
+  ConservedPath::path_type aligned_path;
+  size_t i2, i3;
+  int x_start, x_end;
+  int window_length, win_i;
+  int rc_1 = 0; 
+  int rc_2 = 0;
+  vector<bool> rc_list;
+  bool full_match;
+  vector<bool> matched;
+  int align_counter;
+
+  align_counter = 0;
+  for(std::list<ConservedPath>::iterator pathz_i=begin; pathz_i != end; ++pathz_i)
+  {
+    ConservedPath& a_path = *pathz_i;
+    window_length = a_path.window_size;
+    // determine which parts of the path are RC relative to first species
+    rc_list = a_path.reverseComplimented();
+    
+    // loop over each bp in the conserved region for all sequences
+    for(win_i = 0; win_i < window_length; win_i++)
+    {
+      aligned_path.clear();
+      // determine which exact base pairs match between the sequences
+      full_match = true;
+      for(i2 = 0; i2 < a_path.size()-1; i2++)
+      {
+        // assume not rc as most likely, adjust below
+        rc_1 = 0;
+        rc_2 = 0;
+        // no matter the case, any RC node needs adjustments
+        if (a_path[i2] < 0)
+          rc_1 = window_length-1;
+        if (a_path[i2+1] < 0)
+          rc_2 = window_length-1;        
+         
+        x_start = (abs(a_path[i2]-rc_1+win_i));
+        x_end =   (abs(a_path[i2+1]-rc_2+win_i));
+        
+        // RC case handling
+        // ugh, and xor...only want rc coloring if just one of the nodes is rc
+        // if both nodes are rc, then they are 'normal' relative to each other
+        if((rc_list[i2] || rc_list[i2+1] )&&!(rc_list[i2] && rc_list[i2+1]))
+        { //the hideous rc matching logic - not complex, but annoying
+          if(!(( (raw_seq[i2][x_start]=='A')&&(raw_seq[i2+1][x_end]=='T')) ||
+                ((raw_seq[i2][x_start]=='T')&&(raw_seq[i2+1][x_end]=='A')) ||
+                ((raw_seq[i2][x_start]=='G')&&(raw_seq[i2+1][x_end]=='C')) ||
+                ((raw_seq[i2][x_start]=='C')&&(raw_seq[i2+1][x_end]=='G'))) ) 
+          {
+            full_match = false;
+          } else {
+            aligned_path.push_back(x_start);
+          }
+        }
+        else
+        { // forward match
+          if (!( (raw_seq[i2][x_start] == raw_seq[i2+1][x_end]) &&
+                (raw_seq[i2][x_start] != 'N') &&
+                (raw_seq[i2+1][x_end] != 'N') ) ) {
+            full_match = false;
+          } else {
+            aligned_path.push_back(x_start);
+          }
+        }
+      }
+      // grab the last part of our path, assuming we matched
+      if (full_match)
+        aligned_path.push_back(x_end);
+
+      if (aligned_path.size() > 0) {
+        result.push_back(aligned_path);
+        reversed.push_back(rc_list);
+      }
+    }
+    align_counter++;
+  }
+}
+
+
 // takes a string and sets it as the next seq 
 void
 Mussa::add_a_seq(string a_seq)