[project @ 13]
[mussa.git] / mussa_nway.cc
index 5c67ac92456f7a466330119602cb83acf60dafa2..3e233b03dd3f4cd803e12ce572b941201f7440ad 100644 (file)
@@ -1,3 +1,13 @@
+//  This file is part of the Mussa source distribution.
+//  http://mussa.caltech.edu/
+//  Contact author: Tristan  De Buysscher, tristan@caltech.edu
+
+// This program and all associated source code files are Copyright (C) 2005
+// the California Institute of Technology, Pasadena, CA, 91125 USA.  It is
+// under the GNU Public License; please see the included LICENSE.txt
+// file for more information, or contact Tristan directly.
+
+
 //                        ----------------------------------------
 //                         ---------- mussa_nway.cc  -----------
 //                        ----------------------------------------
@@ -20,99 +30,23 @@ Nway_Paths::setup(int sp_num, int w, int t)
 {
   species_num = sp_num;
   threshold = t;
+  soft_thres = threshold;
   win_size = w;
   pathz.clear();
-}
-
 
-void
-Nway_Paths::path_search(vector<vector<FLPs> > all_comparisons, vector<int> path, int depth)
-{
-  list<int> new_nodes, trans_check_nodes;
-  list<int>::iterator new_nodes_i, new_nodes_end;
-  int i;
-  bool trans_check_good;
-
-  new_nodes = all_comparisons[depth - 1][depth].matches(path[depth-1]);
-  new_nodes_i = new_nodes.begin();
-  new_nodes_end = new_nodes.end();
-  while(new_nodes_i != new_nodes_end)
-  {
-    //cout << "    * species " << depth << " node: " << *new_nodes_i << endl;
-    // check transitivity with previous nodes in path
-    for(i = 0; i < depth - 1; i++)
-    {
-      trans_check_good = true;
-      trans_check_nodes = all_comparisons[i][depth].matches(path[i]);
-      if ( (trans_check_nodes.end() == find(trans_check_nodes.begin(),
-                                            trans_check_nodes.end(),
-                                            *new_nodes_i) ) &&
-           (trans_check_nodes.end() == find(trans_check_nodes.begin(),
-                                           trans_check_nodes.end(),
-                                           *new_nodes_i * -1) ) )
-        trans_check_good = false;
-    }
-
-    if (trans_check_good)
-    {
-      // this makes sure path nodes are recorded with RC status relative to
-      // the base species
-      if ( path[depth-1] >= 0)
-        path.push_back(*new_nodes_i);
-      else
-        path.push_back(*new_nodes_i * -1);
-
-      if (depth < species_num - 1)
-        path_search(all_comparisons, path, depth + 1);
-      else
-        pathz.push_back(path);
-      path.pop_back();
-    } 
-    ++new_nodes_i;
-  }
+  cout << "nway: species_num = " << species_num << ", thres = " << threshold
+       << ", softo = " << soft_thres << endl;
 }
-//          cout << "    ****I have the power...\n";
-
-/* use this if I ever get the friggin seqcomp match lists to sort...
-      if (binary_search(trans_check_nodes.begin(), trans_check_nodes.end(), 
-                        *new_nodes_i))
-*/
 
 void
-Nway_Paths::find_paths_r(vector<vector<FLPs> > all_comparisons)
+Nway_Paths::set_soft_thres(int sft_thres)
 {
-  vector<int> path;
-  int win_i, window_num;
-  list<int> new_nodes;
-  list<int>::iterator new_nodes_i, new_nodes_end;
-
-  pathz.clear();
-  window_num = all_comparisons[0][1].win_num();
-  cout << window_num << endl;
-  // loop thru all windows in first species
-  for (win_i = 0; win_i < window_num; win_i++)
-  {
-    path.clear();
-    path.push_back(win_i);
-    new_nodes = all_comparisons[0][1].matches(path[0]);
-    new_nodes_i = new_nodes.begin();
-    new_nodes_end = new_nodes.end();
-    //if (new_nodes_i != new_nodes_end)
-    //cout << "* species 0 node: " << win_i << endl;
-    //  cout << "foookin hell\n";
-    path.push_back(0);
-    while(new_nodes_i != new_nodes_end)
-    {
-      //cout << "  * species 1 node: " << *new_nodes_i << endl;
-      path[1] = *new_nodes_i;
-      path_search(all_comparisons, path, 2);
-      ++new_nodes_i;
-    }
-  }
+  soft_thres = sft_thres;
 }
 
 
 
+
 // dumbly goes thru and combines path windows exactly adjacent (ie + 1 index)
 // doesn't deal with interleaved adjacency
 void
@@ -122,29 +56,48 @@ Nway_Paths::simple_refine()
   vector<int> ext_path, cur_path, next_path, new_path;
   list<vector<int> >::iterator pathz_i;
   int i2, win_ext_len = 0;
-  bool extending;
+  bool extending, end = false;
+  int count_loop = 0;
 
 
   refined_pathz.clear();
 
+  cout << "path number is: " << pathz.size() << endl;
   pathz_i = pathz.begin();
-  ext_path = *pathz_i; 
+  ext_path = *pathz_i;
+
   while(pathz_i != pathz.end())
   {
+    //cout << "path number is: " << pathz.size() << endl;
+    //++count_loop;
+    //cout << "glorpagh!!!!!" << count_loop << endl;
+    
     // keep track of current path and advance to next path
     cur_path = *pathz_i;
     ++pathz_i;
-    next_path = *pathz_i;
+    if (pathz_i == pathz.end())
+      end = true;
+    else
+      next_path = *pathz_i;
+
+    //cout << "S\'Nork!\n";
  
-    // if node for each seq is equal to the next node+1 then for all
-    // sequences then we are extending
-    extending = true;
-    for(i2 = 0; i2 < species_num; i2++)
+    if (not end)
     {
-      //cout << cur_path[i2] << " vs " << next_path[i2] << endl; 
-      if (cur_path[i2] + 1 != next_path[i2])
-        extending = false;
+      // if node for each seq is equal to the next node+1 then for all
+      // sequences then we are extending
+      extending = true;
+      for(i2 = 0; i2 < species_num; i2++)
+      {
+       //cout << cur_path[i2] << " vs " << endl;
+       //cout << next_path[i2] << endl; 
+       if (cur_path[i2] + 1 != next_path[i2])
+         extending = false;
+      }
     }
+    else
+      extending = false;
+
     if (extending)
     {
       //cout << "Raaaawwwrrr!  I am extending\n";
@@ -152,17 +105,18 @@ Nway_Paths::simple_refine()
     }
     else
     {
+      //cout << "Larfnarg?\n";
       // add the extend window length as first element and add as refined
       new_path = ext_path;
       new_path.insert(new_path.begin(),win_size + win_ext_len);
       for(i2 = 1; i2 <= species_num; i2++)
       {
-        if (new_path[i2] < 0)
-        {
-          //cout << "before: " << new_path[i2];
-          new_path[i2] +=  win_ext_len;
-          //cout << " after: " << new_path[i2] << endl;
-        }
+       if (new_path[i2] < 0)
+       {
+         //cout << "before: " << new_path[i2];
+         new_path[i2] +=  win_ext_len;
+         //cout << " after: " << new_path[i2] << endl;
+       }
       }
       refined_pathz.push_back(new_path);
       // reset stuff
@@ -170,6 +124,7 @@ Nway_Paths::simple_refine()
       ext_path = next_path;
     }
   }
+  cout << "r_path number is: " << refined_pathz.size() << endl;
 }
 
 
@@ -324,6 +279,94 @@ Nway_Paths::load(string load_file_path)
 
 
 
+
+void
+Nway_Paths::path_search(vector<vector<FLPs> > all_comparisons, vector<int> path, int depth)
+{
+  list<int> new_nodes, trans_check_nodes;
+  list<int>::iterator new_nodes_i, new_nodes_end;
+  int i;
+  bool trans_check_good;
+
+  new_nodes = all_comparisons[depth - 1][depth].matches(path[depth-1]);
+  new_nodes_i = new_nodes.begin();
+  new_nodes_end = new_nodes.end();
+  while(new_nodes_i != new_nodes_end)
+  {
+    //cout << "    * species " << depth << " node: " << *new_nodes_i << endl;
+    // check transitivity with previous nodes in path
+    trans_check_good = true;
+    for(i = 0; i < depth - 1; i++)
+    {
+      trans_check_nodes = all_comparisons[i][depth].matches(path[i]);
+      if ( (trans_check_nodes.end() == find(trans_check_nodes.begin(),
+                                            trans_check_nodes.end(),
+                                            *new_nodes_i) ) &&
+           (trans_check_nodes.end() == find(trans_check_nodes.begin(),
+                                           trans_check_nodes.end(),
+                                           *new_nodes_i * -1) ) )
+        trans_check_good = false;
+    }
+
+    if (trans_check_good)
+    {
+      // this makes sure path nodes are recorded with RC status relative to
+      // the base species
+      if ( path[depth-1] >= 0)
+        path.push_back(*new_nodes_i);
+      else
+        path.push_back(*new_nodes_i * -1);
+
+      if (depth < species_num - 1)
+        path_search(all_comparisons, path, depth + 1);
+      else
+        pathz.push_back(path);
+      path.pop_back();
+    } 
+    ++new_nodes_i;
+  }
+}
+//          cout << "    ****I have the power...\n";
+
+/* use this if I ever get the friggin seqcomp match lists to sort...
+      if (binary_search(trans_check_nodes.begin(), trans_check_nodes.end(), 
+                        *new_nodes_i))
+*/
+
+void
+Nway_Paths::find_paths_r(vector<vector<FLPs> > all_comparisons)
+{
+  vector<int> path;
+  int win_i, window_num;
+  list<int> new_nodes;
+  list<int>::iterator new_nodes_i, new_nodes_end;
+
+  pathz.clear();
+  window_num = all_comparisons[0][1].win_num();
+  cout << window_num << endl;
+  // loop thru all windows in first species
+  for (win_i = 0; win_i < window_num; win_i++)
+  {
+    path.clear();
+    path.push_back(win_i);
+    new_nodes = all_comparisons[0][1].matches(path[0]);
+    new_nodes_i = new_nodes.begin();
+    new_nodes_end = new_nodes.end();
+    //if (new_nodes_i != new_nodes_end)
+    //cout << "* species 0 node: " << win_i << endl;
+    //  cout << "foookin hell\n";
+    path.push_back(0);
+    while(new_nodes_i != new_nodes_end)
+    {
+      //cout << "  * species 1 node: " << *new_nodes_i << endl;
+      path[1] = *new_nodes_i;
+      path_search(all_comparisons, path, 2);
+      ++new_nodes_i;
+    }
+  }
+}
+
+
 void
 Nway_Paths::save_old(string save_file_path)
 {