1 // This file is part of the Mussa source distribution.
2 // http://mussa.caltech.edu/
3 // Contact author: Tristan De Buysscher, tristan@caltech.edu
5 // This program and all associated source code files are Copyright (C) 2005
6 // the California Institute of Technology, Pasadena, CA, 91125 USA. It is
7 // under the GNU Public License; please see the included LICENSE.txt
8 // file for more information, or contact Tristan directly.
11 // ----------------------------------------
12 // ---------- mussa_nway.cc -----------
13 // ----------------------------------------
15 #include "mussa_nway.hh"
21 Nway_Paths::Nway_Paths()
26 Nway_Paths::setup(int sp_num, int w, int t)
30 soft_thres = threshold;
34 cout << "nway: species_num = " << species_num << ", thres = " << threshold
35 << ", softo = " << soft_thres << endl;
39 Nway_Paths::set_soft_thres(int sft_thres)
41 soft_thres = sft_thres;
47 // dumbly goes thru and combines path windows exactly adjacent (ie + 1 index)
48 // doesn't deal with interleaved adjacency
50 Nway_Paths::simple_refine()
52 // ext_path remembers the first window set in an extending path
53 vector<int> ext_path, cur_path, next_path, new_path;
54 list<vector<int> >::iterator pathz_i;
55 int i2, win_ext_len = 0;
56 bool extending, end = false;
60 refined_pathz.clear();
62 cout << "path number is: " << pathz.size() << endl;
63 pathz_i = pathz.begin();
66 while(pathz_i != pathz.end())
68 //cout << "path number is: " << pathz.size() << endl;
70 //cout << "glorpagh!!!!!" << count_loop << endl;
72 // keep track of current path and advance to next path
75 if (pathz_i == pathz.end())
80 //cout << "S\'Nork!\n";
84 // if node for each seq is equal to the next node+1 then for all
85 // sequences then we are extending
87 for(i2 = 0; i2 < species_num; i2++)
89 //cout << cur_path[i2] << " vs " << endl;
90 //cout << next_path[i2] << endl;
91 if (cur_path[i2] + 1 != next_path[i2])
100 //cout << "Raaaawwwrrr! I am extending\n";
105 //cout << "Larfnarg?\n";
106 // add the extend window length as first element and add as refined
108 new_path.insert(new_path.begin(),win_size + win_ext_len);
109 for(i2 = 1; i2 <= species_num; i2++)
111 if (new_path[i2] < 0)
113 //cout << "before: " << new_path[i2];
114 new_path[i2] += win_ext_len;
115 //cout << " after: " << new_path[i2] << endl;
118 refined_pathz.push_back(new_path);
121 ext_path = next_path;
124 cout << "r_path number is: " << refined_pathz.size() << endl;
129 Nway_Paths::add_path(vector<int> loaded_path)
131 pathz.push_back(loaded_path);
136 Nway_Paths::save(string save_file_path)
139 list<vector<int> >::iterator path_i, paths_end;
143 save_file.open(save_file_path.c_str(), ios::out);
145 save_file << "<Mussa type=flp seq_num=" << species_num;
146 save_file << " win=" << win_size;
147 // add a function para new_thres defaults to -1 to later deal with
148 // reanalysis with higher thres - if statement whether to record base thres
149 // or new thres (ie if -1, then base)
150 save_file << " thres=" << threshold << " >\n";
152 path_i = refined_pathz.begin();
153 paths_end = refined_pathz.end();
154 //path_i = pathz.begin();
155 //paths_end = pathz.end();
156 while (path_i != paths_end)
159 //cout << a_path.size() << endl;
160 //first entry is the window length of the windows in the path
161 save_file << a_path[0] << ":";
162 for(i = 1; i <= species_num; ++i)
164 save_file << a_path[i];
165 if (i != species_num)
172 save_file << "</Mussa>\n";
177 if (path_i == paths_end)
178 cout << "Arrrrrrgggghhhhhh\n";
182 Nway_Paths::seq_num()
189 Nway_Paths::load(string load_file_path)
192 string file_data_line, header_data, data, path_node, path_length;
193 int i, space_split_i, equal_split_i, comma_split_i, colon_split_i;
194 vector<int> loaded_path;
198 load_file.open(load_file_path.c_str(), ios::in);
202 err_msg = "Sequence File: " + load_file_path + " not found";
210 // grab mussa tag - discard for now...maybe check in future...
211 getline(load_file,file_data_line);
212 space_split_i = file_data_line.find(" ");
213 file_data_line = file_data_line.substr(space_split_i+1);
214 // grab type tag - need in future to distinguish between flp and vlp paths
215 space_split_i = file_data_line.find(" ");
216 file_data_line = file_data_line.substr(space_split_i+1);
217 // get species/seq number
218 space_split_i = file_data_line.find(" ");
219 header_data = file_data_line.substr(0,space_split_i);
220 equal_split_i = header_data.find("=");
221 data = file_data_line.substr(equal_split_i+1);
222 species_num = atoi (data.c_str());
223 file_data_line = file_data_line.substr(space_split_i+1);
225 space_split_i = file_data_line.find(" ");
226 header_data = file_data_line.substr(0,space_split_i);
227 equal_split_i = header_data.find("=");
228 data = file_data_line.substr(equal_split_i+1);
229 win_size = atoi (data.c_str());
230 file_data_line = file_data_line.substr(space_split_i+1);
232 space_split_i = file_data_line.find(" ");
233 header_data = file_data_line.substr(0,space_split_i);
234 equal_split_i = header_data.find("=");
235 data = file_data_line.substr(equal_split_i+1);
236 threshold = atoi (data.c_str());
237 file_data_line = file_data_line.substr(space_split_i+1);
239 cout << "seq_num=" << species_num << " win=" << win_size;
240 cout << " thres=" << threshold << endl;
242 // clear out the current data
243 refined_pathz.clear();
247 getline(load_file,file_data_line);
248 while ( (!load_file.eof()) && (file_data_line != "</Mussa>") )
250 if (file_data_line != "")
253 colon_split_i = file_data_line.find(":");
254 path_length = file_data_line.substr(0,colon_split_i);
255 loaded_path.push_back(atoi (path_length.c_str()));
256 file_data_line = file_data_line.substr(colon_split_i+1);
257 for(i = 0; i < species_num; i++)
259 comma_split_i = file_data_line.find(",");
260 path_node = file_data_line.substr(0, comma_split_i);
261 temp = atoi (path_node.c_str());
262 loaded_path.push_back(temp);
263 file_data_line = file_data_line.substr(comma_split_i+1);
265 refined_pathz.push_back(loaded_path);
267 getline(load_file,file_data_line);
281 Nway_Paths::path_search(vector<vector<FLPs> > all_comparisons, vector<int> path, int depth)
283 list<int> new_nodes, trans_check_nodes;
284 list<int>::iterator new_nodes_i, new_nodes_end;
286 bool trans_check_good;
288 new_nodes = all_comparisons[depth - 1][depth].matches(path[depth-1]);
289 new_nodes_i = new_nodes.begin();
290 new_nodes_end = new_nodes.end();
291 while(new_nodes_i != new_nodes_end)
293 //cout << " * species " << depth << " node: " << *new_nodes_i << endl;
294 // check transitivity with previous nodes in path
295 trans_check_good = true;
296 for(i = 0; i < depth - 1; i++)
298 trans_check_nodes = all_comparisons[i][depth].matches(path[i]);
299 if ( (trans_check_nodes.end() == find(trans_check_nodes.begin(),
300 trans_check_nodes.end(),
302 (trans_check_nodes.end() == find(trans_check_nodes.begin(),
303 trans_check_nodes.end(),
304 *new_nodes_i * -1) ) )
305 trans_check_good = false;
308 if (trans_check_good)
310 // this makes sure path nodes are recorded with RC status relative to
312 if ( path[depth-1] >= 0)
313 path.push_back(*new_nodes_i);
315 path.push_back(*new_nodes_i * -1);
317 if (depth < species_num - 1)
318 path_search(all_comparisons, path, depth + 1);
320 pathz.push_back(path);
326 // cout << " ****I have the power...\n";
328 /* use this if I ever get the friggin seqcomp match lists to sort...
329 if (binary_search(trans_check_nodes.begin(), trans_check_nodes.end(),
334 Nway_Paths::find_paths_r(vector<vector<FLPs> > all_comparisons)
337 int win_i, window_num;
339 list<int>::iterator new_nodes_i, new_nodes_end;
342 window_num = all_comparisons[0][1].win_num();
343 cout << window_num << endl;
344 // loop thru all windows in first species
345 for (win_i = 0; win_i < window_num; win_i++)
348 path.push_back(win_i);
349 new_nodes = all_comparisons[0][1].matches(path[0]);
350 new_nodes_i = new_nodes.begin();
351 new_nodes_end = new_nodes.end();
352 //if (new_nodes_i != new_nodes_end)
353 //cout << "* species 0 node: " << win_i << endl;
354 // cout << "foookin hell\n";
356 while(new_nodes_i != new_nodes_end)
358 //cout << " * species 1 node: " << *new_nodes_i << endl;
359 path[1] = *new_nodes_i;
360 path_search(all_comparisons, path, 2);
368 Nway_Paths::save_old(string save_file_path)
371 list<vector<int> >::iterator path_i, paths_end;
375 save_file.open(save_file_path.c_str(), ios::app);
377 path_i = pathz.begin();
378 paths_end = pathz.end();
379 while(path_i != paths_end)
382 //cout << a_path.size() << endl;
383 for(i = 0; i < species_num; ++i)
384 save_file << i << "," << a_path[i] << " ";
394 Nway_Paths::find_paths(vector<vector<FLPs> > all_comparisons)
397 vector<list<list<int> > > path_src_tree;
399 list<int>::iterator node_i, node_end;
400 <list<list<int> > >::iterator branch_i, branch_end;
403 path_src_tree.reserve(species_num - 1);
406 // loop thru all windows in first species
407 for (win_i = 0; win_i < window_num; win_i++)
409 // clear the path search tree
410 for(i = 0; i < species_num; i++)
411 path_src_tree[i].clear();
413 // top level kept empty even tho implicity has one entry of the first
414 // species at this window - why bother, adds a little speed
416 // get connection list for first species, creating a list of nodes
417 // of second species connected to the first species at this window
418 new_nodes = all_comparisons[0][1];
419 path_src_tree[1].push_back(new_nodes);
421 // loop thru rest of species for this window to see if any paths of matches
422 // go across all species
423 // if path search tree becomes empty, break out of loop, no reason to search further
425 while ((sp_i < species_num) && (path tree not empty))
427 branch_i = path_src_tree[1].begin();
428 branch_end = path_src_tree[1].end();
429 while (branch_i != branch_end)
431 node_i = branch_i->begin();
432 node_end = branch_i->end();
436 // loop over all current nodes
437 // get connection list for each node
438 // loop over each previous node in list
439 // get those nodes connection list
440 // intersect previous node connections with current
445 // insert any of the paths found into the master list of paths
447 // add no paths if tmp_pathz is empty...
451 void Nway_Paths::refine()
455 void Nway_Paths::print()
457 list<list<int> >::iterator pathz_i;
459 cout << "printing list of lists\n";
460 for (pathz_i = pathz.begin(); pathz_i != pathz.end(); ++pathz_i)
462 for (path_i = pathz_i->begin(); path_i != pathz_i->end(); ++path_i)
463 cout << *path_i << " ";