1 // This file is part of the Mussa source distribution.
2 // http://mussa.caltech.edu/
3 // Contact author: Tristan De Buysscher, tristan@caltech.edu
5 // This program and all associated source code files are Copyright (C) 2005
6 // the California Institute of Technology, Pasadena, CA, 91125 USA. It is
7 // under the GNU Public License; please see the included LICENSE.txt
8 // file for more information, or contact Tristan directly.
11 // ----------------------------------------
12 // ---------- mussa_nway.cc -----------
13 // ----------------------------------------
15 #include "mussa_nway.hh"
24 Nway_Paths::Nway_Paths()
29 Nway_Paths::setup(int sp_num, int w, int t)
33 soft_thres = threshold;
37 cout << "nway: species_num = " << species_num << ", thres = " << threshold
38 << ", softo = " << soft_thres << endl;
42 Nway_Paths::set_soft_thres(int sft_thres)
44 soft_thres = sft_thres;
50 // dumbly goes thru and combines path windows exactly adjacent (ie + 1 index)
51 // doesn't deal with interleaved adjacency
53 Nway_Paths::simple_refine()
55 // ext_path remembers the first window set in an extending path
56 vector<int> ext_path, cur_path, next_path, new_path;
57 list<vector<int> >::iterator pathz_i;
58 int i2, win_ext_len = 0;
59 bool extending, end = false;
63 refined_pathz.clear();
65 cout << "path number is: " << pathz.size() << endl;
66 pathz_i = pathz.begin();
69 while(pathz_i != pathz.end())
71 //cout << "path number is: " << pathz.size() << endl;
73 //cout << "glorpagh!!!!!" << count_loop << endl;
75 // keep track of current path and advance to next path
78 if (pathz_i == pathz.end())
83 //cout << "S\'Nork!\n";
87 // if node for each seq is equal to the next node+1 then for all
88 // sequences then we are extending
90 for(i2 = 0; i2 < species_num; i2++)
92 //cout << cur_path[i2] << " vs " << endl;
93 //cout << next_path[i2] << endl;
94 if (cur_path[i2] + 1 != next_path[i2])
103 //cout << "Raaaawwwrrr! I am extending\n";
108 //cout << "Larfnarg?\n";
109 // add the extend window length as first element and add as refined
111 new_path.insert(new_path.begin(),win_size + win_ext_len);
112 for(i2 = 1; i2 <= species_num; i2++)
114 if (new_path[i2] < 0)
116 //cout << "before: " << new_path[i2];
117 new_path[i2] += win_ext_len;
118 //cout << " after: " << new_path[i2] << endl;
121 refined_pathz.push_back(new_path);
124 ext_path = next_path;
127 cout << "r_path number is: " << refined_pathz.size() << endl;
132 Nway_Paths::add_path(vector<int> loaded_path)
134 pathz.push_back(loaded_path);
139 Nway_Paths::save(string save_file_path)
142 list<vector<int> >::iterator path_i, paths_end;
146 save_file.open(save_file_path.c_str(), ios::out);
148 save_file << "<Mussa type=flp seq_num=" << species_num;
149 save_file << " win=" << win_size;
150 // add a function para new_thres defaults to -1 to later deal with
151 // reanalysis with higher thres - if statement whether to record base thres
152 // or new thres (ie if -1, then base)
153 save_file << " thres=" << threshold << " >\n";
155 path_i = refined_pathz.begin();
156 paths_end = refined_pathz.end();
157 //path_i = pathz.begin();
158 //paths_end = pathz.end();
159 while (path_i != paths_end)
162 //cout << a_path.size() << endl;
163 //first entry is the window length of the windows in the path
164 save_file << a_path[0] << ":";
165 for(i = 1; i <= species_num; ++i)
167 save_file << a_path[i];
168 if (i != species_num)
175 save_file << "</Mussa>\n";
180 if (path_i == paths_end)
181 cout << "Arrrrrrgggghhhhhh\n";
185 Nway_Paths::seq_num()
192 Nway_Paths::load(string load_file_path)
195 string file_data_line, header_data, data, path_node, path_length;
196 int i, space_split_i, equal_split_i, comma_split_i, colon_split_i;
197 vector<int> loaded_path;
201 load_file.open(load_file_path.c_str(), ios::in);
205 err_msg = "Sequence File: " + load_file_path + " not found";
213 // grab mussa tag - discard for now...maybe check in future...
214 getline(load_file,file_data_line);
215 space_split_i = file_data_line.find(" ");
216 file_data_line = file_data_line.substr(space_split_i+1);
217 // grab type tag - need in future to distinguish between flp and vlp paths
218 space_split_i = file_data_line.find(" ");
219 file_data_line = file_data_line.substr(space_split_i+1);
220 // get species/seq number
221 space_split_i = file_data_line.find(" ");
222 header_data = file_data_line.substr(0,space_split_i);
223 equal_split_i = header_data.find("=");
224 data = file_data_line.substr(equal_split_i+1);
225 species_num = atoi (data.c_str());
226 file_data_line = file_data_line.substr(space_split_i+1);
228 space_split_i = file_data_line.find(" ");
229 header_data = file_data_line.substr(0,space_split_i);
230 equal_split_i = header_data.find("=");
231 data = file_data_line.substr(equal_split_i+1);
232 win_size = atoi (data.c_str());
233 file_data_line = file_data_line.substr(space_split_i+1);
235 space_split_i = file_data_line.find(" ");
236 header_data = file_data_line.substr(0,space_split_i);
237 equal_split_i = header_data.find("=");
238 data = file_data_line.substr(equal_split_i+1);
239 threshold = atoi (data.c_str());
240 file_data_line = file_data_line.substr(space_split_i+1);
242 cout << "seq_num=" << species_num << " win=" << win_size;
243 cout << " thres=" << threshold << endl;
245 // clear out the current data
246 refined_pathz.clear();
250 getline(load_file,file_data_line);
251 while ( (!load_file.eof()) && (file_data_line != "</Mussa>") )
253 if (file_data_line != "")
256 colon_split_i = file_data_line.find(":");
257 path_length = file_data_line.substr(0,colon_split_i);
258 loaded_path.push_back(atoi (path_length.c_str()));
259 file_data_line = file_data_line.substr(colon_split_i+1);
260 for(i = 0; i < species_num; i++)
262 comma_split_i = file_data_line.find(",");
263 path_node = file_data_line.substr(0, comma_split_i);
264 temp = atoi (path_node.c_str());
265 loaded_path.push_back(temp);
266 file_data_line = file_data_line.substr(comma_split_i+1);
268 refined_pathz.push_back(loaded_path);
270 getline(load_file,file_data_line);
284 Nway_Paths::path_search(vector<vector<FLPs> > all_comparisons, vector<int> path, int depth)
286 list<int> new_nodes, trans_check_nodes;
287 list<int>::iterator new_nodes_i, new_nodes_end;
289 bool trans_check_good;
291 new_nodes = all_comparisons[depth - 1][depth].matches(path[depth-1]);
292 new_nodes_i = new_nodes.begin();
293 new_nodes_end = new_nodes.end();
294 while(new_nodes_i != new_nodes_end)
296 //cout << " * species " << depth << " node: " << *new_nodes_i << endl;
297 // check transitivity with previous nodes in path
298 trans_check_good = true;
299 for(i = 0; i < depth - 1; i++)
301 trans_check_nodes = all_comparisons[i][depth].matches(path[i]);
302 if ( (trans_check_nodes.end() == find(trans_check_nodes.begin(),
303 trans_check_nodes.end(),
305 (trans_check_nodes.end() == find(trans_check_nodes.begin(),
306 trans_check_nodes.end(),
307 *new_nodes_i * -1) ) )
308 trans_check_good = false;
311 if (trans_check_good)
313 // this makes sure path nodes are recorded with RC status relative to
315 if ( path[depth-1] >= 0)
316 path.push_back(*new_nodes_i);
318 path.push_back(*new_nodes_i * -1);
320 if (depth < species_num - 1)
321 path_search(all_comparisons, path, depth + 1);
323 pathz.push_back(path);
329 // cout << " ****I have the power...\n";
331 /* use this if I ever get the friggin seqcomp match lists to sort...
332 if (binary_search(trans_check_nodes.begin(), trans_check_nodes.end(),
337 Nway_Paths::find_paths_r(vector<vector<FLPs> > all_comparisons)
340 int win_i, window_num;
342 list<int>::iterator new_nodes_i, new_nodes_end;
345 window_num = all_comparisons[0][1].win_num();
346 cout << window_num << endl;
347 // loop thru all windows in first species
348 for (win_i = 0; win_i < window_num; win_i++)
351 path.push_back(win_i);
352 new_nodes = all_comparisons[0][1].matches(path[0]);
353 new_nodes_i = new_nodes.begin();
354 new_nodes_end = new_nodes.end();
355 //if (new_nodes_i != new_nodes_end)
356 //cout << "* species 0 node: " << win_i << endl;
357 // cout << "foookin hell\n";
359 while(new_nodes_i != new_nodes_end)
361 //cout << " * species 1 node: " << *new_nodes_i << endl;
362 path[1] = *new_nodes_i;
363 path_search(all_comparisons, path, 2);
371 Nway_Paths::save_old(string save_file_path)
374 list<vector<int> >::iterator path_i, paths_end;
378 save_file.open(save_file_path.c_str(), ios::app);
380 path_i = pathz.begin();
381 paths_end = pathz.end();
382 while(path_i != paths_end)
385 //cout << a_path.size() << endl;
386 for(i = 0; i < species_num; ++i)
387 save_file << i << "," << a_path[i] << " ";
397 Nway_Paths::find_paths(vector<vector<FLPs> > all_comparisons)
400 vector<list<list<int> > > path_src_tree;
402 list<int>::iterator node_i, node_end;
403 <list<list<int> > >::iterator branch_i, branch_end;
406 path_src_tree.reserve(species_num - 1);
409 // loop thru all windows in first species
410 for (win_i = 0; win_i < window_num; win_i++)
412 // clear the path search tree
413 for(i = 0; i < species_num; i++)
414 path_src_tree[i].clear();
416 // top level kept empty even tho implicity has one entry of the first
417 // species at this window - why bother, adds a little speed
419 // get connection list for first species, creating a list of nodes
420 // of second species connected to the first species at this window
421 new_nodes = all_comparisons[0][1];
422 path_src_tree[1].push_back(new_nodes);
424 // loop thru rest of species for this window to see if any paths of matches
425 // go across all species
426 // if path search tree becomes empty, break out of loop, no reason to search further
428 while ((sp_i < species_num) && (path tree not empty))
430 branch_i = path_src_tree[1].begin();
431 branch_end = path_src_tree[1].end();
432 while (branch_i != branch_end)
434 node_i = branch_i->begin();
435 node_end = branch_i->end();
439 // loop over all current nodes
440 // get connection list for each node
441 // loop over each previous node in list
442 // get those nodes connection list
443 // intersect previous node connections with current
448 // insert any of the paths found into the master list of paths
450 // add no paths if tmp_pathz is empty...
454 void Nway_Paths::refine()
458 void Nway_Paths::print()
460 list<list<int> >::iterator pathz_i;
462 cout << "printing list of lists\n";
463 for (pathz_i = pathz.begin(); pathz_i != pathz.end(); ++pathz_i)
465 for (path_i = pathz_i->begin(); path_i != pathz_i->end(); ++path_i)
466 cout << *path_i << " ";