1 // ----------------------------------------
2 // ---------- mussa_nway.cc -----------
3 // ----------------------------------------
5 #include "mussa_nway.hh"
14 Nway_Paths::Nway_Paths()
19 Nway_Paths::setup(int sp_num, int w, int t)
23 soft_thres = threshold;
27 cout << "nway: species_num = " << species_num << ", thres = " << threshold
28 << ", softo = " << soft_thres << endl;
32 Nway_Paths::set_soft_thres(int sft_thres)
34 soft_thres = sft_thres;
40 // dumbly goes thru and combines path windows exactly adjacent (ie + 1 index)
41 // doesn't deal with interleaved adjacency
43 Nway_Paths::simple_refine()
45 // ext_path remembers the first window set in an extending path
46 vector<int> ext_path, cur_path, next_path, new_path;
47 list<vector<int> >::iterator pathz_i;
48 int i2, win_ext_len = 0;
49 bool extending, end = false;
53 refined_pathz.clear();
55 cout << "path number is: " << pathz.size() << endl;
56 pathz_i = pathz.begin();
59 while(pathz_i != pathz.end())
61 //cout << "path number is: " << pathz.size() << endl;
63 //cout << "glorpagh!!!!!" << count_loop << endl;
65 // keep track of current path and advance to next path
68 if (pathz_i == pathz.end())
73 //cout << "S\'Nork!\n";
77 // if node for each seq is equal to the next node+1 then for all
78 // sequences then we are extending
80 for(i2 = 0; i2 < species_num; i2++)
82 //cout << cur_path[i2] << " vs " << endl;
83 //cout << next_path[i2] << endl;
84 if (cur_path[i2] + 1 != next_path[i2])
93 //cout << "Raaaawwwrrr! I am extending\n";
98 //cout << "Larfnarg?\n";
99 // add the extend window length as first element and add as refined
101 new_path.insert(new_path.begin(),win_size + win_ext_len);
102 for(i2 = 1; i2 <= species_num; i2++)
104 if (new_path[i2] < 0)
106 //cout << "before: " << new_path[i2];
107 new_path[i2] += win_ext_len;
108 //cout << " after: " << new_path[i2] << endl;
111 refined_pathz.push_back(new_path);
114 ext_path = next_path;
117 cout << "r_path number is: " << refined_pathz.size() << endl;
122 Nway_Paths::add_path(vector<int> loaded_path)
124 pathz.push_back(loaded_path);
129 Nway_Paths::save(string save_file_path)
132 list<vector<int> >::iterator path_i, paths_end;
136 save_file.open(save_file_path.c_str(), ios::out);
138 save_file << "<Mussa type=flp seq_num=" << species_num;
139 save_file << " win=" << win_size;
140 // add a function para new_thres defaults to -1 to later deal with
141 // reanalysis with higher thres - if statement whether to record base thres
142 // or new thres (ie if -1, then base)
143 save_file << " thres=" << threshold << " >\n";
145 path_i = refined_pathz.begin();
146 paths_end = refined_pathz.end();
147 //path_i = pathz.begin();
148 //paths_end = pathz.end();
149 while (path_i != paths_end)
152 //cout << a_path.size() << endl;
153 //first entry is the window length of the windows in the path
154 save_file << a_path[0] << ":";
155 for(i = 1; i <= species_num; ++i)
157 save_file << a_path[i];
158 if (i != species_num)
165 save_file << "</Mussa>\n";
170 if (path_i == paths_end)
171 cout << "Arrrrrrgggghhhhhh\n";
175 Nway_Paths::seq_num()
182 Nway_Paths::load(string load_file_path)
185 string file_data_line, header_data, data, path_node, path_length;
186 int i, space_split_i, equal_split_i, comma_split_i, colon_split_i;
187 vector<int> loaded_path;
191 load_file.open(load_file_path.c_str(), ios::in);
195 err_msg = "Sequence File: " + load_file_path + " not found";
203 // grab mussa tag - discard for now...maybe check in future...
204 getline(load_file,file_data_line);
205 space_split_i = file_data_line.find(" ");
206 file_data_line = file_data_line.substr(space_split_i+1);
207 // grab type tag - need in future to distinguish between flp and vlp paths
208 space_split_i = file_data_line.find(" ");
209 file_data_line = file_data_line.substr(space_split_i+1);
210 // get species/seq number
211 space_split_i = file_data_line.find(" ");
212 header_data = file_data_line.substr(0,space_split_i);
213 equal_split_i = header_data.find("=");
214 data = file_data_line.substr(equal_split_i+1);
215 species_num = atoi (data.c_str());
216 file_data_line = file_data_line.substr(space_split_i+1);
218 space_split_i = file_data_line.find(" ");
219 header_data = file_data_line.substr(0,space_split_i);
220 equal_split_i = header_data.find("=");
221 data = file_data_line.substr(equal_split_i+1);
222 win_size = atoi (data.c_str());
223 file_data_line = file_data_line.substr(space_split_i+1);
225 space_split_i = file_data_line.find(" ");
226 header_data = file_data_line.substr(0,space_split_i);
227 equal_split_i = header_data.find("=");
228 data = file_data_line.substr(equal_split_i+1);
229 threshold = atoi (data.c_str());
230 file_data_line = file_data_line.substr(space_split_i+1);
232 cout << "seq_num=" << species_num << " win=" << win_size;
233 cout << " thres=" << threshold << endl;
235 // clear out the current data
236 refined_pathz.clear();
240 getline(load_file,file_data_line);
241 while ( (!load_file.eof()) && (file_data_line != "</Mussa>") )
243 if (file_data_line != "")
246 colon_split_i = file_data_line.find(":");
247 path_length = file_data_line.substr(0,colon_split_i);
248 loaded_path.push_back(atoi (path_length.c_str()));
249 file_data_line = file_data_line.substr(colon_split_i+1);
250 for(i = 0; i < species_num; i++)
252 comma_split_i = file_data_line.find(",");
253 path_node = file_data_line.substr(0, comma_split_i);
254 temp = atoi (path_node.c_str());
255 loaded_path.push_back(temp);
256 file_data_line = file_data_line.substr(comma_split_i+1);
258 refined_pathz.push_back(loaded_path);
260 getline(load_file,file_data_line);
274 Nway_Paths::path_search(vector<vector<FLPs> > all_comparisons, vector<int> path, int depth)
276 list<int> new_nodes, trans_check_nodes;
277 list<int>::iterator new_nodes_i, new_nodes_end;
279 bool trans_check_good;
281 new_nodes = all_comparisons[depth - 1][depth].matches(path[depth-1]);
282 new_nodes_i = new_nodes.begin();
283 new_nodes_end = new_nodes.end();
284 while(new_nodes_i != new_nodes_end)
286 //cout << " * species " << depth << " node: " << *new_nodes_i << endl;
287 // check transitivity with previous nodes in path
288 trans_check_good = true;
289 for(i = 0; i < depth - 1; i++)
291 trans_check_nodes = all_comparisons[i][depth].matches(path[i]);
292 if ( (trans_check_nodes.end() == find(trans_check_nodes.begin(),
293 trans_check_nodes.end(),
295 (trans_check_nodes.end() == find(trans_check_nodes.begin(),
296 trans_check_nodes.end(),
297 *new_nodes_i * -1) ) )
298 trans_check_good = false;
301 if (trans_check_good)
303 // this makes sure path nodes are recorded with RC status relative to
305 if ( path[depth-1] >= 0)
306 path.push_back(*new_nodes_i);
308 path.push_back(*new_nodes_i * -1);
310 if (depth < species_num - 1)
311 path_search(all_comparisons, path, depth + 1);
313 pathz.push_back(path);
319 // cout << " ****I have the power...\n";
321 /* use this if I ever get the friggin seqcomp match lists to sort...
322 if (binary_search(trans_check_nodes.begin(), trans_check_nodes.end(),
327 Nway_Paths::find_paths_r(vector<vector<FLPs> > all_comparisons)
330 int win_i, window_num;
332 list<int>::iterator new_nodes_i, new_nodes_end;
335 window_num = all_comparisons[0][1].win_num();
336 cout << window_num << endl;
337 // loop thru all windows in first species
338 for (win_i = 0; win_i < window_num; win_i++)
341 path.push_back(win_i);
342 new_nodes = all_comparisons[0][1].matches(path[0]);
343 new_nodes_i = new_nodes.begin();
344 new_nodes_end = new_nodes.end();
345 //if (new_nodes_i != new_nodes_end)
346 //cout << "* species 0 node: " << win_i << endl;
347 // cout << "foookin hell\n";
349 while(new_nodes_i != new_nodes_end)
351 //cout << " * species 1 node: " << *new_nodes_i << endl;
352 path[1] = *new_nodes_i;
353 path_search(all_comparisons, path, 2);
361 Nway_Paths::save_old(string save_file_path)
364 list<vector<int> >::iterator path_i, paths_end;
368 save_file.open(save_file_path.c_str(), ios::app);
370 path_i = pathz.begin();
371 paths_end = pathz.end();
372 while(path_i != paths_end)
375 //cout << a_path.size() << endl;
376 for(i = 0; i < species_num; ++i)
377 save_file << i << "," << a_path[i] << " ";
387 Nway_Paths::find_paths(vector<vector<FLPs> > all_comparisons)
390 vector<list<list<int> > > path_src_tree;
392 list<int>::iterator node_i, node_end;
393 <list<list<int> > >::iterator branch_i, branch_end;
396 path_src_tree.reserve(species_num - 1);
399 // loop thru all windows in first species
400 for (win_i = 0; win_i < window_num; win_i++)
402 // clear the path search tree
403 for(i = 0; i < species_num; i++)
404 path_src_tree[i].clear();
406 // top level kept empty even tho implicity has one entry of the first
407 // species at this window - why bother, adds a little speed
409 // get connection list for first species, creating a list of nodes
410 // of second species connected to the first species at this window
411 new_nodes = all_comparisons[0][1];
412 path_src_tree[1].push_back(new_nodes);
414 // loop thru rest of species for this window to see if any paths of matches
415 // go across all species
416 // if path search tree becomes empty, break out of loop, no reason to search further
418 while ((sp_i < species_num) && (path tree not empty))
420 branch_i = path_src_tree[1].begin();
421 branch_end = path_src_tree[1].end();
422 while (branch_i != branch_end)
424 node_i = branch_i->begin();
425 node_end = branch_i->end();
429 // loop over all current nodes
430 // get connection list for each node
431 // loop over each previous node in list
432 // get those nodes connection list
433 // intersect previous node connections with current
438 // insert any of the paths found into the master list of paths
440 // add no paths if tmp_pathz is empty...
444 void Nway_Paths::refine()
448 void Nway_Paths::print()
450 list<list<int> >::iterator pathz_i;
452 cout << "printing list of lists\n";
453 for (pathz_i = pathz.begin(); pathz_i != pathz.end(); ++pathz_i)
455 for (path_i = pathz_i->begin(); path_i != pathz_i->end(); ++path_i)
456 cout << *path_i << " ";