1 // ----------------------------------------
2 // ---------- mussa_nway.cc -----------
3 // ----------------------------------------
5 #include "mussa_nway.hh"
14 Nway_Paths::Nway_Paths()
19 Nway_Paths::setup(int sp_num, int w, int t)
29 Nway_Paths::path_search(vector<vector<FLPs> > all_comparisons, vector<int> path, int depth)
31 list<int> new_nodes, trans_check_nodes;
32 list<int>::iterator new_nodes_i, new_nodes_end;
34 bool trans_check_good;
36 new_nodes = all_comparisons[depth - 1][depth].matches(path[depth-1]);
37 new_nodes_i = new_nodes.begin();
38 new_nodes_end = new_nodes.end();
39 while(new_nodes_i != new_nodes_end)
41 //cout << " * species " << depth << " node: " << *new_nodes_i << endl;
42 // check transitivity with previous nodes in path
43 for(i = 0; i < depth - 1; i++)
45 trans_check_good = true;
46 trans_check_nodes = all_comparisons[i][depth].matches(path[i]);
47 if ( (trans_check_nodes.end() == find(trans_check_nodes.begin(),
48 trans_check_nodes.end(),
50 (trans_check_nodes.end() == find(trans_check_nodes.begin(),
51 trans_check_nodes.end(),
52 *new_nodes_i * -1) ) )
53 trans_check_good = false;
58 // this makes sure path nodes are recorded with RC status relative to
60 if ( path[depth-1] >= 0)
61 path.push_back(*new_nodes_i);
63 path.push_back(*new_nodes_i * -1);
65 if (depth < species_num - 1)
66 path_search(all_comparisons, path, depth + 1);
68 pathz.push_back(path);
74 // cout << " ****I have the power...\n";
76 /* use this if I ever get the friggin seqcomp match lists to sort...
77 if (binary_search(trans_check_nodes.begin(), trans_check_nodes.end(),
82 Nway_Paths::find_paths_r(vector<vector<FLPs> > all_comparisons)
85 int win_i, window_num;
87 list<int>::iterator new_nodes_i, new_nodes_end;
90 window_num = all_comparisons[0][1].win_num();
91 cout << window_num << endl;
92 // loop thru all windows in first species
93 for (win_i = 0; win_i < window_num; win_i++)
96 path.push_back(win_i);
97 new_nodes = all_comparisons[0][1].matches(path[0]);
98 new_nodes_i = new_nodes.begin();
99 new_nodes_end = new_nodes.end();
100 //if (new_nodes_i != new_nodes_end)
101 //cout << "* species 0 node: " << win_i << endl;
102 // cout << "foookin hell\n";
104 while(new_nodes_i != new_nodes_end)
106 //cout << " * species 1 node: " << *new_nodes_i << endl;
107 path[1] = *new_nodes_i;
108 path_search(all_comparisons, path, 2);
114 // dumbly goes thru and combines path windows exactly adjacent (ie + 1 index)
115 // doesn't deal with interleaved adjacency
117 Nway_Paths::simple_refine()
119 // ext_path remembers the first window set in an extending path
120 vector<int> ext_path, cur_path, next_path, new_path;
121 list<vector<int> >::iterator pathz_i;
122 int i2, win_ext_len = 0;
126 refined_pathz.clear();
128 pathz_i = pathz.begin();
130 while(pathz_i != pathz.end())
132 // keep track of current path and advance to next path
135 next_path = *pathz_i;
137 // if node for each seq is equal to the next node+1 then for all
138 // sequences then we are extending
140 for(i2 = 0; i2 < species_num; i2++)
142 //cout << cur_path[i2] << " vs " << next_path[i2] << endl;
143 if (cur_path[i2] + 1 != next_path[i2])
148 //cout << "Raaaawwwrrr! I am extending\n";
153 // add the extend window length as first element and add as refined
155 new_path.insert(new_path.begin(),win_size + win_ext_len);
156 refined_pathz.push_back(new_path);
159 ext_path = next_path;
166 Nway_Paths::add_path(vector<int> loaded_path)
168 pathz.push_back(loaded_path);
173 Nway_Paths::save(string save_file_path)
176 list<vector<int> >::iterator path_i, paths_end;
180 save_file.open(save_file_path.c_str(), ios::out);
182 save_file << "<Mussa type=flp seq_num=" << species_num;
183 save_file << " win=" << win_size;
184 // add a function para new_thres defaults to -1 to later deal with
185 // reanalysis with higher thres - if statement whether to record base thres
186 // or new thres (ie if -1, then base)
187 save_file << " thres=" << threshold << " >\n";
189 path_i = refined_pathz.begin();
190 paths_end = refined_pathz.end();
191 //path_i = pathz.begin();
192 //paths_end = pathz.end();
193 while (path_i != paths_end)
196 //cout << a_path.size() << endl;
197 //first entry is the window length of the windows in the path
198 save_file << a_path[0] << ":";
199 for(i = 1; i <= species_num; ++i)
201 save_file << a_path[i];
202 if (i != species_num)
209 save_file << "</Mussa>\n";
214 if (path_i == paths_end)
215 cout << "Arrrrrrgggghhhhhh\n";
219 Nway_Paths::load(string load_file_path)
222 string file_data_line, header_data, data, path_node, path_length;
223 int i, space_split_i, equal_split_i, comma_split_i, colon_split_i;
224 vector<int> loaded_path;
227 load_file.open(load_file_path.c_str(), ios::in);
230 // grab mussa tag - discard for now...maybe check in future...
231 getline(load_file,file_data_line);
232 space_split_i = file_data_line.find(" ");
233 file_data_line = file_data_line.substr(space_split_i+1);
234 // grab type tag - need in future to distinguish between flp and vlp paths
235 space_split_i = file_data_line.find(" ");
236 file_data_line = file_data_line.substr(space_split_i+1);
237 // get species/seq number
238 space_split_i = file_data_line.find(" ");
239 header_data = file_data_line.substr(0,space_split_i);
240 equal_split_i = header_data.find("=");
241 data = file_data_line.substr(equal_split_i+1);
242 species_num = atoi (data.c_str());
243 file_data_line = file_data_line.substr(space_split_i+1);
245 space_split_i = file_data_line.find(" ");
246 header_data = file_data_line.substr(0,space_split_i);
247 equal_split_i = header_data.find("=");
248 data = file_data_line.substr(equal_split_i+1);
249 win_size = atoi (data.c_str());
250 file_data_line = file_data_line.substr(space_split_i+1);
252 space_split_i = file_data_line.find(" ");
253 header_data = file_data_line.substr(0,space_split_i);
254 equal_split_i = header_data.find("=");
255 data = file_data_line.substr(equal_split_i+1);
256 threshold = atoi (data.c_str());
257 file_data_line = file_data_line.substr(space_split_i+1);
259 cout << "seq_num=" << species_num << " win=" << win_size;
260 cout << " thres=" << threshold << endl;
262 // clear out the current data
263 refined_pathz.clear();
267 getline(load_file,file_data_line);
268 while ( (!load_file.eof()) && (file_data_line != "</Mussa>") )
270 if (file_data_line != "")
273 colon_split_i = file_data_line.find(":");
274 path_length = file_data_line.substr(0,colon_split_i);
275 loaded_path.push_back(atoi (path_length.c_str()));
276 file_data_line = file_data_line.substr(colon_split_i+1);
277 for(i = 0; i < species_num; i++)
279 comma_split_i = file_data_line.find(",");
280 path_node = file_data_line.substr(0, comma_split_i);
281 temp = atoi (path_node.c_str());
282 loaded_path.push_back(temp);
283 file_data_line = file_data_line.substr(comma_split_i+1);
285 refined_pathz.push_back(loaded_path);
287 getline(load_file,file_data_line);
299 Nway_Paths::save_old(string save_file_path)
302 list<vector<int> >::iterator path_i, paths_end;
306 save_file.open(save_file_path.c_str(), ios::app);
308 path_i = pathz.begin();
309 paths_end = pathz.end();
310 while(path_i != paths_end)
313 //cout << a_path.size() << endl;
314 for(i = 0; i < species_num; ++i)
315 save_file << i << "," << a_path[i] << " ";
325 Nway_Paths::find_paths(vector<vector<FLPs> > all_comparisons)
328 vector<list<list<int> > > path_src_tree;
330 list<int>::iterator node_i, node_end;
331 <list<list<int> > >::iterator branch_i, branch_end;
334 path_src_tree.reserve(species_num - 1);
337 // loop thru all windows in first species
338 for (win_i = 0; win_i < window_num; win_i++)
340 // clear the path search tree
341 for(i = 0; i < species_num; i++)
342 path_src_tree[i].clear();
344 // top level kept empty even tho implicity has one entry of the first
345 // species at this window - why bother, adds a little speed
347 // get connection list for first species, creating a list of nodes
348 // of second species connected to the first species at this window
349 new_nodes = all_comparisons[0][1];
350 path_src_tree[1].push_back(new_nodes);
352 // loop thru rest of species for this window to see if any paths of matches
353 // go across all species
354 // if path search tree becomes empty, break out of loop, no reason to search further
356 while ((sp_i < species_num) && (path tree not empty))
358 branch_i = path_src_tree[1].begin();
359 branch_end = path_src_tree[1].end();
360 while (branch_i != branch_end)
362 node_i = branch_i->begin();
363 node_end = branch_i->end();
367 // loop over all current nodes
368 // get connection list for each node
369 // loop over each previous node in list
370 // get those nodes connection list
371 // intersect previous node connections with current
376 // insert any of the paths found into the master list of paths
378 // add no paths if tmp_pathz is empty...
382 void Nway_Paths::refine()
386 void Nway_Paths::print()
388 list<list<int> >::iterator pathz_i;
390 cout << "printing list of lists\n";
391 for (pathz_i = pathz.begin(); pathz_i != pathz.end(); ++pathz_i)
393 for (path_i = pathz_i->begin(); path_i != pathz_i->end(); ++path_i)
394 cout << *path_i << " ";