}
void
-Nway_Paths::setup(int sp_num)
+Nway_Paths::setup(int sp_num, int w, int t)
{
species_num = sp_num;
+ threshold = t;
+ win_size = w;
pathz.clear();
}
if (trans_check_good)
{
+ // this makes sure path nodes are recorded with RC status relative to
+ // the base species
if ( path[depth-1] >= 0)
path.push_back(*new_nodes_i);
else
path.push_back(*new_nodes_i * -1);
+
if (depth < species_num - 1)
path_search(all_comparisons, path, depth + 1);
else
}
}
+// dumbly goes thru and combines path windows exactly adjacent (ie + 1 index)
+// doesn't deal with interleaved adjacency
+void
+Nway_Paths::simple_refine()
+{
+ // ext_path remembers the first window set in an extending path
+ vector<int> ext_path, cur_path, next_path, new_path;
+ list<vector<int> >::iterator pathz_i;
+ int i2, win_ext_len = 0;
+ bool extending;
+
+
+ refined_pathz.clear();
+
+ pathz_i = pathz.begin();
+ ext_path = *pathz_i;
+ while(pathz_i != pathz.end())
+ {
+ // keep track of current path and advance to next path
+ cur_path = *pathz_i;
+ ++pathz_i;
+ next_path = *pathz_i;
+
+ // if node for each seq is equal to the next node+1 then for all
+ // sequences then we are extending
+ extending = true;
+ for(i2 = 0; i2 < species_num; i2++)
+ {
+ //cout << cur_path[i2] << " vs " << next_path[i2] << endl;
+ if (cur_path[i2] + 1 != next_path[i2])
+ extending = false;
+ }
+ if (extending)
+ {
+ //cout << "Raaaawwwrrr! I am extending\n";
+ win_ext_len++;
+ }
+ else
+ {
+ // add the extend window length as first element and add as refined
+ new_path = ext_path;
+ new_path.insert(new_path.begin(),win_size + win_ext_len);
+ refined_pathz.push_back(new_path);
+ // reset stuff
+ win_ext_len = 0;
+ ext_path = next_path;
+ }
+ }
+}
+
void
Nway_Paths::add_path(vector<int> loaded_path)
save_file.open(save_file_path.c_str(), ios::out);
- //save_file << "<Seqcomp type=" << ana_type << " win=" << window_size;
- //save_file << " thres=" << hard_threshold << ">\n";
-
- path_i = pathz.begin();
- paths_end = pathz.end();
- if (path_i == paths_end)
- cout << "Arrrrrrgggghhhhhh\n";
- while(path_i != paths_end)
+ save_file << "<Mussa type=flp seq_num=" << species_num;
+ save_file << " win=" << win_size;
+ // add a function para new_thres defaults to -1 to later deal with
+ // reanalysis with higher thres - if statement whether to record base thres
+ // or new thres (ie if -1, then base)
+ save_file << " thres=" << threshold << " >\n";
+
+ path_i = refined_pathz.begin();
+ paths_end = refined_pathz.end();
+ //path_i = pathz.begin();
+ //paths_end = pathz.end();
+ while (path_i != paths_end)
{
a_path = *path_i;
//cout << a_path.size() << endl;
- for(i = 0; i < species_num; ++i)
- save_file << i << "," << a_path[i] << " ";
+ //first entry is the window length of the windows in the path
+ save_file << a_path[0] << ":";
+ for(i = 1; i <= species_num; ++i)
+ {
+ save_file << a_path[i];
+ if (i != species_num)
+ save_file << ",";
+ }
save_file << endl;
++path_i;
}
-//save_file << "</Mussa>\n";
+ save_file << "</Mussa>\n";
save_file.close();
}
+/*
+ if (path_i == paths_end)
+ cout << "Arrrrrrgggghhhhhh\n";
+*/
+
+int
+Nway_Paths::load(string load_file_path)
+{
+ fstream load_file;
+ string file_data_line, header_data, data, path_node, path_length;
+ int i, space_split_i, equal_split_i, comma_split_i, colon_split_i;
+ vector<int> loaded_path;
+
+
+ load_file.open(load_file_path.c_str(), ios::in);
+
+ // get header data
+ // grab mussa tag - discard for now...maybe check in future...
+ getline(load_file,file_data_line);
+ space_split_i = file_data_line.find(" ");
+ file_data_line = file_data_line.substr(space_split_i+1);
+ // grab type tag - need in future to distinguish between flp and vlp paths
+ space_split_i = file_data_line.find(" ");
+ file_data_line = file_data_line.substr(space_split_i+1);
+ // get species/seq number
+ space_split_i = file_data_line.find(" ");
+ header_data = file_data_line.substr(0,space_split_i);
+ equal_split_i = header_data.find("=");
+ data = file_data_line.substr(equal_split_i+1);
+ species_num = atoi (data.c_str());
+ file_data_line = file_data_line.substr(space_split_i+1);
+ // get window size
+ space_split_i = file_data_line.find(" ");
+ header_data = file_data_line.substr(0,space_split_i);
+ equal_split_i = header_data.find("=");
+ data = file_data_line.substr(equal_split_i+1);
+ win_size = atoi (data.c_str());
+ file_data_line = file_data_line.substr(space_split_i+1);
+ // get window size
+ space_split_i = file_data_line.find(" ");
+ header_data = file_data_line.substr(0,space_split_i);
+ equal_split_i = header_data.find("=");
+ data = file_data_line.substr(equal_split_i+1);
+ threshold = atoi (data.c_str());
+ file_data_line = file_data_line.substr(space_split_i+1);
+
+ cout << "seq_num=" << species_num << " win=" << win_size;
+ cout << " thres=" << threshold << endl;
+
+ // clear out the current data
+ refined_pathz.clear();
+
+ int temp;
+
+ getline(load_file,file_data_line);
+ while ( (!load_file.eof()) && (file_data_line != "</Mussa>") )
+ {
+ if (file_data_line != "")
+ {
+ loaded_path.clear();
+ colon_split_i = file_data_line.find(":");
+ path_length = file_data_line.substr(0,colon_split_i);
+ loaded_path.push_back(atoi (path_length.c_str()));
+ file_data_line = file_data_line.substr(colon_split_i+1);
+ for(i = 0; i < species_num; i++)
+ {
+ comma_split_i = file_data_line.find(",");
+ path_node = file_data_line.substr(0, comma_split_i);
+ temp = atoi (path_node.c_str());
+ loaded_path.push_back(temp);
+ file_data_line = file_data_line.substr(comma_split_i+1);
+ }
+ refined_pathz.push_back(loaded_path);
+ }
+ getline(load_file,file_data_line);
+ }
+
+
+ load_file.close();
+
+ return species_num;
+}
+
+
+
void
Nway_Paths::save_old(string save_file_path)
{