// ---------- mussa_nway.cc -----------
// ----------------------------------------
+#include <boost/filesystem/fstream.hpp>
+namespace fs = boost::filesystem;
+
+#include "alg/mussa_callback.hpp"
#include "alg/nway_paths.hpp"
#include "alg/conserved_path.hpp"
#include "mussa_exceptions.hpp"
-#include <fstream>
#include <iostream>
#include <stdexcept>
{
}
// Copy constructor: initializes every listed member from the matching member
// of o (both path lists, the threshold/window settings, and c_sequences).
+NwayPaths::NwayPaths(const NwayPaths &o)
+ : pathz(o.pathz),
+ refined_pathz(o.refined_pathz),
+ threshold(o.threshold),
+ win_size(o.win_size),
+ soft_thres(o.soft_thres),
+ ent_thres(o.ent_thres),
+ c_sequences(o.c_sequences)
+{
+}
+
// Empty c_sequences and both path lists (pathz and refined_pathz).
// The threshold and window-size members are left untouched.
+void NwayPaths::clear()
+{
+ c_sequences.clear();
+ pathz.clear();
+ refined_pathz.clear();
+}
+
// Configure the object: w becomes the window size and t the threshold; the
// soft threshold is reset to the same value as the threshold, and previously
// stored data is discarded.
void
NwayPaths::setup(int w, int t)
{
threshold = t;
soft_thres = threshold;
win_size = w;
- pathz.clear();
+ clear(); // also empties c_sequences and refined_pathz, not just pathz
//cout << "nway: thres = " << threshold
// << ", soft thres = " << soft_thres << endl;
}
// Overwrite the soft threshold with sft_thres; no validation is performed here.
void
-NwayPaths::set_soft_thres(int sft_thres)
+NwayPaths::set_soft_threshold(int sft_thres)
{
soft_thres = sft_thres;
}
// Return the current soft threshold (soft_thres).
+int NwayPaths::get_soft_threshold() const
+{
+ return soft_thres;
+}
+
int NwayPaths::get_threshold() const
{
return threshold;
NwayPaths::simple_refine()
{
// ext_path remembers the first window set in an extending path
- ExtendedConservedPath ext_path, new_path;
+ ConservedPath ext_path, new_path;
list<ConservedPath>::iterator cur_path, next_path;
list<ConservedPath>::iterator pathz_i;
int win_ext_len = 0;
//cout << "path number is: " << pathz.size() << endl;
pathz_i = pathz.begin();
+ int path_count = 0;
// only try to extend when pathz isn't empty.
if (pathz_i != pathz.end())
{
- ext_path = ExtendedConservedPath( win_size, *pathz_i);
+ ext_path = *pathz_i;
+ ++path_count;
while(pathz_i != pathz.end())
{
// keep track of current path and advance to next path
cur_path = pathz_i;
++pathz_i;
- if (pathz_i == pathz.end())
+ ++path_count;
+
+ if (pathz_i == pathz.end()) {
end = true;
- else
+ extending = false;
+ } else {
next_path = pathz_i;
-
- if (not end)
- {
// if the node for each seq is equal to the next node+1 for all
// sequences, then we are extending
extending = cur_path->nextTo(*next_path);
}
- else
- extending = false;
-
+
if (extending)
{
win_ext_len++;
new_path = ext_path;
new_path.extend(win_ext_len);
refined_pathz.push_back(new_path);
- // reset stuff
- win_ext_len = 0;
- ext_path = ExtendedConservedPath( win_size, *next_path);
+ if (not end) {
+ // reset stuff
+ win_ext_len = 0;
+ ext_path = *next_path;
+ }
}
+ if ((path_count % 100) == 0)
+ emit progress("refine", path_count-1, pathz.size());
}
}
+ // this mysterious call tells the dialog box that we're actually done
+ emit progress("refine", pathz.size(), pathz.size());
//cout << "r_path number is: " << refined_pathz.size() << endl;
}
-
// Append a ConservedPath built from loaded_path to the end of pathz.
// NOTE(review): the patched call passes an extra 0.0 argument -- presumably an
// initial score; confirm against the ConservedPath constructor.
void
NwayPaths::add_path(int threshold, vector<int>& loaded_path)
{
- pathz.push_back(ConservedPath(threshold, loaded_path));
+ pathz.push_back(ConservedPath(threshold, 0.0, loaded_path));
}
void
void
-NwayPaths::save(string save_file_path)
+NwayPaths::save(fs::path save_file_path)
{
- fstream save_file;
- list<ExtendedConservedPath >::iterator path_i, paths_end;
+ fs::fstream save_file;
+ list<ConservedPath >::iterator path_i, paths_end;
- save_file.open(save_file_path.c_str(), ios::out);
+ save_file.open(save_file_path, ios::out);
save_file << "<Mussa type=flp seq_count=" << sequence_count();
save_file << " win=" << win_size;
// TODO: add a function parameter new_thres (default -1) to later support
// reanalysis with a higher threshold; an if statement would then choose whether
// to record the base threshold or the new one (i.e. if -1, record the base)
- save_file << " thres=" << threshold << " >\n";
+ save_file << " thres=" << threshold << " soft_thres=" << soft_thres << " >\n";
path_i = refined_pathz.begin();
paths_end = refined_pathz.end();
//paths_end = pathz.end();
while (path_i != paths_end)
{
- ExtendedConservedPath& a_path = *path_i;
+ ConservedPath& a_path = *path_i;
//cout << a_path.size() << endl;
//first entry is the window length of the windows in the path
save_file << a_path.window_size << ":";
- for(size_t i = 0; i != sequence_count(); ++i)
+ for(size_type i = 0; i != sequence_count(); ++i)
{
save_file << a_path[i];
if (i != sequence_count())
}
// Return the size of the first entry in refined_pathz, or 0 when
// refined_pathz is empty. Only refined_pathz is consulted; pathz is not.
-size_t
-NwayPaths::sequence_count()
+NwayPaths::size_type NwayPaths::sequence_count() const
{
if (refined_pathz.begin() == refined_pathz.end() )
return 0;
return refined_pathz.begin()->size();
}
// Return the number of entries in pathz (refined_pathz is not counted).
+NwayPaths::size_type NwayPaths::size() const
+{
+ return pathz.size();
+}
void
-NwayPaths::load(string load_file_path)
+NwayPaths::load(fs::path load_file_path)
{
- fstream load_file;
+ fs::fstream load_file;
string file_data_line, header_data, data, path_node, path_width;
int space_split_i, equal_split_i, comma_split_i, colon_split_i;
vector<int> loaded_path;
- load_file.open(load_file_path.c_str(), ios::in);
+ load_file.open(load_file_path, ios::in);
if (!load_file)
{
- throw mussa_load_error("Sequence File: " + load_file_path + " not found");
+ throw mussa_load_error("Sequence File: " + load_file_path.string() + " not found");
}
else
{
data = file_data_line.substr(equal_split_i+1);
threshold = atoi (data.c_str());
file_data_line = file_data_line.substr(space_split_i+1);
-
+ // get soft_thres if the header has another field before ">"; otherwise it defaults to threshold
+ //std::cout << "file_data_line: " << file_data_line << "\n";
+ //std::cout << "find(\">\"): " << file_data_line.find(">") << "\n";
+ if (file_data_line.find(">") != 0)
+ {
+ space_split_i = file_data_line.find(" ");
+ header_data = file_data_line.substr(0,space_split_i);
+ equal_split_i = header_data.find("=");
+ data = file_data_line.substr(equal_split_i+1);
+ soft_thres = atoi (data.c_str());
+ file_data_line = file_data_line.substr(space_split_i+1);
+ }
+ else
+ {
+ soft_thres = threshold;
+ }
+ //std::cout << "nway_soft_thres: " << soft_thres << "\n";
//cout << "seq_num=" << species_num << " win=" << win_size;
//cout << " thres=" << threshold << endl;
// what's our window size?
path_width = file_data_line.substr(0,colon_split_i);
file_data_line = file_data_line.substr(colon_split_i+1);
- for(size_t i = 0; i < species_num; i++)
+ for(size_type i = 0; i < species_num; i++)
{
comma_split_i = file_data_line.find(",");
path_node = file_data_line.substr(0, comma_split_i);
file_data_line = file_data_line.substr(comma_split_i+1);
}
assert (loaded_path.size() == species_num );
- refined_pathz.push_back(ExtendedConservedPath(atoi(path_width.c_str()),
- threshold,
- loaded_path));
+ refined_pathz.push_back(ConservedPath(atoi(path_width.c_str()),
+ threshold,
+ loaded_path));
}
getline(load_file,file_data_line);
}
void
-NwayPaths::path_search(vector<vector<FLPs> > all_comparisons, ConservedPath path, int depth)
+NwayPaths::path_search(vector<vector<FLPs> > all_comparisons, ConservedPath path, size_type depth)
{
list<int> new_nodes, trans_check_nodes;
list<int>::iterator new_nodes_i, new_nodes_end;
- int i;
bool trans_check_good;
new_nodes = all_comparisons[depth - 1][depth].match_locations(path[depth-1]);
//cout << " * species " << depth << " node: " << *new_nodes_i << endl;
// check transitivity with previous nodes in path
trans_check_good = true;
- for(i = 0; i < depth - 1; i++)
+ for(size_type i = 0; i < depth - 1; i++)
{
trans_check_nodes = all_comparisons[i][depth].match_locations(path[i]);
if ( (trans_check_nodes.end() == find(trans_check_nodes.begin(),
void
-NwayPaths::save_old(string save_file_path)
+NwayPaths::save_old(fs::path save_file_path)
{
- fstream save_file;
+ fs::fstream save_file;
list<ConservedPath >::iterator path_i, paths_end;
- int i;
+ size_type i;
- save_file.open(save_file_path.c_str(), ios::app);
+ save_file.open(save_file_path, ios::app);
path_i = pathz.begin();
paths_end = pathz.end();