#include <sstream>
#include "mussa_exceptions.hpp"
-#include "alg/flp.hpp"
-#include "alg/mussa.hpp"
-#include "alg/motif_parser.hpp"
+
+#include "flp.hpp"
+#include "io.hpp"
+#include "mussa.hpp"
+#include "motif_parser.hpp"
using namespace std;
set_dirty(true);
}
+// Convenience overload: takes the parameter-file location as a plain
+// string, wraps it in a boost::filesystem::path and forwards to the
+// path-based load_mupa_file overload.
+void Mussa::load_mupa_file(std::string para_file_path)
+{
+  boost::filesystem::path mupa_path(para_file_path);
+  load_mupa_file(mupa_path);
+}
+
void
Mussa::load_mupa_file(fs::path para_file_path)
{
- fs::ifstream para_file;
+ if (not fs::exists(para_file_path))
+ {
+ throw mussa_load_error("Config File: " + para_file_path.string() + " not found");
+ } else if (fs::is_directory(para_file_path)) {
+ throw mussa_load_error("Config File: " + para_file_path.string() + " is a directory.");
+ } else if (fs::is_empty(para_file_path)) {
+ throw mussa_load_error("Config File: " + para_file_path.string() + " is empty");
+ } else {
+ // what directory is the mupa file in?
+ fs::path file_path_base( para_file_path.branch_path()) ;
+
+ fs::ifstream para_file;
+ para_file.open(para_file_path, ios::in);
+
+ load_mupa_stream(para_file, file_path_base);
+ para_file.close();
+ }
+}
+
+// Parse mussa parameters from an already-open stream.
+//
+// para_file:      stream of "PARAM value" lines; the text before the first
+//                 space is the parameter name, the rest is its value. Lines
+//                 whose parameter name is empty or "#" are ignored.
+// file_path_base: directory that SEQUENCE and ANNOTATION values are
+//                 resolved against.
+//
+// Any previously loaded state is discarded via clear() before parsing.
+// A SEQUENCE line may be followed by FASTA_INDEX / ANNOTATION / SEQ_START /
+// SEQ_END lines that apply to that sequence; the first other parameter ends
+// the group and is then handled by the outer loop without being re-read.
+// Unrecognized parameters are reported on clog and skipped.
+void
+Mussa::load_mupa_stream(std::istream& para_file, fs::path& file_path_base)
+{
string file_data_line;
string param, value;
fs::path annot_file;
// initialize values
clear();
- // if file was opened, read the parameter values
- if (not fs::exists(para_file_path))
+ // setup loop by getting file's first line
+ // read it with the same helper as every later line so the first
+ // PARAM/value pair is split exactly like the rest (multiplatform_getline
+ // presumably normalizes platform line endings -- confirm in io.hpp)
+ multiplatform_getline(para_file, file_data_line);
+ split_index = file_data_line.find(" ");
+ param = file_data_line.substr(0,split_index);
+ value = file_data_line.substr(split_index+1);
+
+ while (para_file)
{
- throw mussa_load_error("Config File: " + para_file_path.string() + " not found");
- } else if (fs::is_directory(para_file_path)) {
- throw mussa_load_error("Config File: " + para_file_path.string() + " is a directory.");
- } else if (fs::is_empty(para_file_path)) {
- throw mussa_load_error("Config File: " + para_file_path.string() + " is empty");
- } else {
- para_file.open(para_file_path, ios::in);
-
- // what directory is the mupa file in?
- fs::path file_path_base = para_file_path.branch_path();
-
- // setup loop by getting file's first line
- getline(para_file,file_data_line);
- split_index = file_data_line.find(" ");
- param = file_data_line.substr(0,split_index);
- value = file_data_line.substr(split_index+1);
-
- while (para_file)
+ did_seq = false;
+ if (param == "ANA_NAME")
+ analysis_name = value;
+ else if (param == "APPEND_WIN")
+ win_append = true;
+ else if (param == "APPEND_THRES")
+ thres_append = true;
+ else if (param == "SEQUENCE_NUM")
+ ; // ignore sequence_num now
+ else if (param == "WINDOW")
+ window = atoi(value.c_str());
+ else if (param == "THRESHOLD")
+ threshold = atoi(value.c_str());
+ else if (param == "SEQUENCE")
{
- did_seq = false;
- if (param == "ANA_NAME")
- analysis_name = value;
- else if (param == "APPEND_WIN")
- win_append = true;
- else if (param == "APPEND_THRES")
- thres_append = true;
- else if (param == "SEQUENCE_NUM")
- ; // ignore sequence_num now
- else if (param == "WINDOW")
- window = atoi(value.c_str());
- else if (param == "THRESHOLD")
- threshold = atoi(value.c_str());
- else if (param == "SEQUENCE")
+ fs::path seq_file = file_path_base / value;
+ //cout << "seq_file_name " << seq_files.back() << endl;
+ fasta_index = 1;
+ annot_file = "";
+ sub_seq_start = 0;
+ sub_seq_end = 0;
+ seq_params = true;
+
+ while (para_file && seq_params)
{
- fs::path seq_file = file_path_base / value;
- //cout << "seq_file_name " << seq_files.back() << endl;
- fasta_index = 1;
- annot_file = "";
- sub_seq_start = 0;
- sub_seq_end = 0;
- seq_params = true;
-
- while (para_file && seq_params)
- {
- getline(para_file,file_data_line);
- split_index = file_data_line.find(" ");
- param = file_data_line.substr(0,split_index);
- value = file_data_line.substr(split_index+1);
-
- if (param == "FASTA_INDEX")
- fasta_index = atoi(value.c_str());
- else if (param == "ANNOTATION")
- annot_file = file_path_base / value;
- else if (param == "SEQ_START")
- sub_seq_start = atoi(value.c_str());
- else if (param == "SEQ_END")
- {
- sub_seq_end = atoi(value.c_str());
- }
- //ignore empty lines or that start with '#'
- else if ((param == "") || (param == "#")) {}
- else seq_params = false;
- }
- load_sequence(seq_file, annot_file, fasta_index, sub_seq_start,
- sub_seq_end);
- did_seq = true;
- }
- //ignore empty lines or that start with '#'
- else if ((param == "") || (param == "#")) {}
- else
- {
- clog << "Illegal/misplaced mussa parameter in file\n";
- clog << param << "\n";
- }
-
- if (!did_seq)
- {
- getline(para_file,file_data_line);
+ multiplatform_getline(para_file,file_data_line);
split_index = file_data_line.find(" ");
param = file_data_line.substr(0,split_index);
value = file_data_line.substr(split_index+1);
- did_seq = false;
+
+ if (param == "FASTA_INDEX")
+ fasta_index = atoi(value.c_str());
+ else if (param == "ANNOTATION")
+ annot_file = file_path_base / value;
+ else if (param == "SEQ_START")
+ sub_seq_start = atoi(value.c_str());
+ else if (param == "SEQ_END")
+ {
+ sub_seq_end = atoi(value.c_str());
+ }
+ //ignore empty lines or that start with '#'
+ else if ((param == "") || (param == "#")) {
+ // pass
+ } else {
+ seq_params = false;
+ }
}
+ load_sequence(seq_file, annot_file, fasta_index, sub_seq_start,
+ sub_seq_end);
+ did_seq = true;
+ }
+ //ignore empty lines or that start with '#'
+ else if ((param == "") || (param == "#")) {}
+ else
+ {
+ clog << "Illegal/misplaced mussa parameter in file\n";
+ clog << param << "\n";
}
- para_file.close();
-
- soft_thres = threshold;
- //cout << "nway mupa: analysis_name = " << analysis_name
- // << " window = " << window
- // << " threshold = " << threshold << endl;
+ if (!did_seq)
+ {
+ multiplatform_getline(para_file,file_data_line);
+ split_index = file_data_line.find(" ");
+ param = file_data_line.substr(0,split_index);
+ value = file_data_line.substr(split_index+1);
+ did_seq = false;
+ }
}
+
+ soft_thres = threshold;
// no file was loaded, signal error
set_dirty(true);
}
vector<FLPs> empty_FLP_vector;
FLPs dummy_comp;
+
+ //--------------------------------------------------------
+ // Load Muway
+ //--------------------------------------------------------
analysis_path = ana_file;
analysis_name = ana_path.leaf();
fs::path muway(analysis_name+".muway", fs::native);
threshold = the_paths.get_threshold();
soft_thres = threshold;
- int seq_num = the_paths.sequence_count();
+
+ //--------------------------------------------------------
+ // Load Sequence
+ //--------------------------------------------------------
+ //int seq_num = the_paths.sequence_count();
fs::path museq(analysis_name + ".museq", fs::native);
a_file_path = analysis_path / museq;
// this is a bit of a hack due to C++ not acting like it should with files
- for (i = 1; i <= seq_num; i++)
+ /*for (i = 1; i <= seq_num; i++)
{
boost::shared_ptr<Sequence> tmp_seq(new Sequence);
tmp_seq->load_museq(a_file_path, i);
the_seqs.push_back(tmp_seq);
+ }*/
+
+ i = 1;
+ //int seq_num = 0;
+ boost::filesystem::fstream load_museq_fs;
+ load_museq_fs.open(a_file_path, std::ios::in);
+ boost::shared_ptr<Sequence> tmp_seq;
+ while (1)
+ {
+ tmp_seq = Sequence::load_museq(load_museq_fs);
+
+ if (tmp_seq)
+ {
+ the_seqs.push_back(tmp_seq);
+ }
+ else
+ {
+ break;
+ }
+
+
+ //safeguard in case of an infinite loop.
+ //FIXME: If mussa can handle a comparison of 10000 sequences
+ // in the future, then this code should be fixed.
+ if (i == 10000)
+ {
+ throw mussa_load_error(" Run away sequence load!");
+ }
+ i++;
}
+ load_museq_fs.close();
+ //--------------------------------------------------------
+ // Load Motifs
+ //--------------------------------------------------------
fs::path mtl(analysis_name + ".mtl", fs::native);
fs::path motif_file = analysis_path / mtl;
if (fs::exists(motif_file)) {
load_motifs(motif_file);
}
+
+ vector<Sequence>::size_type seq_num = the_seqs.size();
empty_FLP_vector.clear();
for(i = 0; i < seq_num; i++)
{
all_comps[i].push_back(dummy_comp);
}
+
for(i = 0; i < seq_num; i++)
{
for(i2 = i+1; i2 < seq_num; i2++)
{
// once we've loaded all the motifs from the file,
// lets attach them to the sequences
- for(vector<boost::shared_ptr<Sequence> >::iterator seq_i = the_seqs.begin();
+ for(vector<SequenceRef >::iterator seq_i = the_seqs.begin();
seq_i != the_seqs.end();
++seq_i)
{