vector<FLPs> empty_FLP_vector;
FLPs dummy_comp;
+
+ //--------------------------------------------------------
+ // Load Muway
+ //--------------------------------------------------------
analysis_path = ana_file;
analysis_name = ana_path.leaf();
fs::path muway(analysis_name+".muway", fs::native);
threshold = the_paths.get_threshold();
soft_thres = threshold;
- int seq_num = the_paths.sequence_count();
+
+ //--------------------------------------------------------
+ // Load Sequence
+ //--------------------------------------------------------
+ //int seq_num = the_paths.sequence_count();
fs::path museq(analysis_name + ".museq", fs::native);
a_file_path = analysis_path / museq;
// this is a bit of a hack due to C++ not acting like it should with files
- for (i = 1; i <= seq_num; i++)
+ /*for (i = 1; i <= seq_num; i++)
{
boost::shared_ptr<Sequence> tmp_seq(new Sequence);
tmp_seq->load_museq(a_file_path, i);
the_seqs.push_back(tmp_seq);
+ }*/
+
+ i = 1;
+ //int seq_num = 0;
+ boost::filesystem::fstream load_museq_fs;
+ load_museq_fs.open(a_file_path, std::ios::in);
+ boost::shared_ptr<Sequence> tmp_seq;
+ while (1)
+ {
+ tmp_seq = Sequence::load_museq(load_museq_fs);
+
+ if (tmp_seq)
+ {
+ the_seqs.push_back(tmp_seq);
+ }
+ else
+ {
+ break;
+ }
+
+
+ //safeguard in case of an infinite loop.
+ //FIXME: If mussa can handle a comparison of 10000 sequences
+ // in the future, then this code should be fixed.
+ if (i == 10000)
+ {
+ throw mussa_load_error(" Run away sequence load!");
+ }
+ i++;
}
+ load_museq_fs.close();
+ //--------------------------------------------------------
+ // Load Motifs
+ //--------------------------------------------------------
fs::path mtl(analysis_name + ".mtl", fs::native);
fs::path motif_file = analysis_path / mtl;
if (fs::exists(motif_file)) {
load_motifs(motif_file);
}
+
+ vector<Sequence>::size_type seq_num = the_seqs.size();
empty_FLP_vector.clear();
for(i = 0; i < seq_num; i++)
{
all_comps[i].push_back(dummy_comp);
}
+
for(i = 0; i < seq_num; i++)
{
for(i2 = i+1; i2 < seq_num; i2++)
//save_file.close();
}
-void
-Sequence::load_museq(fs::path load_file_path, int seq_num)
+//void
+//Sequence::load_museq(fs::path load_file_path, int seq_num)
+//{
+// fs::fstream load_file;
+// std::string file_data_line;
+// int seq_counter;
+// //annot an_annot;
+// int annot_begin;
+// int annot_end;
+// std::string annot_name;
+// std::string annot_type;
+//
+// std::string::size_type space_split_i;
+// std::string annot_value;
+//
+// annotation_list.reset(new SeqSpanRefList);
+//
+// load_file.open(load_file_path, std::ios::in);
+//
+// seq_counter = 0;
+// // search for the seq_num-th sequence
+// while ( (!load_file.eof()) && (seq_counter < seq_num) )
+// {
+// getline(load_file,file_data_line);
+// if (file_data_line == "<Sequence>")
+// seq_counter++;
+// }
+// getline(load_file, file_data_line);
+// // looks like the sequence is written as a single line
+// set_filtered_sequence(file_data_line, reduced_dna_alphabet, 0, file_data_line.size(), SeqSpan::PlusStrand);
+// getline(load_file, file_data_line);
+// getline(load_file, file_data_line);
+// if (file_data_line == "<Annotations>")
+// {
+// getline(load_file, file_data_line);
+// species = file_data_line;
+// while ( (!load_file.eof()) && (file_data_line != "</Annotations>") )
+// {
+// getline(load_file,file_data_line);
+// if ((file_data_line != "") && (file_data_line != "</Annotations>"))
+// {
+// // need to get 4 values...almost same code 4 times...
+// // get annot start index
+// space_split_i = file_data_line.find(" ");
+// annot_value = file_data_line.substr(0,space_split_i);
+// annot_begin = atoi (annot_value.c_str());
+// file_data_line = file_data_line.substr(space_split_i+1);
+// // get annot end index
+// space_split_i = file_data_line.find(" ");
+// annot_value = file_data_line.substr(0,space_split_i);
+// annot_end = atoi (annot_value.c_str());
+//
+// if (space_split_i == std::string::npos) // no entry for type or name
+// {
+// std::cout << "seq, annots - no type or name\n";
+// annot_name = "";
+// annot_type = "";
+// }
+// else // else get annot type
+// {
+// file_data_line = file_data_line.substr(space_split_i+1);
+// space_split_i = file_data_line.find(" ");
+// annot_value = file_data_line.substr(0,space_split_i);
+// //an_annot.type = annot_value;
+// annot_type = annot_value;
+// if (space_split_i == std::string::npos) // no entry for name
+// {
+// std::cout << "seq, annots - no name\n";
+// annot_name = "";
+// }
+// else // get annot name
+// {
+// file_data_line = file_data_line.substr(space_split_i+1);
+// space_split_i = file_data_line.find(" ");
+// annot_value = file_data_line.substr(0,space_split_i);
+// // this seems like it's wrong? (the name value is stored in annot_type, not annot_name)
+// annot_type = annot_value;
+// }
+// }
+// add_annotation(annot_name, annot_type, annot_begin, annot_end);
+// }
+// //std::cout << "seq, annots: " << an_annot.start << ", " << an_annot.end
+// // << "-->" << an_annot.type << "::" << an_annot.name << std::endl;
+// }
+// }
+// load_file.close();
+//}
+
+SequenceRef Sequence::load_museq(boost::filesystem::fstream& load_file)
{
- fs::fstream load_file;
+ boost::shared_ptr<Sequence> seq(new Sequence);
std::string file_data_line;
int seq_counter;
//annot an_annot;
std::string::size_type space_split_i;
std::string annot_value;
- annotation_list.reset(new SeqSpanRefList);
-
- load_file.open(load_file_path, std::ios::in);
+ //seq->annotation_list.reset(new SeqSpanRefList);
seq_counter = 0;
- // search for the seq_num-th sequence
+ // search for the next sequence
+ int seq_num = 1;
while ( (!load_file.eof()) && (seq_counter < seq_num) )
{
getline(load_file,file_data_line);
if (file_data_line == "<Sequence>")
seq_counter++;
}
+
+ // Could not find next sequence
+ if (load_file.eof())
+ {
+ seq.reset();
+ return seq;
+ }
+
getline(load_file, file_data_line);
// looks like the sequence is written as a single line
- set_filtered_sequence(file_data_line, reduced_dna_alphabet, 0, file_data_line.size(), SeqSpan::PlusStrand);
+ seq->set_filtered_sequence(file_data_line, reduced_dna_alphabet, 0, file_data_line.size(), SeqSpan::PlusStrand);
getline(load_file, file_data_line);
getline(load_file, file_data_line);
if (file_data_line == "<Annotations>")
{
getline(load_file, file_data_line);
- species = file_data_line;
+ seq->set_species(file_data_line);
while ( (!load_file.eof()) && (file_data_line != "</Annotations>") )
{
getline(load_file,file_data_line);
annot_type = annot_value;
}
}
- add_annotation(annot_name, annot_type, annot_begin, annot_end);
+ seq->add_annotation(annot_name, annot_type, annot_begin, annot_end);
}
//std::cout << "seq, annots: " << an_annot.start << ", " << an_annot.end
// << "-->" << an_annot.type << "::" << an_annot.name << std::endl;
}
}
- load_file.close();
+ //load_file.close();
+ return seq;
}