1 // ----------------------------------------
2 // ---------- mussa_class.cc -----------
3 // ----------------------------------------
5 #include "mussa_class.hh"
7 // doesn't do neg ints...
21 remainder = an_int % 10;
24 converted_int = "0" + converted_int;
25 else if (remainder == 1)
26 converted_int = "1" + converted_int;
27 else if (remainder == 2)
28 converted_int = "2" + converted_int;
29 else if (remainder == 3)
30 converted_int = "3" + converted_int;
31 else if (remainder == 4)
32 converted_int = "4" + converted_int;
33 else if (remainder == 5)
34 converted_int = "5" + converted_int;
35 else if (remainder == 6)
36 converted_int = "6" + converted_int;
37 else if (remainder == 7)
38 converted_int = "7" + converted_int;
39 else if (remainder == 8)
40 converted_int = "8" + converted_int;
41 else if (remainder == 9)
42 converted_int = "9" + converted_int;
57 Mussa::load_parameters(string para_file_path)
60 string file_data_line;
61 string param, value, annot_file;
62 int split_index, fasta_index;
63 int sub_seq_start, sub_seq_end;
64 bool seq_params, did_seq;
73 fasta_indices.clear();
75 sub_seq_starts.clear();
78 para_file.open(para_file_path.c_str(), ios::in);
80 // if file was opened, read the parameter values
83 // setup loop by getting file's first line
84 getline(para_file,file_data_line);
85 split_index = file_data_line.find(" ");
86 param = file_data_line.substr(0,split_index);
87 value = file_data_line.substr(split_index+1);
89 while (!para_file.eof())
92 if (param == "ANA_NAME")
94 else if (param == "APPEND_WIN")
96 else if (param == "APPEND_THRES")
98 else if (param == "SEQUENCE_NUM")
99 seq_num = atoi(value.c_str());
100 else if (param == "WINDOW")
101 window = atoi(value.c_str());
102 else if (param == "THRESHOLD")
103 threshold = atoi(value.c_str());
104 else if (param == "SEQUENCE")
106 seq_files.push_back(value);
113 while ((!para_file.eof()) && seq_params)
115 getline(para_file,file_data_line);
116 split_index = file_data_line.find(" ");
117 param = file_data_line.substr(0,split_index);
118 value = file_data_line.substr(split_index+1);
120 if (param == "FASTA_INDEX")
121 fasta_index = atoi(value.c_str());
122 else if (param == "ANNOTATION")
124 else if (param == "SEQ_START")
125 sub_seq_start = atoi(value.c_str());
126 else if (param == "SEQ_END")
128 cout << "hey! " << atoi(value.c_str()) << endl;
129 sub_seq_end = atoi(value.c_str());
131 //ignore empty lines or that start with '#'
132 else if ((param == "") || (param == "#")) {}
133 else seq_params = false;
136 fasta_indices.push_back(fasta_index);
137 annot_files.push_back(annot_file);
138 sub_seq_starts.push_back(sub_seq_start);
139 sub_seq_ends.push_back(sub_seq_end);
142 //ignore empty lines or that start with '#'
143 else if ((param == "") || (param == "#")) {}
146 cout << "Illegal/misplaced mussa parameter in file\n";
147 cout << param << "\n";
152 getline(para_file,file_data_line);
153 split_index = file_data_line.find(" ");
154 param = file_data_line.substr(0,split_index);
155 value = file_data_line.substr(split_index+1);
162 cout << "ana_name = " << ana_name << win_append << win_append << "\n";
163 cout << "window = " << window << " threshold = " << threshold << "\n";
168 // no file was loaded, signal error
171 err_msg = "Config File: " + para_file_path + " not found";
176 // if (!((param == "") || (param == "#")))
177 // cout << value << " = " << param << endl;
181 Mussa::analyze(string para_file_path, int w, int t)
184 time_t t1, t2, begin, end;
185 double setuptime, seqloadtime, seqcomptime, nwaytime, savetime, totaltime;
191 err_msg = load_parameters(para_file_path);
193 setuptime = difftime(t2, t1);
204 err_msg = get_Seqs();
206 seqloadtime = difftime(t2, t1);
215 seqcomptime = difftime(t2, t1);
222 nwaytime = difftime(t2, t1);
229 savetime = difftime(t2, t1);
232 totaltime = difftime(end, begin);
234 cout << "setup\tseqload\tseqcomp\tnway\tsave\ttotal\n";
235 cout << setuptime << "\t";
236 cout << seqloadtime << "\t";
237 cout << seqcomptime << "\t";
238 cout << nwaytime << "\t";
239 cout << savetime << "\t";
240 cout << totaltime << "\n";
253 list<string>::iterator seq_files_i, annot_files_i;
254 list<int>::iterator fasta_indices_i, seq_starts_i, seq_ends_i;
258 seq_files_i = seq_files.begin();
259 fasta_indices_i = fasta_indices.begin();
260 annot_files_i = annot_files.begin();
261 seq_starts_i = sub_seq_starts.begin();
262 seq_ends_i = sub_seq_ends.begin();
266 while ( (seq_files_i != seq_files.end()) && (err_msg == "") )
267 /* it should be guarenteed that each of the following exist
268 should I bother checking, and how to deal with if not true...
270 (fasta_indices_i != fasta_indices.end()) &&
271 (annot_files_i != annot_files.end()) &&
272 (seq_starts_i != sub_seq_starts.end()) &&
273 (seq_ends_i != sub_seq_ends.end()) )
276 err_msg = aSeq.load_fasta(*seq_files_i, *fasta_indices_i,*seq_starts_i,
280 if (*annot_files_i != "")
281 err_msg = aSeq.load_annot(*annot_files_i, *seq_starts_i, *seq_ends_i);
285 the_Seqs.push_back(aSeq);
286 cout << aSeq.hdr() << endl;
287 //cout << aSeq.seq() << endl;
289 ++seq_files_i; // advance all the iterators
309 int i, i2; // loop vars over sequences to analyze
310 vector<int> seq_lens;
311 vector<FLPs> empty_FLP_vector;
313 string save_file_string;
315 empty_FLP_vector.clear();
316 for(i = 0; i < seq_num; i++)
318 all_comps.push_back(empty_FLP_vector);
319 for(i2 = 0; i2 < seq_num; i2++)
320 all_comps[i].push_back(dummy_comp);
322 for(i = 0; i < seq_num; i++)
323 seq_lens.push_back(the_Seqs[i].len());
325 for(i = 0; i < seq_num; i++)
326 for(i2 = i+1; i2 < seq_num; i2++)
328 cout << "seqcomping: " << i << " v. " << i2 << endl;
329 all_comps[i][i2].setup("m", window, threshold, seq_lens[i],seq_lens[i2]);
330 all_comps[i][i2].seqcomp(the_Seqs[i].seq(), the_Seqs[i2].seq(), false);
331 all_comps[i][i2].seqcomp(the_Seqs[i].seq(),the_Seqs[i2].rev_comp(),true);
332 //all_comps[i][i2].file_save(save_file_string);
341 the_paths.setup(seq_num, window, threshold);
342 //the_paths.find_paths_r(all_comps);
343 the_paths.trans_path_search(all_comps);
344 //the_paths.radiate_path_search(all_comps);
345 the_paths.simple_refine();
351 string save_path_base, save_path;
355 // gotta do bit with adding win & thres if to be appended - need itos
357 // not sure why, but gotta close file each time since can't pass file streams
359 save_path_base = ana_name;
362 save_path_base += "_w" + int_to_str(window);
365 save_path_base += "_t" + int_to_str(threshold);
367 // save sequence and annots to a special mussa file
368 save_path = save_path_base + ".museq";
369 save_file.open(save_path.c_str(), ios::out);
370 save_file << "<Mussa_Sequence>" << endl;
373 for(i = 0; i < seq_num; i++)
374 the_Seqs[i].save(save_file);
376 //save_file.open(save_path.c_str(), ios::app);
377 save_file << "</Mussa_Sequence>" << endl;
380 // save nway paths to its mussa save file
381 save_path = save_path_base + ".muway";
382 the_paths.save(save_path);
386 Mussa::load(string ana_file)
389 string load_file_path;
395 load_file_path = ana_name + ".muway";
396 err_msg = the_paths.load(load_file_path);
400 seq_num = the_paths.seq_num();
404 load_file_path = ana_name + ".museq";
406 // this is a bit of a hack due to C++ not acting like it should with files
407 for (i = 1; i <= seq_num; i++)
410 tmp_seq.load_museq(load_file_path, i);
411 the_Seqs.push_back(tmp_seq);
435 save_file.open(ana_name.c_str(), ios::out);
437 for(i = 0; i < seq_num; i++)
438 save_file << the_Seqs[i].seq() << endl;
440 save_file << window << endl;
442 //note more complex eventually since ana_name may need to have
443 //window size, threshold and other stuff to modify it...
444 the_paths.save_old(ana_name);
449 Mussa::load_old(char * load_file_path, int s_num)
452 string file_data_line;
453 int i, space_split_i, comma_split_i;
454 vector<int> loaded_path;
455 string node_pair, node;
459 the_paths.setup(seq_num, 0, 0);
460 save_file.open(load_file_path, ios::in);
462 // currently loads old mussa format
465 for(i = 0; i < seq_num; i++)
467 getline(save_file, file_data_line);
468 a_seq.set_seq(file_data_line);
469 the_Seqs.push_back(a_seq);
473 getline(save_file, file_data_line);
474 window = atoi(file_data_line.c_str());
477 while (!save_file.eof())
480 getline(save_file, file_data_line);
481 if (file_data_line != "")
482 for(i = 0; i < seq_num; i++)
484 space_split_i = file_data_line.find(" ");
485 node_pair = file_data_line.substr(0,space_split_i);
486 //cout << "np= " << node_pair;
487 comma_split_i = node_pair.find(",");
488 node = node_pair.substr(comma_split_i+1);
489 //cout << "n= " << node << " ";
490 loaded_path.push_back(atoi (node.c_str()));
491 file_data_line = file_data_line.substr(space_split_i+1);
494 the_paths.add_path(loaded_path);
498 //the_paths.save("tmp.save");