1 // ----------------------------------------
2 // ---------- mussa_class.cc -----------
3 // ----------------------------------------
5 #include "mussa_class.hh"
7 // doesn't do neg ints...
21 remainder = an_int % 10;
24 converted_int = "0" + converted_int;
25 else if (remainder == 1)
26 converted_int = "1" + converted_int;
27 else if (remainder == 2)
28 converted_int = "2" + converted_int;
29 else if (remainder == 3)
30 converted_int = "3" + converted_int;
31 else if (remainder == 4)
32 converted_int = "4" + converted_int;
33 else if (remainder == 5)
34 converted_int = "5" + converted_int;
35 else if (remainder == 6)
36 converted_int = "6" + converted_int;
37 else if (remainder == 7)
38 converted_int = "7" + converted_int;
39 else if (remainder == 8)
40 converted_int = "8" + converted_int;
41 else if (remainder == 9)
42 converted_int = "9" + converted_int;
57 Mussa::load_parameters(string para_file_path)
60 string file_data_line;
61 string param, value, annot_file;
62 int split_index, fasta_index;
63 int sub_seq_start, sub_seq_end;
64 bool seq_params, did_seq;
71 fasta_indices.clear();
73 sub_seq_starts.clear();
76 para_file.open(para_file_path.c_str(), ios::in);
78 getline(para_file,file_data_line);
79 split_index = file_data_line.find(" ");
80 param = file_data_line.substr(0,split_index);
81 value = file_data_line.substr(split_index+1);
84 while (!para_file.eof())
87 if (param == "ANA_NAME")
89 else if (param == "APPEND_WIN")
91 else if (param == "APPEND_THRES")
93 else if (param == "SEQUENCE_NUM")
94 seq_num = atoi(value.c_str());
95 else if (param == "WINDOW")
96 window = atoi(value.c_str());
97 else if (param == "THRESHOLD")
98 threshold = atoi(value.c_str());
99 else if (param == "SEQUENCE")
101 seq_files.push_back(value);
108 while ((!para_file.eof()) && seq_params)
110 getline(para_file,file_data_line);
111 split_index = file_data_line.find(" ");
112 param = file_data_line.substr(0,split_index);
113 value = file_data_line.substr(split_index+1);
115 if (param == "FASTA_INDEX")
116 fasta_index = atoi(value.c_str());
117 else if (param == "ANNOTATION")
119 else if (param == "SEQ_START")
120 sub_seq_start = atoi(value.c_str());
121 else if (param == "SEQ_END")
123 cout << "hey! " << atoi(value.c_str()) << endl;
124 sub_seq_end = atoi(value.c_str());
126 //ignore empty lines or that start with '#'
127 else if ((param == "") || (param == "#")) {}
128 else seq_params = false;
132 fasta_indices.push_back(fasta_index);
133 annot_files.push_back(annot_file);
134 sub_seq_starts.push_back(sub_seq_start);
135 sub_seq_ends.push_back(sub_seq_end);
139 //ignore empty lines or that start with '#'
140 else if ((param == "") || (param == "#")) {}
143 cout << "Illegal/misplaced mussa parameter in file\n";
144 cout << param << "\n";
149 getline(para_file,file_data_line);
150 split_index = file_data_line.find(" ");
151 param = file_data_line.substr(0,split_index);
152 value = file_data_line.substr(split_index+1);
159 cout << "ana_name = " << ana_name << win_append << win_append << "\n";
160 cout << "window = " << window << " threshold = " << threshold << "\n";
163 // if (!((param == "") || (param == "#")))
164 // cout << value << " = " << param << endl;
166 Mussa::analyze(string para_file_path, int w, int t)
168 time_t t1, t2, begin, end;
169 double setuptime, seqloadtime, seqcomptime, nwaytime, savetime, totaltime;
180 load_parameters(para_file_path);
182 setuptime = difftime(t2, t1);
189 seqloadtime = difftime(t2, t1);
196 seqcomptime = difftime(t2, t1);
203 nwaytime = difftime(t2, t1);
210 savetime = difftime(t2, t1);
213 totaltime = difftime(end, begin);
215 cout << "setup\tseqload\tseqcomp\tnway\tsave\ttotal\n";
216 cout << setuptime << "\t";
217 cout << seqloadtime << "\t";
218 cout << seqcomptime << "\t";
219 cout << nwaytime << "\t";
220 cout << savetime << "\t";
221 cout << totaltime << "\n";
228 list<string>::iterator seq_files_i, annot_files_i;
229 list<int>::iterator fasta_indices_i, seq_starts_i, seq_ends_i;
232 seq_files_i = seq_files.begin();
233 fasta_indices_i = fasta_indices.begin();
234 annot_files_i = annot_files.begin();
235 seq_starts_i = sub_seq_starts.begin();
236 seq_ends_i = sub_seq_ends.begin();
239 while (seq_files_i != seq_files.end())
240 /* it should be guarenteed that each of the following exist
242 (fasta_indices_i != fasta_indices.end()) &&
243 (annot_files_i != annot_files.end()) &&
244 (seq_starts_i != sub_seq_starts.end()) &&
245 (seq_ends_i != sub_seq_ends.end()) )
248 aSeq.load_fasta(*seq_files_i, *fasta_indices_i,*seq_starts_i,*seq_ends_i);
249 if (*annot_files_i != "")
250 aSeq.load_annot(*annot_files_i);
251 the_Seqs.push_back(aSeq);
252 cout << aSeq.hdr() << endl;
253 //cout << aSeq.seq() << endl;
273 int i, i2; // loop vars over sequences to analyze
274 vector<int> seq_lens;
275 vector<FLPs> empty_FLP_vector;
277 string save_file_string;
279 empty_FLP_vector.clear();
280 for(i = 0; i < seq_num; i++)
282 all_comps.push_back(empty_FLP_vector);
283 for(i2 = 0; i2 < seq_num; i2++)
284 all_comps[i].push_back(dummy_comp);
287 for(i = 0; i < seq_num; i++)
288 seq_lens.push_back(the_Seqs[i].len());
290 for(i = 0; i < seq_num; i++)
291 for(i2 = i+1; i2 < seq_num; i2++)
293 all_comps[i][i2].setup("m", window, threshold, seq_lens[i],seq_lens[i2]);
294 all_comps[i][i2].seqcomp(the_Seqs[i].seq(), the_Seqs[i2].seq(), false);
295 all_comps[i][i2].seqcomp(the_Seqs[i].seq(),the_Seqs[i2].rev_comp(),true);
297 all_comps[i][i2].file_save(save_file_string);
306 the_paths.setup(seq_num, window, threshold);
307 the_paths.find_paths_r(all_comps);
308 the_paths.simple_refine();
314 string save_path_base, save_path;
318 // gotta do bit with adding win & thres if to be appended - need itos
320 // not sure why, but gotta close file each time since can't pass file streams
322 save_path_base = ana_name;
325 save_path_base += "_w" + int_to_str(window);
328 save_path_base += "_t" + int_to_str(threshold);
330 // save sequence and annots to a special mussa file
331 save_path = save_path_base + ".museq";
332 save_file.open(save_path.c_str(), ios::out);
333 save_file << "<Mussa_Sequence>" << endl;
336 for(i = 0; i < seq_num; i++)
337 the_Seqs[i].save(save_file);
339 //save_file.open(save_path.c_str(), ios::app);
340 save_file << "</Mussa_Sequence>" << endl;
343 // save nway paths to its mussa save file
344 save_path = save_path_base + ".muway";
345 the_paths.save(save_path);
349 Mussa::load(string ana_file)
352 string load_file_path;
357 load_file_path = ana_name + ".muway";
358 seq_num = the_paths.load(load_file_path);
360 load_file_path = ana_name + ".museq";
361 for (i = 1; i <= seq_num; i++)
364 tmp_seq.load_museq(load_file_path, i);
365 the_Seqs.push_back(tmp_seq);
384 save_file.open(ana_name.c_str(), ios::out);
386 for(i = 0; i < seq_num; i++)
387 save_file << the_Seqs[i].seq() << endl;
389 save_file << window << endl;
391 //note more complex eventually since ana_name may need to have
392 //window size, threshold and other stuff to modify it...
393 the_paths.save_old(ana_name);
398 Mussa::load_old(char * load_file_path, int s_num)
401 string file_data_line;
402 int i, space_split_i, comma_split_i;
403 vector<int> loaded_path;
404 string node_pair, node;
408 the_paths.setup(seq_num, 0, 0);
409 save_file.open(load_file_path, ios::in);
411 // currently loads old mussa format
414 for(i = 0; i < seq_num; i++)
416 getline(save_file, file_data_line);
417 a_seq.set_seq(file_data_line);
418 the_Seqs.push_back(a_seq);
422 getline(save_file, file_data_line);
423 window = atoi(file_data_line.c_str());
426 while (!save_file.eof())
429 getline(save_file, file_data_line);
430 if (file_data_line != "")
431 for(i = 0; i < seq_num; i++)
433 space_split_i = file_data_line.find(" ");
434 node_pair = file_data_line.substr(0,space_split_i);
435 //cout << "np= " << node_pair;
436 comma_split_i = node_pair.find(",");
437 node = node_pair.substr(comma_split_i+1);
438 //cout << "n= " << node << " ";
439 loaded_path.push_back(atoi (node.c_str()));
440 file_data_line = file_data_line.substr(space_split_i+1);
443 the_paths.add_path(loaded_path);
447 //the_paths.save("tmp.save");