1 // ----------------------------------------
2 // ---------- mussa_class.cc -----------
3 // ----------------------------------------
5 #include "mussa_class.hh"
7 // doesn't do neg ints...
8 // should get rid of and replace int to str needs with string stream stuff...
10 int_to_str(int an_int)
22 remainder = an_int % 10;
25 converted_int = "0" + converted_int;
26 else if (remainder == 1)
27 converted_int = "1" + converted_int;
28 else if (remainder == 2)
29 converted_int = "2" + converted_int;
30 else if (remainder == 3)
31 converted_int = "3" + converted_int;
32 else if (remainder == 4)
33 converted_int = "4" + converted_int;
34 else if (remainder == 5)
35 converted_int = "5" + converted_int;
36 else if (remainder == 6)
37 converted_int = "6" + converted_int;
38 else if (remainder == 7)
39 converted_int = "7" + converted_int;
40 else if (remainder == 8)
41 converted_int = "8" + converted_int;
42 else if (remainder == 9)
43 converted_int = "9" + converted_int;
56 // set all parameters to null state
67 fasta_indices.clear();
69 sub_seq_starts.clear();
73 // these 5 simple methods manually set the parameters for doing an analysis
74 // used so that the gui can take input from user and setup the analysis
75 // note - still need a set_append(bool, bool) method...
77 Mussa::set_name(string a_name)
83 Mussa::set_seq_num(int a_num)
89 Mussa::set_window(int a_window)
95 Mussa::set_threshold(int a_threshold)
97 threshold = a_threshold;
100 // sets info for just 1 seq at a time
102 Mussa::set_seq_info(string seq_file, string annot_file, int fa_i, int a_start, int the_end)
104 seq_files.push_back(seq_file);
105 fasta_indices.push_back(fa_i);
106 annot_files.push_back(annot_file);
107 sub_seq_starts.push_back(a_start);
108 sub_seq_ends.push_back(the_end);
112 Mussa::load_mupa_file(string para_file_path)
115 string file_data_line;
116 string param, value, annot_file;
117 int split_index, fasta_index;
118 int sub_seq_start, sub_seq_end;
119 bool seq_params, did_seq;
127 para_file.open(para_file_path.c_str(), ios::in);
129 // if file was opened, read the parameter values
132 // setup loop by getting file's first line
133 getline(para_file,file_data_line);
134 split_index = file_data_line.find(" ");
135 param = file_data_line.substr(0,split_index);
136 value = file_data_line.substr(split_index+1);
138 while (!para_file.eof())
141 if (param == "ANA_NAME")
143 else if (param == "APPEND_WIN")
145 else if (param == "APPEND_THRES")
147 else if (param == "SEQUENCE_NUM")
148 seq_num = atoi(value.c_str());
149 else if (param == "WINDOW")
150 window = atoi(value.c_str());
151 else if (param == "THRESHOLD")
152 threshold = atoi(value.c_str());
153 else if (param == "SEQUENCE")
155 seq_files.push_back(value);
162 while ((!para_file.eof()) && seq_params)
164 getline(para_file,file_data_line);
165 split_index = file_data_line.find(" ");
166 param = file_data_line.substr(0,split_index);
167 value = file_data_line.substr(split_index+1);
169 if (param == "FASTA_INDEX")
170 fasta_index = atoi(value.c_str());
171 else if (param == "ANNOTATION")
173 else if (param == "SEQ_START")
174 sub_seq_start = atoi(value.c_str());
175 else if (param == "SEQ_END")
177 cout << "hey! " << atoi(value.c_str()) << endl;
178 sub_seq_end = atoi(value.c_str());
180 //ignore empty lines or that start with '#'
181 else if ((param == "") || (param == "#")) {}
182 else seq_params = false;
185 fasta_indices.push_back(fasta_index);
186 annot_files.push_back(annot_file);
187 sub_seq_starts.push_back(sub_seq_start);
188 sub_seq_ends.push_back(sub_seq_end);
191 //ignore empty lines or that start with '#'
192 else if ((param == "") || (param == "#")) {}
195 cout << "Illegal/misplaced mussa parameter in file\n";
196 cout << param << "\n";
201 getline(para_file,file_data_line);
202 split_index = file_data_line.find(" ");
203 param = file_data_line.substr(0,split_index);
204 value = file_data_line.substr(split_index+1);
211 cout << "ana_name = " << ana_name << win_append << win_append << "\n";
212 cout << "window = " << window << " threshold = " << threshold << "\n";
217 // no file was loaded, signal error
220 err_msg = "Config File: " + para_file_path + " not found";
225 // if (!((param == "") || (param == "#")))
226 // cout << value << " = " << param << endl;
230 Mussa::analyze(int w, int t)
233 time_t t1, t2, begin, end;
234 double setuptime, seqloadtime, seqcomptime, nwaytime, savetime, totaltime;
239 // now loading parameters from file must be done separately
241 //err_msg = load_mupa_file(para_file_path);
243 //setuptime = difftime(t2, t1);
254 err_msg = get_Seqs();
256 seqloadtime = difftime(t2, t1);
265 seqcomptime = difftime(t2, t1);
272 nwaytime = difftime(t2, t1);
279 savetime = difftime(t2, t1);
282 totaltime = difftime(end, begin);
285 cout << "seqload\tseqcomp\tnway\tsave\ttotal\n";
287 //cout << setuptime << "\t";
288 cout << seqloadtime << "\t";
289 cout << seqcomptime << "\t";
290 cout << nwaytime << "\t";
291 cout << savetime << "\t";
292 cout << totaltime << "\n";
305 list<string>::iterator seq_files_i, annot_files_i;
306 list<int>::iterator fasta_indices_i, seq_starts_i, seq_ends_i;
311 seq_files_i = seq_files.begin();
312 fasta_indices_i = fasta_indices.begin();
313 annot_files_i = annot_files.begin();
314 seq_starts_i = sub_seq_starts.begin();
315 seq_ends_i = sub_seq_ends.begin();
319 while ( (seq_files_i != seq_files.end()) && (err_msg == "") )
320 /* it should be guarenteed that each of the following exist
321 should I bother checking, and how to deal with if not true...
323 (fasta_indices_i != fasta_indices.end()) &&
324 (annot_files_i != annot_files.end()) &&
325 (seq_starts_i != sub_seq_starts.end()) &&
326 (seq_ends_i != sub_seq_ends.end()) )
329 err_msg = aSeq.load_fasta(*seq_files_i, *fasta_indices_i,*seq_starts_i,
333 if (*annot_files_i != "")
334 err_msg = aSeq.load_annot(*annot_files_i, *seq_starts_i, *seq_ends_i);
338 the_Seqs.push_back(aSeq);
339 cout << aSeq.hdr() << endl;
340 //cout << aSeq.seq() << endl;
342 ++seq_files_i; // advance all the iterators
362 int i, i2; // loop vars over sequences to analyze
363 vector<int> seq_lens;
364 vector<FLPs> empty_FLP_vector;
366 string save_file_string;
368 empty_FLP_vector.clear();
369 for(i = 0; i < seq_num; i++)
371 all_comps.push_back(empty_FLP_vector);
372 for(i2 = 0; i2 < seq_num; i2++)
373 all_comps[i].push_back(dummy_comp);
375 for(i = 0; i < seq_num; i++)
376 seq_lens.push_back(the_Seqs[i].len());
378 for(i = 0; i < seq_num; i++)
379 for(i2 = i+1; i2 < seq_num; i2++)
381 cout << "seqcomping: " << i << " v. " << i2 << endl;
382 all_comps[i][i2].setup("m", window, threshold, seq_lens[i],seq_lens[i2]);
383 all_comps[i][i2].seqcomp(the_Seqs[i].seq(), the_Seqs[i2].seq(), false);
384 all_comps[i][i2].seqcomp(the_Seqs[i].seq(),the_Seqs[i2].rev_comp(),true);
385 //all_comps[i][i2].file_save(save_file_string);
394 the_paths.setup(seq_num, window, threshold);
395 //the_paths.find_paths_r(all_comps);
396 the_paths.trans_path_search(all_comps);
397 //the_paths.radiate_path_search(all_comps);
398 the_paths.simple_refine();
404 string save_path_base, save_path;
408 // gotta do bit with adding win & thres if to be appended - need itos
410 // not sure why, but gotta close file each time since can't pass file streams
412 save_path_base = ana_name;
415 save_path_base += "_w" + int_to_str(window);
418 save_path_base += "_t" + int_to_str(threshold);
420 // save sequence and annots to a special mussa file
421 save_path = save_path_base + ".museq";
422 save_file.open(save_path.c_str(), ios::out);
423 save_file << "<Mussa_Sequence>" << endl;
426 for(i = 0; i < seq_num; i++)
427 the_Seqs[i].save(save_file);
429 //save_file.open(save_path.c_str(), ios::app);
430 save_file << "</Mussa_Sequence>" << endl;
433 // save nway paths to its mussa save file
434 save_path = save_path_base + ".muway";
435 the_paths.save(save_path);
439 Mussa::load(string ana_file)
442 string load_file_path;
448 load_file_path = ana_name + ".muway";
449 err_msg = the_paths.load(load_file_path);
453 seq_num = the_paths.seq_num();
457 load_file_path = ana_name + ".museq";
459 // this is a bit of a hack due to C++ not acting like it should with files
460 for (i = 1; i <= seq_num; i++)
463 tmp_seq.load_museq(load_file_path, i);
464 the_Seqs.push_back(tmp_seq);
488 save_file.open(ana_name.c_str(), ios::out);
490 for(i = 0; i < seq_num; i++)
491 save_file << the_Seqs[i].seq() << endl;
493 save_file << window << endl;
495 //note more complex eventually since ana_name may need to have
496 //window size, threshold and other stuff to modify it...
497 the_paths.save_old(ana_name);
502 Mussa::load_old(char * load_file_path, int s_num)
505 string file_data_line;
506 int i, space_split_i, comma_split_i;
507 vector<int> loaded_path;
508 string node_pair, node;
512 the_paths.setup(seq_num, 0, 0);
513 save_file.open(load_file_path, ios::in);
515 // currently loads old mussa format
518 for(i = 0; i < seq_num; i++)
520 getline(save_file, file_data_line);
521 a_seq.set_seq(file_data_line);
522 the_Seqs.push_back(a_seq);
526 getline(save_file, file_data_line);
527 window = atoi(file_data_line.c_str());
530 while (!save_file.eof())
533 getline(save_file, file_data_line);
534 if (file_data_line != "")
535 for(i = 0; i < seq_num; i++)
537 space_split_i = file_data_line.find(" ");
538 node_pair = file_data_line.substr(0,space_split_i);
539 //cout << "np= " << node_pair;
540 comma_split_i = node_pair.find(",");
541 node = node_pair.substr(comma_split_i+1);
542 //cout << "n= " << node << " ";
543 loaded_path.push_back(atoi (node.c_str()));
544 file_data_line = file_data_line.substr(space_split_i+1);
547 the_paths.add_path(loaded_path);
551 //the_paths.save("tmp.save");