From: Diane Trout Date: Tue, 14 Mar 2006 05:08:14 +0000 (+0000) Subject: just load the sequence X-Git-Url: http://woldlab.caltech.edu/gitweb/?a=commitdiff_plain;h=cc0b69411a2d71e2b6e94d742f373a8ccd67b8ce;p=mussa.git just load the sequence The old load-sequence code stored the information about each sequence in a set of lists and only later went and actually loaded the data files (this probably had something to do with mussa originally being a C program). This version loads the data as soon as that information has been collected and pushes the resulting Sequence onto our vector. --- diff --git a/alg/mussa.cpp b/alg/mussa.cpp index 47794ef..224daa7 100644 --- a/alg/mussa.cpp +++ b/alg/mussa.cpp @@ -38,11 +38,6 @@ Mussa::clear() soft_thres = 0; win_append = false; thres_append = false; - seq_files.clear(); - fasta_indices.clear(); - annot_files.clear(); - sub_seq_starts.clear(); - sub_seq_ends.clear(); } // these 5 simple methods manually set the parameters for doing an analysis @@ -64,8 +59,6 @@ Mussa::size() const { if (the_seqs.size() > 0) return the_seqs.size(); - else if (seq_files.size() > 0) - return seq_files.size(); else return 0; } @@ -147,27 +140,27 @@ Mussa::add_a_seq(string a_seq) the_seqs.push_back(aSeq); } - -// sets info for just 1 seq at a time -void -Mussa::set_seq_info(string seq_file, string annot_file, int fa_i, int a_start, int the_end) -{ - seq_files.push_back(seq_file); - fasta_indices.push_back(fa_i); - annot_files.push_back(annot_file); - sub_seq_starts.push_back(a_start); - sub_seq_ends.push_back(the_end); -} - const vector& Mussa::sequences() const { return the_seqs; } +void Mussa::load_sequence(string seq_file, string annot_file, int fasta_index, + int sub_seq_start, int sub_seq_end) +{ + Sequence aseq; + aseq.load_fasta(seq_file, fasta_index, sub_seq_start, sub_seq_end); + if (annot_file.size() > 0) { + aseq.load_annot(annot_file, sub_seq_start, sub_seq_end); + } + the_seqs.push_back(aseq); +} + void Mussa::load_mupa_file(string para_file_path) { + string 
file_path_base; ifstream para_file; string file_data_line; string param, value, annot_file; @@ -223,7 +216,7 @@ Mussa::load_mupa_file(string para_file_path) threshold = atoi(value.c_str()); else if (param == "SEQUENCE") { - seq_files.push_back(file_path_base + value); + string seq_file = file_path_base + value; //cout << "seq_file_name " << seq_files.back() << endl; fasta_index = 1; annot_file = ""; @@ -252,11 +245,8 @@ Mussa::load_mupa_file(string para_file_path) else if ((param == "") || (param == "#")) {} else seq_params = false; } - - fasta_indices.push_back(fasta_index); - annot_files.push_back(annot_file); - sub_seq_starts.push_back(sub_seq_start); - sub_seq_ends.push_back(sub_seq_end); + load_sequence(seq_file, annot_file, fasta_index, sub_seq_start, + sub_seq_end); did_seq = true; } //ignore empty lines or that start with '#' @@ -311,7 +301,6 @@ Mussa::analyze(int w, int t, enum Mussa::analysis_modes the_ana_mode, double new } t1 = time(NULL); - load_sequence_data(); if (the_seqs.size() < 2) { throw mussa_analysis_error("you need to have at least 2 sequences to " @@ -351,49 +340,6 @@ Mussa::analyze(int w, int t, enum Mussa::analysis_modes the_ana_mode, double new //cout << totaltime << "\n"; } - -void -Mussa::load_sequence_data() -{ - list::iterator seq_files_i, annot_files_i; - list::iterator fasta_indices_i, seq_starts_i, seq_ends_i; - Sequence aSeq; - string err_msg; - - - seq_files_i = seq_files.begin(); - fasta_indices_i = fasta_indices.begin(); - annot_files_i = annot_files.begin(); - seq_starts_i = sub_seq_starts.begin(); - seq_ends_i = sub_seq_ends.begin(); - - while ( (seq_files_i != seq_files.end()) && (err_msg == "") ) - /* it should be guarenteed that each of the following exist - should I bother checking, and how to deal with if not true... 
- && - (fasta_indices_i != fasta_indices.end()) && - (annot_files_i != annot_files.end()) && - (seq_starts_i != sub_seq_starts.end()) && - (seq_ends_i != sub_seq_ends.end()) ) - */ - { - aSeq.load_fasta(*seq_files_i, *fasta_indices_i,*seq_starts_i, *seq_ends_i); - if (annot_files_i->size() > 0) - aSeq.load_annot(*annot_files_i, *seq_starts_i, *seq_ends_i); - - the_seqs.push_back(aSeq); - //cout << aSeq.get_header() << endl; - //cout << aSeq.get_seq() << endl; - aSeq.clear(); - ++seq_files_i; // advance all the iterators - ++fasta_indices_i; - ++annot_files_i; - ++seq_starts_i; - ++seq_ends_i; - } -} - - void Mussa::seqcomp() { diff --git a/alg/mussa.hpp b/alg/mussa.hpp index 805b09d..20b1069 100644 --- a/alg/mussa.hpp +++ b/alg/mussa.hpp @@ -89,9 +89,19 @@ class Mussa //! appends a string sequence to the list of the_seqs void add_a_seq(std::string a_seq); - // sets info to load a seq and annotations from a fasta file - void set_seq_info(std::string seq_file, std::string annot_file, - int fa_i, int a_start, int the_end); + //! Load a sequence from a fasta file and any annotations + /*! \param[in] seq_file the full path to the fasta file + * \param[in] annot_file the full path to an annotation file, + * if is an empty string, we won't bother loading anything + * \param[in] fasta_index specify which sequence in a multisequence fasta + * file + * \param[in] sub_seq_start starting slice index to select a subsequence + * use 0 start from the beginning. + * \param[in] sub_seq_end ending slice index to select a subsequence + * use 0 to go to the end. + */ + void load_sequence(std::string seq_file, std::string annot_file, + int fasta_index, int sub_seq_start=0, int sub_seq_end=0); //! 
allow examining the sequences we have loaded const std::vector& sequences() const; @@ -104,12 +114,9 @@ class Mussa // Private variables // parameters needed for a mussa analysis std::string analysis_name; - std::string file_path_base; int window, threshold, soft_thres; enum analysis_modes ana_mode; double ent_thres; - std::list seq_files, annot_files; - std::list fasta_indices, sub_seq_starts, sub_seq_ends; bool win_override, thres_override; bool win_append, thres_append; @@ -122,7 +129,6 @@ class Mussa // Private methods //! loads sequence and annotations from fasta and annotation file - void load_sequence_data(); void seqcomp(); }; diff --git a/gui/SetupWindow.cpp b/gui/SetupWindow.cpp index 7b57abd..fa63042 100644 --- a/gui/SetupWindow.cpp +++ b/gui/SetupWindow.cpp @@ -45,9 +45,10 @@ SetupWindow::real_do_analysis() an_analysis->set_window(window); an_analysis->set_threshold(threshold); - for(i=0; i < seq_num; i++) - an_analysis->set_seq_info(seq_files[i], annot_files[i], fasta_indices[i], - sub_seq_starts[i], sub_seq_ends[i]); + for(i=0; i < seq_num; i++) { + an_analysis->load_sequence(seq_files[i], annot_files[i], fasta_indices[i], + sub_seq_starts[i], sub_seq_ends[i]); + } try { an_analysis->analyze();