soft_thres = 0;
win_append = false;
thres_append = false;
- seq_files.clear();
- fasta_indices.clear();
- annot_files.clear();
- sub_seq_starts.clear();
- sub_seq_ends.clear();
}
// these 5 simple methods manually set the parameters for doing an analysis
{
if (the_seqs.size() > 0)
return the_seqs.size();
- else if (seq_files.size() > 0)
- return seq_files.size();
else
return 0;
}
the_seqs.push_back(aSeq);
}
-
-// sets info for just 1 seq at a time
-void
-Mussa::set_seq_info(string seq_file, string annot_file, int fa_i, int a_start, int the_end)
-{
- seq_files.push_back(seq_file);
- fasta_indices.push_back(fa_i);
- annot_files.push_back(annot_file);
- sub_seq_starts.push_back(a_start);
- sub_seq_ends.push_back(the_end);
-}
-
const vector<Sequence>&
Mussa::sequences() const
{
return the_seqs;
}
+void Mussa::load_sequence(string seq_file, string annot_file, int fasta_index,
+ int sub_seq_start, int sub_seq_end)
+{ // Load one sequence (and, optionally, its annotations) and append it to the_seqs.
+ Sequence aseq; // fresh Sequence object that receives the loaded data
+ aseq.load_fasta(seq_file, fasta_index, sub_seq_start, sub_seq_end);
+ if (annot_file.size() > 0) { // an empty annot_file means there are no annotations to load
+ aseq.load_annot(annot_file, sub_seq_start, sub_seq_end); // same sub-sequence bounds as the fasta load
+ }
+ the_seqs.push_back(aseq); // stores a copy; reached only after the load calls above have returned
+}
+
void
Mussa::load_mupa_file(string para_file_path)
{
+ string file_path_base;
ifstream para_file;
string file_data_line;
string param, value, annot_file;
threshold = atoi(value.c_str());
else if (param == "SEQUENCE")
{
- seq_files.push_back(file_path_base + value);
+ string seq_file = file_path_base + value;
//cout << "seq_file_name " << seq_files.back() << endl;
fasta_index = 1;
annot_file = "";
else if ((param == "") || (param == "#")) {}
else seq_params = false;
}
-
- fasta_indices.push_back(fasta_index);
- annot_files.push_back(annot_file);
- sub_seq_starts.push_back(sub_seq_start);
- sub_seq_ends.push_back(sub_seq_end);
+ load_sequence(seq_file, annot_file, fasta_index, sub_seq_start,
+ sub_seq_end);
did_seq = true;
}
//ignore empty lines or that start with '#'
}
t1 = time(NULL);
- load_sequence_data();
if (the_seqs.size() < 2) {
throw mussa_analysis_error("you need to have at least 2 sequences to "
//cout << totaltime << "\n";
}
-
-void
-Mussa::load_sequence_data()
-{
- list<string>::iterator seq_files_i, annot_files_i;
- list<int>::iterator fasta_indices_i, seq_starts_i, seq_ends_i;
- Sequence aSeq;
- string err_msg;
-
-
- seq_files_i = seq_files.begin();
- fasta_indices_i = fasta_indices.begin();
- annot_files_i = annot_files.begin();
- seq_starts_i = sub_seq_starts.begin();
- seq_ends_i = sub_seq_ends.begin();
-
- while ( (seq_files_i != seq_files.end()) && (err_msg == "") )
- /* it should be guarenteed that each of the following exist
- should I bother checking, and how to deal with if not true...
- &&
- (fasta_indices_i != fasta_indices.end()) &&
- (annot_files_i != annot_files.end()) &&
- (seq_starts_i != sub_seq_starts.end()) &&
- (seq_ends_i != sub_seq_ends.end()) )
- */
- {
- aSeq.load_fasta(*seq_files_i, *fasta_indices_i,*seq_starts_i, *seq_ends_i);
- if (annot_files_i->size() > 0)
- aSeq.load_annot(*annot_files_i, *seq_starts_i, *seq_ends_i);
-
- the_seqs.push_back(aSeq);
- //cout << aSeq.get_header() << endl;
- //cout << aSeq.get_seq() << endl;
- aSeq.clear();
- ++seq_files_i; // advance all the iterators
- ++fasta_indices_i;
- ++annot_files_i;
- ++seq_starts_i;
- ++seq_ends_i;
- }
-}
-
-
void
Mussa::seqcomp()
{
//! appends a string sequence to the list of the_seqs
void add_a_seq(std::string a_seq);
- // sets info to load a seq and annotations from a fasta file
- void set_seq_info(std::string seq_file, std::string annot_file,
- int fa_i, int a_start, int the_end);
+ //! Load a sequence from a fasta file and any annotations
+ /*! \param[in] seq_file the full path to the fasta file
+ * \param[in] annot_file the full path to an annotation file;
+ * if it is an empty string, no annotations are loaded
+ * \param[in] fasta_index specifies which sequence to load from a
+ * multi-sequence fasta file
+ * \param[in] sub_seq_start starting slice index to select a subsequence;
+ * use 0 to start from the beginning.
+ * \param[in] sub_seq_end ending slice index to select a subsequence;
+ * use 0 to go to the end.
+ */
+ void load_sequence(std::string seq_file, std::string annot_file,
+ int fasta_index, int sub_seq_start=0, int sub_seq_end=0);
//! allow examining the sequences we have loaded
const std::vector<Sequence>& sequences() const;
// Private variables
// parameters needed for a mussa analysis
std::string analysis_name;
- std::string file_path_base;
int window, threshold, soft_thres;
enum analysis_modes ana_mode;
double ent_thres;
- std::list<std::string> seq_files, annot_files;
- std::list<int> fasta_indices, sub_seq_starts, sub_seq_ends;
bool win_override, thres_override;
bool win_append, thres_append;
// Private methods
//! loads sequence and annotations from fasta and annotation file
- void load_sequence_data();
void seqcomp();
};