{
}
-Sequence::Sequence(string seq):sequence(filter_sequence(seq))
+//! construct a Sequence from raw text
+/*! seq is handed to set_filtered_sequence(), so the text is filtered
+ * before it is stored rather than being kept verbatim.
+ */
+Sequence::Sequence(string seq)
{
+ set_filtered_sequence(seq);
+}
+
+//! copy-assign from another Sequence
+/*! Copies the sequence, header, species and annots members from s.
+ * Assigning an object to itself leaves it unchanged.
+ * NOTE(review): only these four members are copied - confirm the class
+ * has no other state that should follow along.
+ * \return reference to this object
+ */
+Sequence &Sequence::operator=(const Sequence& s)
+{
+  // self-assignment: nothing to copy
+  if (this == &s)
+    return *this;
+
+  sequence = s.sequence;
+  header = s.header;
+  species = s.species;
+  annots = s.annots;
+  return *this;
+}
+
+//! assign raw text to this Sequence
+/*! s is passed through set_filtered_sequence(), which replaces the
+ * stored sequence with the filtered text.
+ * NOTE(review): header, species and annots appear to be left as they
+ * were - confirm that is the intended behaviour.
+ * \return reference to this object
+ */
+Sequence &Sequence::operator=(const std::string& s)
+{
+ set_filtered_sequence(s);
+ return *this;
}
//! load a fasta file into a sequence
data_file.open(file_path.c_str(), ios::in);
if (!data_file)
- {
+ {
throw mussa_load_error("Sequence File: " + file_path + " not found");
}
// if file opened okay, read it
end_index = sequence_raw.size();
// sequence filtering for upcasing agctn and convert non AGCTN to N
- sequence = filter_sequence(sequence_raw, start_index, end_index-start_index);
+ set_filtered_sequence(sequence_raw, start_index, end_index-start_index);
}
}
-string Sequence::filter_sequence(const string &old_seq,
- string::size_type start,
- string::size_type count) const
+//! replace the stored sequence with a filtered copy of part of old_seq
+/*! The previous contents of the sequence member are discarded, then each
+ * character old_seq[start] .. old_seq[start+count-1] is mapped through
+ * conversionTable and appended to it.
+ * \param old_seq raw text to filter
+ * \param start index of the first character of old_seq to convert
+ * \param count number of characters to convert; 0 means everything from
+ * start to the end of old_seq
+ */
+void Sequence::set_filtered_sequence(const string &old_seq,
+ string::size_type start,
+ string::size_type count)
{
char conversionTable[257];
- string new_seq;
if ( count == 0)
count = old_seq.size() - start;
- new_seq.reserve(count);
+ sequence.clear();
+ sequence.reserve(count);
// Make a conversion table
// finally, the actual conversion loop
for(string::size_type seq_index = 0; seq_index < count; seq_index++)
{
- new_seq += conversionTable[ (int)old_seq[seq_index+start]];
+ // index through unsigned char: plain char may be signed, and a
+ // negative value would read before the start of conversionTable
+ sequence += conversionTable[ static_cast<unsigned char>(old_seq[seq_index+start])];
}
- return new_seq;
}
// this doesn't work properly under gcc 3.x ... it can't recognize toupper
an_annot.end = atoi (annot_value.c_str());
file_data_line = file_data_line.substr(space_split_i+1);
- cout << "seq, annots: " << an_annot.start << ", " << an_annot.end
- << endl;
+ //cout << "seq, annots: " << an_annot.start << ", " << an_annot.end
+ // << endl;
// get annot name
space_split_i = file_data_line.find(" ");
}
}
+//! report whether the sequence has no characters
+/*! \return true when size() is 0, false otherwise
+ */
+bool Sequence::empty() const
+{
+ return (size() == 0);
+}
+
const std::list<annot> Sequence::annotations() const
{
return annots;
return sequence.size();
}
+//! iterator positioned at the start of the stored sequence
+/*! NOTE(review): if Sequence::iterator is a mutable iterator, writes made
+ * through it would not pass through set_filtered_sequence() - confirm
+ * that is acceptable.
+ */
+Sequence::iterator Sequence::begin()
+{
+ return sequence.begin();
+}
+
+//! read-only iterator positioned at the start of the stored sequence
+Sequence::const_iterator Sequence::begin() const
+{
+ return sequence.begin();
+}
+
+//! iterator positioned one past the last character of the stored sequence
+Sequence::iterator Sequence::end()
+{
+ return sequence.end();
+}
+
+//! read-only iterator positioned one past the last character of the stored sequence
+Sequence::const_iterator Sequence::end() const
+{
+ return sequence.end();
+}
+
const string&
Sequence::get_seq() const
void
Sequence::set_seq(const string& a_seq)
{
- sequence = filter_sequence(a_seq);
+ // filter a_seq and store the result in the sequence member
+ set_filtered_sequence(a_seq);
}
annot_value = file_data_line.substr(0,space_split_i);
an_annot.end = atoi (annot_value.c_str());
- if (space_split_i == string::npos) // no entry for type or name
- {
- cout << "seq, annots - no type or name\n";
- an_annot.type = "";
- an_annot.name = "";
- }
- else // else get annot type
- {
- file_data_line = file_data_line.substr(space_split_i+1);
- space_split_i = file_data_line.find(" ");
- annot_value = file_data_line.substr(0,space_split_i);
- an_annot.type = annot_value;
- if (space_split_i == string::npos) // no entry for name
- {
- cout << "seq, annots - no name\n";
- an_annot.name = "";
- }
- else // get annot name
- {
- file_data_line = file_data_line.substr(space_split_i+1);
- space_split_i = file_data_line.find(" ");
- annot_value = file_data_line.substr(0,space_split_i);
- an_annot.type = annot_value;
- }
- }
- annots.push_back(an_annot); // don't forget to actually add the annot
+ // parse the optional type and name fields that follow the end index;
+ // a missing field is stored as an empty string
+ if (space_split_i == string::npos) // no entry for type or name
+ {
+ cout << "seq, annots - no type or name\n";
+ an_annot.type = "";
+ an_annot.name = "";
+ }
+ else // else get annot type
+ {
+ file_data_line = file_data_line.substr(space_split_i+1);
+ space_split_i = file_data_line.find(" ");
+ annot_value = file_data_line.substr(0,space_split_i);
+ an_annot.type = annot_value;
+ if (space_split_i == string::npos) // no entry for name
+ {
+ cout << "seq, annots - no name\n";
+ an_annot.name = "";
+ }
+ else // get annot name
+ {
+ file_data_line = file_data_line.substr(space_split_i+1);
+ space_split_i = file_data_line.find(" ");
+ annot_value = file_data_line.substr(0,space_split_i);
+ // this field is the name: storing it in .type would overwrite the
+ // type parsed above and never assign .name in this branch
+ an_annot.name = annot_value;
+ }
+ }
+ annots.push_back(an_annot); // don't forget to actually add the annot
}
- cout << "seq, annots: " << an_annot.start << ", " << an_annot.end
- << "-->" << an_annot.type << "::" << an_annot.name << endl;
+ //cout << "seq, annots: " << an_annot.start << ", " << an_annot.end
+ // << "-->" << an_annot.type << "::" << an_annot.name << endl;
}
}
load_file.close();
rev_comp += conversionTable[table_i];
}
- cout << "seq: " << a_motif << endl;
- cout << "rc: " << rev_comp << endl;
+ //cout << "seq: " << a_motif << endl;
+ //cout << "rc: " << rev_comp << endl;
return rev_comp;
}
valid_motif += 'B';
}
- cout << "valid_motif is: " << valid_motif << endl;
+ //cout << "valid_motif is: " << valid_motif << endl;
return valid_motif;
}
motif_match_starts.clear();
- cout << "motif is: " << a_motif << endl;
+ //cout << "motif is: " << a_motif << endl;
a_motif = motif_validate(a_motif);
//cout << "motif is: " << a_motif << endl;
if (a_motif != "")
{
- cout << "Sequence: none blank motif\n";
+ //cout << "Sequence: none blank motif\n";
motif_scan(a_motif, &motif_match_starts);
a_motif_rc = rc_motif(a_motif);
seq_i = 0;
while (seq_i < sequence.length())
{
- cout << seq_c[seq_i];
- //if ((seq_i > 10885) && (seq_i < 10917))
+ //cout << seq_c[seq_i];
//cout << seq_c[seq_i] << "?" << a_motif[motif_i] << ":" << motif_i << " ";
// this is pretty much a straight translation of Nora's python code
// to match iupac letter codes
cout << endl;
}
-/*
- // get annot start index
- space_split_i = file_data_line.find(" ");
- annot_value = file_data_line.substr(0,space_split_i);
- an_annot.name = annot_value;
- file_data_line = file_data_line.substr(space_split_i+1);
- // get annot start index
- space_split_i = file_data_line.find(" ");
- annot_value = file_data_line.substr(0,space_split_i);
- an_annot.type = annot_value;
-*/