+
+// Write this sequence and its annotations to an already opened stream.
+//
+// The output is a "<Sequence>" section holding the sequence on a single
+// line, followed by an "<Annotations>" section holding one annotation per
+// line as "start end name type" separated by single spaces.  The stream is
+// supplied by the caller; it is neither opened nor closed here.
+//
+// save_file: stream the record is written to.
+void
+Sequence::save(fstream &save_file)
+{
+  // sequence section
+  save_file << "<Sequence>" << endl
+            << sequence << endl
+            << "</Sequence>" << endl;
+
+  // annotation section, one line per annotation
+  save_file << "<Annotations>" << endl;
+  list<annot>::iterator cur_annot = annots.begin();
+  while (cur_annot != annots.end())
+  {
+    save_file << cur_annot->start << " " << cur_annot->end << " "
+              << cur_annot->name << " " << cur_annot->type << endl;
+    ++cur_annot;
+  }
+  save_file << "</Annotations>" << endl;
+}
+
+// Remove and return the first space-delimited field of `line`.
+// When `line` holds no space, all of the remaining text is returned and
+// `line` is left empty, so a missing trailing field reads as an empty string.
+static string
+museq_pop_field(string &line)
+{
+  const string::size_type space_i = line.find(' ');
+  string field;
+
+  if (space_i == string::npos)
+  {
+    field = line;
+    line.erase();
+  }
+  else
+  {
+    field = line.substr(0, space_i);
+    line.erase(0, space_i + 1);
+  }
+  return field;
+}
+
+// Load the seq_num-th sequence record, and its annotations, from a file
+// laid out the way Sequence::save writes it:
+//
+//   <Sequence>
+//   ...sequence on one line...
+//   </Sequence>
+//   <Annotations>
+//   start end name type
+//   </Annotations>
+//
+// load_file_path: path of the file to read.
+// seq_num:        1-based count of the "<Sequence>" tag to load; for values
+//                 below 1 no tag is searched for and the first line of the
+//                 file is taken as the sequence.
+//
+// The existing annotations are always discarded.  If the file cannot be
+// opened, or it holds fewer than seq_num records, the sequence is left empty
+// and no annotations are loaded.
+// NOTE(review): only `sequence` and `annots` are assigned here; confirm
+// whether any other member derived from the sequence needs refreshing.
+void
+Sequence::load_museq(string load_file_path, int seq_num)
+{
+  fstream load_file;
+  string file_data_line;
+  int seq_counter = 0;
+  annot an_annot;
+
+  annots.clear();
+  load_file.open(load_file_path.c_str(), ios::in);
+
+  // search for the seq_num-th sequence.  The loop is driven by the result of
+  // getline rather than by eof(): a stream that failed to open never reports
+  // eof, which would otherwise spin here forever.
+  while ((seq_counter < seq_num) && getline(load_file, file_data_line))
+  {
+    if (file_data_line == "<Sequence>")
+      seq_counter++;
+  }
+
+  // the line following the tag is the sequence itself
+  if ((seq_counter < seq_num) || !getline(load_file, file_data_line))
+  {
+    // requested record is not in the file (or the file is unreadable)
+    sequence = "";
+    load_file.close();
+    return;
+  }
+  sequence = file_data_line;
+
+  // skip the "</Sequence>" line, then read what should be "<Annotations>"
+  getline(load_file, file_data_line);
+  getline(load_file, file_data_line);
+  if (file_data_line == "<Annotations>")
+  {
+    while (getline(load_file, file_data_line) &&
+           (file_data_line != "</Annotations>"))
+    {
+      // blank lines inside the section are ignored
+      if (file_data_line.empty())
+        continue;
+
+      // each annotation line is "start end name type"
+      an_annot.start = atoi(museq_pop_field(file_data_line).c_str());
+      an_annot.end = atoi(museq_pop_field(file_data_line).c_str());
+      an_annot.name = museq_pop_field(file_data_line);
+      an_annot.type = museq_pop_field(file_data_line);
+      annots.push_back(an_annot);
+    }
+  }
+  load_file.close();
+}
+
+
+// Report whether one motif letter accepts one sequence letter.
+//
+// Identical letters always match, and an 'N' in the sequence is accepted by
+// every motif letter.  Otherwise the motif letter is read as an IUPAC
+// ambiguity code (M, R, W, S, Y, K, V, H, D, B) naming the set of bases it
+// stands for.  Comparison is case sensitive; only upper case codes are
+// recognised.  A motif 'N' is not expanded: it matches only a sequence 'N'.
+static bool
+motif_letter_accepts(char motif_c, char seq_c)
+{
+  if ((motif_c == seq_c) || (seq_c == 'N'))
+    return true;
+
+  switch (motif_c)
+  {
+    case 'M': return (seq_c == 'A') || (seq_c == 'C');
+    case 'R': return (seq_c == 'A') || (seq_c == 'G');
+    case 'W': return (seq_c == 'A') || (seq_c == 'T');
+    case 'S': return (seq_c == 'C') || (seq_c == 'G');
+    case 'Y': return (seq_c == 'C') || (seq_c == 'T');
+    case 'K': return (seq_c == 'G') || (seq_c == 'T');
+    case 'V': return (seq_c == 'A') || (seq_c == 'C') || (seq_c == 'G');
+    case 'H': return (seq_c == 'A') || (seq_c == 'C') || (seq_c == 'T');
+    case 'D': return (seq_c == 'A') || (seq_c == 'G') || (seq_c == 'T');
+    case 'B': return (seq_c == 'C') || (seq_c == 'G') || (seq_c == 'T');
+    default:  return false;
+  }
+}
+
+// Find the occurrences of a motif in this sequence.
+//
+// a_motif: motif to look for; may contain IUPAC ambiguity codes.
+//
+// Returns the 0-based start index of every match, in increasing order.
+// Matches do not overlap: after a hit the scan resumes at the first position
+// past it.  An empty motif yields an empty list.
+list<int>
+Sequence::find_motif(string a_motif)
+{
+  list<int> motif_match_starts;
+  // faster to loop thru the sequence as an old c string (ie char array)
+  const char *seq_c = sequence.c_str();
+  const int motif_len = a_motif.length();
+
+  if (motif_len == 0)
+    return motif_match_starts;
+
+  // `length` is a member set outside this method.
+  // NOTE(review): presumably it equals sequence.length() -- confirm, since
+  // seq_c is indexed up to length - 1.
+  int seq_i = 0;
+  while (seq_i + motif_len <= length)
+  {
+    // try the whole motif at this start position; testing every start on
+    // its own means a failed partial match can never hide a later one
+    int motif_i = 0;
+    while ((motif_i < motif_len) &&
+           motif_letter_accepts(a_motif[motif_i], seq_c[seq_i + motif_i]))
+      motif_i++;
+
+    if (motif_i == motif_len)
+    {
+      motif_match_starts.push_back(seq_i);
+      seq_i += motif_len;
+    }
+    else
+      seq_i++;
+  }
+
+  return motif_match_starts;
+}