1 // ----------------------------------------
2 // ---------- sequence.cc -----------
3 // ----------------------------------------
14 Sequence::load_fasta(string file_path, int seq_num)
17 string file_data_line;
18 int header_counter = 0;
22 data_file.open(file_path.c_str(), ios::in);
25 // search for the header of the fasta sequence we want
26 while ( (!data_file.eof()) && (header_counter < seq_num) )
28 getline(data_file,file_data_line);
29 if (file_data_line.substr(0,1) == ">")
33 header = file_data_line.substr(1);
35 while ( !data_file.eof() && read_seq )
37 getline(data_file,file_data_line);
38 if (file_data_line.substr(0,1) == ">")
40 else sequence += file_data_line;
45 // need more sequence filtering for non AGCTN and convert to N
46 transform(sequence.begin(), sequence.end(), sequence.begin(), toupper);
51 Sequence::load_annot(string file_path)
54 string file_data_line;
57 data_file.open(file_path.c_str(), ios::in);
67 return sequence.length();
80 return sequence.c_str();
87 char conversionTable[257];
88 int seq_i, table_i, len;
90 len = sequence.length();
91 rev_comp.reserve(len);
92 // make a conversion table
93 // init all parts of conversion table to '~' character
94 // '~' I doubt will ever appear in a sequence file (jeez, I hope)
95 // and may the fleas of 1000 camels infest the genitals of any biologist (and
96 // seven generations of their progeny) who decides to make it mean
97 // something special!!!
98 // PS - double the curse for any smartass non-biologist who tries it as well
99 for(table_i=0; table_i < 256; table_i++)
101 conversionTable[table_i] = '~';
103 // add end of string character for printing out table for testing purposes
104 conversionTable[256] = '\0';
106 // add in the characters for the bases we want to convert
107 conversionTable['A'] = 'T';
108 conversionTable['T'] = 'A';
109 conversionTable['G'] = 'C';
110 conversionTable['C'] = 'G';
111 conversionTable['N'] = 'N';
113 // finally, the actual conversion loop
114 for(seq_i = len - 1; seq_i >= 0; seq_i--)
116 table_i = (int) sequence[seq_i];
117 rev_comp += conversionTable[table_i];
132 Sequence::set_seq(string a_seq)