9 #include "chrom_list.h"
13 string retrieveSequenceData(ChromList chrom_filenames, string chr, unsigned int pos, unsigned int len, string& seq);
15 int main(int argc, char** argv) {
16 if(argc != 3) { cerr << "Usage: " << argv[0] << " chromosomes_file variants_file" << endl; return(0); }
18 char chromosome_filename[1024]; strcpy(chromosome_filename,argv[1]);
19 char variants_filename[1024]; strcpy(variants_filename,argv[2]);
20 ChromList chromosome_files(chromosome_filename);
21 ifstream var(variants_filename);
23 unsigned int count = 0;
24 while(var.getline(buffer, 1024)) {
25 string temp(buffer); string delim("\t");
27 split(temp, delim, words);
28 if(words.size() != 3) { cerr << "Error in variant list. Expected 3 columns, but found " << words.size() << endl; exit(1); }
30 unsigned int pos = atoi(words[1].c_str());
32 retrieveSequenceData(chromosome_files, words[0], pos-50, 100, seq);
33 seq[49] = words[2][0];
34 cout << ">" << words[0] << "|" << pos << "\n" << seq << endl;
38 cerr << "Patched genome in " << count << " places\n";
41 string retrieveSequenceData(ChromList chrom_filenames, string chr, unsigned int pos, unsigned int len, string& seq) {
43 //pick out the correct chromosome file
44 string chrom_filename = chrom_filenames[chr];
45 ifstream chrom_file(chrom_filename.c_str());
47 //knock off the header line, and account for its size contribution to the file
48 char temp[1024]; chrom_file.getline(temp, 1024);
49 size_t offset = chrom_file.gcount();
51 //calculate positions in fasta file to extract.
53 unsigned int begin_pos = offset + (int)pos/50 + pos;
54 unsigned int end_pos = offset + (int)(pos+len)/50 + (pos+len);
56 //pull out all sequence, including newlines
57 unsigned int read_len = end_pos - begin_pos;
58 char buffer[read_len+1]; buffer[read_len] = '\0';
59 chrom_file.seekg(begin_pos, ios_base::beg);
60 chrom_file.read(buffer, read_len);
63 //remove newlines, leaving just the dna sequence
66 while( (a= (seq.find("\n"))) != string::npos) {