5 * Created by Alan You on 8/11/08.
6 * Copyright 2008 Hudson Alpha. All rights reserved.
16 #include <ext/hash_map>
22 using namespace __gnu_cxx;
25 bool operator() (const string& a, const string& b) const {
26 return(strcmp(a.c_str(), b.c_str()) == 0);
31 size_t operator() (const string& a) const {
37 typedef hash_map<string, string, hash_str, eq_str> ChromList;
39 void importWindows(string& filename, string& peakDataTitle, vector<Window>& peaks);
40 void retrieveSequenceData(ChromList chrom_files, vector<Window>& peaks);
41 void outputSequenceData(string peakDataTitle, vector<Window>& peaks);
42 void outputPrimer3Input(vector<Window>& peaks);
44 int main (int argc, char * const argv[])
47 cout << "Usage: " << argv[0] << " bed_file_of_peaks chromosome_file" << endl;
51 string filename(argv[1]);
52 string chrom_list(argv[2]);
58 setvbuf(stdout, (char*)NULL, _IONBF, 0);
59 if (pipe(input_pipe)) {
60 cout << "Pipe error!" << endl;
64 if ((pid = fork()) == -1) {
65 cout << "Fork error!" << endl;
71 dup2(input_pipe[0], 0); close(input_pipe[1]);
72 if (execlp("primer3_core", "primer3_core", NULL) == -1){
73 cerr << "execl error!" << endl;
76 cerr << "Done with primer3\n";
78 ChromList chromosome_files;
79 char buffer[1024]; string delim("\t");
80 ifstream chrom_file(chrom_list.c_str());
81 while(chrom_file.getline(buffer, 1024)) {
84 split(temp, delim,words);
85 if(words.size() != 2) { cerr << "Error in chrom list." << endl; exit(1); }
86 chromosome_files[ words[0] ] = words[1];
90 // ChIP-seq peak data (BED format required)
91 ifstream peakDataFile;
93 // Title line from ChIP-seq peak data
96 // All peak calls. Each peak call contains chromomosme number, starting base pair, post-ending base
97 // pair, peak length, score, and corresponding sequence.
100 dup2(input_pipe[1], 1); close(input_pipe[0]);
101 importWindows(filename, peakDataTitle, peaks);
102 retrieveSequenceData(chromosome_files, peaks);
103 outputPrimer3Input(peaks);
104 close(input_pipe[1]);
112 void importWindows(string& filename, string& peakDataTitle, vector<Window>& peaks)
114 ifstream peakDataFile;
115 peakDataFile.open(filename.c_str());
119 peakDataFile.getline(temp, 1024);
120 peakDataTitle = temp;
122 while(peakDataFile.getline(temp, 1024)) {
123 if(peakDataFile.eof()) { peakDataFile.close(); break; }
124 Window inputCall(temp);
125 peaks.push_back(inputCall);
128 sort(peaks.begin(), peaks.end());
132 void retrieveSequenceData(ChromList chrom_filenames, vector<Window>& peaks)
136 string chrom = peaks[0].chr;
137 string chrom_filename = chrom_filenames[chrom];
138 ifstream chrom_file(chrom_filename.c_str());
139 chrom_file.getline(temp, 1024);
140 size_t offset = chrom_file.gcount();
141 for(vector<Window>::iterator i = peaks.begin(); i != peaks.end(); ++i) {
142 if(i->chr != chrom) {
144 chrom_filename = chrom_filenames[chrom];
145 chrom_file.close(); chrom_file.open(chrom_filename.c_str());
146 chrom_file.getline(temp, 1024);
147 offset = chrom_file.gcount();
149 unsigned int begin = i->start;
150 unsigned int end = i->end;
152 unsigned int begin_pos = offset + (int)begin/50 + begin;
153 unsigned int end_pos = offset + (int)end/50 + end;
155 unsigned int read_len = end_pos - begin_pos;
156 char buffer[read_len+1];
157 chrom_file.seekg(begin_pos, ios_base::beg);
158 chrom_file.read(buffer, read_len+1);
159 buffer[read_len] = '\0';
160 i->sequence = buffer;
162 while( (a= ((i->sequence).find("\n"))) != string::npos) {
163 i->sequence.erase(a,1);
169 void outputPrimer3Input(vector<Window>& peaks)
171 cout << "PRIMER_NUM_RETURN=1" << endl;
172 cout << "PRIMER_LOWERCASE_MASKING=1" << endl;
173 //cout << "PRIMER_TM_SANTALUCIA=1" << endl;
174 cout << "PRIMER_SELF_END=2" << endl;
175 cout << "PRIMER_SELF_ANY=6" << endl;
176 cout << "PRIMER_OPT_SIZE=23" << endl;
177 cout << "PRIMER_MIN_SIZE=19" << endl;
178 cout << "PRIMER_MAX_SIZE=26" << endl;
179 cout << "PRIMER_MIN_TM=63" << endl;
180 cout << "PRIMER_OPT_TM=65" << endl;
181 cout << "PRIMER_MAX_TM=67" << endl;
182 cout << "PRIMER_PRODUCT_SIZE_RANGE=60-80 50-100" << endl;
184 unsigned int idx = 0;
185 for(vector<Window>::iterator i = peaks.begin(); i != peaks.end(); ++i) {
186 //cout << "PRIMER_SEQUENCE_ID=" << i->chr << "_" << idx++ << "_" << i->score << endl;
187 cout << "PRIMER_SEQUENCE_ID=" << i->name << i->score << endl;
188 cout << "SEQUENCE=" << i->sequence.c_str() << endl;