11 #include "chrom_list.h"
15 bool Wind_lt_Read(const Window& feat, const Read& read) { if(read.chr == feat.chr) { return feat.end < read.pos; } else { return feat.chr < read.chr; } }
17 extern bool compare_windows(const Window &a, const Window &b);
18 extern bool compare_reads(const Read &a, const Read &b);
20 void readFromPipe( int fd );
21 void read_windows(const char* filename, Windows& windows);
22 void read_align_file(char* filename, Reads& features);
23 void count_reads_in_windows(Reads& data, Windows& windows);
24 void recenter_windows(Windows& windows, unsigned int width);
25 void resize_windows(Windows& windows, unsigned int width);
27 void retrieveSequenceData(ChromList chrom_files, Windows& peaks);
28 void outputPrimer3Input(Windows& peaks, Windows& calls);
30 int main(int argc, char** argv) {
31 if(argc != 4) { cerr << "Usage: " << argv[0] << " peaks.bed aligned_reads chromosome_file" << endl; return(0); }
32 char window_filename[1024]; strcpy(window_filename,argv[1]);
33 char reads_filename[1024]; strcpy(reads_filename,argv[2]);
34 char chromosome_filename[1024]; strcpy(chromosome_filename,argv[3]);
38 int pipe_to_primer[2];
39 //int pipe_from_primer[2];
41 setvbuf(stdout, (char*)NULL, _IONBF, 0);
43 if (pipe(pipe_to_primer)) { cout << "Pipe error!" << endl; exit(1); }
44 //if (pipe(pipe_from_primer)) { cout << "Pipe error!" << endl; exit(1); }
45 if ((pid = fork()) == -1) { cout << "Fork error!" << endl; exit(1); }
49 dup2(pipe_to_primer[0], 0); close(pipe_to_primer[1]);
50 //dup2(pipe_from_primer[1], 1); close(pipe_from_primer[0]);
51 if (execlp("primer3_core","primer3_core", NULL) == -1){ cerr << "execl error!" << endl; exit(1); }
53 dup2(pipe_to_primer[1], 1); close(pipe_to_primer[0]);
54 //dup2(pipe_from_primer[0], 0); close(pipe_from_primer[1]);
57 ChromList chromosome_files(chromosome_filename);
59 // Title line from ChIP-seq peak data
62 // All peak calls. Each peak call contains chromomosme number, starting base pair, post-ending base
63 // pair, peak length, score, and corresponding sequence.
65 read_windows(window_filename, windows);
67 read_align_file(reads_filename, reads);
68 count_reads_in_windows(reads, windows);
69 for(Windows::iterator i = windows.begin(); i != windows.end(); ++i) { i->count_hits(); }
71 Windows targets(windows);
72 recenter_windows(targets,60);
73 count_reads_in_windows(reads, targets);
75 unsigned int max_reads = 0;
76 unsigned int min_reads = 1e8;
77 Windows::iterator max, min;
78 for(Windows::iterator i = targets.begin(); i != targets.end(); ++i) {
80 if(i->total > max_reads) { max_reads = i->total; max = i; }
81 if(i->total < min_reads) { min_reads = i->total; min = i; }
82 cerr << i->total << " at " << i->chr << " " << i->start << " " << i->end << endl;
85 cerr << "Max: " << max_reads << " at " << max->chr << " " << max->start << " " << max->end << endl << "Min: " << min_reads << " at " << min->chr << " " << min->start << " " << min->end << endl;
87 retrieveSequenceData(chromosome_files, targets);
88 outputPrimer3Input(targets, windows);
89 close(pipe_to_primer[1]);
91 //readFromPipe(pipe_from_primer[0]);
92 //close(pipe_from_primer[0]);
98 void readFromPipe( int fd )
102 if ( (stream = fdopen( fd, "r" )) == NULL )
104 perror( "fdopen() r" );
107 while ( (ch = getc( stream )) != EOF )
113 void read_windows(const char* filename, Windows& windows) {
116 ifstream feat(filename);
121 feat.getline(line,1024,'\n');
122 while(feat.peek() != EOF) {
123 feat.getline(line,1024,'\n');
125 string line_str(line);
126 vector<string> fields;
127 split(line_str, delim, fields);
128 if(fields.size() < 4) { cerr << "Error (" << filename << "): wrong number of fields in feature list (line " << N << " has " << fields.size() << " fields)\n";
132 string chr = fields[0];
133 if(chr == "NA") { continue; }
134 int start = atoi(fields[1].c_str());
135 int stop = atoi(fields[2].c_str());
136 double score = strtod(fields[3].c_str(),NULL);
138 char name[100]; sprintf(name,"Window %d",N);
139 Window w(string(name),chr,start-300,stop+300,score);
140 windows.push_back(w);
143 //sort the features so we can run through it once
144 std::stable_sort(windows.begin(),windows.end(),compare_windows);
148 void count_reads_in_windows(Reads& data, Windows& windows) {
149 Windows::iterator wind_it = windows.begin();
151 for(Reads::iterator i = data.begin(); i != data.end(); ++i) {
152 //skip to first window after read
153 while(wind_it != windows.end() && Wind_lt_Read(*wind_it, *i)) {
157 //stop if we have run out of windows.
158 if(wind_it == windows.end()) { break; }
160 if(i->pos >= wind_it->start && i->pos < wind_it->end) {
161 int pos = i->pos - wind_it->start;
162 wind_it->counts[pos]++;
167 void read_align_file(char* filename, Reads& features) {
169 string location_delim(":");
170 char strand_str[2]; strand_str[1] = '\0';
171 ifstream seqs(filename);
175 while(seqs.peek() != EOF) {
176 seqs.getline(line,2048,'\n');
178 string line_str(line);
179 vector<string> fields;
180 split(line_str, delim, fields);
181 if(fields.size() != 7) { continue; }
183 vector<string> location; split(fields[3], location_delim, location);
184 string chr = location[0];
185 if(chr == "NA") { continue; }
186 int pos = atoi(location[1].c_str());
187 bool strand = ((fields[4].c_str())[0] == 'F')?0:1;
190 Read read(chr,pos,strand);
191 features.push_back(read);
193 Read read(chr,pos+25,strand);
194 features.push_back(read);
199 //sort the data so we can run through it once
200 std::sort(features.begin(),features.end(),compare_reads);
201 cerr << "Reads sorted\n";
204 //Look at reads in each window, and pick out the max. Return target regions of width-bp before and after the region.
205 void recenter_windows(Windows& windows, unsigned int width) {
206 for(Windows::iterator i = windows.begin(); i != windows.end(); ++i) {
207 unsigned int max_count = 0; unsigned int max_offset = 0;
208 for(unsigned int count_i = 0; count_i < i->counts.size(); count_i++) {
209 if(i->counts[count_i] > max_count) { max_count = i->counts[count_i]; max_offset = count_i; }
211 unsigned int newstart = i->start + max_offset - width;
212 unsigned int newend = i->start + max_offset + width;
213 i->reset(newstart, newend);
217 //Look at reads in each window, and pick out the max. Return target regions of width-bp before and after the region.
218 void resize_windows(Windows& windows, unsigned int width) {
219 for(Windows::iterator i = windows.begin(); i != windows.end(); ++i) {
220 unsigned int middle = i->start + i->end / 2.0;
221 i->reset(middle - width, middle + width);
226 void retrieveSequenceData(ChromList chrom_filenames, Windows& peaks) {
229 string chrom = peaks[0].chr;
230 string chrom_filename = chrom_filenames[chrom];
231 ifstream chrom_file(chrom_filename.c_str());
232 chrom_file.getline(temp, 1024);
233 size_t offset = chrom_file.gcount();
234 for(Windows::iterator i = peaks.begin(); i != peaks.end(); ++i) {
235 if(i->chr != chrom) {
237 chrom_filename = chrom_filenames[chrom];
238 chrom_file.close(); chrom_file.open(chrom_filename.c_str());
239 chrom_file.getline(temp, 1024);
240 offset = chrom_file.gcount();
242 unsigned int begin = i->start;
243 unsigned int end = i->end;
245 unsigned int begin_pos = offset + (int)begin/50 + begin;
246 unsigned int end_pos = offset + (int)end/50 + end;
248 unsigned int read_len = end_pos - begin_pos;
249 char buffer[read_len+1];
250 chrom_file.seekg(begin_pos, ios_base::beg);
251 chrom_file.read(buffer, read_len+1);
252 buffer[read_len] = '\0';
253 i->sequence = buffer;
255 while( (a= ((i->sequence).find("\n"))) != string::npos) {
256 i->sequence.erase(a,1);
262 void outputPrimer3Input(Windows& peaks, Windows& calls) {
263 cout << "PRIMER_NUM_RETURN=1" << endl;
264 cout << "PRIMER_LOWERCASE_MASKING=1" << endl;
265 cout << "PRIMER_SELF_END=2" << endl;
266 cout << "PRIMER_SELF_ANY=6" << endl;
267 cout << "PRIMER_OPT_SIZE=23" << endl;
268 cout << "PRIMER_MIN_SIZE=19" << endl;
269 cout << "PRIMER_MAX_SIZE=26" << endl;
270 cerr << peaks.size() << " peaks\n";
271 cerr << calls.size() << " calls\n";
272 cout << "PRIMER_MIN_TM=63" << endl;
273 cout << "PRIMER_OPT_TM=65" << endl;
274 cout << "PRIMER_MAX_TM=67" << endl;
275 cout << "PRIMER_PRODUCT_SIZE_RANGE=60-80 50-100" << endl;
277 Windows::iterator j = calls.begin();
278 for(Windows::iterator i = peaks.begin(); i != peaks.end(); ++i) {
279 cout << "PRIMER_SEQUENCE_ID=" << i->chr << "_" << i->start << "_" << i->end << "_" << i->score << "_" << i->total << "_" << j->start << "_" << j->end << "_" << j->total << endl;
280 cout << "SEQUENCE=" << i->sequence.c_str() << endl;