11 #include "chrom_list.h"
15 bool Wind_lt_Read(const Window& feat, const Read& read) { if(read.chr == feat.chr) { return feat.end < read.pos; } else { return feat.chr < read.chr; } }
17 extern bool compare_windows(const Window &a, const Window &b);
18 extern bool compare_reads(const Read &a, const Read &b);
20 void readFromPipe( int fd );
21 void read_windows(const char* filename, Windows& windows);
22 void read_align_file(char* filename, Reads& features);
23 void count_reads_in_windows(Reads& data, Windows& windows);
24 void recenter_windows(Windows& windows, unsigned int width);
25 void resize_windows(Windows& windows, unsigned int width);
27 void retrieveSequenceData(ChromList chrom_files, Windows& peaks);
28 void outputPrimer3Input(Windows& peaks, Windows& calls);
30 int main(int argc, char** argv) {
31 if(argc != 4) { cerr << "Usage: " << argv[0] << " peaks.bed aligned_reads chromosome_file" << endl; return(0); }
32 char window_filename[1024]; strcpy(window_filename,argv[1]);
33 char reads_filename[1024]; strcpy(reads_filename,argv[2]);
34 char chromosome_filename[1024]; strcpy(chromosome_filename,argv[3]);
38 int pipe_to_primer[2];
39 //int pipe_from_primer[2];
41 setvbuf(stdout, (char*)NULL, _IONBF, 0);
43 if (pipe(pipe_to_primer)) { cout << "Pipe error!" << endl; exit(1); }
44 //if (pipe(pipe_from_primer)) { cout << "Pipe error!" << endl; exit(1); }
45 if ((pid = fork()) == -1) { cout << "Fork error!" << endl; exit(1); }
49 dup2(pipe_to_primer[0], 0); close(pipe_to_primer[1]);
50 //dup2(pipe_from_primer[1], 1); close(pipe_from_primer[0]);
51 if (execl(PRIMER_3_PATH, "primer3_core", NULL) == -1){ cerr << "execl error!" << endl; exit(1); }
53 dup2(pipe_to_primer[1], 1); close(pipe_to_primer[0]);
54 //dup2(pipe_from_primer[0], 0); close(pipe_from_primer[1]);
57 ChromList chromosome_files(chromosome_filename);
59 // Title line from ChIP-seq peak data
62 // All peak calls. Each peak call contains chromomosme number, starting base pair, post-ending base
63 // pair, peak length, score, and corresponding sequence.
65 read_windows(window_filename, windows);
67 read_align_file(reads_filename, reads);
68 count_reads_in_windows(reads, windows);
69 for(Windows::iterator i = windows.begin(); i != windows.end(); ++i) { i->count_hits(); }
71 Windows targets(windows);
72 recenter_windows(targets,60);
73 count_reads_in_windows(reads, targets);
75 unsigned int max_reads = 0;
76 unsigned int min_reads = 1e8;
77 Windows::iterator max, min;
78 for(Windows::iterator i = targets.begin(); i != targets.end(); ++i) {
80 if(i->total > max_reads) { max_reads = i->total; max = i; }
81 if(i->total < min_reads) { min_reads = i->total; min = i; }
82 cerr << i->total << " at " << i->chr << " " << i->start << " " << i->end << endl;
85 cerr << "Max: " << max_reads << " at " << max->chr << " " << max->start << " " << max->end << endl << "Min: " << min_reads << " at " << min->chr << " " << min->start << " " << min->end << endl;
87 retrieveSequenceData(chromosome_files, targets);
88 outputPrimer3Input(targets, windows);
89 close(pipe_to_primer[1]);
91 //readFromPipe(pipe_from_primer[0]);
92 //close(pipe_from_primer[0]);
98 void readFromPipe( int fd )
102 if ( (stream = fdopen( fd, "r" )) == NULL )
104 perror( "fdopen() r" );
107 while ( (ch = getc( stream )) != EOF )
113 void read_windows(const char* filename, Windows& windows) {
116 ifstream feat(filename);
118 while(feat.peek() != EOF) {
120 feat.getline(line,1024,'\n');
122 string line_str(line);
123 vector<string> fields;
124 split(line_str, delim, fields);
125 if(fields.size() < 4) { cerr << "Error (" << filename << "): wrong number of fields in feature list (line " << N << " has " << fields.size() << " fields)\n";
129 string chr = fields[0];
130 if(chr == "NA") { continue; }
131 int start = atoi(fields[1].c_str());
132 int stop = atoi(fields[2].c_str());
133 double score = strtod(fields[3].c_str(),NULL);
135 char name[100]; sprintf(name,"Window %d",N);
136 Window w(string(name),chr,start-300,stop+300,score);
137 windows.push_back(w);
140 //sort the features so we can run through it once
141 std::stable_sort(windows.begin(),windows.end(),compare_windows);
145 void count_reads_in_windows(Reads& data, Windows& windows) {
146 Windows::iterator wind_it = windows.begin();
148 for(Reads::iterator i = data.begin(); i != data.end(); ++i) {
149 //skip to first window after read
150 while(wind_it != windows.end() && Wind_lt_Read(*wind_it, *i)) {
154 //stop if we have run out of windows.
155 if(wind_it == windows.end()) { break; }
157 if(i->pos >= wind_it->start && i->pos < wind_it->end) {
158 int pos = i->pos - wind_it->start;
159 wind_it->counts[pos]++;
164 void read_align_file(char* filename, Reads& features) {
166 string location_delim(":");
167 char strand_str[2]; strand_str[1] = '\0';
168 ifstream seqs(filename);
170 while(seqs.peek() != EOF) {
172 seqs.getline(line,2048,'\n');
174 string line_str(line);
175 vector<string> fields;
176 split(line_str, delim, fields);
177 if(fields.size() != 7) { continue; }
179 vector<string> location; split(fields[3], location_delim, location);
180 string chr = location[0];
181 if(chr == "NA") { continue; }
182 int pos = atoi(location[1].c_str());
183 bool strand = ((fields[4].c_str())[0] == 'F')?0:1;
186 Read read(chr,pos,strand);
187 features.push_back(read);
189 Read read(chr,pos+25,strand);
190 features.push_back(read);
195 //sort the data so we can run through it once
196 std::sort(features.begin(),features.end(),compare_reads);
197 cerr << "Reads sorted\n";
200 //Look at reads in each window, and pick out the max. Return target regions of width-bp before and after the region.
201 void recenter_windows(Windows& windows, unsigned int width) {
202 for(Windows::iterator i = windows.begin(); i != windows.end(); ++i) {
203 unsigned int max_count = 0; unsigned int max_offset = 0;
204 for(unsigned int count_i = 0; count_i < i->counts.size(); count_i++) {
205 if(i->counts[count_i] > max_count) { max_count = i->counts[count_i]; max_offset = count_i; }
207 unsigned int newstart = i->start + max_offset - width;
208 unsigned int newend = i->start + max_offset + width;
209 i->reset(newstart, newend);
213 //Look at reads in each window, and pick out the max. Return target regions of width-bp before and after the region.
214 void resize_windows(Windows& windows, unsigned int width) {
215 for(Windows::iterator i = windows.begin(); i != windows.end(); ++i) {
216 unsigned int middle = i->start + i->end / 2.0;
217 i->reset(middle - width, middle + width);
222 void retrieveSequenceData(ChromList chrom_filenames, Windows& peaks) {
225 string chrom = peaks[0].chr;
226 string chrom_filename = chrom_filenames[chrom];
227 ifstream chrom_file(chrom_filename.c_str());
228 chrom_file.getline(temp, 1024);
229 size_t offset = chrom_file.gcount();
230 for(Windows::iterator i = peaks.begin(); i != peaks.end(); ++i) {
231 if(i->chr != chrom) {
233 chrom_filename = chrom_filenames[chrom];
234 chrom_file.close(); chrom_file.open(chrom_filename.c_str());
235 chrom_file.getline(temp, 1024);
236 offset = chrom_file.gcount();
238 unsigned int begin = i->start;
239 unsigned int end = i->end;
241 unsigned int begin_pos = offset + (int)begin/50 + begin;
242 unsigned int end_pos = offset + (int)end/50 + end;
244 unsigned int read_len = end_pos - begin_pos;
245 char buffer[read_len+1];
246 chrom_file.seekg(begin_pos, ios_base::beg);
247 chrom_file.read(buffer, read_len+1);
248 buffer[read_len] = '\0';
249 i->sequence = buffer;
251 while( (a= ((i->sequence).find("\n"))) != string::npos) {
252 i->sequence.erase(a,1);
258 void outputPrimer3Input(Windows& peaks, Windows& calls) {
259 cout << "PRIMER_NUM_RETURN=1" << endl;
260 cout << "PRIMER_LOWERCASE_MASKING=1" << endl;
261 cout << "PRIMER_SELF_END=2" << endl;
262 cout << "PRIMER_SELF_ANY=6" << endl;
263 cout << "PRIMER_OPT_SIZE=23" << endl;
264 cout << "PRIMER_MIN_SIZE=19" << endl;
265 cout << "PRIMER_MAX_SIZE=26" << endl;
266 cout << "PRIMER_MIN_TM=63" << endl;
267 cout << "PRIMER_OPT_TM=65" << endl;
268 cout << "PRIMER_MAX_TM=67" << endl;
269 cout << "PRIMER_PRODUCT_SIZE_RANGE=60-80 50-100" << endl;
271 Windows::iterator j = calls.begin();
272 for(Windows::iterator i = peaks.begin(); i != peaks.end(); ++i) {
273 cout << "PRIMER_SEQUENCE_ID=" << i->chr << "_" << i->start << "_" << i->end << "_" << i->score << "_" << i->total << "_" << j->start << "_" << j->end << "_" << j->total << endl;
274 cout << "SEQUENCE=" << i->sequence.c_str() << endl;