+#include <iostream>
+#include <fstream>
+#include <vector>
+#include <queue>
+#include <math.h>
+
+#include "defs.h"
+#include "window.h"
+#include "read.h"
+#include "util.h"
+#include "chrom_list.h"
+
+using namespace std;
+
+bool Wind_lt_Read(const Window& feat, const Read& read) { if(read.chr == feat.chr) { return feat.end < read.pos; } else { return feat.chr < read.chr; } }
+
+extern bool compare_windows(const Window &a, const Window &b);
+extern bool compare_reads(const Read &a, const Read &b);
+
+void readFromPipe( int fd );
+void read_windows(const char* filename, Windows& windows);
+void read_align_file(char* filename, Reads& features);
+void count_reads_in_windows(Reads& data, Windows& windows);
+void recenter_windows(Windows& windows, unsigned int width);
+void resize_windows(Windows& windows, unsigned int width);
+
+void retrieveSequenceData(ChromList chrom_files, Windows& peaks);
+void outputPrimer3Input(Windows& peaks, Windows& calls);
+
+int main(int argc, char** argv) {
+ if(argc != 4) { cerr << "Usage: " << argv[0] << " peaks.bed aligned_reads chromosome_file" << endl; return(0); }
+ char window_filename[1024]; strcpy(window_filename,argv[1]);
+ char reads_filename[1024]; strcpy(reads_filename,argv[2]);
+ char chromosome_filename[1024]; strcpy(chromosome_filename,argv[3]);
+
+ pid_t pid;
+ int rv;
+ int pipe_to_primer[2];
+ //int pipe_from_primer[2];
+
+ setvbuf(stdout, (char*)NULL, _IONBF, 0);
+
+ if (pipe(pipe_to_primer)) { cout << "Pipe error!" << endl; exit(1); }
+ //if (pipe(pipe_from_primer)) { cout << "Pipe error!" << endl; exit(1); }
+ if ((pid = fork()) == -1) { cout << "Fork error!" << endl; exit(1); }
+
+ //child
+ if (pid == 0) {
+ dup2(pipe_to_primer[0], 0); close(pipe_to_primer[1]);
+ //dup2(pipe_from_primer[1], 1); close(pipe_from_primer[0]);
+ if (execl(PRIMER_3_PATH, "primer3_core", NULL) == -1){ cerr << "execl error!" << endl; exit(1); }
+ } else {
+ dup2(pipe_to_primer[1], 1); close(pipe_to_primer[0]);
+ //dup2(pipe_from_primer[0], 0); close(pipe_from_primer[1]);
+
+ Reads reads;
+ ChromList chromosome_files(chromosome_filename);
+
+ // Title line from ChIP-seq peak data
+ string peakDataTitle;
+
+ // All peak calls. Each peak call contains chromomosme number, starting base pair, post-ending base
+ // pair, peak length, score, and corresponding sequence.
+ Windows windows;
+ read_windows(window_filename, windows);
+
+ read_align_file(reads_filename, reads);
+ count_reads_in_windows(reads, windows);
+ for(Windows::iterator i = windows.begin(); i != windows.end(); ++i) { i->count_hits(); }
+
+ Windows targets(windows);
+ recenter_windows(targets,60);
+ count_reads_in_windows(reads, targets);
+
+ unsigned int max_reads = 0;
+ unsigned int min_reads = 1e8;
+ Windows::iterator max, min;
+ for(Windows::iterator i = targets.begin(); i != targets.end(); ++i) {
+ i->count_hits();
+ if(i->total > max_reads) { max_reads = i->total; max = i; }
+ if(i->total < min_reads) { min_reads = i->total; min = i; }
+ cerr << i->total << " at " << i->chr << " " << i->start << " " << i->end << endl;
+ }
+
+ cerr << "Max: " << max_reads << " at " << max->chr << " " << max->start << " " << max->end << endl << "Min: " << min_reads << " at " << min->chr << " " << min->start << " " << min->end << endl;
+
+ retrieveSequenceData(chromosome_files, targets);
+ outputPrimer3Input(targets, windows);
+ close(pipe_to_primer[1]);
+ close(1);
+ //readFromPipe(pipe_from_primer[0]);
+ //close(pipe_from_primer[0]);
+ wait(&rv);
+ }
+ return 0;
+}
+
+void readFromPipe( int fd )
+{
+ FILE *stream;
+ int ch;
+ if ( (stream = fdopen( fd, "r" )) == NULL )
+ {
+ perror( "fdopen() r" );
+ exit(255);
+ }
+ while ( (ch = getc( stream )) != EOF )
+ putc( ch, stdout );
+ fflush( stdout );
+ fclose( stream );
+}
+
+void read_windows(const char* filename, Windows& windows) {
+ string delim("\t");
+
+ ifstream feat(filename);
+ unsigned int N = 0;
+ while(feat.peek() != EOF) {
+ char line[1024];
+ feat.getline(line,1024,'\n');
+ N++;
+ string line_str(line);
+ vector<string> fields;
+ split(line_str, delim, fields);
+ if(fields.size() < 4) { cerr << "Error (" << filename << "): wrong number of fields in feature list (line " << N << " has " << fields.size() << " fields)\n";
+ continue;
+ }
+
+ string chr = fields[0];
+ if(chr == "NA") { continue; }
+ int start = atoi(fields[1].c_str());
+ int stop = atoi(fields[2].c_str());
+ double score = strtod(fields[3].c_str(),NULL);
+
+ char name[100]; sprintf(name,"Window %d",N);
+ Window w(string(name),chr,start-300,stop+300,score);
+ windows.push_back(w);
+ }
+
+ //sort the features so we can run through it once
+ std::stable_sort(windows.begin(),windows.end(),compare_windows);
+ feat.close();
+}
+
+void count_reads_in_windows(Reads& data, Windows& windows) {
+ Windows::iterator wind_it = windows.begin();
+
+ for(Reads::iterator i = data.begin(); i != data.end(); ++i) {
+ //skip to first window after read
+ while(wind_it != windows.end() && Wind_lt_Read(*wind_it, *i)) {
+ wind_it++;
+ }
+
+ //stop if we have run out of windows.
+ if(wind_it == windows.end()) { break; }
+
+ if(i->pos >= wind_it->start && i->pos < wind_it->end) {
+ int pos = i->pos - wind_it->start;
+ wind_it->counts[pos]++;
+ }
+ }
+}
+
+void read_align_file(char* filename, Reads& features) {
+ string delim(" \n");
+ string location_delim(":");
+ char strand_str[2]; strand_str[1] = '\0';
+ ifstream seqs(filename);
+ string name("");
+ while(seqs.peek() != EOF) {
+ char line[2048];
+ seqs.getline(line,2048,'\n');
+
+ string line_str(line);
+ vector<string> fields;
+ split(line_str, delim, fields);
+ if(fields.size() != 7) { continue; }
+
+ vector<string> location; split(fields[3], location_delim, location);
+ string chr = location[0];
+ if(chr == "NA") { continue; }
+ int pos = atoi(location[1].c_str());
+ bool strand = ((fields[4].c_str())[0] == 'F')?0:1;
+
+ if(strand == 0) {
+ Read read(chr,pos,strand);
+ features.push_back(read);
+ } else {
+ Read read(chr,pos+25,strand);
+ features.push_back(read);
+ }
+ }
+ seqs.close();
+
+ //sort the data so we can run through it once
+ std::sort(features.begin(),features.end(),compare_reads);
+ cerr << "Reads sorted\n";
+}
+
+//Look at reads in each window, and pick out the max. Return target regions of width-bp before and after the region.
+void recenter_windows(Windows& windows, unsigned int width) {
+ for(Windows::iterator i = windows.begin(); i != windows.end(); ++i) {
+ unsigned int max_count = 0; unsigned int max_offset = 0;
+ for(unsigned int count_i = 0; count_i < i->counts.size(); count_i++) {
+ if(i->counts[count_i] > max_count) { max_count = i->counts[count_i]; max_offset = count_i; }
+ }
+ unsigned int newstart = i->start + max_offset - width;
+ unsigned int newend = i->start + max_offset + width;
+ i->reset(newstart, newend);
+ }
+}
+
+//Look at reads in each window, and pick out the max. Return target regions of width-bp before and after the region.
+void resize_windows(Windows& windows, unsigned int width) {
+ for(Windows::iterator i = windows.begin(); i != windows.end(); ++i) {
+ unsigned int middle = i->start + i->end / 2.0;
+ i->reset(middle - width, middle + width);
+ }
+}
+
+
+void retrieveSequenceData(ChromList chrom_filenames, Windows& peaks) {
+ char temp[1024];
+
+ string chrom = peaks[0].chr;
+ string chrom_filename = chrom_filenames[chrom];
+ ifstream chrom_file(chrom_filename.c_str());
+ chrom_file.getline(temp, 1024);
+ size_t offset = chrom_file.gcount();
+ for(Windows::iterator i = peaks.begin(); i != peaks.end(); ++i) {
+ if(i->chr != chrom) {
+ chrom = i->chr;
+ chrom_filename = chrom_filenames[chrom];
+ chrom_file.close(); chrom_file.open(chrom_filename.c_str());
+ chrom_file.getline(temp, 1024);
+ offset = chrom_file.gcount();
+ }
+ unsigned int begin = i->start;
+ unsigned int end = i->end;
+
+ unsigned int begin_pos = offset + (int)begin/50 + begin;
+ unsigned int end_pos = offset + (int)end/50 + end;
+
+ unsigned int read_len = end_pos - begin_pos;
+ char buffer[read_len+1];
+ chrom_file.seekg(begin_pos, ios_base::beg);
+ chrom_file.read(buffer, read_len+1);
+ buffer[read_len] = '\0';
+ i->sequence = buffer;
+ unsigned int a;
+ while( (a= ((i->sequence).find("\n"))) != string::npos) {
+ i->sequence.erase(a,1);
+ }
+ }
+ chrom_file.close();
+}
+
+void outputPrimer3Input(Windows& peaks, Windows& calls) {
+ cout << "PRIMER_NUM_RETURN=1" << endl;
+ cout << "PRIMER_LOWERCASE_MASKING=1" << endl;
+ cout << "PRIMER_SELF_END=2" << endl;
+ cout << "PRIMER_SELF_ANY=6" << endl;
+ cout << "PRIMER_OPT_SIZE=23" << endl;
+ cout << "PRIMER_MIN_SIZE=19" << endl;
+ cout << "PRIMER_MAX_SIZE=26" << endl;
+ cout << "PRIMER_MIN_TM=63" << endl;
+ cout << "PRIMER_OPT_TM=65" << endl;
+ cout << "PRIMER_MAX_TM=67" << endl;
+ cout << "PRIMER_PRODUCT_SIZE_RANGE=60-80 50-100" << endl;
+
+ Windows::iterator j = calls.begin();
+ for(Windows::iterator i = peaks.begin(); i != peaks.end(); ++i) {
+ cout << "PRIMER_SEQUENCE_ID=" << i->chr << "_" << i->start << "_" << i->end << "_" << i->score << "_" << i->total << "_" << j->start << "_" << j->end << "_" << j->total << endl;
+ cout << "SEQUENCE=" << i->sequence.c_str() << endl;
+ cout << "=" << endl;
+ j++;
+ }
+}