Added qPCR Validation design code
[htsworkflow.git] / htswanalysis / src / ValidationDesign / main.cpp
1 /*
2  *  main.cpp
3  *  WindowLocator
4  *
5  *  Created by Alan You on 8/11/08.
6  *  Copyright 2008 Hudson Alpha. All rights reserved.
7  *
8  */
9
10 #include <iostream>
11 #include <fstream>
12 #include <string>
13 #include <vector>
14 #include <cmath>
15
16 #include <ext/hash_map>
17
18 #include "window.h"
19 #include "util.h"
20
21 using namespace std;
22 using namespace __gnu_cxx;
23
24 struct eq_str {
25   bool operator() (const string& a, const string& b) const {
26     return(strcmp(a.c_str(), b.c_str()) == 0);
27   }
28 };
29
30 struct hash_str {
31   size_t operator() (const string& a) const {
32     hash<const char*> h; 
33     return(h(a.c_str())); 
34   }
35 };
36
37 typedef hash_map<string, string, hash_str, eq_str> ChromList;
38
39 void importWindows(string& filename, string& peakDataTitle, vector<Window>& peaks);
40 void retrieveSequenceData(ChromList chrom_files, vector<Window>& peaks);
41 void outputSequenceData(string peakDataTitle, vector<Window>& peaks);
42 void outputPrimer3Input(vector<Window>& peaks);
43
44 int main (int argc, char * const argv[])
45 {
46         if(argc < 3) { 
47           cout << "Usage: " << argv[0] << " bed_file_of_peaks chromosome_file" << endl;
48           exit(1);
49         }
50
51         string filename(argv[1]);
52         string chrom_list(argv[2]);
53
54         pid_t pid;
55         int rv;
56         int input_pipe[2];
57  
58         setvbuf(stdout, (char*)NULL, _IONBF, 0);
59         if (pipe(input_pipe)) {
60           cout << "Pipe error!" << endl;
61           exit(1);
62         }
63         
64         if ((pid = fork()) == -1) {
65                 cout << "Fork error!" << endl;
66                 exit(1);
67         }
68         
69         //child 
70         if (pid == 0) {
71           dup2(input_pipe[0], 0); close(input_pipe[1]);
72           if (execlp("primer3_core",  "primer3_core", NULL) == -1){
73             cerr << "execl error!" << endl;
74             exit(1);
75           }
76           cerr << "Done with primer3\n";
77        } else {
78          ChromList chromosome_files;
79          char buffer[1024]; string delim("\t");
80          ifstream chrom_file(chrom_list.c_str());
81          while(chrom_file.getline(buffer, 1024)) {
82            string temp(buffer); 
83            vector<string> words;
84            split(temp, delim,words);
85            if(words.size() != 2) { cerr << "Error in chrom list." << endl; exit(1); }
86            chromosome_files[ words[0] ] = words[1]; 
87          }
88          chrom_file.close();
89  
90         // ChIP-seq peak data (BED format required)
91         ifstream peakDataFile;
92         
93         // Title line from ChIP-seq peak data
94         string peakDataTitle;
95         
96         // All peak calls.  Each peak call contains chromomosme number, starting base pair, post-ending base
97         // pair, peak length, score, and corresponding sequence.
98         vector<Window> peaks;
99         
100         dup2(input_pipe[1], 1); close(input_pipe[0]);
101         importWindows(filename, peakDataTitle, peaks);
102         retrieveSequenceData(chromosome_files, peaks);
103         outputPrimer3Input(peaks);
104         close(input_pipe[1]);
105         close(1);
106         wait(&rv);
107       }
108
109         return 0;
110 }
111
112 void importWindows(string& filename, string& peakDataTitle, vector<Window>& peaks)
113 {
114         ifstream peakDataFile;
115         peakDataFile.open(filename.c_str());
116
117         char temp[1024];
118
119         peakDataFile.getline(temp, 1024);
120         peakDataTitle = temp;
121
122         while(peakDataFile.getline(temp, 1024)) {
123           if(peakDataFile.eof()) { peakDataFile.close(); break; }
124           Window inputCall(temp);
125           peaks.push_back(inputCall);
126         }       
127         
128         sort(peaks.begin(), peaks.end());
129
130 }
131
132 void retrieveSequenceData(ChromList chrom_filenames, vector<Window>& peaks)
133 {
134         char temp[1024];
135
136         string chrom = peaks[0].chr;
137         string chrom_filename = chrom_filenames[chrom];
138         ifstream chrom_file(chrom_filename.c_str());
139         chrom_file.getline(temp, 1024);
140         size_t offset = chrom_file.gcount();
141         for(vector<Window>::iterator i = peaks.begin(); i != peaks.end(); ++i) {
142           if(i->chr != chrom) { 
143             chrom = i->chr; 
144             chrom_filename = chrom_filenames[chrom];
145             chrom_file.close(); chrom_file.open(chrom_filename.c_str());
146             chrom_file.getline(temp, 1024);
147             offset = chrom_file.gcount();
148           }
149           unsigned int begin = i->start;
150           unsigned int end   = i->end;
151  
152           unsigned int begin_pos = offset + (int)begin/50 + begin;      
153           unsigned int end_pos = offset + (int)end/50 + end;      
154
155           unsigned int read_len = end_pos - begin_pos;
156           char buffer[read_len+1];
157           chrom_file.seekg(begin_pos, ios_base::beg);
158           chrom_file.read(buffer, read_len+1);
159           buffer[read_len] = '\0';
160           i->sequence = buffer;
161           unsigned int a;
162           while( (a= ((i->sequence).find("\n"))) != string::npos) {
163             i->sequence.erase(a,1);
164           }
165         } 
166         chrom_file.close(); 
167 }
168
169 void outputPrimer3Input(vector<Window>& peaks)
170 {
171   cout <<   "PRIMER_NUM_RETURN=1" << endl;
172   cout <<   "PRIMER_LOWERCASE_MASKING=1" << endl;
173   //cout <<   "PRIMER_TM_SANTALUCIA=1" << endl;
174   cout <<   "PRIMER_SELF_END=2" << endl;
175   cout <<   "PRIMER_SELF_ANY=6" << endl;
176   cout <<   "PRIMER_OPT_SIZE=23" << endl;
177   cout <<   "PRIMER_MIN_SIZE=19" << endl;
178   cout <<   "PRIMER_MAX_SIZE=26" << endl;
179   cout <<   "PRIMER_MIN_TM=63" << endl;
180   cout <<   "PRIMER_OPT_TM=65" << endl;
181   cout <<   "PRIMER_MAX_TM=67" << endl;
182   cout <<   "PRIMER_PRODUCT_SIZE_RANGE=60-80 50-100" << endl;
183                   
184   unsigned int idx = 0;
185   for(vector<Window>::iterator i = peaks.begin(); i != peaks.end(); ++i) {
186     //cout << "PRIMER_SEQUENCE_ID=" << i->chr << "_" << idx++ << "_" << i->score << endl;
187     cout << "PRIMER_SEQUENCE_ID=" << i->name << i->score << endl;
188     cout << "SEQUENCE=" << i->sequence.c_str() << endl;
189     cout << "=" << endl;
190   }
191 }