12 vector<int> profile(2000,0);
14 void int_to_chromstr(int chrom, char* chr);
15 int chromstr_to_int(string chrom);
17 void split (const string& text, const string& separators, vector<string>& words);
25 Hit(int chr, unsigned int pos, bool strand) {
26 this->chr = chr; this->pos = pos; this->strand = strand;
30 typedef vector<Hit> Hits;
32 bool compare_hits(const Hit &a, const Hit &b)
41 int main(int argc, char** argv) {
42 if(argc < 2) { cerr << "Usage: " << argv[0] << " [aligned reads filename] [file with list of feature starts]\n"; exit(0); }
43 char filename[128]; strcpy(filename,argv[1]);
44 char feature_filename[128]; strcpy(feature_filename,argv[2]);
50 string location_delim(":");
52 ifstream feat(feature_filename);
54 while(feat.peek() != EOF) {
56 feat.getline(line,1024,'\n');
58 string line_str(line);
59 vector<string> fields;
60 split(line_str, delim, fields);
61 if(fields.size() != 3) { cerr << "Error: wrong number of fields in feature list (line " << N << " has " << fields.size() << " fields)\n"; }
63 int chr = chromstr_to_int(fields[0]);
64 if(chr == -1) { continue; }
65 int pos = atoi(fields[1].c_str());
66 bool strand = (bool)(atoi(fields[2].c_str()));
68 Hit feat(chr,pos,strand);
69 features.push_back(feat);
72 cerr << "Found " << features.size() << " features\n";
74 //sort the features so we can run through it once
75 std::sort(features.begin(),features.end(),compare_hits);
79 char strand_str[2]; strand_str[1] = '\0';
80 ifstream seqs(filename);
81 while(seqs.peek() != EOF) {
83 seqs.getline(line,2048,'\n');
84 string line_str(line);
85 vector<string> fields;
86 split(line_str, delim, fields);
87 if(fields.size() == 3) { continue; }
89 vector<string> location; split(fields[3], location_delim, location);
90 int chr = chromstr_to_int(location[0]);
91 if(chr == -1) { continue; }
92 int pos = atoi(location[1].c_str());
93 bool strand = ((fields[4].c_str())[0] == 'F')?0:1;
95 Hit hit(chr,pos,strand);
100 cerr << "Found " << data.size() << " reads\n";
102 //sort the data so we can run through it once
103 std::sort(data.begin(),data.end(),compare_hits);
106 unsigned int feat_idx = 0;
108 for(Hits::iterator i = data.begin(); i != data.end(); ++i) {
110 if(chrom == -1 || i->chr != chrom) {
113 while(feat_idx < features.size() && features[feat_idx].chr != chrom) { feat_idx++; }
114 if(feat_idx == features.size()) { break; }
115 cerr << chrom << " feat_idx: " << feat_idx << endl;
118 int dist_to_feature = i->pos - features[feat_idx].pos;
119 //if we have passed the last feature, fast forward to the next
120 while( feat_idx < features.size() && ((features[feat_idx].strand == 0 && dist_to_feature > 1000) || (features[feat_idx].strand == 1 && dist_to_feature > 1000))) {
121 if(features[feat_idx].chr != i->chr ) { goto end_loop; }
123 dist_to_feature = i->pos - features[feat_idx].pos;
126 if(features[feat_idx].strand == 1) { dist_to_feature *= -1; }
128 if(dist_to_feature > -1000 && dist_to_feature < 1000) {
129 profile[dist_to_feature + 1000]++;
135 for(unsigned int i = 0; i < profile.size(); i++) {
136 cout << (int)i - (int)1000 << "\t" << (double)profile[i] / (double)data.size() << endl;;
140 int chromstr_to_int(string chromstr) {
141 const char* chrom = chromstr.c_str();
142 if(chrom[0] != 'c' || chrom[1] != 'h' || chrom[2] != 'r') { return -1; }
143 if(chrom[3] == 'X') { return 23; } else
144 if(chrom[3] == 'Y') { return 24; } else
145 if(chrom[3] == 'M') { return 25; } else
146 if(chrom[3] >= '0' && chrom[3] <= '9') { return atoi(chrom+3); } else
150 void int_to_chromstr(int chrom, char* chr) {
151 strcpy(chr,"chr\0\0\0");
153 case 23: chr[3]='X'; break;
154 case 24: chr[3]='Y'; break;
155 case 25: chr[3]='M'; break;
156 default: chr[3]=(char)(48+(int)(chrom/10));
157 chr[4]=(char)(48+(int)(chrom - 10*(int)(chrom/10)));
158 if(chr[3]=='0') { chr[3]=chr[4]; chr[4]='\0'; }
163 void split (const string& text, const string& separators, vector<string>& words) {
165 size_t n = text.length ();
166 size_t start = text.find_first_not_of (separators);
169 size_t stop = text.find_first_of (separators, start);
170 if (stop > n) stop = n;
171 words.push_back (text.substr (start, stop-start));
172 start = text.find_first_not_of (separators, stop+1);