// Global histogram of 2000 integer counters, all starting at zero.
// main() increments profile[dist + 1000] for distances strictly between
// -1000 and 1000, and later prints each index as (index - 1000).
12 vector<int> profile(2000,0);
// Forward declaration of the tokenizer defined later in the file.
// It appends pieces of `text` to `words`, using the characters in
// `separators` as delimiters (see the definition for the details).
14 void split (const string& text, const string& separators, vector<string>& words);
// Constructor: stores the three arguments in the members of the same name.
// NOTE(review): the enclosing declaration and the member types are not
// visible in this excerpt; `pos` arrives as unsigned int while callers in
// main() pass an int produced by atoi() -- confirm negative input is impossible.
22 Hit(string chr, unsigned int pos, bool strand) {
23 this->chr = chr; this->pos = pos; this->strand = strand;
// Hits is a vector of Hit; main() iterates its read list with Hits::iterator.
27 typedef vector<Hit> Hits;
// Comparison predicate handed to std::sort in main() for both the feature
// list and the read list. The body is not visible in this excerpt.
// NOTE(review): the merge-style scan in main() presumably relies on this
// ordering by chromosome first and position second -- confirm against the body.
29 bool compare_hits(const Hit &a, const Hit &b)
// Entry point.
//   argv[1]: file of aligned reads; argv[2]: file listing feature starts.
// Steps visible in this excerpt:
//   1. parse the feature file into Hit objects and sort them with compare_hits;
//   2. parse the reads file, building a Hit per accepted line, and sort `data`;
//   3. walk the sorted reads while advancing an index into the sorted features,
//      counting in the global `profile` every read whose signed distance to the
//      current feature lies strictly between -1000 and 1000;
//   4. print one line per bin: (bin - 1000), a tab, and count / number of reads.
// Several declarations (`line`, `delim`, `features`, `data`, `N`, `chrom`, the
// `end_loop` label) and closing braces are outside this excerpt.
38 int main(int argc, char** argv) {
// NOTE(review): this only rejects argc < 2, yet argv[2] is read two lines
// below; with exactly one argument argv[2] is a null pointer passed to strcpy.
// The usage error also exits with status 0 (success).
39 if(argc < 2) { cerr << "Usage: " << argv[0] << " [aligned reads filename] [file with list of feature starts]\n"; exit(0); }
// NOTE(review): unbounded strcpy into 128-byte buffers; a path of 128 or more
// characters overflows them.
40 char filename[128]; strcpy(filename,argv[1]);
41 char feature_filename[128]; strcpy(feature_filename,argv[2]);
// Separator used to break a read's location field into chromosome and position.
47 string location_delim(":");
49 ifstream feat(feature_filename);
// Read the feature file one line at a time until peek() reports EOF.
51 while(feat.peek() != EOF) {
// NOTE(review): `line` is declared outside this excerpt; this call allows 1024
// characters and the reads loop below allows 2048 -- confirm the buffer holds
// at least 2048 bytes.
53 feat.getline(line,1024,'\n');
55 string line_str(line);
56 vector<string> fields;
57 split(line_str, delim, fields);
// NOTE(review): a wrong field count is reported, but execution continues on
// this same line, so fields[0..2] below is indexed out of range when fewer
// than three fields were found.
58 if(fields.size() != 3) { cerr << "Error: wrong number of fields in feature list (line " << N << " has " << fields.size() << " fields)\n"; }
60 string chr = fields[0];
62 int pos = atoi(fields[1].c_str());
// Strand is parsed as an integer; any non-zero value becomes true.
63 bool strand = (bool)(atoi(fields[2].c_str()));
// This local Hit named `feat` hides the ifstream `feat` for the rest of the
// loop body.
65 Hit feat(chr,pos,strand);
66 features.push_back(feat);
69 cerr << "Found " << features.size() << " features\n";
71 //sort the features so we can run through it once
72 std::sort(features.begin(),features.end(),compare_hits);
// Two-character buffer with its terminator pre-set; no use of it is visible in
// this excerpt.
76 char strand_str[2]; strand_str[1] = '\0';
77 ifstream seqs(filename);
78 unsigned int linenum = 0;
79 while(seqs.peek() != EOF) {
// Writes the running line number to stderr, with a flush (endl), for every
// input line.
80 cerr << ++linenum << endl;
82 seqs.getline(line,2048,'\n');
83 string line_str(line);
84 vector<string> fields;
85 split(line_str, delim, fields);
// NOTE(review): only lines with exactly 3 fields are skipped, but fields[3]
// and fields[4] are read below; a line with 0, 1, 2 or 4 fields indexes past
// the end of the vector. Presumably the guard should be `fields.size() < 5`
// -- confirm against the input format.
86 if(fields.size() == 3) { continue; }
// The fourth field is split on ':' into chromosome name and position.
// NOTE(review): location[1] is read without checking that two parts exist.
88 vector<string> location; split(fields[3], location_delim, location);
89 string chr = location[0];
91 int pos = atoi(location[1].c_str());
// Strand is false when the fifth field starts with 'F', true otherwise.
92 bool strand = ((fields[4].c_str())[0] == 'F')?0:1;
// NOTE(review): the statement that stores `hit` into `data` is not visible in
// this excerpt.
94 Hit hit(chr,pos,strand);
99 cerr << "Found " << data.size() << " reads\n";
101 //sort the data so we can run through it once
102 std::sort(data.begin(),data.end(),compare_hits);
// Index of the feature currently being compared against the reads.
105 unsigned int feat_idx = 0;
107 for(Hits::iterator i = data.begin(); i != data.end(); ++i) {
// Chromosome change (or first read). The lines that update `chrom` are not
// visible here; afterwards feat_idx is advanced to the first feature whose chr
// equals `chrom`, and the scan stops if the features run out.
109 if(chrom == "" || i->chr != chrom) {
112 while(feat_idx < features.size() && features[feat_idx].chr != chrom) { feat_idx++; }
113 if(feat_idx == features.size()) { break; }
114 cerr << chrom.c_str() << " feat_idx: " << feat_idx << endl;
// Distance is the read position minus the feature position.
// NOTE(review): if Hit::pos is unsigned (the constructor takes unsigned int),
// the subtraction wraps before it is converted to int -- confirm member type.
117 int dist_to_feature = i->pos - features[feat_idx].pos;
118 //if we have passed the last feature, fast forward to the next
// Both strand alternatives test the same `dist_to_feature > 1000`, so the
// loop condition does not actually depend on strand.
// NOTE(review): the statement that advances feat_idx is not visible; if it
// sits between the two lines below, features[feat_idx] can be read with
// feat_idx == features.size() when the last feature is passed.
119 while( feat_idx < features.size() && ((features[feat_idx].strand == 0 && dist_to_feature > 1000) || (features[feat_idx].strand == 1 && dist_to_feature > 1000))) {
// Leaves via goto when the current feature is on a different chromosome than
// the read; the end_loop label is outside this excerpt.
120 if(features[feat_idx].chr != i->chr ) { goto end_loop; }
122 dist_to_feature = i->pos - features[feat_idx].pos;
// The distance is negated for features whose strand is 1.
// NOTE(review): features[feat_idx] is read here even if the loop above ended
// because feat_idx reached features.size().
125 if(features[feat_idx].strand == 1) { dist_to_feature *= -1; }
// Only distances strictly inside (-1000, 1000) are counted, so bins 1..1999
// can be incremented and bin 0 never is.
127 if(dist_to_feature > -1000 && dist_to_feature < 1000) {
128 profile[dist_to_feature + 1000]++;
// Output: offset relative to the feature, then the bin count divided by the
// total number of reads.
// NOTE(review): with no reads, data.size() is 0 and every value printed is
// 0.0/0.0. The doubled ';' at the end of the statement is an empty statement.
134 for(unsigned int i = 0; i < profile.size(); i++) {
135 cout << (int)i - (int)1000 << "\t" << (double)profile[i] / (double)data.size() << endl;;
139 void split (const string& text, const string& separators, vector<string>& words) {
141 size_t n = text.length ();
142 size_t start = text.find_first_not_of (separators);
145 size_t stop = text.find_first_of (separators, start);
146 if (stop > n) stop = n;
147 words.push_back (text.substr (start, stop-start));
148 start = text.find_first_not_of (separators, stop+1);