// Histogram with 2000 zero-initialised bins. main() increments bin
// (dist_to_feature + 1000) for distances strictly between -1000 and 1000,
// and prints every bin labelled with (index - 1000).
12 vector<int> profile(2000,0);
// Forward declaration; the definition follows main(). Pieces of `text`
// delimited by any character in `separators` are appended to `words`.
14 void split (const string& text, const string& separators, vector<string>& words);
// Constructor: stores each argument in the member of the same name.
// NOTE(review): the enclosing type declaration and the member types are not
// visible here; `pos` arrives as unsigned int — confirm the member matches.
22 Hit(string chr, unsigned int pos, bool strand) {
23 this->chr = chr; this->pos = pos; this->strand = strand;
// Sequence of Hit records; main() walks `data` through Hits::iterator.
27 typedef vector<Hit> Hits;
// Ordering predicate passed to std::sort for both `features` and `data` in
// main().
// NOTE(review): the body is not visible here. The single-pass scan in main()
// appears to rely on hits being grouped by chr and ordered by pos — confirm
// that this comparator provides that ordering.
29 bool compare_hits(const Hit &a, const Hit &b)
// Entry point. From the statements visible here it:
//   1. reads a feature list (argv[2]) into `features` and sorts it,
//   2. reads an aligned-reads file (argv[1]), building a Hit per usable line,
//      and sorts `data`,
//   3. walks `data` once, binning each read's signed distance to the current
//      feature into `profile`,
//   4. prints one "offset<TAB>value" line per bin to stdout.
// NOTE(review): `line`, `delim`, `N`, `features`, `data`, `chrom`, several
// closing braces and the `end_loop` label are declared on lines not visible
// here; the comments below describe only the visible statements.
38 int main(int argc, char** argv) {
// NOTE(review): only argc < 2 is rejected, yet argv[2] is read below, so a
// single-argument invocation gets past this check — looks like it should be
// argc < 3; confirm. Also note the usage path exits with status 0.
39 if(argc < 2) { cerr << "Usage: " << argv[0] << " [aligned reads filename] [file with list of feature starts]\n"; exit(0); }
// NOTE(review): strcpy into fixed 128-byte buffers with no length check; a
// longer path argument would overflow them.
40 char filename[128]; strcpy(filename,argv[1]);
41 char feature_filename[128]; strcpy(feature_filename,argv[2]);
// Separator used to break a read's location field into chromosome and position.
47 string location_delim(":");
// --- Pass 1: load the feature list ---
49 ifstream feat(feature_filename);
51 while(feat.peek() != EOF) {
// Read one newline-terminated record with a 1024-byte limit.
53 feat.getline(line,1024,'\n');
55 string line_str(line);
56 vector<string> fields;
57 split(line_str, delim, fields);
// NOTE(review): a wrong field count is reported but no visible statement
// skips the line, so fields[1] / fields[2] below may be read out of range —
// confirm whether a hidden line handles this.
58 if(fields.size() != 3) { cerr << "Error: wrong number of fields in feature list (line " << N << " has " << fields.size() << " fields)\n"; }
// Field layout as consumed here: [0] chromosome, [1] position, [2] strand.
60 string chr = fields[0];
62 int pos = atoi(fields[1].c_str());
// Any non-zero integer in the strand field becomes true.
63 bool strand = (bool)(atoi(fields[2].c_str()));
// This local Hit shadows the ifstream `feat` for the rest of the loop body.
65 Hit feat(chr,pos,strand);
66 features.push_back(feat);
69 cerr << "Found " << features.size() << " features\n";
71 //sort the features so we can run through it once
72 std::sort(features.begin(),features.end(),compare_hits);
// Two-character, NUL-terminated scratch buffer; its use is not visible here.
76 char strand_str[2]; strand_str[1] = '\0';
// --- Pass 2: load the aligned reads ---
77 ifstream seqs(filename);
78 while(seqs.peek() != EOF) {
// Read one newline-terminated record with a 2048-byte limit.
80 seqs.getline(line,2048,'\n');
81 string line_str(line);
82 vector<string> fields;
83 split(line_str, delim, fields);
// Skip records with three or fewer fields.
// NOTE(review): fields[4] is read below, which needs at least five fields; a
// four-field record passes this check — confirm the intended threshold.
84 if(fields.size() <= 3) { continue; }
// Field 3 is split on ':' into chromosome (part 0) and position (part 1).
// NOTE(review): no visible check that `location` has two parts.
86 vector<string> location; split(fields[3], location_delim, location);
87 string chr = location[0];
89 int pos = atoi(location[1].c_str());
// Strand is false when field 4 starts with 'F', true for anything else.
90 bool strand = ((fields[4].c_str())[0] == 'F')?0:1;
// NOTE(review): the statement that stores `hit` into `data` is not visible
// here — presumably it follows on a hidden line.
92 Hit hit(chr,pos,strand);
97 cerr << "Found " << data.size() << " reads\n";
99 //sort the data so we can run through it once
100 std::sort(data.begin(),data.end(),compare_hits);
// --- Pass 3: single sweep over the sorted reads ---
// feat_idx is the cursor into `features`; it only moves forward.
103 unsigned int feat_idx = 0;
105 for(Hits::iterator i = data.begin(); i != data.end(); ++i) {
// On the first read or when the read's chromosome differs from `chrom`,
// advance the feature cursor to the first feature on `chrom`.
// NOTE(review): the assignment that updates `chrom` is not visible here.
107 if(chrom == "" || i->chr != chrom) {
110 while(feat_idx < features.size() && features[feat_idx].chr != chrom) { feat_idx++; }
// No features left at all: stop processing reads.
111 if(feat_idx == features.size()) { break; }
112 cerr << chrom.c_str() << " feat_idx: " << feat_idx << endl;
// Offset of the read from the current feature (read position minus feature position).
115 int dist_to_feature = i->pos - features[feat_idx].pos;
116 //if we have passed the last feature, fast forward to the next
// NOTE(review): both strand alternatives test the same dist_to_feature > 1000,
// so the condition does not actually depend on strand — confirm whether the
// strand == 1 case was meant to differ.
117 while( feat_idx < features.size() && ((features[feat_idx].strand == 0 && dist_to_feature > 1000) || (features[feat_idx].strand == 1 && dist_to_feature > 1000))) {
// Leave the sweep for this read once the cursor is on another chromosome
// (the end_loop label is not visible here).
118 if(features[feat_idx].chr != i->chr ) { goto end_loop; }
// NOTE(review): presumably a hidden line advances feat_idx before this; if so,
// features[feat_idx] here (and after the loop) may be read at the end index —
// confirm.
120 dist_to_feature = i->pos - features[feat_idx].pos;
// For strand == 1 features the sign of the offset is flipped.
123 if(features[feat_idx].strand == 1) { dist_to_feature *= -1; }
// Count the read only when the offset is strictly inside (-1000, 1000);
// the +1000 shift maps it to bins 1..1999.
125 if(dist_to_feature > -1000 && dist_to_feature < 1000) {
126 profile[dist_to_feature + 1000]++;
// --- Output ---
// With no reads, print every offset with a literal 0.
132 if(data.size() == 0) {
133 for(unsigned int i = 0; i < profile.size(); i++) {
134 cout << (int)i - (int)1000 << "\t0\n";
// Print each bin as count divided by the total number of reads.
// NOTE(review): presumably this loop is the else-branch of the empty check
// above (the connecting lines are not visible); otherwise it would divide by
// a zero data.size() — confirm.
137 for(unsigned int i = 0; i < profile.size(); i++) {
138 cout << (int)i - (int)1000 << "\t" << (double)profile[i] / (double)data.size() << endl;;
// Breaks `text` into tokens and appends each one to `words`.
//   text       - string to tokenise
//   separators - set of delimiter characters; any one of them ends a token
//   words      - output vector; tokens are pushed onto the back
// NOTE(review): the loop header and closing lines are not visible here, and no
// visible statement clears `words` before appending — confirm against callers.
143 void split (const string& text, const string& separators, vector<string>& words) {
145 size_t n = text.length ();
// Skip any leading separators to find the start of the first token.
146 size_t start = text.find_first_not_of (separators);
// End of the current token: the next separator at or after `start`.
149 size_t stop = text.find_first_of (separators, start);
// find_first_of yields npos (larger than n) when no separator remains, so
// clamp to the end of the string to take the final token.
150 if (stop > n) stop = n;
151 words.push_back (text.substr (start, stop-start));
// Advance past the separator run to the beginning of the next token.
152 start = text.find_first_not_of (separators, stop+1);