class Read {
public:
- int chr;
+ string chr;
int pos;
bool strand;
- Read(int chr, int pos, bool strand) { this->chr = chr; this->pos = pos; this->strand = strand; }
+ Read(string chr, int pos, bool strand) { this->chr = chr; this->pos = pos; this->strand = strand; }
+ Read(const Read& r) { this->chr = r.chr; this->pos = r.pos; this->strand = r.strand; }
+ Read& operator=(const Read& r) {this->chr = r.chr; this->pos = r.pos; this->strand = r.strand; return *this;}
};
class Hit {
public:
- int chr;
int start;
int end;
bool strand;
+ string chr;
string name;
unsigned int count[2];
- Hit(string name, int chr, int start, int end, bool strand) { this->name = name; this->chr = chr; this->start = start; this->end = end; this->strand = strand; count[0] = 0; count[1] = 0; }
+ Hit(string name, string chr, int start, int end, bool strand) {
+ this->name = name;
+ this->chr = chr;
+ this->start = start;
+ this->end = end;
+ this->strand = strand;
+ count[0] = 0;
+ count[1] = 0;
+ }
+
+ Hit(const Hit& h) {
+ this->name = h.name;
+ this->chr = h.chr;
+ this->start = h.start;
+ this->end = h.end;
+ this->strand = h.strand;
+ count[0] = h.count[0];
+ count[1] = h.count[1];
+ }
+
+ Hit& operator=(const Hit& h) {
+ this->name = h.name;
+ this->chr = h.chr;
+ this->start = h.start;
+ this->end = h.end;
+ this->strand = h.strand;
+ count[0] = h.count[0];
+ count[1] = h.count[1];
+ return *this;
+ }
};
+bool Feat_lt_Read(const Hit& feat, const Read& read) {
+ //First compare based on chromosomes
+ if(read.chr == feat.chr) { return feat.end < read.pos; }
+ else { return feat.chr < read.chr; }
+}
+
ostream &operator<<( ostream &out, const Hit &h ) {
- out << h.name << "\tchr" << h.chr << "\t" << h.start << "\t" << h.end << "\t" << h.strand << "\t" << h.count;
+ out << h.name.c_str() << "\tchr" << h.chr.c_str() << "\t" << h.start << "\t" << h.end << "\t" << h.strand << "\t" << h.count;
return out;
}
if(fields.size() < 5) { cerr << "Error (" << filename << "): wrong number of fields in feature list (line " << N << " has " << fields.size() << " fields)\n"; }
string name = fields[0];
- int chr = chromstr_to_int(fields[1]);
- if(chr == -1) { continue; }
+ string chr = fields[1];
+ if(chr == "NA") { continue; }
int start = atoi(fields[2].c_str());
int stop = atoi(fields[3].c_str());
bool strand = 0;
char line[2048];
seqs.getline(line,2048,'\n');
- //cerr << ++count << endl;
string line_str(line);
vector<string> fields;
split(line_str, delim, fields);
if(fields.size() != 7) { continue; }
vector<string> location; split(fields[3], location_delim, location);
- int chr = chromstr_to_int(location[0]);
- if(chr == -1) { continue; }
+ string chr = location[0];
+ if(chr == "NA") { continue; }
int pos = atoi(location[1].c_str());
bool strand = ((fields[4].c_str())[0] == 'F')?0:1;
//sort the data so we can run through it once
std::sort(features.begin(),features.end(),compare_reads);
- cerr << "Reads sorted\n";
}
void count_read_in_features(Hits& features, Reads& data, unsigned int idx) {
- int chrom = -1;
- unsigned int feat_idx = 0;
- int num_hits = 0;
+ Hits::iterator feat_it = features.begin();
+
for(Reads::iterator i = data.begin(); i != data.end(); ++i) {
- num_hits++;
- if(chrom == -1 || i->chr != chrom) {
- chrom = i->chr;
- feat_idx = 0;
- while(feat_idx < features.size() && features[feat_idx].chr != chrom) { feat_idx++; }
- if(feat_idx == features.size()) { break; }
+ //skip to first feature after read
+ string start_chr = feat_it->chr;
+ while(feat_it != features.end() && Feat_lt_Read(*feat_it, *i)) {
+ feat_it++;
}
+
+ //stop if we have run out of features.
+ if(feat_it == features.end()) { break; }
- double hit_size = (double)features[feat_idx].end - features[feat_idx].start;
- double dist_to_feature = (double)(i->pos - features[feat_idx].start) / hit_size;
- //if we have passed the last feature, fast forward to the next
- while( feat_idx < features.size() && dist_to_feature > 1 ) {
- feat_idx++;
- if(features[feat_idx].chr != i->chr ) { goto end_loop; }
- hit_size = (double)features[feat_idx].end - features[feat_idx].start;
- dist_to_feature = (double)(i->pos - features[feat_idx].start) / hit_size;
+ if(i->pos >= feat_it->start && i->pos <= feat_it->end) {
+ feat_it->count[idx]++;
}
-
- if(features[feat_idx].strand == 1) {
- dist_to_feature = 1.0 - dist_to_feature;
- }
-
- if(dist_to_feature >= 0 && dist_to_feature <= 1) {
- features[feat_idx].count[idx]++;
- }
- end_loop:
- ;
}
}
for(unsigned int idx = 0; idx < features.size(); idx++) {
read_data[idx] = ((double)features[idx].count[0]/readZ);
bg_data[idx] = ((double)features[idx].count[1]/bgZ);
+ // cout << features[idx].name << "\t" << features[idx].chr << ":" << features[idx].start << "-" << features[idx].end << "\t" << read_data[idx] << "\t" << bg_data[idx] << endl;
}
double enrichment = gsl_stats_mean(read_data,1,features.size())/gsl_stats_mean(bg_data,1,features.size());
//qsort(data, features.size(), sizeof(double), compare_double);
//double median = gsl_stats_median_from_sorted_data(data, 1, features.size());
//double var = gsl_stats_variance_m(data,1,features.size(),mean);
- size_t basename_start = tests[i].find_last_of('/');
- cout << tests[i].substr(basename_start,tests[i].length()-basename_start).c_str() << "\t" << enrichment << endl;
- }
-}
-
-int chromstr_to_int(string chromstr) {
- const char* chrom = chromstr.c_str();
- if(chrom[0] != 'c' || chrom[1] != 'h' || chrom[2] != 'r') { return -1; }
- if(chrom[3] == 'X') { return 23; } else
- if(chrom[3] == 'Y') { return 24; } else
- if(chrom[3] == 'M') { return 25; } else
- if(chrom[3] >= '0' && chrom[3] <= '9') { return atoi(chrom+3); } else
- { return -1; }
-}
-
-void int_to_chromstr(int chrom, char* chr) {
- strcpy(chr,"chr");
- switch(chrom) {
- case 23: chr[3]='X'; break;
- case 24: chr[3]='Y'; break;
- case 25: chr[3]='M'; break;
- default: if(chrom < 10) { chr[3]=(char)(48+chrom); chr[4] = '\0'; }
- else { chr[3]=(char)(48+(int)(chrom/10));
- chr[4]=(char)(48+(int)(chrom - (int)(chr[3]-48)));
- chr[5]='\0';
- }
+ cout << tests[i].c_str() << "\t" << enrichment << endl;
}
}
-
void split (const string& text, const string& separators, vector<string>& words) {