using namespace std;
-typedef pair<unsigned int, unsigned int> Hit;
-typedef vector<Hit> Hits;
+class Read {
+ public:
+ string chr;
+ int pos;
+ bool strand;
+
+ Read(string chr, int pos, bool strand) { this->chr = chr; this->pos = pos; this->strand = strand; }
+ Read(const Read& r) { this->chr = r.chr; this->pos = r.pos; this->strand = r.strand; }
+ Read& operator=(const Read& r) {this->chr = r.chr; this->pos = r.pos; this->strand = r.strand; return *this;}
+};
+
+typedef vector<Read> Reads;
void print_wig_header(char* name, char* desc) {
//cout << "browser position chr1" << endl;
cout << "track type=wig name=" << name << " description=" << desc << " visibility=full autoScale=on color=0,0,0 priority=10" << endl;
}
-bool compare_hits(const Hit &a, const Hit &b)
-{
- if(a.first == b.first) {
- return a.second < b.second;
- } else {
- return a.first < b.first;
- }
-}
+bool compare_reads(const Read &a, const Read &b) { if(a.chr == b.chr) { return a.pos < b.pos; } else { return a.chr < b.chr; } }
+
+void split (const string& text, const string& separators, vector<string>& words);
+void read_align_file(char* filename, Reads& features);
int main(int argc, char** argv) {
if(argc != 4) { cerr << "Usage: " << argv[0] << " Aligned_reads Wiggle_Track_Name Wiggle_Track_Description" << endl; return(0); }
char filename[1024]; strcpy(filename,argv[1]);
- Hits data;
+ Reads data;
char track_name[1024]; strcpy(track_name, argv[2]);
char track_desc[1024]; strcpy(track_desc, argv[3]);
print_wig_header(track_name, track_desc);
+ read_align_file(filename, data);
- ifstream seqs(filename);
- while(seqs.peek() != EOF) {
- char chr_str[32];
- char pos_str[32];
- char n_match_str[32];
- char chr_start[4];
-
- seqs.ignore(128,' ');
- seqs.ignore(128,' ');
- seqs.getline(n_match_str,32,' ');
- n_match_str[seqs.gcount()] = '\0';
- unsigned int nmatch = atoi(n_match_str);
- if(nmatch != 1) { seqs.ignore(1024,'\n'); continue; }
-
- seqs.get(chr_start,4);
- chr_start[3] = '\0';
- if(chr_start[0] != 'c' || chr_start[1] != 'h' || chr_start[2] != 'r') { seqs.ignore(2048,'\n'); continue; }
- seqs.getline(chr_str,32,':');
- chr_str[seqs.gcount()] = '\0';
- seqs.getline(pos_str,32,' ');
- pos_str[seqs.gcount()] = '\0';
- seqs.ignore(1024,'\n');
-
- unsigned int chr;
-
- if(chr_str[0] == 'X') { chr = 23; } else
- if(chr_str[0] == 'Y') { chr = 24; } else
- if(chr_str[0] == 'M' || chr_str[0] == 'm') { chr = 25; } else
- if(chr_str[0] == 'R') { continue; } else
- { chr = atoi(chr_str); }
-
- unsigned int pos = atoi(pos_str);
-
- Hit hit(chr,pos);
- data.push_back(hit);
- }
- seqs.close();
-
- std::sort(data.begin(),data.end(),compare_hits);
-
unsigned int n_aligned_reads = data.size();
cerr << n_aligned_reads/1000000.00 << "M reads\n";
- queue<Hit> Q;
- for(Hits::iterator i = data.begin(); i != data.end();) {
- if(Q.empty() || Q.front().first != (*i).first) {
+ queue<Read> Q;
+ for(Reads::iterator i = data.begin(); i != data.end();) {
+ if(Q.empty() || Q.front().chr != (*i).chr) {
while(!Q.empty()) { Q.pop(); }
- char chr[3];
- switch(i->first) {
- case 23: chr[0]='X'; chr[1]='\0'; break;
- case 24: chr[0]='Y'; chr[1]='\0'; break;
- case 25: chr[0]='M'; chr[1]='\0'; break;
- case 26: chr[0]='R'; chr[1]='\0'; break;
- default: chr[0]=(char)(48+(int)(i->first/10));
- chr[1]=(char)(48+(int)(i->first - 10*(int)(i->first/10)));
- if(chr[0]=='0') { chr[0]=chr[1]; chr[1]='\0'; }
- }
- chr[2] = '\0';
- cout << "variableStep chrom=chr" << chr << endl;
+ cout << "variableStep chrom=" << i->chr.c_str() << endl;
}
- unsigned int chrnum = i->first;
- unsigned int posnum = i->second;
- while(i != data.end() && i->first == chrnum && i->second == posnum) {
+ string chr_string = i->chr;
+ int posnum = i->pos;
+ while(i != data.end() && i->chr == chr_string && i->pos == posnum) {
Q.push(*i); ++i;
}
- while(!Q.empty() && Q.front().second + WINDOW < Q.back().second) {
+ while(!Q.empty() && Q.front().pos + WINDOW < Q.back().pos) {
Q.pop();
}
if(Q.size() > 1) {
- //cout << Q.back().second << " " << log(Q.size()) << endl;
- cout << Q.back().second << " " << Q.size() / (double)(n_aligned_reads/1000000.0) << endl;
+ cout << Q.back().pos << " " << Q.size() / (double)(n_aligned_reads/1000000.0) << endl;
}
}
}
+void read_align_file(char* filename, Reads& features) {
+ string delim(" \n");
+ string location_delim(":");
+ char strand_str[2]; strand_str[1] = '\0';
+ ifstream seqs(filename);
+ string name("");
+ while(seqs.peek() != EOF) {
+ char line[2048];
+ seqs.getline(line,2048,'\n');
+
+ string line_str(line);
+ vector<string> fields;
+ split(line_str, delim, fields);
+ if(fields.size() != 7) { continue; }
+
+ vector<string> location; split(fields[3], location_delim, location);
+ string chr = location[0];
+ if(chr == "NA") { continue; }
+ int pos = atoi(location[1].c_str());
+ bool strand = ((fields[4].c_str())[0] == 'F')?0:1;
+
+ if(strand == 0) {
+ Read read(chr,pos,false);
+ features.push_back(read);
+ } else {
+ Read read(chr,pos+25,true);
+ features.push_back(read);
+ }
+ }
+ seqs.close();
+
+ //sort the data so we can run through it once
+ std::sort(features.begin(),features.end(),compare_reads);
+ cerr << "Reads sorted\n";
+}
+
+void split (const string& text, const string& separators, vector<string>& words) {
+
+ size_t n = text.length ();
+ size_t start = text.find_first_not_of (separators);
+
+ while (start < n) {
+ size_t stop = text.find_first_of (separators, start);
+ if (stop > n) stop = n;
+ words.push_back (text.substr (start, stop-start));
+ start = text.find_first_not_of (separators, stop+1);
+ }
+}