#include <iostream>
+#include <string>
#include <fstream>
#include <vector>
-#include <queue>
#include <math.h>
+#include "SRLib/loci.h"
+#include "SRLib/util.h"
+
#define WINDOW 25
using namespace std;
-class Read {
+class EndPoint : public Loci {
public:
- string chr;
- int pos;
- bool strand;
+ bool type; //0 = start, 1 = end
+
+ EndPoint(string chr, unsigned int pos, bool type) : Loci(chr,pos) { this->type = type; }
+ EndPoint(const EndPoint &e) : Loci(e) { this->type = e.type; }
+ EndPoint& operator=(const EndPoint& e) {
+ if(this == &e) return *this;
- Read(string chr, int pos, bool strand) { this->chr = chr; this->pos = pos; this->strand = strand; }
- Read(const Read& r) { this->chr = r.chr; this->pos = r.pos; this->strand = r.strand; }
- Read& operator=(const Read& r) {this->chr = r.chr; this->pos = r.pos; this->strand = r.strand; return *this;}
+ Loci::operator=(e);
+ this->type = e.type;
+ return *this;
+ }
};
-typedef vector<Read> Reads;
+typedef vector<EndPoint> Ends;
-void print_wig_header(char* name, char* desc) {
- //cout << "browser position chr1" << endl;
- cout << "browser hide all" << endl;
- cout << "browser full altGraph" << endl;
- cout << "track type=bedGraph name=" << name << " description=" << desc << " visibility=full autoScale=on color=0,0,0 priority=10" << endl;
+void print_browser_line() {
+ cout << "browser hide all" << endl << "browser pack refGene encodeRegions" << endl << "browser full altGraph" << endl;
}
-bool compare_reads(const Read &a, const Read &b) { if(a.chr == b.chr) { return a.pos < b.pos; } else { return a.chr < b.chr; } }
+void print_wig_header(string name, string desc, string color = "200,100,0", string altColor = "0,100,200") {
+ //cout << "browser position chr1" << endl;
+ cout << "track type=bedGraph name=" << name << " description=" << desc << " visibility=full ";
+ cout << "color=" << color << " altColor="<< altColor << " priority=10 maxHeightPixels=128:64:11" << endl;
+}
-void split (const string& text, const string& separators, vector<string>& words);
-void read_align_file(char* filename, Reads& features);
+void read_align_file(char* filename, Ends& f, Ends& r);
+void print_data(Ends::iterator i, Ends::iterator end, double Z);
int main(int argc, char** argv) {
if(argc != 4) { cerr << "Usage: " << argv[0] << " Aligned_reads Wiggle_Track_Name Wiggle_Track_Description" << endl; return(0); }
char filename[1024]; strcpy(filename,argv[1]);
- Reads data;
- char track_name[1024]; strcpy(track_name, argv[2]);
- char track_desc[1024]; strcpy(track_desc, argv[3]);
+ Ends f, r;
- print_wig_header(track_name, track_desc);
- read_align_file(filename, data);
-
- unsigned int n_aligned_reads = data.size();
- cerr << n_aligned_reads/1000000.00 << "M reads\n";
-
- queue<Read> Q;
- for(Reads::iterator i = data.begin(); i != data.end();) {
- if(Q.empty() || Q.front().chr != (*i).chr) {
- while(!Q.empty()) { Q.pop(); }
- }
-
- string chr_string = i->chr;
- int posnum = i->pos;
- while(i != data.end() && i->chr == chr_string && i->pos == posnum) {
- Q.push(*i); ++i;
- }
+ string track_name = argv[2];
+ string track_desc = argv[3];
- while(!Q.empty() && Q.front().pos + WINDOW < Q.back().pos) {
- Q.pop();
- }
+ read_align_file(filename, f, r);
+
+ unsigned int n_aligned_reads = (f.size() + r.size()) / 2;
+ double Z = n_aligned_reads/1000000.0;
+ cerr << Z << "M reads\n";
+
+ print_browser_line();
+ print_wig_header(track_name+"_forward", track_desc+"_forward");
+ print_data(f.begin(),f.end(), Z);
+ print_wig_header(track_name+"_reverse", track_desc+"_reverse");
+ print_data(r.begin(),r.end(), -1.0*Z);
+}
- if(Q.size() > 1 && (i+1) != data.end() && Q.back().chr == (i+1)->chr) {
- cout << Q.back().chr << " " << Q.back().pos << " " << (i+1)->pos << " " << Q.size() / (double)(n_aligned_reads/1000000.0) << endl;
+void print_data(Ends::iterator i, Ends::iterator end, double Z) {
+ EndPoint begin_mark("start",0,0);
+ EndPoint prev = begin_mark;
+ int count = 0;
+ do {
+
+ //ok, first look at the next endpoint, and output the end of the corrent window.
+ i++;
+ if(i->chr != prev.chr)
+ count = 0;
+
+ // Using a very low noise threshold to help make for sane file sizes. Can override if need to see everything
+ // but for large datasets, no threshold can easily produce gigabyte sized files. This threshold drops the file
+ // size to about 1/8th the original size(of course, depending on the dataset).
+ else if(count > 2)
+ cout << prev.chr << "\t" << prev.pos << "\t" << i->pos << "\t" << count / Z << endl;
+
+ //now, count the new endpoint, and reset the prev marker
+ if(i->type == 0) { count++; } else { count--; }
+ prev = *i;
+
+ //finally, mark the current point, and while new points are the same position, keep adding them
+ while(i+1 != end && ((Loci)(*(i+1)) == (Loci)prev)) {
+ i++;
+ if(i->type == 0) { count++; } else { count--; }
}
- }
+ } while(i+1 != end);
}
-void read_align_file(char* filename, Reads& features) {
+/* example align file format
+CTTCAATGACCACTGGCCTCCCTG 12500 1 chr22:15254780 R CAGGGAGGCCAGTGGTCATTGAAGC 11453
+AGCGTGGGCAACAGGGAACGTGATG 11453 1 chr22:44584170 R CATCCCGTTCCCTGTTGCCCACGCT 9359
+*/
+
+void read_align_file(char* filename, Ends& f, Ends& r) {
string delim(" \n");
string location_delim(":");
char strand_str[2]; strand_str[1] = '\0';
if(fields.size() != 7) { continue; }
vector<string> location; split(fields[3], location_delim, location);
+
string chr = location[0];
- if(chr == "NA") { continue; }
- if(chr.substr(0,3) != "chr") { continue; }
int pos = atoi(location[1].c_str());
bool strand = ((fields[4].c_str())[0] == 'F')?0:1;
if(strand == 0) {
- Read read(chr,pos,false);
- features.push_back(read);
+ EndPoint s(chr,pos,0);
+ EndPoint e(chr,pos+25,1);
+ f.push_back(s);
+ f.push_back(e);
} else {
- Read read(chr,pos+25,true);
- features.push_back(read);
+ EndPoint s(chr,pos,0);
+ EndPoint e(chr,pos+25,1);
+ r.push_back(s);
+ r.push_back(e);
}
}
seqs.close();
//sort the data so we can run through it once
- std::sort(features.begin(),features.end(),compare_reads);
+ std::sort(f.begin(),f.end());
+ std::sort(r.begin(),r.end());
cerr << "Reads sorted\n";
}
-
-void split (const string& text, const string& separators, vector<string>& words) {
-
- size_t n = text.length ();
- size_t start = text.find_first_not_of (separators);
-
- while (start < n) {
- size_t stop = text.find_first_of (separators, start);
- if (stop > n) stop = n;
- words.push_back (text.substr (start, stop-start));
- start = text.find_first_not_of (separators, stop+1);
- }
-}