7 #include "SRLib/loci.h"
8 #include "SRLib/util.h"
14 class EndPoint : public Loci {
16 bool type; //0 = start, 1 = end
18 EndPoint(string chr, unsigned int pos, bool type) : Loci(chr,pos) { this->type = type; }
19 EndPoint(const EndPoint &e) : Loci(e) { this->type = e.type; }
20 EndPoint& operator=(const EndPoint& e) {
21 if(this == &e) return *this;
29 typedef vector<EndPoint> Ends;
31 void print_browser_line() {
32 cout << "browser hide all" << endl << "browser pack refGene encodeRegions" << endl << "browser full altGraph" << endl;
35 void print_wig_header(string name, string desc, string color = "200,100,0", string altColor = "0,100,200") {
36 //cout << "browser position chr1" << endl;
37 cout << "track type=bedGraph name=" << name << " description=" << desc << " visibility=full ";
38 cout << "color=" << color << " altColor="<< altColor << " priority=10 maxHeightPixels=128:64:11" << endl;
41 void read_align_file(char* filename, Ends& f, Ends& r);
42 void print_data(Ends::iterator i, Ends::iterator end, double Z);
44 int main(int argc, char** argv) {
45 if(argc != 4) { cerr << "Usage: " << argv[0] << " Aligned_reads Wiggle_Track_Name Wiggle_Track_Description" << endl; return(0); }
46 char filename[1024]; strcpy(filename,argv[1]);
50 string track_name = argv[2];
51 string track_desc = argv[3];
53 read_align_file(filename, f, r);
55 unsigned int n_aligned_reads = (f.size() + r.size()) / 2;
56 double Z = n_aligned_reads/1000000.0;
57 cerr << Z << "M reads\n";
60 print_wig_header(track_name+"_forward", track_desc+"_forward");
61 print_data(f.begin(),f.end(), Z);
62 print_wig_header(track_name+"_reverse", track_desc+"_reverse");
63 print_data(r.begin(),r.end(), -1.0*Z);
66 void print_data(Ends::iterator i, Ends::iterator end, double Z) {
67 EndPoint begin_mark("start",0,0);
68 EndPoint prev = begin_mark;
72 //ok, first look at the next endpoint, and output the end of the corrent window.
74 if(i->chr != prev.chr)
77 // Using a very low noise threshold to help make for sane file sizes. Can override if need to see everything
78 // but for large datasets, no threshold can easily produce gigabyte sized files. This threshold drops the file
79 // size to about 1/8th the original size(of course, depending on the dataset).
81 cout << prev.chr << "\t" << prev.pos << "\t" << i->pos << "\t" << count / Z << endl;
83 //now, count the new endpoint, and reset the prev marker
84 if(i->type == 0) { count++; } else { count--; }
87 //finally, mark the current point, and while new points are the same position, keep adding them
88 while(i+1 != end && ((Loci)(*(i+1)) == (Loci)prev)) {
90 if(i->type == 0) { count++; } else { count--; }
95 /* example align file format
96 CTTCAATGACCACTGGCCTCCCTG 12500 1 chr22:15254780 R CAGGGAGGCCAGTGGTCATTGAAGC 11453
97 AGCGTGGGCAACAGGGAACGTGATG 11453 1 chr22:44584170 R CATCCCGTTCCCTGTTGCCCACGCT 9359
100 void read_align_file(char* filename, Ends& f, Ends& r) {
102 string location_delim(":");
103 char strand_str[2]; strand_str[1] = '\0';
104 ifstream seqs(filename);
106 while(seqs.peek() != EOF) {
108 seqs.getline(line,2048,'\n');
110 string line_str(line);
111 vector<string> fields;
112 split(line_str, delim, fields);
113 if(fields.size() != 7) { continue; }
115 vector<string> location; split(fields[3], location_delim, location);
117 string chr = location[0];
118 int pos = atoi(location[1].c_str());
119 bool strand = ((fields[4].c_str())[0] == 'F')?0:1;
122 EndPoint s(chr,pos,0);
123 EndPoint e(chr,pos+25,1);
127 EndPoint s(chr,pos,0);
128 EndPoint e(chr,pos+25,1);
135 //sort the data so we can run through it once
136 std::sort(f.begin(),f.end());
137 std::sort(r.begin(),r.end());
138 cerr << "Reads sorted\n";