8 double Nuc::pseudocount = 1e-10;
10 unsigned int Nuc::strong_coverage_threshold = 10;
12 double Nuc::qA = 0.25;
13 double Nuc::qC = 0.25;
14 double Nuc::qG = 0.25;
15 double Nuc::qT = 0.25;
24 Nuc::Nuc(const Nuc& n) {
25 this->n[0] = n.at(0); //A
26 this->n[1] = n.at(1); //C
27 this->n[2] = n.at(2); //G
28 this->n[3] = n.at(3); //T
31 Nuc& Nuc::operator=(const Nuc& n) {
33 this->n[0] = n.at(0); //A
34 this->n[1] = n.at(1); //C
35 this->n[2] = n.at(2); //G
36 this->n[3] = n.at(3); //T
41 //For internal (protected) use only
42 unsigned int& Nuc::operator[](size_t b) { return n[b]; }
44 //TODO: Handle degeneracy?
45 void Nuc::add_nuc(char b) {
47 case 'a': case 'A': n[0]++; break;
48 case 'c': case 'C': n[1]++; break;
49 case 'g': case 'G': n[2]++; break;
50 case 't': case 'T': n[3]++; break;
56 //Useful to get the listing of all the nucleotides sequenced for this position.
57 //Note that this function doesn't return the nucleotide in the order presented
58 char Nuc::nth_nuc(unsigned int i) {
59 if(i >= N()) { cerr << "Error: requested nucleotide " << i << " but only " << N() << " nucleotides have been recorded\n"; }
60 else if(i < n[0]) { return 'A'; }
61 else if(i < n[0] + n[1]) { return 'C'; }
62 else if(i < n[0] + n[1] + n[2]) { return 'G'; }
68 //returns the number of nucleotides at the position
69 //todo: may want to make this a cached value so we don't always recalculate
70 unsigned int Nuc::N() { return n[0] + n[1] + n[2] + n[3]; }
72 unsigned int Nuc::at(size_t b) const { return n[b]; }
73 unsigned int Nuc::A() const { return this->at(0); }
74 unsigned int Nuc::C() const { return this->at(1); }
75 unsigned int Nuc::G() const { return this->at(2); }
76 unsigned int Nuc::T() const { return this->at(3); }
79 double total = this->N() + 4*Nuc::pseudocount;
80 double pA = (Nuc::pseudocount + n[0]) / total;
81 double pC = (Nuc::pseudocount + n[1]) / total;
82 double pG = (Nuc::pseudocount + n[2]) / total;
83 double pT = (Nuc::pseudocount + n[3]) / total;
84 return pA*log2(pA/Nuc::qA) + pC*log2(pC/Nuc::qC) + pG*log2(pG/Nuc::qG) + pT*log2(pT/Nuc::qT);
87 double Nuc::chastity() {
88 unsigned int max = 0; unsigned int max_idx = 0;
89 for(unsigned int i = 0; i < 4; i++) { if(n[i] > max) { max = n[i]; max_idx = i; } }
91 unsigned int max2 = 0; unsigned int max_idx2 = 0;
92 for(unsigned int i = 0; i < 4; i++) { if(i != max_idx && n[i] >= max2) { max2 = n[i]; max_idx2 = i; } }
94 if(max_idx == max_idx2) { max_idx2++; }
96 double total = (double)n[max_idx] + (double)n[max_idx2];
97 return( 2 * (double)n[max_idx]/total );
100 char Nuc::consensus() {
101 unsigned int N = this->N();
102 if(N == 0) { return ' '; }
104 unsigned int max = 0; unsigned int max_idx = 0;
105 for(unsigned int i = 0; i < 4; i++) { if(n[i] > max) { max = n[i]; max_idx = i; } }
107 unsigned int max2 = 0; unsigned int max_idx2 = 0;
108 for(unsigned int i = 0; i < 4; i++) { if(i != max_idx && n[i] > max2) { max2 = n[i]; max_idx2 = i; } }
110 //For now pick arbitrary zygosity thresholds. Later, update to use mixture model.
114 if(chastity() >= 1.25) {
116 case 0: c = 'A'; break;
117 case 1: c = 'C'; break;
118 case 2: c = 'G'; break;
119 case 3: c = 'T'; break;
125 switch(max_idx | max_idx2) {
126 case 1: c = 'M'; break; //A,C
127 case 2: c = 'R'; break; //A,G
128 case 3: c = (max_idx == 0 || max_idx2 == 0)?'W':'S'; break; //A,T or C,G
129 case 4: c = 'Y'; break; //C,T
130 case 5: c = 'K'; break; //G,T
134 //strong_coverage_threshold is the number of reads needed to make a confident call
135 if(N < this->strong_coverage_threshold) { return tolower(c); } else { return c; }