+#include <iostream>
+#include <math.h>
+
+#include "nuc.h"
+
+using namespace std;
+
+double Nuc::pseudocount = 1e-10;
+
+unsigned int Nuc::strong_coverage_threshold = 10;
+
+double Nuc::qA = 0.25;
+double Nuc::qC = 0.25;
+double Nuc::qG = 0.25;
+double Nuc::qT = 0.25;
+
+Nuc::Nuc() {
+ n[0] = 0; //A
+ n[1] = 0; //C
+ n[2] = 0; //G
+ n[3] = 0; //T
+}
+
+Nuc::Nuc(const Nuc& n) {
+ this->n[0] = n.at(0); //A
+ this->n[1] = n.at(1); //C
+ this->n[2] = n.at(2); //G
+ this->n[3] = n.at(3); //T
+}
+
+Nuc& Nuc::operator=(const Nuc& n) {
+ if (this != &n) {
+ this->n[0] = n.at(0); //A
+ this->n[1] = n.at(1); //C
+ this->n[2] = n.at(2); //G
+ this->n[3] = n.at(3); //T
+ }
+ return *this;
+}
+
+//For internal (protected) use only
+unsigned int& Nuc::operator[](size_t b) { return n[b]; }
+
+//TODO: Handle degeneracy?
+void Nuc::add_nuc(char b) {
+ switch(b) {
+ case 'a': case 'A': n[0]++; break;
+ case 'c': case 'C': n[1]++; break;
+ case 'g': case 'G': n[2]++; break;
+ case 't': case 'T': n[3]++; break;
+ };
+}
+
+//nth_nuc
+//=======
+//Useful to get the listing of all the nucleotides sequenced for this position.
+//Note that this function doesn't return the nucleotide in the order presented
+char Nuc::nth_nuc(unsigned int i) {
+ if(i >= N()) { cerr << "Error: requested nucleotide " << i << " but only " << N() << " nucleotides have been recorded\n"; }
+ else if(i < n[0]) { return 'A'; }
+ else if(i < n[0] + n[1]) { return 'C'; }
+ else if(i < n[0] + n[1] + n[2]) { return 'G'; }
+ return 'T';
+}
+
+//N
+//====
+//returns the number of nucleotides at the position
+//todo: may want to make this a cached value so we don't always recalculate
+unsigned int Nuc::N() { return n[0] + n[1] + n[2] + n[3]; }
+
+unsigned int Nuc::at(size_t b) const { return n[b]; }
+unsigned int Nuc::A() const { return this->at(0); }
+unsigned int Nuc::C() const { return this->at(1); }
+unsigned int Nuc::G() const { return this->at(2); }
+unsigned int Nuc::T() const { return this->at(3); }
+
+double Nuc::RE() {
+ double total = this->N() + 4*Nuc::pseudocount;
+ double pA = (Nuc::pseudocount + n[0]) / total;
+ double pC = (Nuc::pseudocount + n[1]) / total;
+ double pG = (Nuc::pseudocount + n[2]) / total;
+ double pT = (Nuc::pseudocount + n[3]) / total;
+ return pA*log2(pA/Nuc::qA) + pC*log2(pC/Nuc::qC) + pG*log2(pG/Nuc::qG) + pT*log2(pT/Nuc::qT);
+}
+
+double Nuc::chastity() {
+ unsigned int max = 0; unsigned int max_idx = 0;
+ for(unsigned int i = 0; i < 4; i++) { if(n[i] > max) { max = n[i]; max_idx = i; } }
+
+ unsigned int max2 = 0; unsigned int max_idx2 = 0;
+ for(unsigned int i = 0; i < 4; i++) { if(i != max_idx && n[i] >= max2) { max2 = n[i]; max_idx2 = i; } }
+
+ if(max_idx == max_idx2) { max_idx2++; }
+
+ double total = (double)n[max_idx] + (double)n[max_idx2];
+ return( 2 * (double)n[max_idx]/total );
+}
+
+char Nuc::consensus() {
+ unsigned int N = this->N();
+ if(N == 0) { return ' '; }
+
+ unsigned int max = 0; unsigned int max_idx = 0;
+ for(unsigned int i = 0; i < 4; i++) { if(n[i] > max) { max = n[i]; max_idx = i; } }
+
+ unsigned int max2 = 0; unsigned int max_idx2 = 0;
+ for(unsigned int i = 0; i < 4; i++) { if(i != max_idx && n[i] > max2) { max2 = n[i]; max_idx2 = i; } }
+
+ //For now pick arbitrary zygosity thresholds. Later, update to use mixture model.
+ char c = '\0';
+
+ //homozygous
+ if(chastity() >= 1.25) {
+ switch(max_idx) {
+ case 0: c = 'A'; break;
+ case 1: c = 'C'; break;
+ case 2: c = 'G'; break;
+ case 3: c = 'T'; break;
+ }
+ }
+
+ //heterozygous
+ else {
+ switch(max_idx | max_idx2) {
+ case 1: c = 'M'; break; //A,C
+ case 2: c = 'R'; break; //A,G
+ case 3: c = (max_idx == 0 || max_idx2 == 0)?'W':'S'; break; //A,T or C,G
+ case 4: c = 'Y'; break; //C,T
+ case 5: c = 'K'; break; //G,T
+ }
+ }
+
+ //strong_coverage_threshold is the number of reads needed to make a confident call
+ if(N < this->strong_coverage_threshold) { return tolower(c); } else { return c; }
+}