X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=samtools.git;a=blobdiff_plain;f=bcftools%2Fmut.c;fp=bcftools%2Fmut.c;h=4b7cd104a60b7bc3d4fba7086b551ac72155f350;hp=0000000000000000000000000000000000000000;hb=e2bd290b8643f0a728c282a8b825392307750210;hpb=9f4bebab2e0917c676ae739b2d05cb22ad6c4ed5 diff --git a/bcftools/mut.c b/bcftools/mut.c new file mode 100644 index 0000000..4b7cd10 --- /dev/null +++ b/bcftools/mut.c @@ -0,0 +1,124 @@ +#include +#include +#include "bcf.h" + +#define MAX_GENO 359 + +int8_t seq_bitcnt[] = { 4, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 }; +char *seq_nt16rev = "XACMGRSVTWYHKDBN"; + +uint32_t *bcf_trio_prep(int is_x, int is_son) +{ + int i, j, k, n, map[10]; + uint32_t *ret; + ret = calloc(MAX_GENO, 4); + for (i = 0, k = 0; i < 4; ++i) + for (j = i; j < 4; ++j) + map[k++] = 1<n_smpl != 3) return -1; // not a trio + for (i = 0; i < b->n_gi; ++i) + if (b->gi[i].fmt == bcf_str2int("PL", 2)) break; + if (i == b->n_gi) return -1; // no PL + gl10 = alloca(10 * b->n_smpl); + if (bcf_gl10(b, gl10) < 0) return -1; + PL = b->gi + i; + for (i = 0, k = 0; i < 4; ++i) + for (j = i; j < 4; ++j) + map[k++] = seq_nt16rev[1<data)[j * PL->len] != 0) break; + if (j < 3) { // we need to go through the complex procedure + uint8_t *g[3]; + int minc = 1<<30, minc_j = -1, minf = 0, gtf = 0, gtc = 0; + g[0] = gl10; + g[1] = gl10 + 10; + g[2] = gl10 + 20; + for (j = 1; j <= (int)prep[0]; ++j) { // compute LK with constraint + int sum = g[0][prep[j]&0xff] + g[1][prep[j]>>8&0xff] + g[2][prep[j]>>16&0xff]; + if (sum < minc) minc = sum, minc_j = j; + } + gtc |= map[prep[minc_j]&0xff]; gtc |= map[prep[minc_j]>>8&0xff]<<8; gtc |= map[prep[minc_j]>>16]<<16; + for (j = 0; j < 3; ++j) { // compute LK without constraint + int min = 1<<30, min_k = -1; + for (k = 0; k < 10; ++k) + if (g[j][k] < min) min = g[j][k], min_k = k; + gtf |= map[min_k]<<(j*8); + minf += min; + } + *llr = minc - minf; *gt = (int64_t)gtc<<32 | gtf; + } else *llr = 0, *gt = -1; + return 0; +} + +int bcf_pair_call(const bcf1_t *b) +{ + int i, j, k; + const bcf_ginfo_t *PL; + if (b->n_smpl != 2) return -1; // not a pair + for (i = 0; i < b->n_gi; ++i) + if (b->gi[i].fmt == bcf_str2int("PL", 2)) break; + if (i == b->n_gi) return -1; // no PL + PL = b->gi + i; + for (j = 0; j < 2; ++j) // check if ref hom is the most probable in all members + if (((uint8_t*)PL->data)[j * PL->len] != 0) break; + if (j < 2) { // we need to go through the complex procedure + uint8_t *g[2]; + int minc = 1<<30, minf = 0; + g[0] = PL->data; + g[1] = (uint8_t*)PL->data + PL->len; + for (j = 0; j < PL->len; ++j) // compute LK with constraint + minc = minc < g[0][j] + g[1][j]? minc : g[0][j] + g[1][j]; + for (j = 0; j < 2; ++j) { // compute LK without constraint + int min = 1<<30; + for (k = 0; k < PL->len; ++k) + min = min < g[j][k]? min : g[j][k]; + minf += min; + } + return minc - minf; + } else return 0; +} + +int bcf_min_diff(const bcf1_t *b) +{ + int i, min = 1<<30; + const bcf_ginfo_t *PL; + for (i = 0; i < b->n_gi; ++i) + if (b->gi[i].fmt == bcf_str2int("PL", 2)) break; + if (i == b->n_gi) return -1; // no PL + PL = b->gi + i; + for (i = 0; i < b->n_smpl; ++i) { + int m1, m2, j; + const uint8_t *p = (uint8_t*)PL->data; + m1 = m2 = 1<<30; + for (j = 0; j < PL->len; ++j) { + if ((int)p[j] < m1) m2 = m1, m1 = p[j]; + else if ((int)p[j] < m2) m2 = p[j]; + } + min = min < m2 - m1? min : m2 - m1; + } + return min; +}