Merge commit 'upstream/0.1.16'
[samtools.git] / bcftools / ld.c
diff --git a/bcftools/ld.c b/bcftools/ld.c
deleted file mode 100644 (file)
index 0207819..0000000
+++ /dev/null
@@ -1,143 +0,0 @@
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-#include "bcf.h"
-
-static double g_q2p[256];
-
-#define LD_ITER_MAX 50
-#define LD_ITER_EPS 1e-4
-
-#define _G1(h, k) ((h>>1&1) + (k>>1&1))
-#define _G2(h, k) ((h&1) + (k&1))
-
-// 0: the previous site; 1: the current site
-static int freq_iter(int n, double *pdg[2], double f[4])
-{
-       double ff[4];
-       int i, k, h;
-       memset(ff, 0, 4 * sizeof(double));
-       for (i = 0; i < n; ++i) {
-               double *p[2], sum, tmp;
-               p[0] = pdg[0] + i * 3; p[1] = pdg[1] + i * 3;
-               for (k = 0, sum = 0.; k < 4; ++k)
-                       for (h = 0; h < 4; ++h)
-                               sum += f[k] * f[h] * p[0][_G1(k,h)] * p[1][_G2(k,h)];
-               for (k = 0; k < 4; ++k) {
-                       tmp = f[0] * (p[0][_G1(0,k)] * p[1][_G2(0,k)] + p[0][_G1(k,0)] * p[1][_G2(k,0)])
-                               + f[1] * (p[0][_G1(1,k)] * p[1][_G2(1,k)] + p[0][_G1(k,1)] * p[1][_G2(k,1)])
-                               + f[2] * (p[0][_G1(2,k)] * p[1][_G2(2,k)] + p[0][_G1(k,2)] * p[1][_G2(k,2)])
-                               + f[3] * (p[0][_G1(3,k)] * p[1][_G2(3,k)] + p[0][_G1(k,3)] * p[1][_G2(k,3)]);
-                       ff[k] += f[k] * tmp / sum;
-               }
-       }
-       for (k = 0; k < 4; ++k) f[k] = ff[k] / (2 * n);
-       return 0;
-}
-
-double bcf_ld_freq(const bcf1_t *b0, const bcf1_t *b1, double f[4])
-{
-       const bcf1_t *b[2];
-       uint8_t *PL[2];
-       int i, j, PL_len[2], n_smpl;
-       double *pdg[2], flast[4], r;
-       // initialize g_q2p if necessary
-       if (g_q2p[0] == 0.)
-               for (i = 0; i < 256; ++i)
-                       g_q2p[i] = pow(10., -i / 10.);
-       // initialize others
-       if (b0->n_smpl != b1->n_smpl) return -1; // different number of samples
-       n_smpl = b0->n_smpl;
-       b[0] = b0; b[1] = b1;
-       f[0] = f[1] = f[2] = f[3] = -1.;
-       if (b[0]->n_alleles < 2 || b[1]->n_alleles < 2) return -1; // one allele only
-       // set PL and PL_len
-       for (j = 0; j < 2; ++j) {
-               const bcf1_t *bj = b[j];
-               for (i = 0; i < bj->n_gi; ++i) {
-                       if (bj->gi[i].fmt == bcf_str2int("PL", 2)) {
-                               PL[j] = (uint8_t*)bj->gi[i].data;
-                               PL_len[j] = bj->gi[i].len;
-                               break;
-                       }
-               }
-               if (i == bj->n_gi) return -1; // no PL
-       }
-       // fill pdg[2]
-       pdg[0] = malloc(3 * n_smpl * sizeof(double));
-       pdg[1] = malloc(3 * n_smpl * sizeof(double));
-       for (j = 0; j < 2; ++j) {
-               for (i = 0; i < n_smpl; ++i) {
-                       const uint8_t *pi = PL[j] + i * PL_len[j];
-                       double *p = pdg[j] + i * 3;
-                       p[0] = g_q2p[pi[2]]; p[1] = g_q2p[pi[1]]; p[2] = g_q2p[pi[0]];
-               }
-       }
-       // iteration
-       f[0] = f[1] = f[2] = f[3] = 0.25; // this is a really bad guess...
-       for (j = 0; j < LD_ITER_MAX; ++j) {
-               double eps = 0;
-               memcpy(flast, f, 4 * sizeof(double));
-               freq_iter(n_smpl, pdg, f);
-               for (i = 0; i < 4; ++i) {
-                       double x = fabs(f[i] - flast[i]);
-                       if (x > eps) eps = x;
-               }
-               if (eps < LD_ITER_EPS) break;
-       }
-       // free
-       free(pdg[0]); free(pdg[1]);
-       { // calculate r^2
-               double p[2], q[2], D;
-               p[0] = f[0] + f[1]; q[0] = 1 - p[0];
-               p[1] = f[0] + f[2]; q[1] = 1 - p[1];
-               D = f[0] * f[3] - f[1] * f[2];
-               r = sqrt(D * D / (p[0] * p[1] * q[0] * q[1]));
-               // fprintf(stderr, "R(%lf,%lf,%lf,%lf)=%lf\n", f[0], f[1], f[2], f[3], r2);
-               if (isnan(r)) r = -1.;
-       }
-       return r;
-}
-
-int bcf_main_pwld(int argc, char *argv[])
-{
-       bcf_t *fp;
-       bcf_hdr_t *h;
-       bcf1_t **b, *b0;
-       int i, j, m, n;
-       double f[4];
-       if (argc == 1) {
-               fprintf(stderr, "Usage: bcftools pwld <in.bcf>\n");
-               return 1;
-       }
-       fp = bcf_open(argv[1], "rb");
-       h = bcf_hdr_read(fp);
-       // read the entire BCF
-       m = n = 0; b = 0;
-       b0 = calloc(1, sizeof(bcf1_t));
-       while (bcf_read(fp, h, b0) >= 0) {
-               if (m == n) {
-                       m = m? m<<1 : 16;
-                       b = realloc(b, sizeof(void*) * m);
-               }
-               b[n] = calloc(1, sizeof(bcf1_t));
-               bcf_cpy(b[n++], b0);
-       }
-       bcf_destroy(b0);
-       // compute pair-wise r^2
-       printf("%d\n", n); // the number of loci
-       for (i = 0; i < n; ++i) {
-               printf("%s:%d", h->ns[b[i]->tid], b[i]->pos + 1);
-               for (j = 0; j < i; ++j) {
-                       double r = bcf_ld_freq(b[i], b[j], f);
-                       printf("\t%.3f", r*r);
-               }
-               printf("\t1.000\n");
-       }
-       // free
-       for (i = 0; i < n; ++i) bcf_destroy(b[i]);
-       free(b);
-       bcf_hdr_destroy(h);
-       bcf_close(fp);
-       return 0;
-}