+++ /dev/null
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-#include "bcf.h"
-
-static double g_q2p[256];
-
-#define LD_ITER_MAX 50
-#define LD_ITER_EPS 1e-4
-
-#define _G1(h, k) ((h>>1&1) + (k>>1&1))
-#define _G2(h, k) ((h&1) + (k&1))
-
-// 0: the previous site; 1: the current site
-static int freq_iter(int n, double *pdg[2], double f[4])
-{
- double ff[4];
- int i, k, h;
- memset(ff, 0, 4 * sizeof(double));
- for (i = 0; i < n; ++i) {
- double *p[2], sum, tmp;
- p[0] = pdg[0] + i * 3; p[1] = pdg[1] + i * 3;
- for (k = 0, sum = 0.; k < 4; ++k)
- for (h = 0; h < 4; ++h)
- sum += f[k] * f[h] * p[0][_G1(k,h)] * p[1][_G2(k,h)];
- for (k = 0; k < 4; ++k) {
- tmp = f[0] * (p[0][_G1(0,k)] * p[1][_G2(0,k)] + p[0][_G1(k,0)] * p[1][_G2(k,0)])
- + f[1] * (p[0][_G1(1,k)] * p[1][_G2(1,k)] + p[0][_G1(k,1)] * p[1][_G2(k,1)])
- + f[2] * (p[0][_G1(2,k)] * p[1][_G2(2,k)] + p[0][_G1(k,2)] * p[1][_G2(k,2)])
- + f[3] * (p[0][_G1(3,k)] * p[1][_G2(3,k)] + p[0][_G1(k,3)] * p[1][_G2(k,3)]);
- ff[k] += f[k] * tmp / sum;
- }
- }
- for (k = 0; k < 4; ++k) f[k] = ff[k] / (2 * n);
- return 0;
-}
-
-double bcf_ld_freq(const bcf1_t *b0, const bcf1_t *b1, double f[4])
-{
- const bcf1_t *b[2];
- uint8_t *PL[2];
- int i, j, PL_len[2], n_smpl;
- double *pdg[2], flast[4], r;
- // initialize g_q2p if necessary
- if (g_q2p[0] == 0.)
- for (i = 0; i < 256; ++i)
- g_q2p[i] = pow(10., -i / 10.);
- // initialize others
- if (b0->n_smpl != b1->n_smpl) return -1; // different number of samples
- n_smpl = b0->n_smpl;
- b[0] = b0; b[1] = b1;
- f[0] = f[1] = f[2] = f[3] = -1.;
- if (b[0]->n_alleles < 2 || b[1]->n_alleles < 2) return -1; // one allele only
- // set PL and PL_len
- for (j = 0; j < 2; ++j) {
- const bcf1_t *bj = b[j];
- for (i = 0; i < bj->n_gi; ++i) {
- if (bj->gi[i].fmt == bcf_str2int("PL", 2)) {
- PL[j] = (uint8_t*)bj->gi[i].data;
- PL_len[j] = bj->gi[i].len;
- break;
- }
- }
- if (i == bj->n_gi) return -1; // no PL
- }
- // fill pdg[2]
- pdg[0] = malloc(3 * n_smpl * sizeof(double));
- pdg[1] = malloc(3 * n_smpl * sizeof(double));
- for (j = 0; j < 2; ++j) {
- for (i = 0; i < n_smpl; ++i) {
- const uint8_t *pi = PL[j] + i * PL_len[j];
- double *p = pdg[j] + i * 3;
- p[0] = g_q2p[pi[2]]; p[1] = g_q2p[pi[1]]; p[2] = g_q2p[pi[0]];
- }
- }
- // iteration
- f[0] = f[1] = f[2] = f[3] = 0.25; // this is a really bad guess...
- for (j = 0; j < LD_ITER_MAX; ++j) {
- double eps = 0;
- memcpy(flast, f, 4 * sizeof(double));
- freq_iter(n_smpl, pdg, f);
- for (i = 0; i < 4; ++i) {
- double x = fabs(f[i] - flast[i]);
- if (x > eps) eps = x;
- }
- if (eps < LD_ITER_EPS) break;
- }
- // free
- free(pdg[0]); free(pdg[1]);
- { // calculate r^2
- double p[2], q[2], D;
- p[0] = f[0] + f[1]; q[0] = 1 - p[0];
- p[1] = f[0] + f[2]; q[1] = 1 - p[1];
- D = f[0] * f[3] - f[1] * f[2];
- r = sqrt(D * D / (p[0] * p[1] * q[0] * q[1]));
- // fprintf(stderr, "R(%lf,%lf,%lf,%lf)=%lf\n", f[0], f[1], f[2], f[3], r2);
- if (isnan(r)) r = -1.;
- }
- return r;
-}
-
-int bcf_main_pwld(int argc, char *argv[])
-{
- bcf_t *fp;
- bcf_hdr_t *h;
- bcf1_t **b, *b0;
- int i, j, m, n;
- double f[4];
- if (argc == 1) {
- fprintf(stderr, "Usage: bcftools pwld <in.bcf>\n");
- return 1;
- }
- fp = bcf_open(argv[1], "rb");
- h = bcf_hdr_read(fp);
- // read the entire BCF
- m = n = 0; b = 0;
- b0 = calloc(1, sizeof(bcf1_t));
- while (bcf_read(fp, h, b0) >= 0) {
- if (m == n) {
- m = m? m<<1 : 16;
- b = realloc(b, sizeof(void*) * m);
- }
- b[n] = calloc(1, sizeof(bcf1_t));
- bcf_cpy(b[n++], b0);
- }
- bcf_destroy(b0);
- // compute pair-wise r^2
- printf("%d\n", n); // the number of loci
- for (i = 0; i < n; ++i) {
- printf("%s:%d", h->ns[b[i]->tid], b[i]->pos + 1);
- for (j = 0; j < i; ++j) {
- double r = bcf_ld_freq(b[i], b[j], f);
- printf("\t%.3f", r*r);
- }
- printf("\t1.000\n");
- }
- // free
- for (i = 0; i < n; ++i) bcf_destroy(b[i]);
- free(b);
- bcf_hdr_destroy(h);
- bcf_close(fp);
- return 0;
-}