X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=pysam.git;a=blobdiff_plain;f=samtools%2Fbam2bcf.c.pysam.c;h=06f85998d432012ac9baa56fa2f8ded3ae68ec5e;hp=ece837ae71089cb2640d5401143a449d5eae5fed;hb=e1756c41e7a1d7cc01fb95e42df9dd04da2d2991;hpb=ca46ef4ba4a883c57cea62d5bf1bc021f1185109 diff --git a/samtools/bam2bcf.c.pysam.c b/samtools/bam2bcf.c.pysam.c index ece837a..06f8599 100644 --- a/samtools/bam2bcf.c.pysam.c +++ b/samtools/bam2bcf.c.pysam.c @@ -41,7 +41,6 @@ void bcf_call_destroy(bcf_callaux_t *bca) * negative if we are looking at an indel. */ int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base, bcf_callaux_t *bca, bcf_callret1_t *r) { - static int *var_pos = NULL, nvar_pos = 0; int i, n, ref4, is_indel, ori_depth = 0; memset(r, 0, sizeof(bcf_callret1_t)); if (ref_base >= 0) { @@ -97,92 +96,9 @@ int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base, bcf_callaux_t r->depth = n; r->ori_depth = ori_depth; // glfgen errmod_cal(bca->e, n, 5, bca->bases, r->p); - - // Calculate the Variant Distance Bias (make it optional?) - if ( nvar_pos < _n ) { - nvar_pos = _n; - var_pos = realloc(var_pos,sizeof(int)*nvar_pos); - } - int alt_dp=0, read_len=0; - for (i=0; i<_n; i++) { - const bam_pileup1_t *p = pl + i; - if ( bam1_seqi(bam1_seq(p->b),p->qpos) == ref_base ) - continue; - - var_pos[alt_dp] = p->qpos; - if ( (bam1_cigar(p->b)[0]&BAM_CIGAR_MASK)==4 ) - var_pos[alt_dp] -= bam1_cigar(p->b)[0]>>BAM_CIGAR_SHIFT; - - alt_dp++; - read_len += p->b->core.l_qseq; - } - float mvd=0; - int j; - n=0; - for (i=0; imvd[0] = n ? mvd/n : 0; - r->mvd[1] = alt_dp; - r->mvd[2] = alt_dp ? read_len/alt_dp : 0; - return r->depth; } - -void calc_vdb(int n, const bcf_callret1_t *calls, bcf_call_t *call) -{ - // Variant distance bias. Samples merged by means of DP-weighted average. - - float weight=0, tot_prob=0; - - int i; - for (i=0; i2*mu ? 0 : sin(mvd*3.14/2/mu) / (4*mu/3.14); - } - else - { - // Scaled gaussian curve, crude approximation, but behaves well. Using fixed depth for bigger depths. - if ( dp>5 ) - dp = 5; - float sigma2 = (read_len/1.9/(dp+1)) * (read_len/1.9/(dp+1)); - float norm = 1.125*sqrt(2*3.14*sigma2); - float mu = read_len/2.9; - if ( mvd < mu ) - prob = exp(-(mvd-mu)*(mvd-mu)/2/sigma2)/norm; - else - prob = exp(-(mvd-mu)*(mvd-mu)/3.125/sigma2)/norm; - } - - //fprintf(pysamerr,"dp=%d mvd=%d read_len=%d -> prob=%f\n", dp,mvd,read_len,prob); - tot_prob += prob*dp; - weight += dp; - } - tot_prob = weight ? tot_prob/weight : 1; - //fprintf(pysamerr,"prob=%f\n", tot_prob); - call->vdb = tot_prob; -} - int bcf_call_combine(int n, const bcf_callret1_t *calls, int ref_base /*4-bit*/, bcf_call_t *call) { int ref4, i, j, qsum[4]; @@ -256,9 +172,6 @@ int bcf_call_combine(int n, const bcf_callret1_t *calls, int ref_base /*4-bit*/, call->ori_depth += calls[i].ori_depth; for (j = 0; j < 16; ++j) call->anno[j] += calls[i].anno[j]; } - - calc_vdb(n, calls, call); - return 0; } @@ -312,10 +225,6 @@ int bcf_call2bcf(int tid, int pos, bcf_call_t *bc, bcf1_t *b, bcf_callret1_t *bc if (i) kputc(',', &s); kputw(bc->anno[i], &s); } - if ( bc->vdb!=1 ) - { - ksprintf(&s, ";VDB=%.4f", bc->vdb); - } kputc('\0', &s); // FMT kputs("PL", &s); @@ -329,7 +238,7 @@ int bcf_call2bcf(int tid, int pos, bcf_call_t *bc, bcf1_t *b, bcf_callret1_t *bc memcpy(b->gi[0].data, bc->PL, b->gi[0].len * bc->n); if (bcr) { uint16_t *dp = (uint16_t*)b->gi[1].data; - int32_t *sp = is_SP? b->gi[2].data : 0; + uint8_t *sp = is_SP? b->gi[2].data : 0; for (i = 0; i < bc->n; ++i) { bcf_callret1_t *p = bcr + i; dp[i] = p->depth < 0xffff? p->depth : 0xffff;