X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=samtools.git;a=blobdiff_plain;f=bam_maqcns.c;h=cad63d772268db289467c6a4fd625e50b7ac3889;hp=71c2185df0f27d53f74192d1d90625ca7ccb14ac;hb=cefa18095b2479339b08111936313066ec548657;hpb=317f5e8dd22cc9e1e5e05fbcaeb3b9aca7447351 diff --git a/bam_maqcns.c b/bam_maqcns.c index 71c2185..cad63d7 100644 --- a/bam_maqcns.c +++ b/bam_maqcns.c @@ -310,6 +310,7 @@ bam_maqindel_opt_t *bam_maqindel_opt_init() bam_maqindel_opt_t *mi = (bam_maqindel_opt_t*)calloc(1, sizeof(bam_maqindel_opt_t)); mi->q_indel = 40; mi->r_indel = 0.00015; + mi->r_snp = 0.001; // mi->mm_penalty = 3; mi->indel_err = 4; @@ -406,7 +407,8 @@ bam_maqindel_ret_t *bam_maqindel(int n, int pos, const bam_maqindel_opt_t *mi, c } { // the core part char *ref2, *rs, *inscns = 0; - int k, l, *score, *pscore, max_ins = types[n_types-1]; + int qr_snp, k, l, *score, *pscore, max_ins = types[n_types-1]; + qr_snp = (int)(-4.343 * log(mi->r_snp) + .499); if (max_ins > 0) { // get the consensus of inserted sequences int *inscns_aux = (int*)calloc(4 * n_types * max_ins, sizeof(int)); // count occurrences @@ -446,12 +448,18 @@ bam_maqindel_ret_t *bam_maqindel(int n, int pos, const bam_maqindel_opt_t *mi, c for (i = 0; i < n_types; ++i) { ka_param_t ap = ka_param_blast; ap.band_width = 2 * types[n_types - 1] + 2; + ap.gap_end = 0; // write ref2 for (k = 0, j = left; j <= pos; ++j) ref2[k++] = bam_nt16_nt4_table[bam_nt16_table[(int)ref[j]]]; if (types[i] <= 0) j += -types[i]; else for (l = 0; l < types[i]; ++l) ref2[k++] = bam_nt16_nt4_table[(int)inscns[i*max_ins + l]]; + if (types[0] < 0) { // mask deleted sequences + int jj, tmp = types[i] >= 0? -types[0] : -types[0] + types[i]; + for (jj = 0; jj < tmp && j < right && ref[j]; ++jj, ++j) + ref2[k++] = 4; + } for (; j < right && ref[j]; ++j) ref2[k++] = bam_nt16_nt4_table[bam_nt16_table[(int)ref[j]]]; if (j < right) right = j; @@ -482,22 +490,27 @@ bam_maqindel_ret_t *bam_maqindel(int n, int pos, const bam_maqindel_opt_t *mi, c if (op == BAM_CMATCH) { int k; for (k = 0; k < len; ++k) - if (ref2[x+k] != rs[y+k]) ps += bam1_qual(p->b)[y+k]; + if (ref2[x+k] != rs[y+k] && ref2[x+k] < 4) + ps += bam1_qual(p->b)[y+k] < qr_snp? bam1_qual(p->b)[y+k] : qr_snp; x += len; y += len; } else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) { - if (op == BAM_CINS) ps += mi->q_indel * len; + if (op == BAM_CINS && l > 0 && l < n_acigar - 1) ps += mi->q_indel * len; y += len; } else if (op == BAM_CDEL) { - ps += mi->q_indel * len; + if (l > 0 && l < n_acigar - 1) ps += mi->q_indel * len; x += len; } } pscore[i*n+j] = ps; - /*if (pos == 2618517) { // for debugging only - fprintf(stderr, "pos=%d, type=%d, j=%d, score=%d, psore=%d, %d, %d, %d, %d, ", pos+1, types[i], j, score[i*n+j], pscore[i*n+j], tbeg, tend, qbeg, qend); - for (l = 0; l < n_acigar; ++l) fprintf(stderr, "%d%c", acigar[l]>>4, "MIDS"[acigar[l]&0xf]); fprintf(stderr, "\n"); - for (l = 0; l < tend - tbeg + types[i]; ++l) fputc("ACGTN"[ref2[l]], stderr); fputc('\n', stderr); - for (l = 0; l < qend - qbeg; ++l) fputc("ACGTN"[rs[l]], stderr); fputc('\n', stderr); + /*if (1) { // for debugging only + fprintf(stderr, "id=%d, pos=%d, type=%d, j=%d, score=%d, psore=%d, %d, %d, %d, %d, %d, ", + j, pos+1, types[i], j, score[i*n+j], pscore[i*n+j], tbeg, tend, qbeg, qend, mi->q_indel); + for (l = 0; l < n_acigar; ++l) fprintf(stderr, "%d%c", acigar[l]>>4, "MIDS"[acigar[l]&0xf]); + fprintf(stderr, "\n"); + for (l = 0; l < tend - tbeg + types[i]; ++l) fputc("ACGTN"[ref2[l+tbeg-left]], stderr); + fputc('\n', stderr); + for (l = 0; l < qend - qbeg; ++l) fputc("ACGTN"[rs[l]], stderr); + fputc('\n', stderr); }*/ free(acigar); } @@ -560,7 +573,7 @@ bam_maqindel_ret_t *bam_maqindel(int n, int pos, const bam_maqindel_opt_t *mi, c ret->gl[0] = ret->gl[1] = 0; for (j = 0; j < n; ++j) { int s1 = pscore[max1_i*n + j], s2 = pscore[max2_i*n + j]; - //printf("%d, %d, %d, %d, %d\n", pl[j].b->core.pos+1, max1_i, max2_i, s1, s2); + //fprintf(stderr, "id=%d, %d, %d, %d, %d, %d\n", j, pl[j].b->core.pos+1, types[max1_i], types[max2_i], s1, s2); if (s1 > s2) ret->gl[0] += s1 - s2 < seq_err? s1 - s2 : seq_err; else ret->gl[1] += s2 - s1 < seq_err? s2 - s1 : seq_err; }