Copied from SVN revision 4710 and adapted to Git.
[samtools.git] / bam_maqcns.c
index 71c2185df0f27d53f74192d1d90625ca7ccb14ac..cad63d772268db289467c6a4fd625e50b7ac3889 100644 (file)
@@ -310,6 +310,7 @@ bam_maqindel_opt_t *bam_maqindel_opt_init()
        bam_maqindel_opt_t *mi = (bam_maqindel_opt_t*)calloc(1, sizeof(bam_maqindel_opt_t));
        mi->q_indel = 40;
        mi->r_indel = 0.00015;
+       mi->r_snp = 0.001;
        //
        mi->mm_penalty = 3;
        mi->indel_err = 4;
@@ -406,7 +407,8 @@ bam_maqindel_ret_t *bam_maqindel(int n, int pos, const bam_maqindel_opt_t *mi, c
        }
        { // the core part
                char *ref2, *rs, *inscns = 0;
-               int k, l, *score, *pscore, max_ins = types[n_types-1];
+               int qr_snp, k, l, *score, *pscore, max_ins = types[n_types-1];
+               qr_snp = (int)(-4.343 * log(mi->r_snp) + .499);
                if (max_ins > 0) { // get the consensus of inserted sequences
                        int *inscns_aux = (int*)calloc(4 * n_types * max_ins, sizeof(int));
                        // count occurrences
@@ -446,12 +448,18 @@ bam_maqindel_ret_t *bam_maqindel(int n, int pos, const bam_maqindel_opt_t *mi, c
                for (i = 0; i < n_types; ++i) {
                        ka_param_t ap = ka_param_blast;
                        ap.band_width = 2 * types[n_types - 1] + 2;
+                       ap.gap_end = 0;
                        // write ref2
                        for (k = 0, j = left; j <= pos; ++j)
                                ref2[k++] = bam_nt16_nt4_table[bam_nt16_table[(int)ref[j]]];
                        if (types[i] <= 0) j += -types[i];
                        else for (l = 0; l < types[i]; ++l)
                                         ref2[k++] = bam_nt16_nt4_table[(int)inscns[i*max_ins + l]];
+                       if (types[0] < 0) { // mask deleted sequences
+                               int jj, tmp = types[i] >= 0? -types[0] : -types[0] + types[i];
+                               for (jj = 0; jj < tmp && j < right && ref[j]; ++jj, ++j)
+                                       ref2[k++] = 4;
+                       }
                        for (; j < right && ref[j]; ++j)
                                ref2[k++] = bam_nt16_nt4_table[bam_nt16_table[(int)ref[j]]];
                        if (j < right) right = j;
@@ -482,22 +490,27 @@ bam_maqindel_ret_t *bam_maqindel(int n, int pos, const bam_maqindel_opt_t *mi, c
                                                if (op == BAM_CMATCH) {
                                                        int k;
                                                        for (k = 0; k < len; ++k)
-                                                               if (ref2[x+k] != rs[y+k]) ps += bam1_qual(p->b)[y+k];
+                                                               if (ref2[x+k] != rs[y+k] && ref2[x+k] < 4)
+                                                                       ps += bam1_qual(p->b)[y+k] < qr_snp? bam1_qual(p->b)[y+k] : qr_snp;
                                                        x += len; y += len;
                                                } else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) {
-                                                       if (op == BAM_CINS) ps += mi->q_indel * len;
+                                                       if (op == BAM_CINS && l > 0 && l < n_acigar - 1) ps += mi->q_indel * len;
                                                        y += len;
                                                } else if (op == BAM_CDEL) {
-                                                       ps += mi->q_indel * len;
+                                                       if (l > 0 && l < n_acigar - 1) ps += mi->q_indel * len;
                                                        x += len;
                                                }
                                        }
                                        pscore[i*n+j] = ps;
-                                       /*if (pos == 2618517) { // for debugging only
-                                               fprintf(stderr, "pos=%d, type=%d, j=%d, score=%d, psore=%d, %d, %d, %d, %d, ", pos+1, types[i], j, score[i*n+j], pscore[i*n+j], tbeg, tend, qbeg, qend);
-                                               for (l = 0; l < n_acigar; ++l) fprintf(stderr, "%d%c", acigar[l]>>4, "MIDS"[acigar[l]&0xf]); fprintf(stderr, "\n");
-                                               for (l = 0; l < tend - tbeg + types[i]; ++l) fputc("ACGTN"[ref2[l]], stderr); fputc('\n', stderr);
-                                               for (l = 0; l < qend - qbeg; ++l) fputc("ACGTN"[rs[l]], stderr); fputc('\n', stderr);
+                                       /*if (1) { // for debugging only
+                                               fprintf(stderr, "id=%d, pos=%d, type=%d, j=%d, score=%d, psore=%d, %d, %d, %d, %d, %d, ",
+                                                               j, pos+1, types[i], j, score[i*n+j], pscore[i*n+j], tbeg, tend, qbeg, qend, mi->q_indel);
+                                               for (l = 0; l < n_acigar; ++l) fprintf(stderr, "%d%c", acigar[l]>>4, "MIDS"[acigar[l]&0xf]);
+                                               fprintf(stderr, "\n");
+                                               for (l = 0; l < tend - tbeg + types[i]; ++l) fputc("ACGTN"[ref2[l+tbeg-left]], stderr);
+                                               fputc('\n', stderr);
+                                               for (l = 0; l < qend - qbeg; ++l) fputc("ACGTN"[rs[l]], stderr);
+                                               fputc('\n', stderr);
                                                }*/
                                        free(acigar);
                                }
@@ -560,7 +573,7 @@ bam_maqindel_ret_t *bam_maqindel(int n, int pos, const bam_maqindel_opt_t *mi, c
                                ret->gl[0] = ret->gl[1] = 0;
                                for (j = 0; j < n; ++j) {
                                        int s1 = pscore[max1_i*n + j], s2 = pscore[max2_i*n + j];
-                                       //printf("%d, %d, %d, %d, %d\n", pl[j].b->core.pos+1, max1_i, max2_i, s1, s2);
+                                       //fprintf(stderr, "id=%d, %d, %d, %d, %d, %d\n", j, pl[j].b->core.pos+1, types[max1_i], types[max2_i], s1, s2);
                                        if (s1 > s2) ret->gl[0] += s1 - s2 < seq_err? s1 - s2 : seq_err;
                                        else ret->gl[1] += s2 - s1 < seq_err? s2 - s1 : seq_err;
                                }