X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=samtools.git;a=blobdiff_plain;f=bam_pileup.c;h=57434e098e2398fc4a6eab32b0d507d7724aa96e;hp=a49f795d73d4f0ed1ad3a9592108f3be2a917579;hb=HEAD;hpb=62781a2daa24d74a3c590e2669fad1fa7cabf933 diff --git a/bam_pileup.c b/bam_pileup.c index a49f795..57434e0 100644 --- a/bam_pileup.c +++ b/bam_pileup.c @@ -78,13 +78,13 @@ static inline int resolve_cigar2(bam_pileup1_t *p, uint32_t pos, cstate_t *s) if (s->k == -1) { // never processed is_head = 1; if (c->n_cigar == 1) { // just one operation, save a loop - if (_cop(cigar[0]) == BAM_CMATCH) s->k = 0, s->x = c->pos, s->y = 0; - } else { // find the first match + if (_cop(cigar[0]) == BAM_CMATCH || _cop(cigar[0]) == BAM_CEQUAL || _cop(cigar[0]) == BAM_CDIFF) s->k = 0, s->x = c->pos, s->y = 0; + } else { // find the first match or deletion for (k = 0, s->x = c->pos, s->y = 0; k < c->n_cigar; ++k) { int op = _cop(cigar[k]); int l = _cln(cigar[k]); - if (op == BAM_CMATCH) break; - else if (op == BAM_CDEL || op == BAM_CREF_SKIP) s->x += l; + if (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CEQUAL || op == BAM_CDIFF) break; + else if (op == BAM_CREF_SKIP) s->x += l; else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) s->y += l; } assert(k < c->n_cigar); @@ -95,16 +95,16 @@ static inline int resolve_cigar2(bam_pileup1_t *p, uint32_t pos, cstate_t *s) if (pos - s->x >= l) { // jump to the next operation assert(s->k < c->n_cigar); // otherwise a bug: this function should not be called in this case op = _cop(cigar[s->k+1]); - if (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CREF_SKIP) { // jump to the next without a loop - if (_cop(cigar[s->k]) == BAM_CMATCH) s->y += l; + if (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CREF_SKIP || op == BAM_CEQUAL || op == BAM_CDIFF) { // jump to the next without a loop + if (_cop(cigar[s->k]) == BAM_CMATCH|| _cop(cigar[s->k]) == BAM_CEQUAL || _cop(cigar[s->k]) == BAM_CDIFF) s->y += l; s->x += l; ++s->k; - } else { // find the next M/D/N - if (_cop(cigar[s->k]) == BAM_CMATCH) s->y += l; + } else { // find the next M/D/N/=/X + if (_cop(cigar[s->k]) == BAM_CMATCH|| _cop(cigar[s->k]) == BAM_CEQUAL || _cop(cigar[s->k]) == BAM_CDIFF) s->y += l; s->x += l; for (k = s->k + 1; k < c->n_cigar; ++k) { op = _cop(cigar[k]), l = _cln(cigar[k]); - if (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CREF_SKIP) break; + if (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CREF_SKIP || op == BAM_CEQUAL || op == BAM_CDIFF) break; else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) s->y += l; } s->k = k; @@ -126,12 +126,12 @@ static inline int resolve_cigar2(bam_pileup1_t *p, uint32_t pos, cstate_t *s) for (k = s->k + 2; k < c->n_cigar; ++k) { op2 = _cop(cigar[k]); l2 = _cln(cigar[k]); if (op2 == BAM_CINS) l3 += l2; - else if (op2 == BAM_CDEL || op2 == BAM_CMATCH || op2 == BAM_CREF_SKIP) break; + else if (op2 == BAM_CDEL || op2 == BAM_CMATCH || op2 == BAM_CREF_SKIP || op2 == BAM_CEQUAL || op2 == BAM_CDIFF) break; } if (l3 > 0) p->indel = l3; } } - if (op == BAM_CMATCH) { + if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) { p->qpos = s->y + (pos - s->x); } else if (op == BAM_CDEL || op == BAM_CREF_SKIP) { p->is_del = 1; p->qpos = s->y; // FIXME: distinguish D and N!!!!! @@ -152,7 +152,7 @@ struct __bam_plp_t { mempool_t *mp; lbnode_t *head, *tail, *dummy; int32_t tid, pos, max_tid, max_pos; - int is_eof, flag_mask, max_plp, error; + int is_eof, flag_mask, max_plp, error, maxcnt; bam_pileup1_t *plp; // for the "auto" interface only bam1_t *b; @@ -169,6 +169,7 @@ bam_plp_t bam_plp_init(bam_plp_auto_f func, void *data) iter->dummy = mp_alloc(iter->mp); iter->max_tid = iter->max_pos = -1; iter->flag_mask = BAM_DEF_MASK; + iter->maxcnt = 8000; if (func) { iter->func = func; iter->data = data; @@ -240,6 +241,7 @@ int bam_plp_push(bam_plp_t iter, const bam1_t *b) if (b) { if (b->core.tid < 0) return 0; if (b->core.flag & iter->flag_mask) return 0; + if (iter->tid == b->core.tid && iter->pos == b->core.pos && iter->mp->cnt > iter->maxcnt) return 0; bam_copy1(&iter->tail->b, b); iter->tail->beg = b->core.pos; iter->tail->end = bam_calend(&b->core, bam1_cigar(b)); iter->tail->s = g_cstate_null; iter->tail->s.end = iter->tail->end - 1; // initialize cstate_t @@ -267,7 +269,7 @@ const bam_pileup1_t *bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_ const bam_pileup1_t *plp; if (iter->func == 0 || iter->error) { *_n_plp = -1; return 0; } if ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp; - else { + else { // no pileup line can be obtained; read alignments *_n_plp = 0; if (iter->is_eof) return 0; while (iter->func(iter->data, iter->b) >= 0) { @@ -276,6 +278,7 @@ const bam_pileup1_t *bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_ return 0; } if ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp; + // otherwise no pileup line can be returned; read the next alignment. } bam_plp_push(iter, 0); if ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp; @@ -302,6 +305,11 @@ void bam_plp_set_mask(bam_plp_t iter, int mask) iter->flag_mask = mask < 0? BAM_DEF_MASK : (BAM_FUNMAP | mask); } +void bam_plp_set_maxcnt(bam_plp_t iter, int maxcnt) +{ + iter->maxcnt = maxcnt; +} + /***************** * callback APIs * *****************/ @@ -389,6 +397,13 @@ bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data) return iter; } +void bam_mplp_set_maxcnt(bam_mplp_t iter, int maxcnt) +{ + int i; + for (i = 0; i < iter->n; ++i) + iter->iter[i]->maxcnt = maxcnt; +} + void bam_mplp_destroy(bam_mplp_t iter) { int i;