6 #include "bam_endian.h"
14 Alignment indexing. Before indexing, BAM must be sorted based on the
15 leftmost coordinate of alignments. In indexing, BAM uses two indices:
16 a UCSC binning index and a simple linear index. The binning index is
17 efficient for alignments spanning long distances, while the auxiliary
18 linear index helps to reduce unnecessary seek calls especially for
21 The UCSC binning scheme was suggested by Richard Durbin and Lincoln
22 Stein and is explained by Kent et al. (2002). In this scheme, each bin
23 represents a contiguous genomic region which can be fully contained in
24 another bin; each alignment is associated with a bin which represents
25 the smallest region containing the entire alignment. The binning
26 scheme is essentially another representation of an R-tree. A distinct bin
27 uniquely corresponds to a distinct internal node in an R-tree. Bin A is
28 a child of Bin B if region A is contained in B.
30 In BAM, each bin may span 2^29, 2^26, 2^23, 2^20, 2^17 or 2^14 bp. Bin
31 0 spans a 512Mbp region, bins 1-8 span 64Mbp, 9-72 8Mbp, 73-584 1Mbp,
32 585-4680 128Kbp and bins 4681-37449 span 16Kbp regions. If we want to
33 find the alignments overlapping a region [rbeg,rend), we need to
34 calculate the list of bins that may overlap the region and test
35 the alignments in the bins to confirm the overlaps. If the specified
36 region is short, typically only a few alignments in six bins need to
37 be retrieved. The overlapping alignments can be quickly fetched.
41 #define BAM_MIN_CHUNK_GAP 32768
42 // 1<<14 is the size of minimum bin.
43 #define BAM_LIDX_SHIFT 14
45 #define BAM_MAX_BIN 37450 // =(8^6-1)/7+1
51 #define pair64_lt(a,b) ((a).u < (b).u)
52 KSORT_INIT(off, pair64_t, pair64_lt)
64 KHASH_MAP_INIT_INT(i, bam_binlist_t)
66 struct __bam_index_t {
68 uint64_t n_no_coor; // unmapped reads without coordinate
73 // requirement: len <= LEN_MASK
74 static inline void insert_offset(khash_t(i) *h, int bin, uint64_t beg, uint64_t end)
79 k = kh_put(i, h, bin, &ret);
81 if (ret) { // not present
83 l->list = (pair64_t*)calloc(l->m, 16);
87 l->list = (pair64_t*)realloc(l->list, l->m * 16);
89 l->list[l->n].u = beg; l->list[l->n++].v = end;
92 static inline void insert_offset2(bam_lidx_t *index2, bam1_t *b, uint64_t offset)
95 beg = b->core.pos >> BAM_LIDX_SHIFT;
96 end = (bam_calend(&b->core, bam1_cigar(b)) - 1) >> BAM_LIDX_SHIFT;
97 if (index2->m < end + 1) {
98 int old_m = index2->m;
100 kroundup32(index2->m);
101 index2->offset = (uint64_t*)realloc(index2->offset, index2->m * 8);
102 memset(index2->offset + old_m, 0, 8 * (index2->m - old_m));
105 if (index2->offset[beg] == 0) index2->offset[beg] = offset;
107 for (i = beg; i <= end; ++i)
108 if (index2->offset[i] == 0) index2->offset[i] = offset;
113 static void merge_chunks(bam_index_t *idx)
115 #if defined(BAM_TRUE_OFFSET) || defined(BAM_VIRTUAL_OFFSET16)
119 for (i = 0; i < idx->n; ++i) {
120 index = idx->index[i];
121 for (k = kh_begin(index); k != kh_end(index); ++k) {
123 if (!kh_exist(index, k) || kh_key(index, k) == BAM_MAX_BIN) continue;
124 p = &kh_value(index, k);
126 for (l = 1; l < p->n; ++l) {
127 #ifdef BAM_TRUE_OFFSET
128 if (p->list[m].v + BAM_MIN_CHUNK_GAP > p->list[l].u) p->list[m].v = p->list[l].v;
130 if (p->list[m].v>>16 == p->list[l].u>>16) p->list[m].v = p->list[l].v;
132 else p->list[++m] = p->list[l];
137 #endif // defined(BAM_TRUE_OFFSET) || defined(BAM_BGZF)
140 static void fill_missing(bam_index_t *idx)
143 for (i = 0; i < idx->n; ++i) {
144 bam_lidx_t *idx2 = &idx->index2[i];
145 for (j = 1; j < idx2->n; ++j)
146 if (idx2->offset[j] == 0)
147 idx2->offset[j] = idx2->offset[j-1];
151 bam_index_t *bam_index_core(bamFile fp)
157 uint32_t last_bin, save_bin;
158 int32_t last_coor, last_tid, save_tid;
160 uint64_t save_off, last_off, n_mapped, n_unmapped, off_beg, off_end, n_no_coor;
162 idx = (bam_index_t*)calloc(1, sizeof(bam_index_t));
163 b = (bam1_t*)calloc(1, sizeof(bam1_t));
164 h = bam_header_read(fp);
167 idx->n = h->n_targets;
168 bam_header_destroy(h);
169 idx->index = (khash_t(i)**)calloc(idx->n, sizeof(void*));
170 for (i = 0; i < idx->n; ++i) idx->index[i] = kh_init(i);
171 idx->index2 = (bam_lidx_t*)calloc(idx->n, sizeof(bam_lidx_t));
173 save_bin = save_tid = last_tid = last_bin = 0xffffffffu;
174 save_off = last_off = bam_tell(fp); last_coor = 0xffffffffu;
175 n_mapped = n_unmapped = n_no_coor = off_end = 0;
176 off_beg = off_end = bam_tell(fp);
177 while ((ret = bam_read1(fp, b)) >= 0) {
178 if (c->tid < 0) ++n_no_coor;
179 if (last_tid < c->tid || (last_tid >= 0 && c->tid < 0)) { // change of chromosomes
181 last_bin = 0xffffffffu;
182 } else if ((uint32_t)last_tid > (uint32_t)c->tid) {
183 fprintf(stderr, "[bam_index_core] the alignment is not sorted (%s): %d-th chr > %d-th chr\n",
184 bam1_qname(b), last_tid+1, c->tid+1);
186 } else if ((int32_t)c->tid >= 0 && last_coor > c->pos) {
187 fprintf(stderr, "[bam_index_core] the alignment is not sorted (%s): %u > %u in %d-th chr\n",
188 bam1_qname(b), last_coor, c->pos, c->tid+1);
191 if (c->tid >= 0 && !(c->flag & BAM_FUNMAP)) insert_offset2(&idx->index2[b->core.tid], b, last_off);
192 if (c->bin != last_bin) { // then possibly write the binning index
193 if (save_bin != 0xffffffffu) // save_bin==0xffffffffu only happens to the first record
194 insert_offset(idx->index[save_tid], save_bin, save_off, last_off);
195 if (last_bin == 0xffffffffu && save_tid != 0xffffffffu) { // write the meta element
197 insert_offset(idx->index[save_tid], BAM_MAX_BIN, off_beg, off_end);
198 insert_offset(idx->index[save_tid], BAM_MAX_BIN, n_mapped, n_unmapped);
199 n_mapped = n_unmapped = 0;
203 save_bin = last_bin = c->bin;
205 if (save_tid < 0) break;
207 if (bam_tell(fp) <= last_off) {
208 fprintf(stderr, "[bam_index_core] bug in BGZF/RAZF: %llx < %llx\n",
209 (unsigned long long)bam_tell(fp), (unsigned long long)last_off);
212 if (c->flag & BAM_FUNMAP) ++n_unmapped;
214 last_off = bam_tell(fp);
215 last_coor = b->core.pos;
218 insert_offset(idx->index[save_tid], save_bin, save_off, bam_tell(fp));
219 insert_offset(idx->index[save_tid], BAM_MAX_BIN, off_beg, bam_tell(fp));
220 insert_offset(idx->index[save_tid], BAM_MAX_BIN, n_mapped, n_unmapped);
225 while ((ret = bam_read1(fp, b)) >= 0) {
227 if (c->tid >= 0 && n_no_coor) {
228 fprintf(stderr, "[bam_index_core] the alignment is not sorted: reads without coordinates prior to reads with coordinates.\n");
233 if (ret < -1) fprintf(stderr, "[bam_index_core] truncated file? Continue anyway. (%d)\n", ret);
234 free(b->data); free(b);
235 idx->n_no_coor = n_no_coor;
239 void bam_index_destroy(bam_index_t *idx)
243 if (idx == 0) return;
244 for (i = 0; i < idx->n; ++i) {
245 khash_t(i) *index = idx->index[i];
246 bam_lidx_t *index2 = idx->index2 + i;
247 for (k = kh_begin(index); k != kh_end(index); ++k) {
248 if (kh_exist(index, k))
249 free(kh_value(index, k).list);
251 kh_destroy(i, index);
252 free(index2->offset);
254 free(idx->index); free(idx->index2);
258 void bam_index_save(const bam_index_t *idx, FILE *fp)
262 if (fwrite("BAI\1", 1, 4, fp) < 4) {
263 fprintf(stderr, "[%s] failed to write magic number.\n", __func__);
268 if (fwrite(bam_swap_endian_4p(&x), 4, 1, fp) < 1) {
269 fprintf(stderr, "[%s] failed to write n_ref.\n", __func__);
274 if (fwrite(&idx->n, 4, 1, fp) < 1) {
275 fprintf(stderr, "[%s] failed to write n_ref.\n", __func__);
279 for (i = 0; i < idx->n; ++i) {
280 khash_t(i) *index = idx->index[i];
281 bam_lidx_t *index2 = idx->index2 + i;
282 // write binning index
283 size = kh_size(index);
284 if (bam_is_be) { // big endian
286 if (fwrite(bam_swap_endian_4p(&x), 4, 1, fp) < 1) {
287 fprintf(stderr, "[%s] failed to write n_bin.\n", __func__);
292 if (fwrite(&size, 4, 1, fp) < 1) {
293 fprintf(stderr, "[%s] failed to write n_bin.\n", __func__);
297 for (k = kh_begin(index); k != kh_end(index); ++k) {
298 if (kh_exist(index, k)) {
299 bam_binlist_t *p = &kh_value(index, k);
300 if (bam_is_be) { // big endian
302 x = kh_key(index, k);
303 if (fwrite(bam_swap_endian_4p(&x), 4, 1, fp) < 1) {
304 fprintf(stderr, "[%s] failed to write bin.\n", __func__);
308 if (fwrite(bam_swap_endian_4p(&x), 4, 1, fp) < 1) {
309 fprintf(stderr, "[%s] failed to write n_chunk.\n", __func__);
312 for (x = 0; (int)x < p->n; ++x) {
313 bam_swap_endian_8p(&p->list[x].u);
314 bam_swap_endian_8p(&p->list[x].v);
316 if (fwrite(p->list, 16, p->n, fp) < p->n) {
317 fprintf(stderr, "[%s] failed to write %d chunks.\n", __func__, p->n);
321 for (x = 0; (int)x < p->n; ++x) {
322 bam_swap_endian_8p(&p->list[x].u);
323 bam_swap_endian_8p(&p->list[x].v);
327 if (fwrite(&kh_key(index, k), 4, 1, fp) < 1) {
328 fprintf(stderr, "[%s] failed to write bin.\n", __func__);
331 if (fwrite(&p->n, 4, 1, fp) < 1) {
332 fprintf(stderr, "[%s] failed to write n_chunk.\n", __func__);
335 if (fwrite(p->list, 16, p->n, fp) < p->n) {
336 fprintf(stderr, "[%s] failed to write %d chunks.\n", __func__, p->n);
342 // write linear index (index2)
345 if (fwrite(bam_swap_endian_4p(&x), 4, 1, fp) < 1) {
346 fprintf(stderr, "[%s] failed to write n_intv.\n", __func__);
351 if (fwrite(&index2->n, 4, 1, fp) < 1) {
352 fprintf(stderr, "[%s] failed to write n_intv.\n", __func__);
356 if (bam_is_be) { // big endian
358 for (x = 0; (int)x < index2->n; ++x)
359 bam_swap_endian_8p(&index2->offset[x]);
360 if (fwrite(index2->offset, 8, index2->n, fp) < index2->n) {
361 fprintf(stderr, "[%s] failed to write ioffset.\n", __func__);
364 for (x = 0; (int)x < index2->n; ++x)
365 bam_swap_endian_8p(&index2->offset[x]);
368 if (fwrite(index2->offset, 8, index2->n, fp) < index2->n) {
369 fprintf(stderr, "[%s] failed to write ioffset.\n", __func__);
374 { // write the number of reads coor-less records.
375 uint64_t x = idx->n_no_coor;
376 if (bam_is_be) bam_swap_endian_8p(&x);
377 if (fwrite(&x, 8, 1, fp) < 1) {
378 fprintf(stderr, "[%s] failed to write n_no_coor.\n", __func__);
384 static bam_index_t *bam_index_load_core(FILE *fp)
390 fprintf(stderr, "[bam_index_load_core] fail to load index.\n");
393 if (fread(magic, 1, 4, fp) < 4) {
394 fprintf(stderr, "[%s] failed to read magic number.\n", __func__);
398 if (strncmp(magic, "BAI\1", 4)) {
399 fprintf(stderr, "[bam_index_load] wrong magic number.\n");
403 idx = (bam_index_t*)calloc(1, sizeof(bam_index_t));
404 if (fread(&idx->n, 4, 1, fp) < 1) {
405 fprintf(stderr, "[%s] failed to read n_ref.\n", __func__);
409 if (bam_is_be) bam_swap_endian_4p(&idx->n);
410 idx->index = (khash_t(i)**)calloc(idx->n, sizeof(void*));
411 idx->index2 = (bam_lidx_t*)calloc(idx->n, sizeof(bam_lidx_t));
412 for (i = 0; i < idx->n; ++i) {
414 bam_lidx_t *index2 = idx->index2 + i;
419 index = idx->index[i] = kh_init(i);
420 // load binning index
421 if (fread(&size, 4, 1, fp) < 1) {
422 fprintf(stderr, "[%s] failed to read n_bin.\n", __func__);
426 if (bam_is_be) bam_swap_endian_4p(&size);
427 for (j = 0; j < (int)size; ++j) {
428 if (fread(&key, 4, 1, fp) < 1) {
429 fprintf(stderr, "[%s] failed to read bin.\n", __func__);
433 if (bam_is_be) bam_swap_endian_4p(&key);
434 k = kh_put(i, index, key, &ret);
435 p = &kh_value(index, k);
436 if (fread(&p->n, 4, 1, fp) < 1) {
437 fprintf(stderr, "[%s] failed to read n_chunk.\n", __func__);
441 if (bam_is_be) bam_swap_endian_4p(&p->n);
443 p->list = (pair64_t*)malloc(p->m * 16);
444 if (fread(p->list, 16, p->n, fp) < p->n) {
445 fprintf(stderr, "[%s] failed to read %d chunks.\n", __func__, p->n);
451 for (x = 0; x < p->n; ++x) {
452 bam_swap_endian_8p(&p->list[x].u);
453 bam_swap_endian_8p(&p->list[x].v);
458 if (fread(&index2->n, 4, 1, fp) < 1) {
459 fprintf(stderr, "[%s] failed to read n_intv", __func__);
463 if (bam_is_be) bam_swap_endian_4p(&index2->n);
465 index2->m = index2->n;
466 index2->offset = (uint64_t*)calloc(index2->m, 8);
467 if (fread(index2->offset, index2->n, 8, fp) < index2->n) {
468 fprintf(stderr, "[%s] failed to read %d intervals.", __func__, index2->n);
473 for (j = 0; j < index2->n; ++j) {
474 bam_swap_endian_8p(&index2->offset[j]);
479 if (fread(&idx->n_no_coor, 8, 1, fp) == 0) idx->n_no_coor = 0;
480 if (bam_is_be) bam_swap_endian_8p(&idx->n_no_coor);
484 bam_index_t *bam_index_load_local(const char *_fn)
489 if (strstr(_fn, "ftp://") == _fn || strstr(_fn, "http://") == _fn) {
492 for (p = _fn + l - 1; p >= _fn; --p)
493 if (*p == '/') break;
495 } else fn = strdup(_fn);
496 fnidx = (char*)calloc(strlen(fn) + 5, 1);
497 strcpy(fnidx, fn); strcat(fnidx, ".bai");
498 fp = fopen(fnidx, "rb");
499 if (fp == 0) { // try "{base}.bai"
500 char *s = strstr(fn, "bam");
501 if (s == fn + strlen(fn) - 3) {
503 fnidx[strlen(fn)-1] = 'i';
504 fp = fopen(fnidx, "rb");
507 free(fnidx); free(fn);
509 bam_index_t *idx = bam_index_load_core(fp);
516 static void download_from_remote(const char *url)
518 const int buf_size = 1 * 1024 * 1024;
524 if (strstr(url, "ftp://") != url && strstr(url, "http://") != url) return;
526 for (fn = (char*)url + l - 1; fn >= url; --fn)
527 if (*fn == '/') break;
528 ++fn; // fn now points to the file name
529 fp_remote = knet_open(url, "r");
530 if (fp_remote == 0) {
531 fprintf(stderr, "[download_from_remote] fail to open remote file.\n");
534 if ((fp = fopen(fn, "wb")) == 0) {
535 fprintf(stderr, "[download_from_remote] fail to create file in the working directory.\n");
536 knet_close(fp_remote);
539 buf = (uint8_t*)calloc(buf_size, 1);
540 while ((l = knet_read(fp_remote, buf, buf_size)) != 0) {
541 if (fwrite(buf, 1, l, fp) < l) {
542 fprintf(stderr, "[%s] fail to write to destination file.\n", __func__);
548 knet_close(fp_remote);
551 static void download_from_remote(const char *url)
557 bam_index_t *bam_index_load(const char *fn)
560 idx = bam_index_load_local(fn);
561 if (idx == 0 && (strstr(fn, "ftp://") == fn || strstr(fn, "http://") == fn)) {
562 char *fnidx = calloc(strlen(fn) + 5, 1);
563 strcat(strcpy(fnidx, fn), ".bai");
564 fprintf(stderr, "[bam_index_load] attempting to download the remote index file.\n");
565 download_from_remote(fnidx);
566 idx = bam_index_load_local(fn);
568 if (idx == 0) fprintf(stderr, "[bam_index_load] fail to load BAM index.\n");
572 int bam_index_build2(const char *fn, const char *_fnidx)
578 if ((fp = bam_open(fn, "r")) == 0) {
579 fprintf(stderr, "[bam_index_build2] fail to open the BAM file.\n");
582 idx = bam_index_core(fp);
585 fprintf(stderr, "[bam_index_build2] fail to index the BAM file.\n");
589 fnidx = (char*)calloc(strlen(fn) + 5, 1);
590 strcpy(fnidx, fn); strcat(fnidx, ".bai");
591 } else fnidx = strdup(_fnidx);
592 fpidx = fopen(fnidx, "wb");
594 fprintf(stderr, "[bam_index_build2] fail to create the index file.\n");
598 bam_index_save(idx, fpidx);
599 bam_index_destroy(idx);
605 int bam_index_build(const char *fn)
607 return bam_index_build2(fn, 0);
610 int bam_index(int argc, char *argv[])
613 fprintf(stderr, "Usage: samtools index <in.bam> [out.index]\n");
616 if (argc >= 3) bam_index_build2(argv[1], argv[2]);
617 else bam_index_build(argv[1]);
621 int bam_idxstats(int argc, char *argv[])
624 bam_header_t *header;
628 fprintf(stderr, "Usage: samtools idxstats <in.bam>\n");
631 fp = bam_open(argv[1], "r");
632 if (fp == 0) { fprintf(stderr, "[%s] fail to open BAM.\n", __func__); return 1; }
633 header = bam_header_read(fp);
635 idx = bam_index_load(argv[1]);
636 if (idx == 0) { fprintf(stderr, "[%s] fail to load the index.\n", __func__); return 1; }
637 for (i = 0; i < idx->n; ++i) {
639 khash_t(i) *h = idx->index[i];
640 printf("%s\t%d", header->target_name[i], header->target_len[i]);
641 k = kh_get(i, h, BAM_MAX_BIN);
643 printf("\t%llu\t%llu", (long long)kh_val(h, k).list[1].u, (long long)kh_val(h, k).list[1].v);
644 else printf("\t0\t0");
647 printf("*\t0\t0\t%llu\n", (long long)idx->n_no_coor);
648 bam_header_destroy(header);
649 bam_index_destroy(idx);
653 static inline int reg2bins(uint32_t beg, uint32_t end, uint16_t list[BAM_MAX_BIN])
656 if (beg >= end) return 0;
657 if (end >= 1u<<29) end = 1u<<29;
660 for (k = 1 + (beg>>26); k <= 1 + (end>>26); ++k) list[i++] = k;
661 for (k = 9 + (beg>>23); k <= 9 + (end>>23); ++k) list[i++] = k;
662 for (k = 73 + (beg>>20); k <= 73 + (end>>20); ++k) list[i++] = k;
663 for (k = 585 + (beg>>17); k <= 585 + (end>>17); ++k) list[i++] = k;
664 for (k = 4681 + (beg>>14); k <= 4681 + (end>>14); ++k) list[i++] = k;
668 static inline int is_overlap(uint32_t beg, uint32_t end, const bam1_t *b)
670 uint32_t rbeg = b->core.pos;
671 uint32_t rend = b->core.n_cigar? bam_calend(&b->core, bam1_cigar(b)) : b->core.pos + 1;
672 return (rend > beg && rbeg < end);
675 struct __bam_iter_t {
676 int from_first; // read from the first record; no random access
677 int tid, beg, end, n_off, i, finished;
682 // bam_fetch helper function retrieves
683 bam_iter_t bam_iter_query(const bam_index_t *idx, int tid, int beg, int end)
686 int i, n_bins, n_off;
693 if (beg < 0) beg = 0;
694 if (end < beg) return 0;
696 iter = calloc(1, sizeof(struct __bam_iter_t));
697 iter->tid = tid, iter->beg = beg, iter->end = end; iter->i = -1;
699 bins = (uint16_t*)calloc(BAM_MAX_BIN, 2);
700 n_bins = reg2bins(beg, end, bins);
701 index = idx->index[tid];
702 if (idx->index2[tid].n > 0) {
703 min_off = (beg>>BAM_LIDX_SHIFT >= idx->index2[tid].n)? idx->index2[tid].offset[idx->index2[tid].n-1]
704 : idx->index2[tid].offset[beg>>BAM_LIDX_SHIFT];
705 if (min_off == 0) { // improvement for index files built by tabix prior to 0.1.4
706 int n = beg>>BAM_LIDX_SHIFT;
707 if (n > idx->index2[tid].n) n = idx->index2[tid].n;
708 for (i = n - 1; i >= 0; --i)
709 if (idx->index2[tid].offset[i] != 0) break;
710 if (i >= 0) min_off = idx->index2[tid].offset[i];
712 } else min_off = 0; // tabix 0.1.2 may produce such index files
713 for (i = n_off = 0; i < n_bins; ++i) {
714 if ((k = kh_get(i, index, bins[i])) != kh_end(index))
715 n_off += kh_value(index, k).n;
718 free(bins); return iter;
720 off = (pair64_t*)calloc(n_off, 16);
721 for (i = n_off = 0; i < n_bins; ++i) {
722 if ((k = kh_get(i, index, bins[i])) != kh_end(index)) {
724 bam_binlist_t *p = &kh_value(index, k);
725 for (j = 0; j < p->n; ++j)
726 if (p->list[j].v > min_off) off[n_off++] = p->list[j];
731 free(off); return iter;
734 bam1_t *b = (bam1_t*)calloc(1, sizeof(bam1_t));
736 ks_introsort(off, n_off, off);
737 // resolve completely contained adjacent blocks
738 for (i = 1, l = 0; i < n_off; ++i)
739 if (off[l].v < off[i].v)
742 // resolve overlaps between adjacent blocks; this may happen due to the merge in indexing
743 for (i = 1; i < n_off; ++i)
744 if (off[i-1].v >= off[i].u) off[i-1].v = off[i].u;
745 { // merge adjacent blocks
746 #if defined(BAM_TRUE_OFFSET) || defined(BAM_VIRTUAL_OFFSET16)
747 for (i = 1, l = 0; i < n_off; ++i) {
748 #ifdef BAM_TRUE_OFFSET
749 if (off[l].v + BAM_MIN_CHUNK_GAP > off[i].u) off[l].v = off[i].v;
751 if (off[l].v>>16 == off[i].u>>16) off[l].v = off[i].v;
753 else off[++l] = off[i];
760 iter->n_off = n_off; iter->off = off;
764 pair64_t *get_chunk_coordinates(const bam_index_t *idx, int tid, int beg, int end, int *cnt_off)
765 { // for pysam compatibility
768 iter = bam_iter_query(idx, tid, beg, end);
769 off = iter->off; *cnt_off = iter->n_off;
774 void bam_iter_destroy(bam_iter_t iter)
776 if (iter) { free(iter->off); free(iter); }
779 int bam_iter_read(bamFile fp, bam_iter_t iter, bam1_t *b)
782 if (iter && iter->finished) return -1;
783 if (iter == 0 || iter->from_first) {
784 ret = bam_read1(fp, b);
785 if (ret < 0 && iter) iter->finished = 1;
788 if (iter->off == 0) return -1;
790 if (iter->curr_off == 0 || iter->curr_off >= iter->off[iter->i].v) { // then jump to the next chunk
791 if (iter->i == iter->n_off - 1) { ret = -1; break; } // no more chunks
792 if (iter->i >= 0) assert(iter->curr_off == iter->off[iter->i].v); // otherwise bug
793 if (iter->i < 0 || iter->off[iter->i].v != iter->off[iter->i+1].u) { // not adjacent chunks; then seek
794 bam_seek(fp, iter->off[iter->i+1].u, SEEK_SET);
795 iter->curr_off = bam_tell(fp);
799 if ((ret = bam_read1(fp, b)) >= 0) {
800 iter->curr_off = bam_tell(fp);
801 if (b->core.tid != iter->tid || b->core.pos >= iter->end) { // no need to proceed
802 ret = bam_validate1(NULL, b)? -1 : -5; // determine whether end of region or error
805 else if (is_overlap(iter->beg, iter->end, b)) return ret;
806 } else break; // end of file or error
812 int bam_fetch(bamFile fp, const bam_index_t *idx, int tid, int beg, int end, void *data, bam_fetch_f func)
818 iter = bam_iter_query(idx, tid, beg, end);
819 while ((ret = bam_iter_read(fp, iter, b)) >= 0) func(b, data);
820 bam_iter_destroy(iter);
822 return (ret == -1)? 0 : ret;