6 #include "bam_endian.h"
14 Alignment indexing. Before indexing, BAM must be sorted based on the
15 leftmost coordinate of alignments. In indexing, BAM uses two indices:
16 a UCSC binning index and a simple linear index. The binning index is
17 efficient for alignments spanning long distance, while the auxiliary
18 linear index helps to reduce unnecessary seek calls especially for
21 The UCSC binning scheme was suggested by Richard Durbin and Lincoln
22 Stein and is explained by Kent et al. (2002). In this scheme, each bin
23 represents a contiguous genomic region which can be fully contained in
24 another bin; each alignment is associated with a bin which represents
25 the smallest region containing the entire alignment. The binning
26 scheme is essentially another representation of R-tree. A distinct bin
27 uniquely corresponds to a distinct internal node in a R-tree. Bin A is
28 a child of Bin B if region A is contained in B.
30 In BAM, each bin may span 2^29, 2^26, 2^23, 2^20, 2^17 or 2^14 bp. Bin
31 0 spans a 512Mbp region, bins 1-8 span 64Mbp, 9-72 8Mbp, 73-584 1Mbp,
32 585-4680 128Kbp and bins 4681-37449 span 16Kbp regions. If we want to
33 find the alignments overlapped with a region [rbeg,rend), we need to
34 calculate the list of bins that may be overlapped the region and test
35 the alignments in the bins to confirm the overlaps. If the specified
36 region is short, typically only a few alignments in six bins need to
37 be retrieved. The overlapping alignments can be quickly fetched.
41 #define BAM_MIN_CHUNK_GAP 32768
42 // 1<<14 is the size of minimum bin.
43 #define BAM_LIDX_SHIFT 14
45 #define BAM_MAX_BIN 37450 // =(8^6-1)/7+1
51 #define pair64_lt(a,b) ((a).u < (b).u)
52 KSORT_INIT(off, pair64_t, pair64_lt)
64 KHASH_MAP_INIT_INT(i, bam_binlist_t)
66 struct __bam_index_t {
68 uint64_t n_no_coor; // unmapped reads without coordinate
73 // requirement: len <= LEN_MASK
74 static inline void insert_offset(khash_t(i) *h, int bin, uint64_t beg, uint64_t end)
79 k = kh_put(i, h, bin, &ret);
81 if (ret) { // not present
83 l->list = (pair64_t*)calloc(l->m, 16);
87 l->list = (pair64_t*)realloc(l->list, l->m * 16);
89 l->list[l->n].u = beg; l->list[l->n++].v = end;
92 static inline void insert_offset2(bam_lidx_t *index2, bam1_t *b, uint64_t offset)
95 beg = b->core.pos >> BAM_LIDX_SHIFT;
96 end = (bam_calend(&b->core, bam1_cigar(b)) - 1) >> BAM_LIDX_SHIFT;
97 if (index2->m < end + 1) {
98 int old_m = index2->m;
100 kroundup32(index2->m);
101 index2->offset = (uint64_t*)realloc(index2->offset, index2->m * 8);
102 memset(index2->offset + old_m, 0, 8 * (index2->m - old_m));
105 if (index2->offset[beg] == 0) index2->offset[beg] = offset;
107 for (i = beg; i <= end; ++i)
108 if (index2->offset[i] == 0) index2->offset[i] = offset;
113 static void merge_chunks(bam_index_t *idx)
115 #if defined(BAM_TRUE_OFFSET) || defined(BAM_VIRTUAL_OFFSET16)
119 for (i = 0; i < idx->n; ++i) {
120 index = idx->index[i];
121 for (k = kh_begin(index); k != kh_end(index); ++k) {
123 if (!kh_exist(index, k) || kh_key(index, k) == BAM_MAX_BIN) continue;
124 p = &kh_value(index, k);
126 for (l = 1; l < p->n; ++l) {
127 #ifdef BAM_TRUE_OFFSET
128 if (p->list[m].v + BAM_MIN_CHUNK_GAP > p->list[l].u) p->list[m].v = p->list[l].v;
130 if (p->list[m].v>>16 == p->list[l].u>>16) p->list[m].v = p->list[l].v;
132 else p->list[++m] = p->list[l];
137 #endif // defined(BAM_TRUE_OFFSET) || defined(BAM_BGZF)
140 static void fill_missing(bam_index_t *idx)
143 for (i = 0; i < idx->n; ++i) {
144 bam_lidx_t *idx2 = &idx->index2[i];
145 for (j = 1; j < idx2->n; ++j)
146 if (idx2->offset[j] == 0)
147 idx2->offset[j] = idx2->offset[j-1];
151 bam_index_t *bam_index_core(bamFile fp)
157 uint32_t last_bin, save_bin;
158 int32_t last_coor, last_tid, save_tid;
160 uint64_t save_off, last_off, n_mapped, n_unmapped, off_beg, off_end, n_no_coor;
162 idx = (bam_index_t*)calloc(1, sizeof(bam_index_t));
163 b = (bam1_t*)calloc(1, sizeof(bam1_t));
164 h = bam_header_read(fp);
167 idx->n = h->n_targets;
168 bam_header_destroy(h);
169 idx->index = (khash_t(i)**)calloc(idx->n, sizeof(void*));
170 for (i = 0; i < idx->n; ++i) idx->index[i] = kh_init(i);
171 idx->index2 = (bam_lidx_t*)calloc(idx->n, sizeof(bam_lidx_t));
173 save_bin = save_tid = last_tid = last_bin = 0xffffffffu;
174 save_off = last_off = bam_tell(fp); last_coor = 0xffffffffu;
175 n_mapped = n_unmapped = n_no_coor = off_end = 0;
176 off_beg = off_end = bam_tell(fp);
177 while ((ret = bam_read1(fp, b)) >= 0) {
178 if (c->tid < 0) ++n_no_coor;
179 if (last_tid < c->tid || (last_tid >= 0 && c->tid < 0)) { // change of chromosomes
181 last_bin = 0xffffffffu;
182 } else if ((uint32_t)last_tid > (uint32_t)c->tid) {
183 fprintf(stderr, "[bam_index_core] the alignment is not sorted (%s): %d-th chr > %d-th chr\n",
184 bam1_qname(b), last_tid+1, c->tid+1);
186 } else if ((int32_t)c->tid >= 0 && last_coor > c->pos) {
187 fprintf(stderr, "[bam_index_core] the alignment is not sorted (%s): %u > %u in %d-th chr\n",
188 bam1_qname(b), last_coor, c->pos, c->tid+1);
191 if (c->tid >= 0 && !(c->flag & BAM_FUNMAP)) insert_offset2(&idx->index2[b->core.tid], b, last_off);
192 if (c->bin != last_bin) { // then possibly write the binning index
193 if (save_bin != 0xffffffffu) // save_bin==0xffffffffu only happens to the first record
194 insert_offset(idx->index[save_tid], save_bin, save_off, last_off);
195 if (last_bin == 0xffffffffu && save_tid != 0xffffffffu) { // write the meta element
197 insert_offset(idx->index[save_tid], BAM_MAX_BIN, off_beg, off_end);
198 insert_offset(idx->index[save_tid], BAM_MAX_BIN, n_mapped, n_unmapped);
199 n_mapped = n_unmapped = 0;
203 save_bin = last_bin = c->bin;
205 if (save_tid < 0) break;
207 if (bam_tell(fp) <= last_off) {
208 fprintf(stderr, "[bam_index_core] bug in BGZF/RAZF: %llx < %llx\n",
209 (unsigned long long)bam_tell(fp), (unsigned long long)last_off);
212 if (c->flag & BAM_FUNMAP) ++n_unmapped;
214 last_off = bam_tell(fp);
215 last_coor = b->core.pos;
218 insert_offset(idx->index[save_tid], save_bin, save_off, bam_tell(fp));
219 insert_offset(idx->index[save_tid], BAM_MAX_BIN, off_beg, bam_tell(fp));
220 insert_offset(idx->index[save_tid], BAM_MAX_BIN, n_mapped, n_unmapped);
225 while ((ret = bam_read1(fp, b)) >= 0) {
227 if (c->tid >= 0 && n_no_coor) {
228 fprintf(stderr, "[bam_index_core] the alignment is not sorted: reads without coordinates prior to reads with coordinates.\n");
233 if (ret < -1) fprintf(stderr, "[bam_index_core] truncated file? Continue anyway. (%d)\n", ret);
234 free(b->data); free(b);
235 idx->n_no_coor = n_no_coor;
239 void bam_index_destroy(bam_index_t *idx)
243 if (idx == 0) return;
244 for (i = 0; i < idx->n; ++i) {
245 khash_t(i) *index = idx->index[i];
246 bam_lidx_t *index2 = idx->index2 + i;
247 for (k = kh_begin(index); k != kh_end(index); ++k) {
248 if (kh_exist(index, k))
249 free(kh_value(index, k).list);
251 kh_destroy(i, index);
252 free(index2->offset);
254 free(idx->index); free(idx->index2);
258 void bam_index_save(const bam_index_t *idx, FILE *fp)
262 if (fwrite("BAI\1", 1, 4, fp) < 4) {
263 fprintf(stderr, "[bam_index_save] failed to write magic number.\n");
268 if (fwrite(bam_swap_endian_4p(&x), 4, 1, fp) < 1) {
269 fprintf(stderr, "[bam_index_save] failed to write n_ref.\n");
272 } else fwrite(&idx->n, 4, 1, fp);
273 for (i = 0; i < idx->n; ++i) {
274 khash_t(i) *index = idx->index[i];
275 bam_lidx_t *index2 = idx->index2 + i;
276 // write binning index
277 size = kh_size(index);
278 if (bam_is_be) { // big endian
280 if (fwrite(bam_swap_endian_4p(&x), 4, 1, fp) < 1) {
281 fprintf(stderr, "[bam_index_save] failed to write n_bin.\n");
284 } else if (fwrite(&size, 4, 1, fp) < 1) {
285 fprintf(stderr, "[bam_index_save] failed to write n_bin.\n");
288 for (k = kh_begin(index); k != kh_end(index); ++k) {
289 if (kh_exist(index, k)) {
290 bam_binlist_t *p = &kh_value(index, k);
291 if (bam_is_be) { // big endian
293 x = kh_key(index, k);
294 if (fwrite(bam_swap_endian_4p(&x), 4, 1, fp) < 1) {
295 fprintf(stderr, "[bam_index_save] failed to write bin.\n");
299 if (fwrite(bam_swap_endian_4p(&x), 4, 1, fp) < 1) {
300 fprintf(stderr, "[bam_index_save] failed to write n_chunk.\n");
303 for (x = 0; (int)x < p->n; ++x) {
304 bam_swap_endian_8p(&p->list[x].u);
305 bam_swap_endian_8p(&p->list[x].v);
307 if (fwrite(p->list, 16, p->n, fp) < p->n) {
308 fprintf(stderr, "[bam_index_save] failed to write chunk pair.\n");
311 // hmmm...sandbagging PPC or reverting?
312 for (x = 0; (int)x < p->n; ++x) {
313 bam_swap_endian_8p(&p->list[x].u);
314 bam_swap_endian_8p(&p->list[x].v);
317 if (fwrite(&kh_key(index, k), 4, 1, fp) < 1) {
318 fprintf(stderr, "[bam_index_save] failed to write bin.\n");
321 if (fwrite(&p->n, 4, 1, fp) < 1) {
322 fprintf(stderr, "[bam_index_save] failed to write n_chunk.\n");
325 if (fwrite(p->list, 16, p->n, fp) < p->n) {
326 fprintf(stderr, "[bam_index_save] failed to write chunk pair.\n");
332 // write linear index (index2)
335 if (fwrite(bam_swap_endian_4p(&x), 4, 1, fp) < 1) {
336 fprintf(stderr, "[bam_index_save] failed to write n_intv.\n");
339 } else if (fwrite(&index2->n, 4, 1, fp) < 1) {
340 fprintf(stderr, "[bam_index_save] failed to write n_intv.\n");
343 if (bam_is_be) { // big endian
345 for (x = 0; (int)x < index2->n; ++x)
346 bam_swap_endian_8p(&index2->offset[x]);
347 if (fwrite(index2->offset, 8, index2->n, fp) < index2->n) {
348 fprintf(stderr, "[bam_index_save] failed to write ioffset.\n");
351 for (x = 0; (int)x < index2->n; ++x)
352 bam_swap_endian_8p(&index2->offset[x]);
353 } else if (fwrite(index2->offset, 8, index2->n, fp) < index2->n) {
354 fprintf(stderr, "[bam_index_save] failed to write ioffset.\n");
358 { // write the number of reads coor-less records.
359 uint64_t x = idx->n_no_coor;
360 if (bam_is_be) bam_swap_endian_8p(&x);
361 if (fwrite(&x, 8, 1, fp) < 1) {
362 fprintf(stderr, "[bam_index_save] failed to write n_no_coor.\n");
369 static bam_index_t *bam_index_load_core(FILE *fp)
375 fprintf(stderr, "[bam_index_load_core] fail to load index.\n");
378 if (fread(magic, 1, 4, fp) < 4) {
379 fprintf(stderr, "[bam_index_load] failed to read magic number.\n");
383 if (strncmp(magic, "BAI\1", 4)) {
384 fprintf(stderr, "[bam_index_load] wrong magic number.\n");
388 idx = (bam_index_t*)calloc(1, sizeof(bam_index_t));
389 if (fread(&idx->n, 4, 1, fp) < 1) {
390 fprintf(stderr, "[bam_index_load] failed to read n_ref field.\n");
394 if (bam_is_be) bam_swap_endian_4p(&idx->n);
395 idx->index = (khash_t(i)**)calloc(idx->n, sizeof(void*));
396 idx->index2 = (bam_lidx_t*)calloc(idx->n, sizeof(bam_lidx_t));
397 for (i = 0; i < idx->n; ++i) {
399 bam_lidx_t *index2 = idx->index2 + i;
404 index = idx->index[i] = kh_init(i);
405 // load binning index
406 if (fread(&size, 4, 1, fp) < 1) {
407 fprintf(stderr, "[bam_index_load] read error while loading n_bin.\n");
411 if (bam_is_be) bam_swap_endian_4p(&size);
412 for (j = 0; j < (int)size; ++j) {
413 if (fread(&key, 4, 1, fp) < 1) {
414 fprintf(stderr, "[bam_index_load] read error while loading bin.\n");
418 if (bam_is_be) bam_swap_endian_4p(&key);
419 k = kh_put(i, index, key, &ret);
420 p = &kh_value(index, k);
421 if (fread(&p->n, 4, 1, fp) < 1) {
422 fprintf(stderr, "[bam_index_load] read error while loading n_chunk.\n");
426 if (bam_is_be) bam_swap_endian_4p(&p->n);
428 p->list = (pair64_t*)malloc(p->m * 16);
429 if (fread(p->list, 16, p->n, fp) < p->n) {
430 fprintf(stderr, "[bam_index_load] read error while loading chunk pairs.\n");
436 for (x = 0; x < p->n; ++x) {
437 bam_swap_endian_8p(&p->list[x].u);
438 bam_swap_endian_8p(&p->list[x].v);
443 if (fread(&index2->n, 4, 1, fp) < 1) {
444 fprintf(stderr, "[bam_index_load] read error while loading n_intv.\n");
448 if (bam_is_be) bam_swap_endian_4p(&index2->n);
449 index2->m = index2->n;
450 index2->offset = (uint64_t*)calloc(index2->m, 8);
451 ret = fread(index2->offset, index2->n, 8, fp);
452 if ((ret > 0) && (ret < 8)) {
453 fprintf(stderr, "[bam_index_load] read error while loading ioffset.\n");
458 for (j = 0; j < index2->n; ++j) bam_swap_endian_8p(&index2->offset[j]);
460 if (fread(&idx->n_no_coor, 8, 1, fp) == 0) idx->n_no_coor = 0;
461 if (bam_is_be) bam_swap_endian_8p(&idx->n_no_coor);
465 bam_index_t *bam_index_load_local(const char *_fn)
470 if (strstr(_fn, "ftp://") == _fn || strstr(_fn, "http://") == _fn) {
473 for (p = _fn + l - 1; p >= _fn; --p)
474 if (*p == '/') break;
476 } else fn = strdup(_fn);
477 fnidx = (char*)calloc(strlen(fn) + 5, 1);
478 strcpy(fnidx, fn); strcat(fnidx, ".bai");
479 fp = fopen(fnidx, "rb");
480 if (fp == 0) { // try "{base}.bai"
481 char *s = strstr(fn, "bam");
482 if (s == fn + strlen(fn) - 3) {
484 fnidx[strlen(fn)-1] = 'i';
485 fp = fopen(fnidx, "rb");
488 free(fnidx); free(fn);
490 bam_index_t *idx = bam_index_load_core(fp);
497 static void download_from_remote(const char *url)
499 const int buf_size = 1 * 1024 * 1024;
505 if (strstr(url, "ftp://") != url && strstr(url, "http://") != url) return;
507 for (fn = (char*)url + l - 1; fn >= url; --fn)
508 if (*fn == '/') break;
509 ++fn; // fn now points to the file name
510 fp_remote = knet_open(url, "r");
511 if (fp_remote == 0) {
512 fprintf(stderr, "[download_from_remote] fail to open remote file.\n");
515 if ((fp = fopen(fn, "wb")) == 0) {
516 fprintf(stderr, "[download_from_remote] fail to create file in the working directory.\n");
517 knet_close(fp_remote);
520 buf = (uint8_t*)calloc(buf_size, 1);
521 while ((l = knet_read(fp_remote, buf, buf_size)) != 0)
522 if (fwrite(buf, 1, l, fp) < l) {
523 fprintf(stderr, "[download_from_remote] fail to write to destination file.\n");
528 knet_close(fp_remote);
531 static void download_from_remote(const char *url)
537 bam_index_t *bam_index_load(const char *fn)
540 idx = bam_index_load_local(fn);
541 if (idx == 0 && (strstr(fn, "ftp://") == fn || strstr(fn, "http://") == fn)) {
542 char *fnidx = calloc(strlen(fn) + 5, 1);
543 strcat(strcpy(fnidx, fn), ".bai");
544 fprintf(stderr, "[bam_index_load] attempting to download the remote index file.\n");
545 download_from_remote(fnidx);
546 idx = bam_index_load_local(fn);
548 if (idx == 0) fprintf(stderr, "[bam_index_load] fail to load BAM index.\n");
552 int bam_index_build2(const char *fn, const char *_fnidx)
558 if ((fp = bam_open(fn, "r")) == 0) {
559 fprintf(stderr, "[bam_index_build2] fail to open the BAM file.\n");
562 idx = bam_index_core(fp);
565 fprintf(stderr, "[bam_index_build2] fail to index the BAM file.\n");
569 fnidx = (char*)calloc(strlen(fn) + 5, 1);
570 strcpy(fnidx, fn); strcat(fnidx, ".bai");
571 } else fnidx = strdup(_fnidx);
572 fpidx = fopen(fnidx, "wb");
574 fprintf(stderr, "[bam_index_build2] fail to create the index file.\n");
578 bam_index_save(idx, fpidx);
579 bam_index_destroy(idx);
585 int bam_index_build(const char *fn)
587 return bam_index_build2(fn, 0);
590 int bam_index(int argc, char *argv[])
593 fprintf(stderr, "Usage: samtools index <in.bam> [out.index]\n");
596 if (argc >= 3) bam_index_build2(argv[1], argv[2]);
597 else bam_index_build(argv[1]);
601 int bam_idxstats(int argc, char *argv[])
604 bam_header_t *header;
608 fprintf(stderr, "Usage: samtools idxstats <in.bam>\n");
611 fp = bam_open(argv[1], "r");
612 if (fp == 0) { fprintf(stderr, "[%s] fail to open BAM.\n", __func__); return 1; }
613 header = bam_header_read(fp);
615 idx = bam_index_load(argv[1]);
616 if (idx == 0) { fprintf(stderr, "[%s] fail to load the index.\n", __func__); return 1; }
617 for (i = 0; i < idx->n; ++i) {
619 khash_t(i) *h = idx->index[i];
620 printf("%s\t%d", header->target_name[i], header->target_len[i]);
621 k = kh_get(i, h, BAM_MAX_BIN);
623 printf("\t%llu\t%llu", (long long)kh_val(h, k).list[1].u, (long long)kh_val(h, k).list[1].v);
624 else printf("\t0\t0");
627 printf("*\t0\t0\t%llu\n", (long long)idx->n_no_coor);
628 bam_header_destroy(header);
629 bam_index_destroy(idx);
633 static inline int reg2bins(uint32_t beg, uint32_t end, uint16_t list[BAM_MAX_BIN])
636 if (beg >= end) return 0;
637 if (end >= 1u<<29) end = 1u<<29;
640 for (k = 1 + (beg>>26); k <= 1 + (end>>26); ++k) list[i++] = k;
641 for (k = 9 + (beg>>23); k <= 9 + (end>>23); ++k) list[i++] = k;
642 for (k = 73 + (beg>>20); k <= 73 + (end>>20); ++k) list[i++] = k;
643 for (k = 585 + (beg>>17); k <= 585 + (end>>17); ++k) list[i++] = k;
644 for (k = 4681 + (beg>>14); k <= 4681 + (end>>14); ++k) list[i++] = k;
648 static inline int is_overlap(uint32_t beg, uint32_t end, const bam1_t *b)
650 uint32_t rbeg = b->core.pos;
651 uint32_t rend = b->core.n_cigar? bam_calend(&b->core, bam1_cigar(b)) : b->core.pos + 1;
652 return (rend > beg && rbeg < end);
655 struct __bam_iter_t {
656 int from_first; // read from the first record; no random access
657 int tid, beg, end, n_off, i, finished;
662 // bam_fetch helper function retrieves
663 bam_iter_t bam_iter_query(const bam_index_t *idx, int tid, int beg, int end)
666 int i, n_bins, n_off;
673 if (beg < 0) beg = 0;
674 if (end < beg) return 0;
676 iter = calloc(1, sizeof(struct __bam_iter_t));
677 iter->tid = tid, iter->beg = beg, iter->end = end; iter->i = -1;
679 bins = (uint16_t*)calloc(BAM_MAX_BIN, 2);
680 n_bins = reg2bins(beg, end, bins);
681 index = idx->index[tid];
682 if (idx->index2[tid].n > 0) {
683 min_off = (beg>>BAM_LIDX_SHIFT >= idx->index2[tid].n)? idx->index2[tid].offset[idx->index2[tid].n-1]
684 : idx->index2[tid].offset[beg>>BAM_LIDX_SHIFT];
685 if (min_off == 0) { // improvement for index files built by tabix prior to 0.1.4
686 int n = beg>>BAM_LIDX_SHIFT;
687 if (n > idx->index2[tid].n) n = idx->index2[tid].n;
688 for (i = n - 1; i >= 0; --i)
689 if (idx->index2[tid].offset[i] != 0) break;
690 if (i >= 0) min_off = idx->index2[tid].offset[i];
692 } else min_off = 0; // tabix 0.1.2 may produce such index files
693 for (i = n_off = 0; i < n_bins; ++i) {
694 if ((k = kh_get(i, index, bins[i])) != kh_end(index))
695 n_off += kh_value(index, k).n;
698 free(bins); return iter;
700 off = (pair64_t*)calloc(n_off, 16);
701 for (i = n_off = 0; i < n_bins; ++i) {
702 if ((k = kh_get(i, index, bins[i])) != kh_end(index)) {
704 bam_binlist_t *p = &kh_value(index, k);
705 for (j = 0; j < p->n; ++j)
706 if (p->list[j].v > min_off) off[n_off++] = p->list[j];
711 free(off); return iter;
714 bam1_t *b = (bam1_t*)calloc(1, sizeof(bam1_t));
716 ks_introsort(off, n_off, off);
717 // resolve completely contained adjacent blocks
718 for (i = 1, l = 0; i < n_off; ++i)
719 if (off[l].v < off[i].v)
722 // resolve overlaps between adjacent blocks; this may happen due to the merge in indexing
723 for (i = 1; i < n_off; ++i)
724 if (off[i-1].v >= off[i].u) off[i-1].v = off[i].u;
725 { // merge adjacent blocks
726 #if defined(BAM_TRUE_OFFSET) || defined(BAM_VIRTUAL_OFFSET16)
727 for (i = 1, l = 0; i < n_off; ++i) {
728 #ifdef BAM_TRUE_OFFSET
729 if (off[l].v + BAM_MIN_CHUNK_GAP > off[i].u) off[l].v = off[i].v;
731 if (off[l].v>>16 == off[i].u>>16) off[l].v = off[i].v;
733 else off[++l] = off[i];
740 iter->n_off = n_off; iter->off = off;
744 pair64_t *get_chunk_coordinates(const bam_index_t *idx, int tid, int beg, int end, int *cnt_off)
745 { // for pysam compatibility
748 iter = bam_iter_query(idx, tid, beg, end);
749 off = iter->off; *cnt_off = iter->n_off;
754 void bam_iter_destroy(bam_iter_t iter)
756 if (iter) { free(iter->off); free(iter); }
759 int bam_iter_read(bamFile fp, bam_iter_t iter, bam1_t *b)
762 if (iter && iter->finished) return -1;
763 if (iter == 0 || iter->from_first) {
764 ret = bam_read1(fp, b);
765 if (ret < 0 && iter) iter->finished = 1;
768 if (iter->off == 0) return -1;
770 if (iter->curr_off == 0 || iter->curr_off >= iter->off[iter->i].v) { // then jump to the next chunk
771 if (iter->i == iter->n_off - 1) { ret = -1; break; } // no more chunks
772 if (iter->i >= 0) assert(iter->curr_off == iter->off[iter->i].v); // otherwise bug
773 if (iter->i < 0 || iter->off[iter->i].v != iter->off[iter->i+1].u) { // not adjacent chunks; then seek
774 bam_seek(fp, iter->off[iter->i+1].u, SEEK_SET);
775 iter->curr_off = bam_tell(fp);
779 if ((ret = bam_read1(fp, b)) >= 0) {
780 iter->curr_off = bam_tell(fp);
781 if (b->core.tid != iter->tid || b->core.pos >= iter->end) { // no need to proceed
782 ret = bam_validate1(NULL, b)? -1 : -5; // determine whether end of region or error
785 else if (is_overlap(iter->beg, iter->end, b)) return ret;
786 } else break; // end of file or error
792 int bam_fetch(bamFile fp, const bam_index_t *idx, int tid, int beg, int end, void *data, bam_fetch_f func)
798 iter = bam_iter_query(idx, tid, beg, end);
799 while ((ret = bam_iter_read(fp, iter, b)) >= 0) func(b, data);
800 bam_iter_destroy(iter);
802 return (ret == -1)? 0 : ret;