6 #include "bam_endian.h"
14 Alignment indexing. Before indexing, BAM must be sorted based on the
15 leftmost coordinate of alignments. In indexing, BAM uses two indices:
16 a UCSC binning index and a simple linear index. The binning index is
17 efficient for alignments spanning long distance, while the auxiliary
linear index helps to reduce unnecessary seek calls especially for
those alignments spanning only a short distance.
21 The UCSC binning scheme was suggested by Richard Durbin and Lincoln
22 Stein and is explained by Kent et al. (2002). In this scheme, each bin
23 represents a contiguous genomic region which can be fully contained in
24 another bin; each alignment is associated with a bin which represents
25 the smallest region containing the entire alignment. The binning
scheme is essentially another representation of an R-tree. A distinct bin
uniquely corresponds to a distinct internal node in an R-tree. Bin A is
28 a child of Bin B if region A is contained in B.
30 In BAM, each bin may span 2^29, 2^26, 2^23, 2^20, 2^17 or 2^14 bp. Bin
31 0 spans a 512Mbp region, bins 1-8 span 64Mbp, 9-72 8Mbp, 73-584 1Mbp,
32 585-4680 128Kbp and bins 4681-37449 span 16Kbp regions. If we want to
33 find the alignments overlapped with a region [rbeg,rend), we need to
calculate the list of bins that may overlap the region and test
35 the alignments in the bins to confirm the overlaps. If the specified
36 region is short, typically only a few alignments in six bins need to
37 be retrieved. The overlapping alignments can be quickly fetched.
// Chunks whose true file offsets are closer than this are merged (true-offset mode only).
#define BAM_MIN_CHUNK_GAP 32768
// 1<<14 is the size of minimum bin.
#define BAM_LIDX_SHIFT 14
// Total number of bins plus one: bin 37450 is the pseudo-bin holding per-target metadata.
#define BAM_MAX_BIN 37450 // =(8^6-1)/7+1
// Order chunks by their start (virtual) offset.
#define pair64_lt(a,b) ((a).u < (b).u)
// Instantiate ks_introsort(off, ...) for pair64_t arrays.
KSORT_INIT(off, pair64_t, pair64_lt)
// Hash map: bin number (int) -> bam_binlist_t (its chunk list).
KHASH_MAP_INIT_INT(i, bam_binlist_t)
// In-memory BAM index: per-target binning hashes and linear indices
// (those fields are not visible in this chunk), plus the global
// count of placed-nowhere reads below.
struct __bam_index_t {
	uint64_t n_no_coor; // unmapped reads without coordinate
73 // requirement: len <= LEN_MASK
74 static inline void insert_offset(khash_t(i) *h, int bin, uint64_t beg, uint64_t end)
79 k = kh_put(i, h, bin, &ret);
81 if (ret) { // not present
83 l->list = (pair64_t*)calloc(l->m, 16);
87 l->list = (pair64_t*)realloc(l->list, l->m * 16);
89 l->list[l->n].u = beg; l->list[l->n++].v = end;
92 static inline void insert_offset2(bam_lidx_t *index2, bam1_t *b, uint64_t offset)
95 beg = b->core.pos >> BAM_LIDX_SHIFT;
96 end = (bam_calend(&b->core, bam1_cigar(b)) - 1) >> BAM_LIDX_SHIFT;
97 if (index2->m < end + 1) {
98 int old_m = index2->m;
100 kroundup32(index2->m);
101 index2->offset = (uint64_t*)realloc(index2->offset, index2->m * 8);
102 memset(index2->offset + old_m, 0, 8 * (index2->m - old_m));
105 if (index2->offset[beg] == 0) index2->offset[beg] = offset;
107 for (i = beg; i <= end; ++i)
108 if (index2->offset[i] == 0) index2->offset[i] = offset;
113 static void merge_chunks(bam_index_t *idx)
115 #if defined(BAM_TRUE_OFFSET) || defined(BAM_VIRTUAL_OFFSET16)
119 for (i = 0; i < idx->n; ++i) {
120 index = idx->index[i];
121 for (k = kh_begin(index); k != kh_end(index); ++k) {
123 if (!kh_exist(index, k) || kh_key(index, k) == BAM_MAX_BIN) continue;
124 p = &kh_value(index, k);
126 for (l = 1; l < p->n; ++l) {
127 #ifdef BAM_TRUE_OFFSET
128 if (p->list[m].v + BAM_MIN_CHUNK_GAP > p->list[l].u) p->list[m].v = p->list[l].v;
130 if (p->list[m].v>>16 == p->list[l].u>>16) p->list[m].v = p->list[l].v;
132 else p->list[++m] = p->list[l];
137 #endif // defined(BAM_TRUE_OFFSET) || defined(BAM_BGZF)
140 static void fill_missing(bam_index_t *idx)
143 for (i = 0; i < idx->n; ++i) {
144 bam_lidx_t *idx2 = &idx->index2[i];
145 for (j = 1; j < idx2->n; ++j)
146 if (idx2->offset[j] == 0)
147 idx2->offset[j] = idx2->offset[j-1];
151 bam_index_t *bam_index_core(bamFile fp)
157 uint32_t last_bin, save_bin;
158 int32_t last_coor, last_tid, save_tid;
160 uint64_t save_off, last_off, n_mapped, n_unmapped, off_beg, off_end, n_no_coor;
162 idx = (bam_index_t*)calloc(1, sizeof(bam_index_t));
163 b = (bam1_t*)calloc(1, sizeof(bam1_t));
164 h = bam_header_read(fp);
167 idx->n = h->n_targets;
168 bam_header_destroy(h);
169 idx->index = (khash_t(i)**)calloc(idx->n, sizeof(void*));
170 for (i = 0; i < idx->n; ++i) idx->index[i] = kh_init(i);
171 idx->index2 = (bam_lidx_t*)calloc(idx->n, sizeof(bam_lidx_t));
173 save_bin = save_tid = last_tid = last_bin = 0xffffffffu;
174 save_off = last_off = bam_tell(fp); last_coor = 0xffffffffu;
175 n_mapped = n_unmapped = n_no_coor = off_end = 0;
176 off_beg = off_end = bam_tell(fp);
177 while ((ret = bam_read1(fp, b)) >= 0) {
178 if (c->tid < 0) ++n_no_coor;
179 if (last_tid < c->tid || (last_tid >= 0 && c->tid < 0)) { // change of chromosomes
181 last_bin = 0xffffffffu;
182 } else if ((uint32_t)last_tid > (uint32_t)c->tid) {
183 fprintf(stderr, "[bam_index_core] the alignment is not sorted (%s): %d-th chr > %d-th chr\n",
184 bam1_qname(b), last_tid+1, c->tid+1);
186 } else if ((int32_t)c->tid >= 0 && last_coor > c->pos) {
187 fprintf(stderr, "[bam_index_core] the alignment is not sorted (%s): %u > %u in %d-th chr\n",
188 bam1_qname(b), last_coor, c->pos, c->tid+1);
191 if (c->tid >= 0 && !(c->flag & BAM_FUNMAP)) insert_offset2(&idx->index2[b->core.tid], b, last_off);
192 if (c->bin != last_bin) { // then possibly write the binning index
193 if (save_bin != 0xffffffffu) // save_bin==0xffffffffu only happens to the first record
194 insert_offset(idx->index[save_tid], save_bin, save_off, last_off);
195 if (last_bin == 0xffffffffu && save_tid != 0xffffffffu) { // write the meta element
197 insert_offset(idx->index[save_tid], BAM_MAX_BIN, off_beg, off_end);
198 insert_offset(idx->index[save_tid], BAM_MAX_BIN, n_mapped, n_unmapped);
199 n_mapped = n_unmapped = 0;
203 save_bin = last_bin = c->bin;
205 if (save_tid < 0) break;
207 if (bam_tell(fp) <= last_off) {
208 fprintf(stderr, "[bam_index_core] bug in BGZF/RAZF: %llx < %llx\n",
209 (unsigned long long)bam_tell(fp), (unsigned long long)last_off);
212 if (c->flag & BAM_FUNMAP) ++n_unmapped;
214 last_off = bam_tell(fp);
215 last_coor = b->core.pos;
218 insert_offset(idx->index[save_tid], save_bin, save_off, bam_tell(fp));
219 insert_offset(idx->index[save_tid], BAM_MAX_BIN, off_beg, bam_tell(fp));
220 insert_offset(idx->index[save_tid], BAM_MAX_BIN, n_mapped, n_unmapped);
225 while ((ret = bam_read1(fp, b)) >= 0) {
227 if (c->tid >= 0 && n_no_coor) {
228 fprintf(stderr, "[bam_index_core] the alignment is not sorted: reads without coordinates prior to reads with coordinates.\n");
233 if (ret < -1) fprintf(stderr, "[bam_index_core] truncated file? Continue anyway. (%d)\n", ret);
234 free(b->data); free(b);
235 idx->n_no_coor = n_no_coor;
239 void bam_index_destroy(bam_index_t *idx)
243 if (idx == 0) return;
244 for (i = 0; i < idx->n; ++i) {
245 khash_t(i) *index = idx->index[i];
246 bam_lidx_t *index2 = idx->index2 + i;
247 for (k = kh_begin(index); k != kh_end(index); ++k) {
248 if (kh_exist(index, k))
249 free(kh_value(index, k).list);
251 kh_destroy(i, index);
252 free(index2->offset);
254 free(idx->index); free(idx->index2);
258 void bam_index_save(const bam_index_t *idx, FILE *fp)
262 if (fwrite("BAI\1", 1, 4, fp) < 4) {
263 fprintf(stderr, "[bam_index_save] failed to write magic number.\n");
268 if (fwrite(bam_swap_endian_4p(&x), 4, 1, fp) < 1) {
269 fprintf(stderr, "[bam_index_save] failed to write n_ref.\n");
272 } else fwrite(&idx->n, 4, 1, fp);
273 for (i = 0; i < idx->n; ++i) {
274 khash_t(i) *index = idx->index[i];
275 bam_lidx_t *index2 = idx->index2 + i;
276 // write binning index
277 size = kh_size(index);
278 if (bam_is_be) { // big endian
280 if (fwrite(bam_swap_endian_4p(&x), 4, 1, fp) < 1) {
281 fprintf(stderr, "[bam_index_save] failed to write n_bin.\n");
284 } else if (fwrite(&size, 4, 1, fp) < 1) {
285 fprintf(stderr, "[bam_index_save] failed to write n_bin.\n");
288 for (k = kh_begin(index); k != kh_end(index); ++k) {
289 if (kh_exist(index, k)) {
290 bam_binlist_t *p = &kh_value(index, k);
291 if (bam_is_be) { // big endian
293 x = kh_key(index, k);
294 if (fwrite(bam_swap_endian_4p(&x), 4, 1, fp) < 1) {
295 fprintf(stderr, "[bam_index_save] failed to write bin.\n");
299 if (fwrite(bam_swap_endian_4p(&x), 4, 1, fp) < 1) {
300 fprintf(stderr, "[bam_index_save] failed to write n_chunk.\n");
303 for (x = 0; (int)x < p->n; ++x) {
304 bam_swap_endian_8p(&p->list[x].u);
305 bam_swap_endian_8p(&p->list[x].v);
307 if (fwrite(p->list, 16, p->n, fp) < p->n) {
308 fprintf(stderr, "[bam_index_save] failed to write chunk pair.\n");
311 // hmmm...sandbagging PPC or reverting?
312 for (x = 0; (int)x < p->n; ++x) {
313 bam_swap_endian_8p(&p->list[x].u);
314 bam_swap_endian_8p(&p->list[x].v);
317 if (fwrite(&kh_key(index, k), 4, 1, fp) < 1) {
318 fprintf(stderr, "[bam_index_save] failed to write bin.\n");
321 if (fwrite(&p->n, 4, 1, fp) < 1) {
322 fprintf(stderr, "[bam_index_save] failed to write n_chunk.\n");
325 if (fwrite(p->list, 16, p->n, fp) < p->n) {
326 fprintf(stderr, "[bam_index_save] failed to write chunk pair.\n");
332 // write linear index (index2)
335 if (fwrite(bam_swap_endian_4p(&x), 4, 1, fp) < 1) {
336 fprintf(stderr, "[bam_index_save] failed to write n_intv.\n");
339 } else if (fwrite(&index2->n, 4, 1, fp) < 1) {
340 fprintf(stderr, "[bam_index_save] failed to write n_intv.\n");
343 if (bam_is_be) { // big endian
345 for (x = 0; (int)x < index2->n; ++x)
346 bam_swap_endian_8p(&index2->offset[x]);
347 if (fwrite(index2->offset, 8, index2->n, fp) < index2->n) {
348 fprintf(stderr, "[bam_index_save] failed to write ioffset.\n");
351 for (x = 0; (int)x < index2->n; ++x)
352 bam_swap_endian_8p(&index2->offset[x]);
353 } else if (fwrite(index2->offset, 8, index2->n, fp) < index2->n) {
354 fprintf(stderr, "[bam_index_save] failed to write ioffset.\n");
358 { // write the number of reads coor-less records.
359 uint64_t x = idx->n_no_coor;
360 if (bam_is_be) bam_swap_endian_8p(&x);
361 if (fwrite(&x, 8, 1, fp) < 1) {
362 fprintf(stderr, "[bam_index_save] failed to write n_no_coor.\n");
369 static bam_index_t *bam_index_load_core(FILE *fp)
375 fprintf(stderr, "[bam_index_load_core] fail to load index.\n");
378 if (fread(magic, 1, 4, fp) < 4) {
379 fprintf(stderr, "[bam_index_load] failed to read magic number.\n");
383 if (strncmp(magic, "BAI\1", 4)) {
384 fprintf(stderr, "[bam_index_load] wrong magic number.\n");
388 idx = (bam_index_t*)calloc(1, sizeof(bam_index_t));
389 if (fread(&idx->n, 4, 1, fp) < 1) {
390 fprintf(stderr, "[bam_index_load] failed to read n_ref field.\n");
394 if (bam_is_be) bam_swap_endian_4p(&idx->n);
395 idx->index = (khash_t(i)**)calloc(idx->n, sizeof(void*));
396 idx->index2 = (bam_lidx_t*)calloc(idx->n, sizeof(bam_lidx_t));
397 for (i = 0; i < idx->n; ++i) {
399 bam_lidx_t *index2 = idx->index2 + i;
404 index = idx->index[i] = kh_init(i);
405 // load binning index
406 if (fread(&size, 4, 1, fp) < 1) {
407 fprintf(stderr, "[bam_index_load] read error while loading n_bin.\n");
411 if (bam_is_be) bam_swap_endian_4p(&size);
412 for (j = 0; j < (int)size; ++j) {
413 if (fread(&key, 4, 1, fp) < 1) {
414 fprintf(stderr, "[bam_index_load] read error while loading bin.\n");
418 if (bam_is_be) bam_swap_endian_4p(&key);
419 k = kh_put(i, index, key, &ret);
420 p = &kh_value(index, k);
421 if (fread(&p->n, 4, 1, fp) < 1) {
422 fprintf(stderr, "[bam_index_load] read error while loading n_chunk.\n");
426 if (bam_is_be) bam_swap_endian_4p(&p->n);
428 p->list = (pair64_t*)malloc(p->m * 16);
429 if (fread(p->list, 16, p->n, fp) < p->n) {
430 fprintf(stderr, "[bam_index_load] read error while loading chunk pairs.\n");
436 for (x = 0; x < p->n; ++x) {
437 bam_swap_endian_8p(&p->list[x].u);
438 bam_swap_endian_8p(&p->list[x].v);
443 if (fread(&index2->n, 4, 1, fp) < 1) {
444 fprintf(stderr, "[bam_index_load] read error while loading n_intv.\n");
448 if (bam_is_be) bam_swap_endian_4p(&index2->n);
449 index2->m = index2->n;
450 index2->offset = (uint64_t*)calloc(index2->m, 8);
451 ret = fread(index2->offset, index2->n, 8, fp);
452 if ((ret > 0) && (ret < 8)) {
453 fprintf(stderr, "[bam_index_load] read error while loading ioffset.\n");
458 for (j = 0; j < index2->n; ++j) bam_swap_endian_8p(&index2->offset[j]);
460 if (fread(&idx->n_no_coor, 8, 1, fp) == 0) idx->n_no_coor = 0;
461 if (bam_is_be) bam_swap_endian_8p(&idx->n_no_coor);
465 bam_index_t *bam_index_load_local(const char *_fn)
470 if (strstr(_fn, "ftp://") == _fn || strstr(_fn, "http://") == _fn) {
473 for (p = _fn + l - 1; p >= _fn; --p)
474 if (*p == '/') break;
476 } else fn = strdup(_fn);
477 fnidx = (char*)calloc(strlen(fn) + 5, 1);
478 strcpy(fnidx, fn); strcat(fnidx, ".bai");
479 fp = fopen(fnidx, "rb");
480 if (fp == 0) { // try "{base}.bai"
481 char *s = strstr(fn, "bam");
482 if (s == fn + strlen(fn) - 3) {
484 fnidx[strlen(fn)-1] = 'i';
485 fp = fopen(fnidx, "rb");
488 free(fnidx); free(fn);
490 bam_index_t *idx = bam_index_load_core(fp);
497 static void download_from_remote(const char *url)
499 const int buf_size = 1 * 1024 * 1024;
505 if (strstr(url, "ftp://") != url && strstr(url, "http://") != url) return;
507 for (fn = (char*)url + l - 1; fn >= url; --fn)
508 if (*fn == '/') break;
509 ++fn; // fn now points to the file name
510 fp_remote = knet_open(url, "r");
511 if (fp_remote == 0) {
512 fprintf(stderr, "[download_from_remote] fail to open remote file.\n");
515 if ((fp = fopen(fn, "wb")) == 0) {
516 fprintf(stderr, "[download_from_remote] fail to create file in the working directory.\n");
517 knet_close(fp_remote);
520 buf = (uint8_t*)calloc(buf_size, 1);
521 while ((l = knet_read(fp_remote, buf, buf_size)) != 0)
522 if (fwrite(buf, 1, l, fp) < l) {
523 fprintf(stderr, "[download_from_remote] fail to write to destination file.\n");
525 knet_close(fp_remote);
531 knet_close(fp_remote);
534 static void download_from_remote(const char *url)
540 bam_index_t *bam_index_load(const char *fn)
543 idx = bam_index_load_local(fn);
544 if (idx == 0 && (strstr(fn, "ftp://") == fn || strstr(fn, "http://") == fn)) {
545 char *fnidx = calloc(strlen(fn) + 5, 1);
546 strcat(strcpy(fnidx, fn), ".bai");
547 fprintf(stderr, "[bam_index_load] attempting to download the remote index file.\n");
548 download_from_remote(fnidx);
549 idx = bam_index_load_local(fn);
551 if (idx == 0) fprintf(stderr, "[bam_index_load] fail to load BAM index.\n");
555 int bam_index_build2(const char *fn, const char *_fnidx)
561 if ((fp = bam_open(fn, "r")) == 0) {
562 fprintf(stderr, "[bam_index_build2] fail to open the BAM file.\n");
565 idx = bam_index_core(fp);
568 fprintf(stderr, "[bam_index_build2] fail to index the BAM file.\n");
572 fnidx = (char*)calloc(strlen(fn) + 5, 1);
573 strcpy(fnidx, fn); strcat(fnidx, ".bai");
574 } else fnidx = strdup(_fnidx);
575 fpidx = fopen(fnidx, "wb");
577 fprintf(stderr, "[bam_index_build2] fail to create the index file.\n");
581 bam_index_save(idx, fpidx);
582 bam_index_destroy(idx);
// Index `fn` using the default index name "{fn}.bai".
int bam_index_build(const char *fn)
{
	return bam_index_build2(fn, 0);
}
// CLI entry point for "samtools index": argv[1] is the BAM, the optional
// argv[2] is the output index name. Returns 1 on usage error, 0 otherwise.
int bam_index(int argc, char *argv[])
{
	if (argc < 2) {
		fprintf(stderr, "Usage: samtools index <in.bam> [out.index]\n");
		return 1;
	}
	if (argc >= 3) bam_index_build2(argv[1], argv[2]);
	else bam_index_build(argv[1]);
	return 0;
}
604 int bam_idxstats(int argc, char *argv[])
607 bam_header_t *header;
611 fprintf(stderr, "Usage: samtools idxstats <in.bam>\n");
614 fp = bam_open(argv[1], "r");
615 if (fp == 0) { fprintf(stderr, "[%s] fail to open BAM.\n", __func__); return 1; }
616 header = bam_header_read(fp);
618 idx = bam_index_load(argv[1]);
619 if (idx == 0) { fprintf(stderr, "[%s] fail to load the index.\n", __func__); return 1; }
620 for (i = 0; i < idx->n; ++i) {
622 khash_t(i) *h = idx->index[i];
623 printf("%s\t%d", header->target_name[i], header->target_len[i]);
624 k = kh_get(i, h, BAM_MAX_BIN);
626 printf("\t%llu\t%llu", (long long)kh_val(h, k).list[1].u, (long long)kh_val(h, k).list[1].v);
627 else printf("\t0\t0");
630 printf("*\t0\t0\t%llu\n", (long long)idx->n_no_coor);
631 bam_header_destroy(header);
632 bam_index_destroy(idx);
// Fill `list` with every bin number that may contain alignments overlapping
// the zero-based half-open region [beg,end); returns the number of bins
// written (at most BAM_MAX_BIN, which `list` must have room for).
// Bin levels, smallest region first in each loop: 512Mbp (bin 0), 64Mbp,
// 8Mbp, 1Mbp, 128Kbp, 16Kbp.
static inline int reg2bins(uint32_t beg, uint32_t end, uint16_t list[])
{
	int i = 0, k;
	if (beg >= end) return 0;
	if (end >= 1u<<29) end = 1u<<29;
	--end; // make the range inclusive for the shift arithmetic below
	list[i++] = 0;
	for (k =    1 + (beg>>26); k <=    1 + (end>>26); ++k) list[i++] = k;
	for (k =    9 + (beg>>23); k <=    9 + (end>>23); ++k) list[i++] = k;
	for (k =   73 + (beg>>20); k <=   73 + (end>>20); ++k) list[i++] = k;
	for (k =  585 + (beg>>17); k <=  585 + (end>>17); ++k) list[i++] = k;
	for (k = 4681 + (beg>>14); k <= 4681 + (end>>14); ++k) list[i++] = k;
	return i;
}
651 static inline int is_overlap(uint32_t beg, uint32_t end, const bam1_t *b)
653 uint32_t rbeg = b->core.pos;
654 uint32_t rend = b->core.n_cigar? bam_calend(&b->core, bam1_cigar(b)) : b->core.pos + 1;
655 return (rend > beg && rbeg < end);
// Region-iterator state produced by bam_iter_query() and consumed by
// bam_iter_read() (remaining fields are not visible in this chunk).
struct __bam_iter_t {
	int from_first; // read from the first record; no random access
	// tid/beg/end: query region; n_off: chunk count; i: current chunk; finished: iteration done
	int tid, beg, end, n_off, i, finished;
665 // bam_fetch helper function retrieves
666 bam_iter_t bam_iter_query(const bam_index_t *idx, int tid, int beg, int end)
669 int i, n_bins, n_off;
676 if (beg < 0) beg = 0;
677 if (end < beg) return 0;
679 iter = calloc(1, sizeof(struct __bam_iter_t));
680 iter->tid = tid, iter->beg = beg, iter->end = end; iter->i = -1;
682 bins = (uint16_t*)calloc(BAM_MAX_BIN, 2);
683 n_bins = reg2bins(beg, end, bins);
684 index = idx->index[tid];
685 if (idx->index2[tid].n > 0) {
686 min_off = (beg>>BAM_LIDX_SHIFT >= idx->index2[tid].n)? idx->index2[tid].offset[idx->index2[tid].n-1]
687 : idx->index2[tid].offset[beg>>BAM_LIDX_SHIFT];
688 if (min_off == 0) { // improvement for index files built by tabix prior to 0.1.4
689 int n = beg>>BAM_LIDX_SHIFT;
690 if (n > idx->index2[tid].n) n = idx->index2[tid].n;
691 for (i = n - 1; i >= 0; --i)
692 if (idx->index2[tid].offset[i] != 0) break;
693 if (i >= 0) min_off = idx->index2[tid].offset[i];
695 } else min_off = 0; // tabix 0.1.2 may produce such index files
696 for (i = n_off = 0; i < n_bins; ++i) {
697 if ((k = kh_get(i, index, bins[i])) != kh_end(index))
698 n_off += kh_value(index, k).n;
701 free(bins); return iter;
703 off = (pair64_t*)calloc(n_off, 16);
704 for (i = n_off = 0; i < n_bins; ++i) {
705 if ((k = kh_get(i, index, bins[i])) != kh_end(index)) {
707 bam_binlist_t *p = &kh_value(index, k);
708 for (j = 0; j < p->n; ++j)
709 if (p->list[j].v > min_off) off[n_off++] = p->list[j];
714 free(off); return iter;
717 bam1_t *b = (bam1_t*)calloc(1, sizeof(bam1_t));
719 ks_introsort(off, n_off, off);
720 // resolve completely contained adjacent blocks
721 for (i = 1, l = 0; i < n_off; ++i)
722 if (off[l].v < off[i].v)
725 // resolve overlaps between adjacent blocks; this may happen due to the merge in indexing
726 for (i = 1; i < n_off; ++i)
727 if (off[i-1].v >= off[i].u) off[i-1].v = off[i].u;
728 { // merge adjacent blocks
729 #if defined(BAM_TRUE_OFFSET) || defined(BAM_VIRTUAL_OFFSET16)
730 for (i = 1, l = 0; i < n_off; ++i) {
731 #ifdef BAM_TRUE_OFFSET
732 if (off[l].v + BAM_MIN_CHUNK_GAP > off[i].u) off[l].v = off[i].v;
734 if (off[l].v>>16 == off[i].u>>16) off[l].v = off[i].v;
736 else off[++l] = off[i];
743 iter->n_off = n_off; iter->off = off;
747 pair64_t *get_chunk_coordinates(const bam_index_t *idx, int tid, int beg, int end, int *cnt_off)
748 { // for pysam compatibility
751 iter = bam_iter_query(idx, tid, beg, end);
752 off = iter->off; *cnt_off = iter->n_off;
757 void bam_iter_destroy(bam_iter_t iter)
759 if (iter) { free(iter->off); free(iter); }
762 int bam_iter_read(bamFile fp, bam_iter_t iter, bam1_t *b)
765 if (iter && iter->finished) return -1;
766 if (iter == 0 || iter->from_first) {
767 ret = bam_read1(fp, b);
768 if (ret < 0 && iter) iter->finished = 1;
771 if (iter->off == 0) return -1;
773 if (iter->curr_off == 0 || iter->curr_off >= iter->off[iter->i].v) { // then jump to the next chunk
774 if (iter->i == iter->n_off - 1) { ret = -1; break; } // no more chunks
775 if (iter->i >= 0) assert(iter->curr_off == iter->off[iter->i].v); // otherwise bug
776 if (iter->i < 0 || iter->off[iter->i].v != iter->off[iter->i+1].u) { // not adjacent chunks; then seek
777 bam_seek(fp, iter->off[iter->i+1].u, SEEK_SET);
778 iter->curr_off = bam_tell(fp);
782 if ((ret = bam_read1(fp, b)) >= 0) {
783 iter->curr_off = bam_tell(fp);
784 if (b->core.tid != iter->tid || b->core.pos >= iter->end) { // no need to proceed
785 ret = bam_validate1(NULL, b)? -1 : -5; // determine whether end of region or error
788 else if (is_overlap(iter->beg, iter->end, b)) return ret;
789 } else break; // end of file or error
795 int bam_fetch(bamFile fp, const bam_index_t *idx, int tid, int beg, int end, void *data, bam_fetch_f func)
801 iter = bam_iter_query(idx, tid, beg, end);
802 while ((ret = bam_iter_read(fp, iter, b)) >= 0) func(b, data);
803 bam_iter_destroy(iter);
805 return (ret == -1)? 0 : ret;