X-Git-Url: http://woldlab.caltech.edu/gitweb/?a=blobdiff_plain;f=index.c;h=4ed32c2c2f4c83f89c7882f028a084276d340c75;hb=efe8d803dda3db37304c366b366f0fc759365383;hp=e5b227c1466eea9d8be901020ffc2726dc33cc9e;hpb=07678d3f22da823d211cb5687fe51335ed0498fd;p=tabix.git diff --git a/index.c b/index.c index e5b227c..4ed32c2 100644 --- a/index.c +++ b/index.c @@ -19,7 +19,7 @@ typedef struct { } pair64_t; #define pair64_lt(a,b) ((a).u < (b).u) -KSORT_INIT(off, pair64_t, pair64_lt) +KSORT_INIT(offt, pair64_t, pair64_lt) typedef struct { uint32_t m, n; @@ -192,17 +192,19 @@ static int get_intv(ti_index_t *idx, kstring_t *str, ti_intv_t *intv) } } else if ((idx->conf.preset&0xffff) == TI_PRESET_VCF) { // FIXME: the following is NOT tested and is likely to be buggy - if (id == 5) { // ALT - char *t; - int max = 1; - for (s = str->s + b; s < str->s + i;) { - if (s[i] == 'D') { - long x = strtol(s + 1, &t, 10); - if (x > max) max = x; - s = t + 1; - } else ++s; + if (id == 4) { + if (b < i) intv->end = intv->beg + (i - b); + } else if (id == 8) { // look for "END=" + int c = str->s[i]; + str->s[i] = 0; + s = strstr(str->s + b, "END="); + if (s == str->s + b) s += 4; + else if (s) { + s = strstr(str->s + b, ";END="); + if (s) s += 5; } - intv->end = intv->beg + max; + if (s) intv->end = strtol(s, &s, 0); + str->s[i] = c; } } } @@ -238,7 +240,7 @@ static inline void insert_offset(khash_t(i) *h, int bin, uint64_t beg, uint64_t l->list[l->n].u = beg; l->list[l->n++].v = end; } -static inline void insert_offset2(ti_lidx_t *index2, int _beg, int _end, uint64_t offset) +static inline uint64_t insert_offset2(ti_lidx_t *index2, int _beg, int _end, uint64_t offset) { int i, beg, end; beg = _beg >> TAD_LIDX_SHIFT; @@ -257,6 +259,7 @@ static inline void insert_offset2(ti_lidx_t *index2, int _beg, int _end, uint64_ if (index2->offset[i] == 0) index2->offset[i] = offset; } if (index2->n < end + 1) index2->n = end + 1; + return (uint64_t)beg<<32 | end; } static void merge_chunks(ti_index_t *idx) @@ -297,7 +300,7 @@ ti_index_t *ti_index_core(BGZF *fp, const ti_conf_t *conf) ti_index_t *idx; uint32_t last_bin, save_bin; int32_t last_coor, last_tid, save_tid; - uint64_t save_off, last_off, lineno = 0; + uint64_t save_off, last_off, lineno = 0, offset0 = (uint64_t)-1, tmp; kstring_t *str; str = calloc(1, sizeof(kstring_t)); @@ -320,13 +323,19 @@ ti_index_t *ti_index_core(BGZF *fp, const ti_conf_t *conf) } get_intv(idx, str, &intv); if (last_tid != intv.tid) { // change of chromosomes + if (last_tid>intv.tid ) + { + fprintf(stderr,"[ti_index_core] the chromosome blocks not continuous at line %llu, is the file sorted?\n",(unsigned long long)lineno); + exit(1); + } last_tid = intv.tid; last_bin = 0xffffffffu; } else if (last_coor > intv.beg) { fprintf(stderr, "[ti_index_core] the file out of order at line %llu\n", (unsigned long long)lineno); exit(1); } - insert_offset2(&idx->index2[intv.tid], intv.beg, intv.end, last_off); + tmp = insert_offset2(&idx->index2[intv.tid], intv.beg, intv.end, last_off); + if (last_off == 0) offset0 = tmp; if (intv.bin != last_bin) { // then possibly write the binning index if (save_bin != 0xffffffffu) // save_bin==0xffffffffu only happens to the first record insert_offset(idx->index[save_tid], save_bin, save_off, last_off); @@ -346,6 +355,10 @@ ti_index_t *ti_index_core(BGZF *fp, const ti_conf_t *conf) if (save_tid >= 0) insert_offset(idx->index[save_tid], save_bin, save_off, bgzf_tell(fp)); merge_chunks(idx); fill_missing(idx); + if (offset0 != (uint64_t)-1 && idx->n && idx->index2[0].offset) { + int i, beg = offset0>>32, end = offset0&0xffffffffu; + for (i = beg; i <= end; ++i) idx->index2[0].offset[i] = 0; + } free(str->s); free(str); return idx; @@ -643,8 +656,8 @@ ti_index_t *ti_index_load(const char *fn) char *fname = get_local_version(fn); if (fname == 0) return 0; idx = ti_index_load_local(fname); + if (idx == 0) fprintf(stderr, "[ti_index_load] fail to load the index: %s\n", fname); free(fname); - if (idx == 0) fprintf(stderr, "[ti_index_load] fail to load BAM index.\n"); return idx; } @@ -654,7 +667,7 @@ int ti_index_build2(const char *fn, const ti_conf_t *conf, const char *_fnidx) BGZF *fp, *fpidx; ti_index_t *idx; if ((fp = bgzf_open(fn, "r")) == 0) { - fprintf(stderr, "[ti_index_build2] fail to open the BAM file.\n"); + fprintf(stderr, "[ti_index_build2] fail to open the file: %s\n", fn); return -1; } idx = ti_index_core(fp, conf); @@ -801,10 +814,13 @@ ti_iter_t ti_iter_query(const ti_index_t *idx, int tid, int beg, int end) if (p->list[j].v > min_off) off[n_off++] = p->list[j]; } } + if (n_off == 0) { + free(bins); free(off); return iter; + } free(bins); { int l; - ks_introsort(off, n_off, off); + ks_introsort(offt, n_off, off); // resolve completely contained adjacent blocks for (i = 1, l = 0; i < n_off; ++i) if (off[l].v < off[i].v)