X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=tabix.git;a=blobdiff_plain;f=index.c;h=cfb2938ee4ee9d490cd70f5635608b8f21abe291;hp=704b037d2add76bf0c9d02624c3f449ac2bd314b;hb=c70c92b7a385548d8e670638a93a083401f49e4d;hpb=7f5c10e58cb433a02137112209d408ecb9e65751

diff --git a/index.c b/index.c
index 704b037..cfb2938 100644
--- a/index.c
+++ b/index.c
@@ -1,5 +1,6 @@
 #include
 #include
+#include <sys/stat.h>
 #include "khash.h"
 #include "ksort.h"
 #include "kstring.h"
@@ -161,6 +162,9 @@ static int get_intv(ti_index_t *idx, kstring_t *str, ti_intv_t *intv)
 				// here ->beg is 0-based.
 				intv->beg = intv->end = strtol(str->s + b, &s, 0);
 				if (!(idx->conf.preset&TI_FLAG_UCSC)) --intv->beg;
+				else ++intv->end;
+				if (intv->beg < 0) intv->beg = 0;
+				if (intv->end < 1) intv->end = 1;
 			} else {
 				if ((idx->conf.preset&0xffff) == TI_PRESET_GENERIC) {
 					if (id == idx->conf.ec) intv->end = strtol(str->s + b, &s, 0);
@@ -174,6 +178,7 @@ static int get_intv(ti_index_t *idx, kstring_t *str, ti_intv_t *intv)
 							if (op == 'M' || op == 'D' || op == 'N') l += x;
 							s = t + 1;
 						}
+						if (l == 0) l = 1;
 						intv->end = intv->beg + l;
 					}
 				} else if ((idx->conf.preset&0xffff) == TI_PRESET_VCF) {
@@ -262,6 +267,17 @@ static void merge_chunks(ti_index_t *idx)
 	} // ~for(i)
 }
 
+static void fill_missing(ti_index_t *idx)
+{
+	int i, j;
+	for (i = 0; i < idx->n; ++i) {
+		ti_lidx_t *idx2 = &idx->index2[i];
+		for (j = 1; j < idx2->n; ++j)
+			if (idx2->offset[j] == 0)
+				idx2->offset[j] = idx2->offset[j-1];
+	}
+}
+
 ti_index_t *ti_index_core(BGZF *fp, const ti_conf_t *conf)
 {
 	int ret;
@@ -316,6 +332,7 @@ ti_index_t *ti_index_core(BGZF *fp, const ti_conf_t *conf)
 	}
 	if (save_tid >= 0) insert_offset(idx->index[save_tid], save_bin, save_off, bgzf_tell(fp));
 	merge_chunks(idx);
+	fill_missing(idx);
 
 	free(str->s); free(str);
 	return idx;
@@ -523,22 +540,10 @@ static ti_index_t *ti_index_load_core(BGZF *fp)
 	return idx;
 }
 
-ti_index_t *ti_index_load_local(const char *_fn)
+ti_index_t *ti_index_load_local(const char *fnidx)
 {
 	BGZF *fp;
-	char *fnidx, *fn;
-
-	if (strstr(_fn, "ftp://") == _fn || strstr(_fn, "http://") == _fn) {
-		const char *p;
-		int l = strlen(_fn);
-		for (p = _fn + l - 1; p >= _fn; --p)
-			if (*p == '/') break;
-		fn = strdup(p + 1);
-	} else fn = strdup(_fn);
-	fnidx = (char*)calloc(strlen(fn) + 5, 1);
-	strcpy(fnidx, fn); strcat(fnidx, ".tbi");
 	fp = bgzf_open(fnidx, "r");
-	free(fnidx); free(fn);
 	if (fp) {
 		ti_index_t *idx = ti_index_load_core(fp);
 		bgzf_close(fp);
@@ -584,17 +589,53 @@ static void download_from_remote(const char *url)
 }
 #endif
 
+static char *get_local_version(const char *fn)
+{
+	struct stat sbuf;
+	char *fnidx = (char*)calloc(strlen(fn) + 5, 1);
+	strcat(strcpy(fnidx, fn), ".tbi");
+	if ((strstr(fnidx, "ftp://") == fnidx || strstr(fnidx, "http://") == fnidx)) {
+		char *p, *url;
+		int l = strlen(fnidx);
+		for (p = fnidx + l - 1; p >= fnidx; --p)
+			if (*p == '/') break;
+		url = fnidx; fnidx = strdup(p + 1);
+		if (stat(fnidx, &sbuf) == 0) {
+			free(url);
+			return fnidx;
+		}
+		fprintf(stderr, "[%s] downloading the index file...\n", __func__);
+		download_from_remote(url);
+		free(url);
+	}
+	if (stat(fnidx, &sbuf) == 0) return fnidx;
+	free(fnidx); return 0;
+}
+
+int ti_list_chromosomes(const char *fn)
+{
+	ti_index_t *idx;
+	char **names;
+	int i;
+	khint_t k;
+	idx = ti_index_load(fn);
+	names = calloc(idx->n, sizeof(void*));
+	for (k = kh_begin(idx->tname); k < kh_end(idx->tname); ++k)
+		if (kh_exist(idx->tname, k))
+			names[kh_val(idx->tname, k)] = (char*)kh_key(idx->tname, k);
+	for (i = 0; i < idx->n; ++i) printf("%s\n", names[i]);
+	free(names);
+	ti_index_destroy(idx);
+	return 0;
+}
+
 ti_index_t *ti_index_load(const char *fn)
 {
 	ti_index_t *idx;
-	idx = ti_index_load_local(fn);
-	if (idx == 0 && (strstr(fn, "ftp://") == fn || strstr(fn, "http://") == fn)) {
-		char *fnidx = calloc(strlen(fn) + 5, 1);
-		strcat(strcpy(fnidx, fn), ".tbi");
-		fprintf(stderr, "[ti_index_load] attempting to download the remote index file.\n");
-		download_from_remote(fnidx);
-		idx = ti_index_load_local(fn);
-	}
+	char *fname = get_local_version(fn);
+	if (fname == 0) return 0;
+	idx = ti_index_load_local(fname);
+	free(fname);
 	if (idx == 0) fprintf(stderr, "[ti_index_load] fail to load BAM index.\n");
 	return idx;
 }
@@ -709,7 +750,8 @@ pair64_t *get_chunk_coordinates(const ti_index_t *idx, int tid, int beg, int end
 	bins = (uint16_t*)calloc(MAX_BIN, 2);
 	n_bins = reg2bins(beg, end, bins);
 	index = idx->index[tid];
-	min_off = (beg>>TAD_LIDX_SHIFT >= idx->index2[tid].n)? 0 : idx->index2[tid].offset[beg>>TAD_LIDX_SHIFT];
+	min_off = (beg>>TAD_LIDX_SHIFT >= idx->index2[tid].n)? idx->index2[tid].offset[idx->index2[tid].n-1]
+		: idx->index2[tid].offset[beg>>TAD_LIDX_SHIFT];
 	for (i = n_off = 0; i < n_bins; ++i) {
 		if ((k = kh_get(i, index, bins[i])) != kh_end(index))
 			n_off += kh_value(index, k).n;