X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=tabix.git;a=blobdiff_plain;f=main.c;h=ab2e1743b2225689f09993fed8fd3c6423ac5b50;hp=364abe5ff6751b3ea4720f3d1a45a7dcd22d84d4;hb=HEAD;hpb=6ef84accd7e4c5a48a7a959475bc1e5934e8adeb diff --git a/main.c b/main.c index 364abe5..ab2e174 100644 --- a/main.c +++ b/main.c @@ -6,8 +6,9 @@ #include #include "bgzf.h" #include "tabix.h" +#include "knetfile.h" -#define PACKAGE_VERSION "0.2.5 (r964)" +#define PACKAGE_VERSION "0.2.5 (r1005)" #define error(...) { fprintf(stderr,__VA_ARGS__); return -1; } @@ -54,45 +55,41 @@ int reheader_file(const char *header, const char *file, int meta) error("%s: %s", header,strerror(errno)); int page_size = getpagesize(); char *buf = valloc(page_size); - BGZF *bgzf_out = bgzf_fdopen(fileno(stdout), "w"); + BGZF *bgzf_out = bgzf_dopen(fileno(stdout), "w"); ssize_t nread; while ( (nread=fread(buf,1,page_size-1,fh))>0 ) { if ( nreaderror); + if (bgzf_write(bgzf_out, buf, nread) < 0) error("Error: %d\n",bgzf_out->errcode); } fclose(fh); if ( fp->block_length - skip_until > 0 ) { if (bgzf_write(bgzf_out, buffer+skip_until, fp->block_length-skip_until) < 0) - error("Error: %s\n",fp->error); + error("Error: %d\n",fp->errcode); } if (bgzf_flush(bgzf_out) < 0) - error("Error: %s\n",bgzf_out->error); + error("Error: %d\n",bgzf_out->errcode); while (1) { #ifdef _USE_KNETFILE - nread = knet_read(fp->x.fpr, buf, page_size); + nread = knet_read(fp->fp, buf, page_size); #else - nread = fread(buf, 1, page_size, fp->file); + nread = fread(buf, 1, page_size, fp->fp); #endif if ( nread<=0 ) break; -#ifdef _USE_KNETFILE - int count = fwrite(buf, 1, nread, bgzf_out->x.fpw); -#else - int count = fwrite(buf, 1, nread, bgzf_out->file); -#endif + int count = fwrite(buf, 1, nread, bgzf_out->fp); if (count != nread) error("Write failed, wrote %d instead of %d bytes.\n", count,(int)nread); } if (bgzf_close(bgzf_out) < 0) - error("Error: %s\n",bgzf_out->error); + error("Error: %d\n",bgzf_out->errcode); return 0; } @@ -100,21 +97,21 @@ int reheader_file(const char *header, const char *file, int meta) int main(int argc, char *argv[]) { - int c, skip = -1, meta = -1, list_chrms = 0, force = 0, print_header = 0, bed_reg = 0; - ti_conf_t conf = ti_conf_gff; + int c, skip = -1, meta = -1, list_chrms = 0, force = 0, print_header = 0, print_only_header = 0, bed_reg = 0; + ti_conf_t conf = ti_conf_gff, *conf_ptr = NULL; const char *reheader = NULL; - while ((c = getopt(argc, argv, "p:s:b:e:0S:c:lhfBr:")) >= 0) { + while ((c = getopt(argc, argv, "p:s:b:e:0S:c:lhHfBr:")) >= 0) { switch (c) { case 'B': bed_reg = 1; break; case '0': conf.preset |= TI_FLAG_UCSC; break; case 'S': skip = atoi(optarg); break; case 'c': meta = optarg[0]; break; case 'p': - if (strcmp(optarg, "gff") == 0) conf = ti_conf_gff; - else if (strcmp(optarg, "bed") == 0) conf = ti_conf_bed; - else if (strcmp(optarg, "sam") == 0) conf = ti_conf_sam; - else if (strcmp(optarg, "vcf") == 0 || strcmp(optarg, "vcf4") == 0) conf = ti_conf_vcf; - else if (strcmp(optarg, "psltbl") == 0) conf = ti_conf_psltbl; + if (strcmp(optarg, "gff") == 0) conf_ptr = &ti_conf_gff; + else if (strcmp(optarg, "bed") == 0) conf_ptr = &ti_conf_bed; + else if (strcmp(optarg, "sam") == 0) conf_ptr = &ti_conf_sam; + else if (strcmp(optarg, "vcf") == 0 || strcmp(optarg, "vcf4") == 0) conf_ptr = &ti_conf_vcf; + else if (strcmp(optarg, "psltbl") == 0) conf_ptr = &ti_conf_psltbl; else { fprintf(stderr, "[main] unrecognized preset '%s'\n", optarg); return 1; @@ -125,12 +122,11 @@ int main(int argc, char *argv[]) case 'e': conf.ec = atoi(optarg); break; case 'l': list_chrms = 1; break; case 'h': print_header = 1; break; + case 'H': print_only_header = 1; break; case 'f': force = 1; break; case 'r': reheader = optarg; break; } } - if (skip >= 0) conf.line_skip = skip; - if (meta >= 0) conf.meta_char = meta; if (optind == argc) { fprintf(stderr, "\n"); fprintf(stderr, "Program: tabix (TAB-delimited file InderXer)\n"); @@ -145,12 +141,28 @@ int main(int argc, char *argv[]) fprintf(stderr, " -r FILE replace the header with the content of FILE [null]\n"); fprintf(stderr, " -B region1 is a BED file (entire file will be read)\n"); fprintf(stderr, " -0 zero-based coordinate\n"); - fprintf(stderr, " -h print the header lines\n"); + fprintf(stderr, " -h print also the header lines\n"); + fprintf(stderr, " -H print only the header lines\n"); fprintf(stderr, " -l list chromosome names\n"); fprintf(stderr, " -f force to overwrite the index\n"); fprintf(stderr, "\n"); return 1; } + if ( !conf_ptr ) + { + int l = strlen(argv[optind]); + int strcasecmp(const char *s1, const char *s2); + if (l>=7 && strcasecmp(argv[optind]+l-7, ".gff.gz") == 0) conf_ptr = &ti_conf_gff; + else if (l>=7 && strcasecmp(argv[optind]+l-7, ".bed.gz") == 0) conf_ptr = &ti_conf_bed; + else if (l>=7 && strcasecmp(argv[optind]+l-7, ".sam.gz") == 0) conf_ptr = &ti_conf_sam; + else if (l>=7 && strcasecmp(argv[optind]+l-7, ".vcf.gz") == 0) conf_ptr = &ti_conf_vcf; + else if (l>=10 && strcasecmp(argv[optind]+l-10, ".psltbl.gz") == 0) conf_ptr = &ti_conf_psltbl; + } + if ( conf_ptr ) + conf = *conf_ptr; + + if (skip >= 0) conf.line_skip = skip; + if (meta >= 0) conf.meta_char = meta; if (list_chrms) { ti_index_t *idx; int i, n; @@ -173,7 +185,7 @@ int main(int argc, char *argv[]) char *fnidx = calloc(strlen(argv[optind]) + 5, 1); strcat(strcpy(fnidx, argv[optind]), ".tbi"); - if (optind + 1 == argc) { + if (optind + 1 == argc && !print_only_header) { if (force == 0) { if (stat(fnidx, &stat_tbi) == 0) { @@ -188,24 +200,41 @@ int main(int argc, char *argv[]) } } } - if ( bgzf_check_bgzf(argv[optind])!=1 ) + if ( bgzf_is_bgzf(argv[optind])!=1 ) { fprintf(stderr,"[tabix] was bgzip used to compress this file? %s\n", argv[optind]); free(fnidx); return 1; + } + if ( !conf_ptr ) + { + // Building the index but the file type was neither recognised nor given. If no custom change + // has been made, warn the user that GFF is used + if ( conf.preset==ti_conf_gff.preset + && conf.sc==ti_conf_gff.sc + && conf.bc==ti_conf_gff.bc + && conf.ec==ti_conf_gff.ec + && conf.meta_char==ti_conf_gff.meta_char + && conf.line_skip==ti_conf_gff.line_skip ) + fprintf(stderr,"[tabix] The file type not recognised and -p not given, using the preset [gff].\n"); } return ti_index_build(argv[optind], &conf); } { // retrieve tabix_t *t; - // Common source of errors: new VCF is used with an old index - stat(fnidx, &stat_tbi); - stat(argv[optind], &stat_vcf); - if ( force==0 && stat_vcf.st_mtime > stat_tbi.st_mtime ) + // On some systems, stat on non-existent files returns undefined value for sm_mtime, the user had to use -f + int is_remote = (strstr(fnidx, "ftp://") == fnidx || strstr(fnidx, "http://") == fnidx) ? 1 : 0; + if ( !is_remote ) { - fprintf(stderr, "[tabix] the index file is older than the vcf file. Please use '-f' to overwrite or reindex.\n"); - free(fnidx); - return 1; + // Common source of errors: new VCF is used with an old index + stat(fnidx, &stat_tbi); + stat(argv[optind], &stat_vcf); + if ( force==0 && stat_vcf.st_mtime > stat_tbi.st_mtime ) + { + fprintf(stderr, "[tabix] the index file either does not exist or is older than the vcf file. Please reindex.\n"); + free(fnidx); + return 1; + } } free(fnidx); @@ -213,6 +242,25 @@ int main(int argc, char *argv[]) fprintf(stderr, "[main] fail to open the data file.\n"); return 1; } + if ( print_only_header ) + { + ti_iter_t iter; + const char *s; + int len; + if (ti_lazy_index_load(t) < 0 && bed_reg == 0) { + fprintf(stderr,"[tabix] failed to load the index file.\n"); + return 1; + } + const ti_conf_t *idxconf = ti_get_conf(t->idx); + iter = ti_query(t, 0, 0, 0); + while ((s = ti_read(t, iter, &len)) != 0) { + if ((int)(*s) != idxconf->meta_char) break; + fputs(s, stdout); fputc('\n', stdout); + } + ti_iter_destroy(iter); + return 0; + } + if (strcmp(argv[optind+1], ".") == 0) { // retrieve all ti_iter_t iter; const char *s;