#include <errno.h>
#include "bgzf.h"
#include "tabix.h"
+#include "knetfile.h"
-#define PACKAGE_VERSION "0.2.4 (r949)"
+#define PACKAGE_VERSION "0.2.5 (r1005)"
#define error(...) { fprintf(stderr,__VA_ARGS__); return -1; }
error("%s: %s", header,strerror(errno));
int page_size = getpagesize();
char *buf = valloc(page_size);
- BGZF *bgzf_out = bgzf_fdopen(fileno(stdout), "w");
+ BGZF *bgzf_out = bgzf_dopen(fileno(stdout), "w");
ssize_t nread;
while ( (nread=fread(buf,1,page_size-1,fh))>0 )
{
if ( nread<page_size-1 && buf[nread-1]!='\n' )
buf[nread++] = '\n';
- if (bgzf_write(bgzf_out, buf, nread) < 0) error("Error: %s\n",bgzf_out->error);
+ if (bgzf_write(bgzf_out, buf, nread) < 0) error("Error: %d\n",bgzf_out->errcode);
}
fclose(fh);
if ( fp->block_length - skip_until > 0 )
{
if (bgzf_write(bgzf_out, buffer+skip_until, fp->block_length-skip_until) < 0)
- error("Error: %s\n",fp->error);
+ error("Error: %d\n",fp->errcode);
}
if (bgzf_flush(bgzf_out) < 0)
- error("Error: %s\n",bgzf_out->error);
+ error("Error: %d\n",bgzf_out->errcode);
while (1)
{
#ifdef _USE_KNETFILE
- nread = knet_read(fp->x.fpr, buf, page_size);
+ nread = knet_read(fp->fp, buf, page_size);
#else
- nread = fread(buf, 1, page_size, fp->file);
+ nread = fread(buf, 1, page_size, fp->fp);
#endif
if ( nread<=0 )
break;
-#ifdef _USE_KNETFILE
- int count = fwrite(buf, 1, nread, bgzf_out->x.fpw);
-#else
- int count = fwrite(buf, 1, nread, bgzf_out->file);
-#endif
+ int count = fwrite(buf, 1, nread, bgzf_out->fp);
if (count != nread)
error("Write failed, wrote %d instead of %d bytes.\n", count,(int)nread);
}
if (bgzf_close(bgzf_out) < 0)
- error("Error: %s\n",bgzf_out->error);
+ error("Error: %d\n",bgzf_out->errcode);
return 0;
}
int main(int argc, char *argv[])
{
- int c, skip = -1, meta = -1, list_chrms = 0, force = 0, print_header = 0, bed_reg = 0;
- ti_conf_t conf = ti_conf_gff;
+ int c, skip = -1, meta = -1, list_chrms = 0, force = 0, print_header = 0, print_only_header = 0, bed_reg = 0;
+ ti_conf_t conf = ti_conf_gff, *conf_ptr = NULL;
const char *reheader = NULL;
- while ((c = getopt(argc, argv, "p:s:b:e:0S:c:lhfBr:")) >= 0) {
+ while ((c = getopt(argc, argv, "p:s:b:e:0S:c:lhHfBr:")) >= 0) {
switch (c) {
case 'B': bed_reg = 1; break;
case '0': conf.preset |= TI_FLAG_UCSC; break;
case 'S': skip = atoi(optarg); break;
case 'c': meta = optarg[0]; break;
case 'p':
- if (strcmp(optarg, "gff") == 0) conf = ti_conf_gff;
- else if (strcmp(optarg, "bed") == 0) conf = ti_conf_bed;
- else if (strcmp(optarg, "sam") == 0) conf = ti_conf_sam;
- else if (strcmp(optarg, "vcf") == 0 || strcmp(optarg, "vcf4") == 0) conf = ti_conf_vcf;
- else if (strcmp(optarg, "psltbl") == 0) conf = ti_conf_psltbl;
+ if (strcmp(optarg, "gff") == 0) conf_ptr = &ti_conf_gff;
+ else if (strcmp(optarg, "bed") == 0) conf_ptr = &ti_conf_bed;
+ else if (strcmp(optarg, "sam") == 0) conf_ptr = &ti_conf_sam;
+ else if (strcmp(optarg, "vcf") == 0 || strcmp(optarg, "vcf4") == 0) conf_ptr = &ti_conf_vcf;
+ else if (strcmp(optarg, "psltbl") == 0) conf_ptr = &ti_conf_psltbl;
else {
fprintf(stderr, "[main] unrecognized preset '%s'\n", optarg);
return 1;
case 'e': conf.ec = atoi(optarg); break;
case 'l': list_chrms = 1; break;
case 'h': print_header = 1; break;
+ case 'H': print_only_header = 1; break;
case 'f': force = 1; break;
case 'r': reheader = optarg; break;
}
}
- if (skip >= 0) conf.line_skip = skip;
- if (meta >= 0) conf.meta_char = meta;
if (optind == argc) {
fprintf(stderr, "\n");
fprintf(stderr, "Program: tabix (TAB-delimited file InderXer)\n");
fprintf(stderr, " -r FILE replace the header with the content of FILE [null]\n");
fprintf(stderr, " -B region1 is a BED file (entire file will be read)\n");
fprintf(stderr, " -0 zero-based coordinate\n");
- fprintf(stderr, " -h print the header lines\n");
+ fprintf(stderr, " -h print also the header lines\n");
+ fprintf(stderr, " -H print only the header lines\n");
fprintf(stderr, " -l list chromosome names\n");
fprintf(stderr, " -f force to overwrite the index\n");
fprintf(stderr, "\n");
return 1;
}
+ if ( !conf_ptr )
+ {
+ int l = strlen(argv[optind]);
+ int strcasecmp(const char *s1, const char *s2);
+ if (l>=7 && strcasecmp(argv[optind]+l-7, ".gff.gz") == 0) conf_ptr = &ti_conf_gff;
+ else if (l>=7 && strcasecmp(argv[optind]+l-7, ".bed.gz") == 0) conf_ptr = &ti_conf_bed;
+ else if (l>=7 && strcasecmp(argv[optind]+l-7, ".sam.gz") == 0) conf_ptr = &ti_conf_sam;
+ else if (l>=7 && strcasecmp(argv[optind]+l-7, ".vcf.gz") == 0) conf_ptr = &ti_conf_vcf;
+ else if (l>=10 && strcasecmp(argv[optind]+l-10, ".psltbl.gz") == 0) conf_ptr = &ti_conf_psltbl;
+ }
+ if ( conf_ptr )
+ conf = *conf_ptr;
+
+ if (skip >= 0) conf.line_skip = skip;
+ if (meta >= 0) conf.meta_char = meta;
if (list_chrms) {
ti_index_t *idx;
int i, n;
char *fnidx = calloc(strlen(argv[optind]) + 5, 1);
strcat(strcpy(fnidx, argv[optind]), ".tbi");
- if (optind + 1 == argc) {
+ if (optind + 1 == argc && !print_only_header) {
if (force == 0) {
if (stat(fnidx, &stat_tbi) == 0)
{
}
}
}
- if ( bgzf_check_bgzf(argv[optind])!=1 )
+ if ( bgzf_is_bgzf(argv[optind])!=1 )
{
fprintf(stderr,"[tabix] was bgzip used to compress this file? %s\n", argv[optind]);
free(fnidx);
return 1;
+ }
+ if ( !conf_ptr )
+ {
+ // Building the index but the file type was neither recognised nor given. If no custom change
+ // has been made, warn the user that GFF is used
+ if ( conf.preset==ti_conf_gff.preset
+ && conf.sc==ti_conf_gff.sc
+ && conf.bc==ti_conf_gff.bc
+ && conf.ec==ti_conf_gff.ec
+ && conf.meta_char==ti_conf_gff.meta_char
+ && conf.line_skip==ti_conf_gff.line_skip )
+ fprintf(stderr,"[tabix] The file type not recognised and -p not given, using the preset [gff].\n");
}
return ti_index_build(argv[optind], &conf);
}
{ // retrieve
tabix_t *t;
- // Common source of errors: new VCF is used with an old index
- stat(fnidx, &stat_tbi);
- stat(argv[optind], &stat_vcf);
- if ( force==0 && stat_vcf.st_mtime > stat_tbi.st_mtime )
+ // On some systems, stat on non-existent files returns an undefined value for st_mtime, so the user had to use -f
+ int is_remote = (strstr(fnidx, "ftp://") == fnidx || strstr(fnidx, "http://") == fnidx) ? 1 : 0;
+ if ( !is_remote )
{
- fprintf(stderr, "[tabix] the index file is older than the vcf file. Please use '-f' to overwrite or reindex.\n");
- free(fnidx);
- return 1;
+ // Common source of errors: new VCF is used with an old index
+ stat(fnidx, &stat_tbi);
+ stat(argv[optind], &stat_vcf);
+ if ( force==0 && stat_vcf.st_mtime > stat_tbi.st_mtime )
+ {
+ fprintf(stderr, "[tabix] the index file either does not exist or is older than the vcf file. Please reindex.\n");
+ free(fnidx);
+ return 1;
+ }
}
free(fnidx);
fprintf(stderr, "[main] fail to open the data file.\n");
return 1;
}
+ if ( print_only_header )
+ {
+ ti_iter_t iter;
+ const char *s;
+ int len;
+ if (ti_lazy_index_load(t) < 0 && bed_reg == 0) {
+ fprintf(stderr,"[tabix] failed to load the index file.\n");
+ return 1;
+ }
+ const ti_conf_t *idxconf = ti_get_conf(t->idx);
+ iter = ti_query(t, 0, 0, 0);
+ while ((s = ti_read(t, iter, &len)) != 0) {
+ if ((int)(*s) != idxconf->meta_char) break;
+ fputs(s, stdout); fputc('\n', stdout);
+ }
+ ti_iter_destroy(iter);
+ return 0;
+ }
+
if (strcmp(argv[optind+1], ".") == 0) { // retrieve all
ti_iter_t iter;
const char *s;