/* Version string reported in the usage banner. */
#define PACKAGE_VERSION "0.2.4 (r949)"

/*
 * error(fmt, ...): print a printf-style message to stderr, then return -1
 * from the *enclosing function*.
 *
 * The body is wrapped in do { ... } while (0) so that an invocation followed
 * by a semicolon is exactly one statement.  With a bare { ... } block,
 * `if (cond) error("..."); else ...` does not compile because the trailing
 * semicolon terminates the if-statement before the else is reached.
 *
 * Call sites must end the invocation with a semicolon.  Only usable inside
 * functions whose return type can hold -1.
 */
#define error(...) do { fprintf(stderr, __VA_ARGS__); return -1; } while (0)
14 int reheader_file(const char *header, const char *file, int meta)
16 BGZF *fp = bgzf_open(file,"r");
17 if (bgzf_read_block(fp) != 0 || !fp->block_length)
20 char *buffer = fp->uncompressed_block;
23 if ( buffer[0]==meta )
30 if ( buffer[skip_until]=='\n' )
33 if ( skip_until>=fp->block_length )
35 if (bgzf_read_block(fp) != 0 || !fp->block_length)
39 // The header has finished
40 if ( buffer[skip_until]!=meta ) break;
43 if ( skip_until>=fp->block_length )
45 if (bgzf_read_block(fp) != 0 || !fp->block_length)
52 FILE *fh = fopen(header,"r");
54 error("%s: %s", header,strerror(errno));
55 int page_size = getpagesize();
56 char *buf = valloc(page_size);
57 BGZF *bgzf_out = bgzf_fdopen(fileno(stdout), "w");
59 while ( (nread=fread(buf,1,page_size-1,fh))>0 )
61 if ( nread<page_size-1 && buf[nread-1]!='\n' )
63 if (bgzf_write(bgzf_out, buf, nread) < 0) error("Error: %s\n",bgzf_out->error);
67 if ( fp->block_length - skip_until > 0 )
69 if (bgzf_write(bgzf_out, buffer+skip_until, fp->block_length-skip_until) < 0)
70 error("Error: %s\n",fp->error);
72 if (bgzf_flush(bgzf_out) < 0)
73 error("Error: %s\n",bgzf_out->error);
78 nread = knet_read(fp->x.fpr, buf, page_size);
80 nread = fread(buf, 1, page_size, fp->file);
86 int count = fwrite(buf, 1, nread, bgzf_out->x.fpw);
88 int count = fwrite(buf, 1, nread, bgzf_out->file);
91 error("Write failed, wrote %d instead of %d bytes.\n", count,(int)nread);
94 if (bgzf_close(bgzf_out) < 0)
95 error("Error: %s\n",bgzf_out->error);
101 int main(int argc, char *argv[])
103 int c, skip = -1, meta = -1, list_chrms = 0, force = 0, print_header = 0, bed_reg = 0;
104 ti_conf_t conf = ti_conf_gff;
105 const char *reheader = NULL;
106 while ((c = getopt(argc, argv, "p:s:b:e:0S:c:lhfBr:")) >= 0) {
108 case 'B': bed_reg = 1; break;
109 case '0': conf.preset |= TI_FLAG_UCSC; break;
110 case 'S': skip = atoi(optarg); break;
111 case 'c': meta = optarg[0]; break;
113 if (strcmp(optarg, "gff") == 0) conf = ti_conf_gff;
114 else if (strcmp(optarg, "bed") == 0) conf = ti_conf_bed;
115 else if (strcmp(optarg, "sam") == 0) conf = ti_conf_sam;
116 else if (strcmp(optarg, "vcf") == 0 || strcmp(optarg, "vcf4") == 0) conf = ti_conf_vcf;
117 else if (strcmp(optarg, "psltbl") == 0) conf = ti_conf_psltbl;
119 fprintf(stderr, "[main] unrecognized preset '%s'\n", optarg);
123 case 's': conf.sc = atoi(optarg); break;
124 case 'b': conf.bc = atoi(optarg); break;
125 case 'e': conf.ec = atoi(optarg); break;
126 case 'l': list_chrms = 1; break;
127 case 'h': print_header = 1; break;
128 case 'f': force = 1; break;
129 case 'r': reheader = optarg; break;
132 if (skip >= 0) conf.line_skip = skip;
133 if (meta >= 0) conf.meta_char = meta;
134 if (optind == argc) {
135 fprintf(stderr, "\n");
136 fprintf(stderr, "Program: tabix (TAB-delimited file InderXer)\n");
137 fprintf(stderr, "Version: %s\n\n", PACKAGE_VERSION);
138 fprintf(stderr, "Usage: tabix <in.tab.bgz> [region1 [region2 [...]]]\n\n");
139 fprintf(stderr, "Options: -p STR preset: gff, bed, sam, vcf, psltbl [gff]\n");
140 fprintf(stderr, " -s INT sequence name column [1]\n");
141 fprintf(stderr, " -b INT start column [4]\n");
142 fprintf(stderr, " -e INT end column; can be identical to '-b' [5]\n");
143 fprintf(stderr, " -S INT skip first INT lines [0]\n");
144 fprintf(stderr, " -c CHAR symbol for comment/meta lines [#]\n");
145 fprintf(stderr, " -r FILE replace the header with the content of FILE [null]\n");
146 fprintf(stderr, " -B region1 is a BED file (entire file will be read)\n");
147 fprintf(stderr, " -0 zero-based coordinate\n");
148 fprintf(stderr, " -h print the header lines\n");
149 fprintf(stderr, " -l list chromosome names\n");
150 fprintf(stderr, " -f force to overwrite the index\n");
151 fprintf(stderr, "\n");
158 idx = ti_index_load(argv[optind]);
160 fprintf(stderr, "[main] fail to load the index file.\n");
163 names = ti_seqname(idx, &n);
164 for (i = 0; i < n; ++i) printf("%s\n", names[i]);
166 ti_index_destroy(idx);
170 return reheader_file(reheader,argv[optind],conf.meta_char);
172 struct stat stat_tbi,stat_vcf;
173 char *fnidx = calloc(strlen(argv[optind]) + 5, 1);
174 strcat(strcpy(fnidx, argv[optind]), ".tbi");
176 if (optind + 1 == argc) {
178 if (stat(fnidx, &stat_tbi) == 0)
180 // Before complaining, check if the VCF file isn't newer. This is a common source of errors,
181 // people tend not to notice that tabix failed
182 stat(argv[optind], &stat_vcf);
183 if ( stat_vcf.st_mtime <= stat_tbi.st_mtime )
185 fprintf(stderr, "[tabix] the index file exists. Please use '-f' to overwrite.\n");
191 if ( bgzf_check_bgzf(argv[optind])!=1 )
193 fprintf(stderr,"[tabix] was bgzip used to compress this file? %s\n", argv[optind]);
197 return ti_index_build(argv[optind], &conf);
201 // Common source of errors: new VCF is used with an old index
202 stat(fnidx, &stat_tbi);
203 stat(argv[optind], &stat_vcf);
204 if ( force==0 && stat_vcf.st_mtime > stat_tbi.st_mtime )
206 fprintf(stderr, "[tabix] the index file is older than the vcf file. Please use '-f' to overwrite or reindex.\n");
212 if ((t = ti_open(argv[optind], 0)) == 0) {
213 fprintf(stderr, "[main] fail to open the data file.\n");
216 if (strcmp(argv[optind+1], ".") == 0) { // retrieve all
220 iter = ti_query(t, 0, 0, 0);
221 while ((s = ti_read(t, iter, &len)) != 0) {
222 fputs(s, stdout); fputc('\n', stdout);
224 ti_iter_destroy(iter);
225 } else { // retrieve from specified regions
229 const ti_conf_t *idxconf;
231 if (ti_lazy_index_load(t) < 0 && bed_reg == 0) {
232 fprintf(stderr,"[tabix] failed to load the index file.\n");
235 idxconf = ti_get_conf(t->idx);
239 // If requested, print the header lines here
240 iter = ti_query(t, 0, 0, 0);
241 while ((s = ti_read(t, iter, &len)) != 0) {
242 if ((int)(*s) != idxconf->meta_char) break;
243 fputs(s, stdout); fputc('\n', stdout);
245 ti_iter_destroy(iter);
248 extern int bed_overlap(const void *_h, const char *chr, int beg, int end);
249 extern void *bed_read(const char *fn);
250 extern void bed_destroy(void *_h);
252 const ti_conf_t *conf_ = idxconf? idxconf : &conf; // use the index file if available
253 void *bed = bed_read(argv[optind+1]); // load the BED file
257 fprintf(stderr, "[main] fail to read the BED file.\n");
260 iter = ti_query(t, 0, 0, 0);
261 while ((s = ti_read(t, iter, &len)) != 0) {
263 ti_get_intv(conf_, len, (char*)s, &intv);
264 c = *intv.se; *intv.se = '\0';
265 if (bed_overlap(bed, intv.ss, intv.beg, intv.end)) {
271 ti_iter_destroy(iter);
274 for (i = optind + 1; i < argc; ++i) {
276 if (ti_parse_region(t->idx, argv[i], &tid, &beg, &end) == 0) {
277 iter = ti_queryi(t, tid, beg, end);
278 while ((s = ti_read(t, iter, &len)) != 0) {
279 fputs(s, stdout); fputc('\n', stdout);
281 ti_iter_destroy(iter);
283 // else fprintf(stderr, "[main] invalid region: unknown target name or minus interval.\n");