+------------------------------------------------------------------------
+r875 | lh3lh3 | 2010-12-08 12:28:35 -0500 (Wed, 08 Dec 2010) | 2 lines
+Changed paths:
+ M /trunk/tabix/ChangeLog
+ M /trunk/tabix/index.c
+
+Fixed a minor bug in generating index
+
+------------------------------------------------------------------------
+r855 | petulda | 2010-11-25 11:50:13 -0500 (Thu, 25 Nov 2010) | 1 line
+Changed paths:
+ M /trunk/tabix/main.c
+
+Disable "unknown target name or minus interval" warning.
+------------------------------------------------------------------------
+r775 | petulda | 2010-10-26 15:02:30 -0400 (Tue, 26 Oct 2010) | 1 line
+Changed paths:
+ M /trunk/tabix/main.c
+
+Added -h option to print header lines
+------------------------------------------------------------------------
+r742 | jmarshall | 2010-09-27 06:47:23 -0400 (Mon, 27 Sep 2010) | 2 lines
+Changed paths:
+ M /trunk/tabix
+
+Add svn:ignore properties for intermediate and generated files.
+
+------------------------------------------------------------------------
+r725 | lh3lh3 | 2010-09-15 13:01:53 -0400 (Wed, 15 Sep 2010) | 2 lines
+Changed paths:
+ M /trunk/tabix/bgzip.c
+
+patches by Peter Chines
+
+------------------------------------------------------------------------
+r714 | lh3lh3 | 2010-09-07 10:13:25 -0400 (Tue, 07 Sep 2010) | 2 lines
+Changed paths:
+ M /trunk/tabix/TabixReader.java
+ M /trunk/tabix/index.c
+ M /trunk/tabix/main.c
+
+fixed a bug in C/Java when n_off == 0
+
+------------------------------------------------------------------------
+r712 | lh3lh3 | 2010-09-03 09:21:23 -0400 (Fri, 03 Sep 2010) | 2 lines
+Changed paths:
+ M /trunk/tabix/TabixReader.java
+
+fixed a bug in parsing region strings
+
+------------------------------------------------------------------------
+r700 | petulda | 2010-08-25 10:42:37 -0400 (Wed, 25 Aug 2010) | 1 line
+Changed paths:
+ M /trunk/tabix/main.c
+
+Fix: Exit with an error rather than segfault when index is not present and region is queried
+------------------------------------------------------------------------
+r696 | petulda | 2010-08-24 10:24:12 -0400 (Tue, 24 Aug 2010) | 1 line
+Changed paths:
+ M /trunk/tabix/bgzf.c
+ M /trunk/tabix/bgzf.h
+ M /trunk/tabix/index.c
+ M /trunk/tabix/main.c
+
+Complain about not-bgzipped files and check for noncontinuous chromosome blocks
+------------------------------------------------------------------------
+r603 | lh3lh3 | 2010-06-28 10:49:39 -0400 (Mon, 28 Jun 2010) | 2 lines
+Changed paths:
+ M /trunk/tabix/NEWS
+ M /trunk/tabix/TabixReader.java
+ M /trunk/tabix/index.c
+ M /trunk/tabix/main.c
+
+Release tabix-0.2.2
+
+------------------------------------------------------------------------
+r597 | lh3lh3 | 2010-06-13 21:08:29 -0400 (Sun, 13 Jun 2010) | 3 lines
+Changed paths:
+ M /trunk/tabix/index.c
+
+Change the namespace of sorting, to avoid function name collision with samtools.
+
+
+------------------------------------------------------------------------
+r582 | lh3lh3 | 2010-06-03 10:40:25 -0400 (Thu, 03 Jun 2010) | 2 lines
+Changed paths:
+ M /trunk/tabix/NEWS
+ M /trunk/tabix/main.c
+ M /trunk/tabix/tabix.py
+
+Release tabix-0.2.1
+
+------------------------------------------------------------------------
+r581 | lh3lh3 | 2010-05-24 14:24:24 -0400 (Mon, 24 May 2010) | 2 lines
+Changed paths:
+ M /trunk/tabix/tabix.py
+
+OOP interface with the help from Aaron Quinlan
+
+------------------------------------------------------------------------
+r580 | lh3lh3 | 2010-05-23 23:36:05 -0400 (Sun, 23 May 2010) | 2 lines
+Changed paths:
+ M /trunk/tabix/tabix.py
+
+minor change
+
+------------------------------------------------------------------------
+r579 | lh3lh3 | 2010-05-23 23:25:24 -0400 (Sun, 23 May 2010) | 2 lines
+Changed paths:
+ M /trunk/tabix/tabix.py
+
+For Snow Leopard compatibility
+
+------------------------------------------------------------------------
+r575 | lh3lh3 | 2010-05-12 19:31:27 -0400 (Wed, 12 May 2010) | 4 lines
+Changed paths:
+ M /trunk/tabix/Makefile
+ M /trunk/tabix/index.c
+ M /trunk/tabix/tabix.h
+ A /trunk/tabix/tabix.py
+
+ * optionally generate shared library for Mac and Linux
+ * added a python script that directly calls the shared library
+ * added a new API for easy python access
+
+------------------------------------------------------------------------
+r574 | lh3lh3 | 2010-05-11 12:14:27 -0400 (Tue, 11 May 2010) | 2 lines
+Changed paths:
+ M /trunk/tabix/ChangeLog
+ M /trunk/tabix/NEWS
+ M /trunk/tabix/perl/Tabix.pm
+ M /trunk/tabix/perl/TabixIterator.pm
+ M /trunk/tabix/tabix.1
+
+Release tabix-0.2.0
+
------------------------------------------------------------------------
r573 | lh3lh3 | 2010-05-11 12:08:30 -0400 (Tue, 11 May 2010) | 2 lines
Changed paths:
+Beta Release 0.2.3 (8 December, 2010)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Notable changes:
+
+ * Fixed a minor bug where the first record in a headerless file may be
+ missed.
+
+ * Added an option to print header lines.
+
+ * Fixed a rare bug which may occasionally happen when retrieving data
+ from a region without any records.
+
+ * Enhanced error reporting.
+
+ * Fixed a bug in bgzip which may delete the original file even if not
+ intended.
+
+(0.2.3: 8 December 2010, r876)
+
+
+
Beta Release 0.2.2 (28 June, 2010)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
int[] ret = new int[3];
colon = reg.indexOf(':'); hyphen = reg.indexOf('-');
chr = colon >= 0? reg.substring(0, colon) : reg;
- ret[1] = colon >= 0? Integer.parseInt(reg.substring(colon+1, hyphen)) - 1 : 0;
+ ret[1] = colon >= 0? Integer.parseInt(reg.substring(colon+1, hyphen >= 0? hyphen : reg.length())) - 1 : 0;
ret[2] = hyphen >= 0? Integer.parseInt(reg.substring(hyphen+1)) : 0x7fffffff;
ret[0] = chr2tid(chr);
return ret;
for (int j = 0; j < chunks.length; ++j)
if (less64(min_off, chunks[j].v))
off[n_off++] = new TPair64(chunks[j]);
+ if (n_off == 0) return null;
Arrays.sort(off, 0, n_off);
// resolve completely contained adjacent blocks
for (i = 1, l = 0; i < n_off; ++i) {
System.out.println(s);
} else { // a region is specified; random access
TabixReader.Iterator iter = tr.query(args[1]); // get the iterator
- while ((s = iter.next()) != null)
+ while (iter != null && (s = iter.next()) != null)
System.out.println(s);
}
} catch (IOException e) {
fp->error = message;
}
+int is_bgzipped(const char *fn) /* Return 1 if the first 10 bytes of fn equal the magic below, 0 if they differ, -1 if the file cannot be opened or fewer than 10 bytes are read. */
+{
+ BGZF *fp;
+ uint8_t buf[10],magic[10]="\037\213\010\4\0\0\0\0\0\377"; /* gzip header prefix in RFC 1952 field order: ID1 ID2, CM=8 (deflate), FLG=4 (FEXTRA), MTIME=0, XFL=0, OS=255; the array is exactly 10 bytes, so the literal's terminating NUL is not stored */
+ int n;
+
+ if ((fp = bgzf_open(fn, "r")) == 0)
+ {
+ fprintf(stderr, "[is_bgzipped] failed to open the file: %s\n",fn);
+ return -1;
+ }
+
+#ifdef _USE_KNETFILE
+ n = knet_read(fp->x.fpr, buf, 10); /* raw read through the knetfile handle; assumes bgzf_open left the stream at offset 0 -- TODO confirm */
+#else
+ n = fread(buf, 1, 10, fp->file); /* raw read from the underlying stdio stream rather than through bgzf_read; assumes bgzf_open left the stream at offset 0 -- TODO confirm */
+#endif
+ bgzf_close(fp); /* the handle is not needed once the header bytes are in buf */
+
+ if ( n!=10 ) /* a short read is reported as an error (-1), not as "not bgzipped" (0) */
+ return -1;
+
+ if ( !memcmp(magic, buf, 10) ) return 1; /* all 10 header bytes match */
+ return 0;
+}
+
static BGZF *bgzf_read_init()
{
BGZF *fp;
extern "C" {
#endif
+/*
+ * Checks the magic string of the file. Returns 1
+ * for bgzipped files, -1 on errors and 0 for files
+ * without the bgzip magic string.
+ */
+int is_bgzipped(const char *path);
+
/*
* Open an existing file descriptor for reading or writing.
* Mode must be either "r" or "w".
if (bgzf_write(fp, buffer, c) < 0) fail(fp);
// f_dst will be closed here
if (bgzf_close(fp) < 0) fail(fp);
- if (argc > optind) unlink(argv[optind]);
+ if (argc > optind && !pstdout) unlink(argv[optind]);
free(buffer);
close(f_src);
return 0;
return 1;
}
- name = strdup(argv[optind]);
- name[strlen(name) - 3] = '\0';
- f_dst = write_open(name, is_forced);
- free(name);
+ if (pstdout) {
+ f_dst = fileno(stdout);
+ }
+ else {
+ name = strdup(argv[optind]);
+ name[strlen(name) - 3] = '\0';
+ f_dst = write_open(name, is_forced);
+ free(name);
+ }
}
else if (!pstdout && isatty(fileno((FILE *)stdin)) )
return bgzip_main_usage();
l->list[l->n].u = beg; l->list[l->n++].v = end;
}
-static inline void insert_offset2(ti_lidx_t *index2, int _beg, int _end, uint64_t offset)
+static inline uint64_t insert_offset2(ti_lidx_t *index2, int _beg, int _end, uint64_t offset)
{
int i, beg, end;
beg = _beg >> TAD_LIDX_SHIFT;
if (index2->offset[i] == 0) index2->offset[i] = offset;
}
if (index2->n < end + 1) index2->n = end + 1;
+ return (uint64_t)beg<<32 | end;
}
static void merge_chunks(ti_index_t *idx)
ti_index_t *idx;
uint32_t last_bin, save_bin;
int32_t last_coor, last_tid, save_tid;
- uint64_t save_off, last_off, lineno = 0;
+ uint64_t save_off, last_off, lineno = 0, offset0 = (uint64_t)-1, tmp;
kstring_t *str;
str = calloc(1, sizeof(kstring_t));
}
get_intv(idx, str, &intv);
if (last_tid != intv.tid) { // change of chromosomes
+ if (last_tid>intv.tid )
+ {
+ fprintf(stderr,"[ti_index_core] the chromosome blocks not continuous at line %llu, is the file sorted?\n",(unsigned long long)lineno);
+ exit(1);
+ }
last_tid = intv.tid;
last_bin = 0xffffffffu;
} else if (last_coor > intv.beg) {
fprintf(stderr, "[ti_index_core] the file out of order at line %llu\n", (unsigned long long)lineno);
exit(1);
}
- insert_offset2(&idx->index2[intv.tid], intv.beg, intv.end, last_off);
+ tmp = insert_offset2(&idx->index2[intv.tid], intv.beg, intv.end, last_off);
+ if (last_off == 0) offset0 = tmp;
if (intv.bin != last_bin) { // then possibly write the binning index
if (save_bin != 0xffffffffu) // save_bin==0xffffffffu only happens to the first record
insert_offset(idx->index[save_tid], save_bin, save_off, last_off);
if (save_tid >= 0) insert_offset(idx->index[save_tid], save_bin, save_off, bgzf_tell(fp));
merge_chunks(idx);
fill_missing(idx);
+ if (offset0 != (uint64_t)-1 && idx->n && idx->index2[0].offset) {
+ int i, beg = offset0>>32, end = offset0&0xffffffffu;
+ for (i = beg; i <= end; ++i) idx->index2[0].offset[i] = 0;
+ }
free(str->s); free(str);
return idx;
char *fname = get_local_version(fn);
if (fname == 0) return 0;
idx = ti_index_load_local(fname);
+ if (idx == 0) fprintf(stderr, "[ti_index_load] fail to load the index: %s\n", fname);
free(fname);
- if (idx == 0) fprintf(stderr, "[ti_index_load] fail to load BAM index.\n");
return idx;
}
BGZF *fp, *fpidx;
ti_index_t *idx;
if ((fp = bgzf_open(fn, "r")) == 0) {
- fprintf(stderr, "[ti_index_build2] fail to open the BAM file.\n");
+ fprintf(stderr, "[ti_index_build2] fail to open the file: %s\n", fn);
return -1;
}
idx = ti_index_core(fp, conf);
if (p->list[j].v > min_off) off[n_off++] = p->list[j];
}
}
+ if (n_off == 0) {
+ free(bins); free(off); return iter;
+ }
free(bins);
{
int l;
#include "bgzf.h"
#include "tabix.h"
-#define PACKAGE_VERSION "0.2.2 (r603)"
+#define PACKAGE_VERSION "0.2.3 (r876)"
int main(int argc, char *argv[])
{
- int c, skip = -1, meta = -1, list_chrms = 0, force = 0;
+ int c, skip = -1, meta = -1, list_chrms = 0, force = 0, print_header = 0;
ti_conf_t conf = ti_conf_gff;
- while ((c = getopt(argc, argv, "p:s:b:e:0S:c:lf")) >= 0) {
+ while ((c = getopt(argc, argv, "p:s:b:e:0S:c:lhf")) >= 0) {
switch (c) {
case '0': conf.preset |= TI_FLAG_UCSC; break;
case 'S': skip = atoi(optarg); break;
case 'b': conf.bc = atoi(optarg); break;
case 'e': conf.ec = atoi(optarg); break;
case 'l': list_chrms = 1; break;
+ case 'h': print_header = 1; break;
case 'f': force = 1; break;
}
}
fprintf(stderr, " -S INT skip first INT lines [0]\n");
fprintf(stderr, " -c CHAR symbol for comment/meta lines [#]\n");
fprintf(stderr, " -0 zero-based coordinate\n");
+ fprintf(stderr, " -h print the VCF header\n");
fprintf(stderr, " -l list chromosome names\n");
fprintf(stderr, " -f force to overwrite the index\n");
fprintf(stderr, "\n");
}
free(fnidx);
}
+ if ( is_bgzipped(argv[optind])!=1 )
+ {
+ fprintf(stderr,"[tabix] was bgzip used to compress this file? %s\n", argv[optind]);
+ return 1;
+ }
return ti_index_build(argv[optind], &conf);
}
{ // retrieve
ti_iter_destroy(iter);
} else { // retrieve from specified regions
int i;
- ti_lazy_index_load(t);
+ if ( ti_lazy_index_load(t) )
+ {
+ fprintf(stderr,"[tabix] failed to load the index file.\n");
+ return 1;
+ }
+
+ ti_iter_t iter;
+ const char *s;
+ int len;
+ if ( print_header )
+ {
+ // If requested, print the header lines here
+ iter = ti_query(t, 0, 0, 0);
+ while ((s = ti_read(t, iter, &len)) != 0) {
+ if ( *s != '#' ) break;
+ fputs(s, stdout); fputc('\n', stdout);
+ }
+ ti_iter_destroy(iter);
+ }
for (i = optind + 1; i < argc; ++i) {
int tid, beg, end;
if (ti_parse_region(t->idx, argv[i], &tid, &beg, &end) == 0) {
- ti_iter_t iter;
- const char *s;
- int len;
iter = ti_queryi(t, tid, beg, end);
while ((s = ti_read(t, iter, &len)) != 0) {
fputs(s, stdout); fputc('\n', stdout);
}
ti_iter_destroy(iter);
- } else fprintf(stderr, "[main] invalid region: unknown target name or minus interval.\n");
+ }
+ // else fprintf(stderr, "[main] invalid region: unknown target name or minus interval.\n");
}
}
ti_close(t);