From 2112b1dba17e822d8028151605b4cdc01f0a4c3d Mon Sep 17 00:00:00 2001
From: Charles Plessy <plessy@debian.org>
Date: Tue, 12 Apr 2011 14:57:33 +0900
Subject: [PATCH] Imported Upstream version 0.2.4

---
 ChangeLog            | 103 +++++++++++
 Makefile             |   3 +-
 NEWS                 |  23 +++
 bedidx.c             | 152 ++++++++++++++++
 bgzf.c               | 122 +++++++------
 bgzf.h               |  14 +-
 example.gtf.gz.tbi   | Bin 260 -> 259 bytes
 index.c              |  80 ++++++---
 kseq.h               | 227 ++++++++++++++++++++++++
 main.c               | 206 ++++++++++++++++++----
 python/setup.py      |  55 ++++++
 python/tabixmodule.c | 408 +++++++++++++++++++++++++++++++++++++++++++
 python/test.py       |  91 ++++++++++
 tabix.1              |  10 +-
 tabix.h              |   8 +
 15 files changed, 1379 insertions(+), 123 deletions(-)
 create mode 100644 bedidx.c
 create mode 100644 kseq.h
 create mode 100644 python/setup.py
 create mode 100644 python/tabixmodule.c
 create mode 100755 python/test.py

diff --git a/ChangeLog b/ChangeLog
index 3594496..fd335b8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,106 @@
+------------------------------------------------------------------------
+r942 | lh3lh3 | 2011-03-31 16:39:50 -0400 (Thu, 31 Mar 2011) | 2 lines
+Changed paths:
+   M /trunk/tabix/main.c
+
+update version number
+
+------------------------------------------------------------------------
+r940 | lh3lh3 | 2011-03-31 16:38:03 -0400 (Thu, 31 Mar 2011) | 2 lines
+Changed paths:
+   M /trunk/tabix/bedidx.c
+   M /trunk/tabix/main.c
+
+fixed two bugs due to recent changes
+
+------------------------------------------------------------------------
+r939 | lh3lh3 | 2011-03-31 16:12:21 -0400 (Thu, 31 Mar 2011) | 2 lines
+Changed paths:
+   M /trunk/tabix/bgzf.c
+   M /trunk/tabix/bgzf.h
+   M /trunk/tabix/main.c
+
+update to the latest bgzf.*
+
+------------------------------------------------------------------------
+r938 | lh3lh3 | 2011-03-31 16:02:21 -0400 (Thu, 31 Mar 2011) | 2 lines
+Changed paths:
+   M /trunk/tabix/index.c
+   M /trunk/tabix/main.c
+   M /trunk/tabix/tabix.h
+
+BED support
+
+------------------------------------------------------------------------
+r937 | lh3lh3 | 2011-03-31 15:03:49 -0400 (Thu, 31 Mar 2011) | 2 lines
+Changed paths:
+   M /trunk/tabix/Makefile
+   A /trunk/tabix/bedidx.c
+   M /trunk/tabix/example.gtf.gz.tbi
+   M /trunk/tabix/index.c
+   A /trunk/tabix/kseq.h
+   M /trunk/tabix/tabix.h
+
+restructure get_intv() for BED support
+
+------------------------------------------------------------------------
+r919 | petulda | 2011-02-24 10:14:14 -0500 (Thu, 24 Feb 2011) | 1 line
+Changed paths:
+   M /trunk/tabix/bgzf.c
+   M /trunk/tabix/bgzf.h
+   M /trunk/tabix/index.c
+   M /trunk/tabix/main.c
+
+New -r (reheader) option for efficient header replacement.
+------------------------------------------------------------------------
+r915 | lh3lh3 | 2011-02-22 09:50:57 -0500 (Tue, 22 Feb 2011) | 2 lines
+Changed paths:
+   A /trunk/tabix/python
+   A /trunk/tabix/python/setup.py (from /trunk/tabix/setup.py:914)
+   A /trunk/tabix/python/tabixmodule.c (from /trunk/tabix/tabixmodule.c:914)
+   A /trunk/tabix/python/test.py (from /trunk/tabix/test.py:914)
+   D /trunk/tabix/setup.py
+   D /trunk/tabix/tabixmodule.c
+   D /trunk/tabix/test.py
+
+move to a new python/ directory
+
+------------------------------------------------------------------------
+r914 | lh3lh3 | 2011-02-22 09:49:35 -0500 (Tue, 22 Feb 2011) | 2 lines
+Changed paths:
+   A /trunk/tabix/setup.py
+   A /trunk/tabix/tabixmodule.c
+   A /trunk/tabix/test.py
+
+CPython C-API by Hyeshik Chang
+
+------------------------------------------------------------------------
+r904 | petulda | 2011-01-28 08:06:27 -0500 (Fri, 28 Jan 2011) | 1 line
+Changed paths:
+   M /trunk/tabix/index.c
+
+Check the number of fields on each line and exit nicely without segfault
+------------------------------------------------------------------------
+r901 | petulda | 2011-01-21 06:45:37 -0500 (Fri, 21 Jan 2011) | 1 line
+Changed paths:
+   M /trunk/tabix/main.c
+
+Fix: Complain only when VCF is newer, not newer or same mtime
+------------------------------------------------------------------------
+r900 | petulda | 2011-01-21 04:23:04 -0500 (Fri, 21 Jan 2011) | 1 line
+Changed paths:
+   M /trunk/tabix/main.c
+
+Prevent the common user mistake and check the timestamps of the vcf and index file
+------------------------------------------------------------------------
+r876 | lh3lh3 | 2010-12-08 12:38:45 -0500 (Wed, 08 Dec 2010) | 2 lines
+Changed paths:
+   M /trunk/tabix/ChangeLog
+   M /trunk/tabix/NEWS
+   M /trunk/tabix/main.c
+
+Release tabix-0.2.3
+
 ------------------------------------------------------------------------
 r875 | lh3lh3 | 2010-12-08 12:28:35 -0500 (Wed, 08 Dec 2010) | 2 lines
 Changed paths:
diff --git a/Makefile b/Makefile
index 9013642..3ba7d5d 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 CC=			gcc
 CFLAGS=		-g -Wall -O2 -fPIC #-m64 #-arch ppc
 DFLAGS=		-D_FILE_OFFSET_BITS=64 -D_USE_KNETFILE
-LOBJS=		bgzf.o kstring.o knetfile.o index.o
+LOBJS=		bgzf.o kstring.o knetfile.o index.o bedidx.o
 AOBJS=		main.o
 PROG=		tabix bgzip
 INCLUDES=
@@ -52,6 +52,7 @@ bgzf.o:bgzf.h knetfile.h
 index.o:bgzf.h tabix.h khash.h ksort.h kstring.h
 main.o:tabix.h kstring.h bgzf.h
 bgzip.o:bgzf.h
+bedidx.o:kseq.h khash.h
 
 tabix.pdf:tabix.tex
 		pdflatex tabix.tex
diff --git a/NEWS b/NEWS
index ff28ded..d230541 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,26 @@
+Release 0.2.4 (10 April, 2011)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Notable changes:
+
+ * Give an error if the index file is older than the data file.
+
+ * Avoid a segfault given flawed input.
+
+ * Added Python APIs contributed by Hyeshik Chang. The new APIs do not bind to
+   the dynamic library and are reported to be faster. Pysam also comes with a
+   tabix binding.
+
+ * Added option "-r" for efficient header replacement.
+
+ * Added BED support.
+
+ * Synchronized the BGZF library between tabix and samtools.
+
+(0.2.4: 10 April 2011, r949)
+
+
+
 Beta Release 0.2.3 (8 December, 2010)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/bedidx.c b/bedidx.c
new file mode 100644
index 0000000..9297831
--- /dev/null
+++ b/bedidx.c
@@ -0,0 +1,152 @@
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include <zlib.h>
+
+#include "kseq.h"
+KSTREAM_INIT(gzFile, gzread, 8192)
+
+typedef struct {
+	int n, m;
+	uint64_t *a;
+	int *idx;
+} bed_reglist_t;
+
+#include "khash.h"
+KHASH_MAP_INIT_STR(reg, bed_reglist_t)
+
+#define LIDX_SHIFT 13
+
+typedef kh_reg_t reghash_t;
+
+int *bed_index_core(int n, uint64_t *a, int *n_idx)
+{
+	int i, j, m, *idx;
+	m = *n_idx = 0; idx = 0;
+	for (i = 0; i < n; ++i) {
+		int beg, end;
+		beg = a[i]>>32 >> LIDX_SHIFT; end = ((uint32_t)a[i]) >> LIDX_SHIFT;
+		if (m < end + 1) {
+			int oldm = m;
+			m = end + 1;
+			kroundup32(m);
+			idx = realloc(idx, m * sizeof(int));
+			for (j = oldm; j < m; ++j) idx[j] = -1;
+		}
+		if (beg == end) {
+			if (idx[beg] < 0) idx[beg] = i;
+		} else {
+			for (j = beg; j <= end; ++j)
+				if (idx[j] < 0) idx[j] = i;
+		}
+		*n_idx = end + 1;
+	}
+	return idx;
+}
+
+void bed_index(void *_h)
+{
+	reghash_t *h = (reghash_t*)_h;
+	khint_t k;
+	for (k = 0; k < kh_end(h); ++k) {
+		if (kh_exist(h, k)) {
+			bed_reglist_t *p = &kh_val(h, k);
+			if (p->idx) free(p->idx);
+			p->idx = bed_index_core(p->n, p->a, &p->m);
+		}
+	}
+}
+
+int bed_overlap_core(const bed_reglist_t *p, int beg, int end)
+{
+	int i, min_off;
+	if (p->n == 0) return 0;
+	min_off = (beg>>LIDX_SHIFT >= p->n)? p->idx[p->n-1] : p->idx[beg>>LIDX_SHIFT];
+	if (min_off < 0) { // TODO: this block can be improved, but speed should not matter too much here
+		int n = beg>>LIDX_SHIFT;
+		if (n > p->n) n = p->n;
+		for (i = n - 1; i >= 0; --i)
+			if (p->idx[i] >= 0) break;
+		min_off = i >= 0? p->idx[i] : 0;
+	}
+	for (i = min_off; i < p->n; ++i) {
+		if ((int)(p->a[i]>>32) >= end) break; // out of range; no need to proceed
+		if ((int32_t)p->a[i] > beg && (int32_t)(p->a[i]>>32) < end)
+			return 1; // find the overlap; return
+	}
+	return 0;
+}
+
+int bed_overlap(const void *_h, const char *chr, int beg, int end)
+{
+	const reghash_t *h = (const reghash_t*)_h;
+	khint_t k;
+	if (!h) return 0;
+	k = kh_get(reg, h, chr);
+	if (k == kh_end(h)) return 0;
+	return bed_overlap_core(&kh_val(h, k), beg, end);
+}
+
+void *bed_read(const char *fn)
+{
+	reghash_t *h = kh_init(reg);
+	gzFile fp;
+	kstream_t *ks;
+	int dret;
+	kstring_t *str;
+	// read the list
+	fp = strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
+	if (fp == 0) return 0;
+	str = calloc(1, sizeof(kstring_t));
+	ks = ks_init(fp);
+	while (ks_getuntil(ks, 0, str, &dret) >= 0) { // read the chr name
+		int beg = -1, end = -1;
+		bed_reglist_t *p;
+		khint_t k = kh_get(reg, h, str->s);
+		if (k == kh_end(h)) { // absent from the hash table
+			int ret;
+			char *s = strdup(str->s);
+			k = kh_put(reg, h, s, &ret);
+			memset(&kh_val(h, k), 0, sizeof(bed_reglist_t));
+		}
+		p = &kh_val(h, k);
+		if (dret != '\n') { // if the lines has other characters
+			if (ks_getuntil(ks, 0, str, &dret) > 0 && isdigit(str->s[0])) {
+				beg = atoi(str->s); // begin
+				if (dret != '\n') {
+					if (ks_getuntil(ks, 0, str, &dret) > 0 && isdigit(str->s[0]))
+						end = atoi(str->s); // end
+				}
+			}
+		}
+		if (dret != '\n') while ((dret = ks_getc(ks)) > 0 && dret != '\n'); // skip the rest of the line
+		if (end < 0 && beg > 0) end = beg, beg = beg - 1; // if there is only one column
+		if (beg >= 0 && end > beg) {
+			if (p->n == p->m) {
+				p->m = p->m? p->m<<1 : 4;
+				p->a = realloc(p->a, p->m * 8);
+			}
+			p->a[p->n++] = (uint64_t)beg<<32 | end;
+		}
+	}
+	ks_destroy(ks);
+	gzclose(fp);
+	free(str->s); free(str);
+	bed_index(h);
+	return h;
+}
+
+void bed_destroy(void *_h)
+{
+	reghash_t *h = (reghash_t*)_h;
+	khint_t k;
+	for (k = 0; k < kh_end(h); ++k) {
+		if (kh_exist(h, k)) {
+			free(kh_val(h, k).a);
+			free(kh_val(h, k).idx);
+			free((char*)kh_key(h, k));
+		}
+	}
+	kh_destroy(reg, h);
+}
diff --git a/bgzf.c b/bgzf.c
index 94e6194..216cd04 100644
--- a/bgzf.c
+++ b/bgzf.c
@@ -111,7 +111,7 @@ report_error(BGZF* fp, const char* message) {
     fp->error = message;
 }
 
-int is_bgzipped(const char *fn)
+int bgzf_check_bgzf(const char *fn)
 {
     BGZF *fp;
     uint8_t buf[10],magic[10]="\037\213\010\4\0\0\0\0\0\377";
@@ -119,7 +119,7 @@ int is_bgzipped(const char *fn)
 
     if ((fp = bgzf_open(fn, "r")) == 0) 
     {
-        fprintf(stderr, "[is_bgzipped] failed to open the file: %s\n",fn);
+        fprintf(stderr, "[bgzf_check_bgzf] failed to open the file: %s\n",fn);
         return -1;
     }
 
@@ -174,7 +174,7 @@ open_read(int fd)
 
 static
 BGZF*
-open_write(int fd, bool is_uncompressed)
+open_write(int fd, int compress_level) // compress_level==-1 for the default level
 {
     FILE* file = fdopen(fd, "w");
     BGZF* fp;
@@ -182,7 +182,9 @@ open_write(int fd, bool is_uncompressed)
 	fp = malloc(sizeof(BGZF));
     fp->file_descriptor = fd;
     fp->open_mode = 'w';
-    fp->owned_file = 0; fp->is_uncompressed = is_uncompressed;
+    fp->owned_file = 0;
+	fp->compress_level = compress_level < 0? Z_DEFAULT_COMPRESSION : compress_level; // Z_DEFAULT_COMPRESSION==-1
+	if (fp->compress_level > 9) fp->compress_level = Z_DEFAULT_COMPRESSION;
 #ifdef _USE_KNETFILE
     fp->x.fpw = file;
 #else
@@ -203,7 +205,7 @@ BGZF*
 bgzf_open(const char* __restrict path, const char* __restrict mode)
 {
     BGZF* fp = NULL;
-    if (mode[0] == 'r' || mode[0] == 'R') { /* The reading mode is preferred. */
+    if (strchr(mode, 'r') || strchr(mode, 'R')) { /* The reading mode is preferred. */
 #ifdef _USE_KNETFILE
 		knetFile *file = knet_open(path, mode);
 		if (file == 0) return 0;
@@ -220,18 +222,23 @@ bgzf_open(const char* __restrict path, const char* __restrict mode)
 		if (fd == -1) return 0;
         fp = open_read(fd);
 #endif
-    } else if (mode[0] == 'w' || mode[0] == 'W') {
-		int fd, oflag = O_WRONLY | O_CREAT | O_TRUNC;
+    } else if (strchr(mode, 'w') || strchr(mode, 'W')) {
+		int fd, compress_level = -1, oflag = O_WRONLY | O_CREAT | O_TRUNC;
 #ifdef _WIN32
 		oflag |= O_BINARY;
 #endif
 		fd = open(path, oflag, 0666);
 		if (fd == -1) return 0;
-        fp = open_write(fd, strstr(mode, "u")? 1 : 0);
-    }
-    if (fp != NULL) {
-        fp->owned_file = 1;
+		{ // set compress_level
+			int i;
+			for (i = 0; mode[i]; ++i)
+				if (mode[i] >= '0' && mode[i] <= '9') break;
+			if (mode[i]) compress_level = (int)mode[i] - '0';
+			if (strchr(mode, 'u')) compress_level = 0;
+		}
+        fp = open_write(fd, compress_level);
     }
+    if (fp != NULL) fp->owned_file = 1;
     return fp;
 }
 
@@ -242,7 +249,12 @@ bgzf_fdopen(int fd, const char * __restrict mode)
     if (mode[0] == 'r' || mode[0] == 'R') {
         return open_read(fd);
     } else if (mode[0] == 'w' || mode[0] == 'W') {
-        return open_write(fd, strstr(mode, "u")? 1 : 0);
+		int i, compress_level = -1;
+		for (i = 0; mode[i]; ++i)
+			if (mode[i] >= '0' && mode[i] <= '9') break;
+		if (mode[i]) compress_level = (int)mode[i] - '0';
+		if (strchr(mode, 'u')) compress_level = 0;
+        return open_write(fd, compress_level);
     } else {
         return NULL;
     }
@@ -282,7 +294,6 @@ deflate_block(BGZF* fp, int block_length)
     int input_length = block_length;
     int compressed_length = 0;
     while (1) {
-		int compress_level = fp->is_uncompressed? 0 : Z_DEFAULT_COMPRESSION;
         z_stream zs;
         zs.zalloc = NULL;
         zs.zfree = NULL;
@@ -291,7 +302,7 @@ deflate_block(BGZF* fp, int block_length)
         zs.next_out = (void*)&buffer[BLOCK_HEADER_LENGTH];
         zs.avail_out = buffer_size - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
 
-        int status = deflateInit2(&zs, compress_level, Z_DEFLATED,
+        int status = deflateInit2(&zs, fp->compress_level, Z_DEFLATED,
                                   GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY);
         if (status != Z_OK) {
             report_error(fp, "deflate init failed");
@@ -358,6 +369,7 @@ inflate_block(BGZF* fp, int block_length)
     // Inflate the block in fp->compressed_block into fp->uncompressed_block
 
     z_stream zs;
+	int status;
     zs.zalloc = NULL;
     zs.zfree = NULL;
     zs.next_in = fp->compressed_block + 18;
@@ -365,7 +377,7 @@ inflate_block(BGZF* fp, int block_length)
     zs.next_out = fp->uncompressed_block;
     zs.avail_out = fp->uncompressed_block_size;
 
-    int status = inflateInit2(&zs, GZIP_WINDOW_BITS);
+    status = inflateInit2(&zs, GZIP_WINDOW_BITS);
     if (status != Z_OK) {
         report_error(fp, "inflate init failed");
         return -1;
@@ -459,15 +471,15 @@ int
 bgzf_read_block(BGZF* fp)
 {
     bgzf_byte_t header[BLOCK_HEADER_LENGTH];
-	int size = 0;
+	int count, size = 0, block_length, remaining;
 #ifdef _USE_KNETFILE
     int64_t block_address = knet_tell(fp->x.fpr);
 	if (load_block_from_cache(fp, block_address)) return 0;
-    int count = knet_read(fp->x.fpr, header, sizeof(header));
+    count = knet_read(fp->x.fpr, header, sizeof(header));
 #else
     int64_t block_address = ftello(fp->file);
 	if (load_block_from_cache(fp, block_address)) return 0;
-    int count = fread(header, 1, sizeof(header), fp->file);
+    count = fread(header, 1, sizeof(header), fp->file);
 #endif
     if (count == 0) {
         fp->block_length = 0;
@@ -482,10 +494,10 @@ bgzf_read_block(BGZF* fp)
         report_error(fp, "invalid block header");
         return -1;
     }
-    int block_length = unpackInt16((uint8_t*)&header[16]) + 1;
+    block_length = unpackInt16((uint8_t*)&header[16]) + 1;
     bgzf_byte_t* compressed_block = (bgzf_byte_t*) fp->compressed_block;
     memcpy(compressed_block, header, BLOCK_HEADER_LENGTH);
-    int remaining = block_length - BLOCK_HEADER_LENGTH;
+    remaining = block_length - BLOCK_HEADER_LENGTH;
 #ifdef _USE_KNETFILE
     count = knet_read(fp->x.fpr, &compressed_block[BLOCK_HEADER_LENGTH], remaining);
 #else
@@ -497,9 +509,7 @@ bgzf_read_block(BGZF* fp)
     }
 	size += count;
     count = inflate_block(fp, block_length);
-    if (count < 0) {
-        return -1;
-    }
+    if (count < 0) return -1;
     if (fp->block_length != 0) {
         // Do not reset offset if this read follows a seek.
         fp->block_offset = 0;
@@ -524,7 +534,8 @@ bgzf_read(BGZF* fp, void* data, int length)
     int bytes_read = 0;
     bgzf_byte_t* output = data;
     while (bytes_read < length) {
-        int available = fp->block_length - fp->block_offset;
+        int copy_length, available = fp->block_length - fp->block_offset;
+		bgzf_byte_t *buffer;
         if (available <= 0) {
             if (bgzf_read_block(fp) != 0) {
                 return -1;
@@ -534,8 +545,8 @@ bgzf_read(BGZF* fp, void* data, int length)
                 break;
             }
         }
-        int copy_length = bgzf_min(length-bytes_read, available);
-        bgzf_byte_t* buffer = fp->uncompressed_block;
+        copy_length = bgzf_min(length-bytes_read, available);
+        buffer = fp->uncompressed_block;
         memcpy(output, buffer + fp->block_offset, copy_length);
         fp->block_offset += copy_length;
         output += copy_length;
@@ -553,19 +564,16 @@ bgzf_read(BGZF* fp, void* data, int length)
     return bytes_read;
 }
 
-static
-int
-flush_block(BGZF* fp)
+int bgzf_flush(BGZF* fp)
 {
     while (fp->block_offset > 0) {
-        int block_length = deflate_block(fp, fp->block_offset);
-        if (block_length < 0) {
-            return -1;
-        }
+        int count, block_length;
+		block_length = deflate_block(fp, fp->block_offset);
+        if (block_length < 0) return -1;
 #ifdef _USE_KNETFILE
-        int count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw);
+        count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw);
 #else
-        int count = fwrite(fp->compressed_block, 1, block_length, fp->file);
+        count = fwrite(fp->compressed_block, 1, block_length, fp->file);
 #endif
         if (count != block_length) {
             report_error(fp, "write failed");
@@ -576,21 +584,28 @@ flush_block(BGZF* fp)
     return 0;
 }
 
-int
-bgzf_write(BGZF* fp, const void* data, int length)
+int bgzf_flush_try(BGZF *fp, int size)
 {
+	if (fp->block_offset + size > fp->uncompressed_block_size)
+		return bgzf_flush(fp);
+	return -1;
+}
+
+int bgzf_write(BGZF* fp, const void* data, int length)
+{
+	const bgzf_byte_t *input = data;
+	int block_length, bytes_written;
     if (fp->open_mode != 'w') {
         report_error(fp, "file not open for writing");
         return -1;
     }
 
-    if (fp->uncompressed_block == NULL) {
+    if (fp->uncompressed_block == NULL)
         fp->uncompressed_block = malloc(fp->uncompressed_block_size);
-    }
 
-    const bgzf_byte_t* input = data;
-    int block_length = fp->uncompressed_block_size;
-    int bytes_written = 0;
+    input = data;
+    block_length = fp->uncompressed_block_size;
+    bytes_written = 0;
     while (bytes_written < length) {
         int copy_length = bgzf_min(block_length - fp->block_offset, length - bytes_written);
         bgzf_byte_t* buffer = fp->uncompressed_block;
@@ -599,7 +614,7 @@ bgzf_write(BGZF* fp, const void* data, int length)
         input += copy_length;
         bytes_written += copy_length;
         if (fp->block_offset == block_length) {
-            if (flush_block(fp) != 0) {
+            if (bgzf_flush(fp) != 0) {
                 break;
             }
         }
@@ -607,13 +622,10 @@ bgzf_write(BGZF* fp, const void* data, int length)
     return bytes_written;
 }
 
-int
-bgzf_close(BGZF* fp)
+int bgzf_close(BGZF* fp)
 {
     if (fp->open_mode == 'w') {
-        if (flush_block(fp) != 0) {
-            return -1;
-        }
+        if (bgzf_flush(fp) != 0) return -1;
 		{ // add an empty block
 			int count, block_length = deflate_block(fp, 0);
 #ifdef _USE_KNETFILE
@@ -638,9 +650,7 @@ bgzf_close(BGZF* fp)
 		else ret = knet_close(fp->x.fpr);
         if (ret != 0) return -1;
 #else
-        if (fclose(fp->file) != 0) {
-            return -1;
-        }
+        if (fclose(fp->file) != 0) return -1;
 #endif
     }
     free(fp->uncompressed_block);
@@ -674,9 +684,11 @@ int bgzf_check_EOF(BGZF *fp)
 	return (memcmp(magic, buf, 28) == 0)? 1 : 0;
 }
 
-int64_t
-bgzf_seek(BGZF* fp, int64_t pos, int where)
+int64_t bgzf_seek(BGZF* fp, int64_t pos, int where)
 {
+	int block_offset;
+	int64_t block_address;
+
     if (fp->open_mode != 'r') {
         report_error(fp, "file not open for read");
         return -1;
@@ -685,8 +697,8 @@ bgzf_seek(BGZF* fp, int64_t pos, int where)
         report_error(fp, "unimplemented seek option");
         return -1;
     }
-    int block_offset = pos & 0xFFFF;
-    int64_t block_address = (pos >> 16) & 0xFFFFFFFFFFFFLL;
+    block_offset = pos & 0xFFFF;
+    block_address = (pos >> 16) & 0xFFFFFFFFFFFFLL;
 #ifdef _USE_KNETFILE
     if (knet_seek(fp->x.fpr, block_address, SEEK_SET) != 0) {
 #else
diff --git a/bgzf.h b/bgzf.h
index 70f497e..7295f37 100644
--- a/bgzf.h
+++ b/bgzf.h
@@ -26,7 +26,6 @@
 
 #include <stdint.h>
 #include <stdio.h>
-#include <stdbool.h>
 #include <zlib.h>
 #ifdef _USE_KNETFILE
 #include "knetfile.h"
@@ -37,7 +36,7 @@
 typedef struct {
     int file_descriptor;
     char open_mode;  // 'r' or 'w'
-    bool owned_file, is_uncompressed;
+    int16_t owned_file, compress_level;
 #ifdef _USE_KNETFILE
 	union {
 		knetFile *fpr;
@@ -62,13 +61,6 @@ typedef struct {
 extern "C" {
 #endif
 
-/* 
- * Checks the magic string of the file. Returns 1
- *  for bgzipped files, -1 on errors and 0 for files
- *  without the bgzip magic string.
- */
-int is_bgzipped(const char *path);
-
 /*
  * Open an existing file descriptor for reading or writing.
  * Mode must be either "r" or "w".
@@ -133,8 +125,10 @@ int64_t bgzf_seek(BGZF* fp, int64_t pos, int where);
 void bgzf_set_cache_size(BGZF *fp, int cache_size);
 
 int bgzf_check_EOF(BGZF *fp);
-
 int bgzf_read_block(BGZF* fp);
+int bgzf_flush(BGZF* fp);
+int bgzf_flush_try(BGZF *fp, int size);
+int bgzf_check_bgzf(const char *fn);
 
 #ifdef __cplusplus
 }
diff --git a/example.gtf.gz.tbi b/example.gtf.gz.tbi
index eb3f24c6ba9cd8cf5c4551ced2edfd08518ce9e7..84a3c841cb94ec6287a948bc8ef238afcd653736 100644
GIT binary patch
delta 105
zcmV-v0G9uR0)qk(ABzYC000000RIL6LPG)o=8+MGOfUco3v|E0+}gh#mHx8>)oz&C
zpwNefuagACo(Kg99RhVnv=)TVz_5%DM8d)s<}O!d2n`Eg7$2sO2g%(qJutf{rD1je
L01K{~J(1)fkbx&9

delta 135
zcmZo>YGD$T@8)1(0D=E(3{K8W49_PDw&kh?@E<<JbXVVC`?q}erT^bJY?V$k)A*A3
z@U;r#Ms+7fr*?<Cf@@hH8XM?5W}dj=L+j4dp^{FA4~ZXZfB87kfnkQ+jMyoc46GTh
VO*wESjER8(-4<yEX0W9o0s!N(EW7{!

diff --git a/index.c b/index.c
index 4ed32c2..8f254e3 100644
--- a/index.c
+++ b/index.c
@@ -156,32 +156,31 @@ static int get_tid(ti_index_t *idx, const char *ss)
 	return tid;
 }
 
-static int get_intv(ti_index_t *idx, kstring_t *str, ti_intv_t *intv)
+int ti_get_intv(const ti_conf_t *conf, int len, char *line, ti_interval_t *intv)
 {
-	int i, b = 0, id = 1;
+	int i, b = 0, id = 1, ncols = 0;
 	char *s;
-	intv->tid = intv->beg = intv->end = intv->bin = -1;
-	for (i = 0; i <= str->l; ++i) {
-		if (str->s[i] == '\t' || str->s[i] == 0) {
-			if (id == idx->conf.sc) {
-				str->s[i] = 0;
-				intv->tid = get_tid(idx, str->s + b);
-				if (i != str->l) str->s[i] = '\t';
-			} else if (id == idx->conf.bc) {
+	intv->ss = intv->se = 0; intv->beg = intv->end = -1;
+	for (i = 0; i <= len; ++i) {
+		if (line[i] == '\t' || line[i] == 0) {
+            ++ncols;
+			if (id == conf->sc) {
+				intv->ss = line + b; intv->se = line + i;
+			} else if (id == conf->bc) {
 				// here ->beg is 0-based.
-				intv->beg = intv->end = strtol(str->s + b, &s, 0);
-				if (!(idx->conf.preset&TI_FLAG_UCSC)) --intv->beg;
+				intv->beg = intv->end = strtol(line + b, &s, 0);
+				if (!(conf->preset&TI_FLAG_UCSC)) --intv->beg;
 				else ++intv->end;
 				if (intv->beg < 0) intv->beg = 0;
 				if (intv->end < 1) intv->end = 1;
 			} else {
-				if ((idx->conf.preset&0xffff) == TI_PRESET_GENERIC) {
-					if (id == idx->conf.ec) intv->end = strtol(str->s + b, &s, 0);
-				} else if ((idx->conf.preset&0xffff) == TI_PRESET_SAM) {
+				if ((conf->preset&0xffff) == TI_PRESET_GENERIC) {
+					if (id == conf->ec) intv->end = strtol(line + b, &s, 0);
+				} else if ((conf->preset&0xffff) == TI_PRESET_SAM) {
 					if (id == 6) { // CIGAR
 						int l = 0, op;
 						char *t;
-						for (s = str->s + b; s < str->s + i;) {
+						for (s = line + b; s < line + i;) {
 							long x = strtol(s, &t, 10);
 							op = toupper(*t);
 							if (op == 'M' || op == 'D' || op == 'N') l += x;
@@ -190,21 +189,21 @@ static int get_intv(ti_index_t *idx, kstring_t *str, ti_intv_t *intv)
 						if (l == 0) l = 1;
 						intv->end = intv->beg + l;
 					}
-				} else if ((idx->conf.preset&0xffff) == TI_PRESET_VCF) {
+				} else if ((conf->preset&0xffff) == TI_PRESET_VCF) {
 					// FIXME: the following is NOT tested and is likely to be buggy
 					if (id == 4) {
 						if (b < i) intv->end = intv->beg + (i - b);
 					} else if (id == 8) { // look for "END="
-						int c = str->s[i];
-						str->s[i] = 0;
-						s = strstr(str->s + b, "END=");
-						if (s == str->s + b) s += 4;
+						int c = line[i];
+						line[i] = 0;
+						s = strstr(line + b, "END=");
+						if (s == line + b) s += 4;
 						else if (s) {
-							s = strstr(str->s + b, ";END=");
+							s = strstr(line + b, ";END=");
 							if (s) s += 5;
 						}
 						if (s) intv->end = strtol(s, &s, 0);
-						str->s[i] = c;
+						line[i] = c;
 					}
 				}
 			}
@@ -212,11 +211,33 @@ static int get_intv(ti_index_t *idx, kstring_t *str, ti_intv_t *intv)
 			++id;
 		}
 	}
-	if (intv->tid < 0 || intv->beg < 0 || intv->end < 0) return -1;
-	intv->bin = ti_reg2bin(intv->beg, intv->end);
+/*
+	if (ncols < conf->sc || ncols < conf->bc || ncols < conf->ec) {
+		if (ncols == 1) fprintf(stderr,"[get_intv] Is the file tab-delimited? The line has %d field only: %s\n", ncols, line);
+		else fprintf(stderr,"[get_intv] The line has %d field(s) only: %s\n", ncols, line);
+		exit(1);
+	}
+*/
+	if (intv->ss == 0 || intv->se == 0 || intv->beg < 0 || intv->end < 0) return -1;
 	return 0;
 }
 
+static int get_intv(ti_index_t *idx, kstring_t *str, ti_intv_t *intv)
+{
+	ti_interval_t x;
+	intv->tid = intv->beg = intv->end = intv->bin = -1;
+	if (ti_get_intv(&idx->conf, str->l, str->s, &x) == 0) {
+		int c = *x.se;
+		*x.se = '\0'; intv->tid = get_tid(idx, x.ss); *x.se = c;
+		intv->beg = x.beg; intv->end = x.end;
+		intv->bin = ti_reg2bin(intv->beg, intv->end);
+		return (intv->tid >= 0 && intv->beg >= 0 && intv->end >= 0)? 0 : -1;
+	} else {
+		fprintf(stderr, "[%s] the following line cannot be parsed and skipped: %s\n", __func__, str->s);
+		return -1;
+	}
+}
+
 /************
  * indexing *
  ************/
@@ -322,10 +343,15 @@ ti_index_t *ti_index_core(BGZF *fp, const ti_conf_t *conf)
 			continue;
 		}
 		get_intv(idx, str, &intv);
+        if ( intv.beg<0 || intv.end<0 )
+        {
+            fprintf(stderr,"[ti_index_core] the indexes overlap or are out of bounds\n");
+            exit(1);
+        }
 		if (last_tid != intv.tid) { // change of chromosomes
             if (last_tid>intv.tid )
             {
-                fprintf(stderr,"[ti_index_core] the chromosome blocks not continuous at line %llu, is the file sorted?\n",(unsigned long long)lineno);
+                fprintf(stderr,"[ti_index_core] the chromosome blocks not continuous at line %llu, is the file sorted? [pos %d]\n",(unsigned long long)lineno,intv.beg+1);
                 exit(1);
             }
 			last_tid = intv.tid;
@@ -902,6 +928,8 @@ int ti_fetch(BGZF *fp, const ti_index_t *idx, int tid, int beg, int end, void *d
 	return 0;
 }
 
+const ti_conf_t *ti_get_conf(ti_index_t *idx) { return idx? &idx->conf : 0; }
+
 /*******************
  * High-level APIs *
  *******************/
diff --git a/kseq.h b/kseq.h
new file mode 100644
index 0000000..82face0
--- /dev/null
+++ b/kseq.h
@@ -0,0 +1,227 @@
+/* The MIT License
+
+   Copyright (c) 2008 Genome Research Ltd (GRL).
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/* Contact: Heng Li <lh3@sanger.ac.uk> */
+
+/*
+  2009-07-16 (lh3): in kstream_t, change "char*" to "unsigned char*"
+ */
+
+/* Last Modified: 12APR2009 */
+
+#ifndef AC_KSEQ_H
+#define AC_KSEQ_H
+
+#include <ctype.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r
+#define KS_SEP_TAB   1 // isspace() && !' '
+#define KS_SEP_MAX   1
+
+#define __KS_TYPE(type_t)						\
+	typedef struct __kstream_t {				\
+		unsigned char *buf;						\
+		int begin, end, is_eof;					\
+		type_t f;								\
+	} kstream_t;
+
+#define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end)
+#define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0)
+
+#define __KS_BASIC(type_t, __bufsize)								\
+	static inline kstream_t *ks_init(type_t f)						\
+	{																\
+		kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t));	\
+		ks->f = f;													\
+		ks->buf = malloc(__bufsize);								\
+		return ks;													\
+	}																\
+	static inline void ks_destroy(kstream_t *ks)					\
+	{																\
+		if (ks) {													\
+			free(ks->buf);											\
+			free(ks);												\
+		}															\
+	}
+
+#define __KS_GETC(__read, __bufsize)						\
+	static inline int ks_getc(kstream_t *ks)				\
+	{														\
+		if (ks->is_eof && ks->begin >= ks->end) return -1;	\
+		if (ks->begin >= ks->end) {							\
+			ks->begin = 0;									\
+			ks->end = __read(ks->f, ks->buf, __bufsize);	\
+			if (ks->end < __bufsize) ks->is_eof = 1;		\
+			if (ks->end == 0) return -1;					\
+		}													\
+		return (int)ks->buf[ks->begin++];					\
+	}
+
+#ifndef KSTRING_T
+#define KSTRING_T kstring_t
+typedef struct __kstring_t {
+	size_t l, m;
+	char *s;
+} kstring_t;
+#endif
+
+#ifndef kroundup32
+#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
+#endif
+
+#define __KS_GETUNTIL(__read, __bufsize)								\
+	static int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
+	{																	\
+		if (dret) *dret = 0;											\
+		str->l = 0;														\
+		if (ks->begin >= ks->end && ks->is_eof) return -1;				\
+		for (;;) {														\
+			int i;														\
+			if (ks->begin >= ks->end) {									\
+				if (!ks->is_eof) {										\
+					ks->begin = 0;										\
+					ks->end = __read(ks->f, ks->buf, __bufsize);		\
+					if (ks->end < __bufsize) ks->is_eof = 1;			\
+					if (ks->end == 0) break;							\
+				} else break;											\
+			}															\
+			if (delimiter > KS_SEP_MAX) {								\
+				for (i = ks->begin; i < ks->end; ++i)					\
+					if (ks->buf[i] == delimiter) break;					\
+			} else if (delimiter == KS_SEP_SPACE) {						\
+				for (i = ks->begin; i < ks->end; ++i)					\
+					if (isspace(ks->buf[i])) break;						\
+			} else if (delimiter == KS_SEP_TAB) {						\
+				for (i = ks->begin; i < ks->end; ++i)					\
+					if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \
+			} else i = 0; /* never come to here! */						\
+			if (str->m - str->l < i - ks->begin + 1) {					\
+				str->m = str->l + (i - ks->begin) + 1;					\
+				kroundup32(str->m);										\
+				str->s = (char*)realloc(str->s, str->m);				\
+			}															\
+			memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \
+			str->l = str->l + (i - ks->begin);							\
+			ks->begin = i + 1;											\
+			if (i < ks->end) {											\
+				if (dret) *dret = ks->buf[i];							\
+				break;													\
+			}															\
+		}																\
+		if (str->l == 0) {												\
+			str->m = 1;													\
+			str->s = (char*)calloc(1, 1);								\
+		}																\
+		str->s[str->l] = '\0';											\
+		return str->l;													\
+	}
+
+#define KSTREAM_INIT(type_t, __read, __bufsize) \
+	__KS_TYPE(type_t)							\
+	__KS_BASIC(type_t, __bufsize)				\
+	__KS_GETC(__read, __bufsize)				\
+	__KS_GETUNTIL(__read, __bufsize)
+
+#define __KSEQ_BASIC(type_t)											\
+	static inline kseq_t *kseq_init(type_t fd)							\
+	{																	\
+		kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t));					\
+		s->f = ks_init(fd);												\
+		return s;														\
+	}																	\
+	static inline void kseq_rewind(kseq_t *ks)							\
+	{																	\
+		ks->last_char = 0;												\
+		ks->f->is_eof = ks->f->begin = ks->f->end = 0;					\
+	}																	\
+	static inline void kseq_destroy(kseq_t *ks)							\
+	{																	\
+		if (!ks) return;												\
+		free(ks->name.s); free(ks->comment.s); free(ks->seq.s);	free(ks->qual.s); \
+		ks_destroy(ks->f);												\
+		free(ks);														\
+	}
+
+/* Return value:
+   >=0  length of the sequence (normal)
+   -1   end-of-file
+   -2   truncated quality string
+ */
+#define __KSEQ_READ														\
+	static int kseq_read(kseq_t *seq)									\
+	{																	\
+		int c;															\
+		kstream_t *ks = seq->f;											\
+		if (seq->last_char == 0) { /* then jump to the next header line */ \
+			while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@');	\
+			if (c == -1) return -1; /* end of file */					\
+			seq->last_char = c;											\
+		} /* the first header char has been read */						\
+		seq->comment.l = seq->seq.l = seq->qual.l = 0;					\
+		if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1;			\
+		if (c != '\n') ks_getuntil(ks, '\n', &seq->comment, 0);			\
+		while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
+			if (isgraph(c)) { /* printable non-space character */		\
+				if (seq->seq.l + 1 >= seq->seq.m) { /* double the memory */ \
+					seq->seq.m = seq->seq.l + 2;						\
+					kroundup32(seq->seq.m); /* rounded to next closest 2^k */ \
+					seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
+				}														\
+				seq->seq.s[seq->seq.l++] = (char)c;						\
+			}															\
+		}																\
+		if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */	\
+		seq->seq.s[seq->seq.l] = 0;	/* null terminated string */		\
+		if (c != '+') return seq->seq.l; /* FASTA */					\
+		if (seq->qual.m < seq->seq.m) {	/* allocate enough memory */	\
+			seq->qual.m = seq->seq.m;									\
+			seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m);		\
+		}																\
+		while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
+		if (c == -1) return -2; /* we should not stop here */			\
+		while ((c = ks_getc(ks)) != -1 && seq->qual.l < seq->seq.l)		\
+			if (c >= 33 && c <= 127) seq->qual.s[seq->qual.l++] = (unsigned char)c;	\
+		seq->qual.s[seq->qual.l] = 0; /* null terminated string */		\
+		seq->last_char = 0;	/* we have not come to the next header line */ \
+		if (seq->seq.l != seq->qual.l) return -2; /* qual string is shorter than seq string */ \
+		return seq->seq.l;												\
+	}
+
+#define __KSEQ_TYPE(type_t)						\
+	typedef struct {							\
+		kstring_t name, comment, seq, qual;		\
+		int last_char;							\
+		kstream_t *f;							\
+	} kseq_t;
+
+#define KSEQ_INIT(type_t, __read)				\
+	KSTREAM_INIT(type_t, __read, 4096)			\
+	__KSEQ_TYPE(type_t)							\
+	__KSEQ_BASIC(type_t)						\
+	__KSEQ_READ
+
+#endif
diff --git a/main.c b/main.c
index 8ae6b2e..4664251 100644
--- a/main.c
+++ b/main.c
@@ -3,17 +3,109 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <sys/stat.h>
+#include <errno.h>
 #include "bgzf.h"
 #include "tabix.h"
 
-#define PACKAGE_VERSION "0.2.3 (r876)"
+#define PACKAGE_VERSION "0.2.4 (r949)"
+
+#define error(...) { fprintf(stderr,__VA_ARGS__); return -1; }
+
+int reheader_file(const char *header, const char *file, int meta)
+{
+    BGZF *fp = bgzf_open(file,"r");
+    if (bgzf_read_block(fp) != 0 || !fp->block_length)
+        return -1;
+    
+    char *buffer = fp->uncompressed_block;
+    int skip_until = 0;
+
+    if ( buffer[0]==meta )
+    {
+        skip_until = 1;
+
+        // Skip the header
+        while (1)
+        {
+            if ( buffer[skip_until]=='\n' )
+            {
+                skip_until++;
+                if ( skip_until>=fp->block_length )
+                {
+                    if (bgzf_read_block(fp) != 0 || !fp->block_length)
+                        error("no body?\n");
+                    skip_until = 0;
+                }
+                // The header has finished
+                if ( buffer[skip_until]!=meta ) break;
+            }
+            skip_until++;
+            if ( skip_until>=fp->block_length )
+            {
+                if (bgzf_read_block(fp) != 0 || !fp->block_length)
+                    error("no body?\n");
+                skip_until = 0;
+            }
+        }
+    }
+
+    FILE *fh = fopen(header,"r");
+    if ( !fh )
+        error("%s: %s", header,strerror(errno));
+    int page_size = getpagesize();
+    char *buf = valloc(page_size);
+    BGZF *bgzf_out = bgzf_fdopen(fileno(stdout), "w");
+    ssize_t nread;
+    while ( (nread=fread(buf,1,page_size-1,fh))>0 )
+    {
+        if ( nread<page_size-1 && buf[nread-1]!='\n' )
+            buf[nread++] = '\n';
+        if (bgzf_write(bgzf_out, buf, nread) < 0) error("Error: %s\n",bgzf_out->error);
+    }
+    fclose(fh);
+
+    if ( fp->block_length - skip_until > 0 )
+    {
+        if (bgzf_write(bgzf_out, buffer+skip_until, fp->block_length-skip_until) < 0) 
+            error("Error: %s\n",fp->error);
+    }
+    if (bgzf_flush(bgzf_out) < 0) 
+        error("Error: %s\n",bgzf_out->error);
+
+    while (1)
+    {
+#ifdef _USE_KNETFILE
+        nread = knet_read(fp->x.fpr, buf, page_size);
+#else
+        nread = fread(buf, 1, page_size, fp->file);
+#endif
+        if ( nread<=0 ) 
+            break;
+
+#ifdef _USE_KNETFILE
+        int count = fwrite(buf, 1, nread, bgzf_out->x.fpw);
+#else
+        int count = fwrite(buf, 1, nread, bgzf_out->file);
+#endif
+        if (count != nread)
+            error("Write failed, wrote %d instead of %d bytes.\n", count,(int)nread);
+    }
+
+    if (bgzf_close(bgzf_out) < 0) 
+        error("Error: %s\n",bgzf_out->error);
+   
+    return 0;
+}
+
 
 int main(int argc, char *argv[])
 {
-	int c, skip = -1, meta = -1, list_chrms = 0, force = 0, print_header = 0;
+	int c, skip = -1, meta = -1, list_chrms = 0, force = 0, print_header = 0, bed_reg = 0;
 	ti_conf_t conf = ti_conf_gff;
-	while ((c = getopt(argc, argv, "p:s:b:e:0S:c:lhf")) >= 0) {
+    const char *reheader = NULL;
+	while ((c = getopt(argc, argv, "p:s:b:e:0S:c:lhfBr:")) >= 0) {
 		switch (c) {
+		case 'B': bed_reg = 1; break;
 		case '0': conf.preset |= TI_FLAG_UCSC; break;
 		case 'S': skip = atoi(optarg); break;
 		case 'c': meta = optarg[0]; break;
@@ -34,6 +126,7 @@ int main(int argc, char *argv[])
         case 'l': list_chrms = 1; break;
         case 'h': print_header = 1; break;
 		case 'f': force = 1; break;
+        case 'r': reheader = optarg; break;
 		}
 	}
 	if (skip >= 0) conf.line_skip = skip;
@@ -46,11 +139,13 @@ int main(int argc, char *argv[])
 		fprintf(stderr, "Options: -p STR     preset: gff, bed, sam, vcf, psltbl [gff]\n");
 		fprintf(stderr, "         -s INT     sequence name column [1]\n");
 		fprintf(stderr, "         -b INT     start column [4]\n");
-		fprintf(stderr, "         -e INT     end column [5]\n");
+		fprintf(stderr, "         -e INT     end column; can be identical to '-b' [5]\n");
 		fprintf(stderr, "         -S INT     skip first INT lines [0]\n");
 		fprintf(stderr, "         -c CHAR    symbol for comment/meta lines [#]\n");
+	    fprintf(stderr, "         -r FILE    replace the header with the content of FILE [null]\n");
+		fprintf(stderr, "         -B         region1 is a BED file (entire file will be read)\n");
 		fprintf(stderr, "         -0         zero-based coordinate\n");
-		fprintf(stderr, "         -h         print the VCF header\n");
+		fprintf(stderr, "         -h         print the header lines\n");
 		fprintf(stderr, "         -l         list chromosome names\n");
 		fprintf(stderr, "         -f         force to overwrite the index\n");
 		fprintf(stderr, "\n");
@@ -71,27 +166,49 @@ int main(int argc, char *argv[])
 		ti_index_destroy(idx);
 		return 0;
 	}
+    if (reheader)
+        return reheader_file(reheader,argv[optind],conf.meta_char);
+
+	struct stat stat_tbi,stat_vcf;
+    char *fnidx = calloc(strlen(argv[optind]) + 5, 1);
+   	strcat(strcpy(fnidx, argv[optind]), ".tbi");
+
 	if (optind + 1 == argc) {
 		if (force == 0) {
-			struct stat buf;
-			char *fnidx = calloc(strlen(argv[optind]) + 5, 1);
-			strcat(strcpy(fnidx, argv[optind]), ".tbi");
-			if (stat(fnidx, &buf) == 0) {
-				fprintf(stderr, "[tabix] the index file exists. Please use '-f' to overwrite.\n");
-				free(fnidx);
-				return 1;
+			if (stat(fnidx, &stat_tbi) == 0) 
+            {
+                // Before complaining, check if the VCF file isn't newer. This is a common source of errors,
+                //  people tend not to notice that tabix failed
+                stat(argv[optind], &stat_vcf);
+                if ( stat_vcf.st_mtime <= stat_tbi.st_mtime )
+                {
+                    fprintf(stderr, "[tabix] the index file exists. Please use '-f' to overwrite.\n");
+                    free(fnidx);
+                    return 1;
+                }
 			}
-			free(fnidx);
 		}
-        if ( is_bgzipped(argv[optind])!=1 )
+        if ( bgzf_check_bgzf(argv[optind])!=1 )
         {
             fprintf(stderr,"[tabix] was bgzip used to compress this file? %s\n", argv[optind]);
+            free(fnidx);
             return 1;
         }
 		return ti_index_build(argv[optind], &conf);
 	}
 	{ // retrieve
 		tabix_t *t;
+        // Common source of errors: new VCF is used with an old index
+        stat(fnidx, &stat_tbi);
+        stat(argv[optind], &stat_vcf);
+        if ( force==0 && stat_vcf.st_mtime > stat_tbi.st_mtime )
+        {
+            fprintf(stderr, "[tabix] the index file is older than the vcf file. Please use '-f' to overwrite or reindex.\n");
+            free(fnidx);
+            return 1;
+        }
+        free(fnidx);
+
 		if ((t = ti_open(argv[optind], 0)) == 0) {
 			fprintf(stderr, "[main] fail to open the data file.\n");
 			return 1;
@@ -106,36 +223,65 @@ int main(int argc, char *argv[])
 			}
 			ti_iter_destroy(iter);
 		} else { // retrieve from specified regions
-			int i;
-			if ( ti_lazy_index_load(t) ) 
-            {
+			int i, len;
+            ti_iter_t iter;
+            const char *s;
+			const ti_conf_t *idxconf;
+
+			if (ti_lazy_index_load(t) < 0 && bed_reg == 0) {
                 fprintf(stderr,"[tabix] failed to load the index file.\n");
                 return 1;
             }
+			idxconf = ti_get_conf(t->idx);
 
-            ti_iter_t iter;
-            const char *s;
-            int len;
             if ( print_header )
             {
                 // If requested, print the header lines here
                 iter = ti_query(t, 0, 0, 0);
                 while ((s = ti_read(t, iter, &len)) != 0) {
-                    if ( *s != '#' ) break;
+                    if ((int)(*s) != idxconf->meta_char) break;
                     fputs(s, stdout); fputc('\n', stdout);
                 }
                 ti_iter_destroy(iter);
             }
-			for (i = optind + 1; i < argc; ++i) {
-				int tid, beg, end;
-				if (ti_parse_region(t->idx, argv[i], &tid, &beg, &end) == 0) {
-					iter = ti_queryi(t, tid, beg, end);
-					while ((s = ti_read(t, iter, &len)) != 0) {
-						fputs(s, stdout); fputc('\n', stdout);
+			if (bed_reg) {
+				extern int bed_overlap(const void *_h, const char *chr, int beg, int end);
+				extern void *bed_read(const char *fn);
+				extern void bed_destroy(void *_h);
+
+				const ti_conf_t *conf_ = idxconf? idxconf : &conf; // use the index file if available
+				void *bed = bed_read(argv[optind+1]); // load the BED file
+				ti_interval_t intv;
+
+				if (bed == 0) {
+					fprintf(stderr, "[main] fail to read the BED file.\n");
+					return 1;
+				}
+				iter = ti_query(t, 0, 0, 0);
+				while ((s = ti_read(t, iter, &len)) != 0) {
+					int c;
+					ti_get_intv(conf_, len, (char*)s, &intv);
+					c = *intv.se; *intv.se = '\0';
+					if (bed_overlap(bed, intv.ss, intv.beg, intv.end)) {
+						*intv.se = c;
+						puts(s);
 					}
-					ti_iter_destroy(iter);
-				} 
-                // else fprintf(stderr, "[main] invalid region: unknown target name or minus interval.\n");
+					*intv.se = c;
+				}
+                ti_iter_destroy(iter);
+				bed_destroy(bed);
+			} else {
+				for (i = optind + 1; i < argc; ++i) {
+					int tid, beg, end;
+					if (ti_parse_region(t->idx, argv[i], &tid, &beg, &end) == 0) {
+						iter = ti_queryi(t, tid, beg, end);
+							while ((s = ti_read(t, iter, &len)) != 0) {
+							fputs(s, stdout); fputc('\n', stdout);
+						}
+						ti_iter_destroy(iter);
+					} 
+            	    // else fprintf(stderr, "[main] invalid region: unknown target name or minus interval.\n");
+				}
 			}
 		}
 		ti_close(t);
diff --git a/python/setup.py b/python/setup.py
new file mode 100644
index 0000000..2771eda
--- /dev/null
+++ b/python/setup.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+#
+# The MIT License
+#
+# Copyright (c) 2011 Seoul National University.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+# Contact: Hyeshik Chang <hyeshik@snu.ac.kr>
+
+from distutils.core import setup, Extension
+
+# Change this to True when you need the knetfile support.
+USE_KNETFILE = False
+
+TABIX_SOURCE_FILES = [
+    '../bgzf.c', '../bgzip.c', '../index.c', '../knetfile.c', '../kstring.c'
+]
+
+define_options = [('_FILE_OFFSET_BITS', 64)]
+if USE_KNETFILE:
+    define_options.append(('_USE_KNETFILE', 1))
+
+ext_modules = [Extension("tabix", ["tabixmodule.c"] + TABIX_SOURCE_FILES,
+                         include_dirs=['..'],
+                         libraries=['z'],
+                         define_macros=define_options)]
+
+setup (name = 'tabix',
+       version = '1.0',
+       description = 'Python interface to tabix, a generic indexer '
+                     'for TAB-delimited genome position files',
+       author = 'Hyeshik Chang',
+       author_email = 'hyeshik@snu.ac.kr',
+       license = 'MIT',
+       ext_modules = ext_modules
+)
diff --git a/python/tabixmodule.c b/python/tabixmodule.c
new file mode 100644
index 0000000..d04d097
--- /dev/null
+++ b/python/tabixmodule.c
@@ -0,0 +1,408 @@
+/*-
+ * The MIT License
+ *
+ * Copyright (c) 2011 Seoul National University.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Contact: Hyeshik Chang <hyeshik@snu.ac.kr>
+ */
+
+#define PY_SSIZE_T_CLEAN
+#include "Python.h"
+#include "tabix.h"
+
+static PyObject *TabixError;
+
+typedef struct {
+    PyObject_HEAD
+    tabix_t *tb;
+    char *fn;
+} TabixObject;
+
+typedef struct {
+    PyObject_HEAD
+    TabixObject *tbobj;
+    ti_iter_t iter;
+} TabixIteratorObject;
+
+static PyTypeObject Tabix_Type, TabixIterator_Type;
+
+/* --- TabixIterator --------------------------------------------------- */
+
+static PyObject *
+tabixiter_create(TabixObject *parentidx, ti_iter_t iter)
+{
+    TabixIteratorObject *self;
+
+    self = PyObject_New(TabixIteratorObject, &TabixIterator_Type);
+    if (self == NULL)
+        return NULL;
+
+    Py_INCREF(parentidx);
+    self->tbobj = parentidx;
+    self->iter = iter;
+
+    return (PyObject *)self;
+}
+
+static void
+tabixiter_dealloc(TabixIteratorObject *self)
+{
+    ti_iter_destroy(self->iter);
+    Py_DECREF(self->tbobj);
+    PyObject_Del(self);
+}
+
+static PyObject *
+tabixiter_iter(PyObject *self)
+{
+    Py_INCREF(self);
+    return self;
+}
+
+#if PY_MAJOR_VERSION < 3
+# define PYOBJECT_FROM_STRING_AND_SIZE PyString_FromStringAndSize
+#else
+# define PYOBJECT_FROM_STRING_AND_SIZE PyUnicode_FromStringAndSize
+#endif
+
+static PyObject *
+tabixiter_iternext(TabixIteratorObject *self)
+{
+    const char *chunk;
+    int len, i;
+
+    chunk = ti_read(self->tbobj->tb, self->iter, &len);
+    if (chunk != NULL) {
+        PyObject *ret, *column;
+        Py_ssize_t colidx;
+        const char *ptr, *begin;
+
+        ret = PyList_New(0);
+        if (ret == NULL)
+            return NULL;
+
+        colidx = 0;
+        ptr = begin = chunk;
+        for (i = len; i > 0; i--, ptr++)
+            if (*ptr == '\t') {
+                column = PYOBJECT_FROM_STRING_AND_SIZE(begin,
+                                                       (Py_ssize_t)(ptr - begin));
+                if (column == NULL || PyList_Append(ret, column) == -1) {
+                    Py_DECREF(ret);
+                    return NULL;
+                }
+
+                Py_DECREF(column);
+                begin = ptr + 1;
+                colidx++;
+            }
+
+        column = PYOBJECT_FROM_STRING_AND_SIZE(begin, (Py_ssize_t)(ptr - begin));
+        if (column == NULL || PyList_Append(ret, column) == -1) {
+            Py_DECREF(ret);
+            return NULL;
+        }
+        Py_DECREF(column);
+
+        return ret;
+    }
+    else
+        return NULL;
+}
+
+static PyMethodDef tabixiter_methods[] = {
+    {NULL, NULL} /* sentinel */
+};
+
+static PyTypeObject TabixIterator_Type = {
+    PyVarObject_HEAD_INIT(NULL, 0)
+    "tabix.TabixIterator",      /*tp_name*/
+    sizeof(TabixIteratorObject), /*tp_basicsize*/
+    0,                          /*tp_itemsize*/
+    /* methods */
+    (destructor)tabixiter_dealloc,  /*tp_dealloc*/
+    0,                          /*tp_print*/
+    0,                          /*tp_getattr*/
+    0,                          /*tp_setattr*/
+    0,                          /*tp_compare*/
+    0,                          /*tp_repr*/
+    0,                          /*tp_as_number*/
+    0,                          /*tp_as_sequence*/
+    0,                          /*tp_as_mapping*/
+    0,                          /*tp_hash*/
+    0,                          /*tp_call*/
+    0,                          /*tp_str*/
+    0,                          /*tp_getattro*/
+    0,                          /*tp_setattro*/
+    0,                          /*tp_as_buffer*/
+    Py_TPFLAGS_DEFAULT,         /*tp_flags*/
+    0,                          /*tp_doc*/
+    0,                          /*tp_traverse*/
+    0,                          /*tp_clear*/
+    0,                          /*tp_richcompare*/
+    0,                          /*tp_weaklistoffset*/
+    tabixiter_iter,             /*tp_iter*/
+    (iternextfunc)tabixiter_iternext, /*tp_iternext*/
+    tabixiter_methods,          /*tp_methods*/
+    0,                          /*tp_members*/
+    0,                          /*tp_getset*/
+    0,                          /*tp_base*/
+    0,                          /*tp_dict*/
+    0,                          /*tp_descr_get*/
+    0,                          /*tp_descr_set*/
+    0,                          /*tp_dictoffset*/
+    0,                          /*tp_init*/
+    0,                          /*tp_alloc*/
+    0,                          /*tp_new*/
+    0,                          /*tp_free*/
+    0,                          /*tp_is_gc*/
+};
+
+
+/* --- Tabix ----------------------------------------------------------- */
+
+static PyObject *
+tabix_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+    TabixObject *self;
+    const char *fn, *fnidx=NULL;
+    static char *kwnames[]={"fn", "fnidx", NULL};
+    tabix_t *tb;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|z:Tabix",
+                                     kwnames, &fn, &fnidx))
+        return NULL;
+
+    tb = ti_open(fn, fnidx);
+    if (tb == NULL) {
+        PyErr_SetString(TabixError, "Can't open the index file.");
+        return NULL;
+    }
+
+    self = (TabixObject *)type->tp_alloc(type, 0);
+    if (self == NULL)
+        return NULL;
+
+    self->tb = tb;
+    self->fn = strdup(fn);
+
+    return (PyObject *)self;
+}
+
+static void
+tabix_dealloc(TabixObject *self)
+{
+    free(self->fn);
+    ti_close(self->tb);
+    PyObject_Del(self);
+}
+
+static PyObject *
+tabix_query(TabixObject *self, PyObject *args)
+{
+    char *name;
+    int begin, end;
+    ti_iter_t result;
+
+    if (!PyArg_ParseTuple(args, "sii:query", &name, &begin, &end))
+        return NULL;
+
+    result = ti_query(self->tb, name, begin, end);
+    if (result == NULL) {
+        PyErr_SetString(TabixError, "query failed");
+        return NULL;
+    }
+
+    return tabixiter_create(self, result);
+}
+
+static PyObject *
+tabix_queryi(TabixObject *self, PyObject *args)
+{
+    int tid, begin, end;
+    ti_iter_t result;
+
+    if (!PyArg_ParseTuple(args, "iii:queryi", &tid, &begin, &end))
+        return NULL;
+
+    result = ti_queryi(self->tb, tid, begin, end);
+    if (result == NULL) {
+        PyErr_SetString(TabixError, "query failed");
+        return NULL;
+    }
+
+    return tabixiter_create(self, result);
+}
+
+static PyObject *
+tabix_querys(TabixObject *self, PyObject *args)
+{
+    const char *reg;
+    ti_iter_t result;
+
+    if (!PyArg_ParseTuple(args, "s:querys", &reg))
+        return NULL;
+
+    result = ti_querys(self->tb, reg);
+    if (result == NULL) {
+        PyErr_SetString(TabixError, "query failed");
+        return NULL;
+    }
+
+    return tabixiter_create(self, result);
+}
+
+static PyObject *
+tabix_repr(TabixObject *self)
+{
+#if PY_MAJOR_VERSION < 3
+    return PyString_FromFormat("<Tabix fn=\"%s\">", self->fn);
+#else
+    return PyUnicode_FromFormat("<Tabix fn=\"%s\">", self->fn);
+#endif
+}
+
+static PyMethodDef tabix_methods[] = {
+    {"query",           (PyCFunction)tabix_query, METH_VARARGS,
+        PyDoc_STR("T.query(name, begin, end) -> iterator")},
+    {"queryi",          (PyCFunction)tabix_queryi, METH_VARARGS,
+        PyDoc_STR("T.queryi(tid, begin, id) -> iterator")},
+    {"querys",          (PyCFunction)tabix_querys, METH_VARARGS,
+        PyDoc_STR("T.querys(region) -> iterator")},
+    {NULL,              NULL}           /* sentinel */
+};
+
+static PyTypeObject Tabix_Type = {
+    /* The ob_type field must be initialized in the module init function
+     * to be portable to Windows without using C++. */
+    PyVarObject_HEAD_INIT(NULL, 0)
+    "tabix.Tabix",              /*tp_name*/
+    sizeof(TabixObject),        /*tp_basicsize*/
+    0,                          /*tp_itemsize*/
+    /* methods */
+    (destructor)tabix_dealloc,  /*tp_dealloc*/
+    0,                          /*tp_print*/
+    0,                          /*tp_getattr*/
+    0,                          /*tp_setattr*/
+    0,                          /*tp_compare*/
+    (reprfunc)tabix_repr,       /*tp_repr*/
+    0,                          /*tp_as_number*/
+    0,                          /*tp_as_sequence*/
+    0,                          /*tp_as_mapping*/
+    0,                          /*tp_hash*/
+    0,                          /*tp_call*/
+    0,                          /*tp_str*/
+    0,                          /*tp_getattro*/
+    0,                          /*tp_setattro*/
+    0,                          /*tp_as_buffer*/
+    Py_TPFLAGS_DEFAULT,         /*tp_flags*/
+    0,                          /*tp_doc*/
+    0,                          /*tp_traverse*/
+    0,                          /*tp_clear*/
+    0,                          /*tp_richcompare*/
+    0,                          /*tp_weaklistoffset*/
+    0,                          /*tp_iter*/
+    0,                          /*tp_iternext*/
+    tabix_methods,              /*tp_methods*/
+    0,                          /*tp_members*/
+    0,                          /*tp_getset*/
+    0,                          /*tp_base*/
+    0,                          /*tp_dict*/
+    0,                          /*tp_descr_get*/
+    0,                          /*tp_descr_set*/
+    0,                          /*tp_dictoffset*/
+    0,                          /*tp_init*/
+    0,                          /*tp_alloc*/
+    (newfunc)tabix_new,         /*tp_new*/
+    0,                          /*tp_free*/
+    0,                          /*tp_is_gc*/
+};
+/* --------------------------------------------------------------------- */
+
+static PyMethodDef tabix_functions[] = {
+    {NULL, NULL} /* sentinel */
+};
+
+PyDoc_STRVAR(module_doc,
+"Python interface to tabix, Heng Li's generic indexer for TAB-delimited "
+"genome position filesThis is a template module just for instruction.");
+
+#if PY_MAJOR_VERSION >= 3
+static struct PyModuleDef tabixmodule = { 
+    PyModuleDef_HEAD_INIT,
+    "tabix",
+    module_doc,
+    -1, 
+    tabix_functions,
+    NULL,
+    NULL,
+    NULL,
+    NULL
+};
+#endif
+
+#if PY_MAJOR_VERSION < 3
+PyMODINIT_FUNC inittabix(void)
+#else
+PyMODINIT_FUNC PyInit_tabix(void)
+#endif
+{
+    PyObject *m;
+
+    if (PyType_Ready(&Tabix_Type) < 0)
+        goto fail;
+    if (PyType_Ready(&TabixIterator_Type) < 0)
+        goto fail;
+
+#if PY_MAJOR_VERSION < 3
+    m = Py_InitModule3("tabix", tabix_functions, module_doc);
+#else
+    m = PyModule_Create(&tabixmodule);
+#endif
+    if (m == NULL)
+        goto fail;
+
+    if (TabixError == NULL) {
+        TabixError = PyErr_NewException("tabix.error", NULL, NULL);
+        if (TabixError == NULL)
+            goto fail;
+    }
+    Py_INCREF(TabixError);
+    PyModule_AddObject(m, "error", TabixError);
+
+    PyModule_AddObject(m, "Tabix", (PyObject *)&Tabix_Type);
+    PyModule_AddObject(m, "TabixIterator", (PyObject *)&TabixIterator_Type);
+
+#if PY_MAJOR_VERSION >= 3
+    return m;
+#endif
+
+ fail:
+#if PY_MAJOR_VERSION < 3
+    return;
+#else
+    return NULL;
+#endif
+}
diff --git a/python/test.py b/python/test.py
new file mode 100755
index 0000000..804a565
--- /dev/null
+++ b/python/test.py
@@ -0,0 +1,91 @@
+#
+# The MIT License
+#
+# Copyright (c) 2011 Seoul National University.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+# Contact: Hyeshik Chang <hyeshik@snu.ac.kr>
+
+import unittest
+import random
+import gzip
+import tabix
+
+EXAMPLEFILE = '../example.gtf.gz'
+
+def load_example_regions(path):
+    alldata = []
+    for line in gzip.GzipFile(EXAMPLEFILE):
+        fields = line.decode('ascii')[:-1].split('\t')
+        seqid = fields[0]
+        begin = int(fields[3])
+        end = int(fields[4])
+        alldata.append((seqid, begin, end, fields[:7]))
+
+    return alldata
+
+def does_overlap(A, B, C, D):
+    return (A <= D <= B) or (C <= B <= D)
+
+def sample_test_dataset(regions, ntests):
+    seqids = [seqid for seqid, _, _, _ in regions]
+    lowerbound = max(0, min(begin for _, begin, _, _ in regions) - 1000)
+    upperbound = max(end for _, _, end, _ in regions) + 1000
+
+    tests = []
+    for i in range(ntests):
+        seqid = random.choice(seqids)
+        low = random.randrange(lowerbound, upperbound)
+        high = random.randrange(low, upperbound)
+
+        # for 1-based both-end inclusive intervals
+        matches = [info for seq, begin, end, info in regions
+                   if seqid == seq and does_overlap(begin, end, low, high)]
+
+        tests.append((seqid, low, high, matches))
+
+    return tests
+
+def tbresult2excerpt(tbmatches):
+    return [fields[:7] for fields in tbmatches]
+
+class TabixTest(unittest.TestCase):
+    regions = load_example_regions(EXAMPLEFILE)
+    testset = sample_test_dataset(regions, 500)
+
+    def setUp(self):
+        self.tb = tabix.Tabix(EXAMPLEFILE)
+
+    def testQuery(self):
+        for seqid, low, high, matches in self.testset:
+            tbresult = tbresult2excerpt(self.tb.query(seqid, low, high))
+            self.assertEqual(tbresult, matches)
+
+    def testQueryS(self):
+        for seqid, low, high, matches in self.testset:
+            tbresult = tbresult2excerpt(self.tb.querys('%s:%d-%d' %
+                                                       (seqid, low, high)))
+            self.assertEqual(tbresult, matches)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tabix.1 b/tabix.1
index 3b82c76..1bd9533 100644
--- a/tabix.1
+++ b/tabix.1
@@ -7,7 +7,7 @@ tabix - Generic indexer for TAB-delimited genome position files
 .SH SYNOPSIS
 .PP
 .B bgzip
-.RB [ \-cdh ]
+.RB [ \-cdhB ]
 .RB [ \-b
 .IR virtualOffset ]
 .RB [ \-s
@@ -82,6 +82,14 @@ Skip lines started with character CHAR. [#]
 Specify that the position in the data file is 0-based (e.g. UCSC files)
 rather than 1-based.
 .TP
+.B -h
+Print the header/meta lines.
+.TP
+.B -B
+The second argument is a BED file. When this option is in use, the input
+file may not be sorted or indexed. The entire input will be read sequentially. Nonetheless,
+with this option, the format of the input must be specificed correctly on the command line.
+.TP
 .B -f
 Force to overwrite the index file if it is present.
 .TP
diff --git a/tabix.h b/tabix.h
index 4390c09..7b4497a 100644
--- a/tabix.h
+++ b/tabix.h
@@ -58,6 +58,11 @@ typedef struct {
 	int32_t meta_char, line_skip;
 } ti_conf_t;
 
+typedef struct {
+	int beg, end;
+	char *ss, *se;
+} ti_interval_t;
+
 extern ti_conf_t ti_conf_gff, ti_conf_bed, ti_conf_psltbl, ti_conf_vcf, ti_conf_sam; // preset
 
 #ifdef __cplusplus
@@ -120,6 +125,9 @@ extern "C" {
 	/* Get the data line pointed by the iterator and iterate to the next record. */
 	const char *ti_iter_read(BGZF *fp, ti_iter_t iter, int *len);
 
+	const ti_conf_t *ti_get_conf(ti_index_t *idx);
+	int ti_get_intv(const ti_conf_t *conf, int len, char *line, ti_interval_t *intv);
+
 	/*******************
 	 * Deprecated APIs *
 	 *******************/
-- 
2.30.2