From 6a7c3f175b210cc16d09a5e8e4c1d47333dbe1c6 Mon Sep 17 00:00:00 2001
From: Diane Trout <diane@caltech.edu>
Date: Fri, 19 Nov 2010 11:43:43 -0800
Subject: [PATCH] Imported Upstream version 0.3

---
 MANIFEST.in              |  10 +
 PKG-INFO                 |   2 +-
 pysam/__init__.py        |  12 +-
 pysam/csamtools.pxd      |  84 ++--
 pysam/csamtools.pyx      | 646 ++++++++++++++++++--------
 pysam/ctabix.pxd         | 171 +++++++
 pysam/ctabix.pyx         | 881 ++++++++++++++++++++++++++++++++++++
 pysam/pysam_util.c       | 229 +---------
 pysam/pysam_util.h       |  69 +--
 pysam/version.py         |   7 +
 samtools/bam.c           |  55 ++-
 samtools/bam.h           | 107 +++--
 samtools/bam_aux.c       |   2 +-
 samtools/bam_import.c    |  26 +-
 samtools/bam_index.c     | 216 +++++++--
 samtools/bam_maqcns.c    |  33 +-
 samtools/bam_maqcns.h    |   5 +-
 samtools/bam_md.c        |  34 +-
 samtools/bam_pileup.c    | 342 ++++++++++----
 samtools/bam_plcmd.c     | 250 ++++++++--
 samtools/bam_reheader.c  |  60 +++
 samtools/bam_sort.c      |   4 +-
 samtools/bam_tview.c     |  19 +-
 samtools/bgzf.c          |  76 ++--
 samtools/bgzf.h          |  25 +-
 samtools/faidx.c         |   2 +-
 samtools/knetfile.c      |   6 +-
 samtools/kstring.h       |  34 ++
 samtools/sam.c           |   1 +
 samtools/sam_header.c    |  38 +-
 samtools/sam_view.c      |  43 +-
 setup.py                 |  78 +++-
 tabix/bam_endian.h       |  42 ++
 tabix/bgzf.c             | 676 +++++++++++++++++++++++++++
 tabix/bgzf.h             | 156 +++++++
 tabix/bgzip.c            | 201 +++++++++
 tabix/index.c            | 954 +++++++++++++++++++++++++++++++++++++++
 tabix/khash.h            | 486 ++++++++++++++++++++
 tabix/knetfile.c         | 632 ++++++++++++++++++++++++++
 tabix/knetfile.h         |  75 +++
 tabix/ksort.h            | 271 +++++++++++
 tabix/kstring.c          | 165 +++++++
 tabix/kstring.h          |  68 +++
 tabix/tabix.h            | 137 ++++++
 tests/ex3.sam            |   2 +-
 tests/example.gtf.gz     | Bin 0 -> 3778 bytes
 tests/example.gtf.gz.tbi | Bin 0 -> 260 bytes
 tests/pysam_test.py      | 148 +++++-
 tests/tabix_test.py      | 225 +++++++++
 49 files changed, 6938 insertions(+), 867 deletions(-)
 create mode 100644 pysam/ctabix.pxd
 create mode 100644 pysam/ctabix.pyx
 create mode 100644 pysam/version.py
 create mode 100644 samtools/bam_reheader.c
 create mode 100644 tabix/bam_endian.h
 create mode 100644 tabix/bgzf.c
 create mode 100644 tabix/bgzf.h
 create mode 100644 tabix/bgzip.c
 create mode 100644 tabix/index.c
 create mode 100644 tabix/khash.h
 create mode 100644 tabix/knetfile.c
 create mode 100644 tabix/knetfile.h
 create mode 100644 tabix/ksort.h
 create mode 100644 tabix/kstring.c
 create mode 100644 tabix/kstring.h
 create mode 100644 tabix/tabix.h
 create mode 100644 tests/example.gtf.gz
 create mode 100644 tests/example.gtf.gz.tbi
 create mode 100644 tests/tabix_test.py

diff --git a/MANIFEST.in b/MANIFEST.in
index 11fb9d1..4bbbc8e 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -9,8 +9,12 @@ include INSTALL
 include KNOWN_BUGS
 include THANKS
 include pysam/csamtools.pxd
+include pysam/ctabix.pxd
 include pysam/pysam_util.h
 include samtools/*.h
+include tabix/*.h
+
+# pysam tests
 include tests/00README.txt
 include tests/Makefile
 include tests/ex1.fa
@@ -24,3 +28,9 @@ include tests/example.py
 include tests/pysam_test.py
 include tests/segfault_tests.py
 
+# tabix tests
+include tests/tabix_test.py
+include tests/example.gtf.gz
+include tests/example.gtf.gz.tbi
+
+
diff --git a/PKG-INFO b/PKG-INFO
index 3e3b745..b95ed79 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 1.0
 Name: pysam
-Version: 0.2
+Version: 0.3
 Summary: pysam
 Home-page: http://code.google.com/p/pysam/
 Author: Andreas Heger
diff --git a/pysam/__init__.py b/pysam/__init__.py
index 3062753..9f257c2 100644
--- a/pysam/__init__.py
+++ b/pysam/__init__.py
@@ -1,4 +1,7 @@
 from csamtools import *
+from ctabix import *
+import csamtools
+import ctabix
 import Pileup
 import sys
 import os
@@ -50,7 +53,9 @@ class SamtoolsDispatcher(object):
         # Note that there is sometimes output on stderr that is not an error,
         # for example: [sam_header_read2] 2 sequences loaded.
         # Ignore messages like these
-        stderr = [ x for x in stderr if not x.startswith( "[sam_header_read2]" ) ]
+        stderr = [ x for x in stderr \
+                       if not ( x.startswith( "[sam_header_read2]" ) or \
+                                x.startswith( "[bam_index_load]" ) ) ]
         if stderr: raise SamtoolsError( "\n".join( stderr ) )
 
         # call parser for stdout:
@@ -96,6 +101,9 @@ for key, options in SAMTOOLS_DISPATCH.iteritems():
     globals()[key] = SamtoolsDispatcher(cmd, parser)
 
 # hack to export all the symbols from csamtools
-__all__ = csamtools.__all__ + [ "SamtoolsError", "SamtoolsDispatcher" ] + list(SAMTOOLS_DISPATCH) +\
+__all__ = csamtools.__all__ + \
+    ctabix.__all__ + \
+    [ "SamtoolsError", "SamtoolsDispatcher" ] + list(SAMTOOLS_DISPATCH) +\
     ["Pileup",] 
 
+from version import __version__, __samtools_version__
diff --git a/pysam/csamtools.pxd b/pysam/csamtools.pxd
index 7dac38d..b614a84 100644
--- a/pysam/csamtools.pxd
+++ b/pysam/csamtools.pxd
@@ -27,7 +27,7 @@ cdef extern from "stdio.h":
   FILE * stdout
   int fclose(FILE *)
   int sscanf(char *str,char *fmt,...)
-  int printf(char *str,char *fmt,...)
+  int printf(char *fmt,...)
   int sprintf(char *str,char *fmt,...)
   int fprintf(FILE *ifile,char *fmt,...)
   char *fgets(char *str,int size,FILE *ifile)
@@ -50,6 +50,9 @@ cdef extern from "string.h":
   size_t strlen(char *s)
   int memcmp( void * s1, void *s2, size_t len )
 
+cdef extern from "Python.h":
+   long _Py_HashPointer(void*)
+
 cdef extern from "razf.h":
   pass
 
@@ -120,15 +123,37 @@ cdef extern from "bam.h":
   ctypedef struct bam_plbuf_t:
       pass
 
+  ctypedef struct bam_iter_t:
+      pass
+
+  bam1_t * bam_init1()
+  void bam_destroy1(bam1_t *)
+
   bamFile razf_dopen(int data_fd, char *mode)
 
-  # removed - macros not found
+  int64_t bam_seek( bamFile fp, uint64_t voffset, int where)
+  int64_t bam_tell( bamFile fp )
 
-  # int64_t bam_seek( bamFile fp, uint64_t voffset, int where)
-  # int64_t bam_tell( bamFile fp )
-  # void bam_destroy1( bam1_t * b) 
   # void bam_init_header_hash(bam_header_t *header)
 
+  ###############################################
+  # stand-ins for samtools macros
+  uint32_t * bam1_cigar( bam1_t * b)
+  char * bam1_qname( bam1_t * b)
+  uint8_t * bam1_seq( bam1_t * b)
+  uint8_t * bam1_qual( bam1_t * b)
+  uint8_t * bam1_aux( bam1_t * b)
+
+  ###############################################
+  # bam iterator interface
+  bam_iter_t bam_iter_query( bam_index_t *idx, int tid, int beg, int end)
+
+  int bam_iter_read(bamFile fp, bam_iter_t iter, bam1_t *b)
+
+  void bam_iter_destroy(bam_iter_t iter)
+
+  ###############################################
+
   bam1_t * bam_dup1( bam1_t *src ) 
   
   bam1_t * bam_copy1(bam1_t *bdst, bam1_t *bsrc)
@@ -138,6 +163,7 @@ cdef extern from "bam.h":
 
   int bam_parse_region(bam_header_t *header, char *str, int *ref_id, int *begin, int *end)
 
+  ###############################################
   bam_plbuf_t *bam_plbuf_init(bam_pileup_f func, void *data)
 
   int bam_fetch(bamFile fp, bam_index_t *idx, int tid, int beg, int end, void *data, bam_fetch_f func)
@@ -145,6 +171,22 @@ cdef extern from "bam.h":
   int bam_plbuf_push(bam1_t *b, bam_plbuf_t *buf)
 
   void bam_plbuf_destroy(bam_plbuf_t *buf)
+  ########################################
+  # pileup iterator interface
+  ctypedef struct bam_plp_t:
+      pass
+
+  ctypedef int (*bam_plp_auto_f)(void *data, bam1_t *b)
+
+  bam_plp_t bam_plp_init( bam_plp_auto_f func, void *data)
+  int bam_plp_push( bam_plp_t iter,  bam1_t *b)
+  bam_pileup1_t *bam_plp_next( bam_plp_t iter, int *_tid, int *_pos, int *_n_plp)
+  bam_pileup1_t *bam_plp_auto( bam_plp_t iter, int *_tid, int *_pos, int *_n_plp)
+  void bam_plp_set_mask(bam_plp_t iter, int mask)
+  void bam_plp_reset(bam_plp_t iter)
+  void bam_plp_destroy(bam_plp_t iter)
+
+  ##################################################
 
   int bam_read1(bamFile fp, bam1_t *b)
 
@@ -209,15 +251,20 @@ cdef extern from "faidx.h":
 
    char *fai_fetch(faidx_t *fai, char *reg, int *len)
 
-cdef extern from "pysam_util.h":
+   int faidx_fetch_nseq(faidx_t *fai)
 
-    int pysam_bam_plbuf_push(bam1_t *b, bam_plbuf_t *buf, int cont)
+   char *faidx_fetch_seq(faidx_t *fai, char *c_name, 
+                         int p_beg_i, int p_end_i, int *len)
 
-    int pysam_get_pos( bam_plbuf_t *buf)
+cdef extern from "pysam_util.h":
 
-    int pysam_get_tid( bam_plbuf_t *buf)
+    int pysam_pileup_next(bam1_t *b, 
+                          bam_plbuf_t *buf, 
+                          bam_pileup1_t ** plp,
+                          int * tid,
+                          int * pos,
+                          int * n_plp )
 
-    bam_pileup1_t * pysam_get_pileup( bam_plbuf_t *buf)
 
     int pysam_dispatch(int argc, char *argv[] )
 
@@ -233,19 +280,4 @@ cdef extern from "pysam_util.h":
     # translate char to unsigned char
     unsigned char pysam_translate_sequence( char s )
 
-    # stand-ins for samtools macros
-    uint32_t * pysam_bam1_cigar( bam1_t * b)
-    char * pysam_bam1_qname( bam1_t * b)
-    uint8_t * pysam_bam1_seq( bam1_t * b)
-    uint8_t * pysam_bam1_qual( bam1_t * b)
-    uint8_t * pysam_bam1_aux( bam1_t * b)
-
-    # iterator implemenation
-    ctypedef struct bam_fetch_iterator_t:
-        pass
-  
-    bam_fetch_iterator_t* bam_init_fetch_iterator(bamFile fp, bam_index_t *idx, int tid, int beg, int end)
-  
-    bam1_t * bam_fetch_iterate(bam_fetch_iterator_t *iter)
-  
-    void bam_cleanup_fetch_iterator(bam_fetch_iterator_t *iter)
+    
diff --git a/pysam/csamtools.pyx b/pysam/csamtools.pyx
index 0da8d9e..242e68a 100644
--- a/pysam/csamtools.pyx
+++ b/pysam/csamtools.pyx
@@ -1,8 +1,12 @@
 # cython: embedsignature=True
+# cython: profile=True
 # adds doc-strings for sphinx
 
 import tempfile, os, sys, types, itertools, struct, ctypes
 
+from python_string cimport PyString_FromStringAndSize, PyString_AS_STRING
+from python_exc    cimport PyErr_SetString
+
 # defines imported from samtools
 DEF SEEK_SET = 0
 DEF SEEK_CUR = 1
@@ -36,6 +40,14 @@ DEF BAM_FDUP        =1024
 DEF BAM_CIGAR_SHIFT=4
 DEF BAM_CIGAR_MASK=((1 << BAM_CIGAR_SHIFT) - 1)
 
+DEF BAM_CMATCH     = 0
+DEF BAM_CINS       = 1
+DEF BAM_CDEL       = 2
+DEF BAM_CREF_SKIP  = 3
+DEF BAM_CSOFT_CLIP = 4
+DEF BAM_CHARD_CLIP = 5
+DEF BAM_CPAD       = 6
+
 #####################################################################
 #####################################################################
 #####################################################################
@@ -48,15 +60,17 @@ cdef makeAlignedRead( bam1_t * src):
     dest = AlignedRead()
     # destroy dummy delegate created in constructor
     # to prevent memory leak.
-    pysam_bam_destroy1(dest._delegate)
+    bam_destroy1(dest._delegate)
     dest._delegate = bam_dup1(src)
     return dest
 
 cdef class PileupProxy
-cdef makePileupProxy( bam_plbuf_t * buf, int n ):
+cdef makePileupProxy( bam_pileup1_t * plp, int tid, int pos, int n ):
      cdef PileupProxy dest
      dest = PileupProxy()
-     dest.buf = buf
+     dest.plp = plp
+     dest.tid = tid
+     dest.pos = pos
      dest.n = n
      return dest
 
@@ -127,6 +141,7 @@ cdef int pileup_callback( uint32_t tid, uint32_t pos, int n, bam_pileup1_t *pl,
     p.n = n
     pileups = []
 
+    cdef int x
     for x from 0 <= x < n:
         pileups.append( makePileupRead( &(pl[x]) ) )
     p.pileups = pileups
@@ -185,6 +200,7 @@ VALID_HEADER_ORDER = { "HD" : ( "VN", "SO", "GO" ),
                        "RG" : ( "ID", "SM", "LB", "DS" , "PU" , "PI" , "CN" , "DT", "PL" ),
                        "PG" : ( "ID", "VN", "CL" ), }
 
+
 ######################################################################
 ######################################################################
 ######################################################################
@@ -229,9 +245,12 @@ cdef class Samfile:
     cdef bam_index_t *index
     # true if file is a bam file
     cdef int isbam
-
+    # true if file is not on the local filesystem
+    cdef int isremote
     # current read within iteration
     cdef bam1_t * b
+    # file opening mode
+    cdef char * mode
 
     def __cinit__(self, *args, **kwargs ):
         self.samfile = NULL
@@ -251,12 +270,13 @@ cdef class Samfile:
 
     def _open( self, 
                char * filename, 
-               mode ='r',
+               mode = 'r',
                Samfile template = None,
                referencenames = None,
                referencelengths = None,
-               char * text = NULL,
+               text = None,
                header = None,
+               port = None,
               ):
         '''open a sam/bam file.
 
@@ -277,6 +297,12 @@ cdef class Samfile:
 
         self.isbam = len(mode) > 1 and mode[1] == 'b'
 
+        self.isremote = strncmp(filename,"http:",5) == 0 or \
+            strncmp(filename,"ftp:",4) == 0 
+
+        cdef char * ctext
+        ctext = NULL
+
         if mode[0] == 'w':
             # open file for writing
             
@@ -306,11 +332,12 @@ cdef class Samfile:
                     header_to_write.target_name[x] = <char*>calloc(len(name)+1, sizeof(char))
                     strncpy( header_to_write.target_name[x], name, len(name) )
 
-                if text != NULL:
+                if text != None:
                     # copy without \0
-                    header_to_write.l_text = strlen(text)
-                    header_to_write.text = <char*>calloc( strlen(text), sizeof(char) )
-                    memcpy( header_to_write.text, text, strlen(text) )
+                    ctext = text
+                    header_to_write.l_text = strlen(ctext)
+                    header_to_write.text = <char*>calloc( strlen(ctext), sizeof(char) )
+                    memcpy( header_to_write.text, ctext, strlen(ctext) )
 
                 header_to_write.hash = NULL
                 header_to_write.rg2lib = NULL
@@ -327,7 +354,9 @@ cdef class Samfile:
 
         elif mode[0] == "r":
             # open file for reading
-            if strncmp( filename, "-", 1) != 0 and not os.path.exists( filename ):
+            if strncmp( filename, "-", 1) != 0 and \
+                    not self.isremote and \
+                    not os.path.exists( filename ):
                 raise IOError( "file `%s` not found" % filename)
 
             store = StderrStore()
@@ -337,15 +366,22 @@ cdef class Samfile:
         if self.samfile == NULL:
             raise IOError("could not open file `%s`" % filename )
 
+        # check for index and open if present
         if mode[0] == "r" and self.isbam:
-            if not os.path.exists(filename + ".bai"):
-                self.index = NULL
+
+            if not self.isremote:
+                if not os.path.exists(filename +".bai"): 
+                    self.index = NULL
+                else:
+                    # returns NULL if there is no index or index could not be opened
+                    self.index = bam_index_load(filename)
+                    if self.index == NULL:
+                        raise IOError("error while opening index `%s` " % filename )
             else:
-                # returns NULL if there is no index or index could not be opened
                 self.index = bam_index_load(filename)
                 if self.index == NULL:
                     raise IOError("error while opening index `%s` " % filename )
-
+                                    
     def getrname( self, tid ):
         '''(tid )
         convert numerical :term:`tid` into :ref:`reference` name.'''
@@ -394,6 +430,22 @@ cdef class Samfile:
             if not 0 <= rend < max_pos: raise ValueError( 'end out of range (%i)' % rend )
 
         return region, rtid, rstart, rend
+    
+    def seek( self, uint64_t offset, int where = 0):
+        '''move current file position to *offset*'''
+
+        if not self._isOpen():
+            raise ValueError( "I/O operation on closed file" )
+        if not self.isbam:
+            raise NotImplementedError("seek only available in bam files")
+        return bam_seek( self.samfile.x.bam, offset, where )
+
+    def tell( self ):
+        '''return current file position (bam files only; raises ValueError if the file is closed)'''
+        if not self._isOpen(): raise ValueError( "I/O operation on closed file" )
+        if not self.isbam:
+            raise NotImplementedError("tell only available in bam files")
+        return bam_tell( self.samfile.x.bam )
 
     def fetch( self, 
                reference = None, 
@@ -428,16 +480,24 @@ cdef class Samfile:
 
         if not self._isOpen():
             raise ValueError( "I/O operation on closed file" )
-
+        
         region, rtid, rstart, rend = self._parseRegion( reference, start, end, region )
 
         if self.isbam:
+            if not until_eof and not self._hasIndex() and not self.isremote: 
+                raise ValueError( "fetch called on bamfile without index" )
+
             if callback:
                 if not region:
                     raise ValueError( "callback functionality requires a region/reference" )
                 if not self._hasIndex(): raise ValueError( "no index available for fetch" )
                 return bam_fetch(self.samfile.x.bam, 
-                                 self.index, rtid, rstart, rend, <void*>callback, fetch_callback )
+                                 self.index, 
+                                 rtid, 
+                                 rstart, 
+                                 rend, 
+                                 <void*>callback, 
+                                 fetch_callback )
             else:
                 if region:
                     return IteratorRow( self, rtid, rstart, rend )
@@ -453,7 +513,12 @@ cdef class Samfile:
                         for rtid from 0 <= rtid < self.nreferences: 
                             i.append( IteratorRow( self, rtid, rstart, rend))
                         return itertools.chain( *i )
-        else:                    
+        else:   
+            # check if header is present - otherwise sam_read1 aborts
+            # this happens if a bamfile is opened with mode 'r'
+            if self.samfile.header.n_targets == 0:
+                raise ValueError( "fetch called for samfile without header")
+                  
             if region != None:
                 raise ValueError ("fetch for a region is not available for sam files" )
             if callback:
@@ -528,11 +593,11 @@ cdef class Samfile:
             self.samfile = NULL
 
     def __dealloc__( self ):
-        '''clean up.'''
         # remember: dealloc cannot call other methods
-        # Note that __del__ is not called.
+        # note: no doc string
+        # note: __del__ is not called.
         self.close()
-        pysam_bam_destroy1(self.b)
+        bam_destroy1(self.b)
 
     def write( self, AlignedRead read ):
         '''(AlignedRead read )
@@ -542,6 +607,13 @@ cdef class Samfile:
         '''
         return samwrite( self.samfile, read._delegate )
 
+    def __enter__(self):
+        return self
+    
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.close()
+        return False
+
     property nreferences:
         '''number of :term:`reference` sequences in the file.'''
         def __get__(self):
@@ -567,13 +639,7 @@ cdef class Samfile:
     property text:
         '''full contents of the :term:`sam file` header as a string.'''
         def __get__(self):
-            # create a temporary 0-terminated copy
-            cdef char * t
-            t = <char*>calloc( self.samfile.header.l_text + 1, sizeof(char) )
-            memcpy( t, self.samfile.header.text, self.samfile.header.l_text )
-            result = t
-            free(t)
-            return result
+            return PyString_FromStringAndSize(self.samfile.header.text, self.samfile.header.l_text)
 
     property header:
         '''header information within the :term:`sam file`. The records and fields are returned as 
@@ -730,6 +796,10 @@ cdef class Fastafile:
         '''return true if samfile has been opened.'''
         return self.fastafile != NULL
 
+    def __len__(self):
+        if self.fastafile == NULL: raise ValueError( "I/O operation on closed file" )  # explicit check: asserts are skipped under -O
+        return faidx_fetch_nseq(self.fastafile)
+
     def _open( self, 
                char * filename ):
         '''open an indexed fasta file.
@@ -758,8 +828,14 @@ cdef class Fastafile:
                
         '''*(reference = None, start = None, end = None, region = None)*
                
-        fetch :meth:`AlignedRead` objects in a :term:`region` using 0-based indexing. The region is specified by
-        :term:`reference`, *start* and *end*. Alternatively, a samtools :term:`region` string can be supplied.
+        fetch :meth:`AlignedRead` objects in a :term:`region` using 0-based indexing. 
+        The region is specified by :term:`reference`, *start* and *end*. 
+
+        If *reference* is given and *start* is None, the sequence from the 
+        first base is returned. Similarly, if *end* is None, the sequence 
+        until the last base is returned.
+        
+        Alternatively, a samtools :term:`region` string can be supplied.
         '''
         
         if not self._isOpen():
@@ -770,59 +846,85 @@ cdef class Fastafile:
         max_pos = 2 << 29
 
         if not region:
-            if reference == None: raise ValueError( 'no sequence/region supplied.' )
-            if start == None and end == None:
-                region = "%s" % str(reference)
-            elif start == None or end == None:
-                raise ValueError( 'only start or only end of region supplied' )
-            else:
-                if start > end: raise ValueError( 'invalid region: start (%i) > end (%i)' % (start, end) )
-		# valid ranges are from 0 to 2^29-1
-                if not 0 <= start < max_pos: raise ValueError( 'start out of range (%i)' % start )
-                if not 0 <= end < max_pos: raise ValueError( 'end out of range (%i)' % end )
-                region = "%s:%i-%i" % (reference, start+1, end )
-
-        # samtools adds a '\0' at the end
-        seq = fai_fetch( self.fastafile, region, &len )
+            if reference is None: raise ValueError( 'no sequence/region supplied.' )
+            if start is None: start = 0
+            if end is None: end = max_pos -1
+
+            if start > end: raise ValueError( 'invalid region: start (%i) > end (%i)' % (start, end) )
+            if start == end: return ""
+            # valid ranges are from 0 to 2^29-1
+            if not 0 <= start < max_pos: raise ValueError( 'start out of range (%i)' % start )
+            if not 0 <= end < max_pos: raise ValueError( 'end out of range (%i)' % end )
+
+            seq = faidx_fetch_seq(self.fastafile, reference, 
+                                  start,
+                                  end-1, &len)
+        else:
+            # samtools adds a '\0' at the end
+            seq = fai_fetch( self.fastafile, region, &len )
+
         # copy to python
-        result = seq
-        # clean up
-        free(seq)
+        if seq == NULL: 
+            return ""
+        else:
+            result = seq
+            # clean up
+            free(seq)
         
         return result
 
+###########################################################################
+###########################################################################
+###########################################################################
 ## turning callbacks elegantly into iterators is an unsolved problem, see the following threads:
 ## http://groups.google.com/group/comp.lang.python/browse_frm/thread/0ce55373f128aa4e/1d27a78ca6408134?hl=en&pli=1
 ## http://www.velocityreviews.com/forums/t359277-turning-a-callback-function-into-a-generator.html
 ## Thus I chose to rewrite the functions requiring callbacks. The downside is that if the samtools C-API or code
 ## changes, the changes have to be manually entered.
-
 cdef class IteratorRow:
     """iterates over mapped reads in a region.
+
+    The samtools iterators assume that the file
+    position between iterations does not change.
+    As a consequence, no two iterators can work
+    on the same file. To permit this, each iterator
+    creates its own file handle by re-opening the
+    file.
+
+    Note that the index will be shared between 
+    samfile and the iterator.
     """
     
-    cdef bam_fetch_iterator_t*  bam_iter # iterator state object
+    cdef bam_iter_t             iter # iterator state object
     cdef bam1_t *               b
-    cdef                        error_msg
-    cdef int                    error_state
+    cdef int                    retval
     cdef Samfile                samfile
+    cdef samfile_t              * fp
+
     def __cinit__(self, Samfile samfile, int tid, int beg, int end ):
-        self.bam_iter = NULL
 
         assert samfile._isOpen()
         assert samfile._hasIndex()
         
         # makes sure that samfile stays alive as long as the
-        # iterator is alive.
+        # iterator is alive
         self.samfile = samfile
 
-        # parse the region
-        self.error_state = 0
-        self.error_msg = None
+        if samfile.isbam: mode = "rb"
+        else: mode = "r"
+
+        # reopen the file
+        store = StderrStore()
+        self.fp = samopen( samfile.filename, mode, NULL )
+        store.release()
 
-        cdef bamFile  fp
-        fp = samfile.samfile.x.bam
-        self.bam_iter = bam_init_fetch_iterator(fp, samfile.index, tid, beg, end)
+        self.retval = 0
+
+        self.iter = bam_iter_query(self.samfile.index, 
+                                   tid, 
+                                   beg, 
+                                   end)
+        self.b = bam_init1()
 
     def __iter__(self):
         return self 
@@ -832,29 +934,21 @@ cdef class IteratorRow:
 
     cdef int cnext(self):
         '''cversion of iterator. Used by IteratorColumn'''
-        self.b = bam_fetch_iterate(self.bam_iter)
-        if self.b == NULL: return 0
-        return 1
-
+        self.retval = bam_iter_read( self.fp.x.bam, 
+                                     self.iter, 
+                                     self.b)
+        
     def __next__(self): 
         """python version of next().
-
-        pyrex uses this non-standard name instead of next()
         """
-        if self.error_state:
-            raise ValueError( self.error_msg)
-        
-        self.b = bam_fetch_iterate(self.bam_iter)
-        if self.b != NULL:
-            return makeAlignedRead( self.b )
-        else:
-            raise StopIteration
+        self.cnext()
+        if self.retval < 0: raise StopIteration
+        return makeAlignedRead( self.b )
 
     def __dealloc__(self):
-        '''remember: dealloc cannot call other methods!'''
-        if self.bam_iter:
-            bam_cleanup_fetch_iterator(self.bam_iter)
-        
+        bam_iter_destroy(self.iter)   # release the iterator state obtained from bam_iter_query
+        bam_destroy1(self.b)
+        samclose( self.fp )
 cdef class IteratorRowAll:
     """iterates over all mapped reads
     """
@@ -866,7 +960,13 @@ cdef class IteratorRowAll:
 
         assert samfile._isOpen()
 
-        self.fp = samfile.samfile
+        if samfile.isbam: mode = "rb"
+        else: mode = "r"
+
+        # reopen the file to avoid iterator conflict
+        store = StderrStore()
+        self.fp = samopen( samfile.filename, mode, NULL )
+        store.release()
 
         # allocate memory for alignment
         self.b = <bam1_t*>calloc(1, sizeof(bam1_t))
@@ -895,9 +995,18 @@ cdef class IteratorRowAll:
             raise StopIteration
 
     def __dealloc__(self):
-        '''remember: dealloc cannot call other methods!'''
-        pysam_bam_destroy1(self.b)
-        
+        bam_destroy1(self.b)
+        samclose( self.fp )
+
+ctypedef struct __iterdata:
+    bamFile fp
+    bam_iter_t iter
+
+cdef int __advance( void * data, bam1_t * b ):
+    cdef __iterdata * d
+    d = <__iterdata*>data
+    return bam_iter_read( d.fp, d.iter, b )
+
 cdef class IteratorColumn:
     '''iterates over columns.
 
@@ -922,79 +1031,138 @@ cdef class IteratorColumn:
     Here, result will be a list of ``n`` lists of objects of type :class:`PileupRead`.
 
     '''
-    cdef bam_plbuf_t *buf
 
-    # check if first iteration
-    cdef int notfirst
     # result of the last plbuf_push
-    cdef int n_pu
-    cdef int eof 
     cdef IteratorRow iter
-
+    cdef int tid
+    cdef int pos
+    cdef int n_plp
+    cdef bam_pileup1_t * plp
+    cdef bam_plp_t pileup_iter
+    cdef __iterdata iterdata 
     def __cinit__(self, Samfile samfile, int tid, int start, int end ):
 
         self.iter = IteratorRow( samfile, tid, start, end )
-        self.buf = bam_plbuf_init(NULL, NULL )
-        self.n_pu = 0
-        self.eof = 0
+        self.iterdata.fp = samfile.samfile.x.bam
+        self.iterdata.iter = self.iter.iter
+
+        self.pileup_iter = bam_plp_init( &__advance, &self.iterdata )
+        self.n_plp = 0
+        self.tid = 0
+        self.pos = 0
+        self.plp = NULL
 
     def __iter__(self):
         return self 
 
     cdef int cnext(self):
         '''perform next iteration.
-        
-        return 1 if there is a buffer to emit. Return 0 for end of iteration.
         '''
+        self.plp = bam_plp_auto( self.pileup_iter, 
+                                 &self.tid,
+                                 &self.pos,
+                                 &self.n_plp )
 
-        cdef int retval1, retval2
+    def __next__(self): 
+        """python version of next().
 
-        # pysam bam_plbuf_push returns:
-        # 1: if buf is full and can be emitted
-        # 0: if b has been added
-        # -1: if there was an error
+        pyrex uses this non-standard name instead of next()
+        """
+        self.cnext()
+        if self.n_plp < 0:
+            raise ValueError("error during iteration" )
+        
+        if self.plp == NULL:
+            raise StopIteration
 
-        # check if previous plbuf was incomplete. If so, continue within
-        # the loop and yield if necessary
-        if self.n_pu > 0:
-            self.n_pu = pysam_bam_plbuf_push( self.iter.getCurrent(), self.buf, 1)
-            if self.n_pu > 0: return 1
+        return makePileupProxy( self.plp, self.tid, self.pos, self.n_plp )
 
-        if self.eof: return 0
+    def __dealloc__(self):
+        bam_plp_destroy(self.pileup_iter)
+
+cdef inline int32_t query_start(bam1_t *src) except -1:
+    cdef uint32_t * cigar_p, op
+    cdef uint32_t k
+    cdef uint32_t start_offset = 0
+
+    if src.core.n_cigar:
+        cigar_p = bam1_cigar(src);
+        for k from 0 <= k < src.core.n_cigar:
+            op = cigar_p[k] & BAM_CIGAR_MASK
+            if op==BAM_CHARD_CLIP:
+                if start_offset!=0 and start_offset!=src.core.l_qseq:
+                    PyErr_SetString(ValueError, 'Invalid clipping in CIGAR string')
+                    return -1
+            elif op==BAM_CSOFT_CLIP:
+                start_offset += cigar_p[k] >> BAM_CIGAR_SHIFT
+            else:
+                break
+
+    return start_offset
+
+
+cdef inline int32_t query_end(bam1_t *src) except -1:
+    cdef uint32_t * cigar_p, op
+    cdef uint32_t k
+    cdef uint32_t end_offset = src.core.l_qseq
+
+    if src.core.n_cigar>1:
+        cigar_p = bam1_cigar(src);
+        for k from src.core.n_cigar > k >= 1:
+            op = cigar_p[k] & BAM_CIGAR_MASK
+            if op==BAM_CHARD_CLIP:
+                if end_offset!=0 and end_offset!=src.core.l_qseq:
+                    PyErr_SetString(ValueError, 'Invalid clipping in CIGAR string')
+                    return -1
+            elif op==BAM_CSOFT_CLIP:
+                end_offset -= cigar_p[k] >> BAM_CIGAR_SHIFT
+            else:
+                break
 
-        # get next alignments and submit until plbuf indicates that
-        # an new column has finished
-        while self.n_pu == 0:
-            retval1 = self.iter.cnext()
-            # wrap up if no more input
-            if retval1 == 0: 
-                self.n_pu = pysam_bam_plbuf_push( NULL, self.buf, 0)            
-                self.eof = 1
-                return self.n_pu
+    if end_offset==0:
+        end_offset = src.core.l_qseq
 
-            # submit to plbuf
-            self.n_pu = pysam_bam_plbuf_push( self.iter.getCurrent(), self.buf, 0)            
-            if self.n_pu < 0: raise ValueError( "error while iterating" )
+    return end_offset
 
-        # plbuf has yielded
-        return 1
 
-    def __next__(self): 
-        """python version of next().
+cdef inline object get_seq_range(bam1_t *src, uint32_t start, uint32_t end):
+    cdef uint8_t * p
+    cdef uint32_t k
+    cdef char * s
+    cdef char * bam_nt16_rev_table = "=ACMGRSVTWYHKDBN"
 
-        pyrex uses this non-standard name instead of next()
-        """
-        cdef int ret
-        ret = self.cnext()
-        cdef bam_pileup1_t * pl
+    if not src.core.l_qseq:
+        return None
 
-        if ret > 0 :
-            return makePileupProxy( self.buf, self.n_pu )
-        else:
-            raise StopIteration
+    seq = PyString_FromStringAndSize(NULL, end-start)
+    s   = PyString_AS_STRING(seq)
+    p   = bam1_seq(src)
 
-    def __dealloc__(self):
-        bam_plbuf_destroy(self.buf);
+    for k from start <= k < end:
+        # equivalent to bam_nt16_rev_table[bam1_seqi(s, i)] (see bam.c)
+        # note: do not use string literal as it will be a python string
+        s[k-start] = bam_nt16_rev_table[p[k/2] >> 4 * (1 - k%2) & 0xf]
+
+    return seq
+
+
+cdef inline object get_qual_range(bam1_t *src, uint32_t start, uint32_t end):
+    cdef uint8_t * p
+    cdef uint32_t k
+    cdef char * q
+
+    p = bam1_qual(src)
+    if p[0] == 0xff:
+        return None
+
+    qual = PyString_FromStringAndSize(NULL, end-start)
+    q    = PyString_AS_STRING(qual)
+
+    for k from start <= k < end:
+        ## equivalent to t[i] + 33 (see bam.c)
+        q[k-start] = p[k] + 33
+
+    return qual
 
 cdef class AlignedRead:
     '''
@@ -1030,8 +1198,7 @@ cdef class AlignedRead:
         self._delegate.data_len = 0
 
     def __dealloc__(self):
-        '''clear up memory.'''
-        pysam_bam_destroy1(self._delegate)
+        bam_destroy1(self._delegate)
     
     def __str__(self):
         """todo"""
@@ -1046,10 +1213,12 @@ cdef class AlignedRead:
                                    self.tags)))
     
        
-    def __cmp__(self, AlignedRead other):
-        '''return true, if contents in this are binary equal to ``other``.'''
+    def compare(self, AlignedRead other):
+        '''return -1,0,1, if contents in this are binary <,=,> to *other*'''
+
         cdef int retval, x
         cdef bam1_t *t, *o
+
         t = self._delegate
         o = other._delegate
 
@@ -1062,16 +1231,20 @@ cdef class AlignedRead:
         # oo = <unsigned char*>(o.data)
         # for x from 0 <= x < max(t.data_len, o.data_len): print x, tt[x], oo[x], chr(tt[x]), chr(oo[x])
 
-        retval = memcmp( &t.core, 
-                          &o.core, 
-                          sizeof( bam1_core_t ))
+        # Fast-path test for object identity
+        if t==o:
+            return 0
+
+        retval = memcmp(&t.core, &o.core, sizeof(bam1_core_t))
 
         if retval: return retval
-        retval = cmp( t.data_len, o.data_len)
+        retval = cmp(t.data_len, o.data_len)
         if retval: return retval
-        return memcmp( t.data, 
-                       o.data, 
-                       sizeof( t.data_len ))
+        return memcmp(t.data, o.data, t.data_len)
+
+    # Disabled so long as __cmp__ is a special method
+    def __hash__(self):
+        return _Py_HashPointer(<void *>self)
 
     property qname:
         """the query name (None if not present)"""
@@ -1079,7 +1252,7 @@ cdef class AlignedRead:
             cdef bam1_t * src 
             src = self._delegate
             if src.core.l_qname == 0: return None
-            return <char *>pysam_bam1_qname( src )
+            return <char *>bam1_qname( src )
 
         def __set__(self, qname ):
             if qname == None or len(qname) == 0: return
@@ -1088,7 +1261,7 @@ cdef class AlignedRead:
             cdef char * p
 
             src = self._delegate            
-            p = pysam_bam1_qname( src )
+            p = bam1_qname( src )
 
             # the qname is \0 terminated
             l = len(qname) + 1
@@ -1101,7 +1274,7 @@ cdef class AlignedRead:
 
             # re-acquire pointer to location in memory
             # as it might have moved
-            p = pysam_bam1_qname(src)
+            p = bam1_qname(src)
 
             strncpy( p, qname, l )
             
@@ -1112,11 +1285,13 @@ cdef class AlignedRead:
             cdef uint32_t * cigar_p
             cdef bam1_t * src 
             cdef op, l, cigar
+            cdef int k
+
             src = self._delegate
             if src.core.n_cigar == 0: return None
             
             cigar = []
-            cigar_p = pysam_bam1_cigar(src);
+            cigar_p = bam1_cigar(src);
             for k from 0 <= k < src.core.n_cigar:
                 op = cigar_p[k] & BAM_CIGAR_MASK
                 l = cigar_p[k] >> BAM_CIGAR_SHIFT
@@ -1135,7 +1310,7 @@ cdef class AlignedRead:
             src = self._delegate
 
             # get location of cigar string
-            p = pysam_bam1_cigar(src)
+            p = bam1_cigar(src)
 
             # create space for cigar data within src.data
             pysam_bam_update( src, 
@@ -1148,7 +1323,7 @@ cdef class AlignedRead:
 
             # re-acquire pointer to location in memory
             # as it might have moved
-            p = pysam_bam1_cigar(src)
+            p = bam1_cigar(src)
 
             # insert cigar operations
             for op, l in values:
@@ -1159,24 +1334,16 @@ cdef class AlignedRead:
             src.core.bin = bam_reg2bin( src.core.pos, bam_calend( &src.core, p))
 
     property seq:
-        """the query sequence (None if not present)"""
+        """read sequence bases, including :term:`soft clipped` bases (None if not present)"""
         def __get__(self):
             cdef bam1_t * src
-            cdef uint8_t * p 
             cdef char * s
+
             src = self._delegate
-            bam_nt16_rev_table = "=ACMGRSVTWYHKDBN"
-            ## parse qseq (bam1_seq)
+
             if src.core.l_qseq == 0: return None
 
-            s = < char *> calloc(src.core.l_qseq + 1 , sizeof(char))
-            p = pysam_bam1_seq( src )
-            for k from 0 <= k < src.core.l_qseq:
-            ## equivalent to bam_nt16_rev_table[bam1_seqi(s, i)] (see bam.c)
-                s[k] = "=ACMGRSVTWYHKDBN"[((p)[(k) / 2] >> 4 * (1 - (k) % 2) & 0xf)]
-            retval=s
-            free(s)
-            return retval
+            return get_seq_range(src, 0, src.core.l_qseq)
 
         def __set__(self,seq):
             # samtools manages sequence and quality length memory together
@@ -1186,9 +1353,10 @@ cdef class AlignedRead:
             cdef bam1_t * src
             cdef uint8_t * p 
             cdef char * s
-            src = self._delegate
             cdef int l, k, nbytes_new, nbytes_old
 
+            src = self._delegate
+
             l = len(seq)
             
             # as the sequence is stored in half-bytes, the total length (sequence
@@ -1196,7 +1364,7 @@ cdef class AlignedRead:
             nbytes_new = (l+1)/2 + l
             nbytes_old = (src.core.l_qseq+1)/2 + src.core.l_qseq
             # acquire pointer to location in memory
-            p = pysam_bam1_seq( src )
+            p = bam1_seq( src )
             src.core.l_qseq = l
 
             pysam_bam_update( src, 
@@ -1205,7 +1373,7 @@ cdef class AlignedRead:
                               p)
             # re-acquire pointer to location in memory
             # as it might have moved
-            p = pysam_bam1_seq( src )
+            p = bam1_seq( src )
             for k from 0 <= k < nbytes_new: p[k] = 0
             # convert to C string
             s = seq
@@ -1213,38 +1381,32 @@ cdef class AlignedRead:
                 p[k/2] |= pysam_translate_sequence(s[k]) << 4 * (1 - k % 2)
 
             # erase qualities
-            p = pysam_bam1_qual( src )
+            p = bam1_qual( src )
             p[0] = 0xff
 
+
     property qual:
-        """the base quality (None if not present)"""
+        """read sequence base qualities, including :term:`soft clipped` bases (None if not present)"""
         def __get__(self):
-            cdef bam1_t * src 
-            cdef uint8_t * p
+
+            cdef bam1_t * src
             cdef char * q
+
             src = self._delegate
-            if src.core.l_qseq == 0: return None
 
-            p = pysam_bam1_qual( src )
-            if p[0] == 0xff: return None
+            if src.core.l_qseq == 0: return None
 
-            q = < char *>calloc(src.core.l_qseq + 1 , sizeof(char))
-            for k from 0 <= k < src.core.l_qseq:
-            ## equivalent to t[i] + 33 (see bam.c)
-                q[k] = p[k] + 33
-            # convert to python string
-            retval=q
-            # clean up
-            free(q)
-            return retval
+            return get_qual_range(src, 0, src.core.l_qseq)
 
         def __set__(self,qual):
             # note that space is already allocated via the sequences
             cdef bam1_t * src
             cdef uint8_t * p
             cdef char * q 
+            cdef int k
+
             src = self._delegate
-            p = pysam_bam1_qual( src )
+            p = bam1_qual( src )
             if qual == None or len(qual) == 0:
                 # if absent - set to 0xff
                 p[0] = 0xff
@@ -1259,8 +1421,74 @@ cdef class AlignedRead:
             for k from 0 <= k < l:
                 p[k] = <uint8_t>q[k] - 33
 
+    property query:
+        """aligned portion of the read, excluding any flanking bases that were :term:`soft clipped` (None if not present)
+
+        SAM/BAM files may include extra flanking base sequences that were
+        not part of the alignment.  These bases may be the result of the
+        Smith-Waterman or other algorithms, which may not require alignments
+        that begin at the first residue or end at the last.  In addition,
+        extra sequencing adapters, multiplex identifiers, and low-quality bases that
+        were not considered for alignment may have been retained."""
+
+        def __get__(self):
+            cdef bam1_t * src
+            cdef uint32_t start, end
+
+            src = self._delegate
+
+            # no sequence stored for this read
+            if src.core.l_qseq == 0: return None
+
+            # slice the sequence to [query_start, query_end)
+            start = query_start(src)
+            end   = query_end(src)
+            return get_seq_range(src, start, end)
+
+    property qqual:
+        """aligned query sequence quality values (None if not present)"""
+        def __get__(self):
+            cdef bam1_t * src
+            cdef uint32_t start, end
+
+            src = self._delegate
+
+            # no sequence, hence no qualities, stored for this read
+            if src.core.l_qseq == 0: return None
+
+            # qualities restricted to [query_start, query_end)
+            start = query_start(src)
+            end   = query_end(src)
+            return get_qual_range(src, start, end)
+
+    property qstart:
+        """index at which the aligned query portion of the sequence starts
+        (0-based, inclusive)"""
+        def __get__(self): return query_start(self._delegate)
+
+    property qend:
+        """index at which the aligned query portion of the sequence ends
+        (0-based, exclusive)"""
+        def __get__(self): return query_end(self._delegate)
+
+    property qlen:
+        """Length of the aligned query sequence, i.e. the distance
+        between its end and start indices"""
+        def __get__(self):
+            cdef bam1_t * read = self._delegate
+            return query_end(read) - query_start(read)
+
     property tags:
-        """the tags in the AUX field."""
+        """the tags in the AUX field.
+        This property permits convenience access to 
+        the tags. Changes to the returned list will
+        not update the tags automatically. Instead,
+        the following is required for adding a 
+        new tag::
+
+            read.tags = read.tags + [("RG",0)]
+
+        """
         def __get__(self):
             cdef char * ctag
             cdef bam1_t * src
@@ -1270,7 +1498,7 @@ cdef class AlignedRead:
             src = self._delegate
             if src.l_aux == 0: return None
             
-            s = pysam_bam1_aux( src )
+            s = bam1_aux( src )
             result = []
             ctag = <char*>calloc( 3, sizeof(char) )
             cdef int x
@@ -1290,27 +1518,27 @@ cdef class AlignedRead:
                 # how do I do char literal comparison in cython?
                 # the code below works (i.e, is C comparison)
                 tpe = toupper(s[0])
-                if tpe == 'S'[0]:
+                if tpe == 'S':
                     value = <int>bam_aux2i(s)            
                     s += 2
-                elif tpe == 'I'[0]:
+                elif tpe == 'I':
                     value = <int>bam_aux2i(s)            
                     s += 4
-                elif tpe == 'F'[0]:
+                elif tpe == 'F':
                     value = <float>bam_aux2f(s)
                     s += 4
-                elif tpe == 'D'[0]:
+                elif tpe == 'D':
                     value = <double>bam_aux2d(s)
                     s += 8
-                elif tpe == 'C'[0]:
+                elif tpe == 'C':
                     value = <int>bam_aux2i(s)
                     s += 1
-                elif tpe == 'A'[0]:
+                elif tpe == 'A':
                     # there might a more efficient way
                     # to convert a char into a string
                     value = "%c" % <char>bam_aux2A(s)
                     s += 1
-                elif tpe == 'Z'[0]:
+                elif tpe == 'Z':
                     value = <char*>bam_aux2Z(s)
                     # +1 for NULL terminated string
                     s += len(value) + 1
@@ -1377,14 +1605,14 @@ cdef class AlignedRead:
             pysam_bam_update( src, 
                               src.l_aux,
                               offset,
-                              pysam_bam1_aux( src ) )
+                              bam1_aux( src ) )
             
             src.l_aux = offset
 
             if offset == 0: return
 
             # get location of new data
-            s = pysam_bam1_aux( src )            
+            s = bam1_aux( src )            
             
             # check if there is direct path from buffer.raw to tmp
             cdef char * temp 
@@ -1416,7 +1644,7 @@ cdef class AlignedRead:
             cdef bam1_t * src
             src = self._delegate
             if src.core.n_cigar:
-                src.core.bin = bam_reg2bin( src.core.pos, bam_calend( &src.core, pysam_bam1_cigar(src)) )
+                src.core.bin = bam_reg2bin( src.core.pos, bam_calend( &src.core, bam1_cigar(src)) )
             else:
                 src.core.bin = bam_reg2bin( src.core.pos, src.core.pos + 1)
             self._delegate.core.pos = pos
@@ -1427,6 +1655,27 @@ cdef class AlignedRead:
     property rlen:
         '''length of the read (read only). Returns 0 if not given.'''
         def __get__(self): return self._delegate.core.l_qseq
+    property aend:
+        '''aligned end position of the read (read only).  Returns None
+        when BAM_FUNMAP is set or the read has no CIGAR operations.'''
+        def __get__(self):
+            cdef bam1_t * read
+            read = self._delegate
+            if not (self.flag & BAM_FUNMAP) and read.core.n_cigar != 0:
+                return bam_calend(&read.core, bam1_cigar(read))
+            return None
+    property alen:
+        '''aligned length of the read (read only): the aligned end
+        minus the start position.  Returns None when BAM_FUNMAP is
+        set or the read has no CIGAR operations.'''
+        def __get__(self):
+            cdef bam1_t * read
+            read = self._delegate
+            if not (self.flag & BAM_FUNMAP) and read.core.n_cigar != 0:
+                return bam_calend(&read.core, bam1_cigar(read)) \
+                    - read.core.pos
+            return None
+
     property mapq: 
         """mapping quality"""
         def __get__(self): return self._delegate.core.qual
@@ -1585,9 +1834,11 @@ cdef class PileupProxy:
     If the underlying engine iterator advances, the results of this column
     will change.
     '''
-    cdef bam_plbuf_t * buf
+    cdef bam_pileup1_t * plp
+    cdef int tid
+    cdef int pos
     cdef int n_pu
-
+    
     def __cinit__(self ):
         pass
 
@@ -1598,7 +1849,7 @@ cdef class PileupProxy:
 
     property tid:
         '''the chromosome ID as is defined in the header'''
-        def __get__(self): return pysam_get_tid( self.buf )
+        def __get__(self): return self.tid
 
     property n:
         '''number of reads mapping to this column.'''
@@ -1606,18 +1857,17 @@ cdef class PileupProxy:
         def __set__(self, n): self.n_pu = n
 
     property pos:
-        def __get__(self): return pysam_get_pos( self.buf )
+        def __get__(self): return self.pos
 
     property pileups:
         '''list of reads (:class:`pysam.PileupRead`) aligned to this column'''
         def __get__(self):
-            cdef bam_pileup1_t * pl
-            pl = pysam_get_pileup( self.buf )
+            cdef int x
             pileups = []
             # warning: there could be problems if self.n and self.buf are
             # out of sync.
             for x from 0 <= x < self.n_pu:
-                pileups.append( makePileupRead( &pl[x]) )
+                pileups.append( makePileupRead( &(self.plp[x])) )
             return pileups
 
 cdef class PileupRead:
diff --git a/pysam/ctabix.pxd b/pysam/ctabix.pxd
new file mode 100644
index 0000000..ef735b6
--- /dev/null
+++ b/pysam/ctabix.pxd
@@ -0,0 +1,171 @@
+
+cdef extern from "string.h":
+  ctypedef int size_t
+  void *memcpy(void *dst,void *src,size_t len)
+  void *memmove(void *dst,void *src,size_t len)
+  void *memset(void *b,int c,size_t len)
+  char *strtok_r(char *str, char *delim, char **saveptr)
+  char *strncpy(char *dest, char *src, size_t n)
+  void *memchr(void *s, int c, size_t n)
+
+cdef extern from "stdlib.h":
+  void free(void *)
+  void *malloc(size_t)
+  void *calloc(size_t,size_t)
+  void *realloc(void *,size_t)
+  void qsort(void *base, size_t nmemb, size_t size,
+             int (*compar)(void *,void *))
+  int c_abs "abs" (int)
+  int atoi( char *nptr)
+  long atol( char *nptr)
+  double atof( char *nptr)
+
+cdef extern from "stdio.h":
+  ctypedef struct FILE:
+    pass
+  FILE *fopen(char *,char *)
+  FILE *freopen(char *path, char *mode, FILE *stream)
+  int fileno(FILE *stream)
+  int dup2(int oldfd, int newfd)
+  int fflush(FILE *stream)
+  # printf takes the format string first; it has no destination argument
+  FILE * stderr
+  FILE * stdout
+  int fclose(FILE *)
+  int sscanf(char *str,char *fmt,...)
+  int printf(char *fmt,...)
+  int sprintf(char *str,char *fmt,...)
+  int fprintf(FILE *ifile,char *fmt,...)
+  char *fgets(char *str,int size,FILE *ifile)
+
+cdef extern from "ctype.h":
+  int toupper(int c)
+  int tolower(int c)
+
+cdef extern from "sys/types.h":
+  pass
+
+cdef extern from "sys/stat.h":
+  pass
+
+cdef extern from "fcntl.h":
+  int open(char *pathname, int flags)
+  
+cdef extern from "unistd.h":
+  ctypedef int ssize_t
+  char *ttyname(int fd)
+  int isatty(int fd)  
+  ssize_t read(int fd, void *buf, size_t count)
+
+cdef extern from "string.h":
+  int strcmp(char *s1, char *s2)
+  int strncmp(char *s1,char *s2,size_t len)
+  char *strcpy(char *dest,char *src)
+  char *strncpy(char *dest,char *src, size_t len)
+  char *strdup(char *)
+  char *strcat(char *,char *)
+  size_t strlen(char *s)
+  int memcmp( void * s1, void *s2, size_t len )
+
+cdef extern from "stdint.h":
+  ctypedef int int64_t
+  ctypedef int int32_t
+  ctypedef int uint32_t
+  ctypedef int uint8_t
+  ctypedef int uint64_t
+
+cdef extern from "Python.h":
+    ctypedef struct FILE
+    FILE* PyFile_AsFile(object)
+    char *fgets(char *str, int size, FILE *ifile)
+    int feof(FILE *stream)
+    size_t strlen(char *s)
+    size_t getline(char **lineptr, size_t *n, FILE *stream)
+    char *strstr(char *, char *)
+    char *strchr(char *string, int c)
+    int fileno(FILE *stream)
+
+cdef extern from "bgzf.h":
+
+  ctypedef struct BGZF:
+    pass
+
+  int64_t bgzf_seek(BGZF* fp, int64_t pos, int where)
+
+  BGZF * bgzf_open(char * path, char * mode)
+
+  int bgzf_write(BGZF * fp, void* data, int length)
+
+  int bgzf_close(BGZF* fp)
+
+# tabix support
+cdef extern from "tabix.h":
+
+  ctypedef struct ti_index_t:
+    pass
+
+  ctypedef struct tabix_t: 
+    BGZF *fp
+    ti_index_t *idx
+    char *fn
+    char *fnidx
+
+  ctypedef struct ti_iter_t:
+    pass
+
+  ctypedef struct ti_conf_t:
+    int32_t preset
+    int32_t sc, bc, ec
+    int32_t meta_char, line_skip
+
+  tabix_t *ti_open(char *fn, char *fnidx)
+
+  int ti_lazy_index_load(tabix_t *t)
+
+  void ti_close(tabix_t *t)
+
+  ti_iter_t ti_query(tabix_t *t, char *name, int beg, int end)
+  ti_iter_t ti_queryi(tabix_t *t, int tid, int beg, int end)
+  ti_iter_t ti_querys(tabix_t *t, char *reg)
+  char * ti_read(tabix_t *t, ti_iter_t iter, int *len)
+
+  # Get the list of sequence names. Each "char*" pointer points to a
+  #	internal member of the index, so DO NOT modify the returned
+  #	 pointer; otherwise the index will be corrupted. The returned
+  #	pointer should be freed by a single free() call by the routine
+  #	calling this function. The number of sequences is returned at *n
+  char **ti_seqname(ti_index_t *idx, int *n)
+
+  
+  # Destroy the iterator
+  void ti_iter_destroy(ti_iter_t iter)
+
+  # Build the index for file <fn>. File <fn>.tbi will be generated
+  # and overwrite the file of the same name. Return -1 on failure.
+  int ti_index_build(char *fn, ti_conf_t *conf)
+
+  #/* Load the index from file <fn>.tbi. If <fn> is a URL and the index
+  #   * file is not in the working directory, <fn>.tbi will be
+  #   * downloaded. Return NULL on failure. */
+  ti_index_t *ti_index_load( char *fn)
+
+  ti_index_t *ti_index_load_local(char *fnidx)
+
+  #/* Destroy the index */
+  void ti_index_destroy(ti_index_t *idx)
+
+  #/* Parse a region like: chr2, chr2:100, chr2:100-200. Return -1 on failure. */
+  int ti_parse_region( ti_index_t *idx,  char *str, int *tid, int *begin, int *end)
+
+  int ti_get_tid( ti_index_t *idx,  char *name)
+
+  #  /* Get the iterator pointing to the first record at the current file
+  #   * position. If the file is just opened, the iterator points to the
+  #   * first record in the file. */
+  ti_iter_t ti_iter_first()
+
+  #  /* Get the iterator pointing to the first record in region tid:beg-end */
+  ti_iter_t ti_iter_query( ti_index_t *idx, int tid, int beg, int end)
+
+  #  /* Get the data line pointed by the iterator and iterate to the next record. */
+  # char *ti_iter_read(BGZF *fp, ti_iter_t iter, int *len)
diff --git a/pysam/ctabix.pyx b/pysam/ctabix.pyx
new file mode 100644
index 0000000..8715e5d
--- /dev/null
+++ b/pysam/ctabix.pyx
@@ -0,0 +1,881 @@
+# cython: embedsignature=True
+# adds doc-strings for sphinx
+
+import tempfile, os, sys, types, itertools, struct, ctypes
+
+cdef class Tabixfile:
+    '''*(filename, mode='r')*
+
+    opens a :term:`tabix file` for reading. A missing
+    index (*filename* + ".tbi") will raise an exception.
+    '''
+    # NOTE(review): borrowed from the string passed to _open - confirm lifetime
+    cdef char * filename
+
+    # pointer to tabixfile
+    cdef tabix_t * tabixfile
+
+    def __cinit__(self, *args, **kwargs ):
+        self.tabixfile = NULL
+        self._open( *args, **kwargs )
+
+    def _isOpen( self ):
+        '''return true if the tabix file has been opened.'''
+        return self.tabixfile != NULL
+
+    def _open( self, 
+               char * filename, 
+               mode ='r',
+              ):
+        '''open a :term:`tabix file` for reading.
+        Raises IOError if the file or its index is missing.'''
+
+        assert mode in ( "r",), "invalid file opening mode `%s`" % mode
+
+        # close a previously opened file (this class defines no close() method)
+        if self.tabixfile != NULL: ti_close( self.tabixfile )
+        self.tabixfile = NULL
+
+        self.filename = filename
+        filename_index = filename + ".tbi"
+
+        if mode[0] == 'w':
+            # open file for writing
+            pass
+
+        elif mode[0] == "r":
+            # open file for reading
+            if not os.path.exists( self.filename ):
+                raise IOError( "file `%s` not found" % self.filename)
+
+            if not os.path.exists( filename_index ):
+                raise IOError( "index `%s` not found" % filename_index)
+
+            # open file and load index
+            self.tabixfile = ti_open( self.filename, filename_index )
+
+        if self.tabixfile == NULL:
+            raise IOError("could not open file `%s`" % filename )
+
+    def _parseRegion( self, 
+                      reference = None, 
+                      start = None, 
+                      end = None, 
+                      region = None ):
+        '''parse region information.
+
+        raise ValueError for invalid regions.
+
+        returns a tuple of region, tid, start and end. Region
+        is a valid samtools :term:`region` or None if the region
+        extends over the whole file.
+
+        Note that regions are 1-based, while start,end are python coordinates.
+        '''
+        ti_lazy_index_load( self.tabixfile )
+
+        cdef int rtid
+        cdef int rstart
+        cdef int rend
+        cdef int max_pos
+        max_pos = 2 << 29
+
+        rtid = rstart = rend = 0
+
+        # translate to a region
+        if reference:
+            if start != None and end != None:
+                region = "%s:%i-%i" % (reference, start+1, end)
+            elif start == None and end != None:
+                region = "%s:%i-%i" % (reference, 1, end)
+            elif end == None and start != None:
+                region = "%s:%i-%i" % (reference, start+1, max_pos-1)
+            else:
+                region = reference
+
+        if region:
+            ti_parse_region( self.tabixfile.idx, region, &rtid, &rstart, &rend)        
+            if rtid < 0: raise ValueError( "invalid region `%s`" % region )
+            if rstart > rend: raise ValueError( 'invalid region: start (%i) > end (%i)' % (rstart, rend) )
+            if not 0 <= rstart < max_pos: raise ValueError( 'start out of range (%i)' % rstart )
+            if not 0 <= rend < max_pos: raise ValueError( 'end out of range (%i)' % rend )
+
+        return region, rtid, rstart, rend
+
+    def fetch( self, 
+               reference = None,
+               start = None, 
+               end = None, 
+               region = None,
+               parser = None ):
+        '''
+
+        fetch one or more rows in a :term:`region` using 0-based indexing. The region is specified by
+        :term:`reference`, *start* and *end*. Alternatively, a samtools :term:`region` string can be supplied.
+
+        Without *reference* or *region* all entries will be fetched. 
+
+        If only *reference* is set, all rows matching on *reference* will be fetched.
+
+        If *parser* is None, the results are returned as an unparsed string.
+        Otherwise, *parser* is assumed to be a functor that will return parsed 
+        data (see for example :meth:`asTuple` and :meth:`asGTF`).
+        '''
+        # verify the handle before it is handed to the index loader
+        if not self._isOpen():
+            raise ValueError( "I/O operation on closed file" )
+        ti_lazy_index_load( self.tabixfile )
+
+        region, rtid, rstart, rend = self._parseRegion( reference, start, end, region )
+
+        if parser == None:
+            if region:
+                return TabixIterator( self, rtid, rstart, rend )
+            else:
+                return TabixIterator( self, -1, 0, 0 )
+        else:
+            if region:
+                return TabixIteratorParsed( self, rtid, rstart, rend, parser )
+            else:
+                return TabixIteratorParsed( self, -1, 0, 0, parser )
+
+    property contigs:
+       '''chromosome names'''
+       def __get__(self):
+           cdef char ** sequences
+           cdef int nsequences, x
+           ti_lazy_index_load( self.tabixfile )
+           sequences = ti_seqname( self.tabixfile.idx, &nsequences ) 
+           result = []
+           for x from 0 <= x < nsequences:
+               result.append( sequences[x] )
+           # the caller owns the array (but not the names it points to)
+           free( sequences )
+           return result
+            
+cdef class TabixIterator:
+    """iterates over rows in *tabixfile* in region
+    given by *tid*, *start* and *end*.
+
+    A negative *tid* iterates over all rows of the file.
+    """
+    cdef ti_iter_t iterator
+    cdef tabix_t * tabixfile
+    cdef Tabixfile owner
+
+    def __cinit__(self, Tabixfile tabixfile, 
+                  int tid, int start, int end ):
+        assert tabixfile._isOpen()
+
+        # hold a reference so that the Tabixfile object stays alive
+        # as long as the iterator is alive; the C handle is borrowed.
+        self.owner = tabixfile
+        self.tabixfile = tabixfile.tabixfile
+
+        if tid < 0:
+            # seek to start of file to ensure iteration is over
+            # all entries.
+            bgzf_seek( self.tabixfile.fp, 0, 0)
+            self.iterator = ti_iter_first()
+        else:
+            self.iterator = ti_queryi(self.tabixfile, tid, start, end) 
+
+        if <void*>self.iterator == NULL:
+            raise ValueError("malformatted query or wrong sequence name.\n")
+
+    def __iter__(self):
+        return self 
+
+    def __next__(self): 
+        """return the next row as a string.
+        pyrex uses this non-standard name instead of next()
+        """
+        cdef char * s
+        cdef int nbytes
+        s = ti_read(self.tabixfile, self.iterator, &nbytes)
+        if s == NULL: raise StopIteration
+        return s
+
+    def __dealloc__(self):
+        if <void*>self.iterator != NULL:
+            ti_iter_destroy(self.iterator)
+
+def toDot( v ):
+    '''return '.' if *v* is None, otherwise str(v).'''
+    if v is None: return "."
+    return str(v)
+
+def quote( v ):
+    '''return *v* in double quotes if it is a string, otherwise str(v).'''
+    # isinstance also recognises subclasses of the string types
+    if isinstance( v, types.StringTypes ):
+        return '"%s"' % v
+    return str(v)
+
+cdef class TupleProxy:
+    '''Proxy class for access to parsed row as a tuple.
+
+    This class represents a table row for fast read-access.
+    The row is split in place: every tab is overwritten with a
+    NUL byte and *fields* points at the start of each field.
+    '''
+
+    cdef:
+        char * data
+        char ** fields
+        int nfields
+        int index
+
+    def __cinit__(self ): 
+        self.data = NULL
+        self.fields = NULL
+        self.index = 0
+
+    cdef take( self, char * buffer, size_t nbytes ):
+        '''start presenting buffer.
+        Take ownership of the pointer (it is freed in __dealloc__).'''
+        self.data = buffer
+        self.update( buffer, nbytes )
+
+    cdef present( self, char * buffer, size_t nbytes ):
+        '''start presenting buffer.
+        Do not take ownership of the pointer.'''
+        self.update( buffer, nbytes )
+
+    cdef copy( self, char * buffer, size_t nbytes ):
+        '''start presenting buffer.
+        Take a copy of buffer.'''
+        cdef int s
+        # +1 for '\0'
+        s = sizeof(char) *  (nbytes + 1)
+        self.data = <char*>malloc( s ) 
+        if self.data == NULL:
+            raise MemoryError( "out of memory" )
+        memcpy( <char*>self.data, buffer, s )
+        self.update( self.data, nbytes )
+
+    cdef update( self, char * buffer, size_t nbytes ):
+        '''split *buffer* in place at tabs and index the fields.
+
+        *nbytes* does not include the terminating NUL byte.'''
+        cdef char * pos
+        cdef char * old_pos
+        cdef int field
+        cdef int max_fields
+
+        if buffer[nbytes] != 0:
+            raise ValueError( "incomplete line at %s" % buffer )
+
+        if self.fields != NULL:
+            free(self.fields)
+        # n bytes hold at most n tabs, i.e. at most n + 1 fields
+        max_fields = nbytes + 1
+        self.nfields = 0
+        self.fields = <char **>calloc( max_fields, sizeof(char *) ) 
+        if self.fields == NULL:
+            raise MemoryError( "out of memory" )
+
+        pos = buffer
+        self.fields[0] = pos
+        field = 1
+        old_pos = pos
+
+        while 1:
+            pos = <char*>memchr( pos, '\t', nbytes )
+            if pos == NULL: break
+            # check before writing so the table is never overrun
+            if field >= max_fields:
+                raise ValueError("row too large - more than %i fields" % max_fields )
+            pos[0] = '\0'
+            pos += 1
+            self.fields[field] = pos
+            field += 1
+            nbytes -= pos - old_pos
+            old_pos = pos
+
+        self.nfields = field
+
+    def __getitem__( self, key ):
+        cdef int i
+        i = key
+        if i < 0: i += self.nfields
+        if i >= self.nfields or i < 0:
+            raise IndexError( "list index out of range" )
+        return self.fields[i]
+
+    def __len__(self):
+        return self.nfields
+
+    def __dealloc__(self):
+        if self.data != NULL:
+            free(self.data)
+        if self.fields != NULL:
+            free(self.fields)
+
+    def __iter__(self):
+        self.index = 0
+        return self
+
+    def __next__(self): 
+        """return the next field, python version of next()."""
+        if self.index >= self.nfields:
+            raise StopIteration
+        self.index += 1
+        return self.fields[self.index-1]
+
+cdef class GTFProxy:
+    '''Proxy class for access to GTF fields.
+
+    This class represents a GTF entry for fast read-access.
+    Write-access has been added as well, though some care must
+    be taken. If any of the string fields (contig, source, ...)
+    are set, the new value is tied to the lifetime of the
+    argument that was supplied.
+
+    The only exception is the attributes field when set from
+    a dictionary - this field will manage its own memory.
+
+    '''
+
+    cdef:
+        char * contig
+        char * source
+        char * feature
+        uint32_t start
+        uint32_t end
+        char * score
+        char * strand
+        char * frame
+        char * attributes
+        int nbytes
+        char * data
+        cdef bint isModified
+        cdef bint hasOwnAttributes
+
+    def __cinit__(self ): 
+        self.data = NULL
+        self.isModified = False
+        self.hasOwnAttributes = False
+
+    cdef take( self, char * buffer, size_t nbytes ):
+        '''start presenting buffer.
+
+        Take ownership of the pointer.
+        '''
+        self.data = buffer
+        self.update( buffer, nbytes )
+        self.isModified = False
+
+    cdef present( self, char * buffer, size_t nbytes ):
+        '''start presenting buffer.
+
+        Do not take ownership of the pointer.
+        '''
+        self.update( buffer, nbytes )
+        self.isModified = False
+
+    cdef copy( self, char * buffer, size_t nbytes ):
+        '''start presenting buffer.
+
+        Take a copy of buffer.
+        '''
+        cdef int s
+        # +1 for '\0'
+        s = sizeof(char) *  (nbytes + 1)
+        self.data = <char*>malloc( s ) 
+        memcpy( <char*>self.data, buffer, s )
+        self.update( self.data, nbytes )
+        self.isModified = False
+
+    cdef update( self, char * buffer, size_t nbytes ):
+        '''update internal data.
+
+        nbytes does not include the terminal '\0'.
+        '''
+        cdef int end
+        cdef char * cstart, * cend, * cscore
+        self.contig = buffer
+        self.nbytes = nbytes
+        cdef char * pos
+
+        if buffer[nbytes] != 0:
+            raise ValueError( "incomplete line at %s" % buffer )
+        
+        pos = strchr( buffer, '\t' )
+        if pos == NULL: raise ValueError( "malformatted entry at %s" % buffer )
+        pos[0] = '\0'
+        pos += 1
+        self.source = pos
+
+        pos = strchr( pos, '\t' )
+        if pos == NULL: raise ValueError( "malformatted entry at %s" % buffer )
+        pos[0] = '\0'
+        pos += 1
+        self.feature = pos
+
+        pos = strchr( pos, '\t' )
+        if pos == NULL: raise ValueError( "malformatted entry at %s" % buffer )
+        pos[0] = '\0'
+        pos += 1
+        cstart = pos
+
+        pos = strchr( pos, '\t' )
+        if pos == NULL: raise ValueError( "malformatted entry at %s" % buffer )
+        pos[0] = '\0'
+        pos += 1
+        cend = pos
+
+        pos = strchr( pos, '\t' )
+        if pos == NULL: raise ValueError( "malformatted entry at %s" % buffer )
+        pos[0] = '\0'
+        pos += 1
+        self.score = pos
+
+        pos = strchr( pos, '\t' )
+        if pos == NULL: raise ValueError( "malformatted entry at %s" % buffer )
+        pos[0] = '\0'
+        pos += 1
+        self.strand = pos
+
+        pos = strchr( pos, '\t' )
+        if pos == NULL: raise ValueError( "malformatted entry at %s" % buffer )
+        pos[0] = '\0'
+        pos += 1
+        self.frame = pos
+
+        pos = strchr( pos, '\t' )
+        if pos == NULL: raise ValueError( "malformatted entry at %s" % buffer )
+        pos[0] = '\0'
+        pos += 1
+        self.attributes = pos
+        self.start = atoi( cstart ) - 1
+        self.end = atoi( cend )
+
+    property contig:
+       '''contig of feature.'''
+       def __get__( self ): return self.contig
+       def __set__( self, value ): 
+           self.isModified = True
+           self.contig = value
+
+    property feature:
+       '''feature name.'''
+       def __get__( self ): return self.feature
+       def __set__( self, value ): 
+           self.isModified = True
+           self.feature = value
+
+    property source:
+       '''feature source.'''
+       def __get__( self ): return self.source
+       def __set__( self, value ): 
+           self.isModified = True
+           self.source = value
+
+    property start:
+       '''feature start (in 0-based open/closed coordinates).'''
+       def __get__( self ): return self.start
+       def __set__( self, value ): 
+           self.isModified = True
+           self.start = value
+
+    property end:
+       '''feature end (in 0-based open/closed coordinates).'''
+       def __get__( self ): return self.end
+       def __set__( self, value ): 
+           self.isModified = True
+           self.end = value
+
+    property score:
+       '''feature score.'''
+       def __get__( self ): 
+           if self.score[0] == '.' and self.score[1] == '\0' :
+               return None
+           else:
+               return atof(self.score)
+       def __set__( self, value ): 
+           self.isModified = True
+           self.score = value
+
+    property strand:
+       '''feature strand.'''
+       def __get__( self ): return self.strand
+       def __set__( self, value ): 
+           self.isModified = True
+           self.strand = value
+
+    property frame:
+       '''feature frame.'''
+       def __get__( self ): return self.frame
+       def __set__( self, value ): 
+           self.isModified = True
+           self.frame = value
+
+    property attributes:
+       '''feature attributes (as a string).'''
+       def __get__( self ): return self.attributes
+       def __set__( self, value ): 
+           # drop a string owned since fromDict(); *value* belongs to the caller
+           if self.hasOwnAttributes: free(self.attributes)
+           self.hasOwnAttributes = False
+           self.isModified = True
+           self.attributes = value
+
+    def asDict( self ):
+        """parse attributes - return as dict
+        """
+        attributes = self.attributes
+
+        # separate into fields
+        fields = [ x.strip() for x in attributes.split(";")[:-1]]
+
+        result = {}
+
+        for f in fields:
+            # split only once so that multi-word values stay in one piece
+            d = [ x.strip() for x in f.split(" ", 1)]
+
+            n,v = d[0], d[1]
+
+            if v[0] == '"' and v[-1] == '"':
+                v = v[1:-1]
+            else:
+                ## try to convert to a value
+                try:
+                    v = float( v )
+                    v = int( v )
+                except ValueError:
+                    pass
+                except TypeError:
+                    pass
+
+            result[n] = v
+
+        return result
+
+    def fromDict( self, d ):
+        '''set attributes from a dictionary.'''
+        cdef char * p
+        cdef int l
+
+        aa = []
+        for k,v in d.items():
+            if isinstance(v, str):
+                aa.append( '%s "%s"' % (k,v) )
+            else:
+                aa.append( '%s %s' % (k,str(v)) )
+
+        a = "; ".join( aa ) + ";"
+        p = a
+        l = len(a)
+        # clean up if this field is set twice - only once the new string
+        # has been built, so that a failure above leaves the entry intact
+        if self.hasOwnAttributes:
+            free(self.attributes)
+        self.attributes = <char *>calloc( l + 1, sizeof(char) )
+        memcpy( self.attributes, p, l )
+
+        self.hasOwnAttributes = True
+        self.isModified = True
+
+    def __str__(self):
+        cdef char * cpy
+        cdef int x
+        if self.isModified:
+            return "\t".join( 
+                (self.contig, 
+                 self.source, 
+                 self.feature, 
+                 str(self.start+1),
+                 str(self.end),
+                 toDot(self.score),
+                 self.strand,
+                 self.frame,
+                 self.attributes ) )
+        else: 
+            # an unmodified entry still starts at self.contig; self.data is NULL after present()
+            cpy = <char*>calloc( self.nbytes+1, sizeof(char) )
+            memcpy( cpy, self.contig, self.nbytes+1)
+            for x from 0 <= x < self.nbytes:
+                if cpy[x] == '\0': cpy[x] = '\t'
+            result = cpy
+            free(cpy)
+            return result
+
+    def invert( self, int lcontig ):
+        '''invert coordinates to negative strand coordinates
+        
+        This method will only act if the feature is on the
+        negative strand.'''
+        if self.strand[0] == '-':
+            start = min(self.start, self.end)
+            end = max(self.start, self.end)
+            self.start, self.end = lcontig - end, lcontig - start
+            self.isModified = True
+
+    def keys( self ):
+        '''return a list of attributes defined in this entry.'''
+        r = self.attributes
+        return [ x.strip().split(" ")[0] for x in r.split(";") if x.strip() != '' ]
+
+    def __getitem__(self, item):
+        return self.__getattr__( item )
+
+    def __dealloc__(self):
+        if self.data != NULL:
+            free(self.data)
+        if self.hasOwnAttributes:
+            free(self.attributes)
+
+    def __getattr__(self, item ):
+        """Generic lookup of attribute from GFF/GTF attributes 
+        Only called if there *isn't* an attribute with this name
+        """
+        cdef char * start, * query, * cpy, * end
+        cdef char stop
+        cdef int l
+        query = item
+        l = strlen(query)
+        start = strstr( self.attributes, query)
+        if l == 0: start = NULL
+        # accept whole keys only: "gene_id" must not match inside "my_gene_id_2"
+        while start != NULL and not ((start[l] == ' ' or start[l] == '=') and (start == self.attributes or (start-1)[0] == ' ' or (start-1)[0] == ';')):
+            start = strstr( start + 1, query )
+        if start == NULL:
+            raise AttributeError("'GTFProxy' has no attribute '%s'" % item )
+        start += l + 1
+        while start[0] == " ": start += 1
+        # a quoted value ends at the closing quote, a bare one at the next ';'
+        stop = ';'
+        if start[0] == '"':
+            start += 1
+            stop = '"'
+        end = start
+        while end[0] != '\0' and end[0] != stop: end += 1
+        l = end - start + 1
+        cpy = <char*>calloc( l, sizeof(char ) )
+        memcpy( cpy, start, l - 1 )
+        result = cpy
+        free(cpy)
+        return result
+
+    def setAttribute( self, name, value ):
+        '''convenience method to set an attribute.'''
+        r = self.asDict()
+        r[name] = value
+        self.fromDict( r )
+
+cdef class Parser:  # common base type of the row parsers (asTuple and asGTF derive from it)
+    pass  # deliberately empty - subclasses supply __call__(buffer, len)
+
+cdef class asTuple(Parser):
+    '''converts a :term:`tabix row` into a python tuple.''' 
+    def __call__(self, char * buffer, int len):
+        # the proxy receives its own copy of the row - handing it the
+        # caller's buffer via "present" caused persistence issues
+        cdef TupleProxy proxy
+        proxy = TupleProxy()
+        proxy.copy( buffer, len )
+        return proxy
+
+cdef class asGTF(Parser):
+    '''converts a :term:`tabix row` into a GTF record.''' 
+    def __call__(self, char * buffer, int len):
+        cdef GTFProxy record
+        record = GTFProxy()
+        record.copy( buffer, len )
+        return record
+
+cdef class TabixIteratorParsed:
+    """iterates over the rows of a tabix file or of a region of it,
+    passing each row through *parser*.
+    """
+    cdef ti_iter_t iterator
+    cdef tabix_t * tabixfile
+    cdef Parser parser
+    cdef Tabixfile owner
+
+    def __cinit__(self, 
+                  Tabixfile tabixfile, 
+                  int tid, 
+                  int start, 
+                  int end,
+                  Parser parser ):
+        assert tabixfile._isOpen()
+        self.parser = parser
+        # hold a reference to the python object so that the file stays alive
+        # as long as the iterator is alive - the C pointer alone cannot do that.
+        self.owner = tabixfile
+        self.tabixfile = tabixfile.tabixfile
+
+        if tid < 0:
+            # seek to start of file to ensure iteration is over
+            # all entries.
+            bgzf_seek( self.tabixfile.fp, 0, 0)
+            self.iterator = ti_iter_first()
+        else:
+            self.iterator = ti_queryi(self.tabixfile, tid, start, end) 
+
+        if <void*>self.iterator == NULL:
+            raise ValueError("malformatted query or wrong sequence name.\n")
+
+    def __iter__(self):
+        return self 
+
+    def __next__(self): 
+        """python version of next().
+
+        pyrex uses this non-standard name instead of next()
+        """
+
+        cdef char * s
+        cdef int len
+        s = ti_read(self.tabixfile, self.iterator, &len)
+        if s == NULL: raise StopIteration
+        return self.parser(s, len)
+
+    def __dealloc__(self):
+        if <void*>self.iterator != NULL:
+            ti_iter_destroy(self.iterator)
+        
+def tabix_compress( filename_in, 
+              filename_out,
+              force = False ):
+    '''
+    compress *filename_in* writing the output to *filename_out*.
+    
+    Raise an IOError if *filename_out* already exists, unless *force* is set,
+    or if a file cannot be opened. Raise an OSError if reading or writing fails.
+    '''
+
+    if not force and os.path.exists(filename_out ):
+        raise IOError( "Filename '%s' already exists, use *force* to overwrite" % filename_out)
+
+    cdef int WINDOW_SIZE
+    cdef int c, r
+    cdef void * buffer
+    cdef BGZF * fp
+    cdef int fd_src
+    cdef int O_RDONLY
+    O_RDONLY = os.O_RDONLY
+    WINDOW_SIZE = 64 * 1024
+
+    fp = bgzf_open( filename_out, "w")
+    if fp == NULL:
+        raise IOError( "could not open '%s' for writing" % filename_out )
+
+    # open() signals failure with -1; 0 is a valid file descriptor
+    fd_src = open(filename_in, O_RDONLY)
+    if fd_src < 0:
+        bgzf_close(fp)
+        raise IOError( "could not open '%s' for reading" % filename_in )
+
+    buffer = malloc(WINDOW_SIZE)
+    c = 1  # prime the loop - a C local is not initialised implicitly
+    r = 0
+    while c > 0 and r >= 0:
+        c = read(fd_src, buffer, WINDOW_SIZE)
+        # c == 0 is end of file, c < 0 a read error: nothing to write then
+        if c > 0: r = bgzf_write(fp, buffer, c)
+    free( buffer )
+    os.close( fd_src )
+    # always close the output file before reporting a failure
+    if bgzf_close(fp) < 0 or r < 0: raise OSError("writing failed")
+    if c < 0: raise OSError("reading failed")
+
+def tabix_index( filename, 
+                 force = False,
+                 seq_col = None, 
+                 start_col = None, 
+                 end_col = None,
+                 preset = None,
+                 meta_char = "#",
+                 zerobased = False,
+                ):
+    '''
+    index tab-separated *filename* using tabix.
+
+    An existing index will not be overwritten unless
+    *force* is set.
+
+    The index will be built from coordinates
+    in columns *seq_col*, *start_col* and *end_col*.
+
+    The contents of *filename* have to be sorted by 
+    contig and position - the method does not check
+    if the file is sorted.
+
+    Column indices are 0-based. Coordinates in the file
+    are assumed to be 1-based unless *zerobased* is set.
+
+    If *preset* is provided, the column coordinates
+    are taken from a preset. Valid values for preset
+    are "gff", "bed", "sam", "vcf", "psltbl", "pileup".
+    
+    Lines beginning with *meta_char* will be skipped
+    (presets always use "#" as the meta character).
+    
+    If *filename* does not end in ".gz", it will be automatically
+    compressed. The original file will be removed and only the 
+    compressed file will be retained. 
+
+    If *filename* ends in *gz*, the file is assumed to be already
+    compressed with bgzf.
+
+    returns the filename of the compressed data
+    '''
+    
+    if not os.path.exists(filename): raise IOError("No such file '%s'" % filename)
+
+    # columns (1-based)
+    # preset-code, contig, start, end, metachar for comments, lines to ignore at beginning
+    # 0 is a missing column
+    preset2conf = {
+        'gff' : ( 0, 1, 4, 5, ord('#'), 0 ),
+        'bed' : ( 0x10000, 1, 2, 3, ord('#'), 0 ),
+        'psltbl' : ( 0x10000, 15, 17, 18, ord('#'), 0 ),
+        'sam' : ( 1, 3, 4, 0, ord('#'), 0 ),
+        'vcf' : ( 2, 1, 2, 0, ord('#'), 0 ),
+        'pileup': (3, 1, 2, 0, ord('#'), 0 ),
+        }
+
+    if preset:
+        try:
+            conf_data = preset2conf[preset]
+        except KeyError:
+            raise KeyError( "unknown preset '%s', valid presets are '%s'" % (preset, ",".join(preset2conf.keys() )))
+    else:
+        if seq_col is None or start_col is None: raise ValueError( "neither preset nor seq_col and start_col given" )
+        if end_col is None: end_col = -1
+        preset = 0
+        # note that tabix internally works with 0-based coordinates and open/closed intervals.
+        # When using a preset, conversion is automatically taken care of.
+        # Otherwise, the coordinates are assumed to be 1-based closed intervals and 
+        # 1 is subtracted from the start coordinate. To avoid doing this, set
+        # the TI_FLAG_UCSC=0x10000 flag:
+        if zerobased: preset = preset | 0x10000
+
+        conf_data = (preset, seq_col+1, start_col+1, end_col+1, ord(meta_char), 0)
+
+    if not filename.endswith(".gz"): 
+        # all arguments are valid at this point - the original file is removed below
+        tabix_compress( filename, filename + ".gz", force = force )
+        os.unlink( filename )
+        filename += ".gz"
+
+    if not force and os.path.exists(filename + ".tbi" ):
+        raise IOError( "Filename '%s.tbi' already exists, use *force* to overwrite" % filename )
+
+    cdef ti_conf_t conf
+    conf.preset, conf.sc, conf.bc, conf.ec, conf.meta_char, conf.line_skip = conf_data
+
+    ti_index_build( filename, &conf)
+    
+    return filename
+    
+__all__ = ["tabix_index",   # names exported by a star-import of this module
+           "tabix_compress",
+           "Tabixfile", 
+           "asTuple",
+           "asGTF",
+           ]
diff --git a/pysam/pysam_util.c b/pysam/pysam_util.c
index 5360626..91b6fa7 100644
--- a/pysam/pysam_util.c
+++ b/pysam/pysam_util.c
@@ -141,11 +141,8 @@ static inline int resolve_cigar(bam_pileup1_t *p, uint32_t pos)
 	}
 	assert(x > pos); // otherwise a bug
 	return ret;
-}
-
-
-
 
+}
 // the following code has been taken from bam_plbuf_push
 // and modified such that instead of a function call
 // the function returns and will continue (if cont is true).
@@ -155,98 +152,16 @@ static inline int resolve_cigar(bam_pileup1_t *p, uint32_t pos)
 // 1: if buf is full and can be emitted
 // 0: if b has been added
 // -1: if there was an error
-int pysam_bam_plbuf_push(const bam1_t *b, bam_plbuf_t *buf, int cont)
+int pysam_pileup_next(const bam1_t *b,
+		      bam_plbuf_t *buf,
+		      bam_pileup1_t ** plp,
+		      int * tid,
+		      int * pos,
+		      int * n_plp )
 {
-  if (!cont)
-    {
-      if (b) { // fill buffer
-	if (b->core.tid < 0) return 0;
-	if (b->core.flag & buf->flag_mask) return 0;
-	bam_copy1(&buf->tail->b, b);
-	buf->tail->beg = b->core.pos; buf->tail->end = bam_calend(&b->core, bam1_cigar(b));
-	if (!(b->core.tid >= buf->max_tid || (b->core.tid == buf->max_tid && buf->tail->beg >= buf->max_pos))) {
-	  fprintf(stderr, "[bam_pileup_core] the input is not sorted. Abort!\n");
-	  abort();
-	}
-	buf->max_tid = b->core.tid; buf->max_pos = buf->tail->beg;
-	if (buf->tail->end > buf->pos || buf->tail->b.core.tid > buf->tid) {
-	  buf->tail->next = mp_alloc(buf->mp);
-	  buf->tail = buf->tail->next;
-	}
-      } else buf->is_eof = 1;
-    }
-  else
-    // continue end of loop
-    {
-      // update tid and pos
-      if (buf->head->next) {
-	if (buf->tid > buf->head->b.core.tid) {
-	  fprintf(stderr, "[bam_plbuf_push] unsorted input. Pileup aborts.\n");
-	  return -1;
-	}
-      }
-      if (buf->tid < buf->head->b.core.tid) { // come to a new reference sequence
-	buf->tid = buf->head->b.core.tid; buf->pos = buf->head->beg; // jump to the next reference
-      } else if (buf->pos < buf->head->beg) { // here: tid == head->b.core.tid
-	buf->pos = buf->head->beg; // jump to the next position
-      } else ++buf->pos; // scan contiguously
-      if (buf->is_eof && buf->head->next == 0) return 0;
-    }
-
-  // enter yield loop
-  while (buf->is_eof || buf->max_tid > buf->tid || (buf->max_tid == buf->tid && buf->max_pos > buf->pos))
-    {
-      int n_pu = 0;
-      lbnode_t *p, *q;
-      buf->dummy->next = buf->head;
-      for (p = buf->head, q = buf->dummy; p->next; q = p, p = p->next) {
-	if (p->b.core.tid < buf->tid || (p->b.core.tid == buf->tid && p->end <= buf->pos)) { // then remove from the list
-	  q->next = p->next; mp_free(buf->mp, p); p = q;
-	} else if (p->b.core.tid == buf->tid && p->beg <= buf->pos) { // here: p->end > pos; then add to pileup
-	  if (n_pu == buf->max_pu) { // then double the capacity
-	    buf->max_pu = buf->max_pu? buf->max_pu<<1 : 256;
-	    buf->pu = (bam_pileup1_t*)realloc(buf->pu, sizeof(bam_pileup1_t) * buf->max_pu);
-	  }
-	  buf->pu[n_pu].b = &p->b;
-	  if (resolve_cigar(buf->pu + n_pu, buf->pos)) ++n_pu; // skip the read if we are looking at BAM_CREF_SKIP
-	}
-      }
-      buf->head = buf->dummy->next; // dummy->next may be changed
-
-      // exit if alignments need to be emitted
-      if (n_pu) { return n_pu; }
-      
-      // update tid and pos
-      if (buf->head->next) {
-	if (buf->tid > buf->head->b.core.tid) {
-	  fprintf(stderr, "[bam_plbuf_push] unsorted input. Pileup aborts.\n");
-	  return -2;
-	}
-      }
-      if (buf->tid < buf->head->b.core.tid) { // come to a new reference sequence
-	buf->tid = buf->head->b.core.tid; buf->pos = buf->head->beg; // jump to the next reference
-      } else if (buf->pos < buf->head->beg) { // here: tid == head->b.core.tid
-	buf->pos = buf->head->beg; // jump to the next position
-      } else ++buf->pos; // scan contiguously
-      if (buf->is_eof && buf->head->next == 0) break;
-    }
-  return 0;
-}
-
-int pysam_get_pos( const bam_plbuf_t *buf) 
-{
-  return buf->pos;
-}
-
-  
-int pysam_get_tid( const bam_plbuf_t *buf)
-{
-  return buf->tid;
-}
-
-bam_pileup1_t * pysam_get_pileup( const bam_plbuf_t *buf)
-{
-  return buf->pu;
+  *plp = (bam_pileup1_t*)bam_plp_next(buf->iter, tid, pos, n_plp);
+  if (*plp == NULL) return 0; // test the result, not the out-parameter, which is never NULL
+  return 1;
 }
 
 // pysam dispatch function to emulate the samtools
@@ -309,15 +224,6 @@ int pysam_dispatch(int argc, char *argv[] )
   return 0;
 }
 
-// standin for bam_destroy1 in bam.h
-// deletes all variable length data
-void pysam_bam_destroy1( bam1_t * b )
-{
-  if (b == NULL) return;
-  if (b->data != NULL) free(b->data);
-  free(b);
-}
-
 // taken from samtools/bam_import.c
 static inline uint8_t *alloc_data(bam1_t *b, size_t size)
 {
@@ -379,121 +285,6 @@ unsigned char pysam_translate_sequence( const unsigned char s )
   return bam_nt16_table[s];
 }
 
-// stand-ins for samtools macros in bam.h
-char * pysam_bam1_qname( const bam1_t * b)
-{
-  return (char*)b->data;
-}
-
-uint32_t * pysam_bam1_cigar( const bam1_t * b) 
-{
-  return (uint32_t*)(b->data + b->core.l_qname);
-}
-
-uint8_t * pysam_bam1_seq( const bam1_t * b) 
-{
-  return (uint8_t*)(b->data + b->core.n_cigar*4 + b->core.l_qname);
-}
-
-uint8_t * pysam_bam1_qual( const bam1_t * b)
-{
-  return (uint8_t*)(b->data + b->core.n_cigar*4 + b->core.l_qname + (b->core.l_qseq + 1)/2);
-}
-
-uint8_t * pysam_bam1_aux( const bam1_t * b)
-{
-  return (uint8_t*)(b->data + b->core.n_cigar*4 + b->core.l_qname + b->core.l_qseq + (b->core.l_qseq + 1)/2);
-}
-
-// #######################################################
-// Iterator implementation
-// #######################################################
-
-// functions defined in bam_index.c
-extern pair64_t * get_chunk_coordinates(const bam_index_t *idx, int tid, int beg, int end, int* cnt_off);
-
-static inline int is_overlap(uint32_t beg, uint32_t end, const bam1_t *b)
-{
-	uint32_t rbeg = b->core.pos;
-	uint32_t rend = b->core.n_cigar? bam_calend(&b->core, bam1_cigar(b)) : b->core.pos + 1;
-	return (rend > beg && rbeg < end);
-}
-
-struct __bam_fetch_iterator_t
-{
-  bam1_t *        b;
-  pair64_t *      off;
-  int             n_off;
-  uint64_t        curr_off;
-  int             curr_chunk;
-  bamFile 		fp;
-  int				tid;
-  int				beg;
-  int				end;
-  int             n_seeks;
-};
- 
-bam_fetch_iterator_t* bam_init_fetch_iterator(bamFile fp, const bam_index_t *idx, int tid, int beg, int end)
-{
-	// iterator contains current alignment position
-	//      and will contain actual alignment during iterations
-	bam_fetch_iterator_t* iter  = (bam_fetch_iterator_t*)calloc(1, sizeof(bam_fetch_iterator_t));
-	iter->b                     = (bam1_t*)calloc(1, sizeof(bam1_t));
-		
-	// list of chunks containing our alignments
-	iter->off = get_chunk_coordinates(idx, tid, beg, end, &iter->n_off);
-	
-	// initialise other state variables in iterator
-	iter->fp                = fp;
-	iter->curr_chunk        = -1;   
-	iter->curr_off          =  0;
-	iter->n_seeks           =  0;    
-	iter->tid				= tid;
-	iter->beg				= beg;
-	iter->end				= end;
-	return iter;
-}
-
-bam1_t * bam_fetch_iterate(bam_fetch_iterator_t *iter)
-{
-	if (!iter->off) {
-		return 0;
-	}
-
-	int ret;
-	// iterate through all alignments in chunks
-	for (;;) {
-		if (iter->curr_off == 0 || iter->curr_off >= iter->off[iter->curr_chunk].v) { // then jump to the next chunk
-			if (iter->curr_chunk == iter->n_off - 1) break; // no more chunks
-			if (iter->curr_chunk >= 0) assert(iter->curr_off == iter->off[iter->curr_chunk].v); // otherwise bug
-			if (iter->curr_chunk < 0 || iter->off[iter->curr_chunk].v != iter->off[iter->curr_chunk+1].u) { // not adjacent chunks; then seek
-				bam_seek(iter->fp, iter->off[iter->curr_chunk+1].u, SEEK_SET);
-				iter->curr_off = bam_tell(iter->fp);
-				++iter->n_seeks;
-			}
-			++iter->curr_chunk;
-		}
-		if ((ret = bam_read1(iter->fp, iter->b)) > 0) {
-			iter->curr_off = bam_tell(iter->fp);
-			if (iter->b->core.tid != iter->tid || iter->b->core.pos >= iter->end) break; // no need to proceed
-			else if (is_overlap(iter->beg, iter->end, iter->b)) 
-				//
-				//func(iter->b, data);
-				//
-				return iter->b;
-		} else 
-			return 0; // end of file
-	}
-	return 0;
-}
-
-void bam_cleanup_fetch_iterator(bam_fetch_iterator_t *iter)
-{
-  //  fprintf(stderr, "[bam_fetch] # seek calls: %d\n", iter->n_seeks);
-  bam_destroy1(iter->b);
-  free(iter->off);
-}
 
-       
 
 
diff --git a/pysam/pysam_util.h b/pysam/pysam_util.h
index ff5d569..bfbd6dd 100644
--- a/pysam/pysam_util.h
+++ b/pysam/pysam_util.h
@@ -1,75 +1,22 @@
 #ifndef PYSAM_UTIL_H
 #define PYSAM_UTIL_H
 
-//////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////
-// code for iterator
-
-/*! @typedef
-  @Structure for holding current state (current alignment etc.) for iterating through
-  alignments overlapping a specified region.
-  @field  b           pointer to the current alignment
-  @field  off         pointer to an array of chunk loci (each with beg/end positions)
-  @field  n_off       The number of chunks
-  @field  curr_off    The current file positon
-  @field  curr_chunk  The item in a list of chunk
-  @discussion See also bam_fetch_iterate
-*/
-struct __bam_fetch_iterator_t;
-typedef struct __bam_fetch_iterator_t bam_fetch_iterator_t;
-	
-/*!
-  @abstract Retrieve the alignments that are overlapped with the
-  specified region.
-  
-  @discussion Returns iterator object to retrieve successive alignments ordered by
-  start position. 
-  @param  fp    BAM file handler
-  @param  idx   pointer to the alignment index
-  @param  tid   chromosome ID as is defined in the header
-  @param  beg   start coordinate, 0-based
-  @param  end   end coordinate, 0-based
-*/
-bam_fetch_iterator_t * bam_init_fetch_iterator(bamFile fp, const bam_index_t *idx, int tid, int beg, int end);
-
-
-/*!
-  @abstract Iterates through alignments overlapped the specified region.
-  @discussion Returns pointer to successive alignments ordered by start position.
-  Returns null pointer to signal the end of the iteration.
-  The alignment data is nested within the iterator to avoid unnecessary allocations.
-*/
-bam1_t * bam_fetch_iterate(bam_fetch_iterator_t *iter);
-
-bam_fetch_iterator_t* bam_init_fetchall_iterator(bamFile fp, const bam_index_t *idx);
-bam1_t * bam_fetchall_iterate(bam_fetch_iterator_t *iter);
-
 //////////////////////////////////////////////////////////////////
 //////////////////////////////////////////////////////////////////
 //////////////////////////////////////////////////////////////////
 // various helper functions
+//
+// fill pileup buffer for next position.
 
-int pysam_bam_plbuf_push(const bam1_t *b, bam_plbuf_t *buf, int cont);
-
-// accessor functions - necessary as bam_plbuf_t is hidden
-// among the implementation
-int pysam_get_pos( const bam_plbuf_t *buf);
-int pysam_get_tid( const bam_plbuf_t *buf);
-bam_pileup1_t * pysam_get_pileup( const bam_plbuf_t *buf);
+int pysam_pileup_next(const bam1_t *b,
+		      bam_plbuf_t *buf,
+		      bam_pileup1_t ** plp,
+		      int * tid,
+		      int * pos,
+		      int * n_plp);
 
 int pysam_dispatch(int argc, char *argv[] );
 
-// stand-in for macro - not wrappable in pyrex
-void pysam_bam_destroy1( bam1_t * b );
-
-// stand-in for other samtools macros
-uint32_t * pysam_bam1_cigar( const bam1_t * b);
-char * pysam_bam1_qname( const bam1_t * b);
-uint8_t * pysam_bam1_seq( const bam1_t * b);
-uint8_t * pysam_bam1_qual( const bam1_t * b);
-uint8_t * pysam_bam1_aux( const bam1_t * b);
-
 /*!
   @abstract Update the variable length data within a bam1_t entry
 
diff --git a/pysam/version.py b/pysam/version.py
new file mode 100644
index 0000000..5965c7c
--- /dev/null
+++ b/pysam/version.py
@@ -0,0 +1,7 @@
+# pysam versioning information
+
+__version__ = "0.3"
+
+__samtools_version__ = "0.1.8"
+
+__tabix_version__ = "0.2.1"
diff --git a/samtools/bam.c b/samtools/bam.c
index ee7642b..94b0aa8 100644
--- a/samtools/bam.c
+++ b/samtools/bam.c
@@ -70,6 +70,7 @@ bam_header_t *bam_header_read(bamFile fp)
 {
 	bam_header_t *header;
 	char buf[4];
+	int magic_len;
 	int32_t i = 1, name_len;
 	// check EOF
 	i = bgzf_check_EOF(fp);
@@ -80,9 +81,9 @@ bam_header_t *bam_header_read(bamFile fp)
 	}
 	else if (i == 0) fprintf(stderr, "[bam_header_read] EOF marker is absent.\n");
 	// read "BAM1"
-	if (bam_read(fp, buf, 4) != 4) return 0;
-	if (strncmp(buf, "BAM\001", 4)) {
-		fprintf(stderr, "[bam_header_read] wrong header\n");
+	magic_len = bam_read(fp, buf, 4);
+	if (magic_len != 4 || strncmp(buf, "BAM\001", 4) != 0) {
+		fprintf(stderr, "[bam_header_read] invalid BAM binary header (this is not a BAM file).\n");
 		return 0;
 	}
 	header = bam_header_init();
@@ -140,6 +141,7 @@ int bam_header_write(bamFile fp, const bam_header_t *header)
 			bam_write(fp, &x, 4);
 		} else bam_write(fp, &header->target_len[i], 4);
 	}
+	bgzf_flush(fp);
 	return 0;
 }
 
@@ -207,6 +209,7 @@ inline int bam_write1_core(bamFile fp, const bam1_core_t *c, int data_len, uint8
 	x[5] = c->mtid;
 	x[6] = c->mpos;
 	x[7] = c->isize;
+	bgzf_flush_try(fp, 4 + block_len);
 	if (bam_is_be) {
 		for (i = 0; i < 8; ++i) bam_swap_endian_4p(x + i);
 		y = block_len;
@@ -232,8 +235,8 @@ char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int of)
 	kstring_t str;
 	str.l = str.m = 0; str.s = 0;
 
-	ksprintf(&str, "%s\t", bam1_qname(b));
-	if (of == BAM_OFDEC) ksprintf(&str, "%d\t", c->flag);
+	kputsn(bam1_qname(b), c->l_qname-1, &str); kputc('\t', &str);
+	if (of == BAM_OFDEC) { kputw(c->flag, &str); kputc('\t', &str); }
 	else if (of == BAM_OFHEX) ksprintf(&str, "0x%x\t", c->flag);
 	else { // BAM_OFSTR
 		for (i = 0; i < 16; ++i)
@@ -241,41 +244,43 @@ char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int of)
 				kputc(bam_flag2char_table[i], &str);
 		kputc('\t', &str);
 	}
-	if (c->tid < 0) kputs("*\t", &str);
-	else ksprintf(&str, "%s\t", header->target_name[c->tid]);
-	ksprintf(&str, "%d\t%d\t", c->pos + 1, c->qual);
+	if (c->tid < 0) kputsn("*\t", 2, &str);
+	else { kputs(header->target_name[c->tid], &str); kputc('\t', &str); }
+	kputw(c->pos + 1, &str); kputc('\t', &str); kputw(c->qual, &str); kputc('\t', &str);
 	if (c->n_cigar == 0) kputc('*', &str);
 	else {
-		for (i = 0; i < c->n_cigar; ++i)
-			ksprintf(&str, "%d%c", bam1_cigar(b)[i]>>BAM_CIGAR_SHIFT, "MIDNSHP"[bam1_cigar(b)[i]&BAM_CIGAR_MASK]);
+		for (i = 0; i < c->n_cigar; ++i) {
+			kputw(bam1_cigar(b)[i]>>BAM_CIGAR_SHIFT, &str);
+			kputc("MIDNSHP"[bam1_cigar(b)[i]&BAM_CIGAR_MASK], &str);
+		}
 	}
 	kputc('\t', &str);
-	if (c->mtid < 0) kputs("*\t", &str);
-	else if (c->mtid == c->tid) kputs("=\t", &str);
-	else ksprintf(&str, "%s\t", header->target_name[c->mtid]);
-	ksprintf(&str, "%d\t%d\t", c->mpos + 1, c->isize);
+	if (c->mtid < 0) kputsn("*\t", 2, &str);
+	else if (c->mtid == c->tid) kputsn("=\t", 2, &str);
+	else { kputs(header->target_name[c->mtid], &str); kputc('\t', &str); }
+	kputw(c->mpos + 1, &str); kputc('\t', &str); kputw(c->isize, &str); kputc('\t', &str);
 	if (c->l_qseq) {
 		for (i = 0; i < c->l_qseq; ++i) kputc(bam_nt16_rev_table[bam1_seqi(s, i)], &str);
 		kputc('\t', &str);
 		if (t[0] == 0xff) kputc('*', &str);
 		else for (i = 0; i < c->l_qseq; ++i) kputc(t[i] + 33, &str);
-	} else ksprintf(&str, "*\t*");
+	} else kputsn("*\t*", 3, &str);
 	s = bam1_aux(b);
 	while (s < b->data + b->data_len) {
 		uint8_t type, key[2];
 		key[0] = s[0]; key[1] = s[1];
 		s += 2; type = *s; ++s;
-		ksprintf(&str, "\t%c%c:", key[0], key[1]);
-		if (type == 'A') { ksprintf(&str, "A:%c", *s); ++s; }
-		else if (type == 'C') { ksprintf(&str, "i:%u", *s); ++s; }
-		else if (type == 'c') { ksprintf(&str, "i:%d", *s); ++s; }
-		else if (type == 'S') { ksprintf(&str, "i:%u", *(uint16_t*)s); s += 2; }
-		else if (type == 's') { ksprintf(&str, "i:%d", *(int16_t*)s); s += 2; }
-		else if (type == 'I') { ksprintf(&str, "i:%u", *(uint32_t*)s); s += 4; }
-		else if (type == 'i') { ksprintf(&str, "i:%d", *(int32_t*)s); s += 4; }
+		kputc('\t', &str); kputsn((char*)key, 2, &str); kputc(':', &str);
+		if (type == 'A') { kputsn("A:", 2, &str); kputc(*s, &str); ++s; }
+		else if (type == 'C') { kputsn("i:", 2, &str); kputw(*s, &str); ++s; }
+		else if (type == 'c') { kputsn("i:", 2, &str); kputw(*(int8_t*)s, &str); ++s; }
+		else if (type == 'S') { kputsn("i:", 2, &str); kputw(*(uint16_t*)s, &str); s += 2; }
+		else if (type == 's') { kputsn("i:", 2, &str); kputw(*(int16_t*)s, &str); s += 2; }
+		else if (type == 'I') { kputsn("i:", 2, &str); kputuw(*(uint32_t*)s, &str); s += 4; }
+		else if (type == 'i') { kputsn("i:", 2, &str); kputw(*(int32_t*)s, &str); s += 4; }
 		else if (type == 'f') { ksprintf(&str, "f:%g", *(float*)s); s += 4; }
 		else if (type == 'd') { ksprintf(&str, "d:%lg", *(double*)s); s += 8; }
-		else if (type == 'Z' || type == 'H') { ksprintf(&str, "%c:", type); while (*s) kputc(*s++, &str); ++s; }
+		else if (type == 'Z' || type == 'H') { kputc(type, &str); kputc(':', &str); while (*s) kputc(*s++, &str); ++s; }
 	}
 	return str.s;
 }
@@ -288,7 +293,7 @@ char *bam_format1(const bam_header_t *header, const bam1_t *b)
 void bam_view1(const bam_header_t *header, const bam1_t *b)
 {
 	char *s = bam_format1(header, b);
-	printf("%s\n", s);
+	puts(s);
 	free(s);
 }
 
diff --git a/samtools/bam.h b/samtools/bam.h
index 291b303..8e26ea6 100644
--- a/samtools/bam.h
+++ b/samtools/bam.h
@@ -87,7 +87,7 @@ typedef struct {
 	char **target_name;
 	uint32_t *target_len;
 	void *dict, *hash, *rg2lib;
-	int l_text;
+	size_t l_text, n_text;
 	char *text;
 } bam_header_t;
 
@@ -190,6 +190,8 @@ typedef struct {
 	uint8_t *data;
 } bam1_t;
 
+typedef struct __bam_iter_t *bam_iter_t;
+
 #define bam1_strand(b) (((b)->core.flag&BAM_FREVERSE) != 0)
 #define bam1_mstrand(b) (((b)->core.flag&BAM_FMREVERSE) != 0)
 
@@ -272,6 +274,10 @@ extern char bam_nt16_nt4_table[];
 extern "C" {
 #endif
 
+	/*********************
+	 * Low-level SAM I/O *
+	 *********************/
+
 	/*! @abstract TAM file handler */
 	typedef struct __tamFile_t *tamFile;
 
@@ -323,6 +329,7 @@ extern "C" {
 	  be destroyed in the first place.
 	 */
 	int sam_header_parse(bam_header_t *h);
+	int32_t bam_get_tid(const bam_header_t *header, const char *seq_name);
 
 	/*!
 	  @abstract       Parse @RG lines a update a header struct
@@ -336,12 +343,22 @@ extern "C" {
 
 #define sam_write1(header, b) bam_view1(header, b)
 
+
+	/********************************
+	 * APIs for string dictionaries *
+	 ********************************/
+
 	int bam_strmap_put(void *strmap, const char *rg, const char *lib);
 	const char *bam_strmap_get(const void *strmap, const char *rg);
 	void *bam_strmap_dup(const void*);
 	void *bam_strmap_init();
 	void bam_strmap_destroy(void *strmap);
 
+
+	/*********************
+	 * Low-level BAM I/O *
+	 *********************/
+
 	/*!
 	  @abstract Initialize a header structure.
 	  @return   the pointer to the header structure
@@ -440,6 +457,11 @@ extern "C" {
 
 	const char *bam_get_library(bam_header_t *header, const bam1_t *b);
 
+
+	/***************
+	 * pileup APIs *
+	 ***************/
+
 	/*! @typedef
 	  @abstract Structure for one alignment covering the pileup position.
 	  @field  b      pointer to the alignment
@@ -461,11 +483,25 @@ extern "C" {
 		uint32_t is_del:1, is_head:1, is_tail:1;
 	} bam_pileup1_t;
 
-	struct __bam_plbuf_t;
-	/*! @abstract pileup buffer */
-	typedef struct __bam_plbuf_t bam_plbuf_t;
+	typedef int (*bam_plp_auto_f)(void *data, bam1_t *b);
 
-	void bam_plbuf_set_mask(bam_plbuf_t *buf, int mask);
+	struct __bam_plp_t;
+	typedef struct __bam_plp_t *bam_plp_t;
+
+	bam_plp_t bam_plp_init(bam_plp_auto_f func, void *data);
+	int bam_plp_push(bam_plp_t iter, const bam1_t *b);
+	const bam_pileup1_t *bam_plp_next(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp);
+	const bam_pileup1_t *bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp);
+	void bam_plp_set_mask(bam_plp_t iter, int mask);
+	void bam_plp_reset(bam_plp_t iter);
+	void bam_plp_destroy(bam_plp_t iter);
+
+	struct __bam_mplp_t;
+	typedef struct __bam_mplp_t *bam_mplp_t;
+
+	bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data);
+	void bam_mplp_destroy(bam_mplp_t iter);
+	int bam_mplp_auto(bam_mplp_t iter, int *_tid, int *_pos, int *n_plp, const bam_pileup1_t **plp);
 
 	/*! @typedef
 	  @abstract    Type of function to be called by bam_plbuf_push().
@@ -478,44 +514,16 @@ extern "C" {
 	 */
 	typedef int (*bam_pileup_f)(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data);
 
-	/*!
-	  @abstract     Reset a pileup buffer for another pileup process
-	  @param  buf   the pileup buffer to be reset
-	 */
-	void bam_plbuf_reset(bam_plbuf_t *buf);
+	typedef struct {
+		bam_plp_t iter;
+		bam_pileup_f func;
+		void *data;
+	} bam_plbuf_t;
 
-	/*!
-	  @abstract     Initialize a buffer for pileup.
-	  @param  func  fucntion to be called by bam_pileup_core()
-	  @param  data  user provided data
-	  @return       pointer to the pileup buffer
-	 */
+	void bam_plbuf_set_mask(bam_plbuf_t *buf, int mask);
+	void bam_plbuf_reset(bam_plbuf_t *buf);
 	bam_plbuf_t *bam_plbuf_init(bam_pileup_f func, void *data);
-
-	/*!
-	  @abstract    Destroy a pileup buffer.
-	  @param  buf  pointer to the pileup buffer
-	 */
 	void bam_plbuf_destroy(bam_plbuf_t *buf);
-
-	/*!
-	  @abstract    Push an alignment to the pileup buffer.
-	  @param  b    alignment to be pushed
-	  @param  buf  pileup buffer
-	  @see         bam_plbuf_init()
-	  @return      always 0 currently
-
-	  @discussion If all the alignments covering a particular site have
-	  been collected, this function will call the user defined function
-	  as is provided to bam_plbuf_init(). The coordinate of the site and
-	  all the alignments will be transferred to the user defined
-	  function as function parameters.
-	 
-	  When all the alignments are pushed to the buffer, this function
-	  needs to be called with b equal to NULL. This will flush the
-	  buffer. A pileup buffer can only be reused when bam_plbuf_reset()
-	  is called.
-	 */
 	int bam_plbuf_push(const bam1_t *b, bam_plbuf_t *buf);
 
 	int bam_pileup_file(bamFile fp, int mask, bam_pileup_f func, void *func_data);
@@ -534,6 +542,11 @@ extern "C" {
 	/*! @abstract  bam_plbuf_push() equivalent with level calculated. */
 	int bam_lplbuf_push(const bam1_t *b, bam_lplbuf_t *buf);
 
+
+	/*********************
+	 * BAM indexing APIs *
+	 *********************/
+
 	struct __bam_index_t;
 	typedef struct __bam_index_t bam_index_t;
 
@@ -582,6 +595,10 @@ extern "C" {
 	 */
 	int bam_fetch(bamFile fp, const bam_index_t *idx, int tid, int beg, int end, void *data, bam_fetch_f func);
 
+	bam_iter_t bam_iter_query(const bam_index_t *idx, int tid, int beg, int end);
+	int bam_iter_read(bamFile fp, bam_iter_t iter, bam1_t *b);
+	void bam_iter_destroy(bam_iter_t iter);
+
 	/*!
 	  @abstract       Parse a region in the format: "chr2:100,000-200,000".
 	  @discussion     bam_header_t::hash will be initialized if empty.
@@ -594,6 +611,11 @@ extern "C" {
 	 */
 	int bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *begin, int *end);
 
+
+	/**************************
+	 * APIs for optional tags *
+	 **************************/
+
 	/*!
 	  @abstract       Retrieve data of a tag
 	  @param  b       pointer to an alignment struct
@@ -617,6 +639,11 @@ extern "C" {
 	void bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *data);
 	uint8_t *bam_aux_get_core(bam1_t *b, const char tag[2]); // an alias of bam_aux_get()
 
+
+	/*****************
+	 * Miscellaneous *
+	 *****************/
+
 	/*!  
 	  @abstract Calculate the rightmost coordinate of an alignment on the
 	  reference genome.
diff --git a/samtools/bam_aux.c b/samtools/bam_aux.c
index 89e99f2..fbcd982 100644
--- a/samtools/bam_aux.c
+++ b/samtools/bam_aux.c
@@ -115,7 +115,7 @@ int bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *be
 	*ref_id = kh_value(h, iter);
 	if (i == k) { /* dump the whole sequence */
 		*begin = 0; *end = 1<<29; free(s);
-		return -1;
+		return 0;
 	}
 	for (p = s + i + 1; i != k; ++i) if (s[i] == '-') break;
 	*begin = atoi(p);
diff --git a/samtools/bam_import.c b/samtools/bam_import.c
index 9d463d1..9d84328 100644
--- a/samtools/bam_import.c
+++ b/samtools/bam_import.c
@@ -116,7 +116,7 @@ static bam_header_t *hash2header(const kh_ref_t *hash)
 bam_header_t *sam_header_read2(const char *fn)
 {
 	bam_header_t *header;
-	int c, dret, ret;
+	int c, dret, ret, error = 0;
 	gzFile fp;
 	kstream_t *ks;
 	kstring_t *str;
@@ -135,6 +135,10 @@ bam_header_t *sam_header_read2(const char *fn)
 		ks_getuntil(ks, 0, str, &dret);
 		len = atoi(str->s);
 		k = kh_put(ref, hash, s, &ret);
+		if (ret == 0) {
+			fprintf(stderr, "[sam_header_read2] duplicated sequence name: %s\n", s);
+			error = 1;
+		}
 		kh_value(hash, k) = (uint64_t)len<<32 | i;
 		if (dret != '\n')
 			while ((c = ks_getc(ks)) != '\n' && c != -1);
@@ -143,6 +147,7 @@ bam_header_t *sam_header_read2(const char *fn)
 	gzclose(fp);
 	free(str->s); free(str);
 	fprintf(stderr, "[sam_header_read2] %d sequences loaded.\n", kh_size(hash));
+	if (error) return 0;
 	header = hash2header(hash);
 	kh_destroy(ref, hash);
 	return header;
@@ -163,9 +168,24 @@ static inline void parse_error(int64_t n_lines, const char * __restrict msg)
 }
 static inline void append_text(bam_header_t *header, kstring_t *str)
 {
-	int x = header->l_text, y = header->l_text + str->l + 2; // 2 = 1 byte dret + 1 byte null
+	size_t x = header->l_text, y = header->l_text + str->l + 2; // 2 = 1 byte dret + 1 byte null
 	kroundup32(x); kroundup32(y);
-	if (x < y) header->text = (char*)realloc(header->text, y);
+	if (x < y) 
+    {
+        header->n_text = y;
+        header->text = (char*)realloc(header->text, y);
+        if ( !header->text ) 
+        {
+            fprintf(stderr,"realloc failed to alloc %ld bytes\n", y);
+            abort();
+        }
+    }
+    // Sanity check
+    if ( header->l_text+str->l+1 >= header->n_text )
+    {
+        fprintf(stderr,"append_text FIXME: %ld>=%ld, x=%ld,y=%ld\n",  header->l_text+str->l+1,header->n_text,x,y);
+        abort();
+    }
 	strncpy(header->text + header->l_text, str->s, str->l+1); // we cannot use strcpy() here.
 	header->l_text += str->l + 1;
 	header->text[header->l_text] = 0;
diff --git a/samtools/bam_index.c b/samtools/bam_index.c
index a627884..4152f20 100644
--- a/samtools/bam_index.c
+++ b/samtools/bam_index.c
@@ -42,6 +42,8 @@
 // 1<<14 is the size of minimum bin.
 #define BAM_LIDX_SHIFT    14
 
+#define BAM_MAX_BIN 37450 // =(8^6-1)/7+1
+
 typedef struct {
 	uint64_t u, v;
 } pair64_t;
@@ -63,6 +65,7 @@ KHASH_MAP_INIT_INT(i, bam_binlist_t)
 
 struct __bam_index_t {
 	int32_t n;
+	uint64_t n_no_coor; // unmapped reads without coordinate
 	khash_t(i) **index;
 	bam_lidx_t *index2;
 };
@@ -98,8 +101,12 @@ static inline void insert_offset2(bam_lidx_t *index2, bam1_t *b, uint64_t offset
 		index2->offset = (uint64_t*)realloc(index2->offset, index2->m * 8);
 		memset(index2->offset + old_m, 0, 8 * (index2->m - old_m));
 	}
-	for (i = beg + 1; i <= end; ++i)
-		if (index2->offset[i] == 0) index2->offset[i] = offset;
+	if (beg == end) {
+		if (index2->offset[beg] == 0) index2->offset[beg] = offset;
+	} else {
+		for (i = beg; i <= end; ++i)
+			if (index2->offset[i] == 0) index2->offset[i] = offset;
+	}
 	index2->n = end + 1;
 }
 
@@ -113,7 +120,7 @@ static void merge_chunks(bam_index_t *idx)
 		index = idx->index[i];
 		for (k = kh_begin(index); k != kh_end(index); ++k) {
 			bam_binlist_t *p;
-			if (!kh_exist(index, k)) continue;
+			if (!kh_exist(index, k) || kh_key(index, k) == BAM_MAX_BIN) continue;
 			p = &kh_value(index, k);
 			m = 0;
 			for (l = 1; l < p->n; ++l) {
@@ -130,6 +137,17 @@ static void merge_chunks(bam_index_t *idx)
 #endif // defined(BAM_TRUE_OFFSET) || defined(BAM_BGZF)
 }
 
+static void fill_missing(bam_index_t *idx)
+{ // give every empty (zero) linear-index slot the offset of the slot before it
+	int tid, w;
+	for (tid = 0; tid < idx->n; ++tid) {
+		bam_lidx_t *lin = idx->index2 + tid;
+		uint64_t *o = lin->offset;
+		for (w = 1; w < lin->n; ++w)
+			if (!o[w]) o[w] = o[w-1]; // slot 0 is never touched
+	}
+}
+
 bam_index_t *bam_index_core(bamFile fp)
 {
 	bam1_t *b;
@@ -139,7 +157,7 @@ bam_index_t *bam_index_core(bamFile fp)
 	uint32_t last_bin, save_bin;
 	int32_t last_coor, last_tid, save_tid;
 	bam1_core_t *c;
-	uint64_t save_off, last_off;
+	uint64_t save_off, last_off, n_mapped, n_unmapped, off_beg, off_end, n_no_coor;
 
 	idx = (bam_index_t*)calloc(1, sizeof(bam_index_t));
 	b = (bam1_t*)calloc(1, sizeof(bam1_t));
@@ -154,7 +172,10 @@ bam_index_t *bam_index_core(bamFile fp)
 
 	save_bin = save_tid = last_tid = last_bin = 0xffffffffu;
 	save_off = last_off = bam_tell(fp); last_coor = 0xffffffffu;
+    n_mapped = n_unmapped = n_no_coor = off_end = 0;
+	off_beg = off_end = bam_tell(fp);
 	while ((ret = bam_read1(fp, b)) >= 0) {
+		if (c->tid < 0) ++n_no_coor;
 		if (last_tid != c->tid) { // change of chromosomes
 			last_tid = c->tid;
 			last_bin = 0xffffffffu;
@@ -163,10 +184,17 @@ bam_index_t *bam_index_core(bamFile fp)
 					bam1_qname(b), last_coor, c->pos, c->tid+1);
 			exit(1);
 		}
-		if (b->core.tid >= 0 && b->core.bin < 4681) insert_offset2(&idx->index2[b->core.tid], b, last_off);
+		if (c->tid >= 0) insert_offset2(&idx->index2[b->core.tid], b, last_off);
 		if (c->bin != last_bin) { // then possibly write the binning index
 			if (save_bin != 0xffffffffu) // save_bin==0xffffffffu only happens to the first record
 				insert_offset(idx->index[save_tid], save_bin, save_off, last_off);
+			if (last_bin == 0xffffffffu && save_tid != 0xffffffffu) { // write the meta element
+				off_end = last_off;
+				insert_offset(idx->index[save_tid], BAM_MAX_BIN, off_beg, off_end);
+				insert_offset(idx->index[save_tid], BAM_MAX_BIN, n_mapped, n_unmapped);
+				n_mapped = n_unmapped = 0;
+				off_beg = off_end;
+			}
 			save_off = last_off;
 			save_bin = last_bin = c->bin;
 			save_tid = c->tid;
@@ -177,13 +205,23 @@ bam_index_t *bam_index_core(bamFile fp)
 					(unsigned long long)bam_tell(fp), (unsigned long long)last_off);
 			exit(1);
 		}
+		if (c->flag & BAM_FUNMAP) ++n_unmapped;
+		else ++n_mapped;
 		last_off = bam_tell(fp);
 		last_coor = b->core.pos;
 	}
-	if (save_tid >= 0) insert_offset(idx->index[save_tid], save_bin, save_off, bam_tell(fp));
+	if (save_tid >= 0) {
+		insert_offset(idx->index[save_tid], save_bin, save_off, bam_tell(fp));
+		insert_offset(idx->index[save_tid], BAM_MAX_BIN, off_beg, off_end);
+		insert_offset(idx->index[save_tid], BAM_MAX_BIN, n_mapped, n_unmapped);
+	}
 	merge_chunks(idx);
+	fill_missing(idx);
+	if (ret >= 0)
+		while ((ret = bam_read1(fp, b)) >= 0) ++n_no_coor;
 	if (ret < -1) fprintf(stderr, "[bam_index_core] truncated file? Continue anyway. (%d)\n", ret);
 	free(b->data); free(b);
+	idx->n_no_coor = n_no_coor;
 	return idx;
 }
 
@@ -261,6 +299,11 @@ void bam_index_save(const bam_index_t *idx, FILE *fp)
 				bam_swap_endian_8p(&index2->offset[x]);
 		} else fwrite(index2->offset, 8, index2->n, fp);
 	}
+	{ // write the number of reads coor-less records.
+		uint64_t x = idx->n_no_coor;
+		if (bam_is_be) bam_swap_endian_8p(&x);
+		fwrite(&x, 8, 1, fp);
+	}
 	fflush(fp);
 }
 
@@ -322,6 +365,8 @@ static bam_index_t *bam_index_load_core(FILE *fp)
 		if (bam_is_be)
 			for (j = 0; j < index2->n; ++j) bam_swap_endian_8p(&index2->offset[j]);
 	}
+	if (fread(&idx->n_no_coor, 8, 1, fp) == 0) idx->n_no_coor = 0;
+	if (bam_is_be) bam_swap_endian_8p(&idx->n_no_coor);
 	return idx;
 }
 
@@ -339,13 +384,13 @@ bam_index_t *bam_index_load_local(const char *_fn)
 	} else fn = strdup(_fn);
 	fnidx = (char*)calloc(strlen(fn) + 5, 1);
 	strcpy(fnidx, fn); strcat(fnidx, ".bai");
-	fp = fopen(fnidx, "r");
+	fp = fopen(fnidx, "rb");
 	if (fp == 0) { // try "{base}.bai"
 		char *s = strstr(fn, "bam");
 		if (s == fn + strlen(fn) - 3) {
 			strcpy(fnidx, fn);
 			fnidx[strlen(fn)-1] = 'i';
-			fp = fopen(fnidx, "r");
+			fp = fopen(fnidx, "rb");
 		}
 	}
 	free(fnidx); free(fn);
@@ -375,7 +420,7 @@ static void download_from_remote(const char *url)
 		fprintf(stderr, "[download_from_remote] fail to open remote file.\n");
 		return;
 	}
-	if ((fp = fopen(fn, "w")) == 0) {
+	if ((fp = fopen(fn, "wb")) == 0) {
 		fprintf(stderr, "[download_from_remote] fail to create file in the working directory.\n");
 		knet_close(fp_remote);
 		return;
@@ -425,7 +470,7 @@ int bam_index_build2(const char *fn, const char *_fnidx)
 		fnidx = (char*)calloc(strlen(fn) + 5, 1);
 		strcpy(fnidx, fn); strcat(fnidx, ".bai");
 	} else fnidx = strdup(_fnidx);
-	fpidx = fopen(fnidx, "w");
+	fpidx = fopen(fnidx, "wb");
 	if (fpidx == 0) {
 		fprintf(stderr, "[bam_index_build2] fail to create the index file.\n");
 		free(fnidx);
@@ -446,7 +491,7 @@ int bam_index_build(const char *fn)
 int bam_index(int argc, char *argv[])
 {
 	if (argc < 2) {
-		fprintf(stderr, "Usage: samtools index <in.bam> [<out.index>]\n");
+		fprintf(stderr, "Usage: samtools index <in.bam> [out.index]\n");
 		return 1;
 	}
 	if (argc >= 3) bam_index_build2(argv[1], argv[2]);
@@ -454,11 +499,43 @@ int bam_index(int argc, char *argv[])
 	return 0;
 }
 
-#define MAX_BIN 37450 // =(8^6-1)/7+1
+int bam_idxstats(int argc, char *argv[]) // "samtools idxstats": per-reference name, length, #mapped, #unmapped read from the index
+{
+	bam_index_t *idx;
+	bam_header_t *header;
+	bamFile fp;
+	int i;
+	if (argc < 2) {
+		fprintf(stderr, "Usage: samtools idxstats <in.bam>\n");
+		return 1;
+	}
+	fp = bam_open(argv[1], "r");
+	if (fp == 0) { fprintf(stderr, "[%s] fail to open BAM.\n", __func__); return 1; }
+	header = bam_header_read(fp); // only the header is needed from the BAM itself
+	bam_close(fp);
+	idx = bam_index_load(argv[1]);
+	if (idx == 0) { fprintf(stderr, "[%s] fail to load the index.\n", __func__); bam_header_destroy(header); return 1; } // do not leak the header
+	for (i = 0; i < idx->n; ++i) {
+		khint_t k;
+		khash_t(i) *h = idx->index[i];
+		printf("%s\t%u", header->target_name[i], header->target_len[i]); // target_len is uint32_t
+		k = kh_get(i, h, BAM_MAX_BIN);
+		if (k != kh_end(h) && kh_val(h, k).n >= 2) // meta bin: list[1] holds (#mapped, #unmapped); skip it if incomplete
+			printf("\t%llu\t%llu", (unsigned long long)kh_val(h, k).list[1].u, (unsigned long long)kh_val(h, k).list[1].v);
+		else printf("\t0\t0");
+		putchar('\n');
+	}
+	printf("*\t0\t0\t%llu\n", (unsigned long long)idx->n_no_coor); // reads without coordinates
+	bam_header_destroy(header);
+	bam_index_destroy(idx);
+	return 0;
+}
 
-static inline int reg2bins(uint32_t beg, uint32_t end, uint16_t list[MAX_BIN])
+static inline int reg2bins(uint32_t beg, uint32_t end, uint16_t list[BAM_MAX_BIN])
 {
 	int i = 0, k;
+	if (beg >= end) return 0;
+	if (end >= 1u<<29) end = 1u<<29;
 	--end;
 	list[i++] = 0;
 	for (k =    1 + (beg>>26); k <=    1 + (end>>26); ++k) list[i++] = k;
@@ -476,8 +553,15 @@ static inline int is_overlap(uint32_t beg, uint32_t end, const bam1_t *b)
 	return (rend > beg && rbeg < end);
 }
 
+struct __bam_iter_t {
+	int from_first; // read from the first record; no random access
+	int tid, beg, end, n_off, i, finished; // query region (tid, beg, end); number of chunks in off; index of the current chunk (-1 before the first); set once iteration has ended
+	uint64_t curr_off; // file offset reported by bam_tell() after the last seek/read; 0 until the first seek
+	pair64_t *off; // chunks to scan, filled in by bam_iter_query(); freed by bam_iter_destroy()
+};
+
 // bam_fetch helper function retrieves 
-pair64_t * get_chunk_coordinates(const bam_index_t *idx, int tid, int beg, int end, int* cnt_off)
+bam_iter_t bam_iter_query(const bam_index_t *idx, int tid, int beg, int end)
 {
 	uint16_t *bins;
 	int i, n_bins, n_off;
@@ -485,17 +569,34 @@ pair64_t * get_chunk_coordinates(const bam_index_t *idx, int tid, int beg, int e
 	khint_t k;
 	khash_t(i) *index;
 	uint64_t min_off;
-
-	bins = (uint16_t*)calloc(MAX_BIN, 2);
+	bam_iter_t iter = 0;
+
+	if (beg < 0) beg = 0;
+	if (end < beg) return 0;
+	// initialize iter
+	iter = calloc(1, sizeof(struct __bam_iter_t));
+	iter->tid = tid, iter->beg = beg, iter->end = end; iter->i = -1;
+	//
+	bins = (uint16_t*)calloc(BAM_MAX_BIN, 2);
 	n_bins = reg2bins(beg, end, bins);
 	index = idx->index[tid];
-	min_off = (beg>>BAM_LIDX_SHIFT >= idx->index2[tid].n)? 0 : idx->index2[tid].offset[beg>>BAM_LIDX_SHIFT];
+	if (idx->index2[tid].n > 0) {
+		min_off = (beg>>BAM_LIDX_SHIFT >= idx->index2[tid].n)? idx->index2[tid].offset[idx->index2[tid].n-1]
+			: idx->index2[tid].offset[beg>>BAM_LIDX_SHIFT];
+		if (min_off == 0) { // improvement for index files built by tabix prior to 0.1.4
+			int n = beg>>BAM_LIDX_SHIFT;
+			if (n > idx->index2[tid].n) n = idx->index2[tid].n;
+			for (i = n - 1; i >= 0; --i)
+				if (idx->index2[tid].offset[i] != 0) break;
+			if (i >= 0) min_off = idx->index2[tid].offset[i];
+		}
+	} else min_off = 0; // tabix 0.1.2 may produce such index files
 	for (i = n_off = 0; i < n_bins; ++i) {
 		if ((k = kh_get(i, index, bins[i])) != kh_end(index))
 			n_off += kh_value(index, k).n;
 	}
 	if (n_off == 0) {
-		free(bins); return 0;
+		free(bins); return iter;
 	}
 	off = (pair64_t*)calloc(n_off, 16);
 	for (i = n_off = 0; i < n_bins; ++i) {
@@ -534,41 +635,62 @@ pair64_t * get_chunk_coordinates(const bam_index_t *idx, int tid, int beg, int e
 		}
 		bam_destroy1(b);
 	}
-	*cnt_off = n_off;
+	iter->n_off = n_off; iter->off = off;
+	return iter;
+}
+
+pair64_t *get_chunk_coordinates(const bam_index_t *idx, int tid, int beg, int end, int *cnt_off)
+{ // for pysam compatibility: returns the chunk list (caller frees it) and stores its length in *cnt_off
+	bam_iter_t iter = bam_iter_query(idx, tid, beg, end);
+	pair64_t *off;
+	if (iter == 0) { *cnt_off = 0; return 0; } // bam_iter_query() returns 0 when end < beg
+	off = iter->off; *cnt_off = iter->n_off; // off may be 0 (with n_off == 0) when no bin overlaps
+	free(iter); // release only the iterator shell; ownership of off passes to the caller
 	return off;
 }
 
-int bam_fetch(bamFile fp, const bam_index_t *idx, int tid, int beg, int end, void *data, bam_fetch_f func)
+void bam_iter_destroy(bam_iter_t iter)
 {
-	int n_off;
-	pair64_t *off = get_chunk_coordinates(idx, tid, beg, end, &n_off);
-	if (off == 0) return 0;
-	{
-		// retrive alignments
-		uint64_t curr_off;
-		int i, ret, n_seeks;
-		n_seeks = 0; i = -1; curr_off = 0;
-		bam1_t *b = (bam1_t*)calloc(1, sizeof(bam1_t));
-		for (;;) {
-			if (curr_off == 0 || curr_off >= off[i].v) { // then jump to the next chunk
-				if (i == n_off - 1) break; // no more chunks
-				if (i >= 0) assert(curr_off == off[i].v); // otherwise bug
-				if (i < 0 || off[i].v != off[i+1].u) { // not adjacent chunks; then seek
-					bam_seek(fp, off[i+1].u, SEEK_SET);
-					curr_off = bam_tell(fp);
-					++n_seeks;
-				}
-				++i;
+	if (iter) { free(iter->off); free(iter); }
+}
+
+int bam_iter_read(bamFile fp, bam_iter_t iter, bam1_t *b)
+{
+	if (iter->finished) return -1;
+	if (iter->from_first) {
+		int ret = bam_read1(fp, b);
+		if (ret < 0) iter->finished = 1;
+		return ret;
+	}
+	if (iter->off == 0) return -1;
+	for (;;) {
+		int ret;
+		if (iter->curr_off == 0 || iter->curr_off >= iter->off[iter->i].v) { // then jump to the next chunk
+			if (iter->i == iter->n_off - 1) break; // no more chunks
+			if (iter->i >= 0) assert(iter->curr_off == iter->off[iter->i].v); // otherwise bug
+			if (iter->i < 0 || iter->off[iter->i].v != iter->off[iter->i+1].u) { // not adjacent chunks; then seek
+				bam_seek(fp, iter->off[iter->i+1].u, SEEK_SET);
+				iter->curr_off = bam_tell(fp);
 			}
-			if ((ret = bam_read1(fp, b)) > 0) {
-				curr_off = bam_tell(fp);
-				if (b->core.tid != tid || b->core.pos >= end) break; // no need to proceed
-				else if (is_overlap(beg, end, b)) func(b, data);
-			} else break; // end of file
+			++iter->i;
 		}
-//		fprintf(stderr, "[bam_fetch] # seek calls: %d\n", n_seeks);
-		bam_destroy1(b);
+		if ((ret = bam_read1(fp, b)) > 0) {
+			iter->curr_off = bam_tell(fp);
+			if (b->core.tid != iter->tid || b->core.pos >= iter->end) break; // no need to proceed
+			else if (is_overlap(iter->beg, iter->end, b)) return ret;
+		} else break; // end of file
 	}
-	free(off);
+	iter->finished = 1;
+	return -1;
+}
+
+int bam_fetch(bamFile fp, const bam_index_t *idx, int tid, int beg, int end, void *data, bam_fetch_f func)
+{ // call func(b, data) for every alignment on reference tid overlapping [beg, end); always returns 0
+	bam_iter_t iter = bam_iter_query(idx, tid, beg, end);
+	bam1_t *b;
+	if (iter == 0) return 0; // bam_iter_query() returns 0 when end < beg; bam_iter_read() would dereference it
+	b = bam_init1();
+	while (bam_iter_read(fp, iter, b) >= 0) func(b, data);
+	bam_destroy1(b); bam_iter_destroy(iter); // release the iterator and its chunk list as well
 	return 0;
 }
diff --git a/samtools/bam_maqcns.c b/samtools/bam_maqcns.c
index 71c2185..cad63d7 100644
--- a/samtools/bam_maqcns.c
+++ b/samtools/bam_maqcns.c
@@ -310,6 +310,7 @@ bam_maqindel_opt_t *bam_maqindel_opt_init()
 	bam_maqindel_opt_t *mi = (bam_maqindel_opt_t*)calloc(1, sizeof(bam_maqindel_opt_t));
 	mi->q_indel = 40;
 	mi->r_indel = 0.00015;
+	mi->r_snp = 0.001;
 	//
 	mi->mm_penalty = 3;
 	mi->indel_err = 4;
@@ -406,7 +407,8 @@ bam_maqindel_ret_t *bam_maqindel(int n, int pos, const bam_maqindel_opt_t *mi, c
 	}
 	{ // the core part
 		char *ref2, *rs, *inscns = 0;
-		int k, l, *score, *pscore, max_ins = types[n_types-1];
+		int qr_snp, k, l, *score, *pscore, max_ins = types[n_types-1];
+		qr_snp = (int)(-4.343 * log(mi->r_snp) + .499);
 		if (max_ins > 0) { // get the consensus of inserted sequences
 			int *inscns_aux = (int*)calloc(4 * n_types * max_ins, sizeof(int));
 			// count occurrences
@@ -446,12 +448,18 @@ bam_maqindel_ret_t *bam_maqindel(int n, int pos, const bam_maqindel_opt_t *mi, c
 		for (i = 0; i < n_types; ++i) {
 			ka_param_t ap = ka_param_blast;
 			ap.band_width = 2 * types[n_types - 1] + 2;
+			ap.gap_end = 0;
 			// write ref2
 			for (k = 0, j = left; j <= pos; ++j)
 				ref2[k++] = bam_nt16_nt4_table[bam_nt16_table[(int)ref[j]]];
 			if (types[i] <= 0) j += -types[i];
 			else for (l = 0; l < types[i]; ++l)
 					 ref2[k++] = bam_nt16_nt4_table[(int)inscns[i*max_ins + l]];
+			if (types[0] < 0) { // mask deleted sequences
+				int jj, tmp = types[i] >= 0? -types[0] : -types[0] + types[i];
+				for (jj = 0; jj < tmp && j < right && ref[j]; ++jj, ++j)
+					ref2[k++] = 4;
+			}
 			for (; j < right && ref[j]; ++j)
 				ref2[k++] = bam_nt16_nt4_table[bam_nt16_table[(int)ref[j]]];
 			if (j < right) right = j;
@@ -482,22 +490,27 @@ bam_maqindel_ret_t *bam_maqindel(int n, int pos, const bam_maqindel_opt_t *mi, c
 						if (op == BAM_CMATCH) {
 							int k;
 							for (k = 0; k < len; ++k)
-								if (ref2[x+k] != rs[y+k]) ps += bam1_qual(p->b)[y+k];
+								if (ref2[x+k] != rs[y+k] && ref2[x+k] < 4)
+									ps += bam1_qual(p->b)[y+k] < qr_snp? bam1_qual(p->b)[y+k] : qr_snp;
 							x += len; y += len;
 						} else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) {
-							if (op == BAM_CINS) ps += mi->q_indel * len;
+							if (op == BAM_CINS && l > 0 && l < n_acigar - 1) ps += mi->q_indel * len;
 							y += len;
 						} else if (op == BAM_CDEL) {
-							ps += mi->q_indel * len;
+							if (l > 0 && l < n_acigar - 1) ps += mi->q_indel * len;
 							x += len;
 						}
 					}
 					pscore[i*n+j] = ps;
-					/*if (pos == 2618517) { // for debugging only
-						fprintf(stderr, "pos=%d, type=%d, j=%d, score=%d, psore=%d, %d, %d, %d, %d, ", pos+1, types[i], j, score[i*n+j], pscore[i*n+j], tbeg, tend, qbeg, qend);
-						for (l = 0; l < n_acigar; ++l) fprintf(stderr, "%d%c", acigar[l]>>4, "MIDS"[acigar[l]&0xf]); fprintf(stderr, "\n");
-						for (l = 0; l < tend - tbeg + types[i]; ++l) fputc("ACGTN"[ref2[l]], stderr); fputc('\n', stderr);
-						for (l = 0; l < qend - qbeg; ++l) fputc("ACGTN"[rs[l]], stderr); fputc('\n', stderr);
+					/*if (1) { // for debugging only
+						fprintf(stderr, "id=%d, pos=%d, type=%d, j=%d, score=%d, psore=%d, %d, %d, %d, %d, %d, ",
+								j, pos+1, types[i], j, score[i*n+j], pscore[i*n+j], tbeg, tend, qbeg, qend, mi->q_indel);
+						for (l = 0; l < n_acigar; ++l) fprintf(stderr, "%d%c", acigar[l]>>4, "MIDS"[acigar[l]&0xf]);
+						fprintf(stderr, "\n");
+						for (l = 0; l < tend - tbeg + types[i]; ++l) fputc("ACGTN"[ref2[l+tbeg-left]], stderr);
+						fputc('\n', stderr);
+						for (l = 0; l < qend - qbeg; ++l) fputc("ACGTN"[rs[l]], stderr);
+						fputc('\n', stderr);
 						}*/
 					free(acigar);
 				}
@@ -560,7 +573,7 @@ bam_maqindel_ret_t *bam_maqindel(int n, int pos, const bam_maqindel_opt_t *mi, c
 				ret->gl[0] = ret->gl[1] = 0;
 				for (j = 0; j < n; ++j) {
 					int s1 = pscore[max1_i*n + j], s2 = pscore[max2_i*n + j];
-					//printf("%d, %d, %d, %d, %d\n", pl[j].b->core.pos+1, max1_i, max2_i, s1, s2);
+					//fprintf(stderr, "id=%d, %d, %d, %d, %d, %d\n", j, pl[j].b->core.pos+1, types[max1_i], types[max2_i], s1, s2);
 					if (s1 > s2) ret->gl[0] += s1 - s2 < seq_err? s1 - s2 : seq_err;
 					else ret->gl[1] += s2 - s1 < seq_err? s2 - s1 : seq_err;
 				}
diff --git a/samtools/bam_maqcns.h b/samtools/bam_maqcns.h
index fa5489d..6cc5355 100644
--- a/samtools/bam_maqcns.h
+++ b/samtools/bam_maqcns.h
@@ -16,8 +16,9 @@ typedef struct {
 } bam_maqcns_t;
 
 typedef struct {
-	int q_indel;
-	float r_indel;
+	int q_indel; // indel sequencing error, phred scaled
+	float r_indel; // indel prior
+	float r_snp; // snp prior
 	// hidden parameters, unchangeable from command line
 	int mm_penalty, indel_err, ambi_thres;
 } bam_maqindel_opt_t;
diff --git a/samtools/bam_md.c b/samtools/bam_md.c
index 3ca7309..17b0a4a 100644
--- a/samtools/bam_md.c
+++ b/samtools/bam_md.c
@@ -6,7 +6,7 @@
 #include "sam.h"
 #include "kstring.h"
 
-void bam_fillmd1(bam1_t *b, char *ref, int is_equal)
+void bam_fillmd1_core(bam1_t *b, char *ref, int is_equal, int max_nm)
 {
 	uint8_t *seq = bam1_seq(b);
 	uint32_t *cigar = bam1_cigar(b);
@@ -53,6 +53,26 @@ void bam_fillmd1(bam1_t *b, char *ref, int is_equal)
 		}
 	}
 	ksprintf(str, "%d", u);
+	// apply max_nm
+	if (max_nm > 0 && nm >= max_nm) {
+		for (i = y = 0, x = c->pos; i < c->n_cigar; ++i) {
+			int j, l = cigar[i]>>4, op = cigar[i]&0xf;
+			if (op == BAM_CMATCH) {
+				for (j = 0; j < l; ++j) {
+					int z = y + j;
+					int c1 = bam1_seqi(seq, z), c2 = bam_nt16_table[(int)ref[x+j]];
+					if (ref[x+j] == 0) break; // out of boundary
+					if ((c1 == c2 && c1 != 15 && c2 != 15) || c1 == 0) { // a match
+						seq[z/2] |= (z&1)? 0x0f : 0xf0;
+						bam1_qual(b)[z] = 0;
+					}
+				}
+				if (j < l) break;
+				x += l; y += l;
+			} else if (op == BAM_CDEL || op == BAM_CREF_SKIP) x += l;
+			else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) y += l;
+		}
+	}
 	// update NM
 	old_nm = bam_aux_get(b, "NM");
 	if (c->flag & BAM_FUNMAP) return;
@@ -83,9 +103,14 @@ void bam_fillmd1(bam1_t *b, char *ref, int is_equal)
 	free(str->s); free(str);
 }
 
+void bam_fillmd1(bam1_t *b, char *ref, int is_equal) // original entry point, kept for existing callers
+{
+	bam_fillmd1_core(b, ref, is_equal, 0); // max_nm = 0 skips the "apply max_nm" masking step
+}
+
 int bam_fillmd(int argc, char *argv[])
 {
-	int c, is_equal = 0, tid = -2, ret, len, is_bam_out, is_sam_in, is_uncompressed;
+	int c, is_equal = 0, tid = -2, ret, len, is_bam_out, is_sam_in, is_uncompressed, max_nm = 0;
 	samfile_t *fp, *fpout = 0;
 	faidx_t *fai;
 	char *ref = 0, mode_w[8], mode_r[8];
@@ -94,12 +119,13 @@ int bam_fillmd(int argc, char *argv[])
 	is_bam_out = is_sam_in = is_uncompressed = 0;
 	mode_w[0] = mode_r[0] = 0;
 	strcpy(mode_r, "r"); strcpy(mode_w, "w");
-	while ((c = getopt(argc, argv, "eubS")) >= 0) {
+	while ((c = getopt(argc, argv, "eubSn:")) >= 0) {
 		switch (c) {
 		case 'e': is_equal = 1; break;
 		case 'b': is_bam_out = 1; break;
 		case 'u': is_uncompressed = is_bam_out = 1; break;
 		case 'S': is_sam_in = 1; break;
+		case 'n': max_nm = atoi(optarg); break;
 		default: fprintf(stderr, "[bam_fillmd] unrecognized option '-%c'\n", c); return 1;
 		}
 	}
@@ -136,7 +162,7 @@ int bam_fillmd(int argc, char *argv[])
 					fprintf(stderr, "[bam_fillmd] fail to find sequence '%s' in the reference.\n",
 							fp->header->target_name[tid]);
 			}
-			if (ref) bam_fillmd1(b, ref, is_equal);
+			if (ref) bam_fillmd1_core(b, ref, is_equal, max_nm);
 		}
 		samwrite(fpout, b);
 	}
diff --git a/samtools/bam_pileup.c b/samtools/bam_pileup.c
index f68f400..3c41a16 100644
--- a/samtools/bam_pileup.c
+++ b/samtools/bam_pileup.c
@@ -73,18 +73,28 @@ static inline int resolve_cigar(bam_pileup1_t *p, uint32_t pos)
 				p->qpos = y + (pos - x);
 				if (x == pos && is_restart) p->is_head = 1;
 				if (x + l - 1 == pos) { // come to the end of a match
-					if (k < c->n_cigar - 1) { // there are additional operation(s)
+					int has_next_match = 0;
+					unsigned i;
+					for (i = k + 1; i < c->n_cigar; ++i) {
+						uint32_t cigar = bam1_cigar(b)[i];
+						int opi = cigar&BAM_CIGAR_MASK;
+						if (opi == BAM_CMATCH) {
+							has_next_match = 1;
+							break;
+						} else if (opi == BAM_CSOFT_CLIP || opi == BAM_CREF_SKIP || opi == BAM_CHARD_CLIP) break;
+					}
+					if (!has_next_match) p->is_tail = 1;
+					if (k < c->n_cigar - 1 && has_next_match) { // there are additional operation(s)
 						uint32_t cigar = bam1_cigar(b)[k+1]; // next CIGAR
 						int op_next = cigar&BAM_CIGAR_MASK; // next CIGAR operation
 						if (op_next == BAM_CDEL) p->indel = -(int32_t)(cigar>>BAM_CIGAR_SHIFT); // del
 						else if (op_next == BAM_CINS) p->indel = cigar>>BAM_CIGAR_SHIFT; // ins
-						if (op_next == BAM_CDEL || op_next == BAM_CINS) {
-							if (k + 2 < c->n_cigar) op_next = bam1_cigar(b)[k+2]&BAM_CIGAR_MASK;
-							else p->is_tail = 1;
+						else if (op_next == BAM_CPAD && k + 2 < c->n_cigar) { // no working for adjacent padding
+							cigar = bam1_cigar(b)[k+2]; op_next = cigar&BAM_CIGAR_MASK;
+							if (op_next == BAM_CDEL) p->indel = -(int32_t)(cigar>>BAM_CIGAR_SHIFT); // del
+							else if (op_next == BAM_CINS) p->indel = cigar>>BAM_CIGAR_SHIFT; // ins
 						}
-						if (op_next == BAM_CSOFT_CLIP || op_next == BAM_CREF_SKIP || op_next == BAM_CHARD_CLIP)
-							p->is_tail = 1; // tail
-					} else p->is_tail = 1; // this is the last operation; set tail
+					}
 				}
 			}
 			x += l; y += l;
@@ -96,7 +106,8 @@ static inline int resolve_cigar(bam_pileup1_t *p, uint32_t pos)
 			x += l;
 		} else if (op == BAM_CREF_SKIP) x += l;
 		else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) y += l;
-		is_restart = (op == BAM_CREF_SKIP || op == BAM_CSOFT_CLIP || op == BAM_CHARD_CLIP);
+		if (is_restart) is_restart ^= (op == BAM_CMATCH);
+		else is_restart ^= (op == BAM_CREF_SKIP || op == BAM_CSOFT_CLIP || op == BAM_CHARD_CLIP);
 		if (x > pos) {
 			if (op == BAM_CREF_SKIP) ret = 0; // then do not put it into pileup at all
 			break;
@@ -108,119 +119,167 @@ static inline int resolve_cigar(bam_pileup1_t *p, uint32_t pos)
 
 /* --- END: Auxiliary functions */
 
-struct __bam_plbuf_t {
+/*******************
+ * pileup iterator *
+ *******************/
+
+struct __bam_plp_t {
 	mempool_t *mp;
 	lbnode_t *head, *tail, *dummy;
-	bam_pileup_f func;
-	void *func_data;
 	int32_t tid, pos, max_tid, max_pos;
-	int max_pu, is_eof;
-	bam_pileup1_t *pu;
-	int flag_mask;
+	int is_eof, flag_mask, max_plp, error;
+	bam_pileup1_t *plp;
+	// for the "auto" interface only
+	bam1_t *b;
+	bam_plp_auto_f func;
+	void *data;
 };
 
-void bam_plbuf_reset(bam_plbuf_t *buf)
+bam_plp_t bam_plp_init(bam_plp_auto_f func, void *data)
 {
-	lbnode_t *p, *q;
-	buf->max_tid = buf->max_pos = -1;
-	buf->tid = buf->pos = 0;
-	buf->is_eof = 0;
-	for (p = buf->head; p->next;) {
-		q = p->next;
-		mp_free(buf->mp, p);
-		p = q;
+	bam_plp_t iter;
+	iter = calloc(1, sizeof(struct __bam_plp_t));
+	iter->mp = mp_init();
+	iter->head = iter->tail = mp_alloc(iter->mp);
+	iter->dummy = mp_alloc(iter->mp);
+	iter->max_tid = iter->max_pos = -1;
+	iter->flag_mask = BAM_DEF_MASK;
+	if (func) {
+		iter->func = func;
+		iter->data = data;
+		iter->b = bam_init1();
 	}
-	buf->head = buf->tail;
+	return iter;
 }
 
-void bam_plbuf_set_mask(bam_plbuf_t *buf, int mask)
-{
-	if (mask < 0) buf->flag_mask = BAM_DEF_MASK;
-	else buf->flag_mask = BAM_FUNMAP | mask;
-}
-
-bam_plbuf_t *bam_plbuf_init(bam_pileup_f func, void *data)
+void bam_plp_destroy(bam_plp_t iter)
 {
-	bam_plbuf_t *buf;
-	buf = (bam_plbuf_t*)calloc(1, sizeof(bam_plbuf_t));
-	buf->func = func; buf->func_data = data;
-	buf->mp = mp_init();
-	buf->head = buf->tail = mp_alloc(buf->mp);
-	buf->dummy = mp_alloc(buf->mp);
-	buf->max_tid = buf->max_pos = -1;
-	buf->flag_mask = BAM_DEF_MASK;
-	return buf;
+	mp_free(iter->mp, iter->dummy);
+	mp_free(iter->mp, iter->head);
+	if (iter->mp->cnt != 0)
+		fprintf(stderr, "[bam_plp_destroy] memory leak: %d. Continue anyway.\n", iter->mp->cnt);
+	mp_destroy(iter->mp);
+	if (iter->b) bam_destroy1(iter->b);
+	free(iter->plp);
+	free(iter);
 }
 
-void bam_plbuf_destroy(bam_plbuf_t *buf)
+const bam_pileup1_t *bam_plp_next(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp)
 {
-	mp_free(buf->mp, buf->dummy);
-	mp_free(buf->mp, buf->head);
-	if (buf->mp->cnt != 0)
-		fprintf(stderr, "[bam_plbuf_destroy] memory leak: %d. Continue anyway.\n", buf->mp->cnt);
-	mp_destroy(buf->mp);
-	free(buf->pu);
-	free(buf);
+	if (iter->error) { *_n_plp = -1; return 0; }
+	*_n_plp = 0;
+	if (iter->is_eof && iter->head->next == 0) return 0;
+	while (iter->is_eof || iter->max_tid > iter->tid || (iter->max_tid == iter->tid && iter->max_pos > iter->pos)) {
+		int n_plp = 0;
+		lbnode_t *p, *q;
+		// write iter->plp at iter->pos
+		iter->dummy->next = iter->head;
+		for (p = iter->head, q = iter->dummy; p->next; q = p, p = p->next) {
+			if (p->b.core.tid < iter->tid || (p->b.core.tid == iter->tid && p->end <= iter->pos)) { // then remove
+				q->next = p->next; mp_free(iter->mp, p); p = q;
+			} else if (p->b.core.tid == iter->tid && p->beg <= iter->pos) { // here: p->end > pos; then add to pileup
+				if (n_plp == iter->max_plp) { // then double the capacity
+					iter->max_plp = iter->max_plp? iter->max_plp<<1 : 256;
+					iter->plp = (bam_pileup1_t*)realloc(iter->plp, sizeof(bam_pileup1_t) * iter->max_plp);
+				}
+				iter->plp[n_plp].b = &p->b;
+				if (resolve_cigar(iter->plp + n_plp, iter->pos)) ++n_plp; // skip the read if we are looking at ref-skip
+			}
+		}
+		iter->head = iter->dummy->next; // dummy->next may be changed
+		*_n_plp = n_plp; *_tid = iter->tid; *_pos = iter->pos;
+		// update iter->tid and iter->pos
+		if (iter->head->next) {
+			if (iter->tid > iter->head->b.core.tid) {
+				fprintf(stderr, "[%s] unsorted input. Pileup aborts.\n", __func__);
+				iter->error = 1;
+				*_n_plp = -1;
+				return 0;
+			}
+		}
+		if (iter->tid < iter->head->b.core.tid) { // come to a new reference sequence
+			iter->tid = iter->head->b.core.tid; iter->pos = iter->head->beg; // jump to the next reference
+		} else if (iter->pos < iter->head->beg) { // here: tid == head->b.core.tid
+			iter->pos = iter->head->beg; // jump to the next position
+		} else ++iter->pos; // scan contiguously
+		// return
+		if (n_plp) return iter->plp;
+		if (iter->is_eof && iter->head->next == 0) break;
+	}
+	return 0;
 }
 
-int bam_plbuf_push(const bam1_t *b, bam_plbuf_t *buf)
+int bam_plp_push(bam_plp_t iter, const bam1_t *b)
 {
-	if (b) { // fill buffer
+	if (iter->error) return -1;
+	if (b) {
 		if (b->core.tid < 0) return 0;
-		if (b->core.flag & buf->flag_mask) return 0;
-		bam_copy1(&buf->tail->b, b);
-		buf->tail->beg = b->core.pos; buf->tail->end = bam_calend(&b->core, bam1_cigar(b));
-		if (b->core.tid < buf->max_tid) {
+		if (b->core.flag & iter->flag_mask) return 0;
+		bam_copy1(&iter->tail->b, b);
+		iter->tail->beg = b->core.pos; iter->tail->end = bam_calend(&b->core, bam1_cigar(b));
+		if (b->core.tid < iter->max_tid) {
 			fprintf(stderr, "[bam_pileup_core] the input is not sorted (chromosomes out of order)\n");
+			iter->error = 1;
 			return -1;
 		}
-		if ((b->core.tid == buf->max_tid) && (buf->tail->beg < buf->max_pos)) {
+		if ((b->core.tid == iter->max_tid) && (iter->tail->beg < iter->max_pos)) {
 			fprintf(stderr, "[bam_pileup_core] the input is not sorted (reads out of order)\n");
+			iter->error = 1;
 			return -1;
 		}
-		buf->max_tid = b->core.tid; buf->max_pos = buf->tail->beg;
-		if (buf->tail->end > buf->pos || buf->tail->b.core.tid > buf->tid) {
-			buf->tail->next = mp_alloc(buf->mp);
-			buf->tail = buf->tail->next;
-		}
-	} else buf->is_eof = 1;
-	while (buf->is_eof || buf->max_tid > buf->tid || (buf->max_tid == buf->tid && buf->max_pos > buf->pos)) {
-		int n_pu = 0;
-		lbnode_t *p, *q;
-		buf->dummy->next = buf->head;
-		for (p = buf->head, q = buf->dummy; p->next; q = p, p = p->next) {
-			if (p->b.core.tid < buf->tid || (p->b.core.tid == buf->tid && p->end <= buf->pos)) { // then remove from the list
-				q->next = p->next; mp_free(buf->mp, p); p = q;
-			} else if (p->b.core.tid == buf->tid && p->beg <= buf->pos) { // here: p->end > pos; then add to pileup
-				if (n_pu == buf->max_pu) { // then double the capacity
-					buf->max_pu = buf->max_pu? buf->max_pu<<1 : 256;
-					buf->pu = (bam_pileup1_t*)realloc(buf->pu, sizeof(bam_pileup1_t) * buf->max_pu);
-				}
-				buf->pu[n_pu].b = &p->b;
-				if (resolve_cigar(buf->pu + n_pu, buf->pos)) ++n_pu; // skip the read if we are looking at BAM_CREF_SKIP
-			}
+		iter->max_tid = b->core.tid; iter->max_pos = iter->tail->beg;
+		if (iter->tail->end > iter->pos || iter->tail->b.core.tid > iter->tid) {
+			iter->tail->next = mp_alloc(iter->mp);
+			iter->tail = iter->tail->next;
 		}
-		buf->head = buf->dummy->next; // dummy->next may be changed
-		if (n_pu) { // then call user defined function
-			buf->func(buf->tid, buf->pos, n_pu, buf->pu, buf->func_data);
-		}
-		// update tid and pos
-		if (buf->head->next) {
-			if (buf->tid > buf->head->b.core.tid) {
-				fprintf(stderr, "[bam_plbuf_push] unsorted input. Pileup aborts.\n");
-				return 1;
+	} else iter->is_eof = 1;
+	return 0;
+}
+
+const bam_pileup1_t *bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp)
+{
+	const bam_pileup1_t *plp;
+	if (iter->func == 0 || iter->error) { *_n_plp = -1; return 0; }
+	if ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp;
+	else {
+		*_n_plp = 0;
+		if (iter->is_eof) return 0;
+		while (iter->func(iter->data, iter->b) >= 0) {
+			if (bam_plp_push(iter, iter->b) < 0) {
+				*_n_plp = -1;
+				return 0;
 			}
+			if ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp;
 		}
-		if (buf->tid < buf->head->b.core.tid) { // come to a new reference sequence
-			buf->tid = buf->head->b.core.tid; buf->pos = buf->head->beg; // jump to the next reference
-		} else if (buf->pos < buf->head->beg) { // here: tid == head->b.core.tid
-			buf->pos = buf->head->beg; // jump to the next position
-		} else ++buf->pos; // scan contiguously
-		if (buf->is_eof && buf->head->next == 0) break;
+		bam_plp_push(iter, 0);
+		if ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp;
+		return 0;
 	}
-	return 0;
 }
 
+void bam_plp_reset(bam_plp_t iter) // rewind the iterator so that a fresh stream of alignments can be pushed
+{
+	lbnode_t *p, *q;
+	iter->max_tid = iter->max_pos = -1; // forget the coordinate of the last alignment pushed
+	iter->tid = iter->pos = 0; // restart scanning from the beginning
+	iter->is_eof = 0;
+	for (p = iter->head; p->next;) { // give every buffered node back to the pool, stopping at the node whose next is null
+		q = p->next;
+		mp_free(iter->mp, p);
+		p = q;
+	}
+	iter->head = iter->tail; // the buffer is empty again
+} // NOTE(review): iter->error is not cleared, so bam_plp_push()/bam_plp_next() keep failing after a reset -- confirm this is intended
+
+void bam_plp_set_mask(bam_plp_t iter, int mask) // choose the FLAG bits that make bam_plp_push() silently drop an alignment
+{
+	iter->flag_mask = mask < 0? BAM_DEF_MASK : (BAM_FUNMAP | mask); // negative mask: back to the default; otherwise BAM_FUNMAP is always included
+}
+
+/*****************
+ * callback APIs *
+ *****************/
+
 int bam_pileup_file(bamFile fp, int mask, bam_pileup_f func, void *func_data)
 {
 	bam_plbuf_t *buf;
@@ -236,3 +295,102 @@ int bam_pileup_file(bamFile fp, int mask, bam_pileup_f func, void *func_data)
 	bam_destroy1(b);
 	return 0;
 }
+
+void bam_plbuf_set_mask(bam_plbuf_t *buf, int mask) // callback API: forward the filter mask to the underlying iterator
+{
+	bam_plp_set_mask(buf->iter, mask);
+}
+
+void bam_plbuf_reset(bam_plbuf_t *buf) // callback API: rewind the underlying iterator
+{
+	bam_plp_reset(buf->iter);
+}
+
+bam_plbuf_t *bam_plbuf_init(bam_pileup_f func, void *data) // create a callback-style pileup buffer; bam_plbuf_push() calls func with data for each pileup it obtains
+{
+	bam_plbuf_t *buf;
+	buf = calloc(1, sizeof(bam_plbuf_t));
+	buf->iter = bam_plp_init(0, 0); // no read callback: alignments are supplied through bam_plbuf_push()
+	buf->func = func;
+	buf->data = data;
+	return buf;
+}
+
+void bam_plbuf_destroy(bam_plbuf_t *buf) // release the buffer together with the iterator created by bam_plbuf_init()
+{
+	bam_plp_destroy(buf->iter);
+	free(buf);
+}
+
+int bam_plbuf_push(const bam1_t *b, bam_plbuf_t *buf)
+{
+	int ret, n_plp, tid, pos;
+	const bam_pileup1_t *plp;
+	ret = bam_plp_push(buf->iter, b);
+	if (ret < 0) return ret;
+	while ((plp = bam_plp_next(buf->iter, &tid, &pos, &n_plp)) != 0)
+		buf->func(tid, pos, n_plp, plp, buf->data);
+	return 0;
+}
+
+/***********
+ * mpileup *
+ ***********/
+
+struct __bam_mplp_t { // state of a pileup iterator running over several inputs in parallel
+	int n; // number of inputs
+	uint64_t min, *pos; // pos[i]: tid<<32|pos last reported by input i; min: smallest such key chosen by the last bam_mplp_auto() call
+	bam_plp_t *iter; // one single-input pileup iterator per input
+	int *n_plp; // n_plp[i]: depth last reported by input i
+	const bam_pileup1_t **plp; // plp[i]: pileup array last returned by input i (null once nothing more is returned)
+};
+
+bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data) // create an iterator over n inputs; func is the read callback and data[i] its argument for input i
+{
+	int i;
+	bam_mplp_t iter;
+	iter = calloc(1, sizeof(struct __bam_mplp_t));
+	iter->pos = calloc(n, 8); // 8 bytes per element: pos is an array of uint64_t
+	iter->n_plp = calloc(n, sizeof(int));
+	iter->plp = calloc(n, sizeof(void*));
+	iter->iter = calloc(n, sizeof(void*));
+	iter->n = n;
+	iter->min = (uint64_t)-1; // all-ones is the "no position" sentinel also used by bam_mplp_auto()
+	for (i = 0; i < n; ++i) {
+		iter->iter[i] = bam_plp_init(func, data[i]);
+		iter->pos[i] = iter->min; // equal to min, so the first bam_mplp_auto() call advances every input
+	}
+	return iter;
+}
+
+void bam_mplp_destroy(bam_mplp_t iter) // free the per-input iterators, the bookkeeping arrays and the iterator itself
+{
+	int i;
+	for (i = 0; i < iter->n; ++i) bam_plp_destroy(iter->iter[i]);
+	free(iter->iter); free(iter->pos); free(iter->n_plp); free(iter->plp);
+	free(iter);
+}
+
+int bam_mplp_auto(bam_mplp_t iter, int *_tid, int *_pos, int *n_plp, const bam_pileup1_t **plp) // step to the next covered position; returns the number of inputs covering it, 0 when all inputs are done
+{
+	int i, ret = 0;
+	uint64_t new_min = (uint64_t)-1;
+	for (i = 0; i < iter->n; ++i) {
+		if (iter->pos[i] == iter->min) { // this input was consumed by the previous call (or never read): advance it
+			int tid = 0, pos = 0; // bam_plp_auto() leaves them untouched when it returns null
+			iter->plp[i] = bam_plp_auto(iter->iter[i], &tid, &pos, &iter->n_plp[i]);
+			iter->pos[i] = iter->plp[i]? (uint64_t)tid<<32 | (uint32_t)pos : (uint64_t)-1; // a finished/failed input gets the sentinel instead of a garbage key
+		}
+		if (iter->plp[i] && iter->pos[i] < new_min) new_min = iter->pos[i];
+	}
+	iter->min = new_min;
+	if (new_min == (uint64_t)-1) return 0; // no input returned a pileup
+	*_tid = new_min>>32; *_pos = (uint32_t)new_min;
+	for (i = 0; i < iter->n; ++i) {
+		if (iter->pos[i] == iter->min) { // input i sits exactly on the reported position
+			n_plp[i] = iter->n_plp[i], plp[i] = iter->plp[i];
+			++ret;
+		} else n_plp[i] = 0, plp[i] = 0; // input i is further ahead or finished: report zero depth
+	}
+	return ret;
+}
diff --git a/samtools/bam_plcmd.c b/samtools/bam_plcmd.c
index ba787a9..6804795 100644
--- a/samtools/bam_plcmd.c
+++ b/samtools/bam_plcmd.c
@@ -18,6 +18,10 @@ KHASH_MAP_INIT_INT64(64, indel_list_t)
 #define BAM_PLF_GLF        0x08
 #define BAM_PLF_VAR_ONLY   0x10
 #define BAM_PLF_2ND        0x20
+#define BAM_PLF_RANBASE    0x40
+#define BAM_PLF_1STBASE    0x80
+#define BAM_PLF_ALLBASE    0x100
+#define BAM_PLF_READPOS    0x200
 
 typedef struct {
 	bam_header_t *h;
@@ -28,6 +32,7 @@ typedef struct {
 	uint32_t format;
 	int tid, len, last_pos;
 	int mask;
+    int max_depth;  // for indel calling, ignore reads with the depth too high. 0 for unlimited
 	char *ref;
 	glfFile fp_glf; // for glf output only
 } pu_data_t;
@@ -121,10 +126,11 @@ static int glt3_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pu,
 	g3->offset = pos - d->last_pos;
 	d->last_pos = pos;
 	glf3_write1(d->fp_glf, g3);
-	if (pos < d->len) {
+    if (pos < d->len) {
+        int m = (!d->max_depth || d->max_depth>n) ? n : d->max_depth;
 		if (proposed_indels)
-			r = bam_maqindel(n, pos, d->ido, pu, d->ref, proposed_indels[0], proposed_indels+1);
-		else r = bam_maqindel(n, pos, d->ido, pu, d->ref, 0, 0);
+			r = bam_maqindel(m, pos, d->ido, pu, d->ref, proposed_indels[0], proposed_indels+1);
+		else r = bam_maqindel(m, pos, d->ido, pu, d->ref, 0, 0);
 	}
 	if (r) { // then write indel line
 		int het = 3 * n, min;
@@ -152,11 +158,37 @@ static int glt3_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pu,
 	return 0;
 }
 
+static void pileup_seq(const bam_pileup1_t *p, int pos, int ref_len, const char *ref) // print to stdout the pileup symbols of one read at reference index pos; ref may be null
+{
+	if (p->is_head) printf("^%c", p->b->core.qual > 93? 126 : p->b->core.qual + 33); // read start: '^' plus the mapping quality as a character, capped at 126
+	if (!p->is_del) {
+		int j, rb, c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]; // c: read base at the query position
+		rb = (ref && pos < ref_len)? ref[pos] : 'N'; // reference base, 'N' when no reference is available
+		if (c == '=' || toupper(c) == toupper(rb)) c = bam1_strand(p->b)? ',' : '.'; // match: ',' or '.' depending on bam1_strand()
+		else c = bam1_strand(p->b)? tolower(c) : toupper(c); // mismatch: the base itself, letter case chosen by bam1_strand()
+		putchar(c);
+		if (p->indel > 0) { // positive indel: "+<len>" followed by that many bases taken from the read
+			printf("+%d", p->indel);
+			for (j = 1; j <= p->indel; ++j) {
+				c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos + j)];
+				putchar(bam1_strand(p->b)? tolower(c) : toupper(c));
+			}
+		} else if (p->indel < 0) { // negative indel: the number (its '-' sign included) followed by reference bases
+			printf("%d", p->indel);
+			for (j = 1; j <= -p->indel; ++j) {
+				c = (ref && (int)pos+j < ref_len)? ref[pos+j] : 'N'; // 'N' beyond the reference or without one
+				putchar(bam1_strand(p->b)? tolower(c) : toupper(c));
+			}
+		}
+	} else putchar('*'); // is_del is set: the read has no base at this position
+	if (p->is_tail) putchar('$'); // read end
+}
+
 static int pileup_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pu, void *data)
 {
 	pu_data_t *d = (pu_data_t*)data;
 	bam_maqindel_ret_t *r = 0;
-	int i, j, rb, rms_mapq = -1, *proposed_indels = 0;
+	int i, rb, rms_mapq = -1, *proposed_indels = 0;
 	uint64_t rms_aux;
 	uint32_t cns = 0;
 
@@ -171,7 +203,7 @@ static int pileup_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *p
 	// update d->ref if necessary
 	if (d->fai && (int)tid != d->tid) {
 		free(d->ref);
-		d->ref = fai_fetch(d->fai, d->h->target_name[tid], &d->len);
+		d->ref = faidx_fetch_seq(d->fai, d->h->target_name[tid], 0, 0x7fffffff, &d->len);
 		d->tid = tid;
 	}
 	rb = (d->ref && (int)pos < d->len)? d->ref[pos] : 'N';
@@ -182,12 +214,31 @@ static int pileup_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *p
 		if (i == n) return 0;
 	}
 	// call the consensus and indel
-	if (d->format & BAM_PLF_CNS) // call consensus
-		cns = bam_maqcns_call(n, pu, d->c);
-	if ((d->format & (BAM_PLF_CNS|BAM_PLF_INDEL_ONLY)) && d->ref && pos < d->len) { // call indels
-		if (proposed_indels) // the first element gives the size of the array
-			r = bam_maqindel(n, pos, d->ido, pu, d->ref, proposed_indels[0], proposed_indels+1);
-		else r = bam_maqindel(n, pos, d->ido, pu, d->ref, 0, 0);
+	if (d->format & BAM_PLF_CNS) { // call consensus
+		if (d->format & (BAM_PLF_RANBASE|BAM_PLF_1STBASE)) { // use a random base or the 1st base as the consensus call
+			const bam_pileup1_t *p = (d->format & BAM_PLF_1STBASE)? pu : pu + (int)(drand48() * n);
+			int q = bam1_qual(p->b)[p->qpos];
+			int mapQ = p->b->core.qual < d->c->cap_mapQ? p->b->core.qual : d->c->cap_mapQ;
+			uint32_t b = bam1_seqi(bam1_seq(p->b), p->qpos);
+			cns = b<<28 | 0xf<<24 | mapQ<<16 | q<<8;
+		} else if (d->format & BAM_PLF_ALLBASE) { // collapse all bases
+			uint64_t rmsQ = 0;
+			uint32_t b = 0;
+			for (i = 0; i < n; ++i) {
+				const bam_pileup1_t *p = pu + i;
+				int q = p->b->core.qual < d->c->cap_mapQ? p->b->core.qual : d->c->cap_mapQ;
+				b |= bam1_seqi(bam1_seq(p->b), p->qpos);
+				rmsQ += q * q;
+			}
+			rmsQ = (uint64_t)(sqrt((double)rmsQ / n) + .499);
+			cns = b<<28 | 0xf<<24 | rmsQ<<16 | 60<<8;
+		} else cns = bam_maqcns_call(n, pu, d->c);
+	}
+    if ((d->format & (BAM_PLF_CNS|BAM_PLF_INDEL_ONLY)) && d->ref && pos < d->len) { // call indels
+        int m = (!d->max_depth || d->max_depth>n) ? n : d->max_depth;
+        if (proposed_indels) // the first element gives the size of the array
+            r = bam_maqindel(m, pos, d->ido, pu, d->ref, proposed_indels[0], proposed_indels+1);
+        else r = bam_maqindel(m, pos, d->ido, pu, d->ref, 0, 0);
 	}
 	// when only variant sites are asked for, test if the site is a variant
 	if ((d->format & BAM_PLF_CNS) && (d->format & BAM_PLF_VAR_ONLY)) {
@@ -218,27 +269,7 @@ static int pileup_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *p
 		const bam_pileup1_t *p = pu + i;
 		int tmp = p->b->core.qual < d->c->cap_mapQ? p->b->core.qual : d->c->cap_mapQ;
 		rms_aux += tmp * tmp;
-		if (p->is_head) printf("^%c", p->b->core.qual > 93? 126 : p->b->core.qual + 33);
-		if (!p->is_del) {
-			int c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)];
-			if (c == '=' || toupper(c) == toupper(rb)) c = bam1_strand(p->b)? ',' : '.';
-			else c = bam1_strand(p->b)? tolower(c) : toupper(c);
-			putchar(c);
-			if (p->indel > 0) {
-				printf("+%d", p->indel);
-				for (j = 1; j <= p->indel; ++j) {
-					c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos + j)];
-					putchar(bam1_strand(p->b)? tolower(c) : toupper(c));
-				}
-			} else if (p->indel < 0) {
-				printf("%d", p->indel);
-				for (j = 1; j <= -p->indel; ++j) {
-					c = (d->ref && (int)pos+j < d->len)? d->ref[pos+j] : 'N';
-					putchar(bam1_strand(p->b)? tolower(c) : toupper(c));
-				}
-			}
-		} else putchar('*');
-		if (p->is_tail) putchar('$');
+		pileup_seq(p, pos, d->len, d->ref);
 	}
 	// finalize rms_mapq
 	rms_aux = (uint64_t)(sqrt((double)rms_aux / n) + .499);
@@ -275,6 +306,15 @@ static int pileup_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *p
 			putchar(c);
 		}
 	}
+	// print read position
+	if (d->format & BAM_PLF_READPOS) {
+		putchar('\t');
+		for (i = 0; i < n; ++i) {
+			int x = pu[i].qpos;
+			int l = pu[i].b->core.l_qseq;
+			printf("%d,", x < l/2? x+1 : -((l-1)-x+1));
+		}
+	}
 	putchar('\n');
 	// print the indel line if r has been calculated. This only happens if:
 	// a) -c or -i are flagged, AND b) the reference sequence is available
@@ -298,29 +338,40 @@ int bam_pileup(int argc, char *argv[])
 	int c, is_SAM = 0;
 	char *fn_list = 0, *fn_fa = 0, *fn_pos = 0;
 	pu_data_t *d = (pu_data_t*)calloc(1, sizeof(pu_data_t));
+    d->max_depth = 0;
 	d->tid = -1; d->mask = BAM_DEF_MASK;
 	d->c = bam_maqcns_init();
+	d->c->is_soap = 1; // change the default model
 	d->ido = bam_maqindel_opt_init();
-	while ((c = getopt(argc, argv, "st:f:cT:N:r:l:im:gI:G:vM:S2a")) >= 0) {
+	while ((c = getopt(argc, argv, "st:f:cT:N:r:l:d:im:gI:G:vM:S2aR:PA")) >= 0) {
 		switch (c) {
 		case 'a': d->c->is_soap = 1; break;
+		case 'A': d->c->is_soap = 0; break;
 		case 's': d->format |= BAM_PLF_SIMPLE; break;
 		case 't': fn_list = strdup(optarg); break;
 		case 'l': fn_pos = strdup(optarg); break;
 		case 'f': fn_fa = strdup(optarg); break;
 		case 'T': d->c->theta = atof(optarg); break;
 		case 'N': d->c->n_hap = atoi(optarg); break;
-		case 'r': d->c->het_rate = atof(optarg); break;
+		case 'r': d->c->het_rate = atof(optarg); d->ido->r_snp = d->c->het_rate; break;
 		case 'M': d->c->cap_mapQ = atoi(optarg); break;
+		case 'd': d->max_depth = atoi(optarg); break;
 		case 'c': d->format |= BAM_PLF_CNS; break;
 		case 'i': d->format |= BAM_PLF_INDEL_ONLY; break;
 		case 'v': d->format |= BAM_PLF_VAR_ONLY; break;
 		case 'm': d->mask = strtol(optarg, 0, 0); break;
 		case 'g': d->format |= BAM_PLF_GLF; break;
 		case '2': d->format |= BAM_PLF_2ND; break;
+		case 'P': d->format |= BAM_PLF_READPOS; break;
 		case 'I': d->ido->q_indel = atoi(optarg); break;
 		case 'G': d->ido->r_indel = atof(optarg); break;
 		case 'S': is_SAM = 1; break;
+		case 'R':
+			if (strcmp(optarg, "random") == 0) d->format |= BAM_PLF_RANBASE;
+			else if (strcmp(optarg, "first") == 0) d->format |= BAM_PLF_1STBASE;
+			else if (strcmp(optarg, "all") == 0) d->format |= BAM_PLF_ALLBASE;
+			else fprintf(stderr, "[bam_pileup] unrecognized -R\n");
+			break;
 		default: fprintf(stderr, "Unrecognizd option '-%c'.\n", c); return 1;
 		}
 	}
@@ -330,15 +381,16 @@ int bam_pileup(int argc, char *argv[])
 		fprintf(stderr, "Usage:  samtools pileup [options] <in.bam>|<in.sam>\n\n");
 		fprintf(stderr, "Option: -s        simple (yet incomplete) pileup format\n");
 		fprintf(stderr, "        -S        the input is in SAM\n");
-		fprintf(stderr, "        -a        use the SOAPsnp model for SNP calling\n");
+		fprintf(stderr, "        -A        use the MAQ model for SNP calling\n");
 		fprintf(stderr, "        -2        output the 2nd best call and quality\n");
 		fprintf(stderr, "        -i        only show lines/consensus with indels\n");
 		fprintf(stderr, "        -m INT    filtering reads with bits in INT [%d]\n", d->mask);
 		fprintf(stderr, "        -M INT    cap mapping quality at INT [%d]\n", d->c->cap_mapQ);
+        fprintf(stderr, "        -d INT    limit maximum depth for indels [unlimited]\n");
 		fprintf(stderr, "        -t FILE   list of reference sequences (force -S)\n");
 		fprintf(stderr, "        -l FILE   list of sites at which pileup is output\n");
 		fprintf(stderr, "        -f FILE   reference sequence in the FASTA format\n\n");
-		fprintf(stderr, "        -c        output the maq consensus sequence\n");
+		fprintf(stderr, "        -c        output the SOAPsnp consensus sequence\n");
 		fprintf(stderr, "        -v        print variants only (for -c)\n");
 		fprintf(stderr, "        -g        output in the GLFv3 format (suppressing -c/-i/-s)\n");
 		fprintf(stderr, "        -T FLOAT  theta in maq consensus calling model (for -c/-g) [%f]\n", d->c->theta);
@@ -350,6 +402,7 @@ int bam_pileup(int argc, char *argv[])
 		free(fn_list); free(fn_fa); free(d);
 		return 1;
 	}
+	if (d->format & (BAM_PLF_RANBASE|BAM_PLF_1STBASE|BAM_PLF_ALLBASE)) d->format |= BAM_PLF_CNS;
 	if (fn_fa) d->fai = fai_load(fn_fa);
 	if (d->format & (BAM_PLF_CNS|BAM_PLF_GLF)) bam_maqcns_prepare(d->c); // consensus calling
 	if (d->format & BAM_PLF_GLF) { // for glf output
@@ -390,3 +443,128 @@ int bam_pileup(int argc, char *argv[])
 	free(d->ido); free(d->ref); free(d);
 	return 0;
 }
+
+/***********
+ * mpileup *
+ ***********/
+
+typedef struct { // command-line settings of `samtools mpileup`
+	char *reg; // region string given with -r, or null
+	faidx_t *fai; // reference index loaded for -f, or null
+} mplp_conf_t;
+
+typedef struct { // per-input state handed to mplp_func()
+	bamFile fp; // the opened BAM file
+	bam_iter_t iter; // region iterator, or null when the whole file is read
+} mplp_aux_t;
+
+static int mplp_func(void *data, bam1_t *b) // read callback passed to bam_mplp_init(): fetch the next alignment of one input into b
+{
+	mplp_aux_t *ma = (mplp_aux_t*)data;
+	if (ma->iter) return bam_iter_read(ma->fp, ma->iter, b); // a region was requested: read through the index iterator
+	return bam_read1(ma->fp, b); // otherwise read the file sequentially
+}
+
+/* Print the pileup of the n BAM files fn[0..n-1] side by side, optionally limited to conf->reg; returns 0, fatal errors call exit(1). */
+static int mpileup(mplp_conf_t *conf, int n, char **fn)
+{
+	mplp_aux_t **data;
+	int i, tid, pos, *n_plp, beg0 = 0, end0 = 1u<<29, ref_len = 0, ref_tid; // ref_len stays 0 until a reference sequence is fetched
+	const bam_pileup1_t **plp;
+	bam_mplp_t iter;
+	bam_header_t *h = 0;
+	char *ref;
+	// allocate
+	data = calloc(n, sizeof(void*));
+	plp = calloc(n, sizeof(void*));
+	n_plp = calloc(n, sizeof(int)); // one depth counter (int) per input
+	// read the header and initialize data
+	for (i = 0; i < n; ++i) {
+		bam_header_t *h_tmp;
+		data[i] = calloc(1, sizeof(mplp_aux_t));
+		data[i]->fp = bam_open(fn[i], "r");
+		if (data[i]->fp == 0) { fprintf(stderr, "[%s] fail to open %d-th input.\n", __func__, i+1); exit(1); }
+		h_tmp = bam_header_read(data[i]->fp);
+		if (h_tmp == 0) { fprintf(stderr, "[%s] fail to read the header of %d-th input.\n", __func__, i+1); exit(1); }
+		if (conf->reg) {
+			int beg, end;
+			bam_index_t *idx;
+			idx = bam_index_load(fn[i]);
+			if (idx == 0) {
+				fprintf(stderr, "[%s] fail to load index for %d-th input.\n", __func__, i+1);
+				exit(1);
+			}
+			if (bam_parse_region(h_tmp, conf->reg, &tid, &beg, &end) < 0) {
+				fprintf(stderr, "[%s] malformatted region or wrong seqname for %d-th input.\n", __func__, i+1);
+				exit(1);
+			}
+			if (i == 0) beg0 = beg, end0 = end; // the window of the first input filters the output below
+			data[i]->iter = bam_iter_query(idx, tid, beg, end);
+			bam_index_destroy(idx);
+		}
+		if (i == 0) h = h_tmp; // sequence names are taken from the first input
+		else bam_header_destroy(h_tmp); // FIXME: to check consistency
+	}
+	// mpileup
+	ref_tid = -1; ref = 0;
+	iter = bam_mplp_init(n, mplp_func, (void**)data);
+	while (bam_mplp_auto(iter, &tid, &pos, n_plp, plp) > 0) {
+		if (conf->reg && (pos < beg0 || pos >= end0)) continue; // out of the region requested
+		if (tid != ref_tid) { // moved to another reference sequence: reload it
+			free(ref);
+			if (conf->fai) ref = fai_fetch(conf->fai, h->target_name[tid], &ref_len);
+			ref_tid = tid;
+		}
+		printf("%s\t%d\t%c", h->target_name[tid], pos + 1, (ref && pos < ref_len)? ref[pos] : 'N');
+		for (i = 0; i < n; ++i) {
+			int j;
+			printf("\t%d\t", n_plp[i]);
+			if (n_plp[i] == 0) printf("*\t*"); // no read from this input covers the position
+			else {
+				for (j = 0; j < n_plp[i]; ++j)
+					pileup_seq(plp[i] + j, pos, ref_len, ref);
+				putchar('\t');
+				for (j = 0; j < n_plp[i]; ++j) { // base qualities as characters (value + 33, capped at 126)
+					const bam_pileup1_t *p = plp[i] + j;
+					int c = bam1_qual(p->b)[p->qpos] + 33;
+					if (c > 126) c = 126;
+					putchar(c);
+				}
+			}
+		}
+		putchar('\n');
+	}
+	bam_mplp_destroy(iter);
+	bam_header_destroy(h);
+	for (i = 0; i < n; ++i) {
+		bam_close(data[i]->fp);
+		if (data[i]->iter) bam_iter_destroy(data[i]->iter);
+		free(data[i]);
+	}
+	free(data); free(plp); free(ref); free(n_plp);
+	return 0;
+}
+
+int bam_mpileup(int argc, char *argv[])
+{
+	int c;
+	mplp_conf_t mplp;
+	memset(&mplp, 0, sizeof(mplp_conf_t));
+	while ((c = getopt(argc, argv, "f:r:")) >= 0) {
+		switch (c) {
+		case 'f':
+			mplp.fai = fai_load(optarg);
+			if (mplp.fai == 0) return 1;
+			break;
+		case 'r': mplp.reg = strdup(optarg);
+		}
+	}
+	if (argc == 1) {
+		fprintf(stderr, "Usage: samtools mpileup [-r reg] [-f in.fa] in1.bam [in2.bam [...]]\n");
+		return 1;
+	}
+	mpileup(&mplp, argc - optind, argv + optind);
+	free(mplp.reg);
+	if (mplp.fai) fai_destroy(mplp.fai);
+	return 0;
+}
diff --git a/samtools/bam_reheader.c b/samtools/bam_reheader.c
new file mode 100644
index 0000000..bae97c7
--- /dev/null
+++ b/samtools/bam_reheader.c
@@ -0,0 +1,60 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "bgzf.h"
+#include "bam.h"
+
+#define BUF_SIZE 0x10000
+
+int bam_reheader(BGZF *in, const bam_header_t *h, int fd) // copy BAM `in` to descriptor fd with its header replaced by h; 0 on success, -1 on failure
+{
+	BGZF *fp;
+	bam_header_t *old;
+	int len, ret = 0;
+	uint8_t *buf;
+	if (in->open_mode != 'r') return -1;
+	if ((buf = malloc(BUF_SIZE)) == 0) return -1;
+	old = bam_header_read(in); // consume the old header so that the data after it can be copied
+	if (old) bam_header_destroy(old); // its content is not needed; do not leak it
+	if ((fp = bgzf_fdopen(fd, "w")) == 0) { free(buf); return -1; }
+	bam_header_write(fp, h);
+	if (in->block_offset < in->block_length) { // the rest of the block that held the end of the old header is written through bgzf_write()
+		bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset);
+		bgzf_flush(fp);
+	}
+#ifdef _USE_KNETFILE // the remaining bytes are copied through the raw handles, which are named differently in the two builds
+	while ((len = knet_read(in->x.fpr, buf, BUF_SIZE)) > 0) if (fwrite(buf, 1, len, fp->x.fpw) != (size_t)len) { ret = -1; break; }
+#else
+	while (!feof(in->file) && (len = fread(buf, 1, BUF_SIZE, in->file)) > 0) if (fwrite(buf, 1, len, fp->file) != (size_t)len) { ret = -1; break; }
+#endif
+	free(buf);
+	fp->block_offset = in->block_offset = 0; // mark both block buffers as empty before closing
+	bgzf_close(fp);
+	return ret;
+}
+
+int main_reheader(int argc, char *argv[])
+{
+	bam_header_t *h;
+	BGZF *in;
+	if (argc != 3) {
+		fprintf(stderr, "Usage: samtools reheader <in.header.sam> <in.bam>\n");
+		return 1;
+	}
+	{ // read the header
+		tamFile fph = sam_open(argv[1]);
+		if (fph == 0) {
+			fprintf(stderr, "[%s] fail to read the header from %s.\n", __func__, argv[1]);
+			return 1;
+		}
+		h = sam_header_read(fph);
+		sam_close(fph);
+	}
+	in = strcmp(argv[2], "-")? bam_open(argv[2], "r") : bam_dopen(fileno(stdin), "r");
+	if (in == 0) {
+		fprintf(stderr, "[%s] fail to open file %s.\n", __func__, argv[2]);
+		return 1;
+	}
+	bam_reheader(in, h, fileno(stdout));
+	bgzf_close(in);
+	return 0;
+}
diff --git a/samtools/bam_sort.c b/samtools/bam_sort.c
index 9884f3d..12b1b54 100644
--- a/samtools/bam_sort.c
+++ b/samtools/bam_sort.c
@@ -294,7 +294,7 @@ void bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix, size
 		mem += ret;
 		++k;
 		if (mem >= max_mem) {
-			sort_blocks(n++, k, buf, prefix, header, is_stdout);
+			sort_blocks(n++, k, buf, prefix, header, 0);
 			mem = 0; k = 0;
 		}
 	}
@@ -304,7 +304,7 @@ void bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix, size
 	else { // then merge
 		char **fns, *fnout;
 		fprintf(stderr, "[bam_sort_core] merging from %d files...\n", n+1);
-		sort_blocks(n++, k, buf, prefix, header, is_stdout);
+		sort_blocks(n++, k, buf, prefix, header, 0);
 		fnout = (char*)calloc(strlen(prefix) + 20, 1);
 		if (is_stdout) sprintf(fnout, "-");
 		else sprintf(fnout, "%s.bam", prefix);
diff --git a/samtools/bam_tview.c b/samtools/bam_tview.c
index 4c121e7..7b326fc 100644
--- a/samtools/bam_tview.c
+++ b/samtools/bam_tview.c
@@ -280,7 +280,7 @@ int tv_draw_aln(tview_t *tv, int tid, int pos)
 
 static void tv_win_goto(tview_t *tv, int *tid, int *pos)
 {
-	char str[256];
+	char str[256], *p;
 	int i, l = 0;
 	wborder(tv->wgoto, '|', '|', '-', '-', '+', '+', '+', '+');
 	mvwprintw(tv->wgoto, 1, 2, "Goto: ");
@@ -291,10 +291,18 @@ static void tv_win_goto(tview_t *tv, int *tid, int *pos)
 			--l;
 		} else if (c == KEY_ENTER || c == '\012' || c == '\015') {
 			int _tid = -1, _beg, _end;
-			bam_parse_region(tv->header, str, &_tid, &_beg, &_end);
-			if (_tid >= 0) {
-				*tid = _tid; *pos = _beg;
-				return;
+			if (str[0] == '=') {
+				_beg = strtol(str+1, &p, 10);
+				if (_beg > 0) {
+					*pos = _beg;
+					return;
+				}
+			} else {
+				bam_parse_region(tv->header, str, &_tid, &_beg, &_end);
+				if (_tid >= 0) {
+					*tid = _tid; *pos = _beg;
+					return;
+				}
 			}
 		} else if (isgraph(c)) {
 			if (l < TV_MAX_GOTO) str[l++] = c;
@@ -351,6 +359,7 @@ void tv_loop(tview_t *tv)
 			case '?': tv_win_help(tv); break;
 			case '\033':
 			case 'q': goto end_loop;
+			case '/': 
 			case 'g': tv_win_goto(tv, &tid, &pos); break;
 			case 'm': tv->color_for = TV_COLOR_MAPQ; break;
 			case 'b': tv->color_for = TV_COLOR_BASEQ; break;
diff --git a/samtools/bgzf.c b/samtools/bgzf.c
index 59f902f..a6923da 100644
--- a/samtools/bgzf.c
+++ b/samtools/bgzf.c
@@ -203,9 +203,7 @@ bgzf_open(const char* __restrict path, const char* __restrict mode)
 		if (fd == -1) return 0;
         fp = open_write(fd, strstr(mode, "u")? 1 : 0);
     }
-    if (fp != NULL) {
-        fp->owned_file = 1;
-    }
+    if (fp != NULL) fp->owned_file = 1;
     return fp;
 }
 
@@ -429,20 +427,19 @@ static void cache_block(BGZF *fp, int size)
 	memcpy(kh_val(h, k).block, fp->uncompressed_block, MAX_BLOCK_SIZE);
 }
 
-static
 int
-read_block(BGZF* fp)
+bgzf_read_block(BGZF* fp)
 {
     bgzf_byte_t header[BLOCK_HEADER_LENGTH];
-	int size = 0;
+	int count, size = 0;
 #ifdef _USE_KNETFILE
     int64_t block_address = knet_tell(fp->x.fpr);
 	if (load_block_from_cache(fp, block_address)) return 0;
-    int count = knet_read(fp->x.fpr, header, sizeof(header));
+    count = knet_read(fp->x.fpr, header, sizeof(header));
 #else
     int64_t block_address = ftello(fp->file);
 	if (load_block_from_cache(fp, block_address)) return 0;
-    int count = fread(header, 1, sizeof(header), fp->file);
+    count = fread(header, 1, sizeof(header), fp->file);
 #endif
     if (count == 0) {
         fp->block_length = 0;
@@ -472,9 +469,7 @@ read_block(BGZF* fp)
     }
 	size += count;
     count = inflate_block(fp, block_length);
-    if (count < 0) {
-        return -1;
-    }
+    if (count < 0) return -1;
     if (fp->block_length != 0) {
         // Do not reset offset if this read follows a seek.
         fp->block_offset = 0;
@@ -501,7 +496,7 @@ bgzf_read(BGZF* fp, void* data, int length)
     while (bytes_read < length) {
         int available = fp->block_length - fp->block_offset;
         if (available <= 0) {
-            if (read_block(fp) != 0) {
+            if (bgzf_read_block(fp) != 0) {
                 return -1;
             }
             available = fp->block_length - fp->block_offset;
@@ -528,19 +523,16 @@ bgzf_read(BGZF* fp, void* data, int length)
     return bytes_read;
 }
 
-static
-int
-flush_block(BGZF* fp)
+int bgzf_flush(BGZF* fp)
 {
     while (fp->block_offset > 0) {
-        int block_length = deflate_block(fp, fp->block_offset);
-        if (block_length < 0) {
-            return -1;
-        }
+        int count, block_length;
+		block_length = deflate_block(fp, fp->block_offset);
+        if (block_length < 0) return -1;
 #ifdef _USE_KNETFILE
-        int count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw);
+        count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw);
 #else
-        int count = fwrite(fp->compressed_block, 1, block_length, fp->file);
+        count = fwrite(fp->compressed_block, 1, block_length, fp->file);
 #endif
         if (count != block_length) {
             report_error(fp, "write failed");
@@ -551,17 +543,22 @@ flush_block(BGZF* fp)
     return 0;
 }
 
-int
-bgzf_write(BGZF* fp, const void* data, int length)
+int bgzf_flush_try(BGZF *fp, int size) /* flush early when `size` more bytes would not fit in the current block */
+{
+	if (fp->block_offset + size > fp->uncompressed_block_size) /* buffered bytes + size exceed the block size */
+		return bgzf_flush(fp); /* pass bgzf_flush()'s result through */
+	return -1; /* nothing was flushed; NOTE(review): -1 here is not an I/O error -- confirm callers do not treat it as one */
+}
+
+int bgzf_write(BGZF* fp, const void* data, int length)
 {
     if (fp->open_mode != 'w') {
         report_error(fp, "file not open for writing");
         return -1;
     }
 
-    if (fp->uncompressed_block == NULL) {
+    if (fp->uncompressed_block == NULL)
         fp->uncompressed_block = malloc(fp->uncompressed_block_size);
-    }
 
     const bgzf_byte_t* input = data;
     int block_length = fp->uncompressed_block_size;
@@ -574,7 +571,7 @@ bgzf_write(BGZF* fp, const void* data, int length)
         input += copy_length;
         bytes_written += copy_length;
         if (fp->block_offset == block_length) {
-            if (flush_block(fp) != 0) {
+            if (bgzf_flush(fp) != 0) {
                 break;
             }
         }
@@ -582,13 +579,10 @@ bgzf_write(BGZF* fp, const void* data, int length)
     return bytes_written;
 }
 
-int
-bgzf_close(BGZF* fp)
+int bgzf_close(BGZF* fp)
 {
     if (fp->open_mode == 'w') {
-        if (flush_block(fp) != 0) {
-            return -1;
-        }
+        if (bgzf_flush(fp) != 0) return -1;
 		{ // add an empty block
 			int count, block_length = deflate_block(fp, 0);
 #ifdef _USE_KNETFILE
@@ -613,9 +607,7 @@ bgzf_close(BGZF* fp)
 		else ret = knet_close(fp->x.fpr);
         if (ret != 0) return -1;
 #else
-        if (fclose(fp->file) != 0) {
-            return -1;
-        }
+        if (fclose(fp->file) != 0) return -1;
 #endif
     }
     free(fp->uncompressed_block);
@@ -625,12 +617,6 @@ bgzf_close(BGZF* fp)
     return 0;
 }
 
-int64_t
-bgzf_tell(BGZF* fp)
-{
-    return ((fp->block_address << 16) | (fp->block_offset & 0xFFFF));
-}
-
 void bgzf_set_cache_size(BGZF *fp, int cache_size)
 {
 	if (fp) fp->cache_size = cache_size;
@@ -655,9 +641,11 @@ int bgzf_check_EOF(BGZF *fp)
 	return (memcmp(magic, buf, 28) == 0)? 1 : 0;
 }
 
-int64_t
-bgzf_seek(BGZF* fp, int64_t pos, int where)
+int64_t bgzf_seek(BGZF* fp, int64_t pos, int where)
 {
+	int block_offset;
+	int64_t block_address;
+
     if (fp->open_mode != 'r') {
         report_error(fp, "file not open for read");
         return -1;
@@ -666,8 +654,8 @@ bgzf_seek(BGZF* fp, int64_t pos, int where)
         report_error(fp, "unimplemented seek option");
         return -1;
     }
-    int block_offset = pos & 0xFFFF;
-    int64_t block_address = (pos >> 16) & 0xFFFFFFFFFFFFLL;
+    block_offset = pos & 0xFFFF;
+    block_address = (pos >> 16) & 0xFFFFFFFFFFFFLL;
 #ifdef _USE_KNETFILE
     if (knet_seek(fp->x.fpr, block_address, SEEK_SET) != 0) {
 #else
diff --git a/samtools/bgzf.h b/samtools/bgzf.h
index 91b3317..099ae9a 100644
--- a/samtools/bgzf.h
+++ b/samtools/bgzf.h
@@ -106,7 +106,7 @@ int bgzf_write(BGZF* fp, const void* data, int length);
  * Return value is non-negative on success.
  * Returns -1 on error.
  */
-int64_t bgzf_tell(BGZF* fp);
+#define bgzf_tell(fp) (((fp)->block_address << 16) | ((fp)->block_offset & 0xFFFF)) /* argument parenthesized so any pointer expression is accepted */
 
 /*
  * Set the file to read from the location specified by pos, which must
@@ -126,9 +126,32 @@ int64_t bgzf_seek(BGZF* fp, int64_t pos, int where);
 void bgzf_set_cache_size(BGZF *fp, int cache_size);
 
 int bgzf_check_EOF(BGZF *fp);
+int bgzf_read_block(BGZF* fp);
+int bgzf_flush(BGZF* fp);
+int bgzf_flush_try(BGZF *fp, int size);
 
 #ifdef __cplusplus
 }
 #endif
 
+static inline int bgzf_getc(BGZF *fp) /* read one byte; returns it as 0..255, -1 at end-of-file, -2 on error */
+{
+	int c;
+	if (fp->block_offset >= fp->block_length) { /* current block exhausted: load the next one */
+		if (bgzf_read_block(fp) != 0) return -2; /* error */
+		if (fp->block_length == 0) return -1; /* end-of-file */
+	}
+	c = ((unsigned char*)fp->uncompressed_block)[fp->block_offset++]; /* unsigned so the byte never collides with -1/-2 */
+    if (fp->block_offset == fp->block_length) { /* last byte of the block was just consumed */
+#ifdef _USE_KNETFILE
+        fp->block_address = knet_tell(fp->x.fpr);
+#else
+        fp->block_address = ftello(fp->file);
+#endif
+        fp->block_offset = 0; /* block_address now holds the current stream position; reset the in-block cursor */
+        fp->block_length = 0; /* zero length makes the next call read a fresh block */
+    }
+	return c;
+}
+
 #endif
diff --git a/samtools/faidx.c b/samtools/faidx.c
index 811bdf8..dbd8b3e 100644
--- a/samtools/faidx.c
+++ b/samtools/faidx.c
@@ -197,7 +197,7 @@ int fai_build(const char *fn)
 	sprintf(str, "%s.fai", fn);
 	rz = razf_open(fn, "r");
 	if (rz == 0) {
-		fprintf(stderr, "[fai_build] fail to open the FASTA file %s\n",str);
+		fprintf(stderr, "[fai_build] fail to open the FASTA file %s\n",fn);
 		free(str);
 		return -1;
 	}
diff --git a/samtools/knetfile.c b/samtools/knetfile.c
index 994babb..e1be4d6 100644
--- a/samtools/knetfile.c
+++ b/samtools/knetfile.c
@@ -38,9 +38,7 @@
 #include <unistd.h>
 #include <sys/types.h>
 
-#ifdef _WIN32
-#include <winsock.h>
-#else
+#ifndef _WIN32
 #include <netdb.h>
 #include <arpa/inet.h>
 #include <sys/socket.h>
@@ -566,7 +564,7 @@ off_t knet_seek(knetFile *fp, int64_t off, int whence)
         else if (whence==SEEK_SET)
             fp->offset = off;
 		fp->is_ready = 0;
-		return fp->offset;
+		return 0;
 	}
 	errno = EINVAL;
     fprintf(stderr,"[knet_seek] %s\n", strerror(errno));
diff --git a/samtools/kstring.h b/samtools/kstring.h
index f4e5a99..925117a 100644
--- a/samtools/kstring.h
+++ b/samtools/kstring.h
@@ -58,6 +58,40 @@ static inline int kputc(int c, kstring_t *s)
 	return c;
 }
 
+static inline int kputw(int c, kstring_t *s) /* append the decimal text of signed c to s; returns kputc()'s result for 0, otherwise 0 */
+{
+	char buf[16]; /* digits are generated least-significant first, then copied out reversed */
+	int l, i; unsigned x; /* magnitude is kept unsigned so INT_MIN can be negated without overflow */
+	if (c == 0) return kputc('0', s);
+	for (l = 0, x = c < 0? 0u - (unsigned)c : (unsigned)c; x > 0; x /= 10) buf[l++] = x%10 + '0';
+	if (c < 0) buf[l++] = '-';
+	if (s->l + l + 1 >= s->m) { /* grow so the digits and the terminating NUL fit */
+		s->m = s->l + l + 2;
+		kroundup32(s->m);
+		s->s = (char*)realloc(s->s, s->m);
+	}
+	for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i]; /* reverse copy: sign first, then most-significant digit */
+	s->s[s->l] = 0;
+	return 0;
+}
+
+static inline int kputuw(unsigned c, kstring_t *s) /* append the decimal text of unsigned c to s */
+{
+	char digits[16]; /* filled least-significant digit first */
+	int n = 0, j;
+	unsigned rest = c;
+	if (c == 0) return kputc('0', s);
+	do { digits[n++] = rest%10 + '0'; rest /= 10; } while (rest > 0);
+	if (s->l + n + 1 >= s->m) { /* make room for the digits plus the terminating NUL */
+		s->m = s->l + n + 2;
+		kroundup32(s->m);
+		s->s = (char*)realloc(s->s, s->m);
+	}
+	for (j = n; j-- > 0; ) s->s[s->l++] = digits[j]; /* emit most-significant digit first */
+	s->s[s->l] = 0;
+	return 0;
+}
+
 static inline int *ksplit(kstring_t *s, int delimiter, int *n)
 {
 	int max = 0, *offsets = 0;
diff --git a/samtools/sam.c b/samtools/sam.c
index ad4325b..ecdee02 100644
--- a/samtools/sam.c
+++ b/samtools/sam.c
@@ -55,6 +55,7 @@ samfile_t *samopen(const char *fn, const char *mode, const void *aux)
 				if (aux) { // check if aux is present
 					bam_header_t *textheader = fp->header;
 					fp->header = sam_header_read2((const char*)aux);
+					if (fp->header == 0) goto open_err_ret;
 					append_header_text(fp->header, textheader->text, textheader->l_text);
 					bam_header_destroy(textheader);
 				}
diff --git a/samtools/sam_header.c b/samtools/sam_header.c
index a119c02..05d75de 100644
--- a/samtools/sam_header.c
+++ b/samtools/sam_header.c
@@ -10,6 +10,7 @@ KHASH_MAP_INIT_STR(str, const char *)
 
 struct _HeaderList
 {
+    struct _HeaderList *last;   // Hack: Used and maintained only by list_append_to_end. Maintained in the root node only.
     struct _HeaderList *next;
     void *data;
 };
@@ -58,6 +59,34 @@ static void debug(const char *format, ...)
     va_end(ap);
 }
 
+#if 0
+// Replaced by list_append_to_end
+static list_t *list_prepend(list_t *root, void *data)
+{
+    list_t *l = malloc(sizeof(list_t));
+    l->next = root;
+    l->data = data;
+    return l;
+}
+#endif
+
+// Relies on the root->last being correct. Do not use with the other list_*
+//  routines unless they are fixed to modify root->last as well.
+static list_t *list_append_to_end(list_t *root, void *data) /* append data as a new tail node; returns the list root */
+{
+    list_t *l = malloc(sizeof(list_t)); /* NOTE(review): allocation result is not checked */
+    l->last = l; /* a single-node list is its own tail */
+    l->next = NULL;
+    l->data = data;
+
+    if ( !root ) /* empty list: the new node becomes the root */
+        return l;
+
+    root->last->next = l; /* link after the current tail without walking the list */
+    root->last = l; /* only the root's `last` is kept up to date */
+    return root;
+}
+
 static list_t *list_append(list_t *root, void *data)
 {
     list_t *l = root;
@@ -322,7 +351,7 @@ static HeaderLine *sam_header_line_parse(const char *headerLine)
 
     while (*to && *to!='\t') to++;
     if ( to-from != 2 ) {
-		debug("[sam_header_line_parse] expected '@XY', got [%s]\n", headerLine);
+		debug("[sam_header_line_parse] expected '@XY', got [%s]\nHint: The header tags must be tab-separated.\n", headerLine);
 		return 0;
 	}
     
@@ -345,7 +374,11 @@ static HeaderLine *sam_header_line_parse(const char *headerLine)
         while (*to && *to!='\t') to++;
 
         if ( !required_tags[itype] && !optional_tags[itype] )
+        {
+            // CO is a special case, it can contain anything, including tabs
+            if ( *to ) { to++; continue; }
             tag = new_tag("  ",from,to-1);
+        }
         else
             tag = new_tag(from,from+3,to-1);
 
@@ -539,7 +572,8 @@ void *sam_header_parse2(const char *headerText)
     {
         hline = sam_header_line_parse(buf);
         if ( hline && sam_header_line_validate(hline) )
-            hlines = list_append(hlines, hline);
+            // With too many (~250,000) reference sequences the header parsing was too slow with list_append.
+            hlines = list_append_to_end(hlines, hline);
         else
         {
 			if (hline) sam_header_line_free(hline);
diff --git a/samtools/sam_view.c b/samtools/sam_view.c
index 06dd01a..3b10e2e 100644
--- a/samtools/sam_view.c
+++ b/samtools/sam_view.c
@@ -6,7 +6,12 @@
 #include "sam_header.h"
 #include "sam.h"
 #include "faidx.h"
+#include "khash.h"
+KHASH_SET_INIT_STR(rg)
 
+typedef khash_t(rg) *rghash_t;
+
+rghash_t g_rghash = 0;
 static int g_min_mapQ = 0, g_flag_on = 0, g_flag_off = 0;
 static char *g_library, *g_rg;
 static int g_sol2sanger_tbl[128];
@@ -32,9 +37,15 @@ static inline int __g_skip_aln(const bam_header_t *h, const bam1_t *b)
 {
 	if (b->core.qual < g_min_mapQ || ((b->core.flag & g_flag_on) != g_flag_on) || (b->core.flag & g_flag_off))
 		return 1;
-	if (g_rg) {
+	if (g_rg || g_rghash) {
 		uint8_t *s = bam_aux_get(b, "RG");
-		if (s && strcmp(g_rg, (char*)(s + 1)) == 0) return 0;
+		if (s) {
+			if (g_rg) return (strcmp(g_rg, (char*)(s + 1)) == 0)? 0 : 1;
+			if (g_rghash) {
+				khint_t k = kh_get(rg, g_rghash, (char*)(s + 1));
+				return (k != kh_end(g_rghash))? 0 : 1;
+			}
+		}
 	}
 	if (g_library) {
 		const char *p = bam_get_library((bam_header_t*)h, b);
@@ -58,11 +69,11 @@ int main_samview(int argc, char *argv[])
 	int c, is_header = 0, is_header_only = 0, is_bamin = 1, ret = 0, is_uncompressed = 0, is_bamout = 0, slx2sngr = 0;
 	int of_type = BAM_OFDEC, is_long_help = 0;
 	samfile_t *in = 0, *out = 0;
-	char in_mode[5], out_mode[5], *fn_out = 0, *fn_list = 0, *fn_ref = 0;
+	char in_mode[5], out_mode[5], *fn_out = 0, *fn_list = 0, *fn_ref = 0, *fn_rg = 0;
 
 	/* parse command-line options */
 	strcpy(in_mode, "r"); strcpy(out_mode, "w");
-	while ((c = getopt(argc, argv, "Sbt:hHo:q:f:F:ul:r:xX?T:C")) >= 0) {
+	while ((c = getopt(argc, argv, "Sbt:hHo:q:f:F:ul:r:xX?T:CR:")) >= 0) {
 		switch (c) {
 		case 'C': slx2sngr = 1; break;
 		case 'S': is_bamin = 0; break;
@@ -77,6 +88,7 @@ int main_samview(int argc, char *argv[])
 		case 'u': is_uncompressed = 1; break;
 		case 'l': g_library = strdup(optarg); break;
 		case 'r': g_rg = strdup(optarg); break;
+		case 'R': fn_rg = strdup(optarg); break;
 		case 'x': of_type = BAM_OFHEX; break;
 		case 'X': of_type = BAM_OFSTR; break;
 		case '?': is_long_help = 1; break;
@@ -94,7 +106,19 @@ int main_samview(int argc, char *argv[])
 	if (is_bamin) strcat(in_mode, "b");
 	if (is_header) strcat(out_mode, "h");
 	if (is_uncompressed) strcat(out_mode, "u");
-	if (argc == optind) return usage(is_long_help);
+	if (argc == optind) return usage(is_long_help); // potential memory leak...
+
+	// read the list of read groups
+	if (fn_rg) {
+		FILE *fp_rg = fopen(fn_rg, "r");
+		char buf[1024], *key; int ret; // ret receives the kh_put status: 0 means the key was already in the set
+		g_rghash = kh_init(rg);
+		if (fp_rg == 0) fprintf(stderr, "[main_samview] fail to open the read group file %s\n", fn_rg);
+		while (fp_rg && fscanf(fp_rg, "%1023s", buf) == 1) { // field width keeps a long token from overflowing buf
+			key = strdup(buf); kh_put(rg, g_rghash, key, &ret); if (ret == 0) free(key); // duplicate: the set keeps its own copy
+		}
+		if (fp_rg) fclose(fp_rg); // the hash stays empty when the file could not be opened
+	}
 
 	// generate the fn_list if necessary
 	if (fn_list == 0 && fn_ref) fn_list = samfaipath(fn_ref);
@@ -147,7 +171,13 @@ int main_samview(int argc, char *argv[])
 
 view_end:
 	// close files, free and return
-	free(fn_list); free(fn_ref); free(fn_out); free(g_library); free(g_rg);
+	free(fn_list); free(fn_ref); free(fn_out); free(g_library); free(g_rg); free(fn_rg);
+	if (g_rghash) {
+		khint_t k;
+		for (k = 0; k < kh_end(g_rghash); ++k)
+			if (kh_exist(g_rghash, k)) free((char*)kh_key(g_rghash, k));
+		kh_destroy(rg, g_rghash);
+	}
 	samclose(in);
 	samclose(out);
 	return ret;
@@ -167,6 +197,7 @@ static int usage(int is_long_help)
 	fprintf(stderr, "         -t FILE  list of reference names and lengths (force -S) [null]\n");
 	fprintf(stderr, "         -T FILE  reference sequence file (force -S) [null]\n");
 	fprintf(stderr, "         -o FILE  output file name [stdout]\n");
+	fprintf(stderr, "         -R FILE  list of read groups to be outputted [null]\n");
 	fprintf(stderr, "         -f INT   required flag, 0 for unset [0]\n");
 	fprintf(stderr, "         -F INT   filtering flag, 0 for unset [0]\n");
 	fprintf(stderr, "         -q INT   minimum mapping quality [0]\n");
diff --git a/setup.py b/setup.py
index 098cb7f..925f016 100644
--- a/setup.py
+++ b/setup.py
@@ -6,34 +6,54 @@ pysam
 
 '''
 
-import os, sys, glob, shutil
+import os, sys, glob, shutil, hashlib
 
 name = "pysam"
-version = "0.2"
+
+# collect pysam version
+sys.path.insert( 0, "pysam")
+import version
+
+version = version.__version__
 
 samtools_exclude = ( "bamtk.c", "razip.c", "bgzip.c" )
 samtools_dest = os.path.abspath( "samtools" )
+tabix_exclude = ( "main.c", )
+tabix_dest = os.path.abspath( "tabix" )
 
 # copy samtools source
 if len(sys.argv) >= 2 and sys.argv[1] == "import":
    if len(sys.argv) < 3: raise ValueError("missing PATH to samtools source directory")
-   samtools_src = os.path.abspath( sys.argv[2] )
-   if not os.path.exists( samtools_src ): raise IOError( "samtools src dir `%s` does not exist." % samtools_src )
-
-   cfiles = glob.glob( os.path.join( samtools_src, "*.c" ) )
-   hfiles = glob.glob( os.path.join( samtools_src, "*.h" ) )
-   ncopied = 0
-   for p in cfiles + hfiles:
-      f = os.path.basename(p)
-      if f in samtools_exclude: continue
-      if os.path.exists( os.path.join( samtools_dest, f )): continue
-      shutil.copy( p, samtools_dest )
-      ncopied += 1
-   print "installed latest source code from %s: %i files copied" % (samtools_src, ncopied)
+   if len(sys.argv) < 4: raise ValueError("missing PATH to tabix source directory")
+
+   for destdir, srcdir, exclude in zip( 
+      (samtools_dest, tabix_dest), 
+      sys.argv[2:4],
+      (samtools_exclude, tabix_exclude)):
+
+      srcdir = os.path.abspath( srcdir )
+      if not os.path.exists( srcdir ): raise IOError( "source directory `%s` does not exist." % srcdir )  # srcdir may be the samtools or the tabix tree
+
+      cfiles = glob.glob( os.path.join( srcdir, "*.c" ) )
+      hfiles = glob.glob( os.path.join( srcdir, "*.h" ) )
+      ncopied = 0
+      for new_file in cfiles + hfiles:
+         f = os.path.basename(new_file)
+         if f in exclude: continue
+         old_file = os.path.join( destdir, f )
+         if os.path.exists( old_file ):
+            md5_old = hashlib.md5(open(old_file,"rb").read()).digest()  # binary mode: hash the exact bytes on disk
+            md5_new = hashlib.md5(open(new_file,"rb").read()).digest()
+            if md5_old == md5_new: continue  # identical file is already in place
+            raise ValueError( "incompatible files for %s and %s" % (old_file, new_file ))
+
+         shutil.copy( new_file, destdir )
+         ncopied += 1
+      print "installed latest source code from %s: %i files copied" % (srcdir, ncopied)
    sys.exit(0)
 
 from distutils.core import setup, Extension
-from Pyrex.Distutils import build_ext
+from Cython.Distutils import build_ext
 
 classifiers = """
 Development Status :: 2 - Alpha
@@ -48,14 +68,27 @@ Topic :: Scientific/Engineering
 Topic :: Scientific/Engineering :: Bioinformatics
 """
 
-pysam = Extension(
-    "pysam/csamtools",                   # name of extension
+samtools = Extension(
+    "csamtools",                   # name of extension
     [ "pysam/csamtools.pyx" ]  +\
        [ "pysam/%s" % x for x in (
              "pysam_util.c", )] +\
        glob.glob( os.path.join( "samtools", "*.c" ) ),
     library_dirs=[],
-    include_dirs=[ "samtools", ],
+    include_dirs=[ "samtools", "pysam" ],
+    libraries=[ "z", ],
+    language="c",
+    define_macros = [('_FILE_OFFSET_BITS','64'),  # feature-test macro name starts with an underscore
+                     ('_USE_KNETFILE','')], 
+    )
+
+tabix = Extension(
+    "ctabix",                   # name of extension
+    [ "pysam/ctabix.pyx" ]  +\
+       [ "pysam/%s" % x for x in ()] +\
+       glob.glob( os.path.join( "tabix", "*.c" ) ),
+    library_dirs=[],
+    include_dirs=[ "tabix", "pysam" ],
     libraries=[ "z", ],
     language="c",
     )
@@ -71,8 +104,11 @@ metadata = {
     'platforms': "ALL",
     'url': "http://code.google.com/p/pysam/",
     'py_modules': [
-      "pysam/__init__", "pysam/Pileup", "pysam/namedtuple" ],
-    'ext_modules': [pysam,],
+      "pysam/__init__", 
+      "pysam/Pileup", 
+      "pysam/namedtuple",
+      "pysam/version" ],
+    'ext_modules': [samtools, tabix],
     'cmdclass' : {'build_ext': build_ext} }
 
 if __name__=='__main__':
diff --git a/tabix/bam_endian.h b/tabix/bam_endian.h
new file mode 100644
index 0000000..0fc74a8
--- /dev/null
+++ b/tabix/bam_endian.h
@@ -0,0 +1,42 @@
+#ifndef BAM_ENDIAN_H
+#define BAM_ENDIAN_H
+
+#include <stdint.h>
+
+static inline int bam_is_big_endian() /* 1 when the lowest-addressed byte of an integer is its most significant byte */
+{
+	const long probe = 1; /* on a little-endian host the first byte of this value is 1 */
+	return ((const char *)&probe)[0] == 0;
+}
+static inline uint16_t bam_swap_endian_2(uint16_t v) /* exchange the two bytes of v */
+{
+	return (uint16_t)((v << 8) | (v >> 8)); /* the cast discards the bits shifted above bit 15 */
+}
+static inline void *bam_swap_endian_2p(void *x) /* byte-swap the 16-bit value stored at x, in place */
+{
+	*(uint16_t*)x = bam_swap_endian_2(*(uint16_t*)x); /* assumes x is suitably aligned for uint16_t -- TODO confirm at call sites */
+	return x; /* the same pointer that was passed in */
+}
+static inline uint32_t bam_swap_endian_4(uint32_t v) /* reverse the four bytes of v */
+{
+	const uint32_t halves = (v << 16) | (v >> 16); /* step 1: exchange the 16-bit halves */
+	return ((halves << 8) & 0xFF00FF00U) | ((halves >> 8) & 0x00FF00FFU); /* step 2: exchange the bytes inside each half */
+}
+static inline void *bam_swap_endian_4p(void *x) /* byte-swap the 32-bit value stored at x, in place */
+{
+	*(uint32_t*)x = bam_swap_endian_4(*(uint32_t*)x); /* assumes x is suitably aligned for uint32_t -- TODO confirm at call sites */
+	return x; /* the same pointer that was passed in */
+}
+static inline uint64_t bam_swap_endian_8(uint64_t v) /* reverse the eight bytes of v */
+{
+	v = (v << 32) | (v >> 32); /* exchange the 32-bit halves */
+	v = ((v << 16) & 0xFFFF0000FFFF0000LLU) | ((v >> 16) & 0x0000FFFF0000FFFFLLU); /* exchange 16-bit pairs */
+	return ((v << 8) & 0xFF00FF00FF00FF00LLU) | ((v >> 8) & 0x00FF00FF00FF00FFLLU); /* exchange adjacent bytes */
+}
+static inline void *bam_swap_endian_8p(void *x) /* byte-swap the 64-bit value stored at x, in place */
+{
+	*(uint64_t*)x = bam_swap_endian_8(*(uint64_t*)x); /* assumes x is suitably aligned for uint64_t -- TODO confirm at call sites */
+	return x; /* the same pointer that was passed in */
+}
+
+#endif
diff --git a/tabix/bgzf.c b/tabix/bgzf.c
new file mode 100644
index 0000000..7a936a8
--- /dev/null
+++ b/tabix/bgzf.c
@@ -0,0 +1,676 @@
+/* The MIT License
+
+   Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
+
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+*/
+
+/*
+  2009-06-29 by lh3: cache recent uncompressed blocks.
+  2009-06-25 by lh3: optionally use my knetfile library to access file on a FTP.
+  2009-06-12 by lh3: support a mode string like "wu" where 'u' for uncompressed output */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include "bgzf.h"
+
+#include "khash.h"
+typedef struct {
+	int size;
+	uint8_t *block;
+	int64_t end_offset;
+} cache_t;
+KHASH_MAP_INIT_INT64(cache, cache_t)
+
+#if defined(_WIN32) || defined(_MSC_VER)
+#define ftello(fp) ftell(fp)
+#define fseeko(fp, offset, whence) fseek(fp, offset, whence)
+#else
+extern off_t ftello(FILE *stream);
+extern int fseeko(FILE *stream, off_t offset, int whence);
+#endif
+
+typedef int8_t bgzf_byte_t;
+
+static const int DEFAULT_BLOCK_SIZE = 64 * 1024;
+static const int MAX_BLOCK_SIZE = 64 * 1024;
+
+static const int BLOCK_HEADER_LENGTH = 18;
+static const int BLOCK_FOOTER_LENGTH = 8;
+
+static const int GZIP_ID1 = 31;
+static const int GZIP_ID2 = 139;
+static const int CM_DEFLATE = 8;
+static const int FLG_FEXTRA = 4;
+static const int OS_UNKNOWN = 255;
+static const int BGZF_ID1 = 66; // 'B'
+static const int BGZF_ID2 = 67; // 'C'
+static const int BGZF_LEN = 2;
+static const int BGZF_XLEN = 6; // BGZF_LEN+4
+
+static const int GZIP_WINDOW_BITS = -15; // no zlib header
+static const int Z_DEFAULT_MEM_LEVEL = 8;
+
+
+static inline /* file-local: a plain `inline` definition has no guaranteed external definition under C99 */
+void
+packInt16(uint8_t* buffer, uint16_t value) /* store value into buffer[0..1], low byte first */
+{
+    buffer[0] = value; /* conversion to uint8_t keeps the low 8 bits */
+    buffer[1] = value >> 8;
+}
+
+static inline /* file-local: a plain `inline` definition has no guaranteed external definition under C99 */
+int
+unpackInt16(const uint8_t* buffer) /* read buffer[0..1] as an unsigned 16-bit value, low byte first */
+{
+    return (buffer[0] | (buffer[1] << 8));
+}
+
+static inline /* file-local: a plain `inline` definition has no guaranteed external definition under C99 */
+void
+packInt32(uint8_t* buffer, uint32_t value) /* store value into buffer[0..3], low byte first */
+{
+    buffer[0] = value; /* each store keeps only the low 8 bits of the shifted value */
+    buffer[1] = value >> 8;
+    buffer[2] = value >> 16;
+    buffer[3] = value >> 24;
+}
+
+static inline
+int
+bgzf_min(int x, int y) /* smaller of x and y */
+{
+    return (y <= x) ? y : x; /* when equal, either operand is the same value */
+}
+
+static
+void
+report_error(BGZF* fp, const char* message) { /* record message as the handle's current error */
+    fp->error = message; /* the pointer is stored, not copied, so message must outlive the handle's use of it */
+}
+
+static BGZF *bgzf_read_init() /* allocate a BGZF handle with the buffers and block cache used for reading */
+{
+	BGZF *fp;
+	fp = calloc(1, sizeof(BGZF)); /* zero-filled, so fields not set below start as 0/NULL; NOTE(review): result is not checked */
+    fp->uncompressed_block_size = MAX_BLOCK_SIZE;
+    fp->uncompressed_block = malloc(MAX_BLOCK_SIZE);
+    fp->compressed_block_size = MAX_BLOCK_SIZE;
+    fp->compressed_block = malloc(MAX_BLOCK_SIZE);
+	fp->cache_size = 0; /* cache_block() stores nothing while cache_size <= MAX_BLOCK_SIZE */
+	fp->cache = kh_init(cache); /* hash keyed by block address, see KHASH_MAP_INIT_INT64(cache, cache_t) */
+	return fp;
+}
+
+static
+BGZF*
+open_read(int fd) /* wrap an open descriptor in a read-mode BGZF handle; returns 0 if the stream cannot be created */
+{
+#ifdef _USE_KNETFILE
+    knetFile *file = knet_dopen(fd, "r");
+#else
+    FILE* file = fdopen(fd, "r");
+#endif
+    BGZF* fp;
+	if (file == 0) return 0; /* fd is left open for the caller to deal with */
+	fp = bgzf_read_init();
+    fp->file_descriptor = fd;
+    fp->open_mode = 'r';
+#ifdef _USE_KNETFILE
+    fp->x.fpr = file;
+#else
+    fp->file = file;
+#endif
+    return fp; /* owned_file is still 0 here (calloc in bgzf_read_init); bgzf_open sets it to 1 */
+}
+
+static
+BGZF*
+open_write(int fd, bool is_uncompressed) /* wrap an open descriptor in a write-mode BGZF handle; returns 0 if fdopen fails */
+{
+    FILE* file = fdopen(fd, "w");
+    BGZF* fp;
+	if (file == 0) return 0;
+	fp = calloc(1, sizeof(BGZF)); /* zero-filled so fields not assigned below (cache, cache_size) are never indeterminate */
+    fp->file_descriptor = fd;
+    fp->open_mode = 'w';
+    fp->owned_file = 0; fp->is_uncompressed = is_uncompressed;
+#ifdef _USE_KNETFILE
+    fp->x.fpw = file;
+#else
+    fp->file = file;
+#endif
+    fp->uncompressed_block_size = DEFAULT_BLOCK_SIZE;
+    fp->uncompressed_block = NULL; /* no input buffer is allocated here */
+    fp->compressed_block_size = MAX_BLOCK_SIZE;
+    fp->compressed_block = malloc(MAX_BLOCK_SIZE);
+    fp->block_address = 0;
+    fp->block_offset = 0;
+    fp->block_length = 0;
+    fp->error = NULL;
+    return fp;
+}
+
+BGZF*
+bgzf_open(const char* __restrict path, const char* __restrict mode) /* open path for reading ('r') or writing ('w'); returns NULL on failure */
+{
+    BGZF* fp = NULL;
+    if (mode[0] == 'r' || mode[0] == 'R') { /* The reading mode is preferred. */
+#ifdef _USE_KNETFILE
+		knetFile *file = knet_open(path, mode);
+		if (file == 0) return 0;
+		fp = bgzf_read_init();
+		fp->file_descriptor = -1;
+		fp->open_mode = 'r';
+		fp->x.fpr = file;
+#else
+		int fd, oflag = O_RDONLY;
+#ifdef _WIN32
+		oflag |= O_BINARY;
+#endif
+		fd = open(path, oflag);
+		if (fd == -1) return 0;
+        if ((fp = open_read(fd)) == 0) close(fd); /* we opened fd, so release it when the handle cannot be built */
+#endif
+    } else if (mode[0] == 'w' || mode[0] == 'W') {
+		int fd, oflag = O_WRONLY | O_CREAT | O_TRUNC;
+#ifdef _WIN32
+		oflag |= O_BINARY;
+#endif
+		fd = open(path, oflag, 0666);
+		if (fd == -1) return 0;
+        if ((fp = open_write(fd, strstr(mode, "u")? 1 : 0)) == 0) close(fd); /* 'u' anywhere in mode selects uncompressed output */
+    }
+    if (fp != NULL) {
+        fp->owned_file = 1; /* marks the underlying file as opened by this function */
+    }
+    return fp; /* still NULL when mode starts with neither 'r' nor 'w' */
+}
+
+BGZF*
+bgzf_fdopen(int fd, const char * __restrict mode) /* build a BGZF handle on an already-open descriptor */
+{
+	if (fd == -1) return 0;
+    switch (mode[0]) { /* only the first mode character selects the direction */
+    case 'r': case 'R':
+        return open_read(fd);
+    case 'w': case 'W':
+        return open_write(fd, strstr(mode, "u")? 1 : 0);
+    default: return NULL; /* unrecognised mode */
+    }
+}
+
+static
+int
+deflate_block(BGZF* fp, int block_length) /* returns the total compressed block size (header + data + footer), or -1 on error */
+{
+    // Deflate the block in fp->uncompressed_block into fp->compressed_block.
+    // Also adds an extra field that stores the compressed block length.
+
+    bgzf_byte_t* buffer = fp->compressed_block;
+    int buffer_size = fp->compressed_block_size;
+
+    // Init gzip header
+    buffer[0] = GZIP_ID1;
+    buffer[1] = GZIP_ID2;
+    buffer[2] = CM_DEFLATE;
+    buffer[3] = FLG_FEXTRA;
+    buffer[4] = 0; // mtime
+    buffer[5] = 0;
+    buffer[6] = 0;
+    buffer[7] = 0;
+    buffer[8] = 0;
+    buffer[9] = OS_UNKNOWN;
+    buffer[10] = BGZF_XLEN;
+    buffer[11] = 0;
+    buffer[12] = BGZF_ID1;
+    buffer[13] = BGZF_ID2;
+    buffer[14] = BGZF_LEN;
+    buffer[15] = 0;
+    buffer[16] = 0; // placeholder for block length
+    buffer[17] = 0;
+
+    // loop to retry for blocks that do not compress enough
+    int input_length = block_length;
+    int compressed_length = 0;
+    while (1) {
+		int compress_level = fp->is_uncompressed? 0 : Z_DEFAULT_COMPRESSION; // level 0 when the handle was opened with 'u'
+        z_stream zs;
+        zs.zalloc = NULL;
+        zs.zfree = NULL;
+        zs.next_in = fp->uncompressed_block;
+        zs.avail_in = input_length;
+        zs.next_out = (void*)&buffer[BLOCK_HEADER_LENGTH];
+        zs.avail_out = buffer_size - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH; // leave room for the header and footer
+
+        int status = deflateInit2(&zs, compress_level, Z_DEFLATED,
+                                  GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY);
+        if (status != Z_OK) {
+            report_error(fp, "deflate init failed");
+            return -1;
+        }
+        status = deflate(&zs, Z_FINISH);
+        if (status != Z_STREAM_END) {
+            deflateEnd(&zs);
+            if (status == Z_OK) {
+                // Not enough space in buffer.
+                // Can happen in the rare case the input doesn't compress enough.
+                // Reduce the amount of input until it fits.
+                input_length -= 1024;
+                if (input_length <= 0) {
+                    // should never happen
+                    report_error(fp, "input reduction failed");
+                    return -1;
+                }
+                continue;
+            }
+            report_error(fp, "deflate failed");
+            return -1;
+        }
+        status = deflateEnd(&zs);
+        if (status != Z_OK) {
+            report_error(fp, "deflate end failed");
+            return -1;
+        }
+        compressed_length = zs.total_out;
+        compressed_length += BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH;
+        if (compressed_length > MAX_BLOCK_SIZE) {
+            // should never happen
+            report_error(fp, "deflate overflow");
+            return -1;
+        }
+        break;
+    }
+
+    packInt16((uint8_t*)&buffer[16], compressed_length-1); // fill the placeholder: total block size minus one
+    uint32_t crc = crc32(0L, NULL, 0L);
+    crc = crc32(crc, fp->uncompressed_block, input_length); // checksum of the bytes actually compressed
+    packInt32((uint8_t*)&buffer[compressed_length-8], crc); // footer, first 4 bytes: CRC32
+    packInt32((uint8_t*)&buffer[compressed_length-4], input_length); // footer, last 4 bytes: uncompressed length
+
+    int remaining = block_length - input_length; // input left over when the retry loop shrank input_length
+    if (remaining > 0) {
+        if (remaining > input_length) {
+            // should never happen (check so we can use memcpy)
+            report_error(fp, "remainder too large");
+            return -1;
+        }
+        memcpy(fp->uncompressed_block,
+               fp->uncompressed_block + input_length,
+               remaining);
+    }
+    fp->block_offset = remaining; // leftover bytes now sit at the start of the uncompressed buffer
+    return compressed_length;
+}
+
+static
+int
+inflate_block(BGZF* fp, int block_length) /* returns the number of inflated bytes, or -1 on error */
+{
+    // Inflate the block in fp->compressed_block into fp->uncompressed_block
+
+    z_stream zs;
+    zs.zalloc = NULL;
+    zs.zfree = NULL;
+    zs.next_in = fp->compressed_block + 18; // skip the 18-byte block header
+    zs.avail_in = block_length - 18; // only the bytes after the header belong to this block
+    zs.next_out = fp->uncompressed_block;
+    zs.avail_out = fp->uncompressed_block_size;
+
+    int status = inflateInit2(&zs, GZIP_WINDOW_BITS);
+    if (status != Z_OK) {
+        report_error(fp, "inflate init failed");
+        return -1;
+    }
+    status = inflate(&zs, Z_FINISH);
+    if (status != Z_STREAM_END) { // anything but a complete stream is an error
+        inflateEnd(&zs);
+        report_error(fp, "inflate failed");
+        return -1;
+    }
+    status = inflateEnd(&zs);
+    if (status != Z_OK) {
+        report_error(fp, "inflate failed");
+        return -1;
+    }
+    return zs.total_out;
+}
+
+static
+int
+check_header(const bgzf_byte_t* header) /* 1 when header carries the expected gzip and BGZF fields, else 0 */
+{
+    const uint8_t* raw = (const uint8_t*)header; /* unsigned view for the 16-bit little-endian fields */
+    if (header[0] != GZIP_ID1) return 0;
+    if (header[1] != (bgzf_byte_t) GZIP_ID2) return 0;
+    if (header[2] != Z_DEFLATED) return 0;
+    if ((header[3] & FLG_FEXTRA) == 0) return 0;
+    if (unpackInt16(&raw[10]) != BGZF_XLEN) return 0;
+    if (header[12] != BGZF_ID1 || header[13] != BGZF_ID2) return 0;
+    return unpackInt16(&raw[14]) == BGZF_LEN;
+}
+
+static void free_cache(BGZF *fp) /* free every cached block and the hash table; does nothing unless fp is in read mode */
+{
+	khint_t k;
+	khash_t(cache) *h = (khash_t(cache)*)fp->cache;
+	if (fp->open_mode != 'r') return; /* in this file only bgzf_read_init() creates the cache */
+	for (k = kh_begin(h); k < kh_end(h); ++k)
+		if (kh_exist(h, k)) free(kh_val(h, k).block); /* blocks are malloc'ed in cache_block() */
+	kh_destroy(cache, h);
+}
+
+static int load_block_from_cache(BGZF *fp, int64_t block_address) /* returns the cached block's size, or 0 on a miss */
+{
+	khint_t k;
+	cache_t *p;
+	khash_t(cache) *h = (khash_t(cache)*)fp->cache;
+	k = kh_get(cache, h, block_address);
+	if (k == kh_end(h)) return 0; /* miss: no block cached at this address */
+	p = &kh_val(h, k);
+	if (fp->block_length != 0) fp->block_offset = 0; /* same condition as in bgzf_read_block: keep the offset when the read follows a seek */
+	fp->block_address = block_address;
+	fp->block_length = p->size;
+	memcpy(fp->uncompressed_block, p->block, MAX_BLOCK_SIZE);
+#ifdef _USE_KNETFILE
+	knet_seek(fp->x.fpr, p->end_offset, SEEK_SET);
+#else
+	fseeko(fp->file, p->end_offset, SEEK_SET);
+#endif
+	return p->size; /* the stream now sits at end_offset, the file offset just past this block */
+}
+
+static void cache_block(BGZF *fp, int size) /* remember the current uncompressed block; size = bytes the block occupies in the file */
+{
+	int ret;
+	khint_t k;
+	cache_t *p;
+	khash_t(cache) *h = (khash_t(cache)*)fp->cache;
+	if (MAX_BLOCK_SIZE >= fp->cache_size) return; /* caching is off unless cache_size exceeds one block */
+	if ((kh_size(h) + 1) * MAX_BLOCK_SIZE > fp->cache_size) { /* one more block would exceed the budget */
+		/* A better way would be to remove the oldest block in the
+		 * cache, but here we remove a random one for simplicity. This
+		 * should not have a big impact on performance. */
+		for (k = kh_begin(h); k < kh_end(h); ++k)
+			if (kh_exist(h, k)) break; /* picks the first occupied bucket */
+		if (k < kh_end(h)) {
+			free(kh_val(h, k).block);
+			kh_del(cache, h, k);
+		}
+	}
+	k = kh_put(cache, h, fp->block_address, &ret);
+	if (ret == 0) return; // if this happens, a bug!
+	p = &kh_val(h, k);
+	p->size = fp->block_length;
+	p->end_offset = fp->block_address + size; /* file offset just past this block */
+	p->block = malloc(MAX_BLOCK_SIZE); /* NOTE(review): allocation result is not checked */
+	memcpy(kh_val(h, k).block, fp->uncompressed_block, MAX_BLOCK_SIZE);
+}
+
+int
+bgzf_read_block(BGZF* fp)
+{ // Load the block at the current file position into fp->uncompressed_block; returns 0 on success or end of file, -1 on error.
+    bgzf_byte_t header[BLOCK_HEADER_LENGTH];
+	int size = 0; // compressed bytes consumed from the file for this block
+#ifdef _USE_KNETFILE
+    int64_t block_address = knet_tell(fp->x.fpr);
+	if (load_block_from_cache(fp, block_address)) return 0;
+    int count = knet_read(fp->x.fpr, header, sizeof(header));
+#else
+    int64_t block_address = ftello(fp->file);
+	if (load_block_from_cache(fp, block_address)) return 0;
+    int count = fread(header, 1, sizeof(header), fp->file);
+#endif
+    if (count == 0) { // end of file: reported as block_length == 0 together with a 0 return
+        fp->block_length = 0;
+        return 0;
+    }
+	size = count;
+    if (count != sizeof(header)) {
+        report_error(fp, "read failed");
+        return -1;
+    }
+    int block_length = unpackInt16((uint8_t*)&header[16]) + 1; // total compressed size of the block as claimed by its header
+    if (!check_header(header) || block_length < BLOCK_HEADER_LENGTH) { // a size below the header length would make "remaining" negative and overrun compressed_block
+        report_error(fp, "invalid block header");
+        return -1;
+    }
+    bgzf_byte_t* compressed_block = (bgzf_byte_t*) fp->compressed_block;
+    memcpy(compressed_block, header, BLOCK_HEADER_LENGTH);
+    int remaining = block_length - BLOCK_HEADER_LENGTH;
+#ifdef _USE_KNETFILE
+    count = knet_read(fp->x.fpr, &compressed_block[BLOCK_HEADER_LENGTH], remaining);
+#else
+    count = fread(&compressed_block[BLOCK_HEADER_LENGTH], 1, remaining, fp->file);
+#endif
+    if (count != remaining) {
+        report_error(fp, "read failed");
+        return -1;
+    }
+	size += count;
+    count = inflate_block(fp, block_length);
+    if (count < 0) {
+        return -1;
+    }
+    if (fp->block_length != 0) {
+        // Do not reset offset if this read follows a seek.
+        fp->block_offset = 0;
+    }
+    fp->block_address = block_address;
+    fp->block_length = count;
+	cache_block(fp, size);
+    return 0;
+}
+
+int
+bgzf_read(BGZF* fp, void* data, int length)
+{ // Copy up to length uncompressed bytes into data; returns the number copied (0 at end of file), -1 on error.
+    if (length <= 0) {
+        return 0;
+    }
+    if (fp->open_mode != 'r') {
+        report_error(fp, "file not open for reading");
+        return -1;
+    }
+
+    int bytes_read = 0;
+    bgzf_byte_t* output = data;
+    while (bytes_read < length) {
+        int available = fp->block_length - fp->block_offset;
+        if (available <= 0) { // current block exhausted (or none loaded): fetch the next one
+            if (bgzf_read_block(fp) != 0) {
+                return -1;
+            }
+            available = fp->block_length - fp->block_offset;
+            if (available <= 0) { // still nothing to copy: stop and return the short count
+                break;
+            }
+        }
+        int copy_length = bgzf_min(length-bytes_read, available);
+        bgzf_byte_t* buffer = fp->uncompressed_block;
+        memcpy(output, buffer + fp->block_offset, copy_length);
+        fp->block_offset += copy_length;
+        output += copy_length;
+        bytes_read += copy_length;
+    }
+    if (fp->block_offset == fp->block_length) { // block fully consumed: block_address now names the current file position
+#ifdef _USE_KNETFILE
+        fp->block_address = knet_tell(fp->x.fpr);
+#else
+        fp->block_address = ftello(fp->file);
+#endif
+        fp->block_offset = 0;
+        fp->block_length = 0;
+    }
+    return bytes_read;
+}
+
+static
+int
+flush_block(BGZF* fp)
+{ // Compress and write out whatever is buffered in uncompressed_block; returns 0 on success, -1 on error.
+    while (fp->block_offset > 0) { // presumably deflate_block lowers block_offset by what it consumed -- confirm in deflate_block
+        int block_length = deflate_block(fp, fp->block_offset);
+        if (block_length < 0) {
+            return -1;
+        }
+#ifdef _USE_KNETFILE
+        int count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw);
+#else
+        int count = fwrite(fp->compressed_block, 1, block_length, fp->file);
+#endif
+        if (count != block_length) {
+            report_error(fp, "write failed");
+            return -1;
+        }
+        fp->block_address += block_length; // advance by the compressed bytes just written
+    }
+    return 0;
+}
+
+int
+bgzf_write(BGZF* fp, const void* data, int length)
+{ // Buffer length bytes from data, flushing each time the uncompressed block fills; returns the bytes accepted, or -1 if not open for writing.
+    if (fp->open_mode != 'w') {
+        report_error(fp, "file not open for writing");
+        return -1;
+    }
+
+    if (fp->uncompressed_block == NULL) { // the buffer is allocated lazily on the first write
+        fp->uncompressed_block = malloc(fp->uncompressed_block_size); // NOTE(review): the result is not checked before the memcpy below
+    }
+
+    const bgzf_byte_t* input = data;
+    int block_length = fp->uncompressed_block_size;
+    int bytes_written = 0;
+    while (bytes_written < length) {
+        int copy_length = bgzf_min(block_length - fp->block_offset, length - bytes_written);
+        bgzf_byte_t* buffer = fp->uncompressed_block;
+        memcpy(buffer + fp->block_offset, input, copy_length);
+        fp->block_offset += copy_length;
+        input += copy_length;
+        bytes_written += copy_length;
+        if (fp->block_offset == block_length) {
+            if (flush_block(fp) != 0) {
+                break; // NOTE(review): a failed flush ends the loop, yet the count so far is returned rather than -1
+            }
+        }
+    }
+    return bytes_written;
+}
+
+int
+bgzf_close(BGZF* fp)
+{ // Flush pending output (write mode), close the file if owned, and free fp; returns 0 on success, -1 on error (fp is then left allocated).
+    if (fp->open_mode == 'w') {
+        if (flush_block(fp) != 0) return -1;
+		{ // add an empty block
+			int count, block_length = deflate_block(fp, 0);
+			if (block_length < 0) return -1; // a negative length must never reach fwrite as a size
+#ifdef _USE_KNETFILE
+			count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw);
+#else
+			count = fwrite(fp->compressed_block, 1, block_length, fp->file);
+#endif
+			if (count != block_length) { report_error(fp, "write failed"); return -1; } // same check flush_block applies to data blocks
+		}
+#ifdef _USE_KNETFILE
+        if (fflush(fp->x.fpw) != 0) {
+#else
+        if (fflush(fp->file) != 0) {
+#endif
+            report_error(fp, "flush failed");
+            return -1;
+        }
+    }
+    if (fp->owned_file) { // handles that do not own their file leave it open
+#ifdef _USE_KNETFILE
+		int ret;
+		if (fp->open_mode == 'w') ret = fclose(fp->x.fpw);
+		else ret = knet_close(fp->x.fpr);
+        if (ret != 0) return -1;
+#else
+        if (fclose(fp->file) != 0) {
+            return -1;
+        }
+#endif
+    }
+    free(fp->uncompressed_block);
+    free(fp->compressed_block);
+	free_cache(fp);
+    free(fp);
+    return 0;
+}
+
+void bgzf_set_cache_size(BGZF *fp, int cache_size)
+{ // Record the cache budget on the handle; a NULL handle is silently ignored.
+	if (fp != NULL) fp->cache_size = cache_size;
+}
+
+int bgzf_check_EOF(BGZF *fp)
+{ // Returns 1 if the last 28 bytes of the file equal magic[], 0 if they do not, -1 if the seek to the tail fails.
+	static uint8_t magic[28] = "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0";
+	uint8_t buf[28];
+	off_t offset, n; // n: number of tail bytes actually read
+#ifdef _USE_KNETFILE
+	offset = knet_tell(fp->x.fpr);
+	if (knet_seek(fp->x.fpr, -28, SEEK_END) != 0) return -1;
+	n = knet_read(fp->x.fpr, buf, 28);
+	knet_seek(fp->x.fpr, offset, SEEK_SET); // put the read position back where the caller left it
+#else
+	offset = ftello(fp->file);
+	if (fseeko(fp->file, -28, SEEK_END) != 0) return -1;
+	n = fread(buf, 1, 28, fp->file);
+	fseeko(fp->file, offset, SEEK_SET); // put the read position back where the caller left it
+#endif
+	return (n == 28 && memcmp(magic, buf, 28) == 0)? 1 : 0; // a short read leaves buf partly unset, so it never counts as a match
+}
+
+int64_t
+bgzf_seek(BGZF* fp, int64_t pos, int where)
+{ // Position a read-mode handle at virtual offset pos (where must be SEEK_SET); returns 0 on success, -1 on error.
+    if (fp->open_mode != 'r') {
+        report_error(fp, "file not open for read");
+        return -1;
+    }
+    if (where != SEEK_SET) {
+        report_error(fp, "unimplemented seek option");
+        return -1;
+    }
+    int block_offset = pos & 0xFFFF; // low 16 bits: offset inside the uncompressed block
+    int64_t block_address = (pos >> 16) & 0xFFFFFFFFFFFFLL; // upper 48 bits: file offset of the block
+#ifdef _USE_KNETFILE
+    if (knet_seek(fp->x.fpr, block_address, SEEK_SET) != 0) {
+#else
+    if (fseeko(fp->file, block_address, SEEK_SET) != 0) {
+#endif
+        report_error(fp, "seek failed");
+        return -1;
+    }
+    fp->block_length = 0;  // indicates current block is not loaded
+    fp->block_address = block_address;
+    fp->block_offset = block_offset; // kept by the next block load because block_length is 0
+    return 0;
+}
diff --git a/tabix/bgzf.h b/tabix/bgzf.h
new file mode 100644
index 0000000..f544a67
--- /dev/null
+++ b/tabix/bgzf.h
@@ -0,0 +1,156 @@
+/* The MIT License
+
+   Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
+
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+*/
+
+#ifndef __BGZF_H
+#define __BGZF_H
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <zlib.h>
+#ifdef _USE_KNETFILE
+#include "knetfile.h"
+#endif
+
+//typedef int8_t bool;
+
+typedef struct { // state of one open BGZF stream
+    int file_descriptor;
+    char open_mode;  // 'r' or 'w'
+    bool owned_file, is_uncompressed; // owned_file: bgzf_close closes the underlying file only when this is set
+#ifdef _USE_KNETFILE
+	union {
+		knetFile *fpr; // used when open_mode is 'r'
+		FILE *fpw; // used when open_mode is 'w'
+	} x;
+#else
+    FILE* file;
+#endif
+    int uncompressed_block_size; // capacity bgzf_write fills before it flushes a block
+    int compressed_block_size;
+    void* uncompressed_block;
+    void* compressed_block;
+    int64_t block_address; // file offset of the current block
+    int block_length; // uncompressed bytes in the current block when reading; 0 means no block is loaded
+    int block_offset; // position inside uncompressed_block
+	int cache_size; // cache budget in bytes; see bgzf_set_cache_size
+    const char* error; // message for the most recent failure
+	void *cache; // a pointer to a hash table
+} BGZF;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Open an existing file descriptor for reading or writing.
+ * Mode must be either "r" or "w".
+ * A subsequent bgzf_close will not close the file descriptor.
+ * Returns null on error.
+ */
+BGZF* bgzf_fdopen(int fd, const char* __restrict mode);
+
+/*
+ * Open the specified file for reading or writing.
+ * Mode must be either "r" or "w".
+ * Returns null on error.
+ */
+BGZF* bgzf_open(const char* path, const char* __restrict mode);
+
+/*
+ * Close the BGZ file and free all associated resources.
+ * Does not close the underlying file descriptor if created with bgzf_fdopen.
+ * Returns zero on success, -1 on error.
+ */
+int bgzf_close(BGZF* fp);
+
+/*
+ * Read up to length bytes from the file storing into data.
+ * Returns the number of bytes actually read.
+ * Returns zero on end of file.
+ * Returns -1 on error.
+ */
+int bgzf_read(BGZF* fp, void* data, int length);
+
+/*
+ * Write length bytes from data to the file.
+ * Returns the number of bytes written.
+ * Returns -1 on error.
+ */
+int bgzf_write(BGZF* fp, const void* data, int length);
+
+/*
+ * Return a virtual file pointer to the current location in the file.
+ * No interpretation of the value should be made, other than a subsequent
+ * call to bgzf_seek can be used to position the file at the same point.
+ * Return value is non-negative on success.
+ * Returns -1 on error.
+ */
+#define bgzf_tell(fp) (((fp)->block_address << 16) | ((fp)->block_offset & 0xFFFF)) /* fp is parenthesized so any pointer expression is accepted */
+
+/*
+ * Set the file to read from the location specified by pos, which must
+ * be a value previously returned by bgzf_tell for this file (but not
+ * necessarily one returned by this file handle).
+ * The where argument must be SEEK_SET.
+ * Seeking on a file opened for write is not supported.
+ * Returns zero on success, -1 on error.
+ */
+int64_t bgzf_seek(BGZF* fp, int64_t pos, int where);
+
+/*
+ * Set the cache size. Zero to disable. By default, caching is
+ * disabled. The recommended cache size for frequent random access is
+ * about 8M bytes.
+ */
+void bgzf_set_cache_size(BGZF *fp, int cache_size);
+
+int bgzf_check_EOF(BGZF *fp);
+
+int bgzf_read_block(BGZF* fp);
+
+#ifdef __cplusplus
+}
+#endif
+
+static inline int bgzf_getc(BGZF *fp)
+{ // Read one uncompressed byte; returns it as 0..255, -1 at end of file, -2 on a read error.
+	int c;
+	if (fp->block_offset >= fp->block_length) { // nothing left in the current block: load the next one
+		if (bgzf_read_block(fp) != 0) return -2; /* error */
+		if (fp->block_length == 0) return -1; /* end-of-file */
+	}
+	c = ((unsigned char*)fp->uncompressed_block)[fp->block_offset++];
+    if (fp->block_offset == fp->block_length) { // block fully consumed: same bookkeeping as at the end of bgzf_read
+#ifdef _USE_KNETFILE
+        fp->block_address = knet_tell(fp->x.fpr);
+#else
+        fp->block_address = ftello(fp->file);
+#endif
+        fp->block_offset = 0;
+        fp->block_length = 0;
+    }
+	return c;
+}
+
+#endif
diff --git a/tabix/bgzip.c b/tabix/bgzip.c
new file mode 100644
index 0000000..d144632
--- /dev/null
+++ b/tabix/bgzip.c
@@ -0,0 +1,201 @@
+/* The MIT License
+
+   Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
+
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to deal
+   in the Software without restriction, including without limitation the rights
+   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+   copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+   THE SOFTWARE.
+*/
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/select.h>
+#include <sys/stat.h>
+#include "bgzf.h"
+
+static const int WINDOW_SIZE = 64 * 1024; // size in bytes of the transfer buffer allocated in main()
+
+static int bgzip_main_usage()
+{ // Print the command-line help to stderr; always returns 1 so callers can use it as the exit status.
+	fputs("\n"
+		"Usage:   bgzip [options] [file] ...\n\n"
+		"Options: -c      write on standard output, keep original files unchanged\n"
+		"         -d      decompress\n"
+		"         -f      overwrite files without asking\n"
+		"         -b INT  decompress at virtual file pointer INT\n"
+		"         -s INT  decompress INT bytes in the uncompressed file\n"
+		"         -h      give this help\n"
+		"\n", stderr);
+	return 1;
+}
+
+static int write_open(const char *fn, int is_forced)
+{ // Open fn for writing, asking before overwriting unless is_forced; exits the process on refusal or failure, so the result is a valid descriptor.
+	int fd = -1;
+	char c;
+	if (!is_forced) {
+		if ((fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, 0666)) < 0 && errno == EEXIST) {
+			fprintf(stderr, "[bgzip] %s already exists; do you wish to overwrite (y or n)? ", fn);
+			if (scanf("%c", &c) != 1) c = 'n'; // no answer (EOF or read error) counts as "no" instead of testing an uninitialized char
+			if (c != 'Y' && c != 'y') {
+				fprintf(stderr, "[bgzip] not overwritten\n");
+				exit(1);
+			}
+		}
+	}
+	if (fd < 0) { // forced, confirmed, or the exclusive open failed for another reason: plain create/truncate
+		if ((fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0666)) < 0) {
+			fprintf(stderr, "[bgzip] %s: Fail to write\n", fn);
+			exit(1);
+		}
+	}
+	return fd;
+}
+
+static void fail(BGZF* fp)
+{ // Print the handle's error message to stderr and terminate with status 1.
+    fprintf(stderr, "Error: %s\n", fp->error);
+    exit(1);
+}
+
+int main(int argc, char **argv)
+{ // bgzip entry point: compress (default) or decompress (-d) one file or stdin; returns 0 on success, 1 on usage or I/O errors.
+	int c, compress, pstdout, is_forced;
+	BGZF *fp;
+	void *buffer;
+	long start, end, size;
+
+	compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0;
+	while((c  = getopt(argc, argv, "cdhfb:s:")) >= 0){
+		switch(c){
+		case 'h': return bgzip_main_usage();
+		case 'd': compress = 0; break;
+		case 'c': pstdout = 1; break;
+		case 'b': start = atol(optarg); break;
+		case 's': size = atol(optarg); break;
+		case 'f': is_forced = 1; break;
+		}
+	}
+	if (size >= 0) end = start + size;
+	if (end >= 0 && end < start) {
+		fprintf(stderr, "[bgzip] Illegal region: [%ld, %ld]\n", start, end);
+		return 1;
+	}
+	if (compress == 1) {
+		struct stat sbuf;
+		int f_src = fileno(stdin);
+		int f_dst = fileno(stdout);
+
+		if ( argc>optind )
+		{
+			if ( stat(argv[optind],&sbuf)<0 )
+			{
+				fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
+				return 1;
+			}
+
+			if ((f_src = open(argv[optind], O_RDONLY)) < 0) {
+				fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
+				return 1;
+			}
+
+			if (pstdout)
+				f_dst = fileno(stdout);
+			else
+			{
+				char *name = malloc(strlen(argv[optind]) + 5);
+				strcpy(name, argv[optind]);
+				strcat(name, ".gz");
+				f_dst = write_open(name, is_forced);
+				if (f_dst < 0) return 1;
+				free(name);
+			}
+		}
+		else if (!pstdout && isatty(fileno((FILE *)stdout)) )
+			return bgzip_main_usage();
+
+		fp = bgzf_fdopen(f_dst, "w");
+		buffer = malloc(WINDOW_SIZE);
+		while ((c = read(f_src, buffer, WINDOW_SIZE)) > 0)
+			if (bgzf_write(fp, buffer, c) < 0) fail(fp);
+		// f_dst will be closed here
+		if (bgzf_close(fp) < 0) fail(fp);
+		if (argc > optind && !pstdout) unlink(argv[optind]); // -c promises to keep the original file
+		free(buffer);
+		close(f_src);
+		return 0;
+	} else {
+		struct stat sbuf;
+		int f_dst;
+
+		if ( argc>optind )
+		{
+			if ( stat(argv[optind],&sbuf)<0 )
+			{
+				fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
+				return 1;
+			}
+			char *name;
+			int len = strlen(argv[optind]);
+			if ( len < 3 || strcmp(argv[optind]+len-3,".gz") ) // len < 3: the suffix test would read before the start of the name
+			{
+				fprintf(stderr, "[bgzip] %s: unknown suffix -- ignored\n", argv[optind]);
+				return 1;
+			}
+			fp = bgzf_open(argv[optind], "r");
+			if (fp == NULL) {
+				fprintf(stderr, "[bgzip] Could not open file: %s\n", argv[optind]);
+				return 1;
+			}
+
+			name = strdup(argv[optind]);
+			name[strlen(name) - 3] = '\0';
+			f_dst = pstdout? fileno(stdout) : write_open(name, is_forced); // honour -c for a named input as well
+			free(name);
+		}
+		else if (!pstdout && isatty(fileno((FILE *)stdin)) )
+			return bgzip_main_usage();
+		else
+		{
+			f_dst = fileno(stdout);
+			fp = bgzf_fdopen(fileno(stdin), "r");
+			if (fp == NULL) {
+				fprintf(stderr, "[bgzip] Could not read from stdin: %s\n", strerror(errno));
+				return 1;
+			}
+		}
+		buffer = malloc(WINDOW_SIZE);
+		if (bgzf_seek(fp, start, SEEK_SET) < 0) fail(fp);
+		while (1) {
+			if (end < 0) c = bgzf_read(fp, buffer, WINDOW_SIZE);
+			else c = bgzf_read(fp, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start));
+			if (c == 0) break;
+			if (c < 0) fail(fp);
+			start += c;
+			write(f_dst, buffer, c); // NOTE(review): the result of write() is not checked
+			if (end >= 0 && start >= end) break;
+		}
+		free(buffer);
+		if (bgzf_close(fp) < 0) fail(fp);
+		if (!pstdout && argc > optind) unlink(argv[optind]); // when reading stdin there is no input file (argv[optind] is NULL)
+		return 0;
+	}
+}
diff --git a/tabix/index.c b/tabix/index.c
new file mode 100644
index 0000000..e5b227c
--- /dev/null
+++ b/tabix/index.c
@@ -0,0 +1,954 @@
+#include <ctype.h>
+#include <assert.h>
+#include <sys/stat.h>
+#include "khash.h"
+#include "ksort.h"
+#include "kstring.h"
+#include "bam_endian.h"
+#ifdef _USE_KNETFILE
+#include "knetfile.h"
+#endif
+#include "tabix.h"
+
+#define TAD_MIN_CHUNK_GAP 32768
+// 1<<14 is the size of minimum bin.
+#define TAD_LIDX_SHIFT    14
+
+typedef struct { // one chunk of the data file, as a pair of virtual offsets
+	uint64_t u, v; // u: start of the chunk, v: end of the chunk (see insert_offset)
+} pair64_t;
+
+#define pair64_lt(a,b) ((a).u < (b).u)
+KSORT_INIT(off, pair64_t, pair64_lt)
+
+typedef struct { // growable list of chunks belonging to one bin
+	uint32_t m, n; // m: allocated entries, n: entries in use
+	pair64_t *list;
+} ti_binlist_t;
+
+typedef struct { // linear index of one target: one offset per (1 << TAD_LIDX_SHIFT)-base window
+	int32_t n, m; // n: windows in use, m: allocated windows
+	uint64_t *offset; // 0 marks a window that has not been assigned an offset yet
+} ti_lidx_t;
+
+KHASH_MAP_INIT_INT(i, ti_binlist_t)
+KHASH_MAP_INIT_STR(s, int)
+
+struct __ti_index_t {
+	ti_conf_t conf;
+	int32_t n, max; // n: targets in use, max: allocated slots in index[] and index2[]
+	khash_t(s) *tname; // target name -> target id
+	khash_t(i) **index; // per target: bin number -> chunk list
+	ti_lidx_t *index2; // per target: linear index
+};
+
+struct __ti_iter_t {
+	int from_first; // read from the first record; no random access
+	int tid, beg, end, n_off, i, finished;
+	uint64_t curr_off;
+	kstring_t str;
+	const ti_index_t *idx;
+	pair64_t *off;
+};
+
+typedef struct { // interval parsed from one data line by get_intv
+	int tid, beg, end, bin; // beg is 0-based; every field is -1 until it has been parsed
+} ti_intv_t;
+
+ti_conf_t ti_conf_gff = { 0, 1, 4, 5, '#', 0 }; // presumably { preset, sc, bc, ec, meta_char, line_skip } -- confirm against tabix.h
+ti_conf_t ti_conf_bed = { TI_FLAG_UCSC, 1, 2, 3, '#', 0 };
+ti_conf_t ti_conf_psltbl = { TI_FLAG_UCSC, 15, 17, 18, '#', 0 };
+ti_conf_t ti_conf_sam = { TI_PRESET_SAM, 3, 4, 0, '@', 0 };
+ti_conf_t ti_conf_vcf = { TI_PRESET_VCF, 1, 2, 0, '#', 0 };
+
+/***************
+ * read a line *
+ ***************/
+
+/*
+int ti_readline(BGZF *fp, kstring_t *str)
+{
+	int c, l = 0;
+	str->l = 0;
+	while ((c = bgzf_getc(fp)) >= 0 && c != '\n') {
+		++l;
+		if (c != '\r') kputc(c, str);
+	}
+	if (c < 0 && l == 0) return -1; // end of file
+	return str->l;
+}
+*/
+
+/* Below is a faster implementation largely equivalent to the one
+ * commented out above. */
+int ti_readline(BGZF *fp, kstring_t *str)
+{ // Read one '\n'-terminated line into str (newline dropped); returns its length, or -1 (end of file) / -2 (read error) when nothing was read.
+	int l, state = 0; // state: 0 = keep reading, 1 = newline found, -1 = end of file, -2 = read error
+	unsigned char *buf = (unsigned char*)fp->uncompressed_block;
+	str->l = 0;
+	do {
+		if (fp->block_offset >= fp->block_length) { // current block used up: load the next one
+			if (bgzf_read_block(fp) != 0) { state = -2; break; }
+			if (fp->block_length == 0) { state = -1; break; }
+		}
+		for (l = fp->block_offset; l < fp->block_length && buf[l] != '\n'; ++l); // scan for the newline within this block
+		if (l < fp->block_length) state = 1;
+		l -= fp->block_offset; // l is now the number of bytes to append
+		if (str->l + l + 1 >= str->m) { // grow so that the bytes plus a terminating NUL fit
+			str->m = str->l + l + 2;
+			kroundup32(str->m);
+			str->s = (char*)realloc(str->s, str->m);
+		}
+		memcpy(str->s + str->l, buf + fp->block_offset, l);
+		str->l += l;
+		fp->block_offset += l + 1; // the extra 1 steps over the newline
+		if (fp->block_offset >= fp->block_length) { // block fully consumed: same bookkeeping as bgzf_read
+#ifdef _USE_KNETFILE
+			fp->block_address = knet_tell(fp->x.fpr);
+#else
+			fp->block_address = ftello(fp->file);
+#endif
+			fp->block_offset = 0;
+			fp->block_length = 0;
+		} 
+	} while (state == 0);
+	if (str->l == 0 && state < 0) return state;
+	str->s[str->l] = 0;
+	return str->l;
+}
+
+/*************************************
+ * get the interval from a data line *
+ *************************************/
+
+static inline int ti_reg2bin(uint32_t beg, uint32_t end)
+{ // Number of the smallest bin whose window holds the whole half-open interval [beg, end).
+	static const int first_bin[5] = { 4681, 585, 73, 9, 1 }; // number of the first bin on each level, finest level first
+	const uint32_t last = end - 1; // last position covered by the interval
+	int level, shift;
+	for (level = 0, shift = 14; level < 5; ++level, shift += 3) {
+		if (beg >> shift == last >> shift) return first_bin[level] + (beg >> shift);
+	}
+	return 0; // no single window on any level holds the interval
+}
+
+static int get_tid(ti_index_t *idx, const char *ss)
+{ // Return the id of target name ss, registering it (and its empty per-target indices) on first sight.
+	khint_t k;
+	int tid;
+	k = kh_get(s, idx->tname, ss);
+	if (k == kh_end(idx->tname)) { // a new target sequence
+		int ret, size;
+		// update idx->n, ->max, ->index and ->index2
+		if (idx->n == idx->max) { // arrays full: double the capacity (8 slots to start with)
+			idx->max = idx->max? idx->max<<1 : 8;
+			idx->index = realloc(idx->index, idx->max * sizeof(void*));
+			idx->index2 = realloc(idx->index2, idx->max * sizeof(ti_lidx_t));
+		}
+		memset(&idx->index2[idx->n], 0, sizeof(ti_lidx_t));
+		idx->index[idx->n++] = kh_init(i);
+		// update ->tname
+		tid = size = kh_size(idx->tname); // ids are handed out in order of first appearance
+		k = kh_put(s, idx->tname, strdup(ss), &ret); // the table keeps its own copy of the name
+		kh_value(idx->tname, k) = size;
+		assert(idx->n == kh_size(idx->tname));
+	} else tid = kh_value(idx->tname, k);
+	return tid;
+}
+
+static int get_intv(ti_index_t *idx, kstring_t *str, ti_intv_t *intv)
+{ // Parse target id, begin, end and bin of the data line in str according to idx->conf; returns 0 on success, -1 if a field is missing.
+	int i, b = 0, id = 1; // b: start of the current column, id: its 1-based column number
+	char *s;
+	intv->tid = intv->beg = intv->end = intv->bin = -1;
+	for (i = 0; i <= str->l; ++i) {
+		if (str->s[i] == '\t' || str->s[i] == 0) {
+			if (id == idx->conf.sc) {
+				str->s[i] = 0; // terminate the name temporarily for the lookup
+				intv->tid = get_tid(idx, str->s + b);
+				if (i != str->l) str->s[i] = '\t';
+			} else if (id == idx->conf.bc) {
+				// here ->beg is 0-based.
+				intv->beg = intv->end = strtol(str->s + b, &s, 0);
+				if (!(idx->conf.preset&TI_FLAG_UCSC)) --intv->beg;
+				else ++intv->end;
+				if (intv->beg < 0) intv->beg = 0;
+				if (intv->end < 1) intv->end = 1;
+			} else {
+				if ((idx->conf.preset&0xffff) == TI_PRESET_GENERIC) {
+					if (id == idx->conf.ec) intv->end = strtol(str->s + b, &s, 0);
+				} else if ((idx->conf.preset&0xffff) == TI_PRESET_SAM) {
+					if (id == 6) { // CIGAR
+						int l = 0, op;
+						char *t;
+						for (s = str->s + b; s < str->s + i;) {
+							long x = strtol(s, &t, 10);
+							op = toupper(*t);
+							if (op == 'M' || op == 'D' || op == 'N') l += x; // only these operations add to the length on the reference
+							s = t + 1;
+						}
+						if (l == 0) l = 1;
+						intv->end = intv->beg + l;
+					}
+				} else if ((idx->conf.preset&0xffff) == TI_PRESET_VCF) {
+					// FIXME: the following is NOT tested and is likely to be buggy
+					if (id == 5) { // ALT
+						char *t;
+						int max = 1;
+						for (s = str->s + b; s < str->s + i;) {
+							if (*s == 'D') { // test the character under the cursor; s is already positioned inside the field
+								long x = strtol(s + 1, &t, 10);
+								if (x > max) max = x;
+								s = t + 1;
+							} else ++s;
+						}
+						intv->end = intv->beg + max;
+					}
+				}
+			}
+			b = i + 1;
+			++id;
+		}
+	}
+	if (intv->tid < 0 || intv->beg < 0 || intv->end < 0) return -1;
+	intv->bin = ti_reg2bin(intv->beg, intv->end);
+	return 0;
+}
+
+/************
+ * indexing *
+ ************/
+
+// requirement: len <= LEN_MASK
+static inline void insert_offset(khash_t(i) *h, int bin, uint64_t beg, uint64_t end)
+{ // Append the chunk [beg, end) to the chunk list of bin, creating the list on first use.
+	khint_t k;
+	ti_binlist_t *l;
+	int ret;
+	k = kh_put(i, h, bin, &ret);
+	l = &kh_value(h, k);
+	if (ret) { // not present
+		l->m = 1; l->n = 0;
+		l->list = (pair64_t*)calloc(l->m, 16); // 16 is the size of one pair64_t (two uint64_t)
+	}
+	if (l->n == l->m) { // list full: double its capacity
+		l->m <<= 1;
+		l->list = (pair64_t*)realloc(l->list, l->m * 16);
+	}
+	l->list[l->n].u = beg; l->list[l->n++].v = end;
+}
+
+static inline void insert_offset2(ti_lidx_t *index2, int _beg, int _end, uint64_t offset)
+{ // Record offset for every linear-index window touched by [_beg, _end) that has no offset yet.
+	int i, beg, end;
+	beg = _beg >> TAD_LIDX_SHIFT; // first window touched
+	end = (_end - 1) >> TAD_LIDX_SHIFT; // last window touched
+	if (index2->m < end + 1) { // grow the array, zero-filling the new tail so the slots read as unset
+		int old_m = index2->m;
+		index2->m = end + 1;
+		kroundup32(index2->m);
+		index2->offset = (uint64_t*)realloc(index2->offset, index2->m * 8);
+		memset(index2->offset + old_m, 0, 8 * (index2->m - old_m));
+	}
+	if (beg == end) {
+		if (index2->offset[beg] == 0) index2->offset[beg] = offset;
+	} else {
+		for (i = beg; i <= end; ++i)
+			if (index2->offset[i] == 0) index2->offset[i] = offset; // a window keeps the first offset it was given
+	}
+	if (index2->n < end + 1) index2->n = end + 1;
+}
+
+static void merge_chunks(ti_index_t *idx)
+{ // In every bin, fuse neighbouring chunks when one ends in the same compressed block in which the next begins.
+	khash_t(i) *index;
+	int i, l, m;
+	khint_t k;
+	for (i = 0; i < idx->n; ++i) {
+		index = idx->index[i];
+		for (k = kh_begin(index); k != kh_end(index); ++k) {
+			ti_binlist_t *p;
+			if (!kh_exist(index, k)) continue;
+			p = &kh_value(index, k);
+			m = 0; // index of the last chunk kept so far
+			for (l = 1; l < p->n; ++l) {
+				if (p->list[m].v>>16 == p->list[l].u>>16) p->list[m].v = p->list[l].v; // >>16 drops the in-block offset, leaving the block address
+				else p->list[++m] = p->list[l];
+			} // ~for(l)
+			p->n = m + 1;
+		} // ~for(k)
+	} // ~for(i)
+}
+
+static void fill_missing(ti_index_t *idx)
+{ // Give every unset (zero) linear-index slot the offset stored in the slot before it.
+	int tid, w;
+	for (tid = 0; tid < idx->n; ++tid) {
+		const int n_win = idx->index2[tid].n;
+		uint64_t *off = idx->index2[tid].offset;
+		for (w = 1; w < n_win; ++w)
+			if (off[w] == 0) off[w] = off[w - 1];
+	}
+}
+
+ti_index_t *ti_index_core(BGZF *fp, const ti_conf_t *conf)
+{ // Build the index for the position-sorted file open on fp using column layout conf; exits the process on unparsable or unsorted input.
+	int ret;
+	ti_index_t *idx;
+	uint32_t last_bin, save_bin;
+	int32_t last_coor, last_tid, save_tid;
+	uint64_t save_off, last_off, lineno = 0;
+	kstring_t *str;
+	str = calloc(1, sizeof(kstring_t));
+	idx = (ti_index_t*)calloc(1, sizeof(ti_index_t));
+	idx->conf = *conf;
+	idx->n = idx->max = 0;
+	idx->tname = kh_init(s);
+	idx->index = 0; idx->index2 = 0;
+
+	save_bin = save_tid = last_tid = last_bin = 0xffffffffu;
+	save_off = last_off = bgzf_tell(fp); last_coor = 0xffffffffu;
+	while ((ret = ti_readline(fp, str)) >= 0) {
+		ti_intv_t intv;
+		++lineno;
+		if (lineno <= idx->conf.line_skip || str->s[0] == idx->conf.meta_char) { // skipped and meta lines only advance the offset
+			last_off = bgzf_tell(fp);
+			continue;
+		}
+		if (get_intv(idx, str, &intv) < 0) { // tid/beg/end would still be -1 and index the arrays below out of bounds
+			fprintf(stderr, "[ti_index_core] fail to parse line %llu\n", (unsigned long long)lineno);
+			exit(1);
+		}
+		if (last_tid != intv.tid) { // change of chromosomes
+			last_tid = intv.tid;
+			last_bin = 0xffffffffu;
+		} else if (last_coor > intv.beg) {
+			fprintf(stderr, "[ti_index_core] the file out of order at line %llu\n", (unsigned long long)lineno);
+			exit(1);
+		}
+		insert_offset2(&idx->index2[intv.tid], intv.beg, intv.end, last_off);
+		if (intv.bin != last_bin) { // then possibly write the binning index
+			if (save_bin != 0xffffffffu) // save_bin==0xffffffffu only happens to the first record
+				insert_offset(idx->index[save_tid], save_bin, save_off, last_off);
+			save_off = last_off;
+			save_bin = last_bin = intv.bin;
+			save_tid = intv.tid;
+			if (save_tid < 0) break;
+		}
+		if (bgzf_tell(fp) <= last_off) {
+			fprintf(stderr, "[ti_index_core] bug in BGZF: %llx < %llx\n",
+					(unsigned long long)bgzf_tell(fp), (unsigned long long)last_off);
+			exit(1);
+		}
+		last_off = bgzf_tell(fp);
+		last_coor = intv.beg;
+	}
+	if (save_tid >= 0) insert_offset(idx->index[save_tid], save_bin, save_off, bgzf_tell(fp)); // close the chunk that was still open
+	merge_chunks(idx);
+	fill_missing(idx);
+
+	free(str->s); free(str);
+	return idx;
+}
+
+void ti_index_destroy(ti_index_t *idx)
+{
+	khint_t it;
+	int tid;
+	if (!idx) return;
+	// target names: the keys are heap copies made when the names were inserted
+	for (it = kh_begin(idx->tname); it != kh_end(idx->tname); ++it) {
+		if (!kh_exist(idx->tname, it)) continue;
+		free((char*)kh_key(idx->tname, it));
+	}
+	kh_destroy(s, idx->tname);
+	// per-target data: the chunk list of every bin, the bin table, then the linear offsets
+	for (tid = 0; tid < idx->n; ++tid) {
+		khash_t(i) *bins = idx->index[tid];
+		ti_lidx_t *lin = idx->index2 + tid;
+		for (it = kh_begin(bins); it != kh_end(bins); ++it) {
+			if (!kh_exist(bins, it)) continue;
+			free(kh_value(bins, it).list);
+		}
+		kh_destroy(i, bins);
+		free(lin->offset);
+	}
+	// finally the two per-target arrays and the index object itself
+	free(idx->index);
+	free(idx->index2);
+	free(idx);
+}
+
+/******************
+ * index file I/O *
+ ******************/
+
+void ti_index_save(const ti_index_t *idx, BGZF *fp)
+{ // Serialize idx to fp; multi-byte values are byte-swapped on big-endian hosts before being written.
+	int32_t i, size, ti_is_be;
+	khint_t k;
+	ti_is_be = bam_is_big_endian();
+	bgzf_write(fp, "TBI\1", 4); // magic number checked by ti_index_load_core
+	if (ti_is_be) {
+		uint32_t x = idx->n;
+		bgzf_write(fp, bam_swap_endian_4p(&x), 4);
+	} else bgzf_write(fp, &idx->n, 4);
+	assert(sizeof(ti_conf_t) == 24); // the configuration is written as six 4-byte words
+	if (ti_is_be) { // write ti_conf_t;
+		uint32_t x[6];
+		memcpy(x, &idx->conf, 24);
+		for (i = 0; i < 6; ++i) bgzf_write(fp, bam_swap_endian_4p(&x[i]), 4);
+	} else bgzf_write(fp, &idx->conf, sizeof(ti_conf_t));
+	{ // write target names
+		char **name;
+		int32_t l = 0;
+		name = calloc(kh_size(idx->tname), sizeof(void*));
+		for (k = kh_begin(idx->tname); k != kh_end(idx->tname); ++k)
+			if (kh_exist(idx->tname, k))
+				name[kh_value(idx->tname, k)] = (char*)kh_key(idx->tname, k); // order the names by target id
+		for (i = 0; i < kh_size(idx->tname); ++i)
+			l += strlen(name[i]) + 1; // total length, counting each terminating NUL
+		if (ti_is_be) bgzf_write(fp, bam_swap_endian_4p(&l), 4);
+		else bgzf_write(fp, &l, 4);
+		for (i = 0; i < kh_size(idx->tname); ++i)
+			bgzf_write(fp, name[i], strlen(name[i]) + 1);
+		free(name);
+	}
+	for (i = 0; i < idx->n; ++i) {
+		khash_t(i) *index = idx->index[i];
+		ti_lidx_t *index2 = idx->index2 + i;
+		// write binning index
+		size = kh_size(index);
+		if (ti_is_be) { // big endian
+			uint32_t x = size;
+			bgzf_write(fp, bam_swap_endian_4p(&x), 4);
+		} else bgzf_write(fp, &size, 4);
+		for (k = kh_begin(index); k != kh_end(index); ++k) {
+			if (kh_exist(index, k)) {
+				ti_binlist_t *p = &kh_value(index, k);
+				if (ti_is_be) { // big endian
+					uint32_t x;
+					x = kh_key(index, k); bgzf_write(fp, bam_swap_endian_4p(&x), 4);
+					x = p->n; bgzf_write(fp, bam_swap_endian_4p(&x), 4);
+					for (x = 0; (int)x < p->n; ++x) { // swap the list in place for writing
+						bam_swap_endian_8p(&p->list[x].u);
+						bam_swap_endian_8p(&p->list[x].v);
+					}
+					bgzf_write(fp, p->list, 16 * p->n);
+					for (x = 0; (int)x < p->n; ++x) { // swap back so the in-memory index stays usable
+						bam_swap_endian_8p(&p->list[x].u);
+						bam_swap_endian_8p(&p->list[x].v);
+					}
+				} else {
+					bgzf_write(fp, &kh_key(index, k), 4);
+					bgzf_write(fp, &p->n, 4);
+					bgzf_write(fp, p->list, 16 * p->n);
+				}
+			}
+		}
+		// write linear index (index2)
+		if (ti_is_be) {
+			int x = index2->n;
+			bgzf_write(fp, bam_swap_endian_4p(&x), 4);
+		} else bgzf_write(fp, &index2->n, 4);
+		if (ti_is_be) { // big endian
+			int x;
+			for (x = 0; (int)x < index2->n; ++x)
+				bam_swap_endian_8p(&index2->offset[x]);
+			bgzf_write(fp, index2->offset, 8 * index2->n);
+			for (x = 0; (int)x < index2->n; ++x) // swap back after writing
+				bam_swap_endian_8p(&index2->offset[x]);
+		} else bgzf_write(fp, index2->offset, 8 * index2->n);
+	}
+}
+
+static ti_index_t *ti_index_load_core(BGZF *fp)
+{ // Read an index in the layout written by ti_index_save; returns a newly allocated index, or 0 if fp is null or the magic is wrong.
+	int i, ti_is_be;
+	char magic[4];
+	ti_index_t *idx;
+	ti_is_be = bam_is_big_endian();
+	if (fp == 0) {
+		fprintf(stderr, "[ti_index_load_core] fail to load index.\n");
+		return 0;
+	}
+	// a short read (empty or truncated file) must not reach strncmp with magic[] unset
+	if (bgzf_read(fp, magic, 4) != 4 || strncmp(magic, "TBI\1", 4)) {
+		fprintf(stderr, "[ti_index_load] wrong magic number.\n");
+		return 0;
+	}
+	idx = (ti_index_t*)calloc(1, sizeof(ti_index_t));
+	bgzf_read(fp, &idx->n, 4);
+	if (ti_is_be) bam_swap_endian_4p(&idx->n);
+	idx->tname = kh_init(s);
+	idx->index = (khash_t(i)**)calloc(idx->n, sizeof(void*));
+	idx->index2 = (ti_lidx_t*)calloc(idx->n, sizeof(ti_lidx_t));
+	// read idx->conf
+	bgzf_read(fp, &idx->conf, sizeof(ti_conf_t));
+	if (ti_is_be) {
+		bam_swap_endian_4p(&idx->conf.preset);
+		bam_swap_endian_4p(&idx->conf.sc);
+		bam_swap_endian_4p(&idx->conf.bc);
+		bam_swap_endian_4p(&idx->conf.ec);
+		bam_swap_endian_4p(&idx->conf.meta_char);
+		bam_swap_endian_4p(&idx->conf.line_skip);
+	}
+	{ // read target names
+		int j, ret;
+		int32_t l, start = 0; // start: offset in buf of the name currently being scanned
+		uint8_t *buf;
+		bgzf_read(fp, &l, 4);
+		if (ti_is_be) bam_swap_endian_4p(&l);
+		buf = calloc(l, 1);
+		bgzf_read(fp, buf, l);
+		// The names sit back to back in buf, each NUL-terminated, so each one is duplicated
+		// straight from the buffer; an empty name is then simply an empty string.
+		for (i = j = 0; i < l; ++i) {
+			if (buf[i] == 0) {
+				khint_t k = kh_put(s, idx->tname, strdup((char*)buf + start), &ret);
+				kh_value(idx->tname, k) = j++; // ids follow the order of the names in the file
+				start = i + 1;
+			}
+		}
+		free(buf);
+	}
+	for (i = 0; i < idx->n; ++i) {
+		khash_t(i) *index;
+		ti_lidx_t *index2 = idx->index2 + i;
+		uint32_t key, size;
+		khint_t k;
+		int j, ret;
+		ti_binlist_t *p;
+		index = idx->index[i] = kh_init(i);
+		// load binning index
+		bgzf_read(fp, &size, 4);
+		if (ti_is_be) bam_swap_endian_4p(&size);
+		for (j = 0; j < (int)size; ++j) {
+			bgzf_read(fp, &key, 4);
+			if (ti_is_be) bam_swap_endian_4p(&key);
+			k = kh_put(i, index, key, &ret);
+			p = &kh_value(index, k);
+			bgzf_read(fp, &p->n, 4);
+			if (ti_is_be) bam_swap_endian_4p(&p->n);
+			p->m = p->n;
+			p->list = (pair64_t*)malloc(p->m * 16);
+			bgzf_read(fp, p->list, 16 * p->n);
+			if (ti_is_be) {
+				int x;
+				for (x = 0; x < p->n; ++x) {
+					bam_swap_endian_8p(&p->list[x].u);
+					bam_swap_endian_8p(&p->list[x].v);
+				}
+			}
+		}
+		// load linear index
+		bgzf_read(fp, &index2->n, 4);
+		if (ti_is_be) bam_swap_endian_4p(&index2->n);
+		index2->m = index2->n;
+		index2->offset = (uint64_t*)calloc(index2->m, 8);
+		bgzf_read(fp, index2->offset, index2->n * 8);
+		if (ti_is_be)
+			for (j = 0; j < index2->n; ++j) bam_swap_endian_8p(&index2->offset[j]);
+	}
+	return idx; // NOTE(review): the counts and reads after the magic are still taken on trust from the file
+}
+
+ti_index_t *ti_index_load_local(const char *fnidx)
+{
+	/* Open a local index file, parse it, and close the handle again. */
+	ti_index_t *loaded;
+	BGZF *fh = bgzf_open(fnidx, "r");
+	if (fh == 0) return 0; /* cannot open: no index */
+	loaded = ti_index_load_core(fh);
+	bgzf_close(fh);
+	return loaded;
+}
+
+#ifdef _USE_KNETFILE
+/* Copy the remote file at an ftp:// or http:// url into the working directory,
+ * under the name that follows the last '/'. Open failures are reported on stderr. */
+static void download_from_remote(const char *url)
+{
+	const int buf_size = 1 * 1024 * 1024;
+	char *fn;
+	FILE *fp;
+	uint8_t *buf;
+	knetFile *fp_remote;
+	int l;
+	if (strstr(url, "ftp://") != url && strstr(url, "http://") != url) return;
+	for (fn = (char*)url + strlen(url) - 1; fn >= url; --fn)
+		if (*fn == '/') break;
+	++fn; // fn now points to the file name
+	if ((fp_remote = knet_open(url, "r")) == 0) {
+		fprintf(stderr, "[download_from_remote] fail to open remote file.\n");
+		return;
+	}
+	if ((fp = fopen(fn, "wb")) == 0) { // binary mode: the payload must not be newline-translated
+		fprintf(stderr, "[download_from_remote] fail to create file in the working directory.\n");
+		knet_close(fp_remote);
+		return;
+	}
+	buf = (uint8_t*)calloc(buf_size, 1);
+	while ((l = knet_read(fp_remote, buf, buf_size)) > 0) // stop on EOF (0) and on error (<0)
+		fwrite(buf, 1, l, fp);
+	free(buf);
+	fclose(fp);
+	knet_close(fp_remote);
+}
+#else
+/* Stub built when _USE_KNETFILE is not defined: nothing is fetched. */
+static void download_from_remote(const char *url)
+{
+}
+#endif
+
+static char *get_local_version(const char *fn)
+{
+	/* Return a heap-allocated path to a local "<fn>.tbi", or 0 if there is none.
+	 * For an ftp:// or http:// fn the local name is the part after the last '/';
+	 * it is downloaded into the working directory unless it already exists. */
+	struct stat st;
+	char *local = (char*)calloc(strlen(fn) + 5, 1);
+	strcpy(local, fn);
+	strcat(local, ".tbi");
+	if (strstr(local, "ftp://") == local || strstr(local, "http://") == local) {
+		char *remote = local, *slash = local + strlen(local) - 1;
+		while (slash >= remote && *slash != '/') --slash;
+		local = strdup(slash + 1);
+		if (stat(local, &st) == 0) { free(remote); return local; } /* already present */
+		fprintf(stderr, "[%s] downloading the index file...\n", __func__);
+		download_from_remote(remote);
+		free(remote);
+	}
+	if (stat(local, &st) == 0) return local;
+	free(local);
+	return 0;
+}
+
+const char **ti_seqname(const ti_index_t *idx, int *n)
+{
+	/* Return a calloc'ed array of idx->n name pointers ordered by tid and store
+	 * idx->n in *n. The strings themselves are the keys held by idx->tname. */
+	const char **by_tid = calloc(idx->n, sizeof(void*));
+	khint_t it;
+	*n = idx->n;
+	for (it = kh_begin(idx->tname); it < kh_end(idx->tname); ++it)
+		if (kh_exist(idx->tname, it)) by_tid[kh_val(idx->tname, it)] = kh_key(idx->tname, it);
+	return by_tid;
+}
+
+ti_index_t *ti_index_load(const char *fn)
+{
+	ti_index_t *idx;
+	char *fname = get_local_version(fn); // local path of "<fn>.tbi", downloaded if fn is a URL
+	if (fname == 0) return 0; // no index could be found or fetched
+	idx = ti_index_load_local(fname);
+	free(fname);
+	if (idx == 0) fprintf(stderr, "[ti_index_load] fail to load the tabix index.\n");
+	return idx;
+}
+
+int ti_index_build2(const char *fn, const ti_conf_t *conf, const char *_fnidx)
+{
+	// Index the BGZF file fn using conf and write the result to _fnidx, or to
+	// "<fn>.tbi" when _fnidx is 0. Returns 0 on success and -1 on failure.
+	int ret = -1;
+	char *fnidx;
+	BGZF *fp, *fpidx;
+	ti_index_t *idx;
+	if ((fp = bgzf_open(fn, "r")) == 0) {
+		fprintf(stderr, "[ti_index_build2] fail to open the input file.\n");
+		return -1;
+	}
+	idx = ti_index_core(fp, conf);
+	bgzf_close(fp);
+	if (_fnidx == 0) {
+		fnidx = (char*)calloc(strlen(fn) + 5, 1);
+		strcpy(fnidx, fn); strcat(fnidx, ".tbi");
+	} else fnidx = strdup(_fnidx);
+	if ((fpidx = bgzf_open(fnidx, "w")) != 0) {
+		ti_index_save(idx, fpidx);
+		bgzf_close(fpidx);
+		ret = 0;
+	} else fprintf(stderr, "[ti_index_build2] fail to create the index file.\n");
+	ti_index_destroy(idx); // released on both paths, so a failed open no longer leaks it
+	free(fnidx);
+	return ret;
+}
+
+int ti_index_build(const char *fn, const ti_conf_t *conf)
+{
+	return ti_index_build2(fn, conf, 0); // _fnidx == 0: ti_index_build2() writes "<fn>.tbi"
+}
+
+/********************************************
+ * parse a region in the format chr:beg-end *
+ ********************************************/
+
+int ti_get_tid(const ti_index_t *idx, const char *name)
+{
+	/* Map a sequence name to its tid through idx->tname; -1 if it is unknown. */
+	const khash_t(s) *names = idx->tname;
+	khiter_t hit = kh_get(s, names, name);
+	if (hit == kh_end(names)) return -1;
+	return kh_value(names, hit);
+}
+
+int ti_parse_region(const ti_index_t *idx, const char *str, int *tid, int *begin, int *end)
+{
+	/* Parse "name", "name:beg" or "name:beg-end" (commas and whitespace are dropped).
+	 * A positive beg is decremented; a missing end is 1<<29. Returns 0, or -1 on failure. */
+	int i, k, l = strlen(str);
+	char *p, *s = (char*)malloc(l+1);
+	/* squeeze out "," and blanks; the cast keeps isspace() defined for bytes >= 0x80 */
+	for (i = k = 0; i != l; ++i)
+		if (str[i] != ',' && !isspace((unsigned char)str[i])) s[k++] = str[i];
+	s[k] = 0;
+	for (i = 0; i != k; ++i) if (s[i] == ':') break;
+	s[i] = 0;
+	if ((*tid = ti_get_tid(idx, s)) < 0) {
+		free(s);
+		return -1;
+	}
+	if (i == k) { /* dump the whole sequence */
+		*begin = 0; *end = 1<<29; free(s);
+		return 0;
+	}
+	for (p = s + i + 1; i != k; ++i) if (s[i] == '-') break;
+	*begin = atoi(p);
+	if (i < k) {
+		p = s + i + 1;
+		*end = atoi(p);
+	} else *end = 1<<29;
+	if (*begin > 0) --*begin;
+	free(s);
+	if (*begin > *end) return -1;
+	return 0;
+}
+
+/*******************************
+ * retrieve a specified region *
+ *******************************/
+
+#define MAX_BIN 37450 // =(8^6-1)/7+1
+
+static inline int reg2bins(uint32_t beg, uint32_t end, uint16_t list[MAX_BIN])
+{
+	// Fill list with every bin that may overlap [beg,end): bin 0, then the bins
+	// of each finer level (offsets 1, 9, 73, 585, 4681; shifts 26 down to 14).
+	static const int first[5] = { 1, 9, 73, 585, 4681 };
+	int n = 0, lvl, shift, bin;
+	if (beg >= end) return 0;
+	if (end >= 1u<<29) end = 1u<<29;
+	--end;
+	list[n++] = 0;
+	for (lvl = 0, shift = 26; lvl < 5; ++lvl, shift -= 3)
+		for (bin = first[lvl] + (beg>>shift); bin <= first[lvl] + (end>>shift); ++bin) list[n++] = bin;
+	return n;
+}
+
+ti_iter_t ti_iter_first()
+{
+	/* Zeroed iterator flagged from_first: ti_iter_read() then reads line by line. */
+	ti_iter_t it = calloc(1, sizeof(struct __ti_iter_t));
+	it->from_first = 1;
+	return it;
+}
+
+ti_iter_t ti_iter_query(const ti_index_t *idx, int tid, int beg, int end)
+{
+	// Build an iterator over the file chunks that may hold records of sequence tid
+	// overlapping [beg,end). Returns 0 if end < beg or tid is not a sequence of idx.
+	uint16_t *bins;
+	int i, n_bins, n_off;
+	pair64_t *off;
+	khint_t k;
+	khash_t(i) *index;
+	uint64_t min_off;
+	ti_iter_t iter = 0;
+	if (beg < 0) beg = 0;
+	if (end < beg || tid < 0 || tid >= idx->n) return 0; // tid indexes idx->index[] and idx->index2[]
+	// initialize the iterator
+	iter = calloc(1, sizeof(struct __ti_iter_t));
+	iter->idx = idx; iter->tid = tid; iter->beg = beg; iter->end = end; iter->i = -1;
+	// random access
+	bins = (uint16_t*)calloc(MAX_BIN, 2);
+	n_bins = reg2bins(beg, end, bins);
+	index = idx->index[tid];
+	if (idx->index2[tid].n > 0) {
+		min_off = (beg>>TAD_LIDX_SHIFT >= idx->index2[tid].n)? idx->index2[tid].offset[idx->index2[tid].n-1]
+			: idx->index2[tid].offset[beg>>TAD_LIDX_SHIFT];
+		if (min_off == 0) { // improvement for index files built by tabix prior to 0.1.4
+			int n = beg>>TAD_LIDX_SHIFT;
+			if (n > idx->index2[tid].n) n = idx->index2[tid].n;
+			for (i = n - 1; i >= 0; --i)
+				if (idx->index2[tid].offset[i] != 0) break;
+			if (i >= 0) min_off = idx->index2[tid].offset[i];
+		}
+	} else min_off = 0; // tabix 0.1.2 may produce such index files
+	for (i = n_off = 0; i < n_bins; ++i) {
+		if ((k = kh_get(i, index, bins[i])) != kh_end(index))
+			n_off += kh_value(index, k).n;
+	}
+	if (n_off == 0) { free(bins); return iter; } // none of the candidate bins holds a chunk
+	off = (pair64_t*)calloc(n_off, 16);
+	for (i = n_off = 0; i < n_bins; ++i) {
+		if ((k = kh_get(i, index, bins[i])) != kh_end(index)) {
+			int j;
+			ti_binlist_t *p = &kh_value(index, k);
+			for (j = 0; j < p->n; ++j)
+				if (p->list[j].v > min_off) off[n_off++] = p->list[j];
+		}
+	}
+	free(bins);
+	if (n_off == 0) { free(off); return iter; } // all chunks end at or before min_off: keep the list empty
+	{
+		int l;
+		ks_introsort(off, n_off, off);
+		// resolve completely contained adjacent blocks
+		for (i = 1, l = 0; i < n_off; ++i)
+			if (off[l].v < off[i].v)
+				off[++l] = off[i];
+		n_off = l + 1;
+		// resolve overlaps between adjacent blocks; this may happen due to the merge in indexing
+		for (i = 1; i < n_off; ++i)
+			if (off[i-1].v >= off[i].u) off[i-1].v = off[i].u;
+		{ // merge adjacent blocks
+			for (i = 1, l = 0; i < n_off; ++i) {
+				if (off[l].v>>16 == off[i].u>>16) off[l].v = off[i].v;
+				else off[++l] = off[i];
+			}
+			n_off = l + 1;
+		}
+	}
+	iter->n_off = n_off; iter->off = off;
+	return iter;
+}
+
+const char *ti_iter_read(BGZF *fp, ti_iter_t iter, int *len)
+{
+	// Return the next line selected by iter (its length in *len when len is non-null),
+	// or 0 when nothing is left. The text lives in iter->str, which every call reads into.
+	if (iter == 0 || iter->finished) return 0; // the query functions return 0 on failure
+	if (iter->from_first) {
+		if (ti_readline(fp, &iter->str) < 0) {
+			iter->finished = 1;
+			return 0;
+		} else {
+			if (len) *len = iter->str.l;
+			return iter->str.s;
+		}
+	}
+	if (iter->n_off == 0) return 0;
+	while (1) {
+		if (iter->curr_off == 0 || iter->curr_off >= iter->off[iter->i].v) { // then jump to the next chunk
+			if (iter->i == iter->n_off - 1) break; // no more chunks
+			if (iter->i >= 0) assert(iter->curr_off == iter->off[iter->i].v); // otherwise bug
+			if (iter->i < 0 || iter->off[iter->i].v != iter->off[iter->i+1].u) { // not adjacent chunks; then seek
+				bgzf_seek(fp, iter->off[iter->i+1].u, SEEK_SET);
+				iter->curr_off = bgzf_tell(fp);
+			}
+			++iter->i;
+		}
+		if (ti_readline(fp, &iter->str) >= 0) {
+			ti_intv_t intv;
+			iter->curr_off = bgzf_tell(fp);
+			if (iter->str.s[0] == iter->idx->conf.meta_char) continue;
+			get_intv((ti_index_t*)iter->idx, &iter->str, &intv);
+			if (intv.tid != iter->tid || intv.beg >= iter->end) break; // no need to proceed
+			else if (intv.end > iter->beg && iter->end > intv.beg) {
+				if (len) *len = iter->str.l;
+				return iter->str.s;
+			}
+		} else break; // end of file
+	}
+	iter->finished = 1;
+	return 0;
+}
+
+void ti_iter_destroy(ti_iter_t iter)
+{
+	/* Free the line buffer, the chunk list and the iterator itself; 0 is accepted. */
+	if (iter == 0) return;
+	free(iter->str.s); free(iter->off);
+	free(iter);
+}
+
+int ti_fetch(BGZF *fp, const ti_index_t *idx, int tid, int beg, int end, void *data, ti_fetch_f func)
+{
+	// Call func(len, line, data) for each line of tid:beg-end; -1 if the query is rejected.
+	const char *s;
+	int len;
+	ti_iter_t iter = ti_iter_query(idx, tid, beg, end);
+	if (iter == 0) return -1; // no iterator was built: nothing to read
+	while ((s = ti_iter_read(fp, iter, &len)) != 0) func(len, s, data);
+	ti_iter_destroy(iter);
+	return 0;
+}
+
+/*******************
+ * High-level APIs *
+ *******************/
+
+tabix_t *ti_open(const char *fn, const char *fnidx)
+{
+	tabix_t *handle;
+	BGZF *stream = bgzf_open(fn, "r");
+	if (stream == 0) return 0; /* data file cannot be opened */
+	handle = calloc(1, sizeof(tabix_t)); /* zeroed: idx (and fnidx, if not given) start as 0 */
+	handle->fn = strdup(fn);
+	if (fnidx != 0) handle->fnidx = strdup(fnidx); /* optional explicit index path */
+	handle->fp = stream;
+	return handle;
+}
+
+void ti_close(tabix_t *t)
+{
+	/* Close the data stream, drop a loaded index and free the handle; 0 is accepted. */
+	if (t == 0) return;
+	bgzf_close(t->fp);
+	if (t->idx != 0) ti_index_destroy(t->idx);
+	free(t->fn); free(t->fnidx);
+	free(t);
+}
+
+int ti_lazy_index_load(tabix_t *t)
+{
+	/* Load the index on first use: from t->fnidx when one was given, otherwise
+	 * through ti_index_load(t->fn). Returns 0 if t->idx is set, -1 otherwise. */
+	if (t->idx != 0) return 0;
+	if (t->fnidx != 0) t->idx = ti_index_load_local(t->fnidx);
+	else t->idx = ti_index_load(t->fn);
+	return (t->idx == 0)? -1 : 0;
+}
+
+ti_iter_t ti_queryi(tabix_t *t, int tid, int beg, int end)
+{
+	/* tid < 0 asks for a plain sequential read; otherwise query the index by tid. */
+	if (tid < 0) return ti_iter_first();
+	return (ti_lazy_index_load(t) == 0)? ti_iter_query(t->idx, tid, beg, end) : 0;
+}
+
+ti_iter_t ti_querys(tabix_t *t, const char *reg)
+{
+	/* reg == 0: sequential read. Otherwise 0 if the index or the region is unusable. */
+	int tid, beg, end;
+	if (reg == 0) return ti_iter_first();
+	if (ti_lazy_index_load(t) != 0 || ti_parse_region(t->idx, reg, &tid, &beg, &end) < 0) return 0;
+	return ti_iter_query(t->idx, tid, beg, end);
+}
+
+ti_iter_t ti_query(tabix_t *t, const char *name, int beg, int end)
+{
+	/* name == 0 asks for a sequential read; otherwise the index is loaded on
+	 * demand and name must be one of its sequences, else 0 is returned. */
+	int tid;
+	if (name == 0) return ti_iter_first();
+	if (ti_lazy_index_load(t) != 0 || (tid = ti_get_tid(t->idx, name)) < 0) return 0;
+	return ti_iter_query(t->idx, tid, beg, end);
+}
+
+const char *ti_read(tabix_t *t, ti_iter_t iter, int *len)
+{
+	return ti_iter_read(t->fp, iter, len); // ti_iter_read() on the handle's own data stream
+}
diff --git a/tabix/khash.h b/tabix/khash.h
new file mode 100644
index 0000000..1d583ef
--- /dev/null
+++ b/tabix/khash.h
@@ -0,0 +1,486 @@
+/* The MIT License
+
+   Copyright (c) 2008 Genome Research Ltd (GRL).
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/* Contact: Heng Li <lh3@sanger.ac.uk> */
+
+/*
+  An example:
+
+#include "khash.h"
+KHASH_MAP_INIT_INT(32, char)
+int main() {
+	int ret, is_missing;
+	khiter_t k;
+	khash_t(32) *h = kh_init(32);
+	k = kh_put(32, h, 5, &ret);
+	if (!ret) kh_del(32, h, k);
+	kh_value(h, k) = 10;
+	k = kh_get(32, h, 10);
+	is_missing = (k == kh_end(h));
+	k = kh_get(32, h, 5);
+	kh_del(32, h, k);
+	for (k = kh_begin(h); k != kh_end(h); ++k)
+		if (kh_exist(h, k)) kh_value(h, k) = 1;
+	kh_destroy(32, h);
+	return 0;
+}
+*/
+
+/*
+  2008-09-19 (0.2.3):
+
+	* Corrected the example
+	* Improved interfaces
+
+  2008-09-11 (0.2.2):
+
+	* Improved speed a little in kh_put()
+
+  2008-09-10 (0.2.1):
+
+	* Added kh_clear()
+	* Fixed a compiling error
+
+  2008-09-02 (0.2.0):
+
+	* Changed to token concatenation which increases flexibility.
+
+  2008-08-31 (0.1.2):
+
+	* Fixed a bug in kh_get(), which has not been tested previously.
+
+  2008-08-31 (0.1.1):
+
+	* Added destructor
+*/
+
+
+#ifndef __AC_KHASH_H
+#define __AC_KHASH_H
+
+/*!
+  @header
+
+  Generic hash table library.
+
+  @copyright Heng Li
+ */
+
+#define AC_VERSION_KHASH_H "0.2.2"
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+typedef uint32_t khint_t; /* bucket index / hash value type */
+typedef khint_t khiter_t; /* iterator: a bucket index */
+
+#define __ac_HASH_PRIME_SIZE 32 /* number of entries in __ac_prime_list */
+static const uint32_t __ac_prime_list[__ac_HASH_PRIME_SIZE] = /* bucket counts kh_resize_*() chooses from */
+{
+  0ul,          3ul,          11ul,         23ul,         53ul,
+  97ul,         193ul,        389ul,        769ul,        1543ul,
+  3079ul,       6151ul,       12289ul,      24593ul,      49157ul,
+  98317ul,      196613ul,     393241ul,     786433ul,     1572869ul,
+  3145739ul,    6291469ul,    12582917ul,   25165843ul,   50331653ul,
+  100663319ul,  201326611ul,  402653189ul,  805306457ul,  1610612741ul,
+  3221225473ul, 4294967291ul
+};
+
+#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2) /* 2 flag bits per bucket, 16 buckets per word; bit 1 = empty */
+#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1) /* bit 0 = deleted */
+#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3) /* nonzero if empty or deleted */
+#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
+#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
+#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1))) /* both bits clear: bucket holds a live key */
+#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
+
+static const double __ac_HASH_UPPER = 0.77; /* upper_bound = n_buckets * this value (see kh_resize_*()) */
+
+#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) /* defines type kh_<name>_t and its kh_*_<name>() functions */ \
+	typedef struct {													\
+		khint_t n_buckets, size, n_occupied, upper_bound; /* capacity, live keys, non-empty buckets, resize threshold */ \
+		uint32_t *flags; /* 2 bits per bucket, read through __ac_is*() */ \
+		khkey_t *keys;													\
+		khval_t *vals;													\
+	} kh_##name##_t;													\
+	static inline kh_##name##_t *kh_init_##name() {						\
+		return (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t));		\
+	}																	\
+	static inline void kh_destroy_##name(kh_##name##_t *h)				\
+	{																	\
+		if (h) {														\
+			free(h->keys); free(h->flags);								\
+			free(h->vals);												\
+			free(h);													\
+		}																\
+	}																	\
+	static inline void kh_clear_##name(kh_##name##_t *h)				\
+	{																	\
+		if (h && h->flags) {											\
+			memset(h->flags, 0xaa, ((h->n_buckets>>4) + 1) * sizeof(uint32_t)); /* 0xaa sets the "empty" bit of every bucket */ \
+			h->size = h->n_occupied = 0;								\
+		}																\
+	}																	\
+	static inline khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
+	{																	\
+		if (h->n_buckets) {												\
+			khint_t inc, k, i, last;									\
+			k = __hash_func(key); i = k % h->n_buckets;					\
+			inc = 1 + k % (h->n_buckets - 1); last = i; /* probe step also depends on the hash */ \
+			while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
+				if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \
+				else i += inc;											\
+				if (i == last) return h->n_buckets; /* back at the start: key is absent */ \
+			}															\
+			return __ac_iseither(h->flags, i)? h->n_buckets : i; /* n_buckets (kh_end) means "not found" */ \
+		} else return 0; /* no buckets yet: 0 == n_buckets == kh_end */ \
+	}																	\
+	static inline void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
+	{																	\
+		uint32_t *new_flags = 0;										\
+		khint_t j = 1;													\
+		{																\
+			khint_t t = __ac_HASH_PRIME_SIZE - 1;						\
+			while (__ac_prime_list[t] > new_n_buckets) --t;				\
+			new_n_buckets = __ac_prime_list[t+1]; /* smallest listed prime above the request */ \
+			if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0;	/* too small for the live keys: do nothing */ \
+			else {														\
+				new_flags = (uint32_t*)malloc(((new_n_buckets>>4) + 1) * sizeof(uint32_t));	\
+				memset(new_flags, 0xaa, ((new_n_buckets>>4) + 1) * sizeof(uint32_t)); \
+				if (h->n_buckets < new_n_buckets) {						\
+					h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
+					if (kh_is_map)										\
+						h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
+				}														\
+			}															\
+		}																\
+		if (j) { /* rehash every live key inside the existing arrays */ \
+			for (j = 0; j != h->n_buckets; ++j) {						\
+				if (__ac_iseither(h->flags, j) == 0) {					\
+					khkey_t key = h->keys[j];							\
+					khval_t val;										\
+					if (kh_is_map) val = h->vals[j];					\
+					__ac_set_isdel_true(h->flags, j); /* old slot j has been taken out */ \
+					while (1) {											\
+						khint_t inc, k, i;								\
+						k = __hash_func(key);							\
+						i = k % new_n_buckets;							\
+						inc = 1 + k % (new_n_buckets - 1);				\
+						while (!__ac_isempty(new_flags, i)) {			\
+							if (i + inc >= new_n_buckets) i = i + inc - new_n_buckets; \
+							else i += inc;								\
+						}												\
+						__ac_set_isempty_false(new_flags, i);			\
+						if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* slot i still holds an unmoved key: swap, then place that key */ \
+							{ khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
+							if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \
+							__ac_set_isdel_true(h->flags, i);			\
+						} else {										\
+							h->keys[i] = key;							\
+							if (kh_is_map) h->vals[i] = val;			\
+							break;										\
+						}												\
+					}													\
+				}														\
+			}															\
+			if (h->n_buckets > new_n_buckets) { /* shrinking: trim the arrays after the move */ \
+				h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
+				if (kh_is_map)											\
+					h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
+			}															\
+			free(h->flags);												\
+			h->flags = new_flags;										\
+			h->n_buckets = new_n_buckets;								\
+			h->n_occupied = h->size;									\
+			h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
+		}																\
+	}																	\
+	static inline khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
+	{																	\
+		khint_t x;														\
+		if (h->n_occupied >= h->upper_bound) { /* also true for a fresh table (0 >= 0) */ \
+			if (h->n_buckets > (h->size<<1)) kh_resize_##name(h, h->n_buckets - 1); \
+			else kh_resize_##name(h, h->n_buckets + 1);					\
+		}																\
+		{																\
+			khint_t inc, k, i, site, last;								\
+			x = site = h->n_buckets; k = __hash_func(key); i = k % h->n_buckets; \
+			if (__ac_isempty(h->flags, i)) x = i;						\
+			else {														\
+				inc = 1 + k % (h->n_buckets - 1); last = i;				\
+				while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
+					if (__ac_isdel(h->flags, i)) site = i; /* remember a deleted bucket seen while probing */ \
+					if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \
+					else i += inc;										\
+					if (i == last) { x = site; break; }					\
+				}														\
+				if (x == h->n_buckets) {								\
+					if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \
+					else x = i;											\
+				}														\
+			}															\
+		}																\
+		if (__ac_isempty(h->flags, x)) {								\
+			h->keys[x] = key;											\
+			__ac_set_isboth_false(h->flags, x);							\
+			++h->size; ++h->n_occupied;									\
+			*ret = 1; /* stored in a never-used bucket */ \
+		} else if (__ac_isdel(h->flags, x)) {							\
+			h->keys[x] = key;											\
+			__ac_set_isboth_false(h->flags, x);							\
+			++h->size;													\
+			*ret = 2; /* stored in a previously deleted bucket */ \
+		} else *ret = 0; /* key already present: nothing stored */ \
+		return x;														\
+	}																	\
+	static inline void kh_del_##name(kh_##name##_t *h, khint_t x)		\
+	{																	\
+		if (x != h->n_buckets && !__ac_iseither(h->flags, x)) {			\
+			__ac_set_isdel_true(h->flags, x); /* only flagged: keys[x] / vals[x] are left as they are */ \
+			--h->size;													\
+		}																\
+	}
+
+/* --- BEGIN OF HASH FUNCTIONS --- */
+
+/*! @function
+  @abstract     Integer hash function
+  @param  key   The integer [uint32_t]
+  @return       The hash value [khint_t]
+ */
+#define kh_int_hash_func(key) (uint32_t)(key)
+/*! @function
+  @abstract     Integer comparison function
+ */
+#define kh_int_hash_equal(a, b) ((a) == (b))
+/*! @function
+  @abstract     64-bit integer hash function
+  @param  key   The integer [uint64_t]
+  @return       The hash value [khint_t]
+ */
+#define kh_int64_hash_func(key) (uint32_t)((key)>>33^(key)^(key)<<11)
+/*! @function
+  @abstract     64-bit integer comparison function
+ */
+#define kh_int64_hash_equal(a, b) ((a) == (b))
+/*! @function
+  @abstract     const char* hash function
+  @param  s     Pointer to a null terminated string
+  @return       The hash value
+ */
+static inline khint_t __ac_X31_hash_string(const char *s)
+{
+	khint_t acc = *s; /* the empty string hashes to 0 */
+	if (acc) while (*++s) acc = acc * 31 + *s; /* same value as (acc << 5) - acc + *s */
+	return acc;
+}
+/*! @function
+  @abstract     Another interface to const char* hash function
+  @param  key   Pointer to a null terminated string [const char*]
+  @return       The hash value [khint_t]
+ */
+#define kh_str_hash_func(key) __ac_X31_hash_string(key)
+/*! @function
+  @abstract     Const char* comparison function
+ */
+#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)
+
+/* --- END OF HASH FUNCTIONS --- */
+
+/* Other necessary macros... */
+
+/*!
+  @abstract Type of the hash table.
+  @param  name  Name of the hash table [symbol]
+ */
+#define khash_t(name) kh_##name##_t
+
+/*! @function
+  @abstract     Initiate a hash table.
+  @param  name  Name of the hash table [symbol]
+  @return       Pointer to the hash table [khash_t(name)*]
+ */
+#define kh_init(name) kh_init_##name()
+
+/*! @function
+  @abstract     Destroy a hash table.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+ */
+#define kh_destroy(name, h) kh_destroy_##name(h)
+
+/*! @function
+  @abstract     Reset a hash table without deallocating memory.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+ */
+#define kh_clear(name, h) kh_clear_##name(h)
+
+/*! @function
+  @abstract     Resize a hash table.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  s     New size [khint_t]
+ */
+#define kh_resize(name, h, s) kh_resize_##name(h, s)
+
+/*! @function
+  @abstract     Insert a key to the hash table.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  k     Key [type of keys]
+  @param  r     Extra return code: 0 if the key is present in the hash table;
+                1 if the bucket is empty (never used); 2 if the element in
+				the bucket has been deleted [int*]
+  @return       Iterator to the inserted element [khint_t]
+ */
+#define kh_put(name, h, k, r) kh_put_##name(h, k, r)
+
+/*! @function
+  @abstract     Retrieve a key from the hash table.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  k     Key [type of keys]
+  @return       Iterator to the found element, or kh_end(h) if the element is absent [khint_t]
+ */
+#define kh_get(name, h, k) kh_get_##name(h, k)
+
+/*! @function
+  @abstract     Remove a key from the hash table.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  k     Iterator to the element to be deleted [khint_t]
+ */
+#define kh_del(name, h, k) kh_del_##name(h, k)
+
+
+/*! @function
+  @abstract     Test whether a bucket contains data.
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  x     Iterator to the bucket [khint_t]
+  @return       1 if containing data; 0 otherwise [int]
+ */
+#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))
+
+/*! @function
+  @abstract     Get key given an iterator
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  x     Iterator to the bucket [khint_t]
+  @return       Key [type of keys]
+ */
+#define kh_key(h, x) ((h)->keys[x])
+
+/*! @function
+  @abstract     Get value given an iterator
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  x     Iterator to the bucket [khint_t]
+  @return       Value [type of values]
+  @discussion   For hash sets, calling this results in segfault.
+ */
+#define kh_val(h, x) ((h)->vals[x])
+
+/*! @function
+  @abstract     Alias of kh_val()
+ */
+#define kh_value(h, x) ((h)->vals[x])
+
+/*! @function
+  @abstract     Get the start iterator
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @return       The start iterator [khint_t]
+ */
+#define kh_begin(h) (khint_t)(0)
+
+/*! @function
+  @abstract     Get the end iterator
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @return       The end iterator [khint_t]
+ */
+#define kh_end(h) ((h)->n_buckets)
+
+/*! @function
+  @abstract     Get the number of elements in the hash table
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @return       Number of elements in the hash table [khint_t]
+ */
+#define kh_size(h) ((h)->size)
+
+/*! @function
+  @abstract     Get the number of buckets in the hash table
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @return       Number of buckets in the hash table [khint_t]
+ */
+#define kh_n_buckets(h) ((h)->n_buckets)
+
+/* More convenient interfaces */
+
+/*! @function
+  @abstract     Instantiate a hash set containing integer keys
+  @param  name  Name of the hash table [symbol]
+ */
+#define KHASH_SET_INIT_INT(name)										\
+	KHASH_INIT(name, uint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)
+
+/*! @function
+  @abstract     Instantiate a hash map containing integer keys
+  @param  name  Name of the hash table [symbol]
+  @param  khval_t  Type of values [type]
+ */
+#define KHASH_MAP_INIT_INT(name, khval_t)								\
+	KHASH_INIT(name, uint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
+
+/*! @function
+  @abstract     Instantiate a hash set containing 64-bit integer keys
+  @param  name  Name of the hash table [symbol]
+ */
+#define KHASH_SET_INIT_INT64(name)										\
+	KHASH_INIT(name, uint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)
+
+/*! @function
+  @abstract     Instantiate a hash map containing 64-bit integer keys
+  @param  name  Name of the hash table [symbol]
+  @param  khval_t  Type of values [type]
+ */
+#define KHASH_MAP_INIT_INT64(name, khval_t)								\
+	KHASH_INIT(name, uint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)
+
+typedef const char *kh_cstr_t;
+/*! @function
+  @abstract     Instantiate a hash set containing const char* keys
+  @param  name  Name of the hash table [symbol]
+ */
+#define KHASH_SET_INIT_STR(name)										\
+	KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)
+
+/*! @function
+  @abstract     Instantiate a hash map containing const char* keys
+  @param  name  Name of the hash table [symbol]
+  @param  khval_t  Type of values [type]
+ */
+#define KHASH_MAP_INIT_STR(name, khval_t)								\
+	KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
+
+#endif /* __AC_KHASH_H */
diff --git a/tabix/knetfile.c b/tabix/knetfile.c
new file mode 100644
index 0000000..7c96a3e
--- /dev/null
+++ b/tabix/knetfile.c
@@ -0,0 +1,632 @@
+/* The MIT License
+
+   Copyright (c) 2008 Genome Research Ltd (GRL).
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/* Contact: Heng Li <lh3@sanger.ac.uk> */
+
+/* Probably I will not do socket programming in the next few years and
+   therefore I decide to heavily annotate this file, for Linux and
+   Windows as well.  -lh3 */
+
+#include <time.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/types.h>
+
+#ifdef _WIN32
+#include <winsock.h>
+#else
+#include <netdb.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#endif
+
+#include "knetfile.h"
+
+/* In winsock.h, the type of a socket is SOCKET, which is: "typedef
+ * u_int SOCKET". An invalid SOCKET is: "(SOCKET)(~0)", or signed
+ * integer -1. In knetfile.c, I use "int" for socket type
+ * throughout. This should be improved to avoid confusion.
+ *
+ * In Linux/Mac, recv() and read() do almost the same thing. You can see
+ * in the header file that netread() is simply an alias of read(). In
+ * Windows, however, they are different and using recv() is mandatory.
+ */
+
+/* Wait up to 5 seconds until fd is ready for reading (or for writing if
+ * is_read==0). Returns select()'s result: >0 ready, 0 time-out, -1 error. */
+static int socket_wait(int fd, int is_read)
+{
+	fd_set fds, *fdr = 0, *fdw = 0;
+	struct timeval tv;
+	int ret;
+	if (fd < 0) return -1; // FD_SET() on an invalid descriptor is undefined; report it as an error
+	tv.tv_sec = 5; tv.tv_usec = 0; // 5 seconds time out
+	FD_ZERO(&fds);
+	FD_SET(fd, &fds);
+	if (is_read) fdr = &fds; else fdw = &fds;
+	ret = select(fd+1, fdr, fdw, 0, &tv);
+#ifndef _WIN32
+	if (ret == -1) perror("select");
+#else
+	if (ret == 0)
+		fprintf(stderr, "select time-out\n");
+	else if (ret == SOCKET_ERROR)
+		fprintf(stderr, "select: %d\n", WSAGetLastError());
+#endif
+	return ret;
+}
+
+#ifndef _WIN32
+/* Connect a TCP socket to host:port and return it, or -1 on failure. Not
+ * used on Windows due to the lack of getaddrinfo() in winsock. Adapted from
+ * "Beej's Guide to Network Programming" (http://beej.us/guide/bgnet/). */
+static int socket_connect(const char *host, const char *port)
+{
+#define __err_connect(func) do { perror(func); if (fd != -1) close(fd); freeaddrinfo(res); return -1; } while (0)
+
+	int on = 1, fd = -1, err;
+	struct linger lng = { 0, 0 };
+	struct addrinfo hints, *res = 0;
+	memset(&hints, 0, sizeof(struct addrinfo));
+	hints.ai_family = AF_UNSPEC;
+	hints.ai_socktype = SOCK_STREAM;
+	/* In Unix/Mac, getaddrinfo() is the most convenient way to get server information.
+	 * When it fails there is no list to free and its error is not in errno, hence gai_strerror(). */
+	if ((err = getaddrinfo(host, port, &hints, &res)) != 0) { fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(err)); return -1; }
+	if ((fd = socket(res->ai_family, res->ai_socktype, res->ai_protocol)) == -1) __err_connect("socket");
+	/* The following two setsockopt() are used by ftplib
+	 * (http://nbpfaus.net/~pfau/ftplib/). I am not sure if they
+	 * are necessary. */
+	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == -1) __err_connect("setsockopt");
+	if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &lng, sizeof(lng)) == -1) __err_connect("setsockopt");
+	if (connect(fd, res->ai_addr, res->ai_addrlen) != 0) __err_connect("connect");
+	freeaddrinfo(res);
+	return fd;
+}
+#else
+/* MinGW's printf has problem with "%lld": write x in decimal (with sign) into buf and return buf */
+char *int64tostr(char *buf, int64_t x)
+{
+	int cnt, i = 0, neg = x < 0;
+	uint64_t u = neg? 0 - (uint64_t)x : (uint64_t)x; // magnitude; also correct for INT64_MIN
+	do {
+		buf[i++] = '0' + (int)(u % 10); u /= 10;
+	} while (u);
+	if (neg) buf[i++] = '-'; // the string is reversed below, so the sign is appended last
+	buf[i] = 0;
+	for (cnt = i, i = 0; i < cnt/2; ++i) {
+		int c = buf[i]; buf[i] = buf[cnt-i-1]; buf[cnt-i-1] = c;
+	}
+	return buf;
+}
+
+int64_t strtoint64(const char *buf) // parse the leading run of decimal digits in buf; 0 if there is none
+{
+	int64_t x;
+	for (x = 0; *buf >= '0' && *buf <= '9'; ++buf) // stop at the first non-digit instead of folding it into the value
+		x = x * 10 + (*buf - '0');
+	return x;
+}
+/* On Windows, Winsock has to be initialized before sockets are used. Returns the result of WSAStartup(). */
+int knet_win32_init()
+{
+	WSADATA wsaData;
+	return WSAStartup(MAKEWORD(2, 2), &wsaData); // MAKEWORD(2, 2): presumably requests Winsock version 2.2
+}
+void knet_win32_destroy() // counterpart of knet_win32_init()
+{
+	WSACleanup();
+}
+/* Windows version: connect a TCP socket to host:port; -1 on failure. A slightly
+ * modified version of this function also works on Mac (and presumably
+ * Linux). However, it is not stable on my Mac: it sometimes works fine but
+ * sometimes does not. Therefore for non-Windows OS, I do not use this one. */
+static SOCKET socket_connect(const char *host, const char *port)
+{
+#define __err_connect(func)										\
+	do {														\
+		fprintf(stderr, "%s: %d\n", func, WSAGetLastError());	\
+		if (fd != INVALID_SOCKET) closesocket(fd);				\
+		return -1;												\
+	} while (0)
+	int on = 1;
+	SOCKET fd = INVALID_SOCKET; // so that __err_connect knows whether there is a socket to release
+	struct linger lng = { 0, 0 };
+	struct sockaddr_in server;
+	struct hostent *hp = 0;
+	// open socket
+	if ((fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == INVALID_SOCKET) __err_connect("socket");
+	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char*)&on, sizeof(on)) == -1) __err_connect("setsockopt");
+	if (setsockopt(fd, SOL_SOCKET, SO_LINGER, (char*)&lng, sizeof(lng)) == -1) __err_connect("setsockopt");
+	// get host info
+	if (isalpha((unsigned char)host[0])) hp = gethostbyname(host);
+	else {
+		struct in_addr addr;
+		addr.s_addr = inet_addr(host);
+		hp = gethostbyaddr((char*)&addr, 4, AF_INET);
+	}
+	if (hp == 0) __err_connect("gethost");
+	// connect
+	server.sin_addr.s_addr = *((unsigned long*)hp->h_addr);
+	server.sin_family= AF_INET;
+	server.sin_port = htons(atoi(port));
+	if (connect(fd, (struct sockaddr*)&server, sizeof(server)) != 0) __err_connect("connect");
+	// freehostent(hp); // strangely in MSDN, hp is NOT freed (memory leak?!)
+	return fd;
+}
+#endif
+
+static off_t my_netread(int fd, void *buf, off_t len)
+{
+	off_t rest = len, curr, l = 0;
+	/* Read up to len bytes from fd into buf and return the number read. recv() and read()
+	 * may not read the required length with one call, so they are called repeatedly. */
+	while (rest) {
+		if (socket_wait(fd, 1) <= 0) break; // socket is not ready for reading
+		curr = netread(fd, (char*)buf + l, rest);
+		/* According to the glibc manual, section 13.2, a zero returned
+		 * value indicates end-of-file (EOF) and -1 indicates an error.
+		 * Stop in both cases: adding -1 to the counters would corrupt
+		 * them and let the loop run past the requested length. */
+		if (curr <= 0) break;
+		l += curr; rest -= curr;
+	}
+	return l;
+}
+
+/*************************
+ * FTP specific routines *
+ *************************/
+
+static int kftp_get_response(knetFile *ftp)
+{
+	/* Read the reply on the control connection into ->response and return its numeric code; 0 if the socket is not readable, -1 if the reply is too short. */
+#ifndef _WIN32
+	unsigned char c;
+#else
+	char c;
+#endif
+	int n = 0; char *p;
+	if (socket_wait(ftp->ctrl_fd, 1) <= 0) return 0;
+	while (netread(ftp->ctrl_fd, &c, 1) > 0) { // FIXME: this is *VERY BAD* for unbuffered I/O; "> 0" so that an error (-1) also ends the loop
+		//fputc(c, stderr);
+		if (n >= ftp->max_response) {
+			ftp->max_response = ftp->max_response? ftp->max_response<<1 : 256;
+			ftp->response = realloc(ftp->response, ftp->max_response);
+		}
+		ftp->response[n++] = c;
+		if (c == '\n') { // a line is final when it starts with three digits not followed by '-'; other lines are discarded
+			if (n >= 4 && isdigit((unsigned char)ftp->response[0]) && isdigit((unsigned char)ftp->response[1]) && isdigit((unsigned char)ftp->response[2])
+				&& ftp->response[3] != '-') break;
+			n = 0;
+			continue;
+		}
+	}
+	if (n < 2) return -1;
+	ftp->response[n-2] = 0;
+	return strtol(ftp->response, &p, 0);
+}
+
+static int kftp_send_cmd(knetFile *ftp, const char *cmd, int is_get) // send cmd on the control connection
+{
+	if (socket_wait(ftp->ctrl_fd, 0) <= 0) return -1; // socket is not ready for writing
+	netwrite(ftp->ctrl_fd, cmd, strlen(cmd)); // NOTE(review): the result of netwrite() is not checked
+	return is_get? kftp_get_response(ftp) : 0; // the reply code if is_get is set, otherwise 0
+}
+
+/* Send PASV and parse the "(h1,h2,h3,h4,p1,p2)" part of the reply into ->pasv_ip and ->pasv_port; 0 on success, -1 otherwise. */
+static int kftp_pasv_prep(knetFile *ftp)
+{
+	char *p;
+	int i, v[6];
+	kftp_send_cmd(ftp, "PASV\r\n", 1);
+	if (ftp->response == 0 || (p = strchr(ftp->response, '(')) == 0) return -1; // no reply buffered, or no address in it
+	/* all six fields must parse, otherwise v[] would be read uninitialized */
+	if (sscanf(p + 1, "%d,%d,%d,%d,%d,%d", &v[0], &v[1], &v[2], &v[3], &v[4], &v[5]) != 6) return -1;
+	for (i = 0; i < 6; ++i) if (v[i] < 0 || v[i] > 255) return -1; // each field stands for one byte
+	memcpy(ftp->pasv_ip, v, 4 * sizeof(int));
+	ftp->pasv_port = (v[4]<<8&0xff00) + v[5];
+	return 0;
+}
+
+static int kftp_pasv_connect(knetFile *ftp) // open the data connection to ->pasv_ip / ->pasv_port; 0 on success, -1 on failure
+{
+	char host[80], port[10];
+	if (ftp->pasv_port == 0) {
+		fprintf(stderr, "[kftp_pasv_connect] kftp_pasv_prep() is not called before hand.\n");
+		return -1;
+	}
+	// bounded writes: the numbers originate from the server's reply and "%d" of an arbitrary int does not fit into port[10]
+	snprintf(host, sizeof(host), "%d.%d.%d.%d", ftp->pasv_ip[0], ftp->pasv_ip[1], ftp->pasv_ip[2], ftp->pasv_ip[3]);
+	snprintf(port, sizeof(port), "%d", ftp->pasv_port);
+	ftp->fd = socket_connect(host, port);
+	return ftp->fd == -1? -1 : 0;
+}
+
+int kftp_connect(knetFile *ftp) // open the control connection and log in anonymously; -1 if the connection cannot be made
+{
+	ftp->ctrl_fd = socket_connect(ftp->host, ftp->port);
+	if (ftp->ctrl_fd == -1) return -1;
+	kftp_get_response(ftp); // presumably consumes the server's greeting
+	kftp_send_cmd(ftp, "USER anonymous\r\n", 1);
+	kftp_send_cmd(ftp, "PASS kftp@\r\n", 1);
+	kftp_send_cmd(ftp, "TYPE I\r\n", 1); // NOTE(review): the replies to USER/PASS/TYPE are not checked
+	return 0;
+}
+
+int kftp_reconnect(knetFile *ftp) // drop both connections and log in again; returns the result of kftp_connect()
+{
+	if (ftp->ctrl_fd != -1) {
+		netclose(ftp->ctrl_fd);
+		ftp->ctrl_fd = -1;
+	}
+	if (ftp->fd != -1) netclose(ftp->fd); // same guard as for ctrl_fd: never close an invalid descriptor
+	ftp->fd = -1;
+	return kftp_connect(ftp);
+}
+
+// initialize ->type, ->host, ->port, ->retr and ->size_cmd from an "ftp://host/path" URL; returns 0 if fn is not of that form
+knetFile *kftp_parse_url(const char *fn, const char *mode)
+{
+	static const char scheme[] = "ftp://";
+	const size_t skip = sizeof(scheme) - 1;
+	const char *path;
+	knetFile *fp;
+	size_t host_len;
+	if (strncmp(fn, scheme, skip) != 0) return 0;
+	if ((path = strchr(fn + skip, '/')) == 0) return 0; // a path component is mandatory
+	host_len = path - fn - skip;
+	fp = calloc(1, sizeof(knetFile));
+	fp->type = KNF_TYPE_FTP; fp->fd = -1;
+	if (strchr(mode, 'c')) fp->no_reconnect = 1;
+	/* the Linux/Mac version of socket_connect() also recognizes a port
+	 * like "ftp", but the Windows version does not. */
+	fp->port = strdup("21");
+	fp->host = calloc(host_len + 1, 1);
+	memcpy(fp->host, fn + skip, host_len); // calloc() already supplied the terminator
+	fp->retr = calloc(strlen(path) + 8, 1); // +8: "RETR " (5), "\r\n" (2) and the terminator
+	sprintf(fp->retr, "RETR %s\r\n", path);
+	fp->size_cmd = calloc(strlen(path) + 8, 1);
+	sprintf(fp->size_cmd, "SIZE %s\r\n", path);
+	fp->seek_offset = 0;
+	return fp;
+}
+// (re)open the data connection so that reading ->fd starts at ->offset; 0 on success, -1 on failure
+int kftp_connect_file(knetFile *fp)
+{
+	int ret;
+	long long file_size;
+	if (fp->fd != -1) {
+		netclose(fp->fd); fp->fd = -1; // forget the closed descriptor so that a failure below cannot lead to a second close
+		if (fp->no_reconnect) kftp_get_response(fp);
+	}
+	if (kftp_pasv_prep(fp) == -1) return -1; // no usable PASV reply, hence no data port to connect to
+	kftp_send_cmd(fp, fp->size_cmd, 1);
+#ifndef _WIN32
+	if ( sscanf(fp->response,"%*d %lld", &file_size) != 1 )
+	{
+		fprintf(stderr,"[kftp_connect_file] %s\n", fp->response);
+		return -1;
+	}
+#else
+	const char *p = fp->response;
+	while (*p && *p != ' ') ++p; // both scans stop at the terminator if the reply is malformed
+	while (*p && (*p < '0' || *p > '9')) ++p;
+	file_size = strtoint64(p);
+#endif
+	fp->file_size = file_size;
+	if (fp->offset>=0) {
+		char tmp[32];
+#ifndef _WIN32
+		sprintf(tmp, "REST %lld\r\n", (long long)fp->offset);
+#else
+		strcpy(tmp, "REST ");
+		int64tostr(tmp + 5, fp->offset);
+		strcat(tmp, "\r\n");
+#endif
+		kftp_send_cmd(fp, tmp, 1);
+	}
+	kftp_send_cmd(fp, fp->retr, 0);
+	kftp_pasv_connect(fp);
+	ret = kftp_get_response(fp); // read even if the data connection failed, to keep the control channel in step
+	if (ret != 150 || fp->fd == -1) {
+		fprintf(stderr, "[kftp_connect_file] %s\n", fp->response);
+		if (fp->fd != -1) netclose(fp->fd);
+		fp->fd = -1;
+		return -1;
+	}
+	fp->is_ready = 1;
+	return 0;
+}
+
+
+/**************************
+ * HTTP specific routines *
+ **************************/
+
+knetFile *khttp_parse_url(const char *fn, const char *mode) // build a knetFile for an "http://" URL, honouring $http_proxy; 0 if fn is not such a URL
+{
+	knetFile *fp;
+	char *p, *proxy, *q;
+	int l;
+	if (strstr(fn, "http://") != fn) return 0;
+	// set ->http_host
+	for (p = (char*)fn + 7; *p && *p != '/'; ++p);
+	l = p - fn - 7;
+	fp = calloc(1, sizeof(knetFile));
+	fp->http_host = calloc(l + 1, 1);
+	strncpy(fp->http_host, fn + 7, l);
+	fp->http_host[l] = 0;
+	for (q = fp->http_host; *q && *q != ':'; ++q);
+	if (*q == ':') *q++ = 0;
+	// get http_proxy
+	proxy = getenv("http_proxy");
+	// set ->host, ->port and ->path
+	if (proxy == 0 || *proxy == 0) { // an empty variable means "no proxy" as well
+		fp->host = strdup(fp->http_host); // when there is no proxy, server name is identical to http_host name.
+		fp->port = strdup(*q? q : "80");
+		fp->path = strdup(*p? p : "/");
+	} else {
+		fp->host = (strstr(proxy, "http://") == proxy)? strdup(proxy + 7) : strdup(proxy);
+		if ((q = strchr(fp->host, '/')) != 0) *q = 0; // "http://proxy:3128/" is a common spelling; the "/" must not end up in ->port
+		for (q = fp->host; *q && *q != ':'; ++q);
+		if (*q == ':') *q++ = 0;
+		fp->port = strdup(*q? q : "80");
+		fp->path = strdup(fn); // a proxy is given the full URL
+	}
+	fp->type = KNF_TYPE_HTTP; fp->ctrl_fd = fp->fd = -1;
+	fp->seek_offset = 0;
+	return fp;
+}
+
+int khttp_connect_file(knetFile *fp)
+{
+	/* (Re)connect, request the file from ->offset onwards and consume the reply
+	 * header. Returns 0 on success; on failure returns -1 with ->fd set to -1. */
+	const int buf_size = 0x10000; // FIXME: I am lazy... But in principle, 64KB should be large enough.
+	int ret, l;
+	char *buf = 0, *p;
+	if (fp->fd != -1) netclose(fp->fd);
+	fp->fd = socket_connect(fp->host, fp->port);
+	if (fp->fd == -1) return -1; // otherwise the read loop below would spin on a dead descriptor
+	if ((buf = calloc(buf_size, 1)) == 0) goto fail;
+	l = snprintf(buf, buf_size, "GET %s HTTP/1.0\r\nHost: %s\r\nRange: bytes=%lld-\r\n\r\n", fp->path, fp->http_host, (long long)fp->offset);
+	if (l < 0 || l >= buf_size) goto fail; // the request does not fit into the buffer
+	netwrite(fp->fd, buf, l);
+	// read HTTP header; FIXME: bad efficiency. Stops on EOF/error or when the buffer is full.
+	for (l = 0; l < buf_size - 1 && netread(fp->fd, buf + l, 1) == 1; ++l)
+		if (buf[l] == '\n' && l >= 3 && strncmp(buf + l - 3, "\r\n\r\n", 4) == 0) break;
+	buf[l] = 0;
+	if (l < 14) goto fail; // prematured header
+	ret = strtol(buf + 8, &p, 0); // HTTP return code
+	if (ret == 200 && fp->offset>0) { // 200 (complete result); then skip beginning of the file
+		off_t rest = fp->offset;
+		while (rest) {
+			off_t got = my_netread(fp->fd, buf, rest < buf_size? rest : buf_size);
+			if (got <= 0) goto fail; // the stream ended before ->offset was reached
+			rest -= got;
+		}
+	} else if (ret != 206 && ret != 200) {
+		fprintf(stderr, "[khttp_connect_file] fail to open file (HTTP code: %d).\n", ret);
+		goto fail;
+	}
+	free(buf);
+	fp->is_ready = 1;
+	return 0;
+fail:
+	// common failure exit: release the buffer and drop the connection
+	free(buf);
+	netclose(fp->fd);
+	fp->fd = -1;
+	return -1;
+}
+
+/********************
+ * Generic routines *
+ ********************/
+
+knetFile *knet_open(const char *fn, const char *mode) // open a local file or an ftp:// / http:// URL for reading; 0 on failure
+{
+	knetFile *fp = 0;
+	if (mode[0] != 'r') {
+		fprintf(stderr, "[knet_open] only mode \"r\" is supported.\n");
+		return 0;
+	}
+	if (strstr(fn, "ftp://") == fn) {
+		fp = kftp_parse_url(fn, mode);
+		if (fp == 0) return 0;
+		if (kftp_connect(fp) == -1) {
+			knet_close(fp);
+			return 0;
+		}
+		kftp_connect_file(fp); // a failure leaves ->fd at -1, which is handled below
+	} else if (strstr(fn, "http://") == fn) {
+		fp = khttp_parse_url(fn, mode);
+		if (fp == 0) return 0;
+		khttp_connect_file(fp);
+	} else { // local file
+#ifdef _WIN32
+		/* In windows, O_BINARY is necessary. In Linux/Mac, O_BINARY may
+		 * be undefined on some systems, although it is defined on my
+		 * Mac and the Linux I have tested on. */
+		int fd = open(fn, O_RDONLY | O_BINARY);
+#else
+		int fd = open(fn, O_RDONLY);
+#endif
+		if (fd == -1) {
+			perror("open");
+			return 0;
+		}
+		fp = (knetFile*)calloc(1, sizeof(knetFile));
+		if (fp == 0) { close(fd); return 0; } // out of memory: do not leak the descriptor
+		fp->type = KNF_TYPE_LOCAL;
+		fp->fd = fd; fp->ctrl_fd = -1;
+	}
+	if (fp && fp->fd == -1) {
+		knet_close(fp);
+		return 0;
+	}
+	return fp;
+}
+
+knetFile *knet_dopen(int fd, const char *mode) // wrap an already open local descriptor; mode is not used
+{
+	knetFile *fp = (knetFile*)calloc(1, sizeof(knetFile));
+	if (fp == 0) return 0;
+	fp->type = KNF_TYPE_LOCAL; fp->fd = fd; fp->ctrl_fd = -1; // ctrl_fd: calloc() leaves 0 there, which knet_close() would close
+	return fp;
+}
+
+off_t knet_read(knetFile *fp, void *buf, off_t len) // read up to len bytes at ->offset into buf; returns the number of bytes read
+{
+	off_t l = 0;
+	if (fp->fd == -1) return 0;
+	if (fp->type == KNF_TYPE_FTP) {
+		if (fp->is_ready == 0) {
+			if (!fp->no_reconnect) kftp_reconnect(fp);
+			kftp_connect_file(fp);
+		}
+	} else if (fp->type == KNF_TYPE_HTTP) {
+		if (fp->is_ready == 0) khttp_connect_file(fp);
+	}
+	if (fp->fd == -1) return 0; // (re)connecting failed, so there is nothing to read from
+	if (fp->type == KNF_TYPE_LOCAL) { // on Windows, the following block is necessary; not on UNIX
+		off_t rest = len, curr;
+		while (rest) {
+			curr = read(fp->fd, (char*)buf + l, rest);
+			if (curr <= 0) break; // EOF or error; a -1 must not be added to the counters
+			l += curr; rest -= curr;
+		}
+	} else l = my_netread(fp->fd, buf, len);
+	fp->offset += l;
+	return l;
+}
+
+off_t knet_seek(knetFile *fp, int64_t off, int whence) // returns 0 on success and -1 on failure, like fseek()
+{
+	if (whence == SEEK_SET && off == fp->offset) return 0;
+	if (fp->type == KNF_TYPE_LOCAL) {
+		/* Be aware that lseek() returns the offset after seeking,
+		 * while fseek() returns zero on success. */
+		off_t offset = lseek(fp->fd, off, whence);
+		if (offset == -1) {
+			// Be silent, it is OK for knet_seek to fail when the file is streamed
+			// fprintf(stderr,"[knet_seek] %s\n", strerror(errno));
+			return -1;
+		}
+		fp->offset = offset;
+		return 0;
+	}
+	else if (fp->type == KNF_TYPE_FTP)
+	{
+		if (whence==SEEK_CUR)
+			fp->offset += off;
+		else if (whence==SEEK_SET)
+			fp->offset = off;
+		else if ( whence==SEEK_END)
+			fp->offset = fp->file_size+off;
+		fp->is_ready = 0; // lazy seek: the next knet_read() reconnects at ->offset
+		return 0;
+	}
+	else if (fp->type == KNF_TYPE_HTTP)
+	{
+		if (whence == SEEK_END) { // FIXME: can we allow SEEK_END in future?
+			fprintf(stderr, "[knet_seek] SEEK_END is not supported for HTTP. Offset is unchanged.\n");
+			errno = ESPIPE;
+			return -1;
+		}
+		if (whence==SEEK_CUR)
+			fp->offset += off;
+		else if (whence==SEEK_SET)
+			fp->offset = off;
+		fp->is_ready = 0;
+		return 0; // was fp->offset: every other successful path returns 0, so a non-zero offset looked like a failure
+	}
+	errno = EINVAL;
+	fprintf(stderr,"[knet_seek] %s\n", strerror(errno));
+	return -1;
+}
+
+int knet_close(knetFile *fp)
+{
+	if (!fp) return 0; // closing a null handle is a no-op
+	// release the control connection first (FTP specific)
+	if (fp->ctrl_fd != -1) netclose(fp->ctrl_fd);
+	/* On Linux/Mac, netclose() is an alias of close(), but on
+	 * Windows, it is an alias of closesocket(), hence the distinction. */
+	if (fp->fd != -1 && fp->type == KNF_TYPE_LOCAL) close(fp->fd);
+	else if (fp->fd != -1) netclose(fp->fd);
+	// free(0) is harmless, so fields that were never set need no check
+	free(fp->response); free(fp->retr); free(fp->size_cmd); // FTP specific
+	free(fp->path); free(fp->http_host); // HTTP specific
+	free(fp->host); free(fp->port);
+	free(fp);
+	return 0;
+}
+
+#ifdef KNETFILE_MAIN
+int main(void)
+{
+	char *buf;
+	knetFile *fp;
+	int type = 4, l;
+#ifdef _WIN32
+	knet_win32_init();
+#endif
+	buf = calloc(0x100000, 1);
+	if (type == 0) {
+		fp = knet_open("knetfile.c", "r");
+		knet_seek(fp, 1000, SEEK_SET);
+	} else if (type == 1) { // NCBI FTP, large file
+		fp = knet_open("ftp://ftp.ncbi.nih.gov/1000genomes/ftp/data/NA12878/alignment/NA12878.chrom6.SLX.SRP000032.2009_06.bam", "r");
+		knet_seek(fp, 2500000000ll, SEEK_SET);
+		l = knet_read(fp, buf, 255);
+	} else if (type == 2) {
+		fp = knet_open("ftp://ftp.sanger.ac.uk/pub4/treefam/tmp/index.shtml", "r");
+		knet_seek(fp, 1000, SEEK_SET);
+	} else if (type == 3) {
+		fp = knet_open("http://www.sanger.ac.uk/Users/lh3/index.shtml", "r");
+		knet_seek(fp, 1000, SEEK_SET);
+	} else if (type == 4) {
+		fp = knet_open("http://www.sanger.ac.uk/Users/lh3/ex1.bam", "r");
+		knet_read(fp, buf, 10000);
+		knet_seek(fp, 20000, SEEK_SET);
+		knet_seek(fp, 10000, SEEK_SET);
+		l = knet_read(fp, buf+10000, 10000000) + 10000;
+	}
+	if (type != 4 && type != 1) {
+		knet_read(fp, buf, 255);
+		buf[255] = 0;
+		printf("%s\n", buf);
+	} else write(fileno(stdout), buf, l);
+	knet_close(fp);
+	free(buf);
+	return 0;
+}
+#endif
diff --git a/tabix/knetfile.h b/tabix/knetfile.h
new file mode 100644
index 0000000..0a0e66f
--- /dev/null
+++ b/tabix/knetfile.h
@@ -0,0 +1,75 @@
+#ifndef KNETFILE_H
+#define KNETFILE_H
+
+#include <stdint.h>
+#include <fcntl.h>
+
+#ifndef _WIN32
+#define netread(fd, ptr, len) read(fd, ptr, len)
+#define netwrite(fd, ptr, len) write(fd, ptr, len)
+#define netclose(fd) close(fd)
+#else
+#include <winsock2.h>
+#define netread(fd, ptr, len) recv(fd, ptr, len, 0)
+#define netwrite(fd, ptr, len) send(fd, ptr, len, 0)
+#define netclose(fd) closesocket(fd)
+#endif
+
+// FIXME: currently I/O is unbuffered
+
+#define KNF_TYPE_LOCAL 1
+#define KNF_TYPE_FTP   2
+#define KNF_TYPE_HTTP  3
+
+typedef struct knetFile_s {
+	int type, fd; // type is one of KNF_TYPE_*; fd is the descriptor the data is read from (-1 if none)
+	int64_t offset; // current position in the file
+	char *host, *port;
+
+	// the following are mainly for FTP; is_ready is also used for HTTP, and knet_close() tests ctrl_fd for every type
+	int ctrl_fd, pasv_ip[4], pasv_port, max_response, no_reconnect, is_ready;
+	char *response, *retr, *size_cmd;
+	int64_t seek_offset; // for lazy seek
+    int64_t file_size;
+
+	// the following are for HTTP only
+	char *path, *http_host;
+} knetFile;
+
+#define knet_tell(fp) ((fp)->offset)
+#define knet_fileno(fp) ((fp)->fd)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _WIN32
+	int knet_win32_init();
+	void knet_win32_destroy();
+#endif
+
+	knetFile *knet_open(const char *fn, const char *mode);
+
+	/* 
+	   This only works with local files.
+	 */
+	knetFile *knet_dopen(int fd, const char *mode);
+
+	/*
+	  If ->is_ready==0, this routine updates ->fd; otherwise, it simply
+	  reads from ->fd.
+	 */
+	off_t knet_read(knetFile *fp, void *buf, off_t len);
+
+	/*
+	  This routine only sets ->offset and ->is_ready=0. It does not
+	  communicate with the FTP server.
+	 */
+	off_t knet_seek(knetFile *fp, int64_t off, int whence);
+	int knet_close(knetFile *fp);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/tabix/ksort.h b/tabix/ksort.h
new file mode 100644
index 0000000..16a03fd
--- /dev/null
+++ b/tabix/ksort.h
@@ -0,0 +1,271 @@
+/* The MIT License
+
+   Copyright (c) 2008 Genome Research Ltd (GRL).
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/* Contact: Heng Li <lh3@sanger.ac.uk> */
+
+/*
+  2008-11-16 (0.1.4):
+
+    * Fixed a bug in introsort() that happens in rare cases.
+
+  2008-11-05 (0.1.3):
+
+    * Fixed a bug in introsort() for complex comparisons.
+
+	* Fixed a bug in mergesort(). The previous version is not stable.
+
+  2008-09-15 (0.1.2):
+
+	* Accelerated introsort. On my Mac (not on another Linux machine),
+	  my implementation is as fast as std::sort on random input.
+
+	* Added combsort and in introsort, switch to combsort if the
+	  recursion is too deep.
+
+  2008-09-13 (0.1.1):
+
+	* Added k-small algorithm
+
+  2008-09-05 (0.1.0):
+
+	* Initial version
+
+*/
+
+#ifndef AC_KSORT_H
+#define AC_KSORT_H
+
+#include <stdlib.h>
+#include <string.h>
+
+typedef struct {
+	void *left, *right;
+	int depth;
+} ks_isort_stack_t;
+
+#define KSORT_SWAP(type_t, a, b) { register type_t t=(a); (a)=(b); (b)=t; }
+
+#define KSORT_INIT(name, type_t, __sort_lt)								\
+	void ks_mergesort_##name(size_t n, type_t array[], type_t temp[])	\
+	{																	\
+		type_t *a2[2], *a, *b;											\
+		int curr, shift;												\
+																		\
+		a2[0] = array;													\
+		a2[1] = temp? temp : (type_t*)malloc(sizeof(type_t) * n);		\
+		for (curr = 0, shift = 0; (1ul<<shift) < n; ++shift) {			\
+			a = a2[curr]; b = a2[1-curr];								\
+			if (shift == 0) {											\
+				type_t *p = b, *i, *eb = a + n;							\
+				for (i = a; i < eb; i += 2) {							\
+					if (i == eb - 1) *p++ = *i;							\
+					else {												\
+						if (__sort_lt(*(i+1), *i)) {					\
+							*p++ = *(i+1); *p++ = *i;					\
+						} else {										\
+							*p++ = *i; *p++ = *(i+1);					\
+						}												\
+					}													\
+				}														\
+			} else {													\
+				size_t i, step = 1ul<<shift;							\
+				for (i = 0; i < n; i += step<<1) {						\
+					type_t *p, *j, *k, *ea, *eb;						\
+					if (n < i + step) {									\
+						ea = a + n; eb = a;								\
+					} else {											\
+						ea = a + i + step;								\
+						eb = a + (n < i + (step<<1)? n : i + (step<<1)); \
+					}													\
+					j = a + i; k = a + i + step; p = b + i;				\
+					while (j < ea && k < eb) {							\
+						if (__sort_lt(*k, *j)) *p++ = *k++;				\
+						else *p++ = *j++;								\
+					}													\
+					while (j < ea) *p++ = *j++;							\
+					while (k < eb) *p++ = *k++;							\
+				}														\
+			}															\
+			curr = 1 - curr;											\
+		}																\
+		if (curr == 1) {												\
+			type_t *p = a2[0], *i = a2[1], *eb = array + n;				\
+			for (; p < eb; ++i) *p++ = *i;								\
+		}																\
+		if (temp == 0) free(a2[1]);										\
+	}																	\
+	void ks_heapadjust_##name(size_t i, size_t n, type_t l[])			\
+	{																	\
+		size_t k = i;													\
+		type_t tmp = l[i];												\
+		while ((k = (k << 1) + 1) < n) {								\
+			if (k != n - 1 && __sort_lt(l[k], l[k+1])) ++k;				\
+			if (__sort_lt(l[k], tmp)) break;							\
+			l[i] = l[k]; i = k;											\
+		}																\
+		l[i] = tmp;														\
+	}																	\
+	void ks_heapmake_##name(size_t lsize, type_t l[])					\
+	{																	\
+		size_t i;														\
+		for (i = (lsize >> 1) - 1; i != (size_t)(-1); --i)				\
+			ks_heapadjust_##name(i, lsize, l);							\
+	}																	\
+	void ks_heapsort_##name(size_t lsize, type_t l[])					\
+	{																	\
+		size_t i; /* l[] is not heapified here; presumably callers run ks_heapmake first */ \
+		for (i = lsize? lsize - 1 : 0; i > 0; --i) { /* lsize - 1 would wrap around for lsize == 0 */ \
+			type_t tmp = *l; *l = l[i]; l[i] = tmp; /* move the root behind the shrinking heap */ \
+			ks_heapadjust_##name(0, i, l);								\
+		}																\
+	}																	\
+	/* Insertion sort of [s, t). static: a plain C99 "inline" emits no external definition, so a non-inlined call may not link */ \
+	static inline void __ks_insertsort_##name(type_t *s, type_t *t)		\
+	{																	\
+		type_t *i, *j, swap_tmp;										\
+		for (i = s + 1; i < t; ++i)										\
+			for (j = i; j > s && __sort_lt(*j, *(j-1)); --j) {			\
+				swap_tmp = *j; *j = *(j-1); *(j-1) = swap_tmp; }		\
+	}																	\
+	void ks_combsort_##name(size_t n, type_t a[])						\
+	{																	\
+		const double shrink_factor = 1.2473309501039786540366528676643; \
+		int do_swap;													\
+		size_t gap = n;													\
+		type_t tmp, *i, *j;												\
+		do {															\
+			if (gap > 2) {												\
+				gap = (size_t)(gap / shrink_factor);					\
+				if (gap == 9 || gap == 10) gap = 11;					\
+			}															\
+			do_swap = 0;												\
+			for (i = a; i < a + n - gap; ++i) {							\
+				j = i + gap;											\
+				if (__sort_lt(*j, *i)) {								\
+					tmp = *i; *i = *j; *j = tmp;						\
+					do_swap = 1;										\
+				}														\
+			}															\
+		} while (do_swap || gap > 2);									\
+		if (gap != 1) __ks_insertsort_##name(a, a + n);					\
+	}																	\
+	void ks_introsort_##name(size_t n, type_t a[])						\
+	{																	\
+		int d;															\
+		ks_isort_stack_t *top, *stack;									\
+		type_t rp, swap_tmp;											\
+		type_t *s, *t, *i, *j, *k;										\
+																		\
+		if (n < 1) return;												\
+		else if (n == 2) {												\
+			if (__sort_lt(a[1], a[0])) { swap_tmp = a[0]; a[0] = a[1]; a[1] = swap_tmp; } \
+			return;														\
+		}																\
+		for (d = 2; 1ul<<d < n; ++d);									\
+		stack = (ks_isort_stack_t*)malloc(sizeof(ks_isort_stack_t) * ((sizeof(size_t)*d)+2)); \
+		top = stack; s = a; t = a + (n-1); d <<= 1;						\
+		while (1) {														\
+			if (s < t) {												\
+				if (--d == 0) {											\
+					ks_combsort_##name(t - s + 1, s);					\
+					t = s;												\
+					continue;											\
+				}														\
+				i = s; j = t; k = i + ((j-i)>>1) + 1;					\
+				if (__sort_lt(*k, *i)) {								\
+					if (__sort_lt(*k, *j)) k = j;						\
+				} else k = __sort_lt(*j, *i)? i : j;					\
+				rp = *k;												\
+				if (k != t) { swap_tmp = *k; *k = *t; *t = swap_tmp; }	\
+				for (;;) {												\
+					do ++i; while (__sort_lt(*i, rp));					\
+					do --j; while (i <= j && __sort_lt(rp, *j));		\
+					if (j <= i) break;									\
+					swap_tmp = *i; *i = *j; *j = swap_tmp;				\
+				}														\
+				swap_tmp = *i; *i = *t; *t = swap_tmp;					\
+				if (i-s > t-i) {										\
+					if (i-s > 16) { top->left = s; top->right = i-1; top->depth = d; ++top; } \
+					s = t-i > 16? i+1 : t;								\
+				} else {												\
+					if (t-i > 16) { top->left = i+1; top->right = t; top->depth = d; ++top; } \
+					t = i-s > 16? i-1 : s;								\
+				}														\
+			} else {													\
+				if (top == stack) {										\
+					free(stack);										\
+					__ks_insertsort_##name(a, a+n);						\
+					return;												\
+				} else { --top; s = (type_t*)top->left; t = (type_t*)top->right; d = top->depth; } \
+			}															\
+		}																\
+	}																	\
+	/* This function is adapted from: http://ndevilla.free.fr/median/ */ \
+	/* 0 <= kk < n */													\
+	type_t ks_ksmall_##name(size_t n, type_t arr[], size_t kk)			\
+	{																	\
+		type_t *low, *high, *k, *ll, *hh, *mid;							\
+		low = arr; high = arr + n - 1; k = arr + kk;					\
+		for (;;) {														\
+			if (high <= low) return *k;									\
+			if (high == low + 1) {										\
+				if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \
+				return *k;												\
+			}															\
+			mid = low + (high - low) / 2;								\
+			if (__sort_lt(*high, *mid)) KSORT_SWAP(type_t, *mid, *high); \
+			if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \
+			if (__sort_lt(*low, *mid)) KSORT_SWAP(type_t, *mid, *low);	\
+			KSORT_SWAP(type_t, *mid, *(low+1));							\
+			ll = low + 1; hh = high;									\
+			for (;;) {													\
+				do ++ll; while (__sort_lt(*ll, *low));					\
+				do --hh; while (__sort_lt(*low, *hh));					\
+				if (hh < ll) break;										\
+				KSORT_SWAP(type_t, *ll, *hh);							\
+			}															\
+			KSORT_SWAP(type_t, *low, *hh);								\
+			if (hh <= k) low = ll;										\
+			if (hh >= k) high = hh - 1;									\
+		}																\
+	}
+
+#define ks_mergesort(name, n, a, t) ks_mergesort_##name(n, a, t)
+#define ks_introsort(name, n, a) ks_introsort_##name(n, a)
+#define ks_combsort(name, n, a) ks_combsort_##name(n, a)
+#define ks_heapsort(name, n, a) ks_heapsort_##name(n, a)
+#define ks_heapmake(name, n, a) ks_heapmake_##name(n, a)
+#define ks_heapadjust(name, i, n, a) ks_heapadjust_##name(i, n, a)
+#define ks_ksmall(name, n, a, k) ks_ksmall_##name(n, a, k)
+
+#define ks_lt_generic(a, b) ((a) < (b))
+#define ks_lt_str(a, b) (strcmp((a), (b)) < 0)
+
+typedef const char *ksstr_t;
+
+#define KSORT_INIT_GENERIC(type_t) KSORT_INIT(type_t, type_t, ks_lt_generic)
+#define KSORT_INIT_STR KSORT_INIT(str, ksstr_t, ks_lt_str)
+
+#endif
diff --git a/tabix/kstring.c b/tabix/kstring.c
new file mode 100644
index 0000000..e0203fa
--- /dev/null
+++ b/tabix/kstring.c
@@ -0,0 +1,165 @@
+#include <stdarg.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <string.h>
+#include <stdint.h>
+#include "kstring.h"
+
+/* Append printf-style formatted output to s, enlarging s->s when the buffer is too small. */
+int ksprintf(kstring_t *s, const char *fmt, ...)
+{
+	va_list ap; int l;
+	va_start(ap, fmt);
+	l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap); // This line does not work with glibc 2.0. See `man snprintf'.
+	va_end(ap);
+	if (l + 1 > s->m - s->l) { // the output plus its NUL did not fit: grow the buffer and format again
+		s->m = s->l + l + 2;
+		kroundup32(s->m);
+		s->s = (char*)realloc(s->s, s->m);
+		va_start(ap, fmt);
+		l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap);
+		va_end(ap); // each va_start() is paired with exactly one va_end()
+	}
+	s->l += l; // s->l is the string length, excluding the terminating NUL
+	return l; // value reported by the last vsnprintf() call
+}
+
+// Split s in place on delimiter (0 = any whitespace); s MUST BE a null terminated string. Returns the number of fields.
+int ksplit_core(char *s, int delimiter, int *_max, int **_offsets)
+{
+	int i, n, max, last_char, last_start, *offsets, l;
+	n = 0; max = *_max; offsets = *_offsets; // NOTE: both pointers are dereferenced here, so neither may be NULL
+	l = strlen(s);
+	
+#define __ksplit_aux do {												\
+		if (_offsets) {													\
+			s[i] = 0;													\
+			if (n == max) {												\
+				max = max? max<<1 : 2;									\
+				offsets = (int*)realloc(offsets, sizeof(int) * max);	\
+			}															\
+			offsets[n++] = last_start;									\
+		} else ++n;														\
+	} while (0)
+
+	for (i = 0, last_char = last_start = 0; i <= l; ++i) { // i == l visits the terminating NUL so the last field is closed
+		if (delimiter == 0) {
+			if (isspace((unsigned char)s[i]) || s[i] == 0) { // <ctype.h> functions require unsigned char values
+				if (isgraph((unsigned char)last_char)) __ksplit_aux; // the end of a field
+			} else {
+				if (isspace((unsigned char)last_char) || last_char == 0) last_start = i;
+			}
+		} else {
+			if (s[i] == delimiter || s[i] == 0) {
+				if (last_char != 0 && last_char != delimiter) __ksplit_aux; // the end of a field
+			} else {
+				if (last_char == delimiter || last_char == 0) last_start = i;
+			}
+		}
+		last_char = s[i]; // reads 0 when __ksplit_aux has just terminated a field at s[i]
+	}
+	*_max = max; *_offsets = offsets; // hand the (possibly reallocated) array and its capacity back to the caller
+	return n;
+}
+
+/**********************
+ * Boyer-Moore search *
+ **********************/
+
+// Build the Boyer-Moore tables for pat[0..m); reference: http://www-igm.univ-mlv.fr/~lecroq/string/node14.html
+int *ksBM_prep(const uint8_t *pat, int m)
+{
+	int i, *suff, *prep, *bmGs, *bmBc;
+	prep = calloc(m + 256, sizeof(int)); // m ints for bmGs followed by 256 ints for bmBc (element size is int, not 1 byte)
+	bmGs = prep; bmBc = prep + m;
+	{ // preBmBc()
+		for (i = 0; i < 256; ++i) bmBc[i] = m;
+		for (i = 0; i < m - 1; ++i) bmBc[pat[i]] = m - i - 1;
+	}
+	suff = calloc(m, sizeof(int)); // scratch array, released before returning
+	{ // suffixes()
+		int f = 0, g;
+		suff[m - 1] = m;
+		g = m - 1;
+		for (i = m - 2; i >= 0; --i) {
+			if (i > g && suff[i + m - 1 - f] < i - g)
+				suff[i] = suff[i + m - 1 - f];
+			else {
+				if (i < g) g = i;
+				f = i;
+				while (g >= 0 && pat[g] == pat[g + m - 1 - f]) --g;
+				suff[i] = f - g;
+			}
+		}
+	}
+	{ // preBmGs()
+		int j = 0;
+		for (i = 0; i < m; ++i) bmGs[i] = m;
+		for (i = m - 1; i >= 0; --i)
+			if (suff[i] == i + 1)
+				for (; j < m - 1 - i; ++j)
+					if (bmGs[j] == m)
+						bmGs[j] = m - 1 - i;
+		for (i = 0; i <= m - 2; ++i)
+			bmGs[m - 1 - suff[i]] = m - 1 - i;
+	}
+	free(suff);
+	return prep; // one heap block holding both tables; ksBM_search() releases it with free()
+}
+
+int *ksBM_search(const uint8_t *str, int n, const uint8_t *pat, int m, int *_prep, int *n_matches)
+{ // search for pat[0..m) in str[0..n); the match offsets are returned and their count is stored in *n_matches
+	int i, j, *prep, *bmGs, *bmBc;
+	int *matches = 0, mm = 0, nm = 0;
+	prep = _prep? _prep : ksBM_prep(pat, m); // build the shift tables here when the caller passed none
+	bmGs = prep; bmBc = prep + m; // same layout as set up in ksBM_prep()
+	j = 0;
+	while (j <= n - m) {
+		for (i = m - 1; i >= 0 && pat[i] == str[i+j]; --i); // compare the window right to left
+		if (i < 0) { // the whole pattern matched at offset j
+			if (nm == mm) { // matches is full: double its capacity
+				mm = mm? mm<<1 : 1;
+				matches = realloc(matches, mm * sizeof(int));
+			}
+			matches[nm++] = j;
+			j += bmGs[0];
+		} else { // mismatch at pat[i]: advance by the larger of the two table shifts
+			int max = bmBc[str[i+j]] - m + 1 + i;
+			if (max < bmGs[i]) max = bmGs[i];
+			j += max;
+		}
+	}
+	*n_matches = nm;
+	if (_prep == 0) free(prep); // only free tables that were allocated by this call
+	return matches; // still NULL when nothing matched
+}
+
+#ifdef KSTRING_MAIN
+#include <stdio.h>
+int main()
+{ // self-test driver, compiled only when KSTRING_MAIN is defined
+	kstring_t *s;
+	int *fields, n, i;
+	s = (kstring_t*)calloc(1, sizeof(kstring_t));
+	// test ksprintf()
+	ksprintf(s, " abcdefg:    %d ", 100);
+	printf("'%s'\n", s->s);
+	// test ksplit()
+	fields = ksplit(s, 0, &n);
+	for (i = 0; i < n; ++i)
+		printf("field[%d] = '%s'\n", i, s->s + fields[i]);
+	free(fields); free(s->s); free(s); // release the offsets array and the string buffer, not just the struct
+
+	{
+		static char *str = "abcdefgcdg";
+		static char *pat = "cd";
+		int n, *matches;
+		matches = ksBM_search((const uint8_t*)str, strlen(str), (const uint8_t*)pat, strlen(pat), 0, &n); // casts match the uint8_t prototype
+		printf("%d: \n", n);
+		for (i = 0; i < n; ++i)
+			printf("- %d\n", matches[i]);
+		free(matches);
+	}
+	return 0;
+}
+#endif
diff --git a/tabix/kstring.h b/tabix/kstring.h
new file mode 100644
index 0000000..f4e5a99
--- /dev/null
+++ b/tabix/kstring.h
@@ -0,0 +1,68 @@
+#ifndef KSTRING_H
+#define KSTRING_H
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+
+#ifndef kroundup32
+#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
+#endif
+
+#ifndef KSTRING_T
+#define KSTRING_T kstring_t
+typedef struct __kstring_t {
+	size_t l, m;
+	char *s;
+} kstring_t;
+#endif
+
+int ksprintf(kstring_t *s, const char *fmt, ...);
+int ksplit_core(char *s, int delimiter, int *_max, int **_offsets);
+
+// calculate the auxiliary array, allocated by calloc()
+int *ksBM_prep(const uint8_t *pat, int m);
+
+/* Search for pat in str and return the list of matches. The size of
+ * the list is returned in *n_matches. _prep is the array returned by
+ * ksBM_prep(). If it is a NULL pointer, ksBM_prep() will be called. */
+int *ksBM_search(const uint8_t *str, int n, const uint8_t *pat, int m, int *_prep, int *n_matches);
+
+static inline int kputsn(const char *p, int l, kstring_t *s) // append l bytes taken from p to s; returns l
+{
+	if (s->l + l + 1 >= s->m) { // grow when the l bytes plus the terminating NUL might not fit
+		s->m = s->l + l + 2;
+		kroundup32(s->m); // kroundup32 rounds the capacity up to a power of two
+		s->s = (char*)realloc(s->s, s->m);
+	}
+	strncpy(s->s + s->l, p, l); // strncpy semantics: if p holds a NUL within its first l bytes, the rest is zero-filled
+	s->l += l;
+	s->s[s->l] = 0; // keep the buffer NUL terminated
+	return l;
+}
+
+static inline int kputs(const char *p, kstring_t *s) // append the NUL-terminated string p to s
+{
+	return kputsn(p, strlen(p), s); // returns the length that was passed to kputsn()
+}
+
+static inline int kputc(int c, kstring_t *s) // append the single character c to s; returns c
+{
+	if (s->l + 1 >= s->m) { // no room left for c plus the terminating NUL
+		s->m = s->l + 2;
+		kroundup32(s->m); // kroundup32 rounds the capacity up to a power of two
+		s->s = (char*)realloc(s->s, s->m);
+	}
+	s->s[s->l++] = c;
+	s->s[s->l] = 0; // keep the buffer NUL terminated
+	return c;
+}
+
+static inline int *ksplit(kstring_t *s, int delimiter, int *n) // split s->s in place; the field count goes to *n
+{
+	int max = 0, *offsets = 0; // start empty so ksplit_core() allocates the offsets array itself
+	*n = ksplit_core(s->s, delimiter, &max, &offsets);
+	return offsets; // field start offsets into s->s; heap-allocated, presumably the caller's to free()
+}
+
+#endif
diff --git a/tabix/tabix.h b/tabix/tabix.h
new file mode 100644
index 0000000..4390c09
--- /dev/null
+++ b/tabix/tabix.h
@@ -0,0 +1,137 @@
+/* The MIT License
+
+   Copyright (c) 2009 Genome Research Ltd (GRL), 2010 Broad Institute
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/* Contact: Heng Li <lh3@live.co.uk> */
+
+#ifndef __TABIDX_H
+#define __TABIDX_H
+
+#include <stdint.h>
+#include "kstring.h"
+#include "bgzf.h"
+
+#define TI_PRESET_GENERIC 0
+#define TI_PRESET_SAM     1
+#define TI_PRESET_VCF     2
+
+#define TI_FLAG_UCSC      0x10000
+
+typedef int (*ti_fetch_f)(int l, const char *s, void *data);
+
+struct __ti_index_t;
+typedef struct __ti_index_t ti_index_t;
+
+struct __ti_iter_t;
+typedef struct __ti_iter_t *ti_iter_t;
+
+typedef struct {
+	BGZF *fp;
+	ti_index_t *idx;
+	char *fn, *fnidx;
+} tabix_t;
+
+typedef struct {
+	int32_t preset;
+	int32_t sc, bc, ec; // seq col., beg col. and end col.
+	int32_t meta_char, line_skip;
+} ti_conf_t;
+
+extern ti_conf_t ti_conf_gff, ti_conf_bed, ti_conf_psltbl, ti_conf_vcf, ti_conf_sam; // preset
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+	/*******************
+	 * High-level APIs *
+	 *******************/
+
+	tabix_t *ti_open(const char *fn, const char *fnidx);
+	int ti_lazy_index_load(tabix_t *t);
+	void ti_close(tabix_t *t);
+	ti_iter_t ti_query(tabix_t *t, const char *name, int beg, int end);
+	ti_iter_t ti_queryi(tabix_t *t, int tid, int beg, int end);
+	ti_iter_t ti_querys(tabix_t *t, const char *reg);
+	const char *ti_read(tabix_t *t, ti_iter_t iter, int *len);
+
+	/* Destroy the iterator */
+	void ti_iter_destroy(ti_iter_t iter);
+
+	/* Get the list of sequence names. Each "char*" pointer points to an
+	 * internal member of the index, so DO NOT modify the returned
+	 * strings; otherwise the index will be corrupted. The returned
+	 * array should be freed by a single free() call by the routine
+	 * calling this function. The number of sequences is returned at *n. */
+	const char **ti_seqname(const ti_index_t *idx, int *n);
+
+	/******************
+	 * Low-level APIs *
+	 ******************/
+
+	/* Build the index for file <fn>. File <fn>.tbi will be generated
+	 * and overwrite the file of the same name. Return -1 on failure. */
+	int ti_index_build(const char *fn, const ti_conf_t *conf);
+
+	/* Load the index from file <fn>.tbi. If <fn> is a URL and the index
+	 * file is not in the working directory, <fn>.tbi will be
+	 * downloaded. Return NULL on failure. */
+	ti_index_t *ti_index_load(const char *fn);
+
+	ti_index_t *ti_index_load_local(const char *fnidx);
+
+	/* Destroy the index */
+	void ti_index_destroy(ti_index_t *idx);
+
+	/* Parse a region like: chr2, chr2:100, chr2:100-200. Return -1 on failure. */
+	int ti_parse_region(const ti_index_t *idx, const char *str, int *tid, int *begin, int *end);
+
+	int ti_get_tid(const ti_index_t *idx, const char *name);
+
+	/* Get the iterator pointing to the first record at the current file
+	 * position. If the file has just been opened, the iterator points to
+	 * the first record in the file. */
+	ti_iter_t ti_iter_first(void);
+
+	/* Get the iterator pointing to the first record in region tid:beg-end */
+	ti_iter_t ti_iter_query(const ti_index_t *idx, int tid, int beg, int end);
+
+	/* Get the data line pointed by the iterator and iterate to the next record. */
+	const char *ti_iter_read(BGZF *fp, ti_iter_t iter, int *len);
+
+	/*******************
+	 * Deprecated APIs *
+	 *******************/
+
+	/* The callback version for random access */
+	int ti_fetch(BGZF *fp, const ti_index_t *idx, int tid, int beg, int end, void *data, ti_fetch_f func);
+
+	/* Read one line. */
+	int ti_readline(BGZF *fp, kstring_t *str);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/tests/ex3.sam b/tests/ex3.sam
index bae2a22..495d4fe 100644
--- a/tests/ex3.sam
+++ b/tests/ex3.sam
@@ -10,4 +10,4 @@
 read_28833_29006_6945	99	chr1	33	20	10M1D25M	=	200	167	AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG	<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<	NM:i:1	RG:Z:L1	PG:Z:P1	XT:A:U
 read_28701_28881_323b	147	chr2	88	30	35M	=	500	412	ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA	<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<	MF:i:18	RG:Z:L2	PG:Z:P2	XT:A:R
 read_28701_28881_323c	147	chr2	88	30	35M	=	500	412	ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA	<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<
-
+test_clipped1	99	chr2	997	20	4S6M1D20M5S	=	200	167	AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG	<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<	NM:i:1	RG:Z:L1	PG:Z:P1	XT:A:U
diff --git a/tests/example.gtf.gz b/tests/example.gtf.gz
new file mode 100644
index 0000000000000000000000000000000000000000..693db0c6372a12d4299eb20a49cfb54ebba94882
GIT binary patch
literal 3778
zcmV;z4n6T7iwFb&00000{{{d;LjnMM3+<g<Z`(K)hOfQ9BFOC`1rNVDbCsFtw7|AW
zkft-2L68`2fT>f&>0tNYFDc2fB#RU&N}fbM1uCNn!_<#oo)7Yny#4wJi??T=-hTY$
z!{YPR<pLB_3rY-GyjlF){{6CCEwAtIj*e}kcf|dnlo>^je?EG8yjiVpAMd|EZ5mt^
z4Y-huYTKar*{A<~UmhKQe|&ga-mk82AMWl~UpC)cKixe2SRWn#=j{B?v&|P*H{a|J
zTwVa2Fn$5-4K#n3r^8x{cDJ=}?RI`biTeeAE&pErb;N!?`g-&C&Fbdb{a22T&#$iD
zfBsm$Z~Mnc+w@<zb!N-|KCBR0BD8JaB|1)(Ky*QL?@#|YJv%M%<S}bCcooc^dC^o}
z<?z&sEx^#M;Yk<~6Z879vR&RT*X!lo_4oDi$K8Yb;lbQa2uUNiX|e5d)9$KjMm6WQ
z-NVX$=i=S@Pvuud3wnKcJM>I2hUoW#zR)`*oMyWtgq{eUE4iRTBJ_!&*Q3=6y5<-?
z#7C<LEj@&wn&|~KkQJul=uR`Kj985_YJ**bzBiLvK<9)4T8DU(T0+AtHpQee1`<<)
zJ&tbspL~3BTr7M$z^QwbF#tiq_KfmhfBRH1qud~~C}z%^a7yLEzH8@APCSELQ5Iyz
zv>-f_*`KGM-iwRTX_ehtPAr;}(0)$R&<swL37I>kH#MhtJ?(NTyas~WBdEELyxK%%
zfeQ>R!VwhKet6AQU-iYB3KSm(s@34df{Ko4cSW^+dicJHN~s|3K{3Kgh_DJ~n0h9p
zo}5ax;7sxi+od&dHQA6HCFy*&VLWMg#w47~1QO2OWxxXXU?Ygc`-pehQX$MX^Xj&x
zhw7gjS-lO}wsA5=PCicB4qk2uASW-U!XGy{VIJX6P!ZJrK+SdJHKIB#buK52F?wSR
z=M03j!G%y&0FA+gckDJn;n?+g&61M_g3cgDPzb8e8H~XRok5&4P&T=LMva+)f9y6u
zp)>HMnvyHBkdkT?RM2TQIiWA`4m<?4M^JMedG+XG3{DtbL`4@;fm%od8dJ@lvD*NJ
zW7q376@^?*hv5tmRGc#yffG7|sLctA7`afuam~O#cAe8KIawCyG{ZKo2@0KNA1H<+
zFYFA);DpW~rO-tIGh3f9%+}3SnGK%+oNkqgoP7C#;=tWQ=C}mH%}eJ*GdR_UUjx^`
zP-Et}O3zS$l0=}cyr8zdmbl!ZJva_~>^cXv)fEmvKPM@&IUPi(-vEU<Q9w{;%W2p$
z<=g|Rji5}fpympykvopS33JD&RVAgAUMNGe3K$Q;X^z3^5PU&{6Z!(bFKB>5Uoa`C
zxm&35K+V)dHKL0VIAL@V6<ugxu+VPfKYGoO*W7S27=?vxToV*JgK0s{bmY~Y!5Ey-
z8AR2rXjff;lI(W|O38(GdpOY<gl${_%CM|L&zXZ-*Uq}MnnGbkEPPu;26feMsDMiX
z_Nhotd_VU|jN&q>4Jm=M7<(Z#^Dv>j&1V!;b02;qqMG~o8xz$`2Vmnitx-r}@|bj9
zyNfnB#vD<eR1t;lA%>IzjU{$*-T_giiV6?X-V3R@MjYM44@I>fRC85TBaSu9;Gx75
z#<5M<u1Xz@A?4Zbov=CMT7+OL#>DP9<rSapPemX<{UE7g#JrInfp&G80)6`n8cwwr
zyom$PN#M@if;A;3bQdYI9?sfzx23EGDZ16*RZ&hN+;u%MFk+vA<>*rys?BR7*YNP8
z>9c*uLCqFySPxVwvi7oWL|xYGP-CA=N<U@_3?PJ=Xr^0H*SGu2v(pH!U)MJ$-|inz
z?59mnXcyZ}Pbn~WdfmiZO$3EK%tX*?@Y)bGhTI`)p?&|n$~N#AfOrFtin0yAG0fW+
zWptp^1S@VTjPfFu7K8*+ai%1&5M)q{=1yAte)<uZKJ~C2K5J3+n|Apxi9|{}g}Y(O
z0%sc`(^#fuj8ic+Jg~OpXWDA;f|(}q^yVIUv`~Rfvb8f;oTY@Cz3f~KMzXPFT~VKd
z$!q2k5NW;(5<s+lvMd82fy>+(_At}jkZDUcJ}QkS%K`?r;%iu7RT}xG@hy!3a8eU)
z=YDQ92C2rN!4#u`5qy@=&Swb|*g%O$6J=U5MVmUJO>L4z129fHu&oYtCP1Mf7z=GN
zB_$*wsMc(x^^H@U8ldi3tqXL^l#!xP#mkg};k$FGX-uCdIXjm}vaoKuZk2O&-R#-|
z(k-&X08&h_RY15Z3|TUeR)ZH0#0#UA<Ygk6l|~99!lhvak8*jnJ2yKF9;Fd^27&@9
zE?~DL6!K`F)7{2$h|afhK`hk(3&E5KEuRx6gJl|~jCC43>UFw(EY2_aJAmnGf2;lz
zf;=Eb!~}Va7LjdF@o(YPGf}r1On^EUZxUMHY%U6NAA&3{8`ti#F8dVBOB$52ZZ$xf
zQ3*PC8Q0SIU34lk#|jp`wh2God^tM))kaYEi?_FTYxic{KN%6l28Qq_z#~6_&Y7Qz
zw=oKk_q4oAdz;6*)!>Elj<+5>U~^HK`%-3ozS5aqUOc5Y7Deo%V0S5Vh1--zU?J{F
zRsNiCIo`%SE8Bw9C!<~jbro(j<@*WhlmVd^8f?Z9SRm1am{7VRC1+%ZcUEDv@t_~A
zFxn)PpIgc@l5!w5ELj5(Wde7DAnf6=i@3SMOc@Uy*6PRHYVfKsCn4^#2F2}m#z3fo
z-xwa;CSin7GM(Jn`5$jTc*yZQa+|LG<aok12}@5!3bakR5~!oB?Pm};qtj2h)!<c8
zP9p5Lqgnh=rF|+b&nG4z2vVM!GPtBpQwZ*v!jNEaNKjIy#IS72N-6%bDEDjGBpetM
zO<C&pQ9L#4T(Sl*L=%KP1?=)D9zrxo`vR0^aR$mq-Z{J%&TM?nfskWE+p0QPQH-<)
z)2>uag=FqU%Fu>abclHlaV*ZL=l$pzy<v|x1md<OWY4W3=U&c|;T-1l_8E#LZHzaj
zw$EG6n5|4|H?-S=Q*h4FvtWa!lm2ne3DZfjoZE*o*0Yt0_TVKDq}@lmRJA*{OF7O;
z2TqA&t{61L*?vm8z0cHJ?HQUHZ68M0#@ko#*^d_>g)bFxlcUnOE?YI9?Sijj?Ias{
ztHD&rM*y7!*=q_=qI;BRWfhpEjn;R1>nm&?;$B?IP-u6r4({%CtBJXrVJ<JM>KMNO
z=G_jHTXMhs6$w@Ml&h#S;1yvVRri6C+h_QyJPRJ-PYu6ve-(Dab@-4)5B64riLno_
zlWTPzkGtcB7OH^D@+GvC-6vHh%8L?S1+#=pNV>Sy3Sf8f`Oc+4e2pEwTX4b4coAIp
zj`?SNLrvWQ;w$bocHB%|cD_Rpky|sIRGd+aX!a1%H05dlOnsQ6dx>-}ZE$EG`@@>r
zU91iqpzzY|q>+NT7X_gu6Jv-BaWhPr-NW^=Dnomwmhz`JQf8K@Rac~~Y;&$sy^=E{
zH0Y1KoLdcE73U<z{yB<Ff~eF!rEV;O3pJ7R)SMYGH?hp9R)OOTIcOQ2%hz0sk#{x$
znH)4pBCVn>GO?sPw~#%PHbMn_oS$^7!3!eYU*}T%hIq<e>zm6E)!MJt$}Ui;Am1(+
zVusk?e_E{xWmyTN+q-od%C2}zX^{6syenl(>gKYTWGWC@AnEZ)qqUygT1!C$T90Wo
z-JBxhN^<316c1DgaH4cDF_F8d*Q?93)9Ya+FuIF!M~8#7tox5=&c@Pe@G7u)k<=YK
z>ZNBGiGXjQRMPM<`lKwPYGa9D@wxM!%*{`yyz$R6dT<PTv#ka%_04w6ewLqa!}|(i
zRqtLz;Mh>>eT4}Y=NZZM$Qvu5XQ?mXeg6+CNcltn0096WiwFb&00000{{{d;LjnLP
z1MS+;ZrU&u2H@-LDNt^wR`#)dY^zs@b*r?NN-OOyQ3O$o)CB}twY~l9kkAkaBu#>b
zWYjh)J$1s@(;wSP{Cm~&zYp(!4*jcMybPk%WI0>J<0$wQ_Wb@Wf@DNrQn&y@P04Hc
z7QXi2s`nDS1yL}54kJ@{KN{UU-hCtM2M7@Urs93jtZtOnNs8LP`ZRf2Qtzh!)F1Xe
zJDf)aG3PV~%9Qdhz0c;;{$M<t_WbMN!}>hHsnB}+s`nQpzZ?Hum<wGj=aX=?3a7ct
z$|_cI5a%_LCwCmSPk3<DTwQVOL(;XBxAfjeZEm>!BXNyfsq-^DO}B=B=8+|)Qab5V
zNOM4C4=A`%zSu9#G(&Yr8c_&YUwU}|bbVVQEt^4SLzhF#I?h~?O3sAI{rS6gMkYai
z#JNy#R?fAI%hR`^pzG9Qoth?sklbdkP((QAPMrlbch*!RoS}2&IayU_YMsi{Bh^;O
zh|}&uJMC(zky&Q~%B4q#>Cuj}>*r(rthi(@H%{QJsm*8e)y?iY%nbw%#2$p8&bdQp
zh7{durG00}oF7L&JWz^qk5Q!M;Y@9U(*wv1B;xL-W*p9<@nk-oMb8yBh`#pkC?$gu
z+wHOHYzw91UXX3lIC$>)gTa>v^W<bQUD+38zcp)Qp4$-GCAWKOK#!9%Fb`57Pg;9p
zexR|Pd28mmMNBG>OqDMfoqTKZ$;Mq~uH=Ujrcm%7n^y|#%yTyt7&U-RQ-P!T;GH@(
z-KKx<P*zK<QU^l$|9<KVT7dIy%Xibcw0Y-J0(d5})fRQNWQ&<l=v?~o1a-dqd{^;&
zVPAOCzerSW4ouvvpvzf-IYEM*j|_0+oi95WD5BjPV7qUCNiz%1Np4l)^5X=}TrhVl
zxlYZ^)h^@g?b4t#*Y2lncT_~WDPX%(!1X?}k^;3kXiaGOopK-v9<*gr>Xz=58xX0@
zXqTPcD7QI5<$Y%BEM+BUzT=#117*${Fu7Z!wsWE2tek5Zmn{Pu3%X7{)~V}6FhPLa
sIk)Wm50iKxG}SKv03VA81ONa4009360763o02=@U000000000001%faGynhq

literal 0
HcmV?d00001

diff --git a/tests/example.gtf.gz.tbi b/tests/example.gtf.gz.tbi
new file mode 100644
index 0000000000000000000000000000000000000000..eb3f24c6ba9cd8cf5c4551ced2edfd08518ce9e7
GIT binary patch
literal 260
zcmb2|=3rp}f&Xj_PR>jW&l$K2Kc%E3H88j-q%kIiDTK9k6*3kUxlL5;Fi3j9_Q__Z
zf|k9PW5^o|wuQE>3Xcpv?2>S*c08K+XGW#KEom*j$NOX07S=9!WU%j<8OK|0(e{tw
z@7gca1r$o`|02-xt-Z_Miszxc1@}T`pqhVIw+mPa05xs?*K)xTsK!2OOPjQqky-%%
z;X_P!^$oUv%XeS;|Bb^|=`=HqFNqIdt1xa<cVcvEcepFKmi3{rfzD&*i5ot&?mQhT
t>2&yz__6kvj}sjjX2{KmopQ;*n&H}%16RVB7#QTy{3*@A4E8pN004_)Vut_#

literal 0
HcmV?d00001

diff --git a/tests/pysam_test.py b/tests/pysam_test.py
index c2ae6fa..5be5ca4 100755
--- a/tests/pysam_test.py
+++ b/tests/pysam_test.py
@@ -7,7 +7,7 @@ and data files located there.
 
 import pysam
 import unittest
-import os
+import os, re
 import itertools
 import subprocess
 import shutil
@@ -47,7 +47,13 @@ def runSamtools( cmd ):
     except OSError, e:
         print >>sys.stderr, "Execution failed:", e
 
-        
+def getSamtoolsVersion():
+    '''return the version printed on stderr by the samtools binary found on the PATH.'''
+    # communicate() drains stderr, closes the pipe and waits for the child to exit
+    process = subprocess.Popen("samtools", shell=True, stderr=subprocess.PIPE)
+    lines = process.communicate()[1]
+    return re.search( r"Version:\s+(\S+)", lines).groups()[0]
+
 class BinaryTest(unittest.TestCase):
     '''test samtools command line commands and compare
     against pysam commands.
@@ -125,12 +131,19 @@ class BinaryTest(unittest.TestCase):
 
             BinaryTest.first_time = False
 
+        samtools_version = getSamtoolsVersion()
+        if samtools_version != pysam.__samtools_version__:
+            raise ValueError("versions of pysam/samtools and samtools differ: %s != %s" % \
+                                 (pysam.__samtools_version__,
+                                  samtools_version ))
+
     def checkCommand( self, command ):
+
         if command:
             samtools_target, pysam_target = self.mCommands[command][0][0], self.mCommands[command][1][0]
             self.assertTrue( checkBinaryEqual( samtools_target, pysam_target ), 
                              "%s failed: files %s and %s are not the same" % (command, samtools_target, pysam_target) )
-
+            
     def testImport( self ):
         self.checkCommand( "import" )
 
@@ -153,7 +166,7 @@ class BinaryTest(unittest.TestCase):
         self.assertRaises( pysam.SamtoolsError, pysam.index, "exdoesntexist.bam" )
 
     def __del__(self):
-
+        return
         for label, command in self.mCommands.iteritems():
             samtools_target, samtools_command = command[0]
             pysam_target, pysam_command = command[1]
@@ -254,6 +267,12 @@ class IOTest(unittest.TestCase):
         self.assertRaises( ValueError, samfile.fetch )
         self.assertEqual( len(list( samfile.fetch(until_eof = True) )), 3270 )
 
+    def testReadingFromFileWithWrongMode( self ):
+
+        assert not os.path.exists( "ex2.bam.bai" )
+        samfile = pysam.Samfile( "ex2.bam", "r" )
+        self.assertRaises( ValueError, samfile.fetch )
+
 class TestIteratorRow(unittest.TestCase):
 
     def setUp(self):
@@ -283,6 +302,7 @@ class TestIteratorRow(unittest.TestCase):
     def tearDown(self):
         self.samfile.close()
 
+
 class TestIteratorRowAll(unittest.TestCase):
 
     def setUp(self):
@@ -348,7 +368,8 @@ class TestIteratorColumn(unittest.TestCase):
                     self.assertEqual( len(columns), refcov, "wrong number of pileup columns returned for position %s:%i, %i should be %i" %(contig,pos,len(columns), refcov) )
                 elif refcov == 1:
                     # one read, all columns of the read are returned
-                    self.assertEqual( len(columns), refcolumns, "pileup incomplete - %i should be %i " % (len(columns), refcolumns))
+                    self.assertEqual( len(columns), refcolumns, "pileup incomplete at position %i: got %i, expected %i " %\
+                                          (pos, len(columns), refcolumns))
                     
     def tearDown(self):
         self.samfile.close()
@@ -398,10 +419,22 @@ class TestAlignedReadFromBam(unittest.TestCase):
     def testARseq(self):
         self.assertEqual( self.reads[0].seq, "AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG", "sequence mismatch in read 1: %s != %s" % (self.reads[0].seq, "AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG") )
         self.assertEqual( self.reads[1].seq, "ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA", "sequence size mismatch in read 2: %s != %s" % (self.reads[1].seq, "ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA") )
+        self.assertEqual( self.reads[3].seq, "AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG", "sequence mismatch in read 4: %s != %s" % (self.reads[3].seq, "AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG") )
 
     def testARqual(self):
         self.assertEqual( self.reads[0].qual, "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<", "quality string mismatch in read 1: %s != %s" % (self.reads[0].qual, "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<") )
         self.assertEqual( self.reads[1].qual, "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<", "quality string mismatch in read 2: %s != %s" % (self.reads[1].qual, "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<") )
+        self.assertEqual( self.reads[3].qual, "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<", "quality string mismatch in read 3: %s != %s" % (self.reads[3].qual, "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<") )
+
+    def testARquery(self):
+        self.assertEqual( self.reads[0].query, "AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG", "query mismatch in read 1: %s != %s" % (self.reads[0].query, "AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG") )
+        self.assertEqual( self.reads[1].query, "ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA", "query size mismatch in read 2: %s != %s" % (self.reads[1].query, "ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA") )
+        self.assertEqual( self.reads[3].query, "TAGCTAGCTACCTATATCTTGGTCTT", "query mismatch in read 4: %s != %s" % (self.reads[3].query, "TAGCTAGCTACCTATATCTTGGTCTT") )
+
+    def testARqqual(self):
+        self.assertEqual( self.reads[0].qqual, "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<", "qquality string mismatch in read 1: %s != %s" % (self.reads[0].qqual, "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<") )
+        self.assertEqual( self.reads[1].qqual, "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<", "qquality string mismatch in read 2: %s != %s" % (self.reads[1].qqual, "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<") )
+        self.assertEqual( self.reads[3].qqual, "<<<<<<<<<<<<<<<<<:<9/,&,22", "qquality string mismatch in read 3: %s != %s" % (self.reads[3].qqual, "<<<<<<<<<<<<<<<<<:<9/,&,22") )
 
     def testPresentOptionalFields(self):
         self.assertEqual( self.reads[0].opt('NM'), 1, "optional field mismatch in read 1, NM: %s != %s" % (self.reads[0].opt('NM'), 1) )
@@ -518,7 +551,14 @@ class TestPileupObjects(unittest.TestCase):
 
     def tearDown(self):
         self.samfile.close()
-        
+
+class TestContextManager(unittest.TestCase):
+
+    def testManager( self ):
+        with pysam.Samfile('ex1.bam', 'rb') as samfile:
+            samfile.fetch()
+        self.assertEqual( samfile._isOpen(), False )
+
 class TestExceptions(unittest.TestCase):
 
     def setUp(self):
@@ -581,20 +621,26 @@ class TestFastaFile(unittest.TestCase):
             self.assertEqual( seq, self.file.fetch( id ) )
             for x in range( 0, len(seq), 10):
                 self.assertEqual( seq[x:x+10], self.file.fetch( id, x, x+10) )
+                # test x:end
+                self.assertEqual( seq[x:], self.file.fetch( id, x) )
+                # test 0:x
+                self.assertEqual( seq[:x], self.file.fetch( id, None, x) )
+
+        
+        # unknown sequence returns ""
+        self.assertEqual( "", self.file.fetch("chr12") )
 
     def testFetchErrors( self ):
         self.assertRaises( ValueError, self.file.fetch )
-        self.assertRaises( ValueError, self.file.fetch, "chr1", 0 )
         self.assertRaises( ValueError, self.file.fetch, "chr1", -1, 10 )
         self.assertRaises( ValueError, self.file.fetch, "chr1", 20, 10 )
-        # the following segfaults:
-        # self.assertRaises( IndexError, self.file.fetch, "chr12", )
-        pass
 
+    def testLength( self ):
+        self.assertEqual( len(self.file), 2 )
+        
     def tearDown(self):
         self.file.close()
 
-
 class TestAlignedRead(unittest.TestCase):
     '''tests to check if aligned read can be constructed
     and manipulated.
@@ -803,7 +849,7 @@ class TestDeNovoConstruction(unittest.TestCase):
         others = list(infile)
         for denovo, other in zip( others, self.reads):
             self.checkFieldEqual( other, denovo )
-            self.assertEqual( other, denovo)
+            self.assertEqual( other.compare( denovo ), 0 )
 
     def testSAMPerRead( self ):
         '''check if individual reads are binary equal.'''
@@ -812,7 +858,7 @@ class TestDeNovoConstruction(unittest.TestCase):
         others = list(infile)
         for denovo, other in zip( others, self.reads):
             self.checkFieldEqual( other, denovo )
-            self.assertEqual( other, denovo)
+            self.assertEqual( other.compare( denovo), 0 )
             
     def testBAMWholeFile( self ):
         
@@ -828,6 +874,82 @@ class TestDeNovoConstruction(unittest.TestCase):
         
         os.unlink( tmpfilename )
 
+class TestDoubleFetch(unittest.TestCase):
+    '''check if two iterators on the same bamfile are independent.'''
+    
+    def testDoubleFetch( self ):
+
+        samfile1 = pysam.Samfile('ex1.bam', 'rb')
+
+        for a,b in zip(samfile1.fetch(), samfile1.fetch()):
+            self.assertEqual( a.compare( b ), 0 )
+
+    def testDoubleFetchWithRegion( self ):
+
+        samfile1 = pysam.Samfile('ex1.bam', 'rb')
+        chr, start, stop = 'chr1', 200, 3000000
+        self.assertTrue(len(list(samfile1.fetch ( chr, start, stop))) > 0) #just making sure the test has something to catch
+
+        for a,b in zip(samfile1.fetch( chr, start, stop), samfile1.fetch( chr, start, stop)):
+            self.assertEqual( a.compare( b ), 0 ) 
+
+    def testDoubleFetchUntilEOF( self ):
+
+        samfile1 = pysam.Samfile('ex1.bam', 'rb')
+
+        for a,b in zip(samfile1.fetch( until_eof = True), 
+                       samfile1.fetch( until_eof = True )):
+            self.assertEqual( a.compare( b), 0 )
+
+class TestRemoteFileFTP(unittest.TestCase):
+    '''test remote access.
+
+    '''
+
+    # Need to find an ftp server without password on standard
+    # port.
+
+    url = "ftp://ftp.sanger.ac.uk/pub/rd/humanSequences/CV.bam"
+    region = "1:1-1000"
+
+    def testFTPView( self ):
+        result = pysam.view( self.url, self.region )
+        self.assertEqual( len(result), 36 )
+        
+    def testFTPFetch( self ):
+        samfile = pysam.Samfile(self.url, "rb")  
+        result = list(samfile.fetch( region = self.region ))
+        self.assertEqual( len(result), 36 )
+
+class TestRemoteFileHTTP( unittest.TestCase):
+
+    url = "http://genserv.anat.ox.ac.uk/downloads/pysam/test/ex1.bam"
+    region = "chr1:1-1000"
+    local = "ex1.bam"
+
+    def testView( self ):
+        self.assertRaises( pysam.SamtoolsError, pysam.view, self.url, self.region )
+        
+    def testFetch( self ):
+        samfile = pysam.Samfile(self.url, "rb")  
+        result = list(samfile.fetch( region = self.region ))
+        samfile_local = pysam.Samfile(self.local, "rb")  
+        ref = list(samfile_local.fetch( region = self.region ))
+
+        self.assertEqual( len(ref), len(result) )
+        for x, y in zip(result, ref):
+            self.assertEqual( x.compare( y ), 0 )
+
+    def testFetchAll( self ):
+        samfile = pysam.Samfile(self.url, "rb")  
+        result = list(samfile.fetch())
+        samfile_local = pysam.Samfile(self.local, "rb")  
+        ref = list(samfile_local.fetch() )
+
+        self.assertEqual( len(ref), len(result) )
+        for x, y in zip(result, ref):
+            self.assertEqual( x.compare( y ), 0 )
+
 
 # TODOS
 # 1. finish testing all properties within pileup objects
diff --git a/tests/tabix_test.py b/tests/tabix_test.py
new file mode 100644
index 0000000..8eb8a60
--- /dev/null
+++ b/tests/tabix_test.py
@@ -0,0 +1,225 @@
+#!/usr/bin/env python
+'''unit testing code for pysam.
+
+Execute in the :file:`tests` directory as it requires the Makefile
+and data files located there.
+'''
+
+import sys, os, shutil, gzip
+import pysam
+import unittest
+import itertools
+import subprocess
+
+def checkBinaryEqual( filename1, filename2 ):
+    '''return true if the two files are binary equal.
+
+    Compares in chunks; both files are closed even if reading fails.
+    '''
+    if os.path.getsize( filename1 ) !=  os.path.getsize( filename2 ):
+        return False
+
+    chunksize = 65536
+    infile1 = open(filename1, "rb")
+    try:
+        infile2 = open(filename2, "rb")
+        try:
+            while 1:
+                c1 = infile1.read(chunksize)
+                c2 = infile2.read(chunksize)
+                if c1 != c2: return False
+                # equal and empty chunks: both files ended without a difference
+                if not c1: return True
+        finally:
+            infile2.close()
+    finally:
+        infile1.close()
+
+class TestIndexing(unittest.TestCase):
+    '''index a copy of the compressed file and compare with the stored index.'''
+    filename = "example.gtf.gz"
+    filename_idx = "example.gtf.gz.tbi"
+
+    def setUp( self ):
+        self.tmpfilename = "tmp_%i.gtf.gz" % id(self)
+        shutil.copyfile( self.filename, self.tmpfilename )
+
+    def testIndexPreset( self ):
+        '''test indexing via preset.'''
+        pysam.tabix_index( self.tmpfilename, preset = "gff" )
+        self.assertTrue( checkBinaryEqual( self.tmpfilename + ".tbi", self.filename_idx ) )
+
+    def tearDown( self ):
+        # a failed test may not have written the index: remove only what exists
+        for fn in (self.tmpfilename, self.tmpfilename + ".tbi"):
+            if os.path.exists( fn ): os.unlink( fn )
+
+class TestCompression(unittest.TestCase):
+    '''compress and index a plain-text copy, compare with the stored files.'''
+    filename = "example.gtf.gz"
+    filename_idx = "example.gtf.gz.tbi"
+
+    def setUp( self ):
+        self.tmpfilename = "tmp_%i.gtf" % id(self)
+        infile = gzip.open( self.filename, "r")
+        outfile = open( self.tmpfilename, "w" )
+        outfile.write( infile.read() )
+        outfile.close()
+        infile.close()
+
+    def testIndexPreset( self ):
+        '''test indexing via preset.'''
+        pysam.tabix_index( self.tmpfilename, preset = "gff" )
+        # NOTE(review): assumes the compressed output is byte-reproducible - confirm
+        self.assertTrue( checkBinaryEqual( self.tmpfilename + ".gz", self.filename ) )
+        self.assertTrue( checkBinaryEqual( self.tmpfilename + ".gz.tbi", self.filename_idx ) )
+
+    def tearDown( self ):
+        for s in ("", ".gz", ".gz.tbi"):
+            if os.path.exists( self.tmpfilename + s ): os.unlink( self.tmpfilename + s )
+
+class TestIteration( unittest.TestCase ):
+    '''compare the results of Tabixfile.fetch with a scan of the file.'''
+    filename = "example.gtf.gz"
+
+    def setUp( self ):
+        '''open the tabix file and collect the reference lines.'''
+        self.tabix = pysam.Tabixfile( self.filename )
+        infile = gzip.open(self.filename)
+        fields = [ (y.split("\t"), y[:-1]) for y in infile ]
+        infile.close()
+        # creates index of contig, start, end, adds content without newline.
+        self.compare = [ (f[0], int(f[3]), int(f[4]), line) for f, line in fields ]
+
+    def getSubset( self, contig = None, start = None, end = None):
+        '''return the reference lines expected from fetch( contig, start, end ).'''
+        if contig is None:
+            # all lines
+            subset = [ x[3] for x in self.compare ]
+        else:
+            if start is not None and end is None:
+                # until end of contig
+                subset = [ x[3] for x in self.compare if x[0] == contig and x[2] > start ]
+            elif start is None and end is not None:
+                # from start of contig
+                subset = [ x[3] for x in self.compare if x[0] == contig and x[1] <= end ]
+            elif start is None and end is None:
+                subset = [ x[3] for x in self.compare if x[0] == contig ]
+            else:
+                # all within interval
+                subset = [ x[3] for x in self.compare if x[0] == contig and \
+                               min( x[2], end) - max(x[1], start) > 0 ]
+
+        return subset
+
+    def checkPairwise( self, result, ref ):
+        '''assert that result and ref hold the same lines, ignoring order.'''
+        result = sorted( result )
+        ref = sorted( ref )
+
+        a = set(result)
+        b = set(ref)
+
+        self.assertEqual( len(result), len(ref),
+                          "unexpected number of results: %i, expected %i, differences are %s: %s" \
+                              % (len(result), len(ref),
+                                 a.difference(b), 
+                                 b.difference(a) ))
+
+        for x, (got, expected) in enumerate( zip( result, ref )):
+
+            self.assertEqual( got, expected,
+                              "unexpected results in pair %i: '%s', expected '%s'" % \
+                                  (x, 
+                                   got, 
+                                   expected) )
+
+
+    def testAll( self ):
+        '''fetch without arguments returns every line.'''
+        result = list(self.tabix.fetch())
+        self.checkPairwise( result, self.getSubset() )
+
+    def testPerContigWithoutCoordinates( self ):
+        '''fetch whole contigs, passing the contig name only.'''
+        for contig in ("chr1", "chr2", "chr1", "chr2" ):
+            result = list(self.tabix.fetch( contig ))
+            self.checkPairwise( result, self.getSubset( contig ) )
+
+    def testPerContigToEnd( self ):
+        '''fetch from a start position without giving an end.'''
+        end = None
+        for contig in ("chr1", "chr2", "chr1", "chr2" ):
+            for start in range( 0, 200000, 1000):
+                result = list(self.tabix.fetch( contig, start, end ))
+                ref = self.getSubset( contig, start, end )
+                self.checkPairwise( result, ref )
+
+    def testPerContigFromStart( self ):
+        '''fetch up to an end position without giving a start.'''
+        start = None
+        for contig in ("chr1", "chr2", "chr1", "chr2" ):
+            for end in range( 0, 200000, 1000):
+                result = list(self.tabix.fetch( contig, start, end ))
+                ref = self.getSubset( contig, start, end )
+                self.checkPairwise( result, ref )
+
+    def testPerContig( self ):
+        '''fetch whole contigs with start and end explicitly set to None.'''
+        start, end  = None, None
+        for contig in ("chr1", "chr2", "chr1", "chr2" ):
+            result = list(self.tabix.fetch( contig, start, end ))
+            ref = self.getSubset( contig, start, end )
+            self.checkPairwise( result, ref )
+
+    def testPerInterval( self ):
+        '''fetch windows given by both start and end.'''
+        # the first window of each start is empty (end == start)
+        for contig in ("chr1", "chr2", "chr1", "chr2" ):
+            for start in range( 0, 200000, 2000):
+                for end in range( start, start + 2000, 500):
+                    result = list(self.tabix.fetch( contig, start, end ))
+                    ref = self.getSubset( contig, start, end )
+                    self.checkPairwise( result, ref )
+
+
+    def testInvalidIntervals( self ):
+        '''invalid coordinates and an unknown contig raise ValueError.'''
+        self.assertRaises( ValueError, self.tabix.fetch, "chr1", 0, -10)
+        self.assertRaises( ValueError, self.tabix.fetch, "chr1", -10, 200)
+        self.assertRaises( ValueError, self.tabix.fetch, "chr1", 200, 0)
+        self.assertRaises( ValueError, self.tabix.fetch, "chr1", -10, -20)
+        self.assertRaises( ValueError, self.tabix.fetch, "chrUn" )
+
+    def testGetContigs( self ):
+        self.assertEqual( sorted(self.tabix.contigs), ["chr1", "chr2"] )
+        # check that contigs is read-only
+        self.assertRaises( AttributeError, setattr, self.tabix, "contigs", ["chr1", "chr2"] )
+
+class TestParser( unittest.TestCase ):
+    '''check the parsers that can be passed to Tabixfile.fetch.'''
+    filename = "example.gtf.gz"
+
+    def setUp( self ):
+        '''open the tabix file and split every line of the file into fields.'''
+        self.tabix = pysam.Tabixfile( self.filename )
+        self.compare = [ line[:-1].split("\t") for line in gzip.open( self.filename, "r") ]
+
+    def testGTF( self ):
+        '''str() of a row parsed with asGTF equals the tab-joined fields.'''
+        for idx, row in enumerate(self.tabix.fetch( parser = pysam.asGTF() )):
+            self.assertEqual( "\t".join( self.compare[idx]), str(row) )
+
+    def testTuple( self ):
+        '''rows parsed with asTuple match the fields as list, in length and by index.'''
+        for idx, row in enumerate(self.tabix.fetch( parser = pysam.asTuple() )):
+            expected = self.compare[idx]
+            self.assertEqual( expected, list(row) )
+            self.assertEqual( len(expected), len(row) )
+            for col in range(len(row)):
+                self.assertEqual( expected[col], row[col] )
+
+if __name__ == "__main__":
+    unittest.main()  # run the tests of this module when executed as a script
+
+
-- 
2.30.2