+
+#########################################################
+#########################################################
+#########################################################
+## Iterators for parsing through unindexed files.
+#########################################################
+ctypedef class tabix_inplace_iterator:
+    '''iterate over ``infile``.
+
+    Lines are read with ``getline()`` into a buffer owned by the iterator
+    and presented to a parser object created via ``self.Parser()``.
+
+    This iterator is not safe. If the :meth:`__next__()` method is called
+    after ``infile`` is closed, the result is undefined (see ``fclose()``).
+
+    The iterator might either raise a StopIteration or segfault.
+    '''
+
+    def __cinit__(self, infile, int buffer_size = 65536 ):
+        # Wrap the descriptor of ``infile`` in a C stream and allocate the
+        # initial line buffer of ``buffer_size`` bytes.
+        #
+        # Raises ValueError if no descriptor/stream can be obtained and
+        # MemoryError if the line buffer cannot be allocated.
+        # NOTE(review): the FILE* returned by fdopen() is never fclose()d
+        # in this class - confirm that this is intentional.
+
+        cdef int fd = PyObject_AsFileDescriptor( infile )
+        if fd == -1:
+            raise ValueError( "I/O operation on closed file." )
+
+        self.infile = fdopen( fd, 'r')
+        if self.infile == NULL:
+            raise ValueError( "I/O operation on closed file." )
+
+        if buffer_size > 0:
+            self.buffer = <char*>malloc( buffer_size )
+            if self.buffer == NULL:
+                raise MemoryError( "could not allocate line buffer" )
+            self.size = buffer_size
+        else:
+            # getline() allocates the buffer itself when handed NULL
+            self.buffer = NULL
+            self.size = 0
+
+    def __iter__(self):
+        return self
+
+    cdef __cnext__(self):
+        # Return the parser object presenting the next data line.
+        # Comment lines (starting with '#') and empty lines are skipped.
+        # Raises ValueError for a line without a trailing line break and
+        # StopIteration at end of file or on a read error.
+
+        cdef char * b
+        # getline() returns a signed count and -1 on failure
+        cdef Py_ssize_t nbytes
+
+        r = self.Parser()
+
+        while not feof( self.infile ):
+            # read straight into self.buffer/self.size: getline() may
+            # realloc() the buffer and the attributes have to follow it,
+            # otherwise later calls and __dealloc__ use a stale pointer
+            nbytes = getline( &self.buffer, &self.size, self.infile)
+
+            # stop at first error or eof
+            if nbytes == -1: break
+
+            b = self.buffer
+
+            # skip comments
+            if b[0] == '#': continue
+
+            # skip empty lines
+            if b[0] == '\0' or b[0] == '\n' or b[0] == '\r': continue
+
+            # make sure that entry is complete
+            if b[nbytes-1] != '\n' and b[nbytes-1] != '\r':
+                result = b
+                raise ValueError( "incomplete line at %s" % result )
+
+            # make sure that this goes fully through C
+            # otherwise buffer is copied to/from a
+            # Python object causing segfaults as
+            # the wrong memory is freed
+            r.present( b, nbytes )
+            return r
+
+        raise StopIteration
+
+    def __dealloc__(self):
+        # free(NULL) is a no-op, so this is safe even if __cinit__ failed
+        # before the buffer was allocated
+        free(self.buffer)
+
+    def __next__(self):
+        return self.__cnext__()
+
+ctypedef class tabix_copy_iterator:
+    '''iterate over ``infile``.
+
+    Each data line is read into a freshly allocated buffer which is
+    passed to ``parser`` together with its length.
+
+    This iterator is not safe. If the :meth:`__next__()` method is called
+    after ``infile`` is closed, the result is undefined (see ``fclose()``).
+
+    The iterator might either raise a StopIteration or segfault.
+    '''
+
+    def __cinit__(self, infile, Parser parser ):
+        # Wrap the descriptor of ``infile`` in a C stream and remember
+        # ``parser``. Raises ValueError if no descriptor/stream can be
+        # obtained.
+
+        cdef int fd = PyObject_AsFileDescriptor( infile )
+        if fd == -1:
+            raise ValueError( "I/O operation on closed file." )
+
+        self.infile = fdopen( fd, 'r')
+        if self.infile == NULL:
+            raise ValueError( "I/O operation on closed file." )
+
+        self.parser = parser
+
+    def __iter__(self):
+        return self
+
+    cdef __cnext__(self):
+        # Return ``self.parser(buffer, nbytes)`` for the next data line.
+        # Comment lines (starting with '#') and empty lines are skipped.
+        # Raises ValueError for a line without a trailing line break and
+        # StopIteration at end of file or on a read error.
+
+        cdef char * b = NULL
+        # capacity of ``b`` as maintained by getline(); kept separate
+        # from the return value so that getline() is never handed an
+        # uninitialised or unrelated size
+        cdef size_t size = 0
+        # getline() returns a signed count and -1 on failure
+        cdef Py_ssize_t nbytes
+
+        while not feof( self.infile ):
+
+            # getline allocates on demand
+            # return number of characters read excluding null byte
+            nbytes = getline( &b, &size, self.infile)
+
+            # stop at first error
+            if nbytes == -1: break
+
+            # skip comments
+            if b[0] == '#': continue
+
+            # skip empty lines
+            if b[0] == '\0' or b[0] == '\n' or b[0] == '\r': continue
+
+            # make sure that entry is complete
+            if b[nbytes-1] != '\n' and b[nbytes-1] != '\r':
+                # ``result`` is a Python copy, so ``b`` can be released
+                # before the message is built
+                result = b
+                free(b)
+                raise ValueError( "incomplete line at %s" % result )
+
+            # make sure that this goes fully through C
+            # otherwise buffer is copied to/from a
+            # Python object causing segfaults as
+            # the wrong memory is freed
+            # The full length including the line break is passed on;
+            # the buffer is not freed here.
+            # NOTE(review): presumably the parser takes ownership of
+            # ``b`` - confirm against the Parser implementation.
+            return self.parser(b, nbytes)
+
+        # nothing was handed out: release whatever getline() allocated
+        free(b)
+        raise StopIteration
+
+    def __next__(self):
+        return self.__cnext__()
+
+class tabix_generic_iterator:
+    '''iterate over ``infile``.
+
+    Permits the use of file-like objects for example from the gzip module.
+
+    ``infile`` needs a ``closed`` attribute and a ``readline()`` method.
+    ``parser`` is called as ``parser(buffer, nbytes)`` with a malloc'ed
+    copy of each data line.
+    '''
+    def __init__(self, infile, parser ):
+
+        self.infile = infile
+        if self.infile.closed:
+            raise ValueError( "I/O operation on closed file." )
+        self.parser = parser
+
+    def __iter__(self):
+        return self
+
+    # cython version - required for python 3
+    def __next__(self):
+        '''return the parsed next data line.
+
+        Comment lines (starting with '#') and empty lines are skipped.
+        Raises ValueError for a line without a trailing line break and
+        StopIteration once ``readline()`` returns an empty result.
+        '''
+
+        cdef char * b
+        cdef char * cpy
+        cdef size_t nbytes
+
+        while 1:
+
+            line = self.infile.readline()
+            if not line: break
+
+            # keep ``s`` referenced while ``b`` points into it
+            s = _force_bytes( line )
+            b = s
+            # measure the bytes that ``b`` points to, not ``line``: the
+            # two lengths can differ when ``line`` is text that
+            # _force_bytes had to encode. A bytes object is always
+            # NUL-terminated, so b[nbytes] == '\0' holds.
+            nbytes = len( s )
+
+            # skip comments
+            if b[0] == '#': continue
+
+            # skip empty lines
+            if b[0] == '\0' or b[0] == '\n' or b[0] == '\r': continue
+
+            # make sure that entry is complete
+            if b[nbytes-1] != '\n' and b[nbytes-1] != '\r':
+                raise ValueError( "incomplete line at %s" % line )
+
+            # create a copy (including the terminating NUL byte)
+            cpy = <char*>malloc(nbytes+1)
+            if cpy == NULL: raise MemoryError()
+            memcpy( cpy, b, nbytes+1)
+
+            # the copy is not freed here.
+            # NOTE(review): presumably the parser takes ownership of
+            # ``cpy`` - confirm against the parser implementation.
+            return self.parser(cpy, nbytes)
+
+        raise StopIteration
+
+    # python version - required for python 2.7
+    def next(self):
+        return self.__next__()
+
+def tabix_iterator( infile, parser ):
+ """return an iterator over all entries in a file.
+
+ ``infile`` and ``parser`` are passed on unchanged to
+ :class:`tabix_generic_iterator`, which is currently always used.
+ """
+ return tabix_generic_iterator( infile, parser )
+ # DISABLED (kept for reference only): file objects can use C stdio
+ # via tabix_copy_iterator; the check used to be: isinstance( infile, file):
+ # if PYTHON3:
+ # if isinstance( infile, io.IOBase ):
+ # return tabix_copy_iterator( infile, parser )
+ # else:
+ # return tabix_generic_iterator( infile, parser )
+ # else:
+# if isinstance( infile, file ):
+# return tabix_copy_iterator( infile, parser )
+# else:
+# return tabix_generic_iterator( infile, parser )