X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=pysam.git;a=blobdiff_plain;f=pysam%2Fctabix.pyx;h=a31f3f244b37329286cc23760d5c2942b8628103;hp=25c1a1ee732df3e383a29e80c1ea2ae533b7925e;hb=ca46ef4ba4a883c57cea62d5bf1bc021f1185109;hpb=d02fe5283ed7a93a2f76a5d6dc6e37b40c11b9b1 diff --git a/pysam/ctabix.pyx b/pysam/ctabix.pyx index 25c1a1e..a31f3f2 100644 --- a/pysam/ctabix.pyx +++ b/pysam/ctabix.pyx @@ -33,7 +33,9 @@ cdef class Tabixfile: if self.tabixfile != NULL: self.close() self.tabixfile = NULL - self.filename = filename + if self._filename != NULL: free(self._filename ) + self._filename = strdup( filename ) + filename_index = filename + ".tbi" if mode[0] == 'w': @@ -42,14 +44,14 @@ cdef class Tabixfile: elif mode[0] == "r": # open file for reading - if not os.path.exists( self.filename ): - raise IOError( "file `%s` not found" % self.filename) + if not os.path.exists( self._filename ): + raise IOError( "file `%s` not found" % self._filename) if not os.path.exists( filename_index ): raise IOError( "index `%s` not found" % filename_index) # open file and load index - self.tabixfile = ti_open( self.filename, filename_index ) + self.tabixfile = ti_open( self._filename, filename_index ) if self.tabixfile == NULL: raise IOError("could not open file `%s`" % filename ) @@ -136,12 +138,26 @@ cdef class Tabixfile: else: return TabixIteratorParsed( self, -1, 0, 0, parser ) + ############################################################### + ############################################################### + ############################################################### + ## properties + ############################################################### + property filename: + '''filename associated with this object.''' + def __get__(self): + if not self._isOpen(): raise ValueError( "I/O operation on closed file" ) + return self._filename + property header: + '''the file header. + + .. note:: + The header is returned as an iterator over lines without the + newline character. 
+ ''' + def __get__( self ): - '''return header lines as an iterator. - - Note that the header lines do not contain the newline '\n' character. - ''' return TabixHeaderIterator( self ) property contigs: @@ -158,6 +174,22 @@ cdef class Tabixfile: result.append( sequences[x] ) return result + def close( self ): + ''' + closes the :class:`pysam.Tabixfile`.''' + if self.tabixfile != NULL: + ti_close( self.tabixfile ) + self.tabixfile = NULL + + def __dealloc__( self ): + # remember: dealloc cannot call other python methods + # note: no doc string + # note: __del__ is not called. + if self.tabixfile != NULL: + ti_close( self.tabixfile ) + self.tabixfile = NULL + if self._filename != NULL: free( self._filename ) + cdef class TabixIterator: """iterates over rows in *tabixfile* in region given by *tid*, *start* and *end*. @@ -265,7 +297,10 @@ cdef class Parser: pass cdef class asTuple(Parser): - '''converts a :term:`tabix row` into a python tuple.''' + '''converts a :term:`tabix row` into a python tuple. + + Access is by numeric index. + ''' def __call__(self, char * buffer, int len): cdef TabProxies.TupleProxy r r = TabProxies.TupleProxy() @@ -275,7 +310,36 @@ cdef class asTuple(Parser): return r cdef class asGTF(Parser): - '''converts a :term:`tabix row` into a GTF record.''' + '''converts a :term:`tabix row` into a GTF record with the following + fields: + + contig + contig + feature + feature + source + source + start + genomic start coordinate (0-based) + end + genomic end coordinate plus one (0-based) + score + feature score + strand + strand + frame + frame + attributes + attribute string. 
+ + GTF formatted entries also define the attributes: + + gene_id + the gene identifier + transcript_id + the transcript identifier + + ''' def __call__(self, char * buffer, int len): cdef TabProxies.GTFProxy r r = TabProxies.GTFProxy() @@ -283,7 +347,39 @@ cdef class asGTF(Parser): return r cdef class asBed( Parser ): - '''converts a :term:`tabix row` into a GTF record.''' + '''converts a :term:`tabix row` into a bed record + with the following fields: + + contig + contig + start + genomic start coordinate (zero-based) + end + genomic end coordinate plus one (zero-based) + name + name of feature. + score + score of feature + strand + strand of feature + thickStart + thickStart + thickEnd + thickEnd + itemRGB + itemRGB + blockCount + number of blocks + blockSizes + ',' separated string of block sizes + blockStarts + ',' separated string of block genomic start positions + + Only the first three fields are required. Additional + fields are optional, but if one is defined, all the preceding ones + need to be defined as well. + + ''' def __call__(self, char * buffer, int len): cdef TabProxies.BedProxy r r = TabProxies.BedProxy() @@ -291,7 +387,35 @@ cdef class asBed( Parser ): return r cdef class asVCF( Parser ): - '''converts a :term:`tabix row` into a VCF record.''' + '''converts a :term:`tabix row` into a VCF record with + the following fields: + + contig + contig + pos + chromosomal position, zero-based + id + id + ref + reference + alt + alt + qual + qual + filter + filter + info + info + format + format specifier. + + Access to genotypes is via index:: + + contig = vcf.contig + first_sample_genotype = vcf[0] + second_sample_genotype = vcf[1] + + ''' def __call__(self, char * buffer, int len ): cdef TabProxies.VCFProxy r r = TabProxies.VCFProxy()