X-Git-Url: http://woldlab.caltech.edu/gitweb/?a=blobdiff_plain;f=pysam%2Fcsamtools.pxd;h=4ec74e3693c86aea8cc7052db6ace9a770c6bb6d;hb=ca46ef4ba4a883c57cea62d5bf1bc021f1185109;hp=7dac38d2d887b5e24e8244df4287d2d1b518ed7c;hpb=aa8ecff068edbb09a03bd874fce716e93e22e53c;p=pysam.git diff --git a/pysam/csamtools.pxd b/pysam/csamtools.pxd index 7dac38d..4ec74e3 100644 --- a/pysam/csamtools.pxd +++ b/pysam/csamtools.pxd @@ -1,4 +1,3 @@ - cdef extern from "string.h": ctypedef int size_t void *memcpy(void *dst,void *src,size_t len) @@ -14,6 +13,9 @@ cdef extern from "stdlib.h": void qsort(void *base, size_t nmemb, size_t size, int (*compar)(void *,void *)) +cdef extern from "math.h": + double sqrt(double x) + cdef extern from "stdio.h": ctypedef struct FILE: pass @@ -27,7 +29,7 @@ cdef extern from "stdio.h": FILE * stdout int fclose(FILE *) int sscanf(char *str,char *fmt,...) - int printf(char *str,char *fmt,...) + int printf(char *fmt,...) int sprintf(char *str,char *fmt,...) int fprintf(FILE *ifile,char *fmt,...) char *fgets(char *str,int size,FILE *ifile) @@ -50,6 +52,14 @@ cdef extern from "string.h": size_t strlen(char *s) int memcmp( void * s1, void *s2, size_t len ) +cdef extern from "Python.h": + long _Py_HashPointer(void*) + FILE* PyFile_AsFile(object) + +cdef extern from "fileobject.h": + ctypedef class __builtin__.file [object PyFileObject]: + pass + cdef extern from "razf.h": pass @@ -60,9 +70,10 @@ cdef extern from "stdint.h": ctypedef int uint8_t ctypedef int uint64_t - cdef extern from "bam.h": + # constants + int BAM_DEF_MASK # IF _IOLIB=2, bamFile = BGZF, see bgzf.h # samtools uses KNETFILE, check how this works @@ -115,20 +126,51 @@ cdef extern from "bam.h": char *text ctypedef struct bam_index_t: - pass + int32_t n + uint64_t n_no_coor ctypedef struct bam_plbuf_t: pass + ctypedef struct pair64_t: + uint64_t u, v + + ctypedef struct bam_iter_t: + int from_first + int tid, beg, end, n_off, i, finished + uint64_t curr_off + pair64_t *off + + # ctypedef __bam_iter_t * bam_iter_t + + bam1_t * bam_init1() + void bam_destroy1(bam1_t *) + bamFile razf_dopen(int data_fd, char *mode) - # removed - macros not found + int64_t bam_seek( bamFile fp, uint64_t voffset, int where) + int64_t bam_tell( bamFile fp ) - # int64_t bam_seek( bamFile fp, uint64_t voffset, int where) - # int64_t bam_tell( bamFile fp ) - # void bam_destroy1( bam1_t * b) # void bam_init_header_hash(bam_header_t *header) + ############################################### + # stand-ins for samtools macros + uint32_t * bam1_cigar( bam1_t * b) + char * bam1_qname( bam1_t * b) + uint8_t * bam1_seq( bam1_t * b) + uint8_t * bam1_qual( bam1_t * b) + uint8_t * bam1_aux( bam1_t * b) + + ############################################### + # bam iterator interface + bam_iter_t bam_iter_query( bam_index_t *idx, int tid, int beg, int end) + + int bam_iter_read(bamFile fp, bam_iter_t iter, bam1_t *b) + + void bam_iter_destroy(bam_iter_t iter) + + ############################################### + bam1_t * bam_dup1( bam1_t *src ) bam1_t * bam_copy1(bam1_t *bdst, bam1_t *bsrc) @@ -138,6 +180,7 @@ cdef extern from "bam.h": int bam_parse_region(bam_header_t *header, char *str, int *ref_id, int *begin, int *end) + ############################################### bam_plbuf_t *bam_plbuf_init(bam_pileup_f func, void *data) int bam_fetch(bamFile fp, bam_index_t *idx, int tid, int beg, int end, void *data, bam_fetch_f func) @@ -145,9 +188,28 @@ cdef extern from "bam.h": int bam_plbuf_push(bam1_t *b, bam_plbuf_t *buf) void bam_plbuf_destroy(bam_plbuf_t *buf) + ######################################## + # pileup iterator interface + ctypedef struct bam_plp_t: + pass + + ctypedef bam_pileup1_t * const_bam_pileup1_t_ptr "const bam_pileup1_t *" + + ctypedef int (*bam_plp_auto_f)(void *data, bam1_t *b) + + bam_plp_t bam_plp_init( bam_plp_auto_f func, void *data) + int bam_plp_push( bam_plp_t iter, bam1_t *b) + bam_pileup1_t * bam_plp_next( bam_plp_t iter, int *_tid, int *_pos, int *_n_plp) + bam_pileup1_t * bam_plp_auto( bam_plp_t iter, int *_tid, int *_pos, int *_n_plp) + void bam_plp_set_mask(bam_plp_t iter, int mask) + void bam_plp_reset(bam_plp_t iter) + void bam_plp_destroy(bam_plp_t iter) + void bam_plp_set_maxcnt(bam_plp_t iter, int maxcnt) - int bam_read1(bamFile fp, bam1_t *b) + ################################################## + int bam_read1( bamFile fp, bam1_t *b) + int bam_validate1( bam_header_t *header, bam1_t *b) int bam_write1( bamFile fp, bam1_t *b) bam_header_t *bam_header_init() @@ -164,7 +226,7 @@ cdef extern from "bam.h": uint8_t *bam_aux_get(bam1_t *b, char tag[2]) - int bam_aux2i(uint8_t *s) + int32_t bam_aux2i(uint8_t *s) float bam_aux2f(uint8_t *s) double bam_aux2d(uint8_t *s) char bam_aux2A( uint8_t *s) @@ -174,6 +236,9 @@ cdef extern from "bam.h": uint32_t bam_calend(bam1_core_t *c, uint32_t *cigar) +cdef extern from *: + ctypedef char* const_char_ptr "const char*" + cdef extern from "sam.h": ctypedef struct samfile_t_un: @@ -186,7 +251,7 @@ cdef extern from "sam.h": samfile_t_un x bam_header_t *header - samfile_t *samopen( char *fn, char * mode, void *aux) + samfile_t *samopen( const_char_ptr fn, char * mode, void *aux) int sampileup( samfile_t *fp, int mask, bam_pileup_f func, void *data) @@ -196,6 +261,73 @@ cdef extern from "sam.h": int samwrite(samfile_t *fp, bam1_t *b) + int bam_prob_realn(bam1_t *b, char *ref) + int bam_cap_mapQ(bam1_t *b, char *ref, int thres) + + +#cdef extern from "glf.h": +# ctypedef struct glf1_t: +# pass + +#cdef extern from "bam_maqcns.h": +# +# ctypedef struct bam_maqcns_t: +# float het_rate, theta +# int n_hap, cap_mapQ, errmod, min_baseQ +# float eta, q_r +# double *fk, *coef +# double *lhet +# void *aux + +# glf1_t *bam_maqcns_glfgen(int n, +# bam_pileup1_t *pl, +# uint8_t ref_base, +# bam_maqcns_t *bm) + +# ctypedef struct bam_maqindel_opt_t: +# int q_indel +# float r_indel +# float r_snp +# int mm_penalty, indel_err, ambi_thres + +# uint32_t bam_maqcns_call(int n, bam_pileup1_t *pl, bam_maqcns_t *bm) +# bam_maqcns_t * bam_maqcns_init() +# void bam_maqcns_destroy(bam_maqcns_t *bm) +# void bam_maqcns_prepare(bam_maqcns_t *bm) + +# uint32_t glf2cns(glf1_t *g, int q_r) + +# int BAM_ERRMOD_MAQ2 +# int BAM_ERRMOD_MAQ +# int BAM_ERRMOD_SOAP + +# ctypedef struct bam_maqindel_ret_t: +# int indel1 +# int indel2 +# int cnt1 +# int cnt2 +# int cnt_anti +# int cnt_ref +# int cnt_ambi +# char *s[2] +# int gt +# int gl[2] +# int q_cns +# int q_ref + +# void bam_maqindel_ret_destroy( bam_maqindel_ret_t * ) + +# bam_maqindel_opt_t *bam_maqindel_opt_init() + +# bam_maqindel_ret_t * bam_maqindel(int n, +# int pos, +# bam_maqindel_opt_t * mi, +# bam_pileup1_t * pl, +# char *ref, +# int _n_types, +# int * _types ) + + cdef extern from "faidx.h": ctypedef struct faidx_t: @@ -209,15 +341,21 @@ cdef extern from "faidx.h": char *fai_fetch(faidx_t *fai, char *reg, int *len) -cdef extern from "pysam_util.h": + int faidx_fetch_nseq(faidx_t *fai) + + char *faidx_fetch_seq(faidx_t *fai, char *c_name, + int p_beg_i, int p_end_i, int *len) - int pysam_bam_plbuf_push(bam1_t *b, bam_plbuf_t *buf, int cont) - int pysam_get_pos( bam_plbuf_t *buf) +cdef extern from "pysam_util.h": - int pysam_get_tid( bam_plbuf_t *buf) + int pysam_pileup_next(bam1_t *b, + bam_plbuf_t *buf, + bam_pileup1_t ** plp, + int * tid, + int * pos, + int * n_plp ) - bam_pileup1_t * pysam_get_pileup( bam_plbuf_t *buf) int pysam_dispatch(int argc, char *argv[] ) @@ -233,19 +371,66 @@ cdef extern from "pysam_util.h": # translate char to unsigned char unsigned char pysam_translate_sequence( char s ) - # stand-ins for samtools macros - uint32_t * pysam_bam1_cigar( bam1_t * b) - char * pysam_bam1_qname( bam1_t * b) - uint8_t * pysam_bam1_seq( bam1_t * b) - uint8_t * pysam_bam1_qual( bam1_t * b) - uint8_t * pysam_bam1_aux( bam1_t * b) + unsigned char * bam_nt16_table + + int pysam_reference2tid( bam_header_t *header, char * s ) + + void pysam_set_stderr( FILE * file ) + + # return mapped/unmapped reads on tid + uint32_t pysam_get_mapped( bam_index_t *idx, int tid ) + uint32_t pysam_get_unmapped( bam_index_t *idx, int tid ) + +# uint32_t pysam_glf_depth( glf1_t * g ) + +# void pysam_dump_glf( glf1_t * g, bam_maqcns_t * c ) + +# need to declare all C fields and methods here +cdef class AlignedRead: + + # object that this AlignedRead represents + cdef bam1_t * _delegate + +cdef class Samfile: + cdef char * _filename + # pointer to samfile + cdef samfile_t * samfile + # pointer to index + cdef bam_index_t *index + # true if file is a bam file + cdef int isbam + # true if not a file but a stream + cdef int isstream + # true if file is not on the local filesystem + cdef int isremote + # current read within iteration + cdef bam1_t * b + # file opening mode + cdef char * mode + + # beginning of read section + cdef int64_t start_offset + + cdef bam_header_t * _buildHeader( self, new_header ) + cdef bam1_t * getCurrent( self ) + cdef int cnext(self) + + # write an aligned read + cpdef int write( self, AlignedRead read ) + + cdef char * _getrname( self, int tid ) + +cdef class IteratorRow: + pass + +cdef class IteratorRowAll(IteratorRow): + cdef bam1_t * b + cdef samfile_t * fp + # true if samfile belongs to this object + cdef int owns_samfile + + cdef bam1_t * getCurrent( self ) + + cdef int cnext(self) + - # iterator implemenation - ctypedef struct bam_fetch_iterator_t: - pass - - bam_fetch_iterator_t* bam_init_fetch_iterator(bamFile fp, bam_index_t *idx, int tid, int beg, int end) - - bam1_t * bam_fetch_iterate(bam_fetch_iterator_t *iter) - - void bam_cleanup_fetch_iterator(bam_fetch_iterator_t *iter)