6 #include "bam_endian.h"
8 #include "pysam_util.h"
9 #include "errmod.h" // for pysam_dump
12 #define inline __inline
15 // Definition of pysamerr
17 FILE * pysamerr = NULL;
19 FILE * pysam_set_stderr( FILE * f )
25 // #######################################################
26 // utility routines to avoid using callbacks in bam_fetch
27 // taken from bam_index.c
28 // The order of the following declarations is important.
29 // #######################################################
36 #define pair64_lt(a,b) ((a).u < (b).u)
48 KSORT_INIT(my_off, pair64_t, pair64_lt);
49 KHASH_MAP_INIT_INT(my_i, bam_binlist_t);
50 KHASH_MAP_INIT_STR(s, int)
55 khash_t(my_i) **index;
59 typedef struct __linkbuf_t {
62 struct __linkbuf_t *next;
70 struct __bam_plbuf_t {
72 lbnode_t *head, *tail, *dummy;
75 int32_t tid, pos, max_tid, max_pos;
81 static mempool_t *mp_init()
84 mp = (mempool_t*)calloc(1, sizeof(mempool_t));
87 static void mp_destroy(mempool_t *mp)
90 for (k = 0; k < mp->n; ++k) {
91 free(mp->buf[k]->b.data);
97 static inline lbnode_t *mp_alloc(mempool_t *mp)
100 if (mp->n == 0) return (lbnode_t*)calloc(1, sizeof(lbnode_t));
101 else return mp->buf[--mp->n];
103 static inline void mp_free(mempool_t *mp, lbnode_t *p)
105 --mp->cnt; p->next = 0; // clear lbnode_t::next here
106 if (mp->n == mp->max) {
107 mp->max = mp->max? mp->max<<1 : 256;
108 mp->buf = (lbnode_t**)realloc(mp->buf, sizeof(lbnode_t*) * mp->max);
110 mp->buf[mp->n++] = p;
113 static inline int resolve_cigar(bam_pileup1_t *p, uint32_t pos)
117 bam1_core_t *c = &b->core;
118 uint32_t x = c->pos, y = 0;
119 int ret = 1, is_restart = 1;
121 if (c->flag&BAM_FUNMAP) return 0; // unmapped read
122 assert(x <= pos); // otherwise a bug
123 p->qpos = -1; p->indel = 0; p->is_del = p->is_head = p->is_tail = 0;
124 for (k = 0; k < c->n_cigar; ++k) {
125 int op = bam1_cigar(b)[k] & BAM_CIGAR_MASK; // operation
126 int l = bam1_cigar(b)[k] >> BAM_CIGAR_SHIFT; // length
127 if (op == BAM_CMATCH) { // NOTE: this assumes the first and the last operation MUST BE a match or a clip
128 if (x + l > pos) { // overlap with pos
129 p->indel = p->is_del = 0;
130 p->qpos = y + (pos - x);
131 if (x == pos && is_restart) p->is_head = 1;
132 if (x + l - 1 == pos) { // come to the end of a match
133 if (k < c->n_cigar - 1) { // there are additional operation(s)
134 uint32_t cigar = bam1_cigar(b)[k+1]; // next CIGAR
135 int op_next = cigar&BAM_CIGAR_MASK; // next CIGAR operation
136 if (op_next == BAM_CDEL) p->indel = -(int32_t)(cigar>>BAM_CIGAR_SHIFT); // del
137 else if (op_next == BAM_CINS) p->indel = cigar>>BAM_CIGAR_SHIFT; // ins
138 if (op_next == BAM_CSOFT_CLIP || op_next == BAM_CREF_SKIP || op_next == BAM_CHARD_CLIP)
139 p->is_tail = 1; // tail
140 } else p->is_tail = 1; // this is the last operation; set tail
144 } else if (op == BAM_CDEL) { // then set ->is_del
146 p->indel = 0; p->is_del = 1;
147 p->qpos = y + (pos - x);
150 } else if (op == BAM_CREF_SKIP) x += l;
151 else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) y += l;
152 is_restart = (op == BAM_CREF_SKIP || op == BAM_CSOFT_CLIP || op == BAM_CHARD_CLIP);
154 if (op == BAM_CREF_SKIP) ret = 0; // then do not put it into pileup at all
158 assert(x > pos); // otherwise a bug
162 // the following code has been taken from bam_plbuf_push
163 // and modified such that instead of a function call
164 // the function returns and will continue (if cont is true).
165 // from where it left off.
168 // 1: if buf is full and can be emitted
169 // 0: if b has been added
170 // -1: if there was an error
171 int pysam_pileup_next(const bam1_t *b,
173 bam_pileup1_t ** plp,
178 *plp = bam_plp_next(buf->iter, tid, pos, n_plp);
179 if (plp == NULL) return 0;
183 typedef struct __bmc_aux_t {
190 uint32_t pysam_glf_depth( glf1_t * g )
196 void pysam_dump_glf( glf1_t * g, bam_maqcns_t * c )
200 "glf: ref_base=%i, max_mapQ=%i, min_lk=%i, depth=%i",
206 for (x = 0; x < 10; ++x)
207 fprintf(stderr, ", lk%x=%i, ", x, g->lk[x]);
210 "maqcns: het_rate=%f, theta=%f, n_hap=%i, cap_mapQ=%i, errmod=%i, min_baseQ=%i, eta=%f, q_r=%f, aux_max=%i",
221 for (x = 0; x < c->aux->max; ++x)
223 fprintf(stderr, ", info-%i=%i ", x, c->aux->info[x]);
224 if (c->aux->info[x] == 0) break;
227 for (x = 0; x < c->aux->max; ++x)
229 fprintf(stderr, ", info16-%i=%i ", x, c->aux->info16[x]);
230 if (c->aux->info16[x] == 0) break;
237 // pysam dispatch function to emulate the samtools
238 // command line within python.
239 // taken from the main function in bamtk.c
240 // added code to reset getopt
241 extern int main_samview(int argc, char *argv[]);
242 extern int main_import(int argc, char *argv[]);
243 extern int bam_pileup(int argc, char *argv[]);
244 extern int bam_merge(int argc, char *argv[]);
245 extern int bam_sort(int argc, char *argv[]);
246 extern int bam_index(int argc, char *argv[]);
247 extern int faidx_main(int argc, char *argv[]);
248 extern int bam_mating(int argc, char *argv[]);
249 extern int bam_rmdup(int argc, char *argv[]);
250 extern int glf3_view_main(int argc, char *argv[]);
251 extern int bam_flagstat(int argc, char *argv[]);
252 extern int bam_fillmd(int argc, char *argv[]);
254 int pysam_dispatch(int argc, char *argv[] )
258 setmode(fileno(stdout), O_BINARY);
259 setmode(fileno(stdin), O_BINARY);
268 if (argc < 2) return 1;
270 if (strcmp(argv[1], "view") == 0) return main_samview(argc-1, argv+1);
271 else if (strcmp(argv[1], "import") == 0) return main_import(argc-1, argv+1);
272 else if (strcmp(argv[1], "pileup") == 0) return bam_pileup(argc-1, argv+1);
273 else if (strcmp(argv[1], "merge") == 0) return bam_merge(argc-1, argv+1);
274 else if (strcmp(argv[1], "sort") == 0) return bam_sort(argc-1, argv+1);
275 else if (strcmp(argv[1], "index") == 0) return bam_index(argc-1, argv+1);
276 else if (strcmp(argv[1], "faidx") == 0) return faidx_main(argc-1, argv+1);
277 else if (strcmp(argv[1], "fixmate") == 0) return bam_mating(argc-1, argv+1);
278 else if (strcmp(argv[1], "rmdup") == 0) return bam_rmdup(argc-1, argv+1);
279 else if (strcmp(argv[1], "glfview") == 0) return glf3_view_main(argc-1, argv+1);
280 else if (strcmp(argv[1], "flagstat") == 0) return bam_flagstat(argc-1, argv+1);
281 else if (strcmp(argv[1], "calmd") == 0) return bam_fillmd(argc-1, argv+1);
282 else if (strcmp(argv[1], "fillmd") == 0) return bam_fillmd(argc-1, argv+1);
285 else if (strcmp(argv[1], "tview") == 0) return bam_tview_main(argc-1, argv+1);
289 fprintf(stderr, "[main] unrecognized command '%s'\n", argv[1]);
295 // taken from samtools/bam_import.c
296 static inline uint8_t *alloc_data(bam1_t *b, size_t size)
298 if (b->m_data < size)
301 kroundup32(b->m_data);
302 b->data = (uint8_t*)realloc(b->data, b->m_data);
307 // update the variable length data within a bam1_t entry.
308 // Adds *nbytes_new* - *nbytes_old* into the variable length data of *src* at *pos*.
309 // Data within the bam1_t entry is moved so that it is
310 // consistent with the data field lengths.
311 bam1_t * pysam_bam_update( bam1_t * b,
312 const size_t nbytes_old,
313 const size_t nbytes_new,
316 int d = nbytes_new-nbytes_old;
321 if (d == 0) return b;
323 new_size = d + b->data_len;
324 offset = pos - b->data;
326 //printf("d=%i, old=%i, new=%i, old_size=%i, new_size=%i\n",
327 // d, nbytes_old, nbytes_new, b->data_len, new_size);
329 // increase memory if required
332 alloc_data( b, new_size );
333 pos = b->data + offset;
336 if (b->data_len != 0)
338 if (offset < 0 || offset > b->data_len)
339 fprintf(stderr, "[pysam_bam_insert] illegal offset: '%i'\n", (int)offset);
342 // printf("dest=%p, src=%p, n=%i\n", pos+nbytes_new, pos + nbytes_old, b->data_len - (offset+nbytes_old));
343 memmove( pos + nbytes_new,
345 b->data_len - (offset + nbytes_old));
347 b->data_len = new_size;
352 // translate a nucleotide character to binary code
353 unsigned char pysam_translate_sequence( const unsigned char s )
355 return bam_nt16_table[s];
359 void bam_init_header_hash(bam_header_t *header);
361 // translate a reference string *s* to a tid
362 // code taken from bam_parse_region
363 int pysam_reference2tid( bam_header_t *header, const char * s )
369 bam_init_header_hash(header);
370 h = (khash_t(s)*)header->hash;
372 iter = kh_get(s, h, s); /* get the ref_id */
373 if (iter == kh_end(h)) { // name not found
377 return kh_value(h, iter);