6 #include "bam_endian.h"
8 #include "pysam_util.h"
10 // #######################################################
11 // utility routines to avoid using callbacks in bam_fetch
12 // taken from bam_index.c
13 // The order of the following declarations is important.
14 // #######################################################
21 #define pair64_lt(a,b) ((a).u < (b).u)
33 KSORT_INIT(my_off, pair64_t, pair64_lt);
34 KHASH_MAP_INIT_INT(my_i, bam_binlist_t);
39 khash_t(my_i) **index;
43 typedef struct __linkbuf_t {
46 struct __linkbuf_t *next;
54 struct __bam_plbuf_t {
56 lbnode_t *head, *tail, *dummy;
59 int32_t tid, pos, max_tid, max_pos;
65 static mempool_t *mp_init()
68 mp = (mempool_t*)calloc(1, sizeof(mempool_t));
71 static void mp_destroy(mempool_t *mp)
74 for (k = 0; k < mp->n; ++k) {
75 free(mp->buf[k]->b.data);
81 static inline lbnode_t *mp_alloc(mempool_t *mp)
84 if (mp->n == 0) return (lbnode_t*)calloc(1, sizeof(lbnode_t));
85 else return mp->buf[--mp->n];
87 static inline void mp_free(mempool_t *mp, lbnode_t *p)
89 --mp->cnt; p->next = 0; // clear lbnode_t::next here
90 if (mp->n == mp->max) {
91 mp->max = mp->max? mp->max<<1 : 256;
92 mp->buf = (lbnode_t**)realloc(mp->buf, sizeof(lbnode_t*) * mp->max);
97 static inline int resolve_cigar(bam_pileup1_t *p, uint32_t pos)
101 bam1_core_t *c = &b->core;
102 uint32_t x = c->pos, y = 0;
103 int ret = 1, is_restart = 1;
105 if (c->flag&BAM_FUNMAP) return 0; // unmapped read
106 assert(x <= pos); // otherwise a bug
107 p->qpos = -1; p->indel = 0; p->is_del = p->is_head = p->is_tail = 0;
108 for (k = 0; k < c->n_cigar; ++k) {
109 int op = bam1_cigar(b)[k] & BAM_CIGAR_MASK; // operation
110 int l = bam1_cigar(b)[k] >> BAM_CIGAR_SHIFT; // length
111 if (op == BAM_CMATCH) { // NOTE: this assumes the first and the last operation MUST BE a match or a clip
112 if (x + l > pos) { // overlap with pos
113 p->indel = p->is_del = 0;
114 p->qpos = y + (pos - x);
115 if (x == pos && is_restart) p->is_head = 1;
116 if (x + l - 1 == pos) { // come to the end of a match
117 if (k < c->n_cigar - 1) { // there are additional operation(s)
118 uint32_t cigar = bam1_cigar(b)[k+1]; // next CIGAR
119 int op_next = cigar&BAM_CIGAR_MASK; // next CIGAR operation
120 if (op_next == BAM_CDEL) p->indel = -(int32_t)(cigar>>BAM_CIGAR_SHIFT); // del
121 else if (op_next == BAM_CINS) p->indel = cigar>>BAM_CIGAR_SHIFT; // ins
122 if (op_next == BAM_CSOFT_CLIP || op_next == BAM_CREF_SKIP || op_next == BAM_CHARD_CLIP)
123 p->is_tail = 1; // tail
124 } else p->is_tail = 1; // this is the last operation; set tail
128 } else if (op == BAM_CDEL) { // then set ->is_del
130 p->indel = 0; p->is_del = 1;
131 p->qpos = y + (pos - x);
134 } else if (op == BAM_CREF_SKIP) x += l;
135 else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) y += l;
136 is_restart = (op == BAM_CREF_SKIP || op == BAM_CSOFT_CLIP || op == BAM_CHARD_CLIP);
138 if (op == BAM_CREF_SKIP) ret = 0; // then do not put it into pileup at all
142 assert(x > pos); // otherwise a bug
146 // the following code has been taken from bam_plbuf_push
147 // and modified such that instead of a function call
148 // the function returns and will continue (if cont is true).
149 // from where it left off.
152 // 1: if buf is full and can be emitted
153 // 0: if b has been added
154 // -1: if there was an error
155 int pysam_pileup_next(const bam1_t *b,
157 bam_pileup1_t ** plp,
162 *plp = bam_plp_next(buf->iter, tid, pos, n_plp);
163 if (plp == NULL) return 0;
167 // pysam dispatch function to emulate the samtools
168 // command line within python.
169 // taken from the main function in bamtk.c
170 // added code to reset getopt
171 extern int main_samview(int argc, char *argv[]);
172 extern int main_import(int argc, char *argv[]);
173 extern int bam_pileup(int argc, char *argv[]);
174 extern int bam_merge(int argc, char *argv[]);
175 extern int bam_sort(int argc, char *argv[]);
176 extern int bam_index(int argc, char *argv[]);
177 extern int faidx_main(int argc, char *argv[]);
178 extern int bam_mating(int argc, char *argv[]);
179 extern int bam_rmdup(int argc, char *argv[]);
180 extern int glf3_view_main(int argc, char *argv[]);
181 extern int bam_flagstat(int argc, char *argv[]);
182 extern int bam_fillmd(int argc, char *argv[]);
184 int pysam_dispatch(int argc, char *argv[] )
188 setmode(fileno(stdout), O_BINARY);
189 setmode(fileno(stdin), O_BINARY);
200 if (argc < 2) return 1;
202 if (strcmp(argv[1], "view") == 0) return main_samview(argc-1, argv+1);
203 else if (strcmp(argv[1], "import") == 0) return main_import(argc-1, argv+1);
204 else if (strcmp(argv[1], "pileup") == 0) return bam_pileup(argc-1, argv+1);
205 else if (strcmp(argv[1], "merge") == 0) return bam_merge(argc-1, argv+1);
206 else if (strcmp(argv[1], "sort") == 0) return bam_sort(argc-1, argv+1);
207 else if (strcmp(argv[1], "index") == 0) return bam_index(argc-1, argv+1);
208 else if (strcmp(argv[1], "faidx") == 0) return faidx_main(argc-1, argv+1);
209 else if (strcmp(argv[1], "fixmate") == 0) return bam_mating(argc-1, argv+1);
210 else if (strcmp(argv[1], "rmdup") == 0) return bam_rmdup(argc-1, argv+1);
211 else if (strcmp(argv[1], "glfview") == 0) return glf3_view_main(argc-1, argv+1);
212 else if (strcmp(argv[1], "flagstat") == 0) return bam_flagstat(argc-1, argv+1);
213 else if (strcmp(argv[1], "calmd") == 0) return bam_fillmd(argc-1, argv+1);
214 else if (strcmp(argv[1], "fillmd") == 0) return bam_fillmd(argc-1, argv+1);
217 else if (strcmp(argv[1], "tview") == 0) return bam_tview_main(argc-1, argv+1);
221 fprintf(stderr, "[main] unrecognized command '%s'\n", argv[1]);
227 // taken from samtools/bam_import.c
228 static inline uint8_t *alloc_data(bam1_t *b, size_t size)
230 if (b->m_data < size)
233 kroundup32(b->m_data);
234 b->data = (uint8_t*)realloc(b->data, b->m_data);
239 // update the variable length data within a bam1_t entry.
240 // Adds *nbytes_new* - *nbytes_old* into the variable length data of *src* at *pos*.
241 // Data within the bam1_t entry is moved so that it is
242 // consistent with the data field lengths.
243 bam1_t * pysam_bam_update( bam1_t * b,
244 const size_t nbytes_old,
245 const size_t nbytes_new,
248 int d = nbytes_new-nbytes_old;
251 if (d == 0) return b;
253 int new_size = d + b->data_len;
254 size_t offset = pos - b->data;
256 //printf("d=%i, old=%i, new=%i, old_size=%i, new_size=%i\n",
257 // d, nbytes_old, nbytes_new, b->data_len, new_size);
259 // increase memory if required
262 alloc_data( b, new_size );
263 pos = b->data + offset;
266 if (b->data_len != 0)
268 if (offset < 0 || offset > b->data_len)
269 fprintf(stderr, "[pysam_bam_insert] illegal offset: '%i'\n", (int)offset);
272 // printf("dest=%p, src=%p, n=%i\n", pos+nbytes_new, pos + nbytes_old, b->data_len - (offset+nbytes_old));
273 memmove( pos + nbytes_new,
275 b->data_len - (offset + nbytes_old));
277 b->data_len = new_size;
282 // translate a nucleotide character to binary code
283 unsigned char pysam_translate_sequence( const unsigned char s )
285 return bam_nt16_table[s];