X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=samtools.git;a=blobdiff_plain;f=bam.c;h=521c1dda7897aa2b4489b103e1ee1a84129d6c63;hp=ee7642b3434248c9b44ef918435f150f101cae00;hb=85bb95099e58e20cc03456b7528248f7baed4db4;hpb=317f5e8dd22cc9e1e5e05fbcaeb3b9aca7447351 diff --git a/bam.c b/bam.c index ee7642b..521c1dd 100644 --- a/bam.c +++ b/bam.c @@ -70,6 +70,7 @@ bam_header_t *bam_header_read(bamFile fp) { bam_header_t *header; char buf[4]; + int magic_len; int32_t i = 1, name_len; // check EOF i = bgzf_check_EOF(fp); @@ -80,9 +81,9 @@ bam_header_t *bam_header_read(bamFile fp) } else if (i == 0) fprintf(stderr, "[bam_header_read] EOF marker is absent.\n"); // read "BAM1" - if (bam_read(fp, buf, 4) != 4) return 0; - if (strncmp(buf, "BAM\001", 4)) { - fprintf(stderr, "[bam_header_read] wrong header\n"); + magic_len = bam_read(fp, buf, 4); + if (magic_len != 4 || strncmp(buf, "BAM\001", 4) != 0) { + fprintf(stderr, "[bam_header_read] invalid BAM binary header (this is not a BAM file).\n"); return 0; } header = bam_header_init(); @@ -140,6 +141,7 @@ int bam_header_write(bamFile fp, const bam_header_t *header) bam_write(fp, &x, 4); } else bam_write(fp, &header->target_len[i], 4); } + bgzf_flush(fp); return 0; } @@ -207,6 +209,7 @@ inline int bam_write1_core(bamFile fp, const bam1_core_t *c, int data_len, uint8 x[5] = c->mtid; x[6] = c->mpos; x[7] = c->isize; + bgzf_flush_try(fp, 4 + block_len); if (bam_is_be) { for (i = 0; i < 8; ++i) bam_swap_endian_4p(x + i); y = block_len; @@ -232,8 +235,8 @@ char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int of) kstring_t str; str.l = str.m = 0; str.s = 0; - ksprintf(&str, "%s\t", bam1_qname(b)); - if (of == BAM_OFDEC) ksprintf(&str, "%d\t", c->flag); + kputsn(bam1_qname(b), c->l_qname-1, &str); kputc('\t', &str); + if (of == BAM_OFDEC) { kputw(c->flag, &str); kputc('\t', &str); } else if (of == BAM_OFHEX) ksprintf(&str, "0x%x\t", c->flag); else { // BAM_OFSTR for (i = 0; i < 16; ++i) @@ -241,41 +244,51 @@ char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int of) kputc(bam_flag2char_table[i], &str); kputc('\t', &str); } - if (c->tid < 0) kputs("*\t", &str); - else ksprintf(&str, "%s\t", header->target_name[c->tid]); - ksprintf(&str, "%d\t%d\t", c->pos + 1, c->qual); + if (c->tid < 0) kputsn("*\t", 2, &str); + else { + if (header) kputs(header->target_name[c->tid] , &str); + else kputw(c->tid, &str); + kputc('\t', &str); + } + kputw(c->pos + 1, &str); kputc('\t', &str); kputw(c->qual, &str); kputc('\t', &str); if (c->n_cigar == 0) kputc('*', &str); else { - for (i = 0; i < c->n_cigar; ++i) - ksprintf(&str, "%d%c", bam1_cigar(b)[i]>>BAM_CIGAR_SHIFT, "MIDNSHP"[bam1_cigar(b)[i]&BAM_CIGAR_MASK]); + for (i = 0; i < c->n_cigar; ++i) { + kputw(bam1_cigar(b)[i]>>BAM_CIGAR_SHIFT, &str); + kputc("MIDNSHP"[bam1_cigar(b)[i]&BAM_CIGAR_MASK], &str); + } } kputc('\t', &str); - if (c->mtid < 0) kputs("*\t", &str); - else if (c->mtid == c->tid) kputs("=\t", &str); - else ksprintf(&str, "%s\t", header->target_name[c->mtid]); - ksprintf(&str, "%d\t%d\t", c->mpos + 1, c->isize); + if (c->mtid < 0) kputsn("*\t", 2, &str); + else if (c->mtid == c->tid) kputsn("=\t", 2, &str); + else { + if (header) kputs(header->target_name[c->mtid], &str); + else kputw(c->mtid, &str); + kputc('\t', &str); + } + kputw(c->mpos + 1, &str); kputc('\t', &str); kputw(c->isize, &str); kputc('\t', &str); if (c->l_qseq) { for (i = 0; i < c->l_qseq; ++i) kputc(bam_nt16_rev_table[bam1_seqi(s, i)], &str); kputc('\t', &str); if (t[0] == 0xff) kputc('*', &str); else for (i = 0; i < c->l_qseq; ++i) kputc(t[i] + 33, &str); - } else ksprintf(&str, "*\t*"); + } else kputsn("*\t*", 3, &str); s = bam1_aux(b); while (s < b->data + b->data_len) { uint8_t type, key[2]; key[0] = s[0]; key[1] = s[1]; s += 2; type = *s; ++s; - ksprintf(&str, "\t%c%c:", key[0], key[1]); - if (type == 'A') { ksprintf(&str, "A:%c", *s); ++s; } - else if (type == 'C') { ksprintf(&str, "i:%u", *s); ++s; } - else if (type == 'c') { ksprintf(&str, "i:%d", *s); ++s; } - else if (type == 'S') { ksprintf(&str, "i:%u", *(uint16_t*)s); s += 2; } - else if (type == 's') { ksprintf(&str, "i:%d", *(int16_t*)s); s += 2; } - else if (type == 'I') { ksprintf(&str, "i:%u", *(uint32_t*)s); s += 4; } - else if (type == 'i') { ksprintf(&str, "i:%d", *(int32_t*)s); s += 4; } + kputc('\t', &str); kputsn((char*)key, 2, &str); kputc(':', &str); + if (type == 'A') { kputsn("A:", 2, &str); kputc(*s, &str); ++s; } + else if (type == 'C') { kputsn("i:", 2, &str); kputw(*s, &str); ++s; } + else if (type == 'c') { kputsn("i:", 2, &str); kputw(*(int8_t*)s, &str); ++s; } + else if (type == 'S') { kputsn("i:", 2, &str); kputw(*(uint16_t*)s, &str); s += 2; } + else if (type == 's') { kputsn("i:", 2, &str); kputw(*(int16_t*)s, &str); s += 2; } + else if (type == 'I') { kputsn("i:", 2, &str); kputuw(*(uint32_t*)s, &str); s += 4; } + else if (type == 'i') { kputsn("i:", 2, &str); kputw(*(int32_t*)s, &str); s += 4; } else if (type == 'f') { ksprintf(&str, "f:%g", *(float*)s); s += 4; } else if (type == 'd') { ksprintf(&str, "d:%lg", *(double*)s); s += 8; } - else if (type == 'Z' || type == 'H') { ksprintf(&str, "%c:", type); while (*s) kputc(*s++, &str); ++s; } + else if (type == 'Z' || type == 'H') { kputc(type, &str); kputc(':', &str); while (*s) kputc(*s++, &str); ++s; } } return str.s; } @@ -288,10 +301,26 @@ char *bam_format1(const bam_header_t *header, const bam1_t *b) void bam_view1(const bam_header_t *header, const bam1_t *b) { char *s = bam_format1(header, b); - printf("%s\n", s); + puts(s); free(s); } +int bam_validate1(const bam_header_t *header, const bam1_t *b) +{ + char *s; + + if (b->core.tid < -1 || b->core.mtid < -1) return 0; + if (header && (b->core.tid >= header->n_targets || b->core.mtid >= header->n_targets)) return 0; + + if (b->data_len < b->core.l_qname) return 0; + s = memchr(bam1_qname(b), '\0', b->core.l_qname); + if (s != &bam1_qname(b)[b->core.l_qname-1]) return 0; + + // FIXME: Other fields could also be checked, especially the auxiliary data + + return 1; +} + // FIXME: we should also check the LB tag associated with each alignment const char *bam_get_library(bam_header_t *h, const bam1_t *b) {