From 105631601ef8d0527030ff3303dfec00e821529a Mon Sep 17 00:00:00 2001 From: Henry Amrhein Date: Thu, 20 Dec 2012 15:51:19 -0800 Subject: [PATCH] initial check-in of bam_calnh --- bam_calnh.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 bam_calnh.c diff --git a/bam_calnh.c b/bam_calnh.c new file mode 100644 index 0000000..13bb9f7 --- /dev/null +++ b/bam_calnh.c @@ -0,0 +1,122 @@ +#include +#include + +#include "sam.h" +#include "khash.h" + +KHASH_MAP_INIT_STR(QNH, int) + +int +bam_calnh_core(samfile_t *in, samfile_t *out) +{ + bam1_t *b; + khint_t k; + uint8_t *oldnh; + char *qn = NULL; + khash_t(QNH) *qnh; + int ret; + int64_t savepos; + uint64_t val; + + savepos = bam_tell(in->x.bam); + + if (savepos < 0) + { + fprintf(stderr, "[bam_calnh_core] bam_tell failure\n"); + return 1; + } + + qnh = kh_init(QNH); + b = bam_init1(); + + while (samread(in, b) >= 0) + { + qn = strdup(bam1_qname(b)); + k = kh_put(QNH, qnh, qn, &ret); + + if (ret == 0) + { + kh_val(qnh, k) += 1; + free(qn); + } + + else + kh_val(qnh, k) = 1; + } + + if ((bam_seek(in->x.bam, savepos, SEEK_SET)) < 0) + { + fprintf(stderr, "[bam_calnh_core] bam_seek failure\n"); + return 1; + } + + + while (samread(in, b) >= 0) + { + k = kh_get(QNH, qnh, bam1_qname(b)); + oldnh = bam_aux_get(b, "NH"); + + if (!(oldnh)) + { + val = kh_val(qnh, k); + bam_aux_append(b, "NH", 'i', 4, (uint8_t *)&val); + } + + samwrite(out, b); + } + + for (k = kh_begin(qnh); k != kh_end(qnh); ++k) + if (kh_exist(qnh, k)) + free((char *)kh_key(qnh, k)); + + kh_destroy(QNH, qnh); + bam_destroy1(b); + return 0; +} + +void +usage(char **argv) +{ +#ifdef _STANDALONE + fprintf(stderr, "\nUsage: %s \n", argv[0]); +#else + fprintf(stderr, "\nUsage: samtools calnh \n"); +#endif /* _STANDALONE */ + fprintf(stderr, " specify input.bam or output.bam as '-' to use pipes\n\n"); +} + +int +bam_calnh(int argc, char **argv) +{ + samfile_t *in; + samfile_t *out; + int ret; + + if (argc < 3) + { + usage(argv); + return 1; + } + + in = samopen(argv[1], "rb", 0); + out = samopen(argv[2], "wb", in->header); + + if ((in == 0) || (out == 0)) + { + fprintf(stderr, "[bam_calnh] fail to read/write input files\n"); + return 1; + } + + ret = bam_calnh_core(in, out); + samclose(in); + samclose(out); + return ret; +} + +#ifdef _STANDALONE +int +main(int argc, char **argv) +{ + return bam_calnh(argc, argv); +} +#endif /* _STANDALONE */ -- 2.30.2