initial check-in of bam_calnh
authorHenry Amrhein <hamrhein@caltech.edu>
Thu, 20 Dec 2012 23:51:19 +0000 (15:51 -0800)
committerHenry Amrhein <hamrhein@caltech.edu>
Fri, 21 Dec 2012 00:03:39 +0000 (16:03 -0800)
bam_calnh.c [new file with mode: 0644]

diff --git a/bam_calnh.c b/bam_calnh.c
new file mode 100644 (file)
index 0000000..13bb9f7
--- /dev/null
@@ -0,0 +1,122 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "sam.h"
+#include "khash.h"
+
+KHASH_MAP_INIT_STR(QNH, int)
+
+int
+bam_calnh_core(samfile_t *in, samfile_t *out)
+{
+  bam1_t *b;
+  khint_t k;
+  uint8_t *oldnh;
+  char *qn = NULL;
+  khash_t(QNH) *qnh;
+  int ret;
+  int64_t savepos;
+  uint64_t val;
+
+  savepos = bam_tell(in->x.bam);
+
+  if (savepos < 0)
+  {
+    fprintf(stderr, "[bam_calnh_core] bam_tell failure\n");
+    return 1;
+  }
+
+  qnh = kh_init(QNH);
+  b = bam_init1();
+
+  while (samread(in, b) >= 0)
+  {
+    qn = strdup(bam1_qname(b));
+    k = kh_put(QNH, qnh, qn, &ret);
+
+    if (ret == 0)
+    {
+      kh_val(qnh, k) += 1;
+      free(qn);
+    }
+
+    else
+      kh_val(qnh, k) = 1;
+  }
+
+  if ((bam_seek(in->x.bam, savepos, SEEK_SET)) < 0)
+  {
+    fprintf(stderr, "[bam_calnh_core] bam_seek failure\n");
+    return 1;
+  }
+  
+
+  while (samread(in, b) >= 0)
+  {
+    k = kh_get(QNH, qnh, bam1_qname(b));
+    oldnh = bam_aux_get(b, "NH");
+
+    if (!(oldnh))
+    {
+      val = kh_val(qnh, k);
+      bam_aux_append(b, "NH", 'i', 4, (uint8_t *)&val);
+    }
+
+    samwrite(out, b);
+  }
+
+  for (k = kh_begin(qnh); k != kh_end(qnh); ++k)
+    if (kh_exist(qnh, k))
+      free((char *)kh_key(qnh, k));
+
+  kh_destroy(QNH, qnh);
+  bam_destroy1(b);
+  return 0;
+}
+
+void
+usage(char **argv)
+{
+#ifdef _STANDALONE
+    fprintf(stderr, "\nUsage:  %s <input.bam> <output.bam>\n", argv[0]);
+#else
+    fprintf(stderr, "\nUsage:  samtools calnh <input.bam> <output.bam>\n");
+#endif /* _STANDALONE */
+    fprintf(stderr, "    specify input.bam or output.bam as '-' to use pipes\n\n");
+}
+
+int
+bam_calnh(int argc, char **argv)
+{
+  samfile_t *in;
+  samfile_t *out;
+  int ret;
+
+  if (argc < 3)
+  {
+    usage(argv);
+    return 1;
+  }
+
+  in = samopen(argv[1], "rb", 0);
+  out = samopen(argv[2], "wb", in->header);
+
+  if ((in == 0) || (out == 0))
+  {
+    fprintf(stderr, "[bam_calnh] fail to read/write input files\n");
+    return 1;
+  }
+
+  ret = bam_calnh_core(in, out);
+  samclose(in);
+  samclose(out);
+  return ret;
+}
+
+#ifdef _STANDALONE
+int
+main(int argc, char **argv)
+{
+  return bam_calnh(argc, argv);
+}
+#endif /* _STANDALONE */