Imported Upstream version 0.5
[pysam.git] / samtools / bam_rmdupse.c
diff --git a/samtools/bam_rmdupse.c b/samtools/bam_rmdupse.c
deleted file mode 100644 (file)
index e7dbdc7..0000000
+++ /dev/null
@@ -1,159 +0,0 @@
-#include <math.h>
-#include "sam.h"
-#include "khash.h"
-#include "klist.h"
-
-#define QUEUE_CLEAR_SIZE 0x100000
-#define MAX_POS 0x7fffffff
-
-typedef struct {
-       int endpos;
-       uint32_t score:31, discarded:1;
-       bam1_t *b;
-} elem_t, *elem_p;
-#define __free_elem(p) bam_destroy1((p)->data.b)
-KLIST_INIT(q, elem_t, __free_elem)
-typedef klist_t(q) queue_t;
-
-KHASH_MAP_INIT_INT(best, elem_p)
-typedef khash_t(best) besthash_t;
-
-typedef struct {
-       uint64_t n_checked, n_removed;
-       besthash_t *left, *rght;
-} lib_aux_t;
-KHASH_MAP_INIT_STR(lib, lib_aux_t)
-
-static lib_aux_t *get_aux(khash_t(lib) *aux, const char *lib)
-{
-       khint_t k = kh_get(lib, aux, lib);
-       if (k == kh_end(aux)) {
-               int ret;
-               char *p = strdup(lib);
-               lib_aux_t *q;
-               k = kh_put(lib, aux, p, &ret);
-               q = &kh_val(aux, k);
-               q->left = kh_init(best);
-               q->rght = kh_init(best);
-               q->n_checked = q->n_removed = 0;
-               return q;
-       } else return &kh_val(aux, k);
-}
-
-static inline int sum_qual(const bam1_t *b)
-{
-       int i, q;
-       uint8_t *qual = bam1_qual(b);
-       for (i = q = 0; i < b->core.l_qseq; ++i) q += qual[i];
-       return q;
-}
-
-static inline elem_t *push_queue(queue_t *queue, const bam1_t *b, int endpos, int score)
-{
-       elem_t *p = kl_pushp(q, queue);
-       p->discarded = 0;
-       p->endpos = endpos; p->score = score;
-       if (p->b == 0) p->b = bam_init1();
-       bam_copy1(p->b, b);
-       return p;
-}
-
-static void clear_besthash(besthash_t *h, int32_t pos)
-{
-       khint_t k;
-       for (k = kh_begin(h); k != kh_end(h); ++k)
-               if (kh_exist(h, k) && kh_val(h, k)->endpos <= pos)
-                       kh_del(best, h, k);
-}
-
-static void dump_alignment(samfile_t *out, queue_t *queue, int32_t pos, khash_t(lib) *h)
-{
-       if (queue->size > QUEUE_CLEAR_SIZE || pos == MAX_POS) {
-               khint_t k;
-               while (1) {
-                       elem_t *q;
-                       if (queue->head == queue->tail) break;
-                       q = &kl_val(queue->head);
-                       if (q->discarded) {
-                               q->b->data_len = 0;
-                               kl_shift(q, queue, 0);
-                               continue;
-                       }
-                       if ((q->b->core.flag&BAM_FREVERSE) && q->endpos > pos) break;
-                       samwrite(out, q->b);
-                       q->b->data_len = 0;
-                       kl_shift(q, queue, 0);
-               }
-               for (k = kh_begin(h); k != kh_end(h); ++k) {
-                       if (kh_exist(h, k)) {
-                               clear_besthash(kh_val(h, k).left, pos);
-                               clear_besthash(kh_val(h, k).rght, pos);
-                       }
-               }
-       }
-}
-
-void bam_rmdupse_core(samfile_t *in, samfile_t *out, int force_se)
-{
-       bam1_t *b;
-       queue_t *queue;
-       khint_t k;
-       int last_tid = -2;
-       khash_t(lib) *aux;
-
-       aux = kh_init(lib);
-       b = bam_init1();
-       queue = kl_init(q);
-       while (samread(in, b) >= 0) {
-               bam1_core_t *c = &b->core;
-               int endpos = bam_calend(c, bam1_cigar(b));
-               int score = sum_qual(b);
-               
-               if (last_tid != c->tid) {
-                       if (last_tid >= 0) dump_alignment(out, queue, MAX_POS, aux);
-                       last_tid = c->tid;
-               } else dump_alignment(out, queue, c->pos, aux);
-               if ((c->flag&BAM_FUNMAP) || ((c->flag&BAM_FPAIRED) && !force_se)) {
-                       push_queue(queue, b, endpos, score);
-               } else {
-                       const char *lib;
-                       lib_aux_t *q;
-                       besthash_t *h;
-                       uint32_t key;
-                       int ret;
-                       lib = bam_get_library(in->header, b);
-                       q = lib? get_aux(aux, lib) : get_aux(aux, "\t");
-                       ++q->n_checked;
-                       h = (c->flag&BAM_FREVERSE)? q->rght : q->left;
-                       key = (c->flag&BAM_FREVERSE)? endpos : c->pos;
-                       k = kh_put(best, h, key, &ret);
-                       if (ret == 0) { // in the hash table
-                               elem_t *p = kh_val(h, k);
-                               ++q->n_removed;
-                               if (p->score < score) {
-                                       if (c->flag&BAM_FREVERSE) { // mark "discarded" and push the queue
-                                               p->discarded = 1;
-                                               kh_val(h, k) = push_queue(queue, b, endpos, score);
-                                       } else { // replace
-                                               p->score = score; p->endpos = endpos;
-                                               bam_copy1(p->b, b);
-                                       }
-                               } // otherwise, discard the alignment
-                       } else kh_val(h, k) = push_queue(queue, b, endpos, score);
-               }
-       }
-       dump_alignment(out, queue, MAX_POS, aux);
-
-       for (k = kh_begin(aux); k != kh_end(aux); ++k) {
-               if (kh_exist(aux, k)) {
-                       lib_aux_t *q = &kh_val(aux, k);
-                       fprintf(stderr, "[bam_rmdupse_core] %lld / %lld = %.4lf in library '%s'\n", (long long)q->n_removed,
-                                       (long long)q->n_checked, (double)q->n_removed/q->n_checked, kh_key(aux, k));
-                       kh_destroy(best, q->left); kh_destroy(best, q->rght);
-                       free((char*)kh_key(aux, k));
-               }
-       }
-       kh_destroy(lib, aux);
-       bam_destroy1(b);
-       kl_destroy(q, queue);
-}