Update debian changelog
[pysam.git] / samtools / bam_stat.c.pysam.c
1 #include "pysam.h"
2
3 #include <unistd.h>
4 #include <assert.h>
5 #include "bam.h"
6
7 typedef struct {
8         long long n_reads[2], n_mapped[2], n_pair_all[2], n_pair_map[2], n_pair_good[2];
9         long long n_sgltn[2], n_read1[2], n_read2[2];
10         long long n_dup[2];
11         long long n_diffchr[2], n_diffhigh[2];
12 } bam_flagstat_t;
13
14 #define flagstat_loop(s, c) do {                                                                                \
15                 int w = ((c)->flag & BAM_FQCFAIL)? 1 : 0;                                               \
16                 ++(s)->n_reads[w];                                                                                              \
17                 if ((c)->flag & BAM_FPAIRED) {                                                                  \
18                         ++(s)->n_pair_all[w];                                                                           \
19                         if ((c)->flag & BAM_FPROPER_PAIR) ++(s)->n_pair_good[w];        \
20                         if ((c)->flag & BAM_FREAD1) ++(s)->n_read1[w];                          \
21                         if ((c)->flag & BAM_FREAD2) ++(s)->n_read2[w];                          \
22                         if (((c)->flag & BAM_FMUNMAP) && !((c)->flag & BAM_FUNMAP)) ++(s)->n_sgltn[w];  \
23                         if (!((c)->flag & BAM_FUNMAP) && !((c)->flag & BAM_FMUNMAP)) { \
24                                 ++(s)->n_pair_map[w];                                                                   \
25                                 if ((c)->mtid != (c)->tid) {                                                    \
26                                         ++(s)->n_diffchr[w];                                                            \
27                                         if ((c)->qual >= 5) ++(s)->n_diffhigh[w];                       \
28                                 }                                                                                                               \
29                         }                                                                                                                       \
30                 }                                                                                                                               \
31                 if (!((c)->flag & BAM_FUNMAP)) ++(s)->n_mapped[w];                              \
32                 if ((c)->flag & BAM_FDUP) ++(s)->n_dup[w];                                              \
33         } while (0)
34
35 bam_flagstat_t *bam_flagstat_core(bamFile fp)
36 {
37         bam_flagstat_t *s;
38         bam1_t *b;
39         bam1_core_t *c;
40         int ret;
41         s = (bam_flagstat_t*)calloc(1, sizeof(bam_flagstat_t));
42         b = bam_init1();
43         c = &b->core;
44         while ((ret = bam_read1(fp, b)) >= 0)
45                 flagstat_loop(s, c);
46         bam_destroy1(b);
47         if (ret != -1)
48                 fprintf(pysamerr, "[bam_flagstat_core] Truncated file? Continue anyway.\n");
49         return s;
50 }
51 int bam_flagstat(int argc, char *argv[])
52 {
53         bamFile fp;
54         bam_header_t *header;
55         bam_flagstat_t *s;
56         if (argc == optind) {
57                 fprintf(pysamerr, "Usage: samtools flagstat <in.bam>\n");
58                 return 1;
59         }
60         fp = strcmp(argv[optind], "-")? bam_open(argv[optind], "r") : bam_dopen(fileno(stdin), "r");
61         assert(fp);
62         header = bam_header_read(fp);
63         s = bam_flagstat_core(fp);
64         printf("%lld + %lld in total (QC-passed reads + QC-failed reads)\n", s->n_reads[0], s->n_reads[1]);
65         printf("%lld + %lld duplicates\n", s->n_dup[0], s->n_dup[1]);
66         printf("%lld + %lld mapped (%.2f%%:%.2f%%)\n", s->n_mapped[0], s->n_mapped[1], (float)s->n_mapped[0] / s->n_reads[0] * 100.0, (float)s->n_mapped[1] / s->n_reads[1] * 100.0);
67         printf("%lld + %lld paired in sequencing\n", s->n_pair_all[0], s->n_pair_all[1]);
68         printf("%lld + %lld read1\n", s->n_read1[0], s->n_read1[1]);
69         printf("%lld + %lld read2\n", s->n_read2[0], s->n_read2[1]);
70         printf("%lld + %lld properly paired (%.2f%%:%.2f%%)\n", s->n_pair_good[0], s->n_pair_good[1], (float)s->n_pair_good[0] / s->n_pair_all[0] * 100.0, (float)s->n_pair_good[1] / s->n_pair_all[1] * 100.0);
71         printf("%lld + %lld with itself and mate mapped\n", s->n_pair_map[0], s->n_pair_map[1]);
72         printf("%lld + %lld singletons (%.2f%%:%.2f%%)\n", s->n_sgltn[0], s->n_sgltn[1], (float)s->n_sgltn[0] / s->n_pair_all[0] * 100.0, (float)s->n_sgltn[1] / s->n_pair_all[1] * 100.0);
73         printf("%lld + %lld with mate mapped to a different chr\n", s->n_diffchr[0], s->n_diffchr[1]);
74         printf("%lld + %lld with mate mapped to a different chr (mapQ>=5)\n", s->n_diffhigh[0], s->n_diffhigh[1]);
75         free(s);
76         bam_header_destroy(header);
77         bam_close(fp);
78         return 0;
79 }