Update debian changelog
[pysam.git] / samtools / sam.c.pysam.c
1 #include "pysam.h"
2
3 #include <string.h>
4 #include <unistd.h>
5 #include "faidx.h"
6 #include "sam.h"
7
8 #define TYPE_BAM  1
9 #define TYPE_READ 2
10
11 bam_header_t *bam_header_dup(const bam_header_t *h0)
12 {
13         bam_header_t *h;
14         int i;
15         h = bam_header_init();
16         *h = *h0;
17         h->hash = h->dict = h->rg2lib = 0;
18         h->text = (char*)calloc(h->l_text + 1, 1);
19         memcpy(h->text, h0->text, h->l_text);
20         h->target_len = (uint32_t*)calloc(h->n_targets, 4);
21         h->target_name = (char**)calloc(h->n_targets, sizeof(void*));
22         for (i = 0; i < h->n_targets; ++i) {
23                 h->target_len[i] = h0->target_len[i];
24                 h->target_name[i] = strdup(h0->target_name[i]);
25         }
26         return h;
27 }
28 static void append_header_text(bam_header_t *header, char* text, int len)
29 {
30         int x = header->l_text + 1;
31         int y = header->l_text + len + 1; // 1 byte null
32         if (text == 0) return;
33         kroundup32(x); 
34         kroundup32(y);
35         if (x < y) header->text = (char*)realloc(header->text, y);
36         strncpy(header->text + header->l_text, text, len); // we cannot use strcpy() here.
37         header->l_text += len;
38         header->text[header->l_text] = 0;
39 }
40
41 samfile_t *samopen(const char *fn, const char *mode, const void *aux)
42 {
43         samfile_t *fp;
44         fp = (samfile_t*)calloc(1, sizeof(samfile_t));
45         if (strchr(mode, 'r')) { // read
46                 fp->type |= TYPE_READ;
47                 if (strchr(mode, 'b')) { // binary
48                         fp->type |= TYPE_BAM;
49                         fp->x.bam = strcmp(fn, "-")? bam_open(fn, "r") : bam_dopen(fileno(stdin), "r");
50                         if (fp->x.bam == 0) goto open_err_ret;
51                         fp->header = bam_header_read(fp->x.bam);
52                 } else { // text
53                         fp->x.tamr = sam_open(fn);
54                         if (fp->x.tamr == 0) goto open_err_ret;
55                         fp->header = sam_header_read(fp->x.tamr);
56                         if (fp->header->n_targets == 0) { // no @SQ fields
57                                 if (aux) { // check if aux is present
58                                         bam_header_t *textheader = fp->header;
59                                         fp->header = sam_header_read2((const char*)aux);
60                                         if (fp->header == 0) goto open_err_ret;
61                                         append_header_text(fp->header, textheader->text, textheader->l_text);
62                                         bam_header_destroy(textheader);
63                                 }
64                                 if (fp->header->n_targets == 0 && bam_verbose >= 1)
65                                         fprintf(pysamerr, "[samopen] no @SQ lines in the header.\n");
66                         } else if (bam_verbose >= 2) fprintf(pysamerr, "[samopen] SAM header is present: %d sequences.\n", fp->header->n_targets);
67                 }
68         } else if (strchr(mode, 'w')) { // write
69                 fp->header = bam_header_dup((const bam_header_t*)aux);
70                 if (strchr(mode, 'b')) { // binary
71                         char bmode[3];
72                         int i, compress_level = -1;
73                         for (i = 0; mode[i]; ++i) if (mode[i] >= '0' && mode[i] <= '9') break;
74                         if (mode[i]) compress_level = mode[i] - '0';
75                         if (strchr(mode, 'u')) compress_level = 0;
76                         bmode[0] = 'w'; bmode[1] = compress_level < 0? 0 : compress_level + '0'; bmode[2] = 0;
77                         fp->type |= TYPE_BAM;
78                         fp->x.bam = strcmp(fn, "-")? bam_open(fn, bmode) : bam_dopen(fileno(stdout), bmode);
79                         if (fp->x.bam == 0) goto open_err_ret;
80                         bam_header_write(fp->x.bam, fp->header);
81                 } else { // text
82                         // open file
83                         fp->x.tamw = strcmp(fn, "-")? fopen(fn, "w") : stdout;
84                         if (fp->x.tamr == 0) goto open_err_ret;
85                         if (strchr(mode, 'X')) fp->type |= BAM_OFSTR<<2;
86                         else if (strchr(mode, 'x')) fp->type |= BAM_OFHEX<<2;
87                         else fp->type |= BAM_OFDEC<<2;
88                         // write header
89                         if (strchr(mode, 'h')) {
90                                 int i;
91                                 bam_header_t *alt;
92                                 // parse the header text 
93                                 alt = bam_header_init();
94                                 alt->l_text = fp->header->l_text; alt->text = fp->header->text;
95                                 sam_header_parse(alt);
96                                 alt->l_text = 0; alt->text = 0;
97                                 // check if there are @SQ lines in the header
98                                 fwrite(fp->header->text, 1, fp->header->l_text, fp->x.tamw); // FIXME: better to skip the trailing NULL
99                                 if (alt->n_targets) { // then write the header text without dumping ->target_{name,len}
100                                         if (alt->n_targets != fp->header->n_targets && bam_verbose >= 1)
101                                                 fprintf(pysamerr, "[samopen] inconsistent number of target sequences. Output the text header.\n");
102                                 } else { // then dump ->target_{name,len}
103                                         for (i = 0; i < fp->header->n_targets; ++i)
104                                                 fprintf(fp->x.tamw, "@SQ\tSN:%s\tLN:%d\n", fp->header->target_name[i], fp->header->target_len[i]);
105                                 }
106                                 bam_header_destroy(alt);
107                         }
108                 }
109         }
110         return fp;
111
112 open_err_ret:
113         free(fp);
114         return 0;
115 }
116
117 void samclose(samfile_t *fp)
118 {
119         if (fp == 0) return;
120         if (fp->header) bam_header_destroy(fp->header);
121         if (fp->type & TYPE_BAM) bam_close(fp->x.bam);
122         else if (fp->type & TYPE_READ) sam_close(fp->x.tamr);
123         else fclose(fp->x.tamw);
124         free(fp);
125 }
126
127 int samread(samfile_t *fp, bam1_t *b)
128 {
129         if (fp == 0 || !(fp->type & TYPE_READ)) return -1; // not open for reading
130         if (fp->type & TYPE_BAM) return bam_read1(fp->x.bam, b);
131         else return sam_read1(fp->x.tamr, fp->header, b);
132 }
133
134 int samwrite(samfile_t *fp, const bam1_t *b)
135 {
136         if (fp == 0 || (fp->type & TYPE_READ)) return -1; // not open for writing
137         if (fp->type & TYPE_BAM) return bam_write1(fp->x.bam, b);
138         else {
139                 char *s = bam_format1_core(fp->header, b, fp->type>>2&3);
140                 int l = strlen(s);
141                 fputs(s, fp->x.tamw); fputc('\n', fp->x.tamw);
142                 free(s);
143                 return l + 1;
144         }
145 }
146
147 int sampileup(samfile_t *fp, int mask, bam_pileup_f func, void *func_data)
148 {
149         bam_plbuf_t *buf;
150         int ret;
151         bam1_t *b;
152         b = bam_init1();
153         buf = bam_plbuf_init(func, func_data);
154         bam_plbuf_set_mask(buf, mask);
155         while ((ret = samread(fp, b)) >= 0)
156                 bam_plbuf_push(b, buf);
157         bam_plbuf_push(0, buf);
158         bam_plbuf_destroy(buf);
159         bam_destroy1(b);
160         return 0;
161 }
162
163 char *samfaipath(const char *fn_ref)
164 {
165         char *fn_list = 0;
166         if (fn_ref == 0) return 0;
167         fn_list = calloc(strlen(fn_ref) + 5, 1);
168         strcat(strcpy(fn_list, fn_ref), ".fai");
169         if (access(fn_list, R_OK) == -1) { // fn_list is unreadable
170                 if (access(fn_ref, R_OK) == -1) {
171                         fprintf(pysamerr, "[samfaipath] fail to read file %s.\n", fn_ref);
172                 } else {
173                         if (bam_verbose >= 3) fprintf(pysamerr, "[samfaipath] build FASTA index...\n");
174                         if (fai_build(fn_ref) == -1) {
175                                 fprintf(pysamerr, "[samfaipath] fail to build FASTA index.\n");
176                                 free(fn_list); fn_list = 0;
177                         }
178                 }
179         }
180         return fn_list;
181 }