# Copyright review of the diff between 0.1.12 and 0.1.13.
[samtools.git] / sam.c
1 #include <string.h>
2 #include <unistd.h>
3 #include "faidx.h"
4 #include "sam.h"
5
6 #define TYPE_BAM  1
7 #define TYPE_READ 2
8
9 bam_header_t *bam_header_dup(const bam_header_t *h0)
10 {
11         bam_header_t *h;
12         int i;
13         h = bam_header_init();
14         *h = *h0;
15         h->hash = h->dict = h->rg2lib = 0;
16         h->text = (char*)calloc(h->l_text + 1, 1);
17         memcpy(h->text, h0->text, h->l_text);
18         h->target_len = (uint32_t*)calloc(h->n_targets, 4);
19         h->target_name = (char**)calloc(h->n_targets, sizeof(void*));
20         for (i = 0; i < h->n_targets; ++i) {
21                 h->target_len[i] = h0->target_len[i];
22                 h->target_name[i] = strdup(h0->target_name[i]);
23         }
24         return h;
25 }
26 static void append_header_text(bam_header_t *header, char* text, int len)
27 {
28         int x = header->l_text + 1;
29         int y = header->l_text + len + 1; // 1 byte null
30         if (text == 0) return;
31         kroundup32(x); 
32         kroundup32(y);
33         if (x < y) header->text = (char*)realloc(header->text, y);
34         strncpy(header->text + header->l_text, text, len); // we cannot use strcpy() here.
35         header->l_text += len;
36         header->text[header->l_text] = 0;
37 }
38
39 samfile_t *samopen(const char *fn, const char *mode, const void *aux)
40 {
41         samfile_t *fp;
42         fp = (samfile_t*)calloc(1, sizeof(samfile_t));
43         if (mode[0] == 'r') { // read
44                 fp->type |= TYPE_READ;
45                 if (mode[1] == 'b') { // binary
46                         fp->type |= TYPE_BAM;
47                         fp->x.bam = strcmp(fn, "-")? bam_open(fn, "r") : bam_dopen(fileno(stdin), "r");
48                         if (fp->x.bam == 0) goto open_err_ret;
49                         fp->header = bam_header_read(fp->x.bam);
50                 } else { // text
51                         fp->x.tamr = sam_open(fn);
52                         if (fp->x.tamr == 0) goto open_err_ret;
53                         fp->header = sam_header_read(fp->x.tamr);
54                         if (fp->header->n_targets == 0) { // no @SQ fields
55                                 if (aux) { // check if aux is present
56                                         bam_header_t *textheader = fp->header;
57                                         fp->header = sam_header_read2((const char*)aux);
58                                         if (fp->header == 0) goto open_err_ret;
59                                         append_header_text(fp->header, textheader->text, textheader->l_text);
60                                         bam_header_destroy(textheader);
61                                 }
62                                 if (fp->header->n_targets == 0)
63                                         fprintf(stderr, "[samopen] no @SQ lines in the header.\n");
64                         } else fprintf(stderr, "[samopen] SAM header is present: %d sequences.\n", fp->header->n_targets);
65                 }
66         } else if (mode[0] == 'w') { // write
67                 fp->header = bam_header_dup((const bam_header_t*)aux);
68                 if (mode[1] == 'b') { // binary
69                         char bmode[3];
70                         bmode[0] = 'w'; bmode[1] = strstr(mode, "u")? 'u' : 0; bmode[2] = 0;
71                         fp->type |= TYPE_BAM;
72                         fp->x.bam = strcmp(fn, "-")? bam_open(fn, bmode) : bam_dopen(fileno(stdout), bmode);
73                         if (fp->x.bam == 0) goto open_err_ret;
74                         bam_header_write(fp->x.bam, fp->header);
75                 } else { // text
76                         // open file
77                         fp->x.tamw = strcmp(fn, "-")? fopen(fn, "w") : stdout;
78                         if (fp->x.tamr == 0) goto open_err_ret;
79                         if (strstr(mode, "X")) fp->type |= BAM_OFSTR<<2;
80                         else if (strstr(mode, "x")) fp->type |= BAM_OFHEX<<2;
81                         else fp->type |= BAM_OFDEC<<2;
82                         // write header
83                         if (strstr(mode, "h")) {
84                                 int i;
85                                 bam_header_t *alt;
86                                 // parse the header text 
87                                 alt = bam_header_init();
88                                 alt->l_text = fp->header->l_text; alt->text = fp->header->text;
89                                 sam_header_parse(alt);
90                                 alt->l_text = 0; alt->text = 0;
91                                 // check if there are @SQ lines in the header
92                                 fwrite(fp->header->text, 1, fp->header->l_text, fp->x.tamw);
93                                 if (alt->n_targets) { // then write the header text without dumping ->target_{name,len}
94                                         if (alt->n_targets != fp->header->n_targets)
95                                                 fprintf(stderr, "[samopen] inconsistent number of target sequences.\n");
96                                 } else { // then dump ->target_{name,len}
97                                         for (i = 0; i < fp->header->n_targets; ++i)
98                                                 fprintf(fp->x.tamw, "@SQ\tSN:%s\tLN:%d\n", fp->header->target_name[i], fp->header->target_len[i]);
99                                 }
100                                 bam_header_destroy(alt);
101                         }
102                 }
103         }
104         return fp;
105
106 open_err_ret:
107         free(fp);
108         return 0;
109 }
110
111 void samclose(samfile_t *fp)
112 {
113         if (fp == 0) return;
114         if (fp->header) bam_header_destroy(fp->header);
115         if (fp->type & TYPE_BAM) bam_close(fp->x.bam);
116         else if (fp->type & TYPE_READ) sam_close(fp->x.tamr);
117         else fclose(fp->x.tamw);
118         free(fp);
119 }
120
121 int samread(samfile_t *fp, bam1_t *b)
122 {
123         if (fp == 0 || !(fp->type & TYPE_READ)) return -1; // not open for reading
124         if (fp->type & TYPE_BAM) return bam_read1(fp->x.bam, b);
125         else return sam_read1(fp->x.tamr, fp->header, b);
126 }
127
128 int samwrite(samfile_t *fp, const bam1_t *b)
129 {
130         if (fp == 0 || (fp->type & TYPE_READ)) return -1; // not open for writing
131         if (fp->type & TYPE_BAM) return bam_write1(fp->x.bam, b);
132         else {
133                 char *s = bam_format1_core(fp->header, b, fp->type>>2&3);
134                 int l = strlen(s);
135                 fputs(s, fp->x.tamw); fputc('\n', fp->x.tamw);
136                 free(s);
137                 return l + 1;
138         }
139 }
140
141 int sampileup(samfile_t *fp, int mask, bam_pileup_f func, void *func_data)
142 {
143         bam_plbuf_t *buf;
144         int ret;
145         bam1_t *b;
146         b = bam_init1();
147         buf = bam_plbuf_init(func, func_data);
148         bam_plbuf_set_mask(buf, mask);
149         while ((ret = samread(fp, b)) >= 0)
150                 bam_plbuf_push(b, buf);
151         bam_plbuf_push(0, buf);
152         bam_plbuf_destroy(buf);
153         bam_destroy1(b);
154         return 0;
155 }
156
/**
 * Build the path of the FASTA index belonging to fn_ref (fn_ref + ".fai"),
 * creating the index with fai_build() when it is not readable but fn_ref is.
 *
 * If neither the index nor fn_ref is readable, a message is printed and
 * the index path is still returned.
 *
 * @param fn_ref  path of the reference FASTA file; may be NULL
 * @return        heap-allocated index path for the caller to free(), or 0
 *                when fn_ref is NULL, memory is exhausted, or building the
 *                index failed
 */
char *samfaipath(const char *fn_ref)
{
        char *fn_list = 0;
        if (fn_ref == 0) return 0;
        fn_list = calloc(strlen(fn_ref) + 5, 1); // room for ".fai" plus the terminator
        if (fn_list == 0) {
                fprintf(stderr, "[samfaipath] out of memory.\n");
                return 0;
        }
        strcat(strcpy(fn_list, fn_ref), ".fai");
        if (access(fn_list, R_OK) == -1) { // fn_list is unreadable
                if (access(fn_ref, R_OK) == -1) {
                        fprintf(stderr, "[samfaipath] fail to read file %s.\n", fn_ref);
                } else {
                        fprintf(stderr, "[samfaipath] build FASTA index...\n");
                        if (fai_build(fn_ref) == -1) {
                                fprintf(stderr, "[samfaipath] fail to build FASTA index.\n");
                                free(fn_list); fn_list = 0;
                        }
                }
        }
        return fn_list;
}