/* Instantiates the stream-reader helpers for gzFile handles, reading through
 * gzread. NOTE(review): presumably this is what provides the ks_init() and
 * ks_getuntil() calls used in this file, and 4096 is presumably the read
 * buffer size -- confirm against the KSTREAM_INIT macro definition. */
10 KSTREAM_INIT(gzFile, gzread, 4096)
/* Read the header of a VCF text stream into a newly allocated bcf_hdr_t.
 *
 * When bp is not in VCF mode the call is forwarded to bcf_hdr_read().
 * Otherwise lines are read until the stream ends (or a non-'#' line is hit):
 * "##" lines are accumulated as header text, and the single-'#' column line
 * is tokenized to collect names into the sample buffer.
 *
 * NOTE(review): several lines of this function are not visible here
 * (declarations, the origin of `v`, the loop exit and the final return), so
 * this description covers only the statements shown. */
21 bcf_hdr_t *vcf_hdr_read(bcf_t *bp)
27 if (!bp->is_vcf) return bcf_hdr_read(bp);
// Zero-initialized header; its fields are filled in at the end.
28 h = calloc(1, sizeof(bcf_hdr_t));
// Start both accumulators (header text and sample names) as empty kstrings.
31 memset(&meta, 0, sizeof(kstring_t));
32 memset(&smpl, 0, sizeof(kstring_t));
// One iteration per '\n'-terminated line; stops when ks_getuntil() returns < 0.
33 while (ks_getuntil(v->ks, '\n', &v->line, &dret) >= 0) {
// Lines shorter than two characters are ignored.
34 if (v->line.l < 2) continue;
// NOTE(review): this early return does not release h, meta or smpl in the
// visible code -- looks like a leak on malformed input; confirm.
35 if (v->line.s[0] != '#') return 0; // no sample line
36 if (v->line.s[0] == '#' && v->line.s[1] == '#') {
// "##" meta line: append it verbatim plus a newline to the header text.
37 kputsn(v->line.s, v->line.l, &meta); kputc('\n', &meta);
38 } else if (v->line.s[0] == '#') {
// Column-header line: split on tab/newline, k counts the columns.
42 for (p = kstrtok(v->line.s, "\t\n", &aux), k = 0; p; p = kstrtok(0, 0, &aux), ++k) {
// Append the current token (aux.p marks its end) to the sample-name buffer.
// NOTE(review): the condition guarding this append is on a hidden line;
// presumably only sample columns are kept -- confirm.
44 kputsn(p, aux.p - p, &smpl);
// Hand ownership of the accumulated buffers to the header.
53 h->sname = smpl.s; h->l_smpl = smpl.l;
54 h->txt = meta.s; h->l_txt = meta.l;
/* Open a VCF text file for reading or writing, or a BCF file when mode
 * contains 'b' (in which case the call is forwarded to bcf_open()).
 *
 * fn   - file name; "-" selects stdin (read) or stdout (write).
 * mode - 'r' opens a gz-capable input stream, 'w' opens a plain FILE output.
 *
 * NOTE(review): the visible lines check neither the calloc() results nor the
 * gzopen()/fopen() results; hidden lines may do so -- confirm. */
59 bcf_t *vcf_open(const char *fn, const char *mode)
63 if (strchr(mode, 'b')) return bcf_open(fn, mode);
64 bp = calloc(1, sizeof(bcf_t));
65 v = calloc(1, sizeof(vcf_t));
// Name -> id table; vcf_dictread() and vcf_read() add entries to it.
68 v->refhash = bcf_str2id_init();
69 if (strchr(mode, 'r')) {
// strcmp() is non-zero for any name other than "-", so real paths use gzopen().
70 v->fp = strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
71 v->ks = ks_init(v->fp);
72 } else if (strchr(mode, 'w'))
// Output is an uncompressed stdio stream; "-" maps to stdout.
73 v->fpout = strcmp(fn, "-")? fopen(fn, "w") : stdout;
/* Load names from a dictionary file into the VCF reader's name table and
 * append them to the header's name buffer.
 *
 * Returns -1 when bp is NULL and 0 when bp is not in VCF mode.
 * NOTE(review): how `ks` is opened from fn, the cleanup and the final return
 * value are on lines not visible here. */
77 int vcf_dictread(bcf_t *bp, bcf_hdr_t *h, const char *fn)
84 if (bp == 0) return -1;
85 if (!bp->is_vcf) return 0;
// s is an empty scratch string for the token being read.
86 s.l = s.m = 0; s.s = 0;
// rn wraps the header's existing name buffer so new names are appended to it.
87 rn.m = rn.l = h->l_nm; rn.s = h->name;
// NOTE(review): delimiter 0 presumably selects "any whitespace" in the
// stream reader -- confirm against its definition.
91 while (ks_getuntil(ks, 0, &s, &dret) >= 0) {
// The table receives its own heap copy of the name.
92 bcf_str2id_add(v->refhash, strdup(s.s));
// Names are stored back to back, each followed by a NUL byte.
93 kputs(s.s, &rn); kputc('\0', &rn);
// If the token did not end at a newline, discard the remainder of the line.
94 if (dret != '\n') ks_getuntil(ks, '\n', &s, &dret);
// Publish the (possibly reallocated) buffer and its new length to the header.
98 h->l_nm = rn.l; h->name = rn.s;
/* Close a handle returned by vcf_open().
 *
 * Returns -1 when bp is NULL; forwards to bcf_close() when bp is not in VCF
 * mode. NOTE(review): release of the input stream and of bp/v themselves, and
 * the final return value, are on lines not visible here. */
104 int vcf_close(bcf_t *bp)
107 if (bp == 0) return -1;
108 if (!bp->is_vcf) return bcf_close(bp);
// NOTE(review): vcf_open() stores stdout in fpout for the file name "-", so
// this closes stdout in that case -- confirm that is intended.
114 if (v->fpout) fclose(v->fpout);
// Tears down the name table created in vcf_open().
116 bcf_str2id_thorough_destroy(v->refhash);
/* Write the header h as VCF text to bp's output stream.
 *
 * When bp is not in VCF mode the call is forwarded to bcf_hdr_write().
 * Otherwise this emits the stored header text (preceded by a
 * "##fileformat=VCFv4.1" line if the text does not already contain one),
 * then the "#CHROM ..." column line followed by one column per sample.
 *
 * NOTE(review): declarations, any guard around the header-text writes, and
 * the return statement are on lines not visible here. */
122 int vcf_hdr_write(bcf_t *bp, const bcf_hdr_t *h)
124 vcf_t *v = (vcf_t*)bp->v;
126 if (!bp->is_vcf) return bcf_hdr_write(bp, h);
// Only add a fileformat line when the stored text lacks one.
128 if (strstr(h->txt, "##fileformat=")) has_ver = 1;
129 if (has_ver == 0) fprintf(v->fpout, "##fileformat=VCFv4.1\n");
// Writes all but the last byte of the stored text.
// NOTE(review): l_txt - 1 is only meaningful when l_txt > 0; a hidden line
// presumably guards this -- confirm.
130 fwrite(h->txt, 1, h->l_txt - 1, v->fpout);
// With no stored text at all, the fileformat line is still emitted.
132 if (h->l_txt == 0) fprintf(v->fpout, "##fileformat=VCFv4.1\n");
133 fprintf(v->fpout, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT");
// One tab-separated column per sample name in h->sns[0 .. n_smpl-1].
134 for (i = 0; i < h->n_smpl; ++i)
135 fprintf(v->fpout, "\t%s", h->sns[i]);
136 fputc('\n', v->fpout);
/* Write one record b as a line of VCF text.
 *
 * When bp is not in VCF mode the call is forwarded to bcf_write().
 * Otherwise the record is formatted into v->line by bcf_fmt_core() and
 * written followed by a newline.
 *
 * Returns the formatted line length plus one (for the newline).
 * NOTE(review): the fwrite()/fputc() results are not checked, so a short
 * write is not reported to the caller. */
140 int vcf_write(bcf_t *bp, bcf_hdr_t *h, bcf1_t *b)
142 vcf_t *v = (vcf_t*)bp->v;
// Declared locally because the formatter is defined in another file.
143 extern void bcf_fmt_core(const bcf_hdr_t *h, bcf1_t *b, kstring_t *s);
144 if (!bp->is_vcf) return bcf_write(bp, h, b);
145 bcf_fmt_core(h, b, &v->line);
146 fwrite(v->line.s, 1, v->line.l, v->fpout);
147 fputc('\n', v->fpout);
148 return v->line.l + 1;
/* Read one line of VCF text from bp and parse it into the record b.
 *
 * When bp is not in VCF mode the call is forwarded to bcf_read().
 * The line is split on tabs; k is the zero-based column index. Fixed columns
 * fill b's scalar fields and string buffer, and later columns are parsed as
 * per-sample genotype fields into b->gi[].data.
 *
 * Returns -1 when no line could be read, otherwise the line length plus one.
 *
 * NOTE(review): many lines of this function (declarations, several branch
 * conditions, closing braces) are not visible here; comments below describe
 * only the statements shown. */
151 int vcf_read(bcf_t *bp, bcf_hdr_t *h, bcf1_t *b)
153 int dret, k, i, sync = 0;
154 vcf_t *v = (vcf_t*)bp->v;
158 if (!bp->is_vcf) return bcf_read(bp, h, b);
// str reuses b's existing string buffer, with its length reset to 0.
160 str.l = 0; str.m = b->m_str; str.s = b->str;
// rn wraps the header's name buffer so unseen names can be appended.
161 rn.l = rn.m = h->l_nm; rn.s = h->name;
162 if (ks_getuntil(v->ks, '\n', &v->line, &dret) < 0) return -1;
163 b->n_smpl = h->n_smpl;
164 for (p = kstrtok(v->line.s, "\t", &aux), k = 0; p; p = kstrtok(0, 0, &aux), ++k) {
// Look the name up in the table built by vcf_open()/vcf_dictread().
// NOTE(review): the conditions selecting this branch and the "not found"
// case are on hidden lines; presumably this is column 0 -- confirm.
167 int tid = bcf_str2id(v->refhash, p);
// Register a heap copy of the new name and append it, NUL-terminated, to rn.
169 tid = bcf_str2id_add(v->refhash, strdup(p));
170 kputs(p, &rn); kputc('\0', &rn);
174 } else if (k == 1) { // pos
// The stored position is the parsed integer minus one.
175 b->pos = atoi(p) - 1;
176 } else if (k == 5) { // qual
// A field that does not start with a digit yields a quality of 0.
177 b->qual = (p[0] >= '0' && p[0] <= '9')? atof(p) : 0;
178 } else if (k <= 8) { // variable length strings
// Remaining columns up to index 8 are appended NUL-terminated to str, and
// b's buffer fields are refreshed after each append.
179 kputs(p, &str); kputc('\0', &str);
180 b->l_str = str.l; b->m_str = str.m; b->str = str.s;
// After column 8 has been appended, bcf_sync() is called on the record.
181 if (k == 8) bcf_sync(b);
// Field starting with "./.": fill entry k-9 of every format field with a
// fixed default instead of parsing.
183 if (strncmp(p, "./.", 3) == 0) {
184 for (i = 0; i < b->n_gi; ++i) {
185 if (b->gi[i].fmt == bcf_str2int("GT", 2)) {
// Only bit 7 is set for GT in this case.
186 ((uint8_t*)b->gi[i].data)[k-9] = 1<<7;
187 } else if (b->gi[i].fmt == bcf_str2int("GQ", 2)) {
188 ((uint8_t*)b->gi[i].data)[k-9] = 0;
189 } else if (b->gi[i].fmt == bcf_str2int("SP", 2)) {
190 ((int32_t*)b->gi[i].data)[k-9] = 0;
191 } else if (b->gi[i].fmt == bcf_str2int("DP", 2)) {
192 ((uint16_t*)b->gi[i].data)[k-9] = 0;
193 } else if (b->gi[i].fmt == bcf_str2int("PL", 2)) {
// y = n_alleles * (n_alleles + 1) / 2 values per sample; zero y bytes.
194 int y = b->n_alleles * (b->n_alleles + 1) / 2;
195 memset((uint8_t*)b->gi[i].data + (k - 9) * y, 0, y);
196 } else if (b->gi[i].fmt == bcf_str2int("GL", 2)) {
// Same count of values as PL but stored as float; y * 4 assumes 4-byte floats.
197 int y = b->n_alleles * (b->n_alleles + 1) / 2;
198 memset((float*)b->gi[i].data + (k - 9) * y, 0, y * 4);
// Otherwise split the sample field on ':' and parse sub-field i according to
// b->gi[i].fmt; parsing stops at n_gi sub-fields.
203 for (q = kstrtok(p, ":", &a2), i = 0; q && i < b->n_gi; q = kstrtok(0, 0, &a2), ++i) {
204 if (b->gi[i].fmt == bcf_str2int("GT", 2)) {
// Packs q[0] into bits 3 and up, q[2] into the low bits, and sets bit 6 when
// the separator q[1] is not '/'.
// NOTE(review): reads q[0..2] unconditionally and treats each allele as a
// single digit -- short or multi-digit genotypes are not handled here.
205 ((uint8_t*)b->gi[i].data)[k-9] = (q[0] - '0')<<3 | (q[2] - '0') | (q[1] == '/'? 0 : 1) << 6;
206 } else if (b->gi[i].fmt == bcf_str2int("GQ", 2)) {
// Rounded by adding .499 before truncation, then capped at 255.
207 double _x = strtod(q, &q);
208 int x = (int)(_x + .499);
209 if (x > 255) x = 255;
210 ((uint8_t*)b->gi[i].data)[k-9] = x;
211 } else if (b->gi[i].fmt == bcf_str2int("SP", 2)) {
// Capped at 0xffff but stored as a 32-bit value.
// NOTE(review): stored through uint32_t* here while the "./." branch above
// uses int32_t* -- same width, inconsistent signedness; confirm intent.
212 int x = strtol(q, &q, 10);
213 if (x > 0xffff) x = 0xffff;
214 ((uint32_t*)b->gi[i].data)[k-9] = x;
215 } else if (b->gi[i].fmt == bcf_str2int("DP", 2)) {
// Capped at 0xffff to fit the 16-bit slot.
216 int x = strtol(q, &q, 10);
217 if (x > 0xffff) x = 0xffff;
218 ((uint16_t*)b->gi[i].data)[k-9] = x;
219 } else if (b->gi[i].fmt == bcf_str2int("PL", 2)) {
// y values per sample, each capped at 255 and stored at (k-9) * y + j.
221 uint8_t *data = (uint8_t*)b->gi[i].data;
222 y = b->n_alleles * (b->n_alleles + 1) / 2;
223 for (j = 0; j < y; ++j) {
224 x = strtol(q, &q, 10);
225 if (x > 255) x = 255;
226 data[(k-9) * y + j] = x;
229 } else if (b->gi[i].fmt == bcf_str2int("GL", 2)) {
231 float x, *data = (float*)b->gi[i].data;
232 y = b->n_alleles * (b->n_alleles + 1) / 2;
233 for (j = 0; j < y; ++j) {
// Positive inputs are stored as -x/10; non-positive inputs are stored as is.
// NOTE(review): the statement that assigns x is on a hidden line.
235 data[(k-9) * y + j] = x > 0? -x/10. : x;
// Publish the (possibly reallocated) name buffer back to the header.
243 h->l_nm = rn.l; h->name = rn.s;
// NOTE(review): sync is presumably set on a hidden line when a new name was
// added above -- confirm.
244 if (sync) bcf_hdr_sync(h);
245 return v->line.l + 1;