Imported Upstream version 0.1.13
[samtools.git] / bcftools / vcf.c
index 9b661ff5b4fbc29dde6a53b5e71c027f131a81f4..9daa845cba6c13b7d2190fbf77f4877b7979178c 100644 (file)
@@ -72,6 +72,33 @@ bcf_t *vcf_open(const char *fn, const char *mode)
        return bp;
 }
 
        return bp;
 }
 
+/*
+ * Read sequence names from the (optionally gzip-compressed) file `fn` and
+ * register them with a VCF handle and its header.
+ *
+ * One name is taken per line: the first token returned by ks_getuntil()
+ * with delimiter 0 (whitespace separation in klib's kseq -- TODO confirm),
+ * after which the remainder of the line is discarded.  For every name:
+ *   - a strdup()'ed copy is added to v->refhash (the hash keeps that
+ *     pointer; presumably it is released by bcf_str2id_thorough_destroy()
+ *     in vcf_close() -- confirm);
+ *   - the name plus a terminating NUL is appended to h->name.
+ * Afterwards h->l_nm / h->name are updated and bcf_hdr_sync(h) is called.
+ *
+ * Returns  0 on success, or when bp is not a VCF handle (nothing is done);
+ *         -1 when bp, h or fn is NULL, when fn cannot be opened, or when a
+ *            name could not be duplicated (names read so far are kept).
+ */
+int vcf_dictread(bcf_t *bp, bcf_hdr_t *h, const char *fn)
+{
+       vcf_t *v;
+       gzFile fp;
+       kstream_t *ks;
+       kstring_t s, rn;
+       int dret, ret = 0;
+       if (bp == 0) return -1;
+       if (!bp->is_vcf) return 0;
+       if (h == 0 || fn == 0) return -1;
+       /* Open first: on failure nothing has been modified yet. */
+       fp = gzopen(fn, "r");
+       if (fp == 0) return -1;
+       s.l = s.m = 0; s.s = 0;
+       /* Continue appending to the header's existing name buffer. */
+       rn.m = rn.l = h->l_nm; rn.s = h->name;
+       v = (vcf_t*)bp->v;
+       ks = ks_init(fp);
+       while (ks_getuntil(ks, 0, &s, &dret) >= 0) {
+               char *name = strdup(s.s);
+               if (name == 0) { ret = -1; break; } /* out of memory */
+               bcf_str2id_add(v->refhash, name);
+               kputs(s.s, &rn); kputc('\0', &rn);
+               /* Skip whatever follows the name on this line. */
+               if (dret != '\n') ks_getuntil(ks, '\n', &s, &dret);
+       }
+       ks_destroy(ks);
+       gzclose(fp);
+       /* kputs/kputc may have reallocated the buffer; hand it back to h. */
+       h->l_nm = rn.l; h->name = rn.s;
+       bcf_hdr_sync(h);
+       free(s.s);
+       return ret;
+}
+
 int vcf_close(bcf_t *bp)
 {
        vcf_t *v;
 int vcf_close(bcf_t *bp)
 {
        vcf_t *v;
@@ -84,7 +111,7 @@ int vcf_close(bcf_t *bp)
        }
        if (v->fpout) fclose(v->fpout);
        free(v->line.s);
        }
        if (v->fpout) fclose(v->fpout);
        free(v->line.s);
-       bcf_str2id_destroy(v->refhash);
+       bcf_str2id_thorough_destroy(v->refhash);
        free(v);
        free(bp);
        return 0;
        free(v);
        free(bp);
        return 0;
@@ -93,15 +120,14 @@ int vcf_close(bcf_t *bp)
 int vcf_hdr_write(bcf_t *bp, const bcf_hdr_t *h)
 {
        vcf_t *v = (vcf_t*)bp->v;
 int vcf_hdr_write(bcf_t *bp, const bcf_hdr_t *h)
 {
        vcf_t *v = (vcf_t*)bp->v;
-       int i, has_ref = 0, has_ver = 0;
+       int i, has_ver = 0;
        if (!bp->is_vcf) return bcf_hdr_write(bp, h);
        if (h->l_txt > 0) {
                if (strstr(h->txt, "##fileformat=")) has_ver = 1;
        if (!bp->is_vcf) return bcf_hdr_write(bp, h);
        if (h->l_txt > 0) {
                if (strstr(h->txt, "##fileformat=")) has_ver = 1;
-               if (has_ver == 0) fprintf(v->fpout, "##fileformat=VCFv4.0\n");
+               if (has_ver == 0) fprintf(v->fpout, "##fileformat=VCFv4.1\n");
                fwrite(h->txt, 1, h->l_txt - 1, v->fpout);
                fwrite(h->txt, 1, h->l_txt - 1, v->fpout);
-               if (strstr(h->txt, "##SQ=")) has_ref = 1;
        }
        }
-       if (has_ver == 0) fprintf(v->fpout, "##fileformat=VCFv4.0\n");
+       if (h->l_txt == 0) fprintf(v->fpout, "##fileformat=VCFv4.1\n");
        fprintf(v->fpout, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT");
        for (i = 0; i < h->n_smpl; ++i)
                fprintf(v->fpout, "\t%s", h->sns[i]);
        fprintf(v->fpout, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT");
        for (i = 0; i < h->n_smpl; ++i)
                fprintf(v->fpout, "\t%s", h->sns[i]);
@@ -138,7 +164,7 @@ int vcf_read(bcf_t *bp, bcf_hdr_t *h, bcf1_t *b)
                if (k == 0) { // ref
                        int tid = bcf_str2id(v->refhash, p);
                        if (tid < 0) {
                if (k == 0) { // ref
                        int tid = bcf_str2id(v->refhash, p);
                        if (tid < 0) {
-                               tid = bcf_str2id_add(v->refhash, p);
+                               tid = bcf_str2id_add(v->refhash, strdup(p));
                                kputs(p, &rn); kputc('\0', &rn);
                                sync = 1;
                        }
                                kputs(p, &rn); kputc('\0', &rn);
                                sync = 1;
                        }
@@ -156,8 +182,10 @@ int vcf_read(bcf_t *bp, bcf_hdr_t *h, bcf1_t *b)
                                for (i = 0; i < b->n_gi; ++i) {
                                        if (b->gi[i].fmt == bcf_str2int("GT", 2)) {
                                                ((uint8_t*)b->gi[i].data)[k-9] = 1<<7;
                                for (i = 0; i < b->n_gi; ++i) {
                                        if (b->gi[i].fmt == bcf_str2int("GT", 2)) {
                                                ((uint8_t*)b->gi[i].data)[k-9] = 1<<7;
-                                       } else if (b->gi[i].fmt == bcf_str2int("GQ", 2) || b->gi[i].fmt == bcf_str2int("SP", 2)) {
+                                       } else if (b->gi[i].fmt == bcf_str2int("GQ", 2)) {
                                                ((uint8_t*)b->gi[i].data)[k-9] = 0;
                                                ((uint8_t*)b->gi[i].data)[k-9] = 0;
+                                       } else if (b->gi[i].fmt == bcf_str2int("SP", 2)) {
+                                               ((int32_t*)b->gi[i].data)[k-9] = 0;
                                        } else if (b->gi[i].fmt == bcf_str2int("DP", 2)) {
                                                ((uint16_t*)b->gi[i].data)[k-9] = 0;
                                        } else if (b->gi[i].fmt == bcf_str2int("PL", 2)) {
                                        } else if (b->gi[i].fmt == bcf_str2int("DP", 2)) {
                                                ((uint16_t*)b->gi[i].data)[k-9] = 0;
                                        } else if (b->gi[i].fmt == bcf_str2int("PL", 2)) {
@@ -173,11 +201,15 @@ int vcf_read(bcf_t *bp, bcf_hdr_t *h, bcf1_t *b)
                        for (q = kstrtok(p, ":", &a2), i = 0; q && i < b->n_gi; q = kstrtok(0, 0, &a2), ++i) {
                                if (b->gi[i].fmt == bcf_str2int("GT", 2)) {
                                        ((uint8_t*)b->gi[i].data)[k-9] = (q[0] - '0')<<3 | (q[2] - '0') | (q[1] == '/'? 0 : 1) << 6;
                        for (q = kstrtok(p, ":", &a2), i = 0; q && i < b->n_gi; q = kstrtok(0, 0, &a2), ++i) {
                                if (b->gi[i].fmt == bcf_str2int("GT", 2)) {
                                        ((uint8_t*)b->gi[i].data)[k-9] = (q[0] - '0')<<3 | (q[2] - '0') | (q[1] == '/'? 0 : 1) << 6;
-                               } else if (b->gi[i].fmt == bcf_str2int("GQ", 2) || b->gi[i].fmt == bcf_str2int("SP", 2)) {
+                               } else if (b->gi[i].fmt == bcf_str2int("GQ", 2)) {
                                        double _x = strtod(q, &q);
                                        int x = (int)(_x + .499);
                                        if (x > 255) x = 255;
                                        ((uint8_t*)b->gi[i].data)[k-9] = x;
                                        double _x = strtod(q, &q);
                                        int x = (int)(_x + .499);
                                        if (x > 255) x = 255;
                                        ((uint8_t*)b->gi[i].data)[k-9] = x;
+                               } else if (b->gi[i].fmt == bcf_str2int("SP", 2)) {
+                                       int x = strtol(q, &q, 10);
+                                       if (x > 0xffff) x = 0xffff;
+                                       ((uint32_t*)b->gi[i].data)[k-9] = x;
                                } else if (b->gi[i].fmt == bcf_str2int("DP", 2)) {
                                        int x = strtol(q, &q, 10);
                                        if (x > 0xffff) x = 0xffff;
                                } else if (b->gi[i].fmt == bcf_str2int("DP", 2)) {
                                        int x = strtol(q, &q, 10);
                                        if (x > 0xffff) x = 0xffff;
@@ -198,7 +230,7 @@ int vcf_read(bcf_t *bp, bcf_hdr_t *h, bcf1_t *b)
                                        y = b->n_alleles * (b->n_alleles + 1) / 2;
                                        for (j = 0; j < y; ++j) {
                                                x = strtod(q, &q);
                                        y = b->n_alleles * (b->n_alleles + 1) / 2;
                                        for (j = 0; j < y; ++j) {
                                                x = strtod(q, &q);
-                                               data[(k-9) * y + j] = x;
+                                               data[(k-9) * y + j] = x > 0? -x/10. : x;
                                                ++q;
                                        }
                                }
                                                ++q;
                                        }
                                }