X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=samtools.git;a=blobdiff_plain;f=bcftools%2Fbcf-fix.pl;fp=bcftools%2Fbcf-fix.pl;h=61c6136f0c751516cf5959fbcbd98229002107ea;hp=0000000000000000000000000000000000000000;hb=8d2494d1fb7cd0fa7c63be5ffba8dd1a11457522;hpb=cb12a866906ec4ac644de0e658679261c82ab098 diff --git a/bcftools/bcf-fix.pl b/bcftools/bcf-fix.pl new file mode 100755 index 0000000..61c6136 --- /dev/null +++ b/bcftools/bcf-fix.pl @@ -0,0 +1,101 @@ +#!/usr/bin/perl -w + +use strict; +use warnings; +use Carp; + +my $opts = parse_params(); +bcf_fix(); + +exit; + +#-------------------------------- + +sub error +{ + my (@msg) = @_; + if ( scalar @msg ) { confess @msg; } + die + "Usage: bcftools view test.bcf | bcf-fix.pl > test.vcf\n", + "Options:\n", + " -h, -?, --help This help message.\n", + "\n"; +} + + +sub parse_params +{ + my $opts = {}; + while (my $arg=shift(@ARGV)) + { + if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); } + error("Unknown parameter \"$arg\". Run -h for help.\n"); + } + return $opts; +} + +sub bcf_fix +{ + while (my $line=) + { + if ( $line=~/^#CHROM/ ) + { + print +qq[##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +]; + print $line; + } + elsif ( $line=~/^#/ ) + { + print $line; + } + else + { + my @items = split(/\t/,$line); + my @tags = split(/:/,$items[8]); # FORMAT tags + + my $nidx=2; + my @idxs; # Mapping which defines new ordering: $idxs[$inew]=$iold; GT comes first, PL second + for (my $i=0; $i<@tags; $i++) + { + if ( $tags[$i] eq 'GT' ) { $idxs[0]=$i; } + elsif ( $tags[$i] eq 'PL' ) { $idxs[1]=$i; } + else { $idxs[$nidx++]=$i; } + } + if ( !exists($tags[0]) or !exists($tags[1]) ) { error("FIXME: expected GT and PL in the format field.\n"); } + + # First fix the FORMAT column + $items[8] = 'GT:GL'; + for (my $i=2; $i<@tags; $i++) + { + $items[8] .= ':'.$tags[$idxs[$i]]; + } + + # Now all the genotype columns + for (my $iitem=9; $iitem<@items; $iitem++) + { + @tags = split(/:/,$items[$iitem]); + $items[$iitem] = $tags[$idxs[0]] .':'; + + # GL=-PL/10 + my ($a,$b,$c) = split(/,/,$tags[$idxs[1]]); + $items[$iitem] .= sprintf "%.2f,%.2f,%.2f",-$a/10.,-$b/10.,-$c/10.; + + for (my $itag=2; $itag<@tags; $itag++) + { + $items[$iitem] .= ':'.$tags[$idxs[$itag]]; + } + } + print join("\t",@items); + } + } +} +