patched to handle new pf file format with generality to old format.
[htsworkflow.git] / htswanalysis / scripts / count_bases.pm
1 #!/usr/bin/perl -w
2
3 use strict;
4 use warnings;
5
6 my %calls;
7 my @a = (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
8 my @c = (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
9 my @g = (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
10 my @t = (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
11 my @n = (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
12
13 $calls{'A'} = \@a;
14 $calls{'C'} = \@c;
15 $calls{'G'} = \@g;
16 $calls{'T'} = \@t;
17 $calls{'N'} = \@t;
18
19 my $count = 0;
20 my $length;
21
22 while( my $read = <>) {
23   chomp $read;
24   my @a = split(/\s+/,$read);
25   $read = $a[0];
26   $length = length($read);
27   $count++;
28   for(0..$length-1) {
29     my $base = uc(substr($read,$_,1));
30     if($base eq 'A' || $base eq 'C' || $base eq 'G' || $base eq 'T' || $base eq 'N') {
31       $calls{$base}[$_] += 1;
32     }
33   }
34 }
35
36 for(0..$length-1) {
37   print "$_\t",$calls{A}[$_]/$count,"\t",$calls{C}[$_]/$count,"\t",$calls{G}[$_]/$count,"\t",$calls{T}[$_]/$count,"\n";
38 }