Patched to handle the new pf file format while remaining compatible with the old format.
author Tim Reddy Tim <treddy@hudsonalpha.org>
Thu, 23 Apr 2009 22:33:28 +0000 (22:33 +0000)
committer Tim Reddy Tim <treddy@hudsonalpha.org>
Thu, 23 Apr 2009 22:33:28 +0000 (22:33 +0000)
htswanalysis/scripts/count_bases.pm

index a4f357da9f1835bcd927fb81cc021b635a16073a..9049b9ca3f4fc629c308e14f1fb9cf1eb62eee2d 100755 (executable)
@@ -8,22 +8,28 @@ my @a = (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
 my @c = (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
 my @g = (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
 my @t = (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
+my @n = (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);  # per-position tally for 'N' bases
 
 $calls{'A'} = \@a;
 $calls{'C'} = \@c;
 $calls{'G'} = \@g;
 $calls{'T'} = \@t;
+$calls{'N'} = \@n;  # N needs its own array; pointing it at \@t would add N counts to the T tally
 
 my $count = 0;
 my $length;
 
 while( my $read = <>) {
   chomp $read;
+  my @a = split(/\s+/,$read);
+  $read = $a[0];
   $length = length($read);
   $count++;
   for(0..$length-1) {
     my $base = uc(substr($read,$_,1));
-    $calls{$base}[$_] += 1;
+    if($base eq 'A' || $base eq 'C' || $base eq 'G' || $base eq 'T' || $base eq 'N') {
+      $calls{$base}[$_] += 1;
+    }
   }
 }