Updated read counts to give information about alignment percent and adapter percent.
author Tim Reddy Tim <treddy@hudsonalpha.org>
Tue, 16 Dec 2008 17:49:12 +0000 (17:49 +0000)
committer Tim Reddy Tim <treddy@hudsonalpha.org>
Tue, 16 Dec 2008 17:49:12 +0000 (17:49 +0000)
htswanalysis/scripts/CollectLibraries.pm
htswanalysis/scripts/Flowcell_QC_Makefile
htswanalysis/scripts/SummarizeLibrary.pm
htswanalysis/scripts/WriteQCSummary.pm

index fcaa7928d348f8f15f19a934574fb7d869dc715d..5d1c214fba5625887f67ca771101d4f31af773a0 100755 (executable)
@@ -14,8 +14,9 @@ for my $filename (@ARGV) {
   my($date,$flowcell,$lanes,$lib) = ($1,$2,$3,$4);
 
   open(COUNT,$filename.".count");
-  my $count = <COUNT>; chomp $count; $count =~ s/\s//g;
-  if(!defined($count)) { print STDERR $filename,"\n"; }
+  <COUNT>;
+  my $count_line = <COUNT>; chomp $count_line; my($all,$pf,$adapt,$align) = split(/\t/,$count_line);
+  if(!defined($align)) { print STDERR $filename," is badly formatted.\n"; }
   close(COUNT);
 
   if(!defined($lib)) {
@@ -24,7 +25,7 @@ for my $filename (@ARGV) {
   }
 
   if(!exists($libraries{$lib})) { my @a; $libraries{$lib} = \@a; }
-  push @{$libraries{$lib}}, "$flowcell\t$lanes\t$filename\t$count\t$date";
+  push @{$libraries{$lib}}, "$flowcell\t$lanes\t$filename\t$all\t$pf\t$adapt\t$align\t$date";
 }
 
   print "<?xml version=\"1.0\" ?>\n";
@@ -33,8 +34,8 @@ for my $filename (@ARGV) {
   for my $lib (sort {$a =~ /[sS][lL](\d+)/; my $a1 = $1; $b =~ /[sS][lL](\d+)/; my $b1 = $1; $a1 <=> $b1} keys %libraries) {
     print "<Library Name=\"$lib\">\n";
     for my $data (@{$libraries{$lib}}) {
-      my($f,$l,$fn,$N,$D) = split(/\t/,$data);
-      print "<Track Flowcell=\"$f\" Lane=\"$l\" Filename=\"$fn\" Count=\"$N\" Date=\"$D\" />\n";
+      my($f,$l,$fn,$all,$pf,$adapt,$align,$D) = split(/\t/,$data);
+      print "<Track Flowcell=\"$f\" Lane=\"$l\" Filename=\"$fn\" All=\"$all\" Pf=\"$pf\" Adapter=\"$adapt\" Align=\"$align\" Date=\"$D\" />\n";
     }
     print "</Library>\n";
   }
index f57061ddeb0d322989e34007365d8c7f4636b38f..4216db16b5583fbd74b99305c8eaf1de2af51d5c 100644 (file)
@@ -16,10 +16,9 @@ all: $(QPCR_FILES) $(PROFILE_FILES) $(CMPLX_FILES) $(PROFILE_IMAGES) $(PERCENT_B
        $(EXPTRACK_DIR)/bin/complexity_count `basename $<` $< > $@
 
 %.txt.count: %.txt
-       grep -v contam $< | awk '{if(NF > 3) {print $$1} }' | wc -l > $@;
+       $(EXPTRACK_DIR)/scripts/count_reads.pm $< $(shell echo $< | awk -F\. '{ print $$1".all.txt.gz"; }') > $@
 
 %.txt.qPCR: %.txt
-       echo $(EXPTRACK_DIR)/bin/qPCR $< $(EXPTRACK_DIR)/reference_data/GenericBackground $(EXPTRACK_DIR)/reference_data/qPCR_Tests/ | sort -k 2 -g -r | awk -F\/ '{print $$NF}'
        $(EXPTRACK_DIR)/bin/qPCR $< $(EXPTRACK_DIR)/reference_data/GenericBackground $(EXPTRACK_DIR)/reference_data/qPCR_Tests/ | sort -k 2 -g -r | awk -F\/ '{print $$NF}' > $@
 
 %.txt.profile: %.txt
index 4a62e3df45e78063b5dba737f51749a8bd829505..889fc9c5d0ef750cd0f2e7c04a4ec2c6a43a4dfb 100755 (executable)
@@ -56,7 +56,7 @@ sub SummarizeLibrary {
       $filename =~ /^(\d+)_(.+?)_s(\d+)_(.+?)_$lib.align/;
       ($date,$fc,$lane,$desc) = ($1,$2,$3,$4);
       $num_lanes += length($lane);
-      $num_reads += $xml->{Library}->[$i]->{Track}->[$t]->{Count};
+      $num_reads += $xml->{Library}->[$i]->{Track}->[$t]->{Align};
       if(!defined($start_date)) {
         $start_date = $date;
         $end_date = $date;
index 9593c8940ba4e984fe08c7ca3fc33225fc543a03..163e861840a2e1234ac8377c121d1dc1fdb96b02 100755 (executable)
@@ -55,15 +55,21 @@ for my $i (0..scalar(@{$xml->{Library}})-1) {
     my $filename = $xml->{Library}->[$i]->{Track}->[$t]->{Filename};
     $filename =~ /^(\d+)_(.+?)_s(\d+)_(.+?)_$lib.align/;
     ($date,$fc,$lane,$desc) = ($1,$2,$3,$4);
-    my $num_reads = $xml->{Library}->[$i]->{Track}->[$t]->{Count};
+    my $all_reads   = $xml->{Library}->[$i]->{Track}->[$t]->{All};
+    my $pf_reads    = $xml->{Library}->[$i]->{Track}->[$t]->{Pf};
+    my $adapt_reads = $xml->{Library}->[$i]->{Track}->[$t]->{Adapter};
+    my $align_reads = $xml->{Library}->[$i]->{Track}->[$t]->{Align};
 
     my $bgcolor;
-    if($num_reads < 3000000) { $bgcolor = "FF3300"; }
-    elsif($num_reads < 5000000) { $bgcolor = "FFCC33"; }
-    elsif($num_reads < 10000000) { $bgcolor = "00CCFF"; }
+    if($align_reads < 3000000) { $bgcolor = "FF3300"; }
+    elsif($align_reads < 5000000) { $bgcolor = "FFCC33"; }
+    elsif($align_reads < 10000000) { $bgcolor = "00CCFF"; }
     else { $bgcolor = "66FF66"; }
 
-    $num_align{$lane}{'num'} = $num_reads;
+    $num_align{$lane}{'all'} = $all_reads;
+    $num_align{$lane}{'pf'} = $pf_reads;
+    $num_align{$lane}{'adapter'} = $adapt_reads;
+    $num_align{$lane}{'align'} = $align_reads;
     $num_align{$lane}{'bgcolor'} = $bgcolor;
   }
 }
@@ -80,7 +86,15 @@ for my $file (@files) {
   print "<TD>$lanes</TD>";
   print "<TD>$lib</TD>\n";
   print "<TD>$libname</TD>\n";
-  printf "<TD BGCOLOR=#%s>%0.2fM</TD>\n",$num_align{$lanes}{'bgcolor'},$num_align{$lanes}{'num'}/1000000.0;
+  printf "<TD BGCOLOR=#%s>Total Reads: %0.2fM<BR><BR>Pass Filter Reads: %0.2fM (%0.2f%%)<BR><BR>Adapters: %0.2fM (%0.2f%%)<BR><BR><B>Aligned Reads: %0.2fM (%0.2f%%)</B></TD>\n",
+    $num_align{$lanes}{'bgcolor'},
+    $num_align{$lanes}{'all'}/1000000.0,
+    $num_align{$lanes}{'pf'}/1000000.0,
+    100*$num_align{$lanes}{'pf'}/$num_align{$lanes}{'all'},
+    $num_align{$lanes}{'adapter'}/1000000.0,
+    100*$num_align{$lanes}{'adapter'}/$num_align{$lanes}{'pf'},
+    $num_align{$lanes}{'align'}/1000000.0,
+    100*$num_align{$lanes}{'align'}/($num_align{$lanes}{'pf'}-$num_align{$lanes}{'adapter'});
   printf "<TD BGCOLOR=#%s>%s</TD><TD BGCOLOR=#%s>%0.2f<BR>%0.2f</TD>\n",$qpcr_sum{$lanes}{'bgcolor'},$qpcr_sum{$lanes}{'best'}."<BR>".$qpcr_sum{$lanes}{'best2'},$qpcr_sum{$lanes}{'bgcolor'},$qpcr_sum{$lanes}{'enrich'},$qpcr_sum{$lanes}{'enrich2'};
   print "<TD><OBJECT DATA=\"",`basename $file`,"\" WIDTH=\"300\" HEIGHT=\"300\"></OBJECT></TD>";
   print "<TD><IMG SRC=\"",$date,"_",$fc,"_s",$lanes,"_",$libname,"_",$lib,".percent_base.png\" WIDTH=\"300\" HEIGHT=\"300\"></TD>";