From 86c9237e10c6a8db2779baaa7e8d8d8908fc86a2 Mon Sep 17 00:00:00 2001 From: Tim Reddy Tim Date: Tue, 16 Dec 2008 17:49:12 +0000 Subject: [PATCH] Updated read counts to give information about alignment percent, and adapter percent. --- htswanalysis/scripts/CollectLibraries.pm | 11 +++++----- htswanalysis/scripts/Flowcell_QC_Makefile | 3 +-- htswanalysis/scripts/SummarizeLibrary.pm | 2 +- htswanalysis/scripts/WriteQCSummary.pm | 26 +++++++++++++++++------ 4 files changed, 28 insertions(+), 14 deletions(-) diff --git a/htswanalysis/scripts/CollectLibraries.pm b/htswanalysis/scripts/CollectLibraries.pm index fcaa792..5d1c214 100755 --- a/htswanalysis/scripts/CollectLibraries.pm +++ b/htswanalysis/scripts/CollectLibraries.pm @@ -14,8 +14,9 @@ for my $filename (@ARGV) { my($date,$flowcell,$lanes,$lib) = ($1,$2,$3,$4); open(COUNT,$filename.".count"); - my $count = ; chomp $count; $count =~ s/\s//g; - if(!defined($count)) { print STDERR $filename,"\n"; } + ; + my $count_line = ; chomp $count_line; my($all,$pf,$adapt,$align) = split(/\t/,$count_line); + if(!defined($align)) { print STDERR $filename," is badly formatted.\n"; } close(COUNT); if(!defined($lib)) { @@ -24,7 +25,7 @@ for my $filename (@ARGV) { } if(!exists($libraries{$lib})) { my @a; $libraries{$lib} = \@a; } - push @{$libraries{$lib}}, "$flowcell\t$lanes\t$filename\t$count\t$date"; + push @{$libraries{$lib}}, "$flowcell\t$lanes\t$filename\t$all\t$pf\t$adapt\t$align\t$date"; } print "\n"; @@ -33,8 +34,8 @@ for my $filename (@ARGV) { for my $lib (sort {$a =~ /[sS][lL](\d+)/; my $a1 = $1; $b =~ /[sS][lL](\d+)/; my $b1 = $1; $a1 <=> $b1} keys %libraries) { print "\n"; for my $data (@{$libraries{$lib}}) { - my($f,$l,$fn,$N,$D) = split(/\t/,$data); - print "\n"; + my($f,$l,$fn,$all,$pf,$adapt,$align,$D) = split(/\t/,$data); + print "\n"; } print "\n"; } diff --git a/htswanalysis/scripts/Flowcell_QC_Makefile b/htswanalysis/scripts/Flowcell_QC_Makefile index f57061d..4216db1 100644 --- 
a/htswanalysis/scripts/Flowcell_QC_Makefile +++ b/htswanalysis/scripts/Flowcell_QC_Makefile @@ -16,10 +16,9 @@ all: $(QPCR_FILES) $(PROFILE_FILES) $(CMPLX_FILES) $(PROFILE_IMAGES) $(PERCENT_B $(EXPTRACK_DIR)/bin/complexity_count `basename $<` $< > $@ %.txt.count: %.txt - grep -v contam $< | awk '{if(NF > 3) {print $$1} }' | wc -l > $@; + $(EXPTRACK_DIR)/scripts/count_reads.pm $< $(shell echo $< | awk -F\. '{ print $$1".all.txt.gz"; }') > $@ %.txt.qPCR: %.txt - echo $(EXPTRACK_DIR)/bin/qPCR $< $(EXPTRACK_DIR)/reference_data/GenericBackground $(EXPTRACK_DIR)/reference_data/qPCR_Tests/ | sort -k 2 -g -r | awk -F\/ '{print $$NF}' $(EXPTRACK_DIR)/bin/qPCR $< $(EXPTRACK_DIR)/reference_data/GenericBackground $(EXPTRACK_DIR)/reference_data/qPCR_Tests/ | sort -k 2 -g -r | awk -F\/ '{print $$NF}' > $@ %.txt.profile: %.txt diff --git a/htswanalysis/scripts/SummarizeLibrary.pm b/htswanalysis/scripts/SummarizeLibrary.pm index 4a62e3d..889fc9c 100755 --- a/htswanalysis/scripts/SummarizeLibrary.pm +++ b/htswanalysis/scripts/SummarizeLibrary.pm @@ -56,7 +56,7 @@ sub SummarizeLibrary { $filename =~ /^(\d+)_(.+?)_s(\d+)_(.+?)_$lib.align/; ($date,$fc,$lane,$desc) = ($1,$2,$3,$4); $num_lanes += length($lane); - $num_reads += $xml->{Library}->[$i]->{Track}->[$t]->{Count}; + $num_reads += $xml->{Library}->[$i]->{Track}->[$t]->{Align}; if(!defined($start_date)) { $start_date = $date; $end_date = $date; diff --git a/htswanalysis/scripts/WriteQCSummary.pm b/htswanalysis/scripts/WriteQCSummary.pm index 9593c89..163e861 100755 --- a/htswanalysis/scripts/WriteQCSummary.pm +++ b/htswanalysis/scripts/WriteQCSummary.pm @@ -55,15 +55,21 @@ for my $i (0..scalar(@{$xml->{Library}})-1) { my $filename = $xml->{Library}->[$i]->{Track}->[$t]->{Filename}; $filename =~ /^(\d+)_(.+?)_s(\d+)_(.+?)_$lib.align/; ($date,$fc,$lane,$desc) = ($1,$2,$3,$4); - my $num_reads = $xml->{Library}->[$i]->{Track}->[$t]->{Count}; + my $all_reads = $xml->{Library}->[$i]->{Track}->[$t]->{All}; + my $pf_reads = 
$xml->{Library}->[$i]->{Track}->[$t]->{Pf}; + my $adapt_reads = $xml->{Library}->[$i]->{Track}->[$t]->{Adapter}; + my $align_reads = $xml->{Library}->[$i]->{Track}->[$t]->{Align}; my $bgcolor; - if($num_reads < 3000000) { $bgcolor = "FF3300"; } - elsif($num_reads < 5000000) { $bgcolor = "FFCC33"; } - elsif($num_reads < 10000000) { $bgcolor = "00CCFF"; } + if($align_reads < 3000000) { $bgcolor = "FF3300"; } + elsif($align_reads < 5000000) { $bgcolor = "FFCC33"; } + elsif($align_reads < 10000000) { $bgcolor = "00CCFF"; } else { $bgcolor = "66FF66"; } - $num_align{$lane}{'num'} = $num_reads; + $num_align{$lane}{'all'} = $all_reads; + $num_align{$lane}{'pf'} = $pf_reads; + $num_align{$lane}{'adapter'} = $adapt_reads; + $num_align{$lane}{'align'} = $align_reads; $num_align{$lane}{'bgcolor'} = $bgcolor; } } @@ -80,7 +86,15 @@ for my $file (@files) { print "$lanes"; print "$lib\n"; print "$libname\n"; - printf "%0.2fM\n",$num_align{$lanes}{'bgcolor'},$num_align{$lanes}{'num'}/1000000.0; + printf "Total Reads: %0.2fM

Pass Filter Reads: %0.2fM (%0.2f%%)

Adapters: %0.2fM (%0.2f%%)

Aligned Reads: %0.2fM (%0.2f%%)\n", + $num_align{$lanes}{'bgcolor'}, + $num_align{$lanes}{'all'}/1000000.0, + $num_align{$lanes}{'pf'}/1000000.0, + 100*$num_align{$lanes}{'pf'}/$num_align{$lanes}{'all'}, + $num_align{$lanes}{'adapter'}/1000000.0, + 100*$num_align{$lanes}{'adapter'}/$num_align{$lanes}{'pf'}, + $num_align{$lanes}{'align'}/1000000.0, + 100*$num_align{$lanes}{'align'}/($num_align{$lanes}{'pf'}-$num_align{$lanes}{'adapter'}); printf "%s%0.2f
%0.2f\n",$qpcr_sum{$lanes}{'bgcolor'},$qpcr_sum{$lanes}{'best'}."
".$qpcr_sum{$lanes}{'best2'},$qpcr_sum{$lanes}{'bgcolor'},$qpcr_sum{$lanes}{'enrich'},$qpcr_sum{$lanes}{'enrich2'}; print ""; print ""; -- 2.30.2