From 04f7f4fc7b41d78fca0e38c0e2a1450d32a79174 Mon Sep 17 00:00:00 2001 From: Tim Reddy Tim Date: Mon, 24 Nov 2008 20:06:07 +0000 Subject: [PATCH] Removed from hard-coded directories. Starting to bring the QC into the sequence repository. --- htswanalysis/scripts/ConfigureTasks.pm | 53 ++++++++------- htswanalysis/scripts/WriteQCSummary.pm | 91 ++++++++++++++++++++++++++ 2 files changed, 119 insertions(+), 25 deletions(-) create mode 100755 htswanalysis/scripts/WriteQCSummary.pm diff --git a/htswanalysis/scripts/ConfigureTasks.pm b/htswanalysis/scripts/ConfigureTasks.pm index ab5f785..3aab29c 100755 --- a/htswanalysis/scripts/ConfigureTasks.pm +++ b/htswanalysis/scripts/ConfigureTasks.pm @@ -177,8 +177,8 @@ sub WriteQPCRTasks { my $testdir = $QPCRTESTDIR; my $outfile = "$name.qPCR"; - my $seqcheck = "if [ ! -e ~Data/Libraries/$lib.txt ]; then $root_dir/scripts/analys_track_main.py updsts $task \"Waiting for sequencing.\"; fi;"; - my $cmd = "$outfile: ~Data/Libraries/$lib.txt\n\t$seqcheck\n\t$QPCRDIR/qPCR \$< $background $testdir > \$@\n"; + my $seqcheck = "if [ ! -e $data_dir/Libraries/$lib.txt ]; then $root_dir/scripts/analys_track_main.py updsts $task \"Waiting for sequencing.\"; fi;"; + my $cmd = "$outfile: $data_dir/Libraries/$lib.txt\n\t$seqcheck\n\t$QPCRDIR/qPCR \$< $background $testdir > \$@\n"; writeTask($qpcr, "qPCR", $outfile, $cmd); $tasks .= $task." "; @@ -201,12 +201,12 @@ sub WriteProfileTasks { my $outfile = "$lib.wig.gz $lib.profile.gif"; - my $seqcheck = "if [ ! -e ~Data/Libraries/$lib.txt ]; then $root_dir/scripts/analys_track_main.py updsts $task \"Waiting for sequencing.\"; fi;"; - my $cmds .= "$lib.wig.gz: ~Data/Libraries/$lib.txt\n"; + my $seqcheck = "if [ ! -e $data_dir/Libraries/$lib.txt ]; then $root_dir/scripts/analys_track_main.py updsts $task \"Waiting for sequencing.\"; fi;"; + my $cmds .= "$lib.wig.gz: $data_dir/Libraries/$lib.txt\n"; $cmds .= "\t$seqcheck\n"; - $cmds .= "\t".$PROFILEDIR.'/profile_reads_wig ~Data/Libraries/'.$lib.'.txt "'.$name.'" "'.$name.'" | gzip > '.$lib.'.wig.gz'; + $cmds .= "\t".$PROFILEDIR.'/profile_reads_wig '.$data_dir.'/Libraries/'.$lib.'.txt "'.$name.'" "'.$name.'" | gzip > '.$lib.'.wig.gz'; $cmds .= "\n\n"; - $cmds .= $lib.'.profile.gif: ~Data/Libraries/'.$lib.'.txt '.$root_dir.'/reference_data/'.$genome.'_tx_start_sites'."\n"; + $cmds .= $lib.'.profile.gif: '.$data_dir.'/Libraries/'.$lib.'.txt '.$root_dir.'/reference_data/'.$genome.'_tx_start_sites'."\n"; $cmds .= "\t".$PROFILEDIR.'/profile_reads_against_features $^ | '.$root_dir.'/scripts/profile_to_svg.pm | /opt/local/bin/convert - $@'."\n"; $cmds .= "\n"; @@ -238,10 +238,10 @@ sub WriteCompareLibTasks { my $outfile = $name1."_".$name2.".compare "; - my $seqcheck = "if [ ! -e ~Data/Libraries/$name1.txt ]; then $root_dir/scripts/analys_track_main.py updsts $task \"Waiting for $name1 sequencing.\"; fi;"; - $seqcheck .= "\n\tif [ ! -e ~Data/Libraries/$name2.txt ]; then $root_dir/scripts/analys_track_main.py updsts $task \"Waiting for $name2 sequencing.\"; fi;"; + my $seqcheck = "if [ ! -e $data_dir/Libraries/$name1.txt ]; then $root_dir/scripts/analys_track_main.py updsts $task \"Waiting for $name1 sequencing.\"; fi;"; + $seqcheck .= "\n\tif [ ! -e $data_dir/Libraries/$name2.txt ]; then $root_dir/scripts/analys_track_main.py updsts $task \"Waiting for $name2 sequencing.\"; fi;"; - my $cmd = "$outfile: ~Data/Libraries/$name1.txt ~Data/Libraries/$name2.txt\n\t$seqcheck\n\t$root_dir/bin/count_reads_in_peaks $tf $name1 $features ~Data/Libraries/$name1.txt $name2 $features ~Data/Libraries/$name2.txt > \$@\n"; + my $cmd = "$outfile: $data_dir/Libraries/$name1.txt $data_dir/Libraries/$name2.txt\n\t$seqcheck\n\t$root_dir/bin/count_reads_in_peaks $tf $name1 $features $data_dir/Libraries/$name1.txt $name2 $features $data_dir/Libraries/$name2.txt > \$@\n"; writeTask($cmp, "CompareLibraries", $outfile, $cmd); $tasks .= $task." "; @@ -304,8 +304,8 @@ sub WritePeakCallingTasks { my $bg = $peakcall->{Background}->{Library}; my $genome = $peakcall->{Genome}; - my $seqcheck = "if [ ! -e ~Data/Libraries/$signal.txt ]; then $root_dir/scripts/analys_track_main.py updsts $task \"Waiting for $signal sequencing.\"; fi;"; - $seqcheck .= "\n\tif [ ! -e ~Data/Libraries/$bg.txt ]; then $root_dir/scripts/analys_track_main.py updsts $task \"Waiting for $bg sequencing.\"; fi;"; + my $seqcheck = "if [ ! -e $data_dir/Libraries/$signal.txt ]; then $root_dir/scripts/analys_track_main.py updsts $task \"Waiting for $signal sequencing.\"; fi;"; + $seqcheck .= "\n\tif [ ! -e $data_dir/Libraries/$bg.txt ]; then $root_dir/scripts/analys_track_main.py updsts $task \"Waiting for $bg sequencing.\"; fi;"; $tasks .= $task." "; @@ -316,11 +316,11 @@ sub WritePeakCallingTasks { if($caller eq "QuEST") { $outfile .= "peak_caller.ChIP.out peak_caller.ChIP.out.bedgraph peak_caller.ChIP.out.fasta"; - $cmd .= "peak_caller.ChIP.out: ~Data/Libraries/$signal.txt ~Data/Libraries/$bg.txt\n"; + $cmd .= "peak_caller.ChIP.out: $data_dir/Libraries/$signal.txt $data_dir/Libraries/$bg.txt\n"; $cmd .= "\t$seqcheck\n"; $cmd .= "\trm -f background_RX_noIP.align.txt\n"; $cmd .= "\trm -f pseudo_ChIP_RX_noIP.align.txt\n"; - $cmd .= "\t".$QUESTDIR.'/generate_QuEST_parameters.pl -rp '.$GENOMEDIR.'/QuEST_'.$genome.' -solexa_align_ChIP ~Data/Libraries/'.$signal.'.txt -solexa_align_RX_noIP ~Data/Libraries/'.$bg.'.txt -ap '.$data_dir.'/Tasks/'.$task.' -silent > '.$name.'.QuEST.log;'."\n"; + $cmd .= "\t".$QUESTDIR.'/generate_QuEST_parameters.pl -rp '.$GENOMEDIR.'/QuEST_'.$genome.' -solexa_align_ChIP $data_dir/Libraries/'.$signal.'.txt -solexa_align_RX_noIP $data_dir/Libraries/'.$bg.'.txt -ap '.$data_dir.'/Tasks/'.$task.' -silent > '.$name.'.QuEST.log;'."\n"; $cmd .= "\t".$QUESTDIR.'/run_QuEST_with_param_file.pl -p QuEST.batch.pars >> '.$name.'.QuEST.log;'."\n"; $cmd .= "\t".'rm -rf scores;'."\n\n"; @@ -336,8 +336,8 @@ sub WritePeakCallingTasks { } elsif($caller eq "WingPeaks") { $outfile .= "$name.peaks $name.peaks.fasta "; - $cmd .= "$name.peaks: ~Data/Libraries/$signal.txt ~Data/Libraries/$bg.txt\n"; - $cmd .= "\t".$WINGPEAKSDIR.'/ChIPSeq_PeakCaller_ENCODE -gn '.$WINGPEAKSGENOMEDIR.'/'.$genome.'_chrlist.cod -it '.$name.' -in 1 -if ~Data/Libraries/'.$signal.'.txt -ct Background -cn 1 -cf ~Data/Libraries/'.$bg.'.txt -ot '.$name.' > '.$name.'.log;'."\n"; + $cmd .= "$name.peaks: $data_dir/Libraries/$signal.txt $data_dir/Libraries/$bg.txt\n"; + $cmd .= "\t".$WINGPEAKSDIR.'/ChIPSeq_PeakCaller_ENCODE -gn '.$WINGPEAKSGENOMEDIR.'/'.$genome.'_chrlist.cod -it '.$name.' -in 1 -if '.$data_dir/Libraries/'.$signal.'.txt -ct Background -cn 1 -cf '.$data_dir.'/Libraries/'.$bg.'.txt -ot '.$name.' > '.$name.'.log;'."\n"; $cmd .= "%.peaks.tab: %.peaks\n"; $cmd .= "\t".'cat $< | awk \'{ print $$1"\t"$$2"\t"$$3"\t"$$4"\t"$$7}\' > $@'."\n\n"; @@ -347,20 +347,23 @@ sub WritePeakCallingTasks { } elsif($caller eq "MACS") { $outfile .= $name."_peaks.bed ".$name."_peaks.fasta "; - $cmd .= $name."_peaks.bed: ~Data/Libraries/$signal.txt ~Data/Libraries/$bg.txt\n"; + $cmd .= "\n.PRECIOUS: $name_peaks.xls $name_peaks.bed $name_peaks.fasta\n\n"; + + $cmd .= "\n".$name."_peaks.xls: $data_dir/Libraries/$signal.txt $data_dir/Libraries/$bg.txt\n"; $cmd .= "\t$seqcheck\n"; - $cmd .= "\tcat ~Data/Libraries/$signal.txt | $root_dir/scripts/align_to_bed.pm > $signal.bed\n"; - $cmd .= "\tcat ~Data/Libraries/$bg.txt | $root_dir/scripts/align_to_bed.pm > $bg.bed\n"; + $cmd .= "\tcat $data_dir/Libraries/$signal.txt | $root_dir/scripts/align_to_bed.pm > $signal.bed\n"; + $cmd .= "\tcat $data_dir/Libraries/$bg.txt | $root_dir/scripts/align_to_bed.pm > $bg.bed\n"; $cmd .= "\t$MACSDIR/macs -t $signal.bed -c ./$bg.bed --name=$name --pvalue=1e-10 > $name.log 2> $name.err\n"; - $cmd .= "\t".'echo "track name="'.$name.'" description="'.$name.'"GR_EtOH_Rep2_Peak_Calls" > header'; - $cmd .= "\t".'cat header '.$name.'_peaks.bed > t; mv t '.$name.'_peaks.bed'."\n"; - $cmd .= "\trm -f $signal.bed $bg.bed header\n"; + $cmd .= "\trm -f $signal.bed $bg.bed\n"; $cmd .= "\t".'exit `grep -c "^CRITICAL" '.$name.'.err`'."\n\n"; - $cmd .= "%_peaks.tab: %_peaks.bed\n"; + $cmd .= "\n".$name."_peaks.bed: ".$name."_peaks.xls\n"; + $cmd .= "\t$root_dir/scripts/MACS_2_BED.sh $< $name > $@\n\n" + + $cmd .= "\n".$name."_peaks.tab: ".$name."_peaks.bed\n"; $cmd .= "\t".'cat $< | awk \'{print NR"\t"$$1"\t"$$2"\t"$$3"\t1"}\' > $@'."\n\n"; - $cmd .= "%_peaks.fasta: %_peaks.tab\n"; + $cmd .= "\n".$name."_peaks.fasta: ".$name."_peaks.tab\n"; $cmd .= "\t".'cat $< | '.$root_dir.'/scripts/extract_peaks.pm '.$root_dir.'/reference_data/hg18_chrom_list.txt > $@'."\n\n"; } @@ -411,8 +414,8 @@ sub WriteComparePeakCallingTasks { #my $set2_feat = $caller2."_".$set2."/".$peak_file_2; # #my $cmd = "$outfile: $set1_feat $set2_feat\n"; - #$cmd .= "\t~/EXPTRACK/QC/count_reads_in_peaks NA $set1-$caller1 $set1_feat ~Data/Libraries/".$xml->{PeakCalling}->[$index1]->{Signal}->{Library}.".txt "; - #$cmd .= "$set2-$caller2 $set2_feat ~Data/Libraries/".$xml->{PeakCalling}->[$index2]->{Signal}->{Library}.".txt > \$@\n"; + #$cmd .= "\t~/EXPTRACK/QC/count_reads_in_peaks NA $set1-$caller1 $set1_feat $data_dir/Libraries/".$xml->{PeakCalling}->[$index1]->{Signal}->{Library}.".txt "; + #$cmd .= "$set2-$caller2 $set2_feat $data_dir/Libraries/".$xml->{PeakCalling}->[$index2]->{Signal}->{Library}.".txt > \$@\n"; # #$file_list .= "$outfile "; #$cmds .= $cmd."\n"; diff --git a/htswanalysis/scripts/WriteQCSummary.pm b/htswanalysis/scripts/WriteQCSummary.pm new file mode 100755 index 0000000..aef28d9 --- /dev/null +++ b/htswanalysis/scripts/WriteQCSummary.pm @@ -0,0 +1,91 @@ +#!/usr/bin/perl -w + +use strict; +use warnings; + +use XML::Simple; + +my $library_info = shift; +my $xml = XMLin($library_info, ForceArray => 1); + +my %num_align; + +my $qpcr_filename = shift; +open(QPCR,$qpcr_filename); + +my %qpcr_sum; + +while() { + chomp; + my($lane,$factor,$enrich) = split(/ /,$_); + my $line2 = ; chomp $line2; + my($lane2,$factor2,$enrich2) = split(/ /,$line2); + + next if(!defined($enrich)); + + my @a = split(/\//,$factor); + $factor = $a[scalar(@a)-1]; + @a = split(/\//,$factor2); + $factor2 = $a[scalar(@a)-1]; + + my $bgcolor; + if($enrich < 2) { $bgcolor = "FF6600"; } + elsif($enrich< 5) { $bgcolor = "FFCC33"; } + elsif($enrich< 10) { $bgcolor = "00CCFF"; } + elsif($enrich eq "nan") { $bgcolor = "FFFFFF"; } + else { $bgcolor = "66FF66"; } + + $lane =~ /^(.+?)\/(\d\d\d\d\d\d)_(.+?)_s(\d+)_(.+?)\.align_\d+\.(.+?)\.txt\.qPCR$/; + my($fc,$date,$fc2,$lanes,$name,$genome) = ($1,$2,$3,$4,$5,$6); + + $qpcr_sum{$lanes}{'best'} = $factor; + $qpcr_sum{$lanes}{'enrich'} = $enrich; + $qpcr_sum{$lanes}{'bgcolor'} = $bgcolor; + + $qpcr_sum{$lanes}{'best2'} = $factor2; + $qpcr_sum{$lanes}{'enrich2'} = $enrich2; + $qpcr_sum{$lanes}{'bgcolor2'} = $bgcolor; +} + +for my $i (0..scalar(@{$xml->{Library}})-1) { + my $lib = $xml->{Library}->[$i]->{Name}; + for my $t (0..scalar(@{$xml->{Library}->[$i]->{Track}})-1) { + my $N = scalar(@{$xml->{Library}->[$i]->{Track}}); + my($date,$fc,$lane,$desc); + my $filename = $xml->{Library}->[$i]->{Track}->[$t]->{Filename}; + $filename =~ /^(\d+)_(.+?)_s(\d+)_(.+?)_$lib.align/; + ($date,$fc,$lane,$desc) = ($1,$2,$3,$4); + my $num_reads = $xml->{Library}->[$i]->{Track}->[$t]->{Count}; + + my $bgcolor; + if($num_reads < 3000000) { $bgcolor = "FF3300"; } + elsif($num_reads < 5000000) { $bgcolor = "FFCC33"; } + elsif($num_reads < 10000000) { $bgcolor = "00CCFF"; } + else { $bgcolor = "66FF66"; } + + $num_align{$lane}{'num'} = $num_reads; + $num_align{$lane}{'bgcolor'} = $bgcolor; + } +} + +print ""; +print "\n"; + +my @files = `ls -1 QC/*.align_??.*.txt.profile.gif`; +for my $file (@files) { + $file =~ /(\d\d\d\d\d\d)_(.+?)_s(\d+)_(.+?)_([Ss][Ll]\d+)/; + my($date,$fc,$lanes,$libname,$lib) = ($1,$2,$3,$4,$5); + my $lane = substr($lanes,0,1); + print "\n"; + print ""; + print "\n"; + print "\n"; + printf "\n",$num_align{$lanes}{'bgcolor'},$num_align{$lanes}{'num'}/1000000.0; + printf "\n",$qpcr_sum{$lanes}{'bgcolor'},$qpcr_sum{$lanes}{'best'}."
".$qpcr_sum{$lanes}{'best2'},$qpcr_sum{$lanes}{'bgcolor'},$qpcr_sum{$lanes}{'enrich'},$qpcr_sum{$lanes}{'enrich2'}; + print ""; + print ""; + print "\n"; +} +print "
Lane\(s\)LibLibrary NameAligned ReadsqPCR FactorFold enr.Profile at TSSIVC Calls
$lanes$lib$libname%0.2fM%s%0.2f
%0.2f
\n"; + +print "\n"; -- 2.30.2