From fb97741089000bd0865ecec9634544469be4b0f8 Mon Sep 17 00:00:00 2001 From: Tim Reddy Tim Date: Wed, 3 Dec 2008 00:55:36 +0000 Subject: [PATCH] Bug-fixes in sequence repository code and projects code --- htswanalysis/conf/TSSProfileFormat.gnuplot | 3 ++- htswanalysis/conf/config_tasks.plist | 9 ++++++++- htswanalysis/conf/projectsummary.plist | 2 +- htswanalysis/scripts/CollectLibraries.pm | 3 ++- htswanalysis/scripts/ConfigureTasks.pm | 9 +++++---- htswanalysis/scripts/LibrariesMakefile | 6 ++++-- htswanalysis/scripts/SummarizeProject2.pm | 2 ++ htswanalysis/scripts/count_bases.pm | 3 +-- 8 files changed, 25 insertions(+), 12 deletions(-) diff --git a/htswanalysis/conf/TSSProfileFormat.gnuplot b/htswanalysis/conf/TSSProfileFormat.gnuplot index 79ad9f1..a5f05eb 100644 --- a/htswanalysis/conf/TSSProfileFormat.gnuplot +++ b/htswanalysis/conf/TSSProfileFormat.gnuplot @@ -1,4 +1,5 @@ set terminal png +set autoscale set linestyle lw 3 set linestyle ps 3 set xtics 250 @@ -7,4 +8,4 @@ set size 0.6,0.6 set xlabel "cycle" set ylabel "percent" set arrow 1 from first "0",graph 0.0 to first "0",graph 1.0 nohead front lw 3 linecolor rgb "#222222" -plot "FILENAME" using 1:2 title '' smooth cspline with filledcurve y1=0; +plot "FILENAME" using 1:2 title '' smooth bezier with filledcurve y1=0; diff --git a/htswanalysis/conf/config_tasks.plist b/htswanalysis/conf/config_tasks.plist index c764a93..4df6906 100644 --- a/htswanalysis/conf/config_tasks.plist +++ b/htswanalysis/conf/config_tasks.plist @@ -12,10 +12,17 @@ /Users/Data/Projects/config_tasks.log StandardErrorPath /Users/Data/Projects/config_tasks.err + EnvironmentVariables + + MACS + /usr/local/bin/macs + ProgramArguments /usr/bin/perl - ConfigureTasks.pm + /Users/ENCODE/htsworkflow/htswanalysis/scripts/ConfigureTasks.pm + /Users/ENCODE/htsworkflow/htswanalysis + /Users/Data all KeepAlive diff --git a/htswanalysis/conf/projectsummary.plist b/htswanalysis/conf/projectsummary.plist index 897d7af..b3b1c27 100644 --- a/htswanalysis/conf/projectsummary.plist +++ b/htswanalysis/conf/projectsummary.plist @@ -17,7 +17,7 @@ -j 8 -f - ProjectMakefile + /Users/Data/Projects/ProjectMakefile StartInterval 600 diff --git a/htswanalysis/scripts/CollectLibraries.pm b/htswanalysis/scripts/CollectLibraries.pm index f3d6aed..fcaa792 100755 --- a/htswanalysis/scripts/CollectLibraries.pm +++ b/htswanalysis/scripts/CollectLibraries.pm @@ -14,12 +14,13 @@ for my $filename (@ARGV) { my($date,$flowcell,$lanes,$lib) = ($1,$2,$3,$4); open(COUNT,$filename.".count"); - my $count = ; chomp $count; + my $count = ; chomp $count; $count =~ s/\s//g; if(!defined($count)) { print STDERR $filename,"\n"; } close(COUNT); if(!defined($lib)) { print STDERR "MISSING LIB: ", $filename,"\n"; + next; } if(!exists($libraries{$lib})) { my @a; $libraries{$lib} = \@a; } diff --git a/htswanalysis/scripts/ConfigureTasks.pm b/htswanalysis/scripts/ConfigureTasks.pm index ba7a472..b250f39 100755 --- a/htswanalysis/scripts/ConfigureTasks.pm +++ b/htswanalysis/scripts/ConfigureTasks.pm @@ -30,7 +30,7 @@ my %libs; my $BIOP = "$root_dir/bin/BioProspector.mac"; my $QUESTDIR = "$root_dir/bin/QuEST"; -my $MACS = `which macs | perl -e '\$l = <>; chomp \$l; print \$l;'` || die "MACS not found in PATH. Install and put in path"; +my $MACS = $ENV{'MACS'} || `which macs | perl -e '\$l = <>; chomp \$l; print \$l;'` || die "MACS not found in PATH. Install and put in path"; my $WINGPEAKSDIR = "$root_dir/bin"; my $WINGPEAKSGENOMEDIR = "$root_dir/reference_data"; my $PROFILEDIR = "$root_dir/bin"; @@ -317,11 +317,11 @@ sub WritePeakCallingTasks { $outfile .= "peak_caller.ChIP.out peak_caller.ChIP.out.bedgraph peak_caller.ChIP.out.fasta"; $cmd .= "peak_caller.ChIP.out: $data_dir/Libraries/$signal.txt $data_dir/Libraries/$bg.txt\n"; - $cmd .= "\trm -f background_RX_noIP.align.txt\n"; - $cmd .= "\trm -f pseudo_ChIP_RX_noIP.align.txt\n"; + $cmd .= "\trm -f *.align.txt\n"; $cmd .= "\t".$QUESTDIR.'/generate_QuEST_parameters.pl -rp '.$GENOMEDIR.'/QuEST_'.$genome.' -solexa_align_ChIP '.$data_dir.'/Libraries/'.$signal.'.txt -solexa_align_RX_noIP '.$data_dir.'/Libraries/'.$bg.'.txt -ap '.$data_dir.'/Tasks/'.$task.' -silent > '.$name.'.QuEST.log;'."\n"; $cmd .= "\t".$QUESTDIR.'/run_QuEST_with_param_file.pl -p QuEST.batch.pars >> '.$name.'.QuEST.log;'."\n"; - $cmd .= "\t".'rm -rf scores;'."\n\n"; + $cmd .= "\t".'rm -rf scores;'."\n"; + $cmd .= "\trm -f *.align.txt\n\n"; $cmd .= "peak_caller.ChIP.out.tab: peak_caller.ChIP.out\n"; $cmd .= "\t$root_dir/scripts/tabify_quest.sh \$< > \$@\n\n"; @@ -353,6 +353,7 @@ sub WritePeakCallingTasks { $cmd .= "\tcat $data_dir/Libraries/$bg.txt | $root_dir/scripts/align_to_bed.pm | grep -v 'contam' | grep -v 'humRibosomal' > $bg.bed\n"; $cmd .= "\t$MACS -t $signal.bed -c ./$bg.bed --name=$name --pvalue=1e-10 --mfold=20 > $name.log 2> $name.err\n"; $cmd .= "\trm -f $signal.bed $bg.bed\n"; + $cmd .= "\trm -f Background.ELAND.pos $name.ELAND.pos $name.R0.*.bar Background.ELAND.sorted $name.ELAND.sorted\n"; $cmd .= "\t".'exit `grep -c "^CRITICAL" '.$name.'.err`'."\n\n"; $cmd .= "\n".$name."_peaks.bed: ".$name."_peaks.xls\n"; diff --git a/htswanalysis/scripts/LibrariesMakefile b/htswanalysis/scripts/LibrariesMakefile index 7305f6a..3a30810 100644 --- a/htswanalysis/scripts/LibrariesMakefile +++ b/htswanalysis/scripts/LibrariesMakefile @@ -18,20 +18,22 @@ HTML_DIR=/Library/WebServer/Documents/SequencingSummaries # Error messages are collected so as not to bug the user. (if there are no matching files, ls errors.) FILES=$(shell ls -1d $(DATA_DIR)/Flowcells/**/*.align*.txt 2>> LibrariesMakefile.err) -QC_FILES=$(shell ls -1d ~Data/Flowcells/**/ | awk -F/ '{print $$0"/"$$(NF-1)"_QC_Summary.html"}' ) +QC_FILES=$(shell ls -1d $(DATA_DIR)/Flowcells/**/ | awk -F/ '{print $$0"/"$$(NF-1)"_QC_Summary.html"}' ) LIBFILES=$(shell ls -1d $(DATA_DIR)/Libraries/.*.config 2>> LibrariesMakefile.err | sed -e s/config/txt/ -e "s/\/\./\//") all: $(QC_FILES) $(FILES) $(DATA_DIR)/LibraryInfo.xml $(LIBFILES) $(DATA_DIR)/SequencingSummary.html Distribute +# TODO: Add error handling if flowcell_qc fails for some reason. Think about how we'd like this reported. Email? $(QC_FILES): cd $(DATA_DIR)/Flowcells/`basename $@ | awk -F_ '{print $$1}'` && $(MAKE) -f $(ROOT_DIR)/scripts/Flowcell_QC_Makefile -$(DATA_DIR)/Libraries/%.txt: $(DATA_DIR)/Libraries/.%.config | LibraryInfo.xml +$(DATA_DIR)/Libraries/%.txt: $(DATA_DIR)/Libraries/.%.config | $(DATA_DIR)/LibraryInfo.xml cat `cat $<` > $@; $(DATA_DIR)/LibraryInfo.xml: $(QC_FILES) $(ROOT_DIR)/scripts/CollectLibraries.pm `ls $(DATA_DIR)/Flowcells/**/*.align*.txt` > $@; $(ROOT_DIR)/scripts/RecompileLibraries.pm $@ $(DATA_DIR) + $(ROOT_DIR)/scripts/analys_track_main.py updLibInfo $(DATA_DIR)/SequencingSummary.html: $(DATA_DIR)/LibraryInfo.xml $(ROOT_DIR)/scripts/SummarizeLibrary.pm $< > $@; diff --git a/htswanalysis/scripts/SummarizeProject2.pm b/htswanalysis/scripts/SummarizeProject2.pm index ead12c8..b509ac3 100755 --- a/htswanalysis/scripts/SummarizeProject2.pm +++ b/htswanalysis/scripts/SummarizeProject2.pm @@ -41,6 +41,7 @@ for my $i (0..scalar(@{$xml->{PeakCalling}})-1) { $desc{Summary} = `$root_dir/scripts/SummarizeMACS.pm $peakfile $negpeakfile`; $desc{outfile} = "$caller_dir/".$name."_peaks.bed"; $desc{fasta} = "$caller_dir/".$name."_peaks.fasta"; + $desc{primer_design} = "$caller_dir/ValidationPrimers.html"; } if($genome eq "scer") { $genome = "sacCer1"; } @@ -196,6 +197,7 @@ for(@peak_calling) { print "$hash{Name}"; print "$hash{Caller}$hash{Summary}\n"; print "BED file
View in Genome BrowserFASTA\n"; + if(exists($hash{primer_design})) { print "Validation Primers\n"; } print "\n"; } } diff --git a/htswanalysis/scripts/count_bases.pm b/htswanalysis/scripts/count_bases.pm index 672718b..a4f357d 100755 --- a/htswanalysis/scripts/count_bases.pm +++ b/htswanalysis/scripts/count_bases.pm @@ -19,8 +19,7 @@ my $length; while( my $read = <>) { chomp $read; - if(!defined($length)) { $length = length($read); } - if($read =~ /\./) { next; } + $length = length($read); $count++; for(0..$length-1) { my $base = uc(substr($read,$_,1)); -- 2.30.2