Bug-fixes in sequence repository code and projects code
author Tim Reddy Tim <treddy@hudsonalpha.org>
Wed, 3 Dec 2008 00:55:36 +0000 (00:55 +0000)
committer Tim Reddy Tim <treddy@hudsonalpha.org>
Wed, 3 Dec 2008 00:55:36 +0000 (00:55 +0000)
htswanalysis/conf/TSSProfileFormat.gnuplot
htswanalysis/conf/config_tasks.plist
htswanalysis/conf/projectsummary.plist
htswanalysis/scripts/CollectLibraries.pm
htswanalysis/scripts/ConfigureTasks.pm
htswanalysis/scripts/LibrariesMakefile
htswanalysis/scripts/SummarizeProject2.pm
htswanalysis/scripts/count_bases.pm

index 79ad9f1b68feddb64aedfb005eb0481d453bf8ed..a5f05eb3608cacc33221b8ee16656765afbb3572 100644 (file)
@@ -1,4 +1,5 @@
 set terminal png
+set autoscale
 set linestyle lw 3
 set linestyle ps 3
 set xtics 250
@@ -7,4 +8,4 @@ set size 0.6,0.6
 set xlabel "cycle"
 set ylabel "percent"
 set arrow 1 from first "0",graph 0.0 to first "0",graph 1.0 nohead front lw 3 linecolor rgb "#222222"
-plot "FILENAME" using 1:2 title '' smooth cspline with filledcurve y1=0;
+plot "FILENAME" using 1:2 title '' smooth bezier with filledcurve y1=0;
index c764a934717ff7630410221e4e52125c3f554e7c..4df6906bd00688a0355bdce3dea91f951804f2cb 100644 (file)
         <string>/Users/Data/Projects/config_tasks.log</string>
         <key>StandardErrorPath</key>
         <string>/Users/Data/Projects/config_tasks.err</string>
+       <key>EnvironmentVariables</key>
+       <dict>
+               <key>MACS</key>
+               <string>/usr/local/bin/macs</string>
+       </dict>
         <key>ProgramArguments</key>
         <array>
                 <string>/usr/bin/perl</string>
-                <string>ConfigureTasks.pm</string>
+                <string>/Users/ENCODE/htsworkflow/htswanalysis/scripts/ConfigureTasks.pm</string>
+                <string>/Users/ENCODE/htsworkflow/htswanalysis</string>
+                <string>/Users/Data</string>
                 <string>all</string>
         </array>
         <key>KeepAlive</key>
index 897d7aff383fffa95378219f67ba69f73b4f0ccf..b3b1c27026f285841a85da6c2f78dca19ee43563 100644 (file)
@@ -17,7 +17,7 @@
                <string>-j</string>
                <string>8</string>
                 <string>-f</string>
-               <string>ProjectMakefile</string>
+               <string>/Users/Data/Projects/ProjectMakefile</string>
         </array>
         <key>StartInterval</key>
         <integer>600</integer>
index f3d6aed41f5b5db1382369ceaa7a4a595a00a886..fcaa7928d348f8f15f19a934574fb7d869dc715d 100755 (executable)
@@ -14,12 +14,13 @@ for my $filename (@ARGV) {
   my($date,$flowcell,$lanes,$lib) = ($1,$2,$3,$4);
 
   open(COUNT,$filename.".count");
-  my $count = <COUNT>; chomp $count;
+  my $count = <COUNT>; chomp $count; $count =~ s/\s//g;
   if(!defined($count)) { print STDERR $filename,"\n"; }
   close(COUNT);
 
   if(!defined($lib)) {
     print STDERR "MISSING LIB: ", $filename,"\n"; 
+    next;
   }
 
   if(!exists($libraries{$lib})) { my @a; $libraries{$lib} = \@a; }
index ba7a4723e2b92888581aaaa4eb3e657482c82665..b250f39a5601eb0b76aea04acfd58d4ce9c94ca0 100755 (executable)
@@ -30,7 +30,7 @@ my %libs;
 
 my $BIOP = "$root_dir/bin/BioProspector.mac";
 my $QUESTDIR = "$root_dir/bin/QuEST";
-my $MACS  = `which macs | perl -e '\$l = <>; chomp \$l; print \$l;'` || die "MACS not found in PATH. Install and put in path";
+my $MACS  = $ENV{'MACS'} || `which macs | perl -e '\$l = <>; chomp \$l; print \$l;'` || die "MACS not found in PATH. Install and put in path";
 my $WINGPEAKSDIR = "$root_dir/bin";
 my $WINGPEAKSGENOMEDIR = "$root_dir/reference_data";
 my $PROFILEDIR = "$root_dir/bin";
@@ -317,11 +317,11 @@ sub WritePeakCallingTasks {
         $outfile .= "peak_caller.ChIP.out peak_caller.ChIP.out.bedgraph peak_caller.ChIP.out.fasta";
 
         $cmd   .= "peak_caller.ChIP.out: $data_dir/Libraries/$signal.txt $data_dir/Libraries/$bg.txt\n";
-       $cmd   .= "\trm -f background_RX_noIP.align.txt\n";
-        $cmd   .= "\trm -f pseudo_ChIP_RX_noIP.align.txt\n";
+       $cmd   .= "\trm -f *.align.txt\n";
         $cmd   .= "\t".$QUESTDIR.'/generate_QuEST_parameters.pl -rp '.$GENOMEDIR.'/QuEST_'.$genome.' -solexa_align_ChIP '.$data_dir.'/Libraries/'.$signal.'.txt -solexa_align_RX_noIP '.$data_dir.'/Libraries/'.$bg.'.txt -ap '.$data_dir.'/Tasks/'.$task.' -silent > '.$name.'.QuEST.log;'."\n";
         $cmd   .= "\t".$QUESTDIR.'/run_QuEST_with_param_file.pl -p QuEST.batch.pars >> '.$name.'.QuEST.log;'."\n";
-        $cmd   .= "\t".'rm -rf scores;'."\n\n"; 
+        $cmd   .= "\t".'rm -rf scores;'."\n"; 
+       $cmd   .= "\trm -f *.align.txt\n\n";
 
         $cmd  .= "peak_caller.ChIP.out.tab: peak_caller.ChIP.out\n";
         $cmd .= "\t$root_dir/scripts/tabify_quest.sh \$< > \$@\n\n";
@@ -353,6 +353,7 @@ sub WritePeakCallingTasks {
         $cmd .= "\tcat $data_dir/Libraries/$bg.txt | $root_dir/scripts/align_to_bed.pm  | grep -v 'contam' | grep -v 'humRibosomal' > $bg.bed\n";
         $cmd .= "\t$MACS -t $signal.bed -c ./$bg.bed --name=$name --pvalue=1e-10 --mfold=20 > $name.log 2> $name.err\n";
        $cmd .= "\trm -f $signal.bed $bg.bed\n";
+       $cmd .= "\trm -f Background.ELAND.pos $name.ELAND.pos $name.R0.*.bar Background.ELAND.sorted $name.ELAND.sorted\n";
        $cmd .= "\t".'exit `grep -c "^CRITICAL" '.$name.'.err`'."\n\n";
 
         $cmd .= "\n".$name."_peaks.bed: ".$name."_peaks.xls\n";
index 7305f6af2240cd55329c8f3fc2f4108aab1cda0f..3a30810f2f3ee8d96dcf96bab517cf7496d863de 100644 (file)
@@ -18,20 +18,22 @@ HTML_DIR=/Library/WebServer/Documents/SequencingSummaries
 
 # Error messages are collected so as not to bug the user. (if there are no matching files, ls errors.)
 FILES=$(shell ls -1d $(DATA_DIR)/Flowcells/**/*.align*.txt 2>> LibrariesMakefile.err)
-QC_FILES=$(shell ls -1d ~Data/Flowcells/**/ | awk -F/ '{print $$0"/"$$(NF-1)"_QC_Summary.html"}' )
+QC_FILES=$(shell ls -1d $(DATA_DIR)/Flowcells/**/ | awk -F/ '{print $$0"/"$$(NF-1)"_QC_Summary.html"}' )
 LIBFILES=$(shell ls -1d $(DATA_DIR)/Libraries/.*.config 2>> LibrariesMakefile.err | sed -e s/config/txt/ -e "s/\/\./\//")
 
 all: $(QC_FILES) $(FILES) $(DATA_DIR)/LibraryInfo.xml $(LIBFILES) $(DATA_DIR)/SequencingSummary.html Distribute
 
+# TODO: Add error handling if flowcell_qc fails for some reason. Think about how we'd like this reported. Email?
 $(QC_FILES):
        cd $(DATA_DIR)/Flowcells/`basename $@ | awk -F_ '{print $$1}'` && $(MAKE) -f $(ROOT_DIR)/scripts/Flowcell_QC_Makefile
 
-$(DATA_DIR)/Libraries/%.txt: $(DATA_DIR)/Libraries/.%.config | LibraryInfo.xml
+$(DATA_DIR)/Libraries/%.txt: $(DATA_DIR)/Libraries/.%.config | $(DATA_DIR)/LibraryInfo.xml
        cat `cat $<` > $@;
 
 $(DATA_DIR)/LibraryInfo.xml: $(QC_FILES)
        $(ROOT_DIR)/scripts/CollectLibraries.pm `ls $(DATA_DIR)/Flowcells/**/*.align*.txt` > $@;
        $(ROOT_DIR)/scripts/RecompileLibraries.pm $@ $(DATA_DIR)
+       $(ROOT_DIR)/scripts/analys_track_main.py updLibInfo
 
 $(DATA_DIR)/SequencingSummary.html: $(DATA_DIR)/LibraryInfo.xml
        $(ROOT_DIR)/scripts/SummarizeLibrary.pm $< > $@;
index ead12c8c184b90705e883d531a0a458cfd6df323..b509ac32480c326114883337b0d2ad5b501907d5 100755 (executable)
@@ -41,6 +41,7 @@ for my $i (0..scalar(@{$xml->{PeakCalling}})-1) {
     $desc{Summary} = `$root_dir/scripts/SummarizeMACS.pm $peakfile $negpeakfile`;
     $desc{outfile} = "$caller_dir/".$name."_peaks.bed";
     $desc{fasta} = "$caller_dir/".$name."_peaks.fasta";
+    $desc{primer_design} = "$caller_dir/ValidationPrimers.html";
   }
 
   if($genome eq "scer") { $genome = "sacCer1"; }
@@ -196,6 +197,7 @@ for(@peak_calling) {
   print "<TR BGCOLOR=$color><TD><B>$hash{Name}</B></TD>";
   print "<TD>$hash{Caller}</TD><TD>$hash{Summary}</TD>\n";
   print "<TD><A HREF=$hash{outfile}>BED file</A><BR><A HREF=http://genome.ucsc.edu/cgi-bin/hgTracks?db=$hash{Genome}&hgt.customText=http://171.65.76.194/Tasks/$hash{Task}/$hash{outfile}>View in Genome Browser</A></TD><TD><A HREF=$hash{fasta}>FASTA</A></TD>\n";
+  if(exists($hash{primer_design})) { print "<TD><A HREF=$hash{primer_design}>Validation Primers</A></TD>\n"; }
   print "</TR>\n";
 }
 }
index 672718bf48573579d681281105b5baf67eea1116..a4f357da9f1835bcd927fb81cc021b635a16073a 100755 (executable)
@@ -19,8 +19,7 @@ my $length;
 
 while( my $read = <>) {
   chomp $read;
-  if(!defined($length)) { $length = length($read); }
-  if($read =~ /\./) { next; }
+  $length = length($read);
   $count++;
   for(0..$length-1) {
     my $base = uc(substr($read,$_,1));