Major analysis server update. Had to switch to a new server, and used the opportunity...
[htsworkflow.git] / htswanalysis / scripts / ConfigureTasks.pm
index 9cf527ae6f678884cab828939a6793d7c5ce46ff..ba7a4723e2b92888581aaaa4eb3e657482c82665 100755 (executable)
@@ -30,7 +30,7 @@ my %libs;
 
 my $BIOP = "$root_dir/bin/BioProspector.mac";
 my $QUESTDIR = "$root_dir/bin/QuEST";
-my $MACSDIR  = "$root_dir/bin";
+my $MACS  = `which macs | perl -e '\$l = <>; chomp \$l; print \$l;'` || die "MACS not found in PATH. Install and put in path";
 my $WINGPEAKSDIR = "$root_dir/bin";
 my $WINGPEAKSGENOMEDIR = "$root_dir/reference_data";
 my $PROFILEDIR = "$root_dir/bin";
@@ -143,6 +143,7 @@ sub writeTask {
   my $root = shift;
   my $outfile = shift;
   my $cmd = shift;
+  my $seqcheck = shift;
 
   my $taskid = $task->{TaskId};
   my $taskdir = "$data_dir/Tasks/".$taskid;
@@ -155,7 +156,7 @@ sub writeTask {
 
   open(MAKEFILE, ">$taskdir/Makefile");
   print MAKEFILE "all: .notify $outfile | .start\n\n.PHONY: .notify .start\n\n";
-  print MAKEFILE ".start:\n\t$root_dir/scripts/analys_track_main.py updsts $taskid \"Processing\"\n\ttouch .start\n\n";
+  print MAKEFILE ".start:\n\t$root_dir/scripts/analys_track_main.py updsts $taskid \"Processing\"\n\t$seqcheck\n\ttouch .start\n\n";
   
   print MAKEFILE ".notify: | .start $outfile .start\n\techo \"Complete\"\n\t$root_dir/scripts/analys_track_main.py updsts $taskid \"Complete\"\n\ttouch .notify\n\n";
   print MAKEFILE "$cmd";
@@ -178,8 +179,8 @@ sub WriteQPCRTasks {
       my $outfile = "$name.qPCR";
   
       my $seqcheck = "if [ ! -e $data_dir/Libraries/$lib.txt ]; then $root_dir/scripts/analys_track_main.py updsts $task \"Waiting for sequencing.\"; fi;"; 
-      my $cmd = "$outfile: $data_dir/Libraries/$lib.txt\n\t$seqcheck\n\t$QPCRDIR/qPCR \$< $background $testdir > \$@\n";
-      writeTask($qpcr, "qPCR", $outfile, $cmd);
+      my $cmd = "$outfile: $data_dir/Libraries/$lib.txt\n\t$QPCRDIR/qPCR \$< $background $testdir > \$@\n";
+      writeTask($qpcr, "qPCR", $outfile, $cmd,$seqcheck);
 
       $tasks .= $task." ";
     }
@@ -203,14 +204,13 @@ sub WriteProfileTasks {
 
       my $seqcheck = "if [ ! -e $data_dir/Libraries/$lib.txt ]; then $root_dir/scripts/analys_track_main.py updsts $task \"Waiting for sequencing.\"; fi;"; 
       my $cmds .= "$lib.wig.gz: $data_dir/Libraries/$lib.txt\n";
-      $cmds .= "\t$seqcheck\n";
       $cmds .= "\t".$PROFILEDIR.'/profile_reads_wig '.$data_dir.'/Libraries/'.$lib.'.txt "'.$name.'" "'.$name.'" | gzip > '.$lib.'.wig.gz';
       $cmds .= "\n\n";
       $cmds .= $lib.'.profile.gif: '.$data_dir.'/Libraries/'.$lib.'.txt '.$root_dir.'/reference_data/'.$genome.'_tx_start_sites'."\n";
       $cmds .= "\t".$PROFILEDIR.'/profile_reads_against_features $^ | '.$root_dir.'/scripts/profile_to_svg.pm | /opt/local/bin/convert - $@'."\n";
       $cmds .= "\n";
 
-      writeTask($profile, "ProfileReads",  $outfile, $cmds);
+      writeTask($profile, "ProfileReads",  $outfile, $cmds,$seqcheck);
       $tasks .= $task." ";
     }
   }
@@ -241,9 +241,9 @@ sub WriteCompareLibTasks {
       my $seqcheck = "if [ ! -e $data_dir/Libraries/$name1.txt ]; then $root_dir/scripts/analys_track_main.py updsts $task \"Waiting for $name1 sequencing.\"; fi;"; 
       $seqcheck .= "\n\tif [ ! -e $data_dir/Libraries/$name2.txt ]; then $root_dir/scripts/analys_track_main.py updsts $task \"Waiting for $name2 sequencing.\"; fi;"; 
 
-      my $cmd = "$outfile: $data_dir/Libraries/$name1.txt $data_dir/Libraries/$name2.txt\n\t$seqcheck\n\t$root_dir/bin/count_reads_in_peaks $tf $name1 $features $data_dir/Libraries/$name1.txt $name2 $features $data_dir/Libraries/$name2.txt > \$@\n";
+      my $cmd = "$outfile: $data_dir/Libraries/$name1.txt $data_dir/Libraries/$name2.txt\n\t$root_dir/bin/count_reads_in_peaks $tf $name1 $features $data_dir/Libraries/$name1.txt $name2 $features $data_dir/Libraries/$name2.txt > \$@\n";
   
-      writeTask($cmp, "CompareLibraries", $outfile, $cmd);
+      writeTask($cmp, "CompareLibraries", $outfile, $cmd,$seqcheck);
       $tasks .= $task." ";
     }
   }
@@ -317,10 +317,9 @@ sub WritePeakCallingTasks {
         $outfile .= "peak_caller.ChIP.out peak_caller.ChIP.out.bedgraph peak_caller.ChIP.out.fasta";
 
         $cmd   .= "peak_caller.ChIP.out: $data_dir/Libraries/$signal.txt $data_dir/Libraries/$bg.txt\n";
-        $cmd   .= "\t$seqcheck\n";
        $cmd   .= "\trm -f background_RX_noIP.align.txt\n";
         $cmd   .= "\trm -f pseudo_ChIP_RX_noIP.align.txt\n";
-        $cmd   .= "\t".$QUESTDIR.'/generate_QuEST_parameters.pl -rp '.$GENOMEDIR.'/QuEST_'.$genome.' -solexa_align_ChIP $data_dir/Libraries/'.$signal.'.txt -solexa_align_RX_noIP $data_dir/Libraries/'.$bg.'.txt -ap '.$data_dir.'/Tasks/'.$task.' -silent > '.$name.'.QuEST.log;'."\n";
+        $cmd   .= "\t".$QUESTDIR.'/generate_QuEST_parameters.pl -rp '.$GENOMEDIR.'/QuEST_'.$genome.' -solexa_align_ChIP '.$data_dir.'/Libraries/'.$signal.'.txt -solexa_align_RX_noIP '.$data_dir.'/Libraries/'.$bg.'.txt -ap '.$data_dir.'/Tasks/'.$task.' -silent > '.$name.'.QuEST.log;'."\n";
         $cmd   .= "\t".$QUESTDIR.'/run_QuEST_with_param_file.pl -p QuEST.batch.pars >> '.$name.'.QuEST.log;'."\n";
         $cmd   .= "\t".'rm -rf scores;'."\n\n"; 
 
@@ -337,7 +336,7 @@ sub WritePeakCallingTasks {
         $outfile .= "$name.peaks $name.peaks.fasta ";
 
         $cmd .= "$name.peaks: $data_dir/Libraries/$signal.txt $data_dir/Libraries/$bg.txt\n";
-        $cmd .= "\t".$WINGPEAKSDIR.'/ChIPSeq_PeakCaller_ENCODE -gn '.$WINGPEAKSGENOMEDIR.'/'.$genome.'_chrlist.cod -it '.$name.' -in 1 -if '.$data_dir/Libraries/'.$signal.'.txt -ct Background -cn 1 -cf '.$data_dir.'/Libraries/'.$bg.'.txt -ot '.$name.' > '.$name.'.log;'."\n";
+        $cmd .= "\t".$WINGPEAKSDIR.'/ChIPSeq_PeakCaller_ENCODE -gn '.$WINGPEAKSGENOMEDIR.'/'.$genome.'_chrlist.cod -it '.$name.' -in 1 -if '.$data_dir.'/Libraries/'.$signal.'.txt -ct Background -cn 1 -cf '.$data_dir.'/Libraries/'.$bg.'.txt -ot '.$name.' > '.$name.'.log;'."\n";
 
         $cmd .= "%.peaks.tab: %.peaks\n";
         $cmd .= "\t".'cat $< | awk \'{ print $$1"\t"$$2"\t"$$3"\t"$$4"\t"$$7}\' > $@'."\n\n";
@@ -345,21 +344,22 @@ sub WritePeakCallingTasks {
         $cmd .= "%.peaks.fasta: %.peaks.tab\n";
         $cmd .= "\t".'cat $< | '.$root_dir.'/scripts/extract_peaks.pm '.$root_dir.'/reference_data/hg18_chrom_list.txt > $@'."\n\n";
       } elsif($caller eq "MACS") {
-        $outfile .= $name."_peaks.bed ".$name."_peaks.fasta ";
+        $outfile .= $name."_peaks.bed ".$name."_peaks.fasta  ValidationPrimers.html ";
 
-       $cmd .= "\n.PRECIOUS: $name_peaks.xls $name_peaks.bed $name_peaks.fasta\n\n";
+       $cmd .= "\n.PRECIOUS: ".$name.'_peaks.xls '.$name.'_peaks.bed '.$name."_peaks.fasta\n\n";
 
         $cmd .= "\n".$name."_peaks.xls: $data_dir/Libraries/$signal.txt $data_dir/Libraries/$bg.txt\n";
-        $cmd .= "\t$seqcheck\n";
-        $cmd .= "\tcat $data_dir/Libraries/$signal.txt | $root_dir/scripts/align_to_bed.pm > $signal.bed\n";
-        $cmd .= "\tcat $data_dir/Libraries/$bg.txt | $root_dir/scripts/align_to_bed.pm > $bg.bed\n";
-        $cmd .= "\t$MACSDIR/macs -t $signal.bed -c ./$bg.bed --name=$name --pvalue=1e-10 > $name.log 2> $name.err\n";
+        $cmd .= "\tcat $data_dir/Libraries/$signal.txt | $root_dir/scripts/align_to_bed.pm | grep -v 'contam' | grep -v 'humRibosomal' > $signal.bed\n";
+        $cmd .= "\tcat $data_dir/Libraries/$bg.txt | $root_dir/scripts/align_to_bed.pm  | grep -v 'contam' | grep -v 'humRibosomal' > $bg.bed\n";
+        $cmd .= "\t$MACS -t $signal.bed -c ./$bg.bed --name=$name --pvalue=1e-10 --mfold=20 > $name.log 2> $name.err\n";
        $cmd .= "\trm -f $signal.bed $bg.bed\n";
        $cmd .= "\t".'exit `grep -c "^CRITICAL" '.$name.'.err`'."\n\n";
 
         $cmd .= "\n".$name."_peaks.bed: ".$name."_peaks.xls\n";
-        $cmd .= "\t$root_dir/scripts/MACS_2_BED.sh $< $name > $@\n\n"
-        $cmd .= "\t/Library/WebServer/CGI-Executables/ValidationDesign/primer_design_cmd.pm http://m304-apple-server.stanford.edu/Tasks/$task/".$name."_peaks.bed $signal > ValidationPrimers.html\n\n";
+        $cmd .= "\t$root_dir".'/scripts/MACS_2_BED.sh $< '.$name.' > $@'."\n";
+
+        $cmd .= "\nValidationPrimers.html: ".$name."_peaks.bed\n";
+        $cmd .= "\t$root_dir".'/scripts/primer_design.pm $< '."$signal $root_dir $data_dir ".'> $@'."\n\n";
 
         $cmd .= "\n".$name."_peaks.tab: ".$name."_peaks.bed\n";
         $cmd .= "\t".'cat $< | awk \'{print NR"\t"$$1"\t"$$2"\t"$$3"\t1"}\' > $@'."\n\n";
@@ -368,7 +368,7 @@ sub WritePeakCallingTasks {
         $cmd .= "\t".'cat $< | '.$root_dir.'/scripts/extract_peaks.pm '.$root_dir.'/reference_data/hg18_chrom_list.txt > $@'."\n\n";
       }
 
-      writeTask($peakcall, "PeakCalling", $outfile, $cmd);
+      writeTask($peakcall, "PeakCalling", $outfile, $cmd,$seqcheck);
     }
   }
   return $tasks;