Removed from hard-coded directories. Starting to bring the QC into the sequence repos...
[htsworkflow.git] / htswanalysis / scripts / ConfigureTasks.pm
index ab5f785376d0d75645d6846c35e8497de727fd6f..3aab29c0380bba6415dea5fa3dea1df258edd873 100755 (executable)
@@ -177,8 +177,8 @@ sub WriteQPCRTasks {
       my $testdir =  $QPCRTESTDIR;
       my $outfile = "$name.qPCR";
   
-      my $seqcheck = "if [ ! -e ~Data/Libraries/$lib.txt ]; then $root_dir/scripts/analys_track_main.py updsts $task \"Waiting for sequencing.\"; fi;"; 
-      my $cmd = "$outfile: ~Data/Libraries/$lib.txt\n\t$seqcheck\n\t$QPCRDIR/qPCR \$< $background $testdir > \$@\n";
+      my $seqcheck = "if [ ! -e $data_dir/Libraries/$lib.txt ]; then $root_dir/scripts/analys_track_main.py updsts $task \"Waiting for sequencing.\"; fi;"; # shell guard: sets the task status to "Waiting for sequencing." while the library file is absent
+      my $cmd = "$outfile: $data_dir/Libraries/$lib.txt\n\t$seqcheck\n\t$QPCRDIR/qPCR \$< $background $testdir > \$@\n"; # make rule: $outfile depends on the library file; recipe runs the guard, then qPCR
       writeTask($qpcr, "qPCR", $outfile, $cmd);
 
       $tasks .= $task." ";
@@ -201,12 +201,12 @@ sub WriteProfileTasks {
 
       my $outfile = "$lib.wig.gz $lib.profile.gif";  
 
-      my $seqcheck = "if [ ! -e ~Data/Libraries/$lib.txt ]; then $root_dir/scripts/analys_track_main.py updsts $task \"Waiting for sequencing.\"; fi;"; 
-      my $cmds .= "$lib.wig.gz: ~Data/Libraries/$lib.txt\n";
+      my $seqcheck = "if [ ! -e $data_dir/Libraries/$lib.txt ]; then $root_dir/scripts/analys_track_main.py updsts $task \"Waiting for sequencing.\"; fi;"; # shell guard: sets the task status to "Waiting for sequencing." while the library file is absent
+      my $cmds = "$lib.wig.gz: $data_dir/Libraries/$lib.txt\n"; # first make rule; plain assignment, since the variable is declared here
       $cmds .= "\t$seqcheck\n";
-      $cmds .= "\t".$PROFILEDIR.'/profile_reads_wig ~Data/Libraries/'.$lib.'.txt "'.$name.'" "'.$name.'" | gzip > '.$lib.'.wig.gz';
+      $cmds .= "\t".$PROFILEDIR.'/profile_reads_wig '.$data_dir.'/Libraries/'.$lib.'.txt "'.$name.'" "'.$name.'" | gzip > '.$lib.'.wig.gz'; # recipe: profile the library reads and gzip the output into $lib.wig.gz
       $cmds .= "\n\n";
-      $cmds .= $lib.'.profile.gif: ~Data/Libraries/'.$lib.'.txt '.$root_dir.'/reference_data/'.$genome.'_tx_start_sites'."\n";
+      $cmds .= $lib.'.profile.gif: '.$data_dir.'/Libraries/'.$lib.'.txt '.$root_dir.'/reference_data/'.$genome.'_tx_start_sites'."\n"; # second make rule: the gif depends on the library file and the genome's tx start sites
       $cmds .= "\t".$PROFILEDIR.'/profile_reads_against_features $^ | '.$root_dir.'/scripts/profile_to_svg.pm | /opt/local/bin/convert - $@'."\n";
       $cmds .= "\n";
 
@@ -238,10 +238,10 @@ sub WriteCompareLibTasks {
     
       my $outfile = $name1."_".$name2.".compare "; 
 
-      my $seqcheck = "if [ ! -e ~Data/Libraries/$name1.txt ]; then $root_dir/scripts/analys_track_main.py updsts $task \"Waiting for $name1 sequencing.\"; fi;"; 
-      $seqcheck .= "\n\tif [ ! -e ~Data/Libraries/$name2.txt ]; then $root_dir/scripts/analys_track_main.py updsts $task \"Waiting for $name2 sequencing.\"; fi;"; 
+      my $seqcheck = "if [ ! -e $data_dir/Libraries/$name1.txt ]; then $root_dir/scripts/analys_track_main.py updsts $task \"Waiting for $name1 sequencing.\"; fi;"; # shell guard for the first library file
+      $seqcheck .= "\n\tif [ ! -e $data_dir/Libraries/$name2.txt ]; then $root_dir/scripts/analys_track_main.py updsts $task \"Waiting for $name2 sequencing.\"; fi;"; # second guard, appended as its own recipe line
 
-      my $cmd = "$outfile: ~Data/Libraries/$name1.txt ~Data/Libraries/$name2.txt\n\t$seqcheck\n\t$root_dir/bin/count_reads_in_peaks $tf $name1 $features ~Data/Libraries/$name1.txt $name2 $features ~Data/Libraries/$name2.txt > \$@\n";
+      my $cmd = "$outfile: $data_dir/Libraries/$name1.txt $data_dir/Libraries/$name2.txt\n\t$seqcheck\n\t$root_dir/bin/count_reads_in_peaks $tf $name1 $features $data_dir/Libraries/$name1.txt $name2 $features $data_dir/Libraries/$name2.txt > \$@\n"; # make rule: $outfile depends on both library files; recipe runs the guards, then count_reads_in_peaks
   
       writeTask($cmp, "CompareLibraries", $outfile, $cmd);
       $tasks .= $task." ";
@@ -304,8 +304,8 @@ sub WritePeakCallingTasks {
       my $bg =     $peakcall->{Background}->{Library};
       my $genome = $peakcall->{Genome};
 
-      my $seqcheck = "if [ ! -e ~Data/Libraries/$signal.txt ]; then $root_dir/scripts/analys_track_main.py updsts $task \"Waiting for $signal sequencing.\"; fi;"; 
-      $seqcheck .= "\n\tif [ ! -e ~Data/Libraries/$bg.txt ]; then $root_dir/scripts/analys_track_main.py updsts $task \"Waiting for $bg sequencing.\"; fi;"; 
+      my $seqcheck = "if [ ! -e $data_dir/Libraries/$signal.txt ]; then $root_dir/scripts/analys_track_main.py updsts $task \"Waiting for $signal sequencing.\"; fi;"; # shell guard for the signal library file
+      $seqcheck .= "\n\tif [ ! -e $data_dir/Libraries/$bg.txt ]; then $root_dir/scripts/analys_track_main.py updsts $task \"Waiting for $bg sequencing.\"; fi;"; # same guard for the background library, appended as its own recipe line
 
       $tasks .= $task." ";
      
@@ -316,11 +316,11 @@ sub WritePeakCallingTasks {
       if($caller eq "QuEST") {
         $outfile .= "peak_caller.ChIP.out peak_caller.ChIP.out.bedgraph peak_caller.ChIP.out.fasta";
 
-        $cmd   .= "peak_caller.ChIP.out: ~Data/Libraries/$signal.txt ~Data/Libraries/$bg.txt\n";
+        $cmd   .= "peak_caller.ChIP.out: $data_dir/Libraries/$signal.txt $data_dir/Libraries/$bg.txt\n"; # make rule: QuEST output depends on the signal and background library files
         $cmd   .= "\t$seqcheck\n";
        $cmd   .= "\trm -f background_RX_noIP.align.txt\n";
         $cmd   .= "\trm -f pseudo_ChIP_RX_noIP.align.txt\n";
-        $cmd   .= "\t".$QUESTDIR.'/generate_QuEST_parameters.pl -rp '.$GENOMEDIR.'/QuEST_'.$genome.' -solexa_align_ChIP ~Data/Libraries/'.$signal.'.txt -solexa_align_RX_noIP ~Data/Libraries/'.$bg.'.txt -ap '.$data_dir.'/Tasks/'.$task.' -silent > '.$name.'.QuEST.log;'."\n";
+        $cmd   .= "\t".$QUESTDIR.'/generate_QuEST_parameters.pl -rp '.$GENOMEDIR.'/QuEST_'.$genome.' -solexa_align_ChIP '.$data_dir.'/Libraries/'.$signal.'.txt -solexa_align_RX_noIP '.$data_dir.'/Libraries/'.$bg.'.txt -ap '.$data_dir.'/Tasks/'.$task.' -silent > '.$name.'.QuEST.log;'."\n"; # $data_dir is concatenated: inside single quotes it would be written to the recipe literally
         $cmd   .= "\t".$QUESTDIR.'/run_QuEST_with_param_file.pl -p QuEST.batch.pars >> '.$name.'.QuEST.log;'."\n";
         $cmd   .= "\t".'rm -rf scores;'."\n\n"; 
 
@@ -336,8 +336,8 @@ sub WritePeakCallingTasks {
       } elsif($caller eq "WingPeaks") {  
         $outfile .= "$name.peaks $name.peaks.fasta ";
 
-        $cmd .= "$name.peaks: ~Data/Libraries/$signal.txt ~Data/Libraries/$bg.txt\n";
-        $cmd .= "\t".$WINGPEAKSDIR.'/ChIPSeq_PeakCaller_ENCODE -gn '.$WINGPEAKSGENOMEDIR.'/'.$genome.'_chrlist.cod -it '.$name.' -in 1 -if ~Data/Libraries/'.$signal.'.txt -ct Background -cn 1 -cf ~Data/Libraries/'.$bg.'.txt -ot '.$name.' > '.$name.'.log;'."\n";
+        $cmd .= "$name.peaks: $data_dir/Libraries/$signal.txt $data_dir/Libraries/$bg.txt\n"; # make rule: peaks file depends on the signal and background library files
+        $cmd .= "\t".$WINGPEAKSDIR.'/ChIPSeq_PeakCaller_ENCODE -gn '.$WINGPEAKSGENOMEDIR.'/'.$genome.'_chrlist.cod -it '.$name.' -in 1 -if '.$data_dir.'/Libraries/'.$signal.'.txt -ct Background -cn 1 -cf '.$data_dir.'/Libraries/'.$bg.'.txt -ot '.$name.' > '.$name.'.log;'."\n"; # both library paths use the same '.$data_dir.' concatenation (the -if path was missing its closing .')
 
         $cmd .= "%.peaks.tab: %.peaks\n";
         $cmd .= "\t".'cat $< | awk \'{ print $$1"\t"$$2"\t"$$3"\t"$$4"\t"$$7}\' > $@'."\n\n";
@@ -347,20 +347,23 @@ sub WritePeakCallingTasks {
       } elsif($caller eq "MACS") {
         $outfile .= $name."_peaks.bed ".$name."_peaks.fasta ";
 
-        $cmd .= $name."_peaks.bed: ~Data/Libraries/$signal.txt ~Data/Libraries/$bg.txt\n";
+       $cmd .= "\n.PRECIOUS: ${name}_peaks.xls ${name}_peaks.bed ${name}_peaks.fasta\n\n"; # braces delimit $name; "$name_peaks" would name a different variable
+
+        $cmd .= "\n".$name."_peaks.xls: $data_dir/Libraries/$signal.txt $data_dir/Libraries/$bg.txt\n";
         $cmd .= "\t$seqcheck\n";
-        $cmd .= "\tcat ~Data/Libraries/$signal.txt | $root_dir/scripts/align_to_bed.pm > $signal.bed\n";
-        $cmd .= "\tcat ~Data/Libraries/$bg.txt | $root_dir/scripts/align_to_bed.pm > $bg.bed\n";
+        $cmd .= "\tcat $data_dir/Libraries/$signal.txt | $root_dir/scripts/align_to_bed.pm > $signal.bed\n";
+        $cmd .= "\tcat $data_dir/Libraries/$bg.txt | $root_dir/scripts/align_to_bed.pm > $bg.bed\n";
         $cmd .= "\t$MACSDIR/macs -t $signal.bed -c ./$bg.bed --name=$name --pvalue=1e-10 > $name.log 2> $name.err\n";
-        $cmd .= "\t".'echo "track name="'.$name.'" description="'.$name.'"GR_EtOH_Rep2_Peak_Calls" > header';
-        $cmd .= "\t".'cat header '.$name.'_peaks.bed > t; mv t '.$name.'_peaks.bed'."\n"; 
-        $cmd .= "\trm -f $signal.bed $bg.bed header\n";
+       $cmd .= "\trm -f $signal.bed $bg.bed\n";
        $cmd .= "\t".'exit `grep -c "^CRITICAL" '.$name.'.err`'."\n\n";
 
-        $cmd .= "%_peaks.tab: %_peaks.bed\n";
+        $cmd .= "\n".$name."_peaks.bed: ".$name."_peaks.xls\n";
+        $cmd .= "\t$root_dir/scripts/MACS_2_BED.sh $< $name > $@\n\n"; # statement terminator was missing here
+
+        $cmd .= "\n".$name."_peaks.tab: ".$name."_peaks.bed\n";
         $cmd .= "\t".'cat $< | awk \'{print NR"\t"$$1"\t"$$2"\t"$$3"\t1"}\' > $@'."\n\n";
 
-        $cmd .= "%_peaks.fasta: %_peaks.tab\n";
+        $cmd .= "\n".$name."_peaks.fasta: ".$name."_peaks.tab\n";
         $cmd .= "\t".'cat $< | '.$root_dir.'/scripts/extract_peaks.pm '.$root_dir.'/reference_data/hg18_chrom_list.txt > $@'."\n\n";
       }
 
@@ -411,8 +414,8 @@ sub WriteComparePeakCallingTasks {
         #my $set2_feat = $caller2."_".$set2."/".$peak_file_2;
   #
         #my $cmd = "$outfile: $set1_feat $set2_feat\n";
-        #$cmd .= "\t~/EXPTRACK/QC/count_reads_in_peaks NA $set1-$caller1 $set1_feat ~Data/Libraries/".$xml->{PeakCalling}->[$index1]->{Signal}->{Library}.".txt ";
-        #$cmd .= "$set2-$caller2 $set2_feat ~Data/Libraries/".$xml->{PeakCalling}->[$index2]->{Signal}->{Library}.".txt > \$@\n";
+        #$cmd .= "\t~/EXPTRACK/QC/count_reads_in_peaks NA $set1-$caller1 $set1_feat $data_dir/Libraries/".$xml->{PeakCalling}->[$index1]->{Signal}->{Library}.".txt ";
+        #$cmd .= "$set2-$caller2 $set2_feat $data_dir/Libraries/".$xml->{PeakCalling}->[$index2]->{Signal}->{Library}.".txt > \$@\n";
   #
         #$file_list .= "$outfile ";
         #$cmds .= $cmd."\n";