The Main Controller for the Data production pipeline - Stanford version
authorUnknown Author <unknown>
Tue, 5 Aug 2008 19:14:33 +0000 (19:14 +0000)
committerUnknown Author <unknown>
Tue, 5 Aug 2008 19:14:33 +0000 (19:14 +0000)
htswdataprod/scripts/MainPipelineScript [new file with mode: 0755]

diff --git a/htswdataprod/scripts/MainPipelineScript b/htswdataprod/scripts/MainPipelineScript
new file mode 100755 (executable)
index 0000000..2ea0410
--- /dev/null
@@ -0,0 +1,187 @@
+#!/bin/sh
+#Last updated: Jul/30/2008 By Rami
+#---------------------------------
+# Main Desc: Top level wrapper script for the whole Data pipeline after sequencing is done until data analysis. 
+# Start: Read current folder as input Run Folder (ex: 080220_HWI-FC2057U).
+# This script assumes that:
+# 1 - Sequence reading has finished and all data are in the Run Folder.
+# 2 - A DataRun record has been saved with appropriate config params in the FcTracker online DB (http://myerslab.stanford.edu/admin)
+# 3 - The current directory, from where this script is run, is the Run Folder ( = DataRun.RunFolder field in the FcTracker DB).
+# ----
+# START: (checks in ExpTrackLog dir if OKfiles exist before executing scripts)
+# Get Input: <FlowCellId> ["init" (to start from scratch, otherwise starts after the last successful step)]
+# Preliminary Checks:
+# Connect to the ExperimentInfo DB (FcTracker)
+# Get Config file
+# Get LaneNames.xml file
+# Ask the user to check if the retrieved parameters are OK. If Yes, enter "GO", otherwise <ENTER> and fix ..
+# ---- <GO>:
+# Runs the PipelineListener & for the rest of the process flow 
+# Goes down 2 dirs to C1-X_Firecrest_...etc .
+# Runs the make -j etc. ...
+# FINISH
+################################################################################
+                                                                         
+
+# Absolute path of the Run Folder (the current directory) and its last path
+# component, which is used as the run folder name.
+fullpath=$(pwd)
+runfolder=${fullpath##*/}
+
+# The FlowcellId argument is mandatory. The test is quoted so that an empty
+# argument or one containing spaces cannot break it, and the usage error now
+# exits with a non-zero status so callers can detect it.
+if [ -n "$1" ]
+then echo ""
+else
+  echo "Missing argument: FlowcellId. Try again. Example: ./MainPipelineScript FC99999 <enter>"
+  exit 1
+fi
+
+fcid=$1
+
+# Optional second argument "init": remove the ExpTrackLog directory so the
+# run starts from scratch. The POSIX "=" operator is used because the script
+# runs under /bin/sh ("==" is a bash extension), and "rm -rf" keeps a missing
+# ExpTrackLog directory from being reported as an error.
+if [ "$2" = 'init' ]
+ then
+   rm -rf ExpTrackLog
+   echo "==== Starting from scratch ====\n"
+fi
+
+# Make sure the ExpTrackLog directory exists; stop if it cannot be created,
+# since later steps write their OK marker files into it.
+if [ -d ExpTrackLog ]
+ then echo ""
+else
+  mkdir ExpTrackLog || exit 1
+fi
+
+# Path of the experiment-tracking helper used for all FcTracker DB operations.
+exptrackmainsc="$HOME/EXPTRACK/exp_track_main.py"
+# -- Check Image folders and find out cycle number
+tocycle=$("$HOME/EXPTRACK/CheckImageFolders")
+
+## Uncomment this if you want to force a cycle number:
+##tocycle=36
+
+# Accept only a cycle number from 1 to 36. $tocycle is quoted and the error
+# output of the first test is discarded so that empty or non-numeric output
+# from CheckImageFolders lands in the PROBLEM branch instead of producing a
+# "[" syntax error. The failure path exits with a non-zero status.
+if [ "$tocycle" -gt 0 ] 2>/dev/null && [ "$tocycle" -lt 37 ]
+then echo ""
+else
+  echo "PROBLEM: Check Image folders failed to a cycle between 0 and 36. Found: $tocycle"
+  echo "Note: to input cycle number edit the MainP..Script un-comment ##tocycle=36"
+  echo "  --- tocycle  SET to 0 --- "
+  echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
+  echo
+  echo
+  ##tocycle=0
+  exit 1
+fi
+
+## The Command that runs the making of the "make" files
+# Kept as a single string: it is echoed in the report and later executed by
+# unquoted expansion, so it relies on $fullpath containing no whitespace.
+makefilescmd="/Applications/GAPipeline-0.3.0/Goat/goat_pipeline.py --cycles=1-$tocycle --GERALD=config.txt --matrix=auto5 --phasing=auto5 $fullpath/Images/ --make"
+                                                                               
+## PRINT PRE CHECK REPORT
+# Prints the run parameters, regenerates config.txt and LaneNames.xml through
+# the experiment-tracking helper, and shows the resulting files together with
+# the Make-files command so the operator can review them before confirming.
+echo "=================================== PRELIMINARY CHECKS ================================"
+echo "Full current path: $fullpath"
+echo "Run Folder: $runfolder"
+echo "FlowcellId: $fcid"
+echo "------------------------------------------------------"
+echo "Config File generation"
+# Remove any stale copy first so the existence check below proves that the
+# helper really produced a fresh file; -f keeps a first run (no file yet) quiet.
+rm -f config.txt
+python "$exptrackmainsc" getconfile "$fcid" "$runfolder"
+if [ -f config.txt ]
+then
+ # Marker file checked later before the Make-files generation step.
+ echo "OK" > ExpTrackLog/OK_get_check_config_file
+else
+ echo "ERROR: config.txt not found."
+ exit 1
+fi
+cat config.txt
+echo ""
+echo "------------------------------------------------------"
+echo "LaneNames.xml file generation: "
+echo ""
+# Same stale-file protection as for config.txt.
+rm -f LaneNames.xml
+python "$exptrackmainsc" getLanesNames "$fcid"
+if [ -f LaneNames.xml ]
+then
+ echo "LaneNames.xml found."
+else
+ echo "ERROR: LaneNames.xml not found."
+ exit 1
+fi
+cat LaneNames.xml
+echo ""
+echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+echo "Will run Cycles: 1 to $tocycle"
+echo "Command for Make files generation step:"
+echo "--------------------------------------"
+echo "$makefilescmd"
+echo ""
+echo "NOTE!!: To change any of the above parameters just quit and modify this script according to your current needs."
+echo ""
+echo ""
+echo "If these are the correct parameters for your Data Pipeline Run type GO to proceed\n Otherwise type anything else to quit."
+echo "====================================================================================="
+# Operator confirmation. Passing GO as the second argument skips the prompt;
+# otherwise the answer is read from standard input. Comparisons use the POSIX
+# "=" operator with quoted operands, so an empty or multi-word answer is
+# treated as "quit" instead of causing a "[" syntax error.
+confirm="GO"
+if [ "$2" = 'GO' ]
+then
+  echo "Autostarting"
+else
+  read -r confirm
+fi
+
+#######################
+
+if [ "$confirm" = "GO" ]
+then echo "Proceeding .."
+else
+  # Quitting here is the operator's choice, not a failure.
+  echo "Response: $confirm"
+  exit 0
+fi
+###### START PROCESS
+# Generate the Makefiles, but only if the config-file check earlier in this
+# script left its OK marker in ExpTrackLog. The status is reported to the
+# tracking DB through the helper's "updsts" command in both cases.
+if [ -f ExpTrackLog/OK_get_check_config_file ]
+ then
+  echo "Ready for Make file generation step"
+  #run Make files generation step
+  python "$exptrackmainsc" updsts "$fcid" "$runfolder" 0 Generating_Make_files
+  # Deliberately unquoted: the command string must be split into words.
+  # A non-zero exit status is reported but is not fatal here, because the
+  # script verifies afterwards that the expected Makefiles actually exist.
+  $makefilescmd || echo "WARNING: Make files generation command exited with status $?"
+ else
+  echo "FAILED: get_check_config_file."
+  python "$exptrackmainsc" updsts "$fcid" "$runfolder" 0 Check_ConfigFile_Failed
+  exit 1
+fi
+
+## Make sure we find the Firecrest folder, then check "Makefile" in the 3 directories
+# firecrestdir=`ls Data | grep C1.*_Firecrest.*_encode`
+# The pattern requires that the cycle number is not followed by another digit,
+# so e.g. tocycle=3 no longer matches a "C1-36_Firecrest..." directory.
+# The listing is taken once and the count is derived from it, so the name list
+# and the count can never disagree.
+firecrestdir=$(ls Data | grep -E "^C1-${tocycle}([^0-9].*)?_Firecrest")
+countdirs=$(printf '%s' "$firecrestdir" | grep -c '')
+if [ "$countdirs" -eq 1 ]
+then
+  echo "ok. firecrest folder '$firecrestdir' found"
+else
+  if [ "$countdirs" -gt 1 ]
+  then
+    echo "ERROR: Found too many ($countdirs) firecrest directories. You need to be more specific. Found:"
+    echo ""
+    echo "$firecrestdir"
+    python "$exptrackmainsc" updsts "$fcid" "$runfolder" 0 ERROR:_Need_Specific_Firecrest_Dir
+    exit 1
+  else
+    echo "ERROR: Firecrest directory NOT FOUND."
+    python "$exptrackmainsc" updsts "$fcid" "$runfolder" 0 ERROR:_Firecrest_Dir_Not_Found
+    exit 1
+  fi
+fi
+
+# Expected Makefile locations. The Bustard and GERALD entries are glob
+# patterns: they are stored unexpanded and are expanded, unquoted on purpose,
+# by the -f tests below. If a pattern matches more than one path the test
+# fails and the FAILED branch is taken.
+firemakefile=Data/$firecrestdir/Makefile
+bustmakefile=Data/$firecrestdir/Bustard*_*/Makefile
+germakefile=Data/$firecrestdir/Bustard*_*/GERALD_*/Makefile
+if [ -f "$firemakefile" ] && [ -f $bustmakefile ] && [ -f $germakefile ]
+then
+ #Launch MPListener in the background (started from the Run Folder, before the cd below)
+ "$HOME/EXPTRACK/MPListener" "$fcid" "$runfolder" "$tocycle" &
+ listenerpid=$!
+ #Change dir down 2 folders to the Firecrest folder
+ echo "Changing dir to Data/$firecrestdir"
+ # Never run make from the wrong directory: if the cd fails, stop the
+ # listener that was just started and abort.
+ if ! cd "Data/$firecrestdir"
+ then
+  echo "FAILED: can't change dir to Data/$firecrestdir"
+  kill "$listenerpid" 2>/dev/null
+  exit 1
+ fi
+ echo "Starting Solexa data process at `date`. This would be a good time for a short ski escapade at Lake Tahoe ... ;) ..."
+ python "$exptrackmainsc" updsts "$fcid" "$runfolder" 1
+ python "$exptrackmainsc" updsts "$fcid" "$runfolder" 1 "Firecrest_Dir:_$firecrestdir"
+ make -j 8 recursive
+else
+ echo "FAILED: can't find 3 Makefile. Looked for: $firemakefile, $bustmakefile, $germakefile"
+ python "$exptrackmainsc" updsts "$fcid" "$runfolder" 0 ERROR:_Make_files_not_found
+ exit 1
+fi
+
+## MPListener takes care of the rest
+# A bare "exit" returns the status of the last command, i.e. of make.
+exit