From b714fb623de83fee87b2640a1564fa993b5e8391 Mon Sep 17 00:00:00 2001 From: Rami Rauch Date: Thu, 14 May 2009 22:05:24 +0000 Subject: [PATCH] upgraded to work both in standard and IPAR modes, and to run goat 1.3 --- htswdataprod/scripts/MainPipelineScript | 247 ++++++++++-------------- 1 file changed, 102 insertions(+), 145 deletions(-) diff --git a/htswdataprod/scripts/MainPipelineScript b/htswdataprod/scripts/MainPipelineScript index 6dd3eb1..0d3f853 100755 --- a/htswdataprod/scripts/MainPipelineScript +++ b/htswdataprod/scripts/MainPipelineScript @@ -1,12 +1,12 @@ #!/bin/sh -# Last updated: Aug/11/2008 By Rami -# --------------------------------- +#Last updated: May/14/2009 By Rami +#----------Standard / IPAR----------------------- # Main Desc: Top level wrapper script for the whole Data pipeline after sequencing is done until data analysis. # Start: Read current folder as input Run Folder (ex: 080220_HWI-FC2057U). # This scripts assumes that: # 1 - Sequence reading has finished and all data are in Run Folder. -# 2 - DataRun record saved with apropriate cnfig params in the FcTracker online DB (http://myerslab.stanford.edu/admin) -# 3 - Curren directory, from were this script is ran, is the Run Folder ( = DataRun.RunFolder field in the Fctracker DB). +# 2 - DataRun record saved with apropriate cnfig params in the FcTracker online DB (http://htsw.stanford.edu/admin) +# 3 - Curren directory, from were this script is ran, is the Run Folder # ---- # START: (checks in ExpTrackLog dir if OKfiles exist before executing scripts) # Get Input: ["init" (to start from scatch, otherwise starts after the last successful step)] @@ -17,56 +17,50 @@ # Aks the user to check if the retrieved parameters are OK. If Yes, enter "GO", otherwise and fix .. # ---- : # Runs the PipelineListener & for the rest of the process flow -# Goes down 2 dirs to C1-X_Firecrest_...etc . +# Goes down 2 dirs to Firecrest...etc . # Runs the make -j ect ... # FINISH ################################################################################ +fullpath=`pwd` +runfolder=`pwd | sed -e 's/^.*\///'` + if [ $1 ] then echo "" else echo "Missing argument: FlowcellId. Try again. Example: ./MainPipelineScript FC99999 " exit fi -fcid=$1 -echo "fcid is $fcid" - -#fullpath=`pwd` -#runfolder=`pwd | sed -e 's/^.*\///'` -#changed for the HTSW merged -fullpath=$2 -echo "fullpath is $fullpath" -runfolder=`echo $fullpath | sed -e 's/^.*\///'` -echo "runfolder is $runfolder" +fcid=$1 -if [ $3 ] && [ $3 == 'init' ] - then +if [ $2 ] && [ $2 == 'init' ] + then rm -r ExpTrackLog - echo "==== Starting from scratch ====\n" + echo "==== Starting from scratch at `date` ====" + else + echo "=== Starting at `date` ==="; fi +echo "=== Retrieving Information - Please wait ... ==="; if [ -d ExpTrackLog ] then echo "" else mkdir ExpTrackLog fi -#exptrackmainsc="$HOME/EXPTRACK/exp_track_main.py" -exptrackmainsc="exp_track_main.py" +exptrackmainsc="$EXPTRACKDIR/exp_track_main.py" # -- Check Image folders and find out cycle number -#tocycle=`$HOME/EXPTRACK/CheckImageFolders` -echo "calling CheckImageFolders" -tocycle=`./CheckImageFolders $fullpath` +tocycle=`$EXPTRACKDIR/CheckImageFolders` ## Uncomment this if you want to force a cycle number: -#tocycle=36 +##tocycle=36 if [ $tocycle -gt 0 ] && [ $tocycle -lt 37 ] -then echo "" +then echo "Cheked Imag Folders -> cycle set to: $tocycle" else - echo "PROBLEM: CheckImagefolders failed to return a cycle number between 0 and 36. Returned: '$tocycle'" + echo "PROBLEM: Check Image folders failed to a cycle between 0 and 36. Found: $tocycle" echo "Note: to input cycle number edit the MainP..Script un-comment ##tocycle=36" echo " --- tocycle SET to 0 --- " echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" @@ -76,15 +70,29 @@ else exit fi +analysis_mode='standard' +pipeline_version='1.3' ## The Command that runs the making of the "make" files -makefilescmd="/Applications/GAPipeline-0.3.0/Goat/goat_pipeline.py --cycles=1-$tocycle --GERALD=config.txt --matrix=auto5 --phasing=auto5 $fullpath/Images/ --make" +if [ -d Data/IPAR_$pipeline_version ] +then + datadir=Data/IPAR_$pipeline_version + analysis_mode='IPAR' +else + datadir=Data +fi + +# control lane by default 4 +makefilescmd="/Applications/SolexaPipeline/Goat/goat_pipeline.py --cycles=1-$tocycle --matrix=auto --phasing=auto --control-lane=4 --GERALD=config.txt $fullpath/$datadir --make" +if [ $analysis_mode == 'IPAR' ] +then + # For IPAR we can't use the goat. we use the bustard.py (see user man for Pipeline Version 1.3 page 22 at the bottom) + makefilescmd="/usr/local/src/GAPipeline-$pipeline_version.2/bin/bustard.py $fullpath/$datadir --make --cycles=1-$tocycle --matrix=auto --phasing=auto --control-lane=4 --GERALD=config.txt" +fi + ## PRINT PRE CHECK REPORT echo "=================================== PRELIMINARY CHECKS ================================" -echo "Full path: $fullpath" -echo "Run Folder: $runfolder" -echo "FlowcellId: $fcid" -echo "------------------------------------------------------" + echo "Config File generation" rm config.txt python $exptrackmainsc getconfile $fcid $runfolder @@ -96,7 +104,6 @@ else exit fi cat config.txt - echo "" echo "------------------------------------------------------" echo "LaneNames.xml file generation: " @@ -112,167 +119,117 @@ else fi cat LaneNames.xml echo "" -echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" -echo "Will run Cycles: 1 to $tocycle" -echo "Command for Make files generation step:" -echo "--------------------------------------" -echo $makefilescmd + +echo "----------------Analysis Params:----------------------" +echo "Analysis Mode: $analysis_mode" +echo "Pipeline Software Version: $pipeline_version" +echo "Cycles: 1 to $tocycle" +echo "Will run this to generate Make files: $makefilescmd" +echo "Full current path: $fullpath" +echo "Run Folder: $runfolder" +echo "FlowcellId: $fcid" +echo "------------------------------------------------------" echo "" echo "NOTE!!: To change any of the above parameters just quit and modify this script according to your current needs." echo "" echo "" -echo "If these are the correct parameters for your Data Pipeline Run type GO to proceed\n Otherwise type anything else to quit." +echo "If these are the correct parameters for your Data Pipeline Run type GO to proceed" +echo "" +echo "Otherwise type anything else to quit" echo "=====================================================================================" - confirm="GO"; -if [ $3 ] && [ $3 == 'GO' ] +if [ $2 ] && [ $2 == 'GO' ] then echo "Autostarting"; else read confirm fi - - ####################### if [ $confirm ] && [ $confirm == "GO" ] -then echo "" +then echo "Proceeding .." else echo "Response: "$confirm; exit fi - ###### START PROCESS - -## First check if this is a run from scratch or from Bustard or Gerald ~~~~~ -firecrestdir=`ls $fullpath/Data | grep "^C1-$tocycle.*_Firecrest.*"` -#echo "tocycle is $tocycle. fullpath is $fullpath" -#echo "firecrestdir is $firecrestdir" -firecrestfinished=$fullpath/Data/$firecrestdir/finished.txt -bustfinished=$fullpath/Data/$firecrestdir/Bustard*_*/finished.txt -gerfinished=$fullpath/Data/$firecrestdir/Bustard*_*/GERALD_*/finished.txt - -phaseMsg='' -nextDo='all' - -if [ -f $firecrestfinished ] -then - phaseMsg='Firecrest Finished (found finished.txt). Start from BUSTARD' - #? [Yes/[Any Key]] (Enter [Any Key] to Start from scratch)' - nextDo='Bustard' - echo "Firecrest finished.txt found." - if [ -f $bustfinished ] - then - phaseMsg='Bustard Finished (found finished.txt). Start from GERALD' - # ? [Yes/[Any Key]] (Any Key = Start from scratch)' - nextDo='Gerald' - echo " and Bustard finished.txt found." - if [ -f $gerfinished ] - then - phaseMsg='Looks like Solexa pipeline has completed (found all 3 finished.txt files).' - #? [Yes/[Any Key]] (Any Key = Start from scratch)' - nextDo='CollectReads' - echo " and Gerald finished.txt found." - fi - fi -else - echo "Run from start" -fi - -echo $phaseMsg - - - -if [ $nextDo == "all" ] -then - echo "Run from start" -else - confirm='' - # Can't read from prompt yet using the Jabber api - # read confirm - confirm="Yes" - if [ $confirm == "Yes" ] - then - #Launch MPListener in the background - echo " ... starting MPListener in the background ..." - #./MPListener $fcid $runfolder $tocycle & - ./MPListener $fcid $fullpath $tocycle & - if [ $nextDo == "Bustard" ] - then - #Change dir down to Bustard run folder - #echo "Starting Solexa data process (`date`) FROM BUSTARD ..." - python $exptrackmainsc updsts $fcid $runfolder 1 Start_from_BUSTARD - echo "Changing dir to $fullpath/Data/$firecrestdir/Bustard*_*/" - cd $fullpath/Data/$firecrestdir/Bustard*_* - echo " ..... supposed to run make -j 8 recursive here ...." - ##NOT READY YET make -j 8 recursive - else - if [ $nextDo == "Gerald" ] - then - #Change dir down to Bustard run folder - python $exptrackmainsc updsts $fcid $runfolder 1 Start_from_GERALD - #echo "Starting Solexa data process (`date`) FROM GERALD ..." - echo "Changing dir to $fullpath/Data/$firecrestdir/Bustard*_*/Gerald..." - cd $fullpath/Data/$firecrestdir/Bustard*_*/GERALD_* - echo " ..... supposed to run make -j 8 here ...." - ##NOT READY YET make -j 8 - fi - fi - ## MPListner takes care of the rest - exit - fi -fi -### ~~~~~~~~ - - if [ -f ExpTrackLog/OK_get_check_config_file ] then echo "Ready for Make file generation step" #run Make files generation step python $exptrackmainsc updsts $fcid $runfolder 0 Generating_Make_files - ##NOT READY YET $makefilescmd + $makefilescmd else echo "FAILED: get_check_config_file." python $exptrackmainsc updsts $fcid $runfolder 0 Check_ConfigFile_Failed exit fi +###################### +## Make sure we find the ONE specific Start folder (Firecrest or Standard / Bustard for IPAR), then check existance of "Makefile" these directories +if [ $analysis_mode == 'IPAR' ] +then + #firecrestdir=`ls $datadir | grep "Firecrest"` + #countdirs=`ls $datadir | grep -c Firecrest` + #Rami + startdir=`ls $datadir | grep Bustard$pipeline_version` + countdirs=`ls $datadir | grep -c Bustard$pipeline_version` +else + startdir=`ls $datadir | grep C1-[0-9][0-9]_Firecrest` + countdirs=`ls $datadir | grep -c C1-[0-9][0-9]_Firecrest` +fi -## CHECKING OF THE MAKE FILE. Checks presence of the Firecrest folder, then check "Makefile" in the 3 directories -firecrestdir=`ls $fullpath/Data | grep "^C1-$tocycle.*_Firecrest.*"` -countdirs=`ls $fullpath/Data | grep -c "^C1-$tocycle.*_Firecrest.*"` if [ $countdirs == 1 ] then - echo "ok. firecrest folder '$firecrestdir' found" + echo "ok. start folder '$startdir' found" else if [ $countdirs -gt 1 ] then - echo "ERROR: Found too many ($countdirs) firecrest directories. You need to be more specific. Found:" + echo "ERROR: Found too many ($countdirs) start directories (firecrest/bustard(for IPAR)). You need to specify ONE folder or REMOVE duplicates. Found:" echo "" - echo $firecrestdir - python $exptrackmainsc updsts $fcid $runfolder 0 ERROR:_Need_Specific_Firecrest_Dir + echo $startdir + python $exptrackmainsc updsts $fcid $runfolder 0 ERROR:_Need_Specific_Start_Dir exit else - echo "ERROR: Firecrest directory NOT FOUND. Looked for /^C1-$tocycle.*_Firecrest.*/" - python $exptrackmainsc updsts $fcid $runfolder 0 ERROR:_Firecrest_Dir_Not_Found + echo "ERROR: Start (firecrest/bustard(for IPAR)) directory NOT FOUND." + python $exptrackmainsc updsts $fcid $runfolder 0 ERROR:_Start_Dir_Not_Found exit fi fi -firemakefile=$fullpath/Data/$firecrestdir/Makefile -bustmakefile=$fullpath/Data/$firecrestdir/Bustard*_*/Makefile -germakefile=$fullpath/Data/$firecrestdir/Bustard*_*/GERALD_*/Makefile + +if [ $analysis_mode == 'IPAR' ] +then + firemakefile=$datadir/$startdir/Makefile # dummy check, since there's no need to check this one in IPAR + bustmakefile=$datadir/$startdir/Makefile + germakefile=$datadir/$startdir/GERALD_*/Makefile +else + firemakefile=$datadir/$startdir/Makefile + bustmakefile=$datadir/$startdir/Bustard*_*/Makefile + germakefile=$datadir/$startdir/Bustard*_*/GERALD_*/Makefile +fi + +##################### + + if [ -f $firemakefile ] && [ -f $bustmakefile ] && [ -f $germakefile ] then #Launch MPListener in the background - #$HOME/EXPTRACK/MPListener $fcid $runfolder $tocycle & - MPListener $fcid $runfolder $tocycle & + $EXPTRACKDIR/MPListener_IPAR $fcid $runfolder $tocycle & #Change dir down 2 folders to the Firecrest folder + echo "Changing dir to $datadir" + + cd $datadir/$startdir + + echo "Starting Solexa data process at `date`. This would be a good time for a climbing escapade at Lake Tahoe ... ;) ..." python $exptrackmainsc updsts $fcid $runfolder 1 - python $exptrackmainsc updsts $fcid $runfolder 1 Firecrest_Dir:_$firecrestdir - echo "Changing dir to $fullpath/Data/$firecrestdir" - cd $fullpath/Data/$firecrestdir - echo "Starting Solexa data process at `date`. This would be a good time for a short ski escapade at Lake Tahoe ... ;) ..." + msgtext="Mode:$analysis_mode$pipeline_version" + python $exptrackmainsc updsts $fcid $runfolder 1 $msgtext make -j 8 recursive + +#BNP Changes + cp Bustard*/GERALD*/Summary.htm /Volumes/EncodeDisk4/$fcid_Summary.htm +#end BNP Changes + else echo "FAILED: can't find 3 Makefile. Looked for: $firemakefile, $bustmakefile, $germakefile" -- 2.30.2