bin/config_pipeline.py

   1 #!/usr/bin/python
   2 import subprocess
   3 import logging
   4 import time
   5 import re
   6 import os
   7
   8
   9 logging.basicConfig(level=logging.DEBUG,
  10                     format='%(asctime)s %(levelname)-8s %(message)s',
  11                     datefmt='%a, %d %b %Y %H:%M:%S',
  12                     filename='pipeline_main.log',
  13                     filemode='w')
  14
  15 class ConfigInfo:
  16
  17   def __init__(self):
  18     self.run_path = None
  19     self.bustard_path = None
  20     self.config_filepath = None
  21
  22 #FLAGS
  23 RUN_ABORT = 'abort'
  24 RUN_FAILED = 'failed'
  25
  26
  27 #####################################
  28 # Configure Step (goat_pipeline.py)
  29 #Info
  30 s_start = re.compile('Starting Genome Analyzer Pipeline')
  31 s_gerald = re.compile("[\S\s]+--GERALD[\S\s]+--make[\S\s]+")
  32 s_generating = re.compile('Generating journals, Makefiles and parameter files')
  33 s_seq_folder = re.compile('^Sequence folder: ')
  34 s_seq_folder_sub = re.compile('want to make ')
  35 s_stderr_taskcomplete = re.compile('^Task complete, exiting')
  36
  37 #Errors
  38 s_invalid_cmdline = re.compile('Usage:[\S\s]*goat_pipeline.py')
  39 s_species_dir_err = re.compile('Error: Lane [1-8]:')
  40 s_goat_traceb = re.compile("^Traceback \(most recent call last\):")
  41
  42
  43 ##Ignore - Example of out above each ignore regex.
  44 #NOTE: Commenting out an ignore will cause it to be
  45 # logged as DEBUG with the logging module.
  46 #CF_STDERR_IGNORE_LIST = []
  47 s_skip = re.compile('s_[0-8]_[0-9]+')
  48
  49
  50 ##########################################
  51 # Pipeline Run Step (make -j8 recursive)
  52
  53 ##Info
  54
  55
  56 ##Errors
  57 s_make_error = re.compile('^make[\S\s]+Error')
  58 s_no_gnuplot = re.compile('gnuplot: command not found')
  59 s_no_convert = re.compile('^Can\'t exec "convert"')
  60 s_no_ghostscript = re.compile('gs: command not found')
  61
  62 ##Ignore - Example of out above each ignore regex.
  63 #NOTE: Commenting out an ignore will cause it to be
  64 # logged as DEBUG with the logging module.
  65 #
  66 PL_STDERR_IGNORE_LIST = []
  67 # Info: PF 11802
  68 PL_STDERR_IGNORE_LIST.append( re.compile('^Info: PF') )
  69 # About to analyse intensity file s_4_0101_sig2.txt
  70 PL_STDERR_IGNORE_LIST.append( re.compile('^About to analyse intensity file') )
  71 # Will send output to standard output
  72 PL_STDERR_IGNORE_LIST.append( re.compile('^Will send output to standard output') )
  73 # Found 31877 clusters
  74 PL_STDERR_IGNORE_LIST.append( re.compile('^Found [0-9]+ clusters') )
  75 # Will use quality criterion ((CHASTITY>=0.6)
  76 PL_STDERR_IGNORE_LIST.append( re.compile('^Will use quality criterion') )
  77 # Quality criterion translated to (($F[5]>=0.6))
  78 PL_STDERR_IGNORE_LIST.append( re.compile('^Quality criterion translated to') )
  79 # opened /woldlab/trog/data1/king/070924_USI-EAS44_0022_FC12150/Data/C1-36_Firecrest1.9.1_14-11-2007_king.4/Bustard1.9.1_14-11-2007_king/s_4_0101_qhg.txt
  80 #  AND
  81 # opened s_4_0103_qhg.txt
  82 PL_STDERR_IGNORE_LIST.append( re.compile('^opened[\S\s]+qhg.txt') )
  83 # 81129 sequences out of 157651 passed filter criteria
  84 PL_STDERR_IGNORE_LIST.append( re.compile('^[0-9]+ sequences out of [0-9]+ passed filter criteria') )
  85
  86
  87 def pl_stderr_ignore(line):
  88   """
  89   Searches lines for lines to ignore (i.e. not to log)
  90
  91   returns True if line should be ignored
  92   returns False if line should NOT be ignored
  93   """
  94   for s in PL_STDERR_IGNORE_LIST:
  95     if s.search(line):
  96       return True
  97   return False
  98
  99
 100 def config_stdout_handler(line, conf_info):
 101   """
 102   Processes each line of output from GOAT
 103   and stores useful information using the logging module
 104
 105   Loads useful information into conf_info as well, for future
 106   use outside the function.
 107
 108   returns True if found condition that signifies success.
 109   """
 110
 111   # Skip irrelevant line (without logging)
 112   if s_skip.search(line):
 113     pass
 114
 115   # Detect invalid command-line arguments
 116   elif s_invalid_cmdline.search(line):
 117     logging.error("Invalid commandline options!")
 118
 119   # Detect starting of configuration
 120   elif s_start.search(line):
 121     logging.info('START: Configuring pipeline')
 122
 123   # Detect it made it past invalid arguments
 124   elif s_gerald.search(line):
 125     logging.info('Running make now')
 126
 127   # Detect that make files have been generated (based on output)
 128   elif s_generating.search(line):
 129     logging.info('Make files generted')
 130     return True
 131
 132   # Capture run directory
 133   elif s_seq_folder.search(line):
 134     mo = s_seq_folder_sub.search(line)
 135     #Output changed when using --tiles=<tiles>
 136     # at least in pipeline v0.3.0b2
 137     if mo:
 138       firecrest_bustard_gerald_makefile = line[mo.end():]
 139       firecrest_bustard_gerald, junk = \
 140                                 os.path.split(firecrest_bustard_gerald_makefile)
 141       firecrest_bustard, junk = os.path.split(firecrest_bustard_gerald)
 142       firecrest, junk = os.path.split(firecrest_bustard)
 143
 144       conf_info.bustard_path = firecrest_bustard
 145       conf_info.run_path = firecrest
 146
 147     #Standard output handling
 148     else:
 149       print 'Sequence line:', line
 150       mo = s_seq_folder.search(line)
 151       conf_info.bustard_path = line[mo.end():]
 152       conf_info.run_path, temp = os.path.split(conf_info.bustard_path)
 153
 154   # Log all other output for debugging purposes
 155   else:
 156     logging.warning('CONF:?: %s' % (line))
 157
 158   return False
 159
 160
 161
 162 def config_stderr_handler(line, conf_info):
 163   """
 164   Processes each line of output from GOAT
 165   and stores useful information using the logging module
 166
 167   Loads useful information into conf_info as well, for future
 168   use outside the function.
 169
 170   returns RUN_ABORT upon detecting failure;
 171           True on success message;
 172           False if neutral message
 173             (i.e. doesn't signify failure or success)
 174   """
 175
 176   # Detect invalid species directory error
 177   if s_species_dir_err.search(line):
 178     logging.error(line)
 179     return RUN_ABORT
 180   # Detect goat_pipeline.py traceback
 181   elif s_goat_traceb.search(line):
 182     logging.error("Goat config script died, traceback in debug output")
 183     return RUN_ABORT
 184   # Detect indication of successful configuration (from stderr; odd, but ok)
 185   elif s_stderr_taskcomplete.search(line):
 186     logging.info('Configure step successful (from: stderr)')
 187     return True
 188   # Log all other output as debug output
 189   else:
 190     logging.debug('CONF:STDERR:?: %s' % (line))
 191
 192   # Neutral (not failure; nor success)
 193   return False
 194
 195
#FIXME: Temporary hack
# Module-level handle for the raw pipeline output: pipeline_stdout_handler
# writes every stdout line to it and the __main__ block closes it.
# Because it is opened at import time, merely importing this module
# creates/truncates pipeline_run.log in the current directory.
f = open('pipeline_run.log', 'w')
#ferr = open('pipeline_err.log', 'w')
 199
 200
 201
 202 def pipeline_stdout_handler(line, conf_info):
 203   """
 204   Processes each line of output from running the pipeline
 205   and stores useful information using the logging module
 206
 207   Loads useful information into conf_info as well, for future
 208   use outside the function.
 209
 210   returns True if found condition that signifies success.
 211   """
 212
 213   f.write(line + '\n')
 214
 215   return True
 216
 217
 218
 219 def pipeline_stderr_handler(line, conf_info):
 220   """
 221   """
 222
 223   if pl_stderr_ignore(line):
 224     pass
 225   elif s_make_error.search(line):
 226     logging.error("make error detected; run failed")
 227     return RUN_FAILED
 228   elif s_no_gnuplot.search(line):
 229     logging.error("gnuplot not found")
 230     return RUN_FAILED
 231   elif s_no_convert.search(line):
 232     logging.error("imagemagick's convert command not found")
 233     return RUN_FAILED
 234   elif s_no_ghostscript.search(line):
 235     logging.error("ghostscript not found")
 236     return RUN_FAILED
 237   else:
 238     logging.debug('PIPE:STDERR:?: %s' % (line))
 239
 240   return False
 241
 242
 243 def configure(conf_info):
 244   """
 245   Attempts to configure the GA pipeline using goat.
 246
 247   Uses logging module to store information about status.
 248
 249   returns True if configuration successful, otherwise False.
 250   """
 251   #ERROR Test:
 252   #pipe = subprocess.Popen(['goat_pipeline.py',
 253   #                         '--GERALD=config32bk.txt',
 254   #                         '--make .',],
 255   #                         #'.'],
 256   #                        stdout=subprocess.PIPE,
 257   #                        stderr=subprocess.PIPE)
 258
 259   #ERROR Test (2), causes goat_pipeline.py traceback
 260   #pipe = subprocess.Popen(['goat_pipeline.py',
 261   #                  '--GERALD=%s' % (conf_info.config_filepath),
 262   #                         '--tiles=s_4_100,s_4_101,s_4_102,s_4_103,s_4_104',
 263   #                         '--make',
 264   #                         '.'],
 265   #                        stdout=subprocess.PIPE,
 266   #                        stderr=subprocess.PIPE)
 267
 268   ##########################
 269   # Run configuration step
 270   #   Not a test; actual configure attempt.
 271   #pipe = subprocess.Popen(['goat_pipeline.py',
 272   #                  '--GERALD=%s' % (conf_info.config_filepath),
 273   #                         '--make',
 274   #                         '.'],
 275   #                        stdout=subprocess.PIPE,
 276   #                        stderr=subprocess.PIPE)
 277
 278   # CONTINUE HERE
 279   #FIXME: this only does a run on 5 tiles on lane 4
 280   pipe = subprocess.Popen(['goat_pipeline.py',
 281                     '--GERALD=%s' % (conf_info.config_filepath),
 282                            '--tiles=s_4_0100,s_4_0101,s_4_0102,s_4_0103,s_4_0104',
 283                            '--make',
 284                            '.'],
 285                           stdout=subprocess.PIPE,
 286                           stderr=subprocess.PIPE)
 287   ##################
 288   # Process stdout
 289   stdout_line = pipe.stdout.readline()
 290
 291   complete = False
 292   while stdout_line != '':
 293     # Handle stdout
 294     if config_stdout_handler(stdout_line, conf_info):
 295       complete = True
 296     stdout_line = pipe.stdout.readline()
 297
 298
 299   error_code = pipe.wait()
 300   if error_code:
 301     logging.error('Recieved error_code: %s' % (error_code))
 302   else:
 303     logging.info('We are go for launch!')
 304
 305   #Process stderr
 306   stderr_line = pipe.stderr.readline()
 307
 308   abort = 'NO!'
 309   stderr_success = False
 310   while stderr_line != '':
 311     stderr_status = config_stderr_handler(stderr_line, conf_info)
 312     if stderr_status == RUN_ABORT:
 313       abort = RUN_ABORT
 314     elif stderr_status is True:
 315       stderr_success = True
 316     stderr_line = pipe.stderr.readline()
 317
 318
 319   #Success requirements:
 320   # 1) The stdout completed without error
 321   # 2) The program exited with status 0
 322   # 3) No errors found in stdout
 323   print '#Expect: True, False, True, True'
 324   print complete, bool(error_code), abort != RUN_ABORT, stderr_success is True
 325   status = complete is True and \
 326            bool(error_code) is False and \
 327            abort != RUN_ABORT and \
 328            stderr_success is True
 329
 330   # If everything was successful, but for some reason
 331   #  we didn't retrieve the path info, log it.
 332   if status is True:
 333     if conf_info.bustard_path is None or conf_info.run_path is None:
 334       logging.error("Failed to retrieve run_path")
 335       return False
 336
 337   return status
 338
 339
 340 def run_pipeline(conf_info):
 341   """
 342   Run the pipeline and monitor status.
 343   """
 344   # Fail if the run_path doesn't actually exist
 345   if not os.path.exists(conf_info.run_path):
 346     logging.error('Run path does not exist: %s' \
 347               % (conf_info.run_path))
 348     return False
 349
 350   # Change cwd to run_path
 351   os.chdir(conf_info.run_path)
 352
 353   # Log pipeline starting
 354   logging.info('STARTING PIPELINE @ %s' % (time.ctime()))
 355
 356   # Start the pipeline (and hide!)
 357   pipe = subprocess.Popen(['make',
 358                            '-j8',
 359                            'recursive'],
 360                           stdout=subprocess.PIPE,
 361                           stderr=subprocess.PIPE)
 362
 363   line = pipe.stdout.readline()
 364
 365   complete = False
 366   while line != '':
 367     if pipeline_stdout_handler(line, conf_info):
 368       complete = True
 369     line = pipe.stdout.readline()
 370
 371   error_code = pipe.wait()
 372
 373   #ferr.write(pipe.stderr.read())
 374   #ferr.close()
 375
 376   stderr_line = pipe.stderr.readline()
 377
 378   run_succeded = False
 379   run_failed = False
 380   while stderr_line != '':
 381     stderr_status = pipeline_stderr_handler(stderr_line, conf_info)
 382     if stderr_status is True:
 383       run_succeded = True
 384     if stderr_status == RUN_FAILED:
 385       run_failed = True
 386     stderr_line = pipe.stderr.readline()
 387
 388   ###DEBUG###
 389   print 'RUN STATUS: expect: True, True, True, True'
 390   print '            Status:',
 391   print complete is True, error_code == 0,
 392   print run_succeded is True, run_failed is False
 393   ###END_DEBUG###
 394
 395   status = complete is True and \
 396            error_code == 0 and \
 397            run_succeded is True and \
 398            run_failed is False
 399
 400   return status
 401
 402
 403
 404 if __name__ == '__main__':
 405   ci = ConfigInfo()
 406   ci.config_filepath = 'config32bk.txt'
 407
 408   status = configure(ci)
 409   if status:
 410     print "Configure success"
 411   else:
 412     print "Configure failed"
 413
 414   print 'Run Dir:', ci.run_path
 415   print 'Bustard Dir:', ci.bustard_path
 416
 417   if status:
 418     print 'Running pipeline now!'
 419     run_status = run_pipeline(ci)
 420     if run_status is True:
 421       print 'Pipeline ran successfully.'
 422     else:
 423       print 'Pipeline run failed.'
 424
 425   #FIXME: Temperary hack
 426   f.close()