bin/config_pipeline.py

   1 #!/usr/bin/python
   2 import subprocess
   3 import logging
   4 import time
   5 import re
   6 import os
   7
   8
   9 logging.basicConfig(level=logging.DEBUG,
  10                     format='%(asctime)s %(levelname)-8s %(message)s',
  11                     datefmt='%a, %d %b %Y %H:%M:%S',
  12                     filename='config_pipeline.log',
  13                     filemode='w')
  14
  15 class ConfigInfo:
  16
  17   def __init__(self):
  18     self.run_path = None
  19     self.bustard_path = None
  20     self.config_filepath = None
  21
  22 #FLAGS
  23 RUN_ABORT = 'abort'
  24 RUN_FAILED = 'failed'
  25
  26
  27 #####################################
  28 # Configure Step (goat_pipeline.py)
  29 #Info
  30 s_start = re.compile('Starting Genome Analyzer Pipeline')
  31 s_gerald = re.compile("[\S\s]+--GERALD[\S\s]+--make[\S\s]+")
  32 s_generating = re.compile('Generating journals, Makefiles and parameter files')
  33 s_seq_folder = re.compile('^Sequence folder: ')
  34 s_seq_folder_sub = re.compile('want to make ')
  35 s_stderr_taskcomplete = re.compile('^Task complete, exiting')
  36
  37 #Errors
  38 s_invalid_cmdline = re.compile('Usage:[\S\s]*goat_pipeline.py')
  39 s_species_dir_err = re.compile('Error: Lane [1-8]:')
  40 s_goat_traceb = re.compile("^Traceback \(most recent call last\):")
  41
  42
  43 #Ignore
  44 s_skip = re.compile('s_[0-8]_[0-9]+')
  45
  46 ##########################################
  47 # Pipeline Run Step (make -j8 recursive)
  48
  49 ##Info
  50
  51
  52 ##Errors
  53 s_make_error = re.compile('^make[\S\s]+Error')
  54 s_no_gnuplot = re.compile('gnuplot: command not found')
  55 s_no_convert = re.compile('^Can\'t exec "convert"')
  56
  57 ##Ignore
  58 PL_STDERR_IGNORE_LIST = []
  59 # Info: PF 11802
  60 PL_STDERR_IGNORE_LIST.append( re.compile('^Info: PF') )
  61 # About to analyse intensity file s_4_0101_sig2.txt
  62 PL_STDERR_IGNORE_LIST.append( re.compile('^About to analyse intensity file') )
  63 # Will send output to standard output
  64 PL_STDERR_IGNORE_LIST.append( re.compile('^Will send output to standard output') )
  65 # Found 31877 clusters
  66 PL_STDERR_IGNORE_LIST.append( re.compile('^Found [0-9]+ clusters') )
  67 # Will use quality criterion ((CHASTITY>=0.6)
  68 PL_STDERR_IGNORE_LIST.append( re.compile('^Will use quality criterion') )
  69 # Quality criterion translated to (($F[5]>=0.6))
  70 PL_STDERR_IGNORE_LIST.append( re.compile('^Quality criterion translated to') )
  71 # opened /woldlab/trog/data1/king/070924_USI-EAS44_0022_FC12150/Data/C1-36_Firecrest1.9.1_14-11-2007_king.4/Bustard1.9.1_14-11-2007_king/s_4_0101_qhg.txt
  72 #  AND
  73 # opened s_4_0103_qhg.txt
  74 PL_STDERR_IGNORE_LIST.append( re.compile('^opened[\S\s]+qhg.txt') )
  75
  76
  77 def pl_stderr_ignore(line):
  78   """
  79   Searches lines for lines to ignore (i.e. not to log)
  80
  81   returns True if line should be ignored
  82   returns False if line should NOT be ignored
  83   """
  84   for s in PL_STDERR_IGNORE_LIST:
  85     if s.search(line):
  86       return True
  87   return False
  88
  89
  90 def config_stdout_handler(line, conf_info):
  91   """
  92   Processes each line of output from GOAT
  93   and stores useful information using the logging module
  94
  95   Loads useful information into conf_info as well, for future
  96   use outside the function.
  97
  98   returns True if found condition that signifies success.
  99   """
 100
 101   # Skip irrelevant line (without logging)
 102   if s_skip.search(line):
 103     pass
 104
 105   # Detect invalid command-line arguments
 106   elif s_invalid_cmdline.search(line):
 107     logging.error("Invalid commandline options!")
 108
 109   # Detect starting of configuration
 110   elif s_start.search(line):
 111     logging.info('START: Configuring pipeline')
 112
 113   # Detect it made it past invalid arguments
 114   elif s_gerald.search(line):
 115     logging.info('Running make now')
 116
 117   # Detect that make files have been generated (based on output)
 118   elif s_generating.search(line):
 119     logging.info('Make files generted')
 120     return True
 121
 122   # Capture run directory
 123   elif s_seq_folder.search(line):
 124     mo = s_seq_folder_sub.search(line)
 125     #Output changed when using --tiles=<tiles>
 126     # at least in pipeline v0.3.0b2
 127     if mo:
 128       firecrest_bustard_gerald_makefile = line[mo.end():]
 129       firecrest_bustard_gerald, junk = \
 130                                 os.path.split(firecrest_bustard_gerald_makefile)
 131       firecrest_bustard, junk = os.path.split(firecrest_bustard_gerald)
 132       firecrest, junk = os.path.split(firecrest_bustard)
 133
 134       conf_info.bustard_path = firecrest_bustard
 135       conf_info.run_path = firecrest
 136
 137     #Standard output handling
 138     else:
 139       print 'Sequence line:', line
 140       mo = s_seq_folder.search(line)
 141       conf_info.bustard_path = line[mo.end():]
 142       conf_info.run_path, temp = os.path.split(conf_info.bustard_path)
 143
 144   # Log all other output for debugging purposes
 145   else:
 146     logging.warning('CONF:?: %s' % (line))
 147
 148   return False
 149
 150
 151
 152 def config_stderr_handler(line, conf_info):
 153   """
 154   Processes each line of output from GOAT
 155   and stores useful information using the logging module
 156
 157   Loads useful information into conf_info as well, for future
 158   use outside the function.
 159
 160   returns RUN_ABORT upon detecting failure;
 161           True on success message;
 162           False if neutral message
 163             (i.e. doesn't signify failure or success)
 164   """
 165
 166   # Detect invalid species directory error
 167   if s_species_dir_err.search(line):
 168     logging.error(line)
 169     return RUN_ABORT
 170   # Detect goat_pipeline.py traceback
 171   elif s_goat_traceb.search(line):
 172     logging.error("Goat config script died, traceback in debug output")
 173     return RUN_ABORT
 174   # Detect indication of successful configuration (from stderr; odd, but ok)
 175   elif s_stderr_taskcomplete.search(line):
 176     logging.info('Configure step successful (from: stderr)')
 177     return True
 178   # Log all other output as debug output
 179   else:
 180     logging.debug('CONF:STDERR:?: %s' % (line))
 181
 182   # Neutral (not failure; nor success)
 183   return False
 184
 185
#FIXME: Temporary hack
# Module-level handle for the raw pipeline stdout log.  It is opened in
# 'w' mode as soon as this module is loaded, written to by
# pipeline_stdout_handler, and closed at the end of the __main__ block.
f = open('pipeline_run.log.1', 'w')
#ferr = open('pipeline_err.log.1', 'w')
 189
 190
 191
 192 def pipeline_stdout_handler(line, conf_info):
 193   """
 194   Processes each line of output from running the pipeline
 195   and stores useful information using the logging module
 196
 197   Loads useful information into conf_info as well, for future
 198   use outside the function.
 199
 200   returns True if found condition that signifies success.
 201   """
 202
 203   f.write(line + '\n')
 204
 205   return True
 206
 207
 208
 209 def pipeline_stderr_handler(line, conf_info):
 210   """
 211   """
 212
 213   if pl_stderr_ignore(line):
 214     pass
 215   elif s_make_error.search(line):
 216     logging.error("make error detected; run failed")
 217     return RUN_FAILED
 218   elif s_no_gnuplot.search(line):
 219     logging.error("gnuplot not found")
 220     return RUN_FAILED
 221   elif s_no_convert.search(line):
 222     logging.error("imagemagick's convert command not found")
 223     return RUN_FAILED
 224   else:
 225     logging.debug('PIPE:STDERR:?: %s' % (line))
 226
 227   return False
 228
 229
 230 def configure(conf_info):
 231   """
 232   Attempts to configure the GA pipeline using goat.
 233
 234   Uses logging module to store information about status.
 235
 236   returns True if configuration successful, otherwise False.
 237   """
 238   #ERROR Test:
 239   #pipe = subprocess.Popen(['goat_pipeline.py',
 240   #                         '--GERALD=config32bk.txt',
 241   #                         '--make .',],
 242   #                         #'.'],
 243   #                        stdout=subprocess.PIPE,
 244   #                        stderr=subprocess.PIPE)
 245
 246   #ERROR Test (2), causes goat_pipeline.py traceback
 247   #pipe = subprocess.Popen(['goat_pipeline.py',
 248   #                  '--GERALD=%s' % (conf_info.config_filepath),
 249   #                         '--tiles=s_4_100,s_4_101,s_4_102,s_4_103,s_4_104',
 250   #                         '--make',
 251   #                         '.'],
 252   #                        stdout=subprocess.PIPE,
 253   #                        stderr=subprocess.PIPE)
 254
 255   ##########################
 256   # Run configuration step
 257   #   Not a test; actual configure attempt.
 258   #pipe = subprocess.Popen(['goat_pipeline.py',
 259   #                  '--GERALD=%s' % (conf_info.config_filepath),
 260   #                         '--make',
 261   #                         '.'],
 262   #                        stdout=subprocess.PIPE,
 263   #                        stderr=subprocess.PIPE)
 264
 265   # CONTINUE HERE
 266   #FIXME: this only does a run on 5 tiles on lane 4
 267   pipe = subprocess.Popen(['goat_pipeline.py',
 268                     '--GERALD=%s' % (conf_info.config_filepath),
 269                            '--tiles=s_4_0100,s_4_0101,s_4_0102,s_4_0103,s_4_0104',
 270                            '--make',
 271                            '.'],
 272                           stdout=subprocess.PIPE,
 273                           stderr=subprocess.PIPE)
 274   ##################
 275   # Process stdout
 276   stdout_line = pipe.stdout.readline()
 277
 278   complete = False
 279   while stdout_line != '':
 280     # Handle stdout
 281     if config_stdout_handler(stdout_line, conf_info):
 282       complete = True
 283     stdout_line = pipe.stdout.readline()
 284
 285
 286   error_code = pipe.wait()
 287   if error_code:
 288     logging.error('Recieved error_code: %s' % (error_code))
 289   else:
 290     logging.info('We are go for launch!')
 291
 292   #Process stderr
 293   stderr_line = pipe.stderr.readline()
 294
 295   abort = 'NO!'
 296   stderr_success = False
 297   while stderr_line != '':
 298     stderr_status = config_stderr_handler(stderr_line, conf_info)
 299     if stderr_status == RUN_ABORT:
 300       abort = RUN_ABORT
 301     elif stderr_status is True:
 302       stderr_success = True
 303     stderr_line = pipe.stderr.readline()
 304
 305
 306   #Success requirements:
 307   # 1) The stdout completed without error
 308   # 2) The program exited with status 0
 309   # 3) No errors found in stdout
 310   print '#Expect: True, False, True, True'
 311   print complete, bool(error_code), abort != RUN_ABORT, stderr_success is True
 312   status = complete is True and \
 313            bool(error_code) is False and \
 314            abort != RUN_ABORT and \
 315            stderr_success is True
 316
 317   # If everything was successful, but for some reason
 318   #  we didn't retrieve the path info, log it.
 319   if status is True:
 320     if conf_info.bustard_path is None or conf_info.run_path is None:
 321       logging.error("Failed to retrieve run_path")
 322       return False
 323
 324   return status
 325
 326
 327 def run_pipeline(conf_info):
 328   """
 329   Run the pipeline and monitor status.
 330   """
 331   # Fail if the run_path doesn't actually exist
 332   if not os.path.exists(conf_info.run_path):
 333     logging.error('Run path does not exist: %s' \
 334               % (conf_info.run_path))
 335     return False
 336
 337   # Change cwd to run_path
 338   os.chdir(conf_info.run_path)
 339
 340   # Log pipeline starting
 341   logging.info('STARTING PIPELINE @ %s' % (time.ctime()))
 342
 343   # Start the pipeline (and hide!)
 344   pipe = subprocess.Popen(['make',
 345                            '-j8',
 346                            'recursive'],
 347                           stdout=subprocess.PIPE,
 348                           stderr=subprocess.PIPE)
 349
 350   line = pipe.stdout.readline()
 351
 352   complete = False
 353   while line != '':
 354     if pipeline_stdout_handler(line, conf_info):
 355       complete = True
 356     line = pipe.stdout.readline()
 357
 358   error_code = pipe.wait()
 359
 360   #ferr.write(pipe.stderr.read())
 361   #ferr.close()
 362
 363   stderr_line = pipe.stderr.readline()
 364
 365   run_succeded = False
 366   run_failed = False
 367   while stderr_line != '':
 368     stderr_status = pipeline_stderr_handler(stderr_line, conf_info)
 369     if stderr_status is True:
 370       run_succeded = True
 371     if stderr_status == RUN_FAILED:
 372       run_failed = True
 373     stderr_line = pipe.stderr.readline()
 374
 375   ###DEBUG###
 376   print 'RUN STATUS: expect: True, True, True, True'
 377   print '            Status:',
 378   print complete is True, error_code == 0,
 379   print run_succeded is True, run_failed is False
 380   ###END_DEBUG###
 381
 382   status = complete is True and \
 383            error_code == 0 and \
 384            run_succeded is True and \
 385            run_failed is False
 386
 387   return status
 388
 389
 390
 391 if __name__ == '__main__':
 392   ci = ConfigInfo()
 393   ci.config_filepath = 'config32bk.txt'
 394
 395   status = configure(ci)
 396   if status:
 397     print "Configure success"
 398   else:
 399     print "Configure failed"
 400
 401   print 'Run Dir:', ci.run_path
 402   print 'Bustard Dir:', ci.bustard_path
 403
 404   if status:
 405     print 'Running pipeline now!'
 406     run_pipeline(ci)
 407
 408   #FIXME: Temperary hack
 409   f.close()