bin/config_pipeline.py

   1 #!/usr/bin/python
   2 import subprocess
   3 import re
   4 import os
   5 import logging
   6
   7 logging.basicConfig(level=logging.DEBUG,
   8                     format='%(asctime)s %(levelname)-8s %(message)s',
   9                     datefmt='%a, %d %b %Y %H:%M:%S',
  10                     filename='config_pipeline.log',
  11                     filemode='w')
  12
  13 class ConfigInfo:
  14
  15   def __init__(self):
  16     self.run_path = None
  17     self.bustard_path = None
  18     self.config_filepath = None
  19
  20 #FLAGS
  21 RUN_ABORT = 'abort'
  22
  23 #Info
  24 s_start = re.compile('Starting Genome Analyzer Pipeline')
  25 s_gerald = re.compile("[\S\s]+--GERALD[\S\s]+--make[\S\s]+")
  26 s_generating = re.compile('Generating journals, Makefiles and parameter files')
  27 s_seq_folder = re.compile('^Sequence folder: ')
  28 s_stderr_taskcomplete = re.compile('^Task complete, exiting')
  29
  30 #Errors
  31 s_invalid_cmdline = re.compile('Usage:[\S\s]*goat_pipeline.py')
  32 s_species_dir_err = re.compile('Error: Lane [1-8]:')
  33 s_goat_traceb = re.compile("^Traceback \(most recent call last\):")
  34
  35
  36 #Ignore
  37 s_skip = re.compile('s_[0-8]_[0-9]+')
  38
  39 def config_stdout_handler(line, conf_info):
  40   """
  41   Processes each line of output from GOAT
  42   and stores useful information using the logging module
  43
  44   Loads useful information into conf_info as well, for future
  45   use outside the function.
  46
  47   returns True if found condition that signifies success.
  48   """
  49
  50   # Irrelevant line
  51   if s_skip.search(line):
  52     pass
  53   elif s_invalid_cmdline.search(line):
  54     logging.error("Invalid commandline options!")
  55   elif s_start.search(line):
  56     logging.info('START: Configuring pipeline')
  57   elif s_gerald.search(line):
  58     logging.info('Running make now')
  59   elif s_generating.search(line):
  60     logging.info('Make files generted')
  61     return True
  62   elif s_seq_folder.search(line):
  63     mo = s_seq_folder.search(line)
  64     conf_info.bustard_path = line[mo.end():]
  65     conf_info.run_path, temp = os.path.split(conf_info.bustard_path)
  66   else:
  67     logging.warning('How to handle: %s' % (line))
  68
  69   return False
  70
  71
  72
  73 def config_stderr_handler(line, conf_info):
  74   """
  75   Processes each line of output from GOAT
  76   and stores useful information using the logging module
  77
  78   Loads useful information into conf_info as well, for future
  79   use outside the function.
  80
  81   returns RUN_ABORT upon detecting failure; True on success message
  82   """
  83
  84   if s_species_dir_err.search(line):
  85     logging.error(line)
  86     return RUN_ABORT
  87   elif s_goat_traceb.search(line):
  88     logging.error("Goat config script died, traceback in debug output")
  89     return RUN_ABORT
  90   elif s_stderr_taskcomplete.search(line):
  91     logging.info('Configure step successful (from: stderr)')
  92     return True
  93   else:
  94     logging.debug('STDERR: How to handle: %s' % (line))
  95
  96   return False
  97
#FIXME: Temporary hack
# Module-level log files, opened (and truncated) as soon as this module is
# loaded. f receives the pipeline's stdout through pipeline_handler();
# ferr receives the pipeline's stderr in run_pipeline().
f = open('pipeline_run.log', 'w')
ferr = open('pipeline_err.log', 'w')
 101
 102 def pipeline_handler(line, conf_info):
 103   """
 104   Processes each line of output from running the pipeline
 105   and stores useful information using the logging module
 106
 107   Loads useful information into conf_info as well, for future
 108   use outside the function.
 109
 110   returns True if found condition that signifies success.
 111   """
 112
 113   f.write(line + '\n')
 114
 115   return True
 116
 117
 118 def configure(conf_info):
 119   """
 120   Attempts to configure the GA pipeline using goat.
 121
 122   Uses logging module to store information about status.
 123
 124   returns True if configuration successful, otherwise False.
 125   """
 126   #ERROR Test:
 127   #pipe = subprocess.Popen(['goat_pipeline.py',
 128   #                         '--GERALD=config32bk.txt',
 129   #                         '--make .',],
 130   #                         #'.'],
 131   #                        stdout=subprocess.PIPE,
 132   #                        stderr=subprocess.PIPE)
 133
 134   #ERROR Test (2), causes goat_pipeline.py traceback
 135   #pipe = subprocess.Popen(['goat_pipeline.py',
 136   #                  '--GERALD=%s' % (conf_info.config_filepath),
 137   #                         '--tiles=s_4_100,s_4_101,s_4_102,s_4_103,s_4_104',
 138   #                         '--make',
 139   #                         '.'],
 140   #                        stdout=subprocess.PIPE,
 141   #                        stderr=subprocess.PIPE)
 142
 143   #Not a test; actual run attempt.
 144   pipe = subprocess.Popen(['goat_pipeline.py',
 145                     '--GERALD=%s' % (conf_info.config_filepath),
 146                            '--make',
 147                            '.'],
 148                           stdout=subprocess.PIPE,
 149                           stderr=subprocess.PIPE)
 150
 151   # CONTINUE HERE
 152   #FIXME: this only does a run on 5 tiles on lane 4
 153   #pipe = subprocess.Popen(['goat_pipeline.py',
 154   #                  '--GERALD=%s' % (conf_info.config_filepath),
 155   #                         '--tiles=s_4_0100,s_4_0101,s_4_0102,s_4_103,s_4_104',
 156   #                         '--make',
 157   #                         '.'],
 158   #                        stdout=subprocess.PIPE,
 159   #                        stderr=subprocess.PIPE)
 160
 161   #Process stdout
 162   stdout_line = pipe.stdout.readline()
 163
 164   complete = False
 165   while stdout_line != '':
 166     # Handle stdout
 167     if config_stdout_handler(stdout_line, conf_info):
 168       complete = True
 169     stdout_line = pipe.stdout.readline()
 170
 171
 172   error_code = pipe.wait()
 173   if error_code:
 174     logging.error('Recieved error_code: %s' % (error_code))
 175   else:
 176     logging.info ('We are go for launch!')
 177
 178   #Process stderr
 179   stderr_line = pipe.stderr.readline()
 180
 181   abort = 'NO!'
 182   stderr_success = False
 183   while stderr_line != '':
 184     stderr_status = config_stderr_handler(stderr_line, conf_info)
 185     if stderr_status == RUN_ABORT:
 186       abort = RUN_ABORT
 187     elif stderr_status is True:
 188       stderr_success = True
 189     stderr_line = pipe.stderr.readline()
 190
 191
 192   #Success requirements:
 193   # 1) The stdout completed without error
 194   # 2) The program exited with status 0
 195   # 3) No errors found in stdout
 196   print '#Expect: True, False, True, True'
 197   print complete, bool(error_code), abort != RUN_ABORT, stderr_success is True
 198   status = complete is True and \
 199            bool(error_code) is False and \
 200            abort != RUN_ABORT and \
 201            stderr_success is True
 202
 203   # If everything was successful, but for some reason
 204   #  we didn't retrieve the path info, log it.
 205   if status is True:
 206     if conf_info.bustard_path is None or conf_info.run_path is None:
 207       logging.error("Failed to retrieve run_path")
 208       return False
 209
 210   return status
 211
 212
 213 def run_pipeline(conf_info):
 214
 215   # Fail if the run_path doesn't actually exist
 216   if not os.path.exists(conf_info.run_path):
 217     logging.error('Run path does not exist: %s' \
 218               % (conf_info.run_path))
 219     return False
 220
 221   # Change cwd to run_path
 222   os.chdir(conf_info.run_path)
 223
 224   # Start the pipeline (and hide!)
 225   pipe = subprocess.Popen(['make',
 226                            '-j8',
 227                            'recursive'],
 228                           stdout=subprocess.PIPE,
 229                           stderr=subprocess.PIPE)
 230
 231   line = pipe.stdout.readline()
 232
 233   complete = False
 234   while line != '':
 235     if pipeline_handler(line, conf_info):
 236       complete = True
 237     line = pipe.stdout.readline()
 238
 239   error_code = pipe.wait()
 240
 241   ferr.write(pipe.stderr.read())
 242   ferr.close()
 243
 244
 245 if __name__ == '__main__':
 246   ci = ConfigInfo()
 247   ci.config_filepath = 'config32bk.txt'
 248
 249   status = configure(ci)
 250   if status:
 251     print "Configure success"
 252   else:
 253     print "Configure failed"
 254
 255   print 'Run Dir:', ci.run_path
 256   print 'Bustard Dir:', ci.bustard_path
 257
 258   if status:
 259     print 'Running pipeline now!'
 260     run_pipeline(ci)
 261
 262   #FIXME: Temperary hack
 263   f.close()