Migration of runner bot to new package directories.

author Brandon King <kingb@caltech.edu>

Tue, 5 Aug 2008 20:03:40 +0000 (20:03 +0000)

committer Brandon King <kingb@caltech.edu>

Tue, 5 Aug 2008 20:03:40 +0000 (20:03 +0000)
author Brandon King <kingb@caltech.edu>
Tue, 5 Aug 2008 20:03:40 +0000 (20:03 +0000)
committer Brandon King <kingb@caltech.edu>
Tue, 5 Aug 2008 20:03:40 +0000 (20:03 +0000)
diff --git a/htswcommon/htswcommon/util/alphanum.py b/htswcommon/htswcommon/util/alphanum.py

new file mode 100644 (file)

index 0000000..8893bdb
--- /dev/null
+++ b/htswcommon/htswcommon/util/alphanum.py
@@ -0,0 +1,61 @@
+#\r
+# The Alphanum Algorithm is an improved sorting algorithm for strings\r
+# containing numbers.  Instead of sorting numbers in ASCII order like\r
+# a standard sort, this algorithm sorts numbers in numeric order.\r
+#\r
+# The Alphanum Algorithm is discussed at http://www.DaveKoelle.com\r
+#\r
+#* Python implementation provided by Chris Hulan (chris.hulan@gmail.com)\r
+#* Distributed under same license as original\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or any later version.\r
+#\r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\r
+# Lesser General Public License for more details.\r
+#\r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA\r
+#\r
+\r
+import re\r
+\r
+#\r
+# TODO: Make decimal points be considered in the same class as digits\r
+#\r
+\r
+def chunkify(str):\r
+       """return a list of numbers and non-numeric substrings of +str+\r
+\r
+       the numeric substrings are converted to integer, non-numeric are left as is\r
+       """\r
+       chunks = re.findall("(\d+|\D+)",str)\r
+       chunks = [re.match('\d',x) and int(x) or x for x in chunks] #convert numeric strings to numbers\r
+       return chunks\r
+\r
+def alphanum(a,b):\r
+       """breaks +a+ and +b+ into pieces and returns left-to-right comparison of the pieces\r
+\r
+       +a+ and +b+ are expected to be strings (for example file names) with numbers and non-numeric characters\r
+       Split the values into list of numbers and non numeric sub-strings and so comparison of numbers gives\r
+       Numeric sorting, comparison of non-numeric gives Lexicographic order\r
+       """\r
+       # split strings into chunks\r
+       aChunks = chunkify(a)\r
+       bChunks = chunkify(b)\r
+\r
+       return cmp(aChunks,bChunks) #built in comparison works once data is prepared\r
+\r
+\r
+\r
+if __name__ == "__main__":\r
+       unsorted = ["1000X Radonius Maximus","10X Radonius","200X Radonius","20X Radonius","20X Radonius Prime","30X Radonius","40X Radonius","Allegia 50 Clasteron","Allegia 500 Clasteron","Allegia 51 Clasteron","Allegia 51B Clasteron","Allegia 52 Clasteron","Allegia 60 Clasteron","Alpha 100","Alpha 2","Alpha 200","Alpha 2A","Alpha 2A-8000","Alpha 2A-900","Callisto Morphamax","Callisto Morphamax 500","Callisto Morphamax 5000","Callisto Morphamax 600","Callisto Morphamax 700","Callisto Morphamax 7000","Callisto Morphamax 7000 SE","Callisto Morphamax 7000 SE2","QRS-60 Intrinsia Machine","QRS-60F Intrinsia Machine","QRS-62 Intrinsia Machine","QRS-62F Intrinsia Machine","Xiph Xlater 10000","Xiph Xlater 2000","Xiph Xlater 300","Xiph Xlater 40","Xiph Xlater 5","Xiph Xlater 50","Xiph Xlater 500","Xiph Xlater 5000","Xiph Xlater 58"]\r
+       sorted = unsorted[:]\r
+       sorted.sort(alphanum)\r
+       print '+++++Sorted...++++'\r
+       print '\n'.join(sorted)\r
diff --git a/htswdataprod/htswdataprod/automation/runner.py b/htswdataprod/htswdataprod/automation/runner.py

index 84e9af2d310d36c37cbdf632b4f0846adabf7e68..391e7dd43789e95c0e672459a4685e37b8b7d5b2 100644 (file)
--- a/htswdataprod/htswdataprod/automation/runner.py
+++ b/htswdataprod/htswdataprod/automation/runner.py
@@ -9,8 +9,8 @@ import threading
  
  from benderjab import rpc
  
-from gaworkflow.pipeline.configure_run import *
-from gaworkflow.pipeline.monitors import _percentCompleted
+from htswdataprod.configure_run import *
+from htswdataprod.monitors import _percentCompleted
  
  #s_fc = re.compile('FC[0-9]+')
  s_fc = re.compile('_[0-9a-zA-Z]*$')
diff --git a/htswdataprod/htswdataprod/configure_run.py b/htswdataprod/htswdataprod/configure_run.py

new file mode 100644 (file)

index 0000000..0ad9b01
--- /dev/null
+++ b/htswdataprod/htswdataprod/configure_run.py
@@ -0,0 +1,606 @@
+#!/usr/bin/python
+import subprocess
+import logging
+import time
+import re
+import os
+
+from htswdataprod.retrieve_config import getCombinedOptions, saveConfigFile
+from htswdataprod.retrieve_config import FlowCellNotFound, WebError404
+from htswdataprod.genome_mapper import DuplicateGenome, getAvailableGenomes, constructMapperDict
+from htswdataprod.run_status import GARunStatus
+
+from pyinotify import WatchManager, ThreadedNotifier
+from pyinotify import EventsCodes, ProcessEvent
+
+class ConfigInfo:
+  
+  def __init__(self):
+    #run_path = firecrest analysis directory to run analysis from
+    self.run_path = None
+    self.bustard_path = None
+    self.config_filepath = None
+    self.status = None
+
+    #top level directory where all analyses are placed
+    self.base_analysis_dir = None
+    #analysis_dir, top level analysis dir...
+    # base_analysis_dir + '/070924_USI-EAS44_0022_FC12150'
+    self.analysis_dir = None
+
+
+  def createStatusObject(self):
+    """
+    Creates a status object which can be queried for
+    status of running the pipeline
+
+    returns True if object created
+    returns False if object cannot be created
+    """
+    if self.config_filepath is None:
+      return False
+
+    self.status = GARunStatus(self.config_filepath)
+    return True
+
+
+
+####################################
+# inotify event processor
+
+s_firecrest_finished = re.compile('Firecrest[0-9\._\-A-Za-z]+/finished.txt')
+s_bustard_finished = re.compile('Bustard[0-9\._\-A-Za-z]+/finished.txt')
+s_gerald_finished = re.compile('GERALD[0-9\._\-A-Za-z]+/finished.txt')
+
+s_gerald_all = re.compile('Firecrest[0-9\._\-A-Za-z]+/Bustard[0-9\._\-A-Za-z]+/GERALD[0-9\._\-A-Za-z]+/')
+s_bustard_all = re.compile('Firecrest[0-9\._\-A-Za-z]+/Bustard[0-9\._\-A-Za-z]+/')
+s_firecrest_all = re.compile('Firecrest[0-9\._\-A-Za-z]+/')
+
+class RunEvent(ProcessEvent):
+
+  def __init__(self, conf_info):
+
+    self.run_status_dict = {'firecrest': False,
+                            'bustard': False,
+                            'gerald': False}
+
+    self._ci = conf_info
+
+    ProcessEvent.__init__(self)
+    
+
+  def process_IN_CREATE(self, event):
+    fullpath = os.path.join(event.path, event.name)
+    if s_finished.search(fullpath):
+      logging.info("File Found: %s" % (fullpath))
+
+      if s_firecrest_finished.search(fullpath):
+        self.run_status_dict['firecrest'] = True
+        self._ci.status.updateFirecrest(event.name)
+      elif s_bustard_finished.search(fullpath):
+        self.run_status_dict['bustard'] = True
+        self._ci.status.updateBustard(event.name)
+      elif s_gerald_finished.search(fullpath):
+        self.run_status_dict['gerald'] = True
+        self._ci.status.updateGerald(event.name)
+
+    #WARNING: The following order is important!!
+    # Firecrest regex will catch all gerald, bustard, and firecrest
+    # Bustard regex will catch all gerald and bustard
+    # Gerald regex will catch all gerald
+    # So, order needs to be Gerald, Bustard, Firecrest, or this
+    #  won't work properly.
+    elif s_gerald_all.search(fullpath):
+      self._ci.status.updateGerald(event.name)
+    elif s_bustard_all.search(fullpath):
+      self._ci.status.updateBustard(event.name)
+    elif s_firecrest_all.search(fullpath):
+      self._ci.status.updateFirecrest(event.name)
+      
+    #print "Create: %s" % (os.path.join(event.path, event.name))
+
+  def process_IN_DELETE(self, event):
+    #print "Remove %s" % (os.path.join(event.path, event.name))
+    pass
+
+
+
+
+#FLAGS
+# Config Step Error
+RUN_ABORT = 'abort'
+# Run Step Error
+RUN_FAILED = 'failed'
+
+
+#####################################
+# Configure Step (goat_pipeline.py)
+#Info
+s_start = re.compile('Starting Genome Analyzer Pipeline')
+s_gerald = re.compile("[\S\s]+--GERALD[\S\s]+--make[\S\s]+")
+s_generating = re.compile('^Generating journals, Makefiles')
+s_seq_folder = re.compile('^Sequence folder: ')
+s_seq_folder_sub = re.compile('want to make ')
+s_stderr_taskcomplete = re.compile('^Task complete, exiting')
+
+#Errors
+s_invalid_cmdline = re.compile('Usage:[\S\s]*goat_pipeline.py')
+s_species_dir_err = re.compile('Error: Lane [1-8]:')
+s_goat_traceb = re.compile("^Traceback \(most recent call last\):")
+s_missing_cycles = re.compile('^Error: Tile s_[1-8]_[0-9]+: Different number of cycles: [0-9]+ instead of [0-9]+')
+
+SUPPRESS_MISSING_CYCLES = False
+
+
+##Ignore - Example of out above each ignore regex.
+#NOTE: Commenting out an ignore will cause it to be
+# logged as DEBUG with the logging module.
+#CF_STDERR_IGNORE_LIST = []
+s_skip = re.compile('s_[0-8]_[0-9]+')
+
+
+##########################################
+# Pipeline Run Step (make -j8 recursive)
+
+##Info
+s_finished = re.compile('finished')
+
+##Errors
+s_make_error = re.compile('^make[\S\s]+Error')
+s_no_gnuplot = re.compile('gnuplot: command not found')
+s_no_convert = re.compile('^Can\'t exec "convert"')
+s_no_ghostscript = re.compile('gs: command not found')
+
+##Ignore - Example of out above each ignore regex.
+#NOTE: Commenting out an ignore will cause it to be
+# logged as DEBUG with the logging module.
+#
+PL_STDERR_IGNORE_LIST = []
+# Info: PF 11802
+PL_STDERR_IGNORE_LIST.append( re.compile('^Info: PF') )
+# About to analyse intensity file s_4_0101_sig2.txt
+PL_STDERR_IGNORE_LIST.append( re.compile('^About to analyse intensity file') )
+# Will send output to standard output
+PL_STDERR_IGNORE_LIST.append( re.compile('^Will send output to standard output') )
+# Found 31877 clusters
+PL_STDERR_IGNORE_LIST.append( re.compile('^Found [0-9]+ clusters') )
+# Will use quality criterion ((CHASTITY>=0.6)
+PL_STDERR_IGNORE_LIST.append( re.compile('^Will use quality criterion') )
+# Quality criterion translated to (($F[5]>=0.6))
+PL_STDERR_IGNORE_LIST.append( re.compile('^Quality criterion translated to') )
+# opened /woldlab/trog/data1/king/070924_USI-EAS44_0022_FC12150/Data/C1-36_Firecrest1.9.1_14-11-2007_king.4/Bustard1.9.1_14-11-2007_king/s_4_0101_qhg.txt
+#  AND
+# opened s_4_0103_qhg.txt
+PL_STDERR_IGNORE_LIST.append( re.compile('^opened[\S\s]+qhg.txt') )
+# 81129 sequences out of 157651 passed filter criteria
+PL_STDERR_IGNORE_LIST.append( re.compile('^[0-9]+ sequences out of [0-9]+ passed filter criteria') )
+
+
+def pl_stderr_ignore(line):
+  """
+  Searches lines for lines to ignore (i.e. not to log)
+
+  returns True if line should be ignored
+  returns False if line should NOT be ignored
+  """
+  for s in PL_STDERR_IGNORE_LIST:
+    if s.search(line):
+      return True
+  return False
+
+
+def config_stdout_handler(line, conf_info):
+  """
+  Processes each line of output from GOAT
+  and stores useful information using the logging module
+
+  Loads useful information into conf_info as well, for future
+  use outside the function.
+
+  returns True if found condition that signifies success.
+  """
+
+  # Skip irrelevant line (without logging)
+  if s_skip.search(line):
+    pass
+
+  # Detect invalid command-line arguments
+  elif s_invalid_cmdline.search(line):
+    logging.error("Invalid commandline options!")
+
+  # Detect starting of configuration
+  elif s_start.search(line):
+    logging.info('START: Configuring pipeline')
+
+  # Detect it made it past invalid arguments
+  elif s_gerald.search(line):
+    logging.info('Running make now')
+
+  # Detect that make files have been generated (based on output)
+  elif s_generating.search(line):
+    logging.info('Make files generted')
+    return True
+
+  # Capture run directory
+  elif s_seq_folder.search(line):
+    mo = s_seq_folder_sub.search(line)
+    #Output changed when using --tiles=<tiles>
+    # at least in pipeline v0.3.0b2
+    if mo:
+      firecrest_bustard_gerald_makefile = line[mo.end():]
+      firecrest_bustard_gerald, junk = \
+                                os.path.split(firecrest_bustard_gerald_makefile)
+      firecrest_bustard, junk = os.path.split(firecrest_bustard_gerald)
+      firecrest, junk = os.path.split(firecrest_bustard)
+
+      conf_info.bustard_path = firecrest_bustard
+      conf_info.run_path = firecrest
+    
+    #Standard output handling
+    else:
+      print 'Sequence line:', line
+      mo = s_seq_folder.search(line)
+      conf_info.bustard_path = line[mo.end():]
+      conf_info.run_path, temp = os.path.split(conf_info.bustard_path)
+
+  # Log all other output for debugging purposes
+  else:
+    logging.warning('CONF:?: %s' % (line))
+
+  return False
+
+
+
+def config_stderr_handler(line, conf_info):
+  """
+  Processes each line of output from GOAT
+  and stores useful information using the logging module
+
+  Loads useful information into conf_info as well, for future
+  use outside the function.
+
+  returns RUN_ABORT upon detecting failure;
+          True on success message;
+          False if neutral message
+            (i.e. doesn't signify failure or success)
+  """
+  global SUPPRESS_MISSING_CYCLES
+
+  # Detect invalid species directory error
+  if s_species_dir_err.search(line):
+    logging.error(line)
+    return RUN_ABORT
+  # Detect goat_pipeline.py traceback
+  elif s_goat_traceb.search(line):
+    logging.error("Goat config script died, traceback in debug output")
+    return RUN_ABORT
+  # Detect indication of successful configuration (from stderr; odd, but ok)
+  elif s_stderr_taskcomplete.search(line):
+    logging.info('Configure step successful (from: stderr)')
+    return True
+  # Detect missing cycles
+  elif s_missing_cycles.search(line):
+
+    # Only display error once
+    if not SUPPRESS_MISSING_CYCLES:
+      logging.error("Missing cycles detected; Not all cycles copied?")
+      logging.debug("CONF:STDERR:MISSING_CYCLES: %s" % (line))
+      SUPPRESS_MISSING_CYCLES = True
+    return RUN_ABORT
+  
+  # Log all other output as debug output
+  else:
+    logging.debug('CONF:STDERR:?: %s' % (line))
+
+  # Neutral (not failure; nor success)
+  return False
+
+
+#def pipeline_stdout_handler(line, conf_info):
+#  """
+#  Processes each line of output from running the pipeline
+#  and stores useful information using the logging module
+#
+#  Loads useful information into conf_info as well, for future
+#  use outside the function.
+#
+#  returns True if found condition that signifies success.
+#  """
+#
+#  #f.write(line + '\n')
+#
+#  return True
+
+
+
+def pipeline_stderr_handler(line, conf_info):
+  """
+  Processes each line of stderr from pipelien run
+  and stores useful information using the logging module
+
+  ##FIXME: Future feature (doesn't actually do this yet)
+  #Loads useful information into conf_info as well, for future
+  #use outside the function.
+
+  returns RUN_FAILED upon detecting failure;
+          #True on success message; (no clear success state)
+          False if neutral message
+            (i.e. doesn't signify failure or success)
+  """
+
+  if pl_stderr_ignore(line):
+    pass
+  elif s_make_error.search(line):
+    logging.error("make error detected; run failed")
+    return RUN_FAILED
+  elif s_no_gnuplot.search(line):
+    logging.error("gnuplot not found")
+    return RUN_FAILED
+  elif s_no_convert.search(line):
+    logging.error("imagemagick's convert command not found")
+    return RUN_FAILED
+  elif s_no_ghostscript.search(line):
+    logging.error("ghostscript not found")
+    return RUN_FAILED
+  else:
+    logging.debug('PIPE:STDERR:?: %s' % (line))
+
+  return False
+
+
+def retrieve_config(conf_info, flowcell, cfg_filepath, genome_dir):
+  """
+  Gets the config file from server...
+  requires config file in:
+    /etc/ga_frontend/ga_frontend.conf
+   or
+    ~/.ga_frontend.conf
+
+  with:
+  [config_file_server]
+  base_host_url: http://host:port
+
+  return True if successful, False is failure
+  """
+  options = getCombinedOptions()
+
+  if options.url is None:
+    logging.error("~/.ga_frontend.conf or /etc/ga_frontend/ga_frontend.conf" \
+                  " missing base_host_url option")
+    return False
+
+  try:
+    saveConfigFile(flowcell, options.url, cfg_filepath)
+    conf_info.config_filepath = cfg_filepath
+  except FlowCellNotFound, e:
+    logging.error(e)
+    return False
+  except WebError404, e:
+    logging.error(e)
+    return False
+  except IOError, e:
+    logging.error(e)
+    return False
+  except Exception, e:
+    logging.error(e)
+    return False
+
+  f = open(cfg_filepath, 'r')
+  data = f.read()
+  f.close()
+
+  genome_dict = getAvailableGenomes(genome_dir)
+  mapper_dict = constructMapperDict(genome_dict)
+
+  logging.debug(data)
+
+  f = open(cfg_filepath, 'w')
+  f.write(data % (mapper_dict))
+  f.close()
+  
+  return True
+  
+
+
+def configure(conf_info):
+  """
+  Attempts to configure the GA pipeline using goat.
+
+  Uses logging module to store information about status.
+
+  returns True if configuration successful, otherwise False.
+  """
+  #ERROR Test:
+  #pipe = subprocess.Popen(['goat_pipeline.py',
+  #                         '--GERALD=config32bk.txt',
+  #                         '--make .',],
+  #                         #'.'],
+  #                        stdout=subprocess.PIPE,
+  #                        stderr=subprocess.PIPE)
+
+  #ERROR Test (2), causes goat_pipeline.py traceback
+  #pipe = subprocess.Popen(['goat_pipeline.py',
+  #                  '--GERALD=%s' % (conf_info.config_filepath),
+  #                         '--tiles=s_4_100,s_4_101,s_4_102,s_4_103,s_4_104',
+  #                         '--make',
+  #                         '.'],
+  #                        stdout=subprocess.PIPE,
+  #                        stderr=subprocess.PIPE)
+
+  ##########################
+  # Run configuration step
+  #   Not a test; actual configure attempt.
+  #pipe = subprocess.Popen(['goat_pipeline.py',
+  #                  '--GERALD=%s' % (conf_info.config_filepath),
+  #                         '--make',
+  #                         '.'],
+  #                        stdout=subprocess.PIPE,
+  #                        stderr=subprocess.PIPE)
+
+
+  stdout_filepath = os.path.join(conf_info.analysis_dir,
+                                 "pipeline_configure_stdout.txt")
+  stderr_filepath = os.path.join(conf_info.analysis_dir,
+                                 "pipeline_configure_stderr.txt")
+
+  fout = open(stdout_filepath, 'w')
+  ferr = open(stderr_filepath, 'w')
+  
+  pipe = subprocess.Popen(['goat_pipeline.py',
+                    '--GERALD=%s' % (conf_info.config_filepath),
+                           #'--tiles=s_4_0100,s_4_0101,s_4_0102,s_4_0103,s_4_0104',
+                           '--make',
+                           conf_info.analysis_dir],
+                          stdout=fout,
+                          stderr=ferr)
+
+  print "Configuring pipeline: %s" % (time.ctime())
+  error_code = pipe.wait()
+
+  # Clean up
+  fout.close()
+  ferr.close()
+  
+  
+  ##################
+  # Process stdout
+  fout = open(stdout_filepath, 'r')
+  
+  stdout_line = fout.readline()
+
+  complete = False
+  while stdout_line != '':
+    # Handle stdout
+    if config_stdout_handler(stdout_line, conf_info):
+      complete = True
+    stdout_line = fout.readline()
+
+  fout.close()
+
+
+  #error_code = pipe.wait()
+  if error_code:
+    logging.error('Recieved error_code: %s' % (error_code))
+  else:
+    logging.info('We are go for launch!')
+
+  #Process stderr
+  ferr = open(stderr_filepath, 'r')
+  stderr_line = ferr.readline()
+
+  abort = 'NO!'
+  stderr_success = False
+  while stderr_line != '':
+    stderr_status = config_stderr_handler(stderr_line, conf_info)
+    if stderr_status == RUN_ABORT:
+      abort = RUN_ABORT
+    elif stderr_status is True:
+      stderr_success = True
+    stderr_line = ferr.readline()
+
+  ferr.close()
+
+
+  #Success requirements:
+  # 1) The stdout completed without error
+  # 2) The program exited with status 0
+  # 3) No errors found in stdout
+  print '#Expect: True, False, True, True'
+  print complete, bool(error_code), abort != RUN_ABORT, stderr_success is True
+  status = complete is True and \
+           bool(error_code) is False and \
+           abort != RUN_ABORT and \
+           stderr_success is True
+
+  # If everything was successful, but for some reason
+  #  we didn't retrieve the path info, log it.
+  if status is True:
+    if conf_info.bustard_path is None or conf_info.run_path is None:
+      logging.error("Failed to retrieve run_path")
+      return False
+  
+  return status
+
+
+def run_pipeline(conf_info):
+  """
+  Run the pipeline and monitor status.
+  """
+  # Fail if the run_path doesn't actually exist
+  if not os.path.exists(conf_info.run_path):
+    logging.error('Run path does not exist: %s' \
+              % (conf_info.run_path))
+    return False
+
+  # Change cwd to run_path
+  stdout_filepath = os.path.join(conf_info.analysis_dir, 'pipeline_run_stdout.txt')
+  stderr_filepath = os.path.join(conf_info.analysis_dir, 'pipeline_run_stderr.txt')
+
+  # Create status object
+  conf_info.createStatusObject()
+
+  # Monitor file creation
+  wm = WatchManager()
+  mask = EventsCodes.IN_DELETE | EventsCodes.IN_CREATE
+  event = RunEvent(conf_info)
+  notifier = ThreadedNotifier(wm, event)
+  notifier.start()
+  wdd = wm.add_watch(conf_info.run_path, mask, rec=True)
+
+  # Log pipeline starting
+  logging.info('STARTING PIPELINE @ %s' % (time.ctime()))
+  
+  # Start the pipeline (and hide!)
+  #pipe = subprocess.Popen(['make',
+  #                         '-j8',
+  #                         'recursive'],
+  #                        stdout=subprocess.PIPE,
+  #                        stderr=subprocess.PIPE)
+
+  fout = open(stdout_filepath, 'w')
+  ferr = open(stderr_filepath, 'w')
+
+  pipe = subprocess.Popen(['make',
+                           '--directory=%s' % (conf_info.run_path),
+                           '-j8',
+                           'recursive'],
+                           stdout=fout,
+                           stderr=ferr)
+                           #shell=True)
+  # Wait for run to finish
+  retcode = pipe.wait()
+
+
+  # Clean up
+  notifier.stop()
+  fout.close()
+  ferr.close()
+
+  # Process stderr
+  ferr = open(stderr_filepath, 'r')
+
+  run_failed_stderr = False
+  for line in ferr:
+    err_status = pipeline_stderr_handler(line, conf_info)
+    if err_status == RUN_FAILED:
+      run_failed_stderr = True
+
+  ferr.close()
+
+  # Finished file check!
+  print 'RUN SUCCESS CHECK:'
+  for key, value in event.run_status_dict.items():
+    print '  %s: %s' % (key, value)
+
+  dstatus = event.run_status_dict
+
+  # Success or failure check
+  status = (retcode == 0) and \
+           run_failed_stderr is False and \
+           dstatus['firecrest'] is True and \
+           dstatus['bustard'] is True and \
+           dstatus['gerald'] is True
+
+  return status
+
+
diff --git a/htswdataprod/htswdataprod/genome_mapper.py b/htswdataprod/htswdataprod/genome_mapper.py

new file mode 100644 (file)

index 0000000..6b604b3
--- /dev/null
+++ b/htswdataprod/htswdataprod/genome_mapper.py
@@ -0,0 +1,137 @@
+#!/usr/bin/python
+import glob
+import sys
+import os
+import re
+
+import logging
+
+from htswcommon.util.alphanum import alphanum
+
+class DuplicateGenome(Exception): pass
+
+
+def _has_metainfo(genome_dir):
+  metapath = os.path.join(genome_dir, '_metainfo_')
+  if os.path.isfile(metapath):
+    return True
+  else:
+    return False
+
+def getAvailableGenomes(genome_base_dir):
+  """
+  raises IOError (on genome_base_dir not found)
+  raises DuplicateGenome on duplicate genomes found.
+  
+  returns a double dictionary (i.e. d[species][build] = path)
+  """
+
+  # Need valid directory
+  if not os.path.exists(genome_base_dir):
+    msg = "Directory does not exist: %s" % (genome_base_dir)
+    raise IOError, msg
+
+  # Find all subdirectories
+  filepath_list = glob.glob(os.path.join(genome_base_dir, '*'))
+  potential_genome_dirs = \
+    [ filepath for filepath in filepath_list if os.path.isdir(filepath)]
+
+  # Get list of metadata files
+  genome_dir_list = \
+    [ dirpath \
+      for dirpath in potential_genome_dirs \
+      if _has_metainfo(dirpath) ]
+
+  # Genome double dictionary
+  d = {}
+
+  for genome_dir in genome_dir_list:
+    line = open(os.path.join(genome_dir, '_metainfo_'), 'r').readline().strip()
+
+    # Get species, build... log and skip on failure
+    try:
+      species, build = line.split('|')
+    except:
+      logging.warning('Skipping: Invalid metafile (%s) line: %s' \
+                      % (metafile, line))
+      continue
+
+    build_dict = d.setdefault(species, {})
+    if build in build_dict:
+      msg = "Duplicate genome for %s|%s" % (species, build)
+      raise DuplicateGenome, msg
+
+    build_dict[build] = genome_dir
+
+  return d
+  
+
+class constructMapperDict(object):
+    """
+    Emulate a dictionary to map genome|build names to paths.
+    
+    It uses the dictionary generated by getAvailableGenomes.
+    """
+    def __init__(self, genome_dict):
+        self.genome_dict = genome_dict
+        
+    def __getitem__(self, key):
+        """
+        Return the best match for key
+        """
+        elements = re.split("\|", key)
+          
+        if len(elements) == 1:
+            # we just the species name
+            # get the set of builds
+            builds = self.genome_dict[elements[0]]
+            
+            # sort build names the way humans would
+            keys = builds.keys()
+            keys.sort(cmp=alphanum)
+            
+            # return the path from the 'last' build name
+            return builds[keys[-1]]
+                        
+        elif len(elements) == 2:
+            # we have species, and build name
+            return self.genome_dict[elements[0]][elements[1]]
+        else:
+            raise KeyError("Unrecognized key")
+        
+    def keys(self):
+        keys = []
+        for species in self.genome_dict.keys():
+            for build in self.genome_dict[species]:
+                keys.append([species+'|'+build])
+        return keys
+            
+    def values(self):
+        values = []
+        for species in self.genome_dict.keys():
+            for build in self.genome_dict[species]:
+                values.append(self.genome_dict[species][build])
+        return values
+       
+    def items(self):
+        items = []
+        for species in self.genome_dict.keys():
+            for build in self.genome_dict[species]:
+                key = [species+'|'+build]
+                value = self.genome_dict[species][build]
+                items.append((key, value))
+        return items
+            
+if __name__ == '__main__':
+
+  if len(sys.argv) != 2:
+    print 'useage: %s <base_genome_dir>' % (sys.argv[0])
+    sys.exit(1)
+
+  d = getAvailableGenomes(sys.argv[1])
+  d2 = constructMapperDict(d)
+
+  for k,v in d2.items():
+    print '%s: %s' % (k,v)
+  
+  
diff --git a/htswdataprod/htswdataprod/monitors.py b/htswdataprod/htswdataprod/monitors.py

new file mode 100644 (file)

index 0000000..78380fd
--- /dev/null
+++ b/htswdataprod/htswdataprod/monitors.py
@@ -0,0 +1,64 @@
+import time
+import threading
+
+######################
+# Utility functions
+def _percentCompleted(completed, total):
+  """
+  Returns precent completed as float
+  """
+  return (completed / float(total)) * 100
+
+
+##################################################
+# Functions to be called by Thread(target=<func>)
+def _cmdLineStatusMonitorFunc(conf_info):
+  """
+  Given a ConfigInfo object, provides status to stdout.
+
+  You should probably use startCmdLineStatusMonitor()
+  instead of ths function.
+
+  Use with:
+    t = threading.Thread(target=_cmdLineStatusMonitorFunc,
+                         args=[conf_info])
+    t.setDaemon(True)
+    t.start()
+  """
+  SLEEP_AMOUNT = 30
+
+  while 1:
+    if conf_info.status is None:
+      print "No status object yet."
+      time.sleep(SLEEP_AMOUNT)
+      continue
+    
+    fc, ft = conf_info.status.statusFirecrest()
+    bc, bt = conf_info.status.statusBustard()
+    gc, gt = conf_info.status.statusGerald()
+    tc, tt = conf_info.status.statusTotal()
+    
+    fp = _percentCompleted(fc, ft)
+    bp = _percentCompleted(bc, bt)
+    gp = _percentCompleted(gc, gt)
+    tp = _percentCompleted(tc, tt)
+    
+    print 'Firecrest: %s%% (%s/%s)' % (fp, fc, ft)
+    print '  Bustard: %s%% (%s/%s)' % (bp, bc, bt)
+    print '   Gerald: %s%% (%s/%s)' % (gp, gc, gt)
+    print '-----------------------'
+    print '    Total: %s%% (%s/%s)' % (tp, tc, tt)
+    print ''
+
+    time.sleep(SLEEP_AMOUNT)
+
+
+#############################################
+# Start monitor thread convenience functions
+def startCmdLineStatusMonitor(conf_info):
+  """
+  Starts a command line status monitor given a conf_info object.
+  """
+  t = threading.Thread(target=_cmdLineStatusMonitorFunc, args=[conf_info])
+  t.setDaemon(True)
+  t.start()
diff --git a/htswdataprod/htswdataprod/retrieve_config.py b/htswdataprod/htswdataprod/retrieve_config.py

new file mode 100644 (file)

index 0000000..72cff17
--- /dev/null
+++ b/htswdataprod/htswdataprod/retrieve_config.py
@@ -0,0 +1,185 @@
+#!/usr/bin/env python
+
+from optparse import OptionParser, IndentedHelpFormatter
+from ConfigParser import SafeConfigParser
+
+import logging
+import os
+import sys
+import urllib2
+
+CONFIG_SYSTEM = '/etc/ga_frontend/ga_frontend.conf'
+CONFIG_USER = os.path.expanduser('~/.ga_frontend.conf')
+
+#Disable or enable commandline arg parsing; disabled by default.
+DISABLE_CMDLINE = True
+
+class FlowCellNotFound(Exception): pass
+class WebError404(Exception): pass
+
+class DummyOptions:
+  """
+  Used when command line parsing is disabled; default
+  """
+  def __init__(self):
+    self.url = None
+    self.output_filepath = None
+    self.flowcell = None
+    self.genome_dir = None
+
+class PreformattedDescriptionFormatter(IndentedHelpFormatter):
+  
+  #def format_description(self, description):
+  #  
+  #  if description:
+  #      return description + "\n"
+  #  else:
+  #     return ""
+      
+  def format_epilog(self, epilog):
+    """
+    It was removing my preformated epilog, so this should override
+    that behavior! Muhahaha!
+    """
+    if epilog:
+        return "\n" + epilog + "\n"
+    else:
+        return ""
+
+
+def constructOptionParser():
+  """
+  returns a pre-setup optparser
+  """
+  global DISABLE_CMDLINE
+  
+  if DISABLE_CMDLINE:
+    return None
+  
+  parser = OptionParser(formatter=PreformattedDescriptionFormatter())
+
+  parser.set_description('Retrieves eland config file from ga_frontend web frontend.')
+  
+  parser.epilog = """
+Config File:
+  * %s (System wide)
+  * %s (User specific; overrides system)
+  * command line overrides all config file options
+  
+  Example Config File:
+  
+    [config_file_server]
+    base_host_url=http://somewhere.domain:port
+""" % (CONFIG_SYSTEM, CONFIG_USER)
+  
+  #Special formatter for allowing preformatted description.
+  ##parser.format_epilog(PreformattedDescriptionFormatter())
+
+  parser.add_option("-u", "--url",
+                    action="store", type="string", dest="url")
+  
+  parser.add_option("-o", "--output",
+                    action="store", type="string", dest="output_filepath")
+  
+  parser.add_option("-f", "--flowcell",
+                    action="store", type="string", dest="flowcell")
+
+  parser.add_option("-g", "--genome_dir",
+                    action="store", type="string", dest="genome_dir")
+  
+  #parser.set_default("url", "default")
+  
+  return parser
+
+def constructConfigParser():
+  """
+  returns a pre-setup config parser
+  """
+  parser = SafeConfigParser()
+  parser.read([CONFIG_SYSTEM, CONFIG_USER])
+  if not parser.has_section('config_file_server'):
+    parser.add_section('config_file_server')
+  if not parser.has_section('local_setup'):
+    parser.add_section('local_setup')
+  
+  return parser
+
+
+def getCombinedOptions():
+  """
+  Returns optparse options after it has be updated with ConfigParser
+  config files and merged with parsed commandline options.
+  """
+  cl_parser = constructOptionParser()
+  conf_parser = constructConfigParser()
+  
+  if cl_parser is None:
+    options = DummyOptions()
+  else:
+    options, args = cl_parser.parse_args()
+  
+  if options.url is None:
+    if conf_parser.has_option('config_file_server', 'base_host_url'):
+      options.url = conf_parser.get('config_file_server', 'base_host_url')
+
+  if options.genome_dir is None:
+    if conf_parser.has_option('local_setup', 'genome_dir'):
+      options.genome_dir = conf_parser.get('local_setup', 'genome_dir')
+  
+  print 'USING OPTIONS:'
+  print ' URL:', options.url
+  print ' OUT:', options.output_filepath
+  print '  FC:', options.flowcell
+  print 'GDIR:', options.genome_dir
+  print ''
+  
+  return options
+
+
+def saveConfigFile(flowcell, base_host_url, output_filepath):
+  """
+  retrieves the flowcell eland config file, give the base_host_url
+  (i.e. http://sub.domain.edu:port)
+  """
+  url = base_host_url + '/eland_config/%s/' % (flowcell)
+  
+  f = open(output_filepath, 'w')
+  #try:
+  try:
+    web = urllib2.urlopen(url)
+  except urllib2.URLError, e:
+    errmsg = 'URLError: %d' % (e.code,)
+    logging.error(errmsg)
+    logging.error('opened %s' % (url,))
+    logging.error('%s' % ( e.read(),))
+    raise IOError(errmsg)
+
+  #except IOError, msg:
+  #  if str(msg).find("Connection refused") >= 0:
+  #    print 'Error: Connection refused for: %s' % (url)
+  #    f.close()
+  #    sys.exit(1)
+  #  elif str(msg).find("Name or service not known") >= 0:
+  #    print 'Error: Invalid domain or ip address for: %s' % (url)
+  #    f.close()
+  #    sys.exit(2)
+  #  else:
+  #    raise IOError, msg
+
+  data = web.read()
+
+  if data.find('Hmm, config file for') >= 0:
+    msg = "Flowcell (%s) not found in DB; full url(%s)" % (flowcell, url)
+    raise FlowCellNotFound, msg
+
+  if data.find('404 - Not Found') >= 0:
+    msg = "404 - Not Found: Flowcell (%s); base_host_url (%s);\n full url(%s)\n " \
+          "Did you get right port #?" % (flowcell, base_host_url, url)
+    raise FlowCellNotFound, msg
+  
+  f.write(data)
+  web.close()
+  f.close()
+  logging.info('Wrote config file to %s' % (output_filepath,))
+
+  
diff --git a/htswdataprod/htswdataprod/run_status.py b/htswdataprod/htswdataprod/run_status.py

new file mode 100644 (file)

index 0000000..8e19c01
--- /dev/null
+++ b/htswdataprod/htswdataprod/run_status.py
@@ -0,0 +1,387 @@
+import glob
+import re
+import os
+
+s_comment = re.compile('^#')
+s_general_read_len = re.compile('^READ_LENGTH ')
+s_read_len = re.compile('^[1-8]+:READ_LENGTH ')
+
+s_firecrest = None
+
+def _four_digit_num_in_string(num):
+  if num < 0:
+    pass
+  elif num < 10:
+    return '000' + str(num)
+  elif num < 100:
+    return '00' + str(num)
+  elif num < 1000:
+    return '0' + str(num)
+  elif num < 10000:
+    return str(num)
+
+  msg = 'Invalid number: %s' % (num)
+  raise ValueError, msg
+
+def _two_digit_num_in_string(num):
+  if num < 0:
+    pass
+  elif num < 10:
+    return '0' + str(num)
+  elif num < 100:
+    return str(num)
+
+  msg = 'Invalid number: %s' % (num)
+  raise ValueError, msg
+
+
+# FIRECREST PATTERNS
+# _p2f(<pattern>, lane, tile, cycle)
+PATTERN_FIRECREST_QCM = 's_%s_%s_%s_qcm.xml'
+
+# _p2f(<pattern>, lane, tile)
+PATTERN_FIRECREST_INT = 's_%s_%s_02_int.txt'
+PATTERN_FIRECREST_NSE = 's_%s_%s_nse.txt.gz'
+PATTERN_FIRECREST_POS = 's_%s_%s_pos.txt'
+PATTERN_FIRECREST_IDX = 's_%s_%s_idx.txt'
+PATTERN_FIRECREST_CLU1 = 's_%s_%s_01_1_clu.txt'
+PATTERN_FIRECREST_CLU2 = 's_%s_%s_01_2_clu.txt'
+PATTERN_FIRECREST_CLU3 = 's_%s_%s_01_3_clu.txt'
+PATTERN_FIRECREST_CLU4 = 's_%s_%s_01_4_clu.txt'
+
+
+# BUSTARD PATTERNS
+# _p2f(<pattern>, lane, tile)
+PATTERN_BUSTARD_SIG2 = 's_%s_%s_sig2.txt'
+PATTERN_BUSTARD_PRB = 's_%s_%s_prb.txt'
+
+
+
+# GERALD PATTERNS
+# _p2f(<pattern>, lane, tile)
+PATTERN_GERALD_ALLTMP = 's_%s_%s_all.txt.tmp'
+PATTERN_GERALD_QRAWTMP = 's_%s_%s_qraw.txt.tmp'
+PATTERN_GERALD_ALLPNGTMP = 's_%s_%s_all.tmp.png'
+PATTERN_GERALD_ALIGNTMP = 's_%s_%s_align.txt.tmp'
+PATTERN_GERALD_QVALTMP = 's_%s_%s_qval.txt.tmp'
+PATTERN_GERALD_SCORETMP = 's_%s_%s_score.txt.tmp'
+PATTERN_GERALD_PREALIGNTMP = 's_%s_%s_prealign.txt.tmp'
+PATTERN_GERALD_REALIGNTMP = 's_%s_%s_realign.txt.tmp'
+PATTERN_GERALD_RESCORETMP = 's_%s_%s_rescore.txt.tmp'
+PATTERN_GERALD_RESCOREPNG = 's_%s_%s_rescore.png'
+PATTERN_GERALD_ERRORSTMPPNG = 's_%s_%s_errors.tmp.png'
+PATTERN_GERALD_QCALTMP = 's_%s_%s_qcal.txt.tmp'
+PATTERN_GERALD_QVAL = 's_%s_%s_qval.txt'
+
+# _p2f(<pattern>, lane)
+PATTERN_GERALD_SEQPRETMP = 's_%s_seqpre.txt.tmp'
+PATTERN_GERALD_RESULTTMP = 's_%s_eland_result.txt.tmp'
+PATTERN_GERALD_SIGMEANSTMP = 's_%s_Signal_Means.txt.tmp'
+PATTERN_GERALD_CALLPNG = 's_%s_call.png'
+PATTERN_GERALD_ALLPNG = 's_%s_all.png'
+PATTERN_GERALD_PERCENTALLPNG = 's_%s_percent_all.png'
+PATTERN_GERALD_PERCENTCALLPNG = 's_%s_percent_call.png'
+PATTERN_GERALD_PERCENTBASEPNG = 's_%s_percent_base.png'
+PATTERN_GERALD_FILTTMP = 's_%s_filt.txt.tmp'
+PATTERN_GERALD_FRAGTMP = 's_%s_frag.txt.tmp'
+PATTERN_GERALD_QREPORTTMP = 's_%s_qreport.txt.tmp'
+PATTERN_GERALD_QTABLETMP = 's_%s_qtable.txt.tmp'
+PATTERN_GERALD_QCALREPORTTMP = 's_%s_qcalreport.txt.tmp'
+PATTERN_GERALD_SEQUENCETMP = 's_%s_sequence.txt.tmp'
+PATTERN_GERALD_LANEFINISHED = 's_%s_finished.txt'
+
+
+
+def _p2f(pattern, lane, tile=None, cycle=None):
+  """
+  Converts a pattern plus info into file names
+  """
+
+  # lane, and cycle provided (INVALID)
+  if tile is None and cycle is not None:
+    msg = "Handling of cycle without tile is not currently implemented."
+    raise ValueError, msg
+
+  # lane, tile, cycle provided
+  elif cycle:
+    return pattern % (lane,
+                      _four_digit_num_in_string(tile),
+                      _two_digit_num_in_string(cycle))
+  
+  # lane, tile provided
+  elif tile:
+    return pattern % (lane, _four_digit_num_in_string(tile))
+
+  # lane provided
+  else:
+    return pattern % (lane)
+    
+
+
+class GARunStatus(object):
+
+  def __init__(self, conf_filepath):
+    """
+    Given an eland config file in the top level directory
+    of a run, predicts the files that will be generated
+    during a run and provides methods for retrieving
+    (completed, total) for each step or entire run.
+    """
+    #print 'self._conf_filepath = %s' % (conf_filepath)
+    self._conf_filepath = conf_filepath
+    self._base_dir, junk = os.path.split(conf_filepath)
+    self._image_dir = os.path.join(self._base_dir, 'Images')
+    
+    self.lanes = []
+    self.lane_read_length = {}
+    self.tiles = None
+    self.cycles = None
+    
+    self.status = {}
+    self.status['firecrest'] = {}
+    self.status['bustard'] = {}
+    self.status['gerald'] = {}
+    
+    self._process_config()
+    self._count_tiles()
+    self._count_cycles()
+    self._generate_expected()
+
+
+  def _process_config(self):
+    """
+    Grabs info from self._conf_filepath
+    """
+    f = open(self._conf_filepath, 'r')
+
+    for line in f:
+
+      #Skip comment lines for now.
+      if s_comment.search(line):
+        continue
+
+      mo =  s_general_read_len.search(line)
+      if mo:
+        read_length = int(line[mo.end():])
+        #Handle general READ_LENGTH
+        for i in range(1,9):
+          self.lane_read_length[i] = read_length
+      
+      mo = s_read_len.search(line)
+      if mo:
+        read_length = int(line[mo.end():])
+        lanes, junk = line.split(':')
+
+        #Convert lanes from string of lanes to list of lane #s.
+        lanes = [ int(i) for i in lanes ]
+
+        
+        for lane in lanes:
+
+          #Keep track of which lanes are being run.
+          if lane not in self.lanes:
+            self.lanes.append(lane)
+
+          #Update with lane specific read lengths
+          self.lane_read_length[lane] = read_length
+
+        self.lanes.sort()
+
+
+  def _count_tiles(self):
+    """
+    Count the number of tiles being used
+    """
+    self.tiles = len(glob.glob(os.path.join(self._image_dir,
+                                            'L001',
+                                            'C1.1',
+                                            's_1_*_a.tif')))
+
+  def _count_cycles(self):
+    """
+    Figures out the number of cycles that are available
+    """
+    #print 'self._image_dir = %s' % (self._image_dir)
+    cycle_dirs = glob.glob(os.path.join(self._image_dir, 'L001', 'C*.1'))
+    #print 'cycle_dirs = %s' % (cycle_dirs)
+    cycle_list = []
+    for cycle_dir in cycle_dirs:
+      junk, c = os.path.split(cycle_dir)
+      cycle_list.append(int(c[1:c.find('.')]))
+
+    self.cycles = max(cycle_list)
+    
+
+
+
+  def _generate_expected(self):
+    """
+    generates a list of files we expect to find.
+    """
+
+    firecrest = self.status['firecrest']
+    bustard = self.status['bustard']
+    gerald = self.status['gerald']
+    
+    
+    for lane in self.lanes:
+      for tile in range(1,self.tiles+1):
+        for cycle in range(1, self.cycles+1):
+
+          ##########################
+          # LANE, TILE, CYCLE LAYER
+
+          # FIRECREST
+          firecrest[_p2f(PATTERN_FIRECREST_QCM, lane, tile, cycle)] = False
+
+
+        ###################
+        # LANE, TILE LAYER
+
+        # FIRECREST
+        firecrest[_p2f(PATTERN_FIRECREST_INT, lane, tile)] = False
+        firecrest[_p2f(PATTERN_FIRECREST_NSE, lane, tile)] = False
+        firecrest[_p2f(PATTERN_FIRECREST_POS, lane, tile)] = False
+        firecrest[_p2f(PATTERN_FIRECREST_IDX, lane, tile)] = False
+        firecrest[_p2f(PATTERN_FIRECREST_CLU1, lane, tile)] = False
+        firecrest[_p2f(PATTERN_FIRECREST_CLU2, lane, tile)] = False
+        firecrest[_p2f(PATTERN_FIRECREST_CLU3, lane, tile)] = False
+        firecrest[_p2f(PATTERN_FIRECREST_CLU4, lane, tile)] = False
+
+
+        # BUSTARD
+        bustard[_p2f(PATTERN_BUSTARD_SIG2, lane, tile)] = False
+        bustard[_p2f(PATTERN_BUSTARD_PRB, lane, tile)] = False
+
+
+        # GERALD
+        #gerald[_p2f(PATTERN_GERALD_ALLTMP, lane, tile)] = False
+        #gerald[_p2f(PATTERN_GERALD_QRAWTMP, lane, tile)] = False
+        #gerald[_p2f(PATTERN_GERALD_ALLPNGTMP, lane, tile)] = False
+        #gerald[_p2f(PATTERN_GERALD_ALIGNTMP, lane, tile)] = False
+        #gerald[_p2f(PATTERN_GERALD_QVALTMP, lane, tile)] = False
+        #gerald[_p2f(PATTERN_GERALD_SCORETMP, lane, tile)] = False
+        #gerald[_p2f(PATTERN_GERALD_PREALIGNTMP, lane, tile)] = False
+        #gerald[_p2f(PATTERN_GERALD_REALIGNTMP, lane, tile)] = False
+        #gerald[_p2f(PATTERN_GERALD_RESCORETMP, lane, tile)] = False
+        gerald[_p2f(PATTERN_GERALD_RESCOREPNG, lane, tile)] = False
+        #gerald[_p2f(PATTERN_GERALD_ERRORSTMPPNG, lane, tile)] = False
+        #gerald[_p2f(PATTERN_GERALD_QCALTMP, lane, tile)] = False
+        #gerald[_p2f(PATTERN_GERALD_QVAL, lane, tile)] = False
+
+      ###################
+      # LANE LAYER
+
+      # GERALD
+      #gerald[_p2f(PATTERN_GERALD_SEQPRETMP, lane)] = False
+      #gerald[_p2f(PATTERN_GERALD_RESULTTMP, lane)] = False
+      #gerald[_p2f(PATTERN_GERALD_SIGMEANSTMP, lane)] = False
+      gerald[_p2f(PATTERN_GERALD_CALLPNG, lane)] = False
+      gerald[_p2f(PATTERN_GERALD_ALLPNG, lane)] = False
+      gerald[_p2f(PATTERN_GERALD_PERCENTALLPNG, lane)] = False
+      gerald[_p2f(PATTERN_GERALD_PERCENTCALLPNG, lane)] = False
+      gerald[_p2f(PATTERN_GERALD_PERCENTBASEPNG, lane)] = False
+      #gerald[_p2f(PATTERN_GERALD_FILTTMP, lane)] = False
+      #gerald[_p2f(PATTERN_GERALD_FRAGTMP, lane)] = False
+      #gerald[_p2f(PATTERN_GERALD_QREPORTTMP, lane)] = False
+      #gerald[_p2f(PATTERN_GERALD_QTABLETMP, lane)] = False
+      #gerald[_p2f(PATTERN_GERALD_QCALREPORTTMP, lane)] = False
+      #gerald[_p2f(PATTERN_GERALD_SEQUENCETMP, lane)] = False
+      gerald[_p2f(PATTERN_GERALD_LANEFINISHED, lane)] = False
+      
+      
+
+    #################
+    # LOOPS FINISHED
+
+    # FIRECREST
+    firecrest['offsets_finished.txt'] = False
+    firecrest['finished.txt'] = False
+
+    # BUSTARD
+    bustard['finished.txt'] = False
+
+    # GERALD
+    gerald['tiles.txt'] = False
+    gerald['FullAll.htm'] = False
+    #gerald['All.htm.tmp'] = False
+    #gerald['Signal_Means.txt.tmp'] = False
+    #gerald['plotIntensity_for_IVC'] = False
+    #gerald['IVC.htm.tmp'] = False
+    gerald['FullError.htm'] = False
+    gerald['FullPerfect.htm'] = False
+    #gerald['Error.htm.tmp'] = False
+    #gerald['Perfect.htm.tmp'] = False
+    #gerald['Summary.htm.tmp'] = False
+    #gerald['Tile.htm.tmp'] = False
+    gerald['finished.txt'] = False
+    
+    
+
+
+  def statusFirecrest(self):
+    """
+    returns (<completed>, <total>)
+    """
+    firecrest = self.status['firecrest']
+    total = len(firecrest)
+    completed = firecrest.values().count(True)
+
+    return (completed, total)
+
+
+  def statusBustard(self):
+    """
+    returns (<completed>, <total>)
+    """
+    bustard = self.status['bustard']
+    total = len(bustard)
+    completed = bustard.values().count(True)
+
+    return (completed, total)
+
+
+  def statusGerald(self):
+    """
+    returns (<completed>, <total>)
+    """
+    gerald = self.status['gerald']
+    total = len(gerald)
+    completed = gerald.values().count(True)
+
+    return (completed, total)
+
+
+  def statusTotal(self):
+    """
+    returns (<completed>, <total>)
+    """
+    #f = firecrest  c = completed
+    #b = bustard    t = total
+    #g = gerald
+    fc, ft = self.statusFirecrest()
+    bc, bt = self.statusBustard()
+    gc, gt = self.statusGerald()
+
+    return (fc+bc+gc, ft+bt+gt)
+
+
+  def updateFirecrest(self, filename):
+    """
+    Marks firecrest filename as being completed.
+    """
+    self.status['firecrest'][filename] = True
+    
+
+  def updateBustard(self, filename):
+    """
+    Marks bustard filename as being completed.
+    """
+    self.status['bustard'][filename] = True
+
+
+  def updateGerald(self, filename):
+    """
+    Marks gerald filename as being completed.
+    """
+    self.status['gerald'][filename] = True
diff --git a/htswdataprod/scripts/copier b/htswdataprod/scripts/copier

index 7025d57a72ceb85ba1b2b0a040abe34716056d91..d8dcc9fdf56a978bbaa92f5dc193d5fc34a6fa45 100644 (file)
--- a/htswdataprod/scripts/copier
+++ b/htswdataprod/scripts/copier
@@ -1,6 +1,6 @@
  #!/usr/bin/env python
  import sys
-from gaworkflow.automation.copier import main
+from htswdataprod.automation.copier import main
  
  if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
diff --git a/htswdataprod/scripts/runner b/htswdataprod/scripts/runner

index 6b6da11e1a85b5e55b89cd965471912ff694e05f..56f7786a670954ef4843aee2a334211cdf1d29c7 100644 (file)
--- a/htswdataprod/scripts/runner
+++ b/htswdataprod/scripts/runner
@@ -1,6 +1,6 @@
  #!/usr/bin/env python
  import sys
-from gaworkflow.automation.runner import main
+from htswdataprod.automation.runner import main
  
  if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
diff --git a/htswdataprod/scripts/spoolwatcher b/htswdataprod/scripts/spoolwatcher

index 269292e606b36c2706ac33cdd867d15cb36f8ce9..c805b5c35e598e860c05eabf4a2285b4bc4e22d2 100644 (file)
--- a/htswdataprod/scripts/spoolwatcher
+++ b/htswdataprod/scripts/spoolwatcher
@@ -1,6 +1,6 @@
  #!/usr/bin/env python
  import sys
-from gaworkflow.automation.spoolwatcher import main
+from htsdataprod.automation.spoolwatcher import main
  
  if __name__ == "__main__":
      sys.exit(main(sys.argv[1:]))
author	Brandon King <kingb@caltech.edu>
	Tue, 5 Aug 2008 20:03:40 +0000 (20:03 +0000)
committer	Brandon King <kingb@caltech.edu>
	Tue, 5 Aug 2008 20:03:40 +0000 (20:03 +0000)
htswcommon/htswcommon/util/alphanum.py	[new file with mode: 0644]	patch \| blob
htswdataprod/htswdataprod/automation/runner.py		patch \| blob \| history
htswdataprod/htswdataprod/configure_run.py	[new file with mode: 0644]	patch \| blob
htswdataprod/htswdataprod/genome_mapper.py	[new file with mode: 0644]	patch \| blob
htswdataprod/htswdataprod/monitors.py	[new file with mode: 0644]	patch \| blob
htswdataprod/htswdataprod/retrieve_config.py	[new file with mode: 0644]	patch \| blob
htswdataprod/htswdataprod/run_status.py	[new file with mode: 0644]	patch \| blob
htswdataprod/scripts/copier		patch \| blob \| history
htswdataprod/scripts/runner		patch \| blob \| history
htswdataprod/scripts/spoolwatcher		patch \| blob \| history