--- /dev/null
+import ConfigParser
+import copy
+import logging
+import logging.handlers
+import os
+import re
+import subprocess
+import sys
+import time
+import traceback
+
+from benderjab import rpc
+
def runfolder_validate(fname):
    """
    Return True if fname looks like a runfolder name

    A name is accepted when it is six digits, an underscore, and then any
    run (possibly empty) of letters, digits, dashes and underscores.
    Anything else, including path separators, is rejected.
    """
    # \Z instead of $: "$" also matches just before a trailing newline,
    # which would let "080101_run\n" through as a valid name.
    return re.match(r"^[0-9]{6}_[-A-Za-z0-9_]*\Z", fname) is not None
+
class rsync(object):
    """
    Small wrapper around the /usr/bin/rsync command line tool.

    It can list the entries available at the source and start one
    background rsync process per directory whose name starts with six
    digits, tracking those processes by directory name.
    """
    def __init__(self, source, dest, pwfile):
        """
        :Parameters:
          `source` - rsync source, handed to rsync as given
          `dest` - rsync destination, handed to rsync as given
          `pwfile` - rsync password file; a leading ~ is expanded
        """
        self.pwfile = os.path.expanduser(pwfile)
        self.cmd = ['/usr/bin/rsync',
                    '--password-file=%s' % (self.pwfile,)]
        self.source_base = source
        self.dest_base = dest
        # directory name -> subprocess.Popen of the rsync copying it
        self.processes = {}
        self.exit_code = None

    def list(self):
        """
        Get a directory listing

        Runs rsync against the source with no destination and returns
        the directory names accepted by list_filter.
        """
        args = self.cmd + [self.source_base]

        logging.debug("Rsync cmd:" + " ".join(args))
        short_process = subprocess.Popen(args,
                                         stdout=subprocess.PIPE,
                                         universal_newlines=True)
        # communicate() drains the pipe and waits for rsync to exit, so
        # the child is reaped instead of being left behind as a zombie.
        output, _ = short_process.communicate()
        if short_process.returncode != 0:
            logging.error("rsync listing of %s failed, exit code %d" %
                          (self.source_base, short_process.returncode))
        return self.list_filter(output.splitlines(True))

    def list_filter(self, lines):
        """
        parse rsync directory listing

        `lines` is any iterable of listing lines of the form
        "permissions size date time filename". Returns the filenames
        of directories whose name starts with six digits.
        """
        dirs_to_copy = []
        for line in lines:
            # split on whitespace rather than fixed columns so a wider
            # size column does not shift the filename; maxsplit keeps
            # spaces inside the filename intact.
            fields = line.rstrip("\r\n").split(None, 4)
            if len(fields) != 5:
                # blank line, banner text, or anything else that is not
                # a listing entry
                continue
            permissions = fields[0]
            filename = fields[4]
            # a directory that starts with something that looks like a
            # 6 digit date is good enough to copy
            if permissions[0] == 'd' and re.match("[0-9]{6}", filename):
                dirs_to_copy.append(filename)
        return dirs_to_copy

    def create_copy_process(self, dirname):
        """
        Start (and return) an rsync process copying dirname from the
        source to the destination. Does not wait for it to finish.
        """
        args = self.cmd + [
            # we want to copy everything
            '-rlt',
            # from here
            os.path.join(self.source_base, dirname),
            # to here
            self.dest_base,
        ]
        logging.debug("Rsync cmd:" + " ".join(args))
        return subprocess.Popen(args)

    def copy(self):
        """
        copy any interesting looking directories over
        return list of items that we started copying.
        """
        # clean up any lingering non-running processes
        self.poll()

        # what's available to copy?
        dirs_to_copy = self.list()

        # lets start copying
        started = []
        for d in dirs_to_copy:
            if self.processes.get(d, None) is None:
                # we don't have a process, so make one
                logging.info("rsyncing %s" % (d))
                self.processes[d] = self.create_copy_process(d)
                started.append(d)
        return started

    def poll(self):
        """
        check currently running processes to see if they're done

        return path roots that have finished.

        Processes that exited with a non-zero code are logged but stay
        in the table, so they still count as active for keys()/len().
        """
        finished = []
        # iterate over a snapshot; entries are deleted inside the loop
        for dir_key, proc_value in list(self.processes.items()):
            retcode = proc_value.poll()
            if retcode is None:
                # process hasn't finished yet
                continue
            if retcode == 0:
                logging.info("finished rsyncing %s, exitcode %d" %
                             (dir_key, retcode))
                del self.processes[dir_key]
                finished.append(dir_key)
            else:
                logging.error("rsync failed for %s, exit code %d" %
                              (dir_key, retcode))
        return finished

    def __len__(self):
        """
        Return how many active rsync processes we currently have

        Call poll first to close finished processes.
        """
        return len(self.processes)

    def keys(self):
        """
        Return list of current run folder names
        """
        return list(self.processes.keys())
+
class CopierBot(rpc.XmlRpcBot):
    """
    Bot that rsyncs run folders and passes sequencingFinished messages
    on once the named run folder is no longer being copied.
    """
    def __init__(self, section=None, configfile=None):
        #if configfile is None:
        #    configfile = '~/.gaworkflow'

        super(CopierBot, self).__init__(section, configfile)

        # options for rsync command
        self.cfg['rsync_password_file'] = None
        self.cfg['rsync_source'] = None
        self.cfg['rsync_destination'] = None

        # options for reporting we're done
        self.cfg['notify_users'] = None
        self.cfg['notify_runner'] = None

        # run folders we were told are finished but have not reported yet
        self.pending = []
        # filled in by read_config
        self.rsync = None
        self.notify_users = None
        self.notify_runner = None

        self.register_function(self.startCopy)
        self.register_function(self.sequencingFinished)
        self.eventTasks.append(self.update)

    def read_config(self, section=None, configfile=None):
        """
        read the config file

        Builds the rsync helper from the required rsync_* options and
        parses the notify_users / notify_runner lists.
        Re-raises bot.JIDMissingResource when a notify_runner entry has
        no resource.
        """
        # This module only imports benderjab.rpc, so the name `bot` used
        # by the except clause below was undefined. Import it locally.
        # NOTE(review): assumes JIDMissingResource lives in benderjab.bot
        from benderjab import bot

        super(CopierBot, self).read_config(section, configfile)

        password = self._check_required_option('rsync_password_file')
        source = self._check_required_option('rsync_source')
        destination = self._check_required_option('rsync_destination')
        self.rsync = rsync(source, destination, password)

        self.notify_users = self._parse_user_list(self.cfg['notify_users'])
        try:
            self.notify_runner = \
                self._parse_user_list(self.cfg['notify_runner'],
                                      require_resource=True)
        except bot.JIDMissingResource:
            msg = 'need a full jabber ID + resource for xml-rpc destinations'
            # logging.FATAL is the integer level, not a function
            logging.critical(msg)
            raise bot.JIDMissingResource(msg)

    def startCopy(self, *args):
        """
        start our copy

        Returns the list of directory names whose copy was just started.
        """
        logging.info("starting copy scan")
        started = self.rsync.copy()
        logging.info("copying:" + " ".join(started)+".")
        return started

    def sequencingFinished(self, runDir, *args):
        """
        The run was finished, if we're done copying, pass the message on

        Returns "PENDING" for a valid runfolder name; raises RuntimeError
        for a name that fails runfolder_validate.
        """
        # close any open processes
        self.rsync.poll()

        if not runfolder_validate(runDir):
            errmsg = "received bad runfolder name (%s)" % (runDir)
            logging.warning(errmsg)
            # maybe I should use a different error message
            raise RuntimeError(errmsg)

        logging.info("recevied sequencing finshed for %s" % (runDir))
        # update() reports it once no rsync is tracked for this folder
        self.pending.append(runDir)
        self.startCopy()
        return "PENDING"

    def reportSequencingFinished(self, runDir):
        """
        Send the sequencingFinished message to the interested parties
        """
        if self.notify_users is not None:
            for u in self.notify_users:
                self.send(u, 'Sequencing run %s finished' % (runDir))
        if self.notify_runner is not None:
            for r in self.notify_runner:
                self.rpc_send(r, (runDir,), 'sequencingFinished')
        logging.info("forwarding sequencingFinshed message for %s" % (runDir))

    def update(self, *args):
        """
        Update our current status.
        Report if we've finished copying files.
        """
        self.rsync.poll()
        active = self.rsync.keys()
        # walk a copy: removing from the list being iterated skips the
        # entry that follows each removed one
        for p in list(self.pending):
            if p not in active:
                self.reportSequencingFinished(p)
                self.pending.remove(p)

    def _parser(self, msg, who):
        """
        Parse xmpp chat messages

        Understands "help", "copy" and "status"; returns the reply text.
        """
        help = u"I can [copy], or report current [status]"
        if re.match(u"help", msg):
            reply = help
        elif re.match("copy", msg):
            started = self.startCopy()
            reply = u"started copying " + ", ".join(started)
        elif re.match(u"status", msg):
            report = [u"Currently %d rsync processes are running." %
                      (len(self.rsync))]
            for d in self.rsync.keys():
                report.append(u"  " + d)
            reply = os.linesep.join(report)
        else:
            reply = u"I didn't understand '%s'" % (msg,)
        return reply
+
def main(args=None):
    """
    Create a CopierBot and run it with the given argument list.

    Returns whatever CopierBot.main returns so the script guard below
    can hand it to sys.exit instead of always exiting with None.
    """
    bot = CopierBot()
    return bot.main(args)


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
+
--- /dev/null
+#!/usr/bin/env python
+from glob import glob
+import logging
+import os
+import re
+import sys
+import time
+import threading
+
+from benderjab import rpc
+
+from gaworkflow.pipeline.configure_run import *
+from gaworkflow.pipeline.monitors import _percentCompleted
+
+#s_fc = re.compile('FC[0-9]+')
# matches the last "_<letters/digits>" component at the end of a name
s_fc = re.compile(r'_[0-9a-zA-Z]*$')


def _get_flowcell_from_rundir(run_dir):
    """
    Returns flowcell string based on run_dir.
    Returns None and logs error if flowcell can't be found.

    The flowcell is taken to be whatever follows the final underscore of
    the last path component, provided it is only letters and digits.
    """
    # drop trailing separators first; os.path.split("a/b/") would
    # otherwise give an empty last component and a spurious failure
    junk, dirname = os.path.split(run_dir.rstrip(os.path.sep))
    mo = s_fc.search(dirname)
    if not mo:
        logging.error('RunDir 2 FlowCell error: %s' % (run_dir))
        return None

    # skip the underscore itself
    return dirname[mo.start()+1:]
+
+
+
class Runner(rpc.XmlRpcBot):
    """
    Manage running pipeline jobs.

    Each sequencingFinished message starts the retrieve-config,
    configure and run-pipeline steps for that run folder in a
    background thread.
    """
    def __init__(self, section=None, configfile=None):
        #if configfile is None:
        #    self.configfile = "~/.gaworkflow"
        super(Runner, self).__init__(section, configfile)

        self.cfg['genome_dir'] = None
        self.cfg['base_analysis_dir'] = None

        self.cfg['notify_users'] = None
        self.cfg['notify_postanalysis'] = None

        # filled in by read_config; defined here so the attributes exist
        # even if a message arrives before the config has been read
        self.genome_dir = None
        self.base_analysis_dir = None
        self.notify_users = None

        # flowcell -> ConfigInfo of the run started for it
        self.conf_info_dict = {}

        self.register_function(self.sequencingFinished)
        #self.eventTasks.append(self.update)

    def read_config(self, section=None, configfile=None):
        """
        Read the config file and pick up genome_dir, base_analysis_dir
        and the notify_users list.
        """
        super(Runner, self).read_config(section, configfile)

        self.genome_dir = self._check_required_option('genome_dir')
        self.base_analysis_dir = \
            self._check_required_option('base_analysis_dir')

        self.notify_users = self._parse_user_list(self.cfg['notify_users'])
        #FIXME: process notify_postpipeline cfg

    def _parser(self, msg, who):
        """
        Parse xmpp chat messages

        Understands "help", "status [flowcell]" and "start <runfolder>";
        returns the reply text.
        """
        help = u"I can send [start] a run, or report [status]"
        if re.match(u"help", msg):
            reply = help
        elif re.match("status", msg):
            words = msg.split()
            if len(words) == 2:
                reply = self.getStatusReport(words[1])
            else:
                reply = u"Status available for: %s" \
                        % (', '.join(self.conf_info_dict.keys()))
        elif re.match(u"start", msg):
            words = msg.split()
            if len(words) == 2:
                try:
                    self.sequencingFinished(words[1])
                    reply = u"starting run for %s" % (words[1])
                except RuntimeError:
                    # raised for a name we cannot get a flowcell from
                    reply = u"bad runfolder name %s" % (words[1])
            else:
                reply = u"need runfolder name"
        else:
            reply = u"I didn't understand '%s'" %(msg)

        # let logging do the formatting; str() on a unicode reply can
        # raise for non-ascii text under Python 2
        logging.debug("reply: %s", reply)
        return reply

    def getStatusReport(self, fc_num):
        """
        Returns text status report for flow cell number
        """
        if fc_num not in self.conf_info_dict:
            return "No record of a %s run." % (fc_num)

        status = self.conf_info_dict[fc_num].status

        if status is None:
            return "No status information for %s yet." \
                   " Probably still in configure step. Try again later." % (fc_num)

        # each status call returns a (completed, total) pair
        fc,ft = status.statusFirecrest()
        bc,bt = status.statusBustard()
        gc,gt = status.statusGerald()

        tc,tt = status.statusTotal()

        fp = _percentCompleted(fc, ft)
        bp = _percentCompleted(bc, bt)
        gp = _percentCompleted(gc, gt)
        tp = _percentCompleted(tc, tt)

        output = [
            u'Firecrest: %s%% (%s/%s)' % (fp, fc, ft),
            u'  Bustard: %s%% (%s/%s)' % (bp, bc, bt),
            u'   Gerald: %s%% (%s/%s)' % (gp, gc, gt),
            u'-----------------------',
            u'    Total: %s%% (%s/%s)' % (tp, tc, tt),
        ]

        return '\n'.join(output)

    def sequencingFinished(self, run_dir):
        """
        Sequenceing (and copying) is finished, time to start pipeline

        Returns "started" once the job thread has been launched.
        Raises RuntimeError when no flowcell can be derived from run_dir.
        """
        logging.debug("received sequencing finished message")

        # get flowcell from run_dir name; refuse to go on without one,
        # otherwise the run would be filed under a None key
        flowcell = _get_flowcell_from_rundir(run_dir)
        if flowcell is None:
            errmsg = "couldn't find flowcell in runfolder name (%s)" % (run_dir,)
            logging.warning(errmsg)
            raise RuntimeError(errmsg)

        # Setup config info object
        ci = ConfigInfo()
        ci.base_analysis_dir = self.base_analysis_dir
        ci.analysis_dir = os.path.join(self.base_analysis_dir, run_dir)

        # Store ci object in dictionary
        self.conf_info_dict[flowcell] = ci

        # Launch the job in it's own thread and turn.
        self.launchJob(run_dir, flowcell, ci)
        return "started"

    def pipelineFinished(self, run_dir):
        """
        Log that the pipeline finished and tell every notify_users entry.
        """
        # need to strip off self.watch_dir from rundir I suspect.
        logging.info("pipeline finished in %s" % (run_dir,))

        # Notify each user that the run has finished.
        if self.notify_users is not None:
            for u in self.notify_users:
                self.send(u, 'Pipeline run %s finished' % (run_dir))

    def reportMsg(self, msg):
        """
        Send msg to every notify_users entry (no-op when none configured).
        """
        if self.notify_users is not None:
            for u in self.notify_users:
                self.send(u, msg)

    def _runner(self, run_dir, flowcell, conf_info):
        """
        Run the retrieve-config, configure and pipeline steps in order,
        reporting each result and stopping at the first failure.
        """
        # retrieve config step
        cfg_filepath = os.path.join(conf_info.analysis_dir,
                                    'config32auto.txt')
        status_retrieve_cfg = retrieve_config(conf_info,
                                              flowcell,
                                              cfg_filepath,
                                              self.genome_dir)
        if not status_retrieve_cfg:
            logging.error("Runner: Retrieve config: failed")
            self.reportMsg("Retrieve config (%s): FAILED" % (run_dir))
            # returning here means the configure result is never read
            # without having been assigned
            return
        logging.info("Runner: Retrieve config: success")
        self.reportMsg("Retrieve config (%s): success" % (run_dir))

        # configure step
        status = configure(conf_info)
        if not status:
            logging.error("Runner: Configure: failed")
            self.reportMsg("Configure (%s): FAILED" % (run_dir))
            return
        logging.info("Runner: Configure: success")
        self.reportMsg("Configure (%s): success" % (run_dir))
        self.reportMsg(
            os.linesep.join(glob(os.path.join(run_dir,'Data','C*')))
        )

        # Setup status cmdline status monitor
        #startCmdLineStatusMonitor(ci)

        # running step
        print('Running pipeline now!')
        run_status = run_pipeline(conf_info)
        if run_status is True:
            logging.info('Runner: Pipeline: success')
            self.reportMsg("Pipeline run (%s): Finished" % (run_dir,))
        else:
            logging.error('Runner: Pipeline: failed')
            self.reportMsg("Pipeline run (%s): FAILED" % (run_dir))

    def launchJob(self, run_dir, flowcell, conf_info):
        """
        Starts up a thread for running the pipeline
        """
        t = threading.Thread(target=self._runner,
                             args=[run_dir, flowcell, conf_info])
        # daemon thread: does not keep the interpreter alive at exit
        t.setDaemon(True)
        t.start()
+
+
+
def main(args=None):
    """
    Build a Runner bot, run it with args and return its result.
    """
    runner_bot = Runner()
    result = runner_bot.main(args)
    return result


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
+
--- /dev/null
+#!/usr/bin/env python
+import logging
+import os
+import re
+import sys
+import time
+#import glob
+
+from gaworkflow.util import mount
+
+# this uses pyinotify
+import pyinotify
+from pyinotify import EventsCodes
+
+from benderjab import rpc
+
+
class WatcherEvents(object):
    """Placeholder for per-runfolder event tracking; holds no state yet."""
    # Design notes:
    #   two events need to be tracked
    #     - one to send startCopy
    #     - one to send "OMG its broken"
    #   "OMG its broken" needs to stop when we've seen enough cycles
    #   this should be per runfolder.
    #   read the xml files
    def __init__(self):
        pass
+
+
class Handler(pyinotify.ProcessEvent):
    """
    inotify event handler for the spool watcher.

    Records when the last create event happened and tells the owning
    bot about run.completed files and unmounts.
    """
    def __init__(self, watchmanager, bot):
        # time.time() of the most recent create event, None when idle
        self.last_event_time = None
        self.watchmanager = watchmanager
        self.bot = bot

    def process_IN_CREATE(self, event):
        """
        Note the time of the event; a file named run.completed (any
        case) also triggers bot.sequencingFinished for its directory.
        """
        self.last_event_time = time.time()
        msg = "Create: %s" % os.path.join(event.path, event.name)
        if event.name.lower() == "run.completed":
            try:
                self.bot.sequencingFinished(event.path)
            except IOError:
                # sys.exc_info() keeps this free of version specific
                # "except X, e" / "except X as e" syntax and lets the
                # log say what actually went wrong
                logging.error("Couldn't send sequencingFinished: %s" %
                              (sys.exc_info()[1],))
        logging.debug(msg)

    def process_IN_DELETE(self, event):
        """Log the removed path."""
        logging.debug("Remove: %s" % os.path.join(event.path, event.name))

    def process_IN_UNMOUNT(self, event):
        """Log the unmount and have the bot drop its watches."""
        pathname = os.path.join(event.path, event.name)
        logging.debug("IN_UNMOUNT: %s" % (pathname,))
        self.bot.unmount_watch()
+
class SpoolWatcher(rpc.XmlRpcBot):
    """
    Watch a directory and send a message when another process is done writing.

    This monitors a directory tree using inotify (linux specific) and
    after some files having been written will send a message after <timeout>
    seconds of no file writing.

    (Basically when the solexa machine finishes dumping a round of data
    this'll hopefully send out a message saying hey look theres data available

    """
    # these params need to be in the config file
    # I wonder where I should put the documentation
    #:Parameters:
    #   `watchdir` - which directory tree to monitor for modifications
    #   `profile` - specify which .gaworkflow profile to use
    #   `write_timeout` - how many seconds to wait for writes to finish to
    #                     the spool
    #   `notify_timeout` - how often to timeout from notify

    def __init__(self, section=None, configfile=None):
        #if configfile is None:
        #    self.configfile = "~/.gaworkflow"
        super(SpoolWatcher, self).__init__(section, configfile)

        self.cfg['watchdir'] = None
        self.cfg['write_timeout'] = 10
        self.cfg['notify_users'] = None
        self.cfg['notify_runner'] = None

        # replaced by read_config; set here so process_notify and
        # sequencingFinished never hit a missing attribute
        self.watch_dir = None
        self.write_timeout = int(self.cfg['write_timeout'])

        self.notify_timeout = 0.001
        self.wm = pyinotify.WatchManager()
        self.handler = Handler(self.wm, self)
        self.notifier = pyinotify.Notifier(self.wm, self.handler)
        # result of the last add_watch call, None when nothing is watched
        self.wdd = None
        self.mount_point = None
        self.mounted = True

        self.notify_users = None
        self.notify_runner = None

        self.eventTasks.append(self.process_notify)

    def read_config(self, section=None, configfile=None):
        """
        Read the config file: watchdir (required), write_timeout and
        the notify_users / notify_runner lists.
        Re-raises bot.JIDMissingResource when a notify_runner entry has
        no resource.
        """
        # This module only imports benderjab.rpc, so the name `bot` used
        # by the except clause below was undefined. Import it locally.
        # NOTE(review): assumes JIDMissingResource lives in benderjab.bot
        from benderjab import bot

        super(SpoolWatcher, self).read_config(section, configfile)

        self.watch_dir = self._check_required_option('watchdir')
        self.write_timeout = int(self.cfg['write_timeout'])

        self.notify_users = self._parse_user_list(self.cfg['notify_users'])
        try:
            self.notify_runner = \
                self._parse_user_list(self.cfg['notify_runner'],
                                      require_resource=True)
        except bot.JIDMissingResource:
            msg = 'need a full jabber ID + resource for xml-rpc destinations'
            # logging.FATAL is the integer level, not a function
            logging.critical(msg)
            raise bot.JIDMissingResource(msg)

    def add_watch(self, watchdir=None):
        """
        start watching watchdir or self.watch_dir
        we're currently limited to watching one directory tree.
        """
        # the one tree limit is mostly because self.wdd is a single item
        # but managing it as a list might be a bit more annoying
        if watchdir is None:
            watchdir = self.watch_dir
        logging.info("Watching:"+str(watchdir))

        self.mount_point = mount.find_mount_point_for(watchdir)

        mask = EventsCodes.IN_CREATE | EventsCodes.IN_UNMOUNT
        # rec traverses the tree and adds all the directories that are there
        # at the start.
        # auto_add will add in new directories as they are created
        self.wdd = self.wm.add_watch(watchdir, mask, rec=True, auto_add=True)

    def unmount_watch(self):
        """
        Remove the current watches (if any) and remember that the
        filesystem went away so process_notify can restart them.
        """
        if self.wdd is not None:
            self.wm.rm_watch(self.wdd.values())
            self.wdd = None
            self.mounted = False

    def process_notify(self, *args):
        """
        Periodic task: pump inotify events, send startCopy once writes
        have been quiet for write_timeout seconds, and restart the watch
        after a remount.
        """
        # process the queue of events as explained above
        self.notifier.process_events()
        #check events waits timeout
        if self.notifier.check_events(self.notify_timeout):
            # read notified events and enqeue them
            self.notifier.read_events()
            # should we do something?
        # has something happened?
        last_event_time = self.handler.last_event_time
        if last_event_time is not None:
            time_delta = time.time() - last_event_time
            if time_delta > self.write_timeout:
                self.startCopy()
                self.handler.last_event_time = None
        # handle unmounted filesystems
        if not self.mounted:
            if mount.is_mounted(self.mount_point):
                # we've been remounted. Huzzah!
                # restart the watch
                self.add_watch()
                self.mounted = True
                logging.info(
                    "%s was remounted, restarting watch" % \
                    (self.mount_point)
                )

    def _parser(self, msg, who):
        """
        Parse xmpp chat messages

        Understands "help", "copy" and "finished <runfolder>"; returns
        the reply text.
        """
        help = u"I can send [copy] message, or squencer [finished]"
        if re.match(u"help", msg):
            reply = help
        elif re.match("copy", msg):
            self.startCopy()
            reply = u"sent copy message"
        elif re.match(u"finished", msg):
            words = msg.split()
            if len(words) == 2:
                self.sequencingFinished(words[1])
                reply = u"sending sequencing finished for %s" % (words[1])
            else:
                reply = u"need runfolder name"
        else:
            reply = u"I didn't understand '%s'" %(msg)
        return reply

    def start(self, daemonize):
        """
        Start application
        """
        self.add_watch()
        super(SpoolWatcher, self).start(daemonize)

    def stop(self):
        """
        shutdown application
        """
        # destroy the inotify's instance on this interrupt (stop monitoring)
        self.notifier.stop()
        super(SpoolWatcher, self).stop()

    def startCopy(self):
        """
        Send the startCopy rpc message to every notify_runner entry.
        """
        logging.debug("writes seem to have stopped")
        if self.notify_runner is not None:
            for r in self.notify_runner:
                self.rpc_send(r, tuple(), 'startCopy')

    def sequencingFinished(self, run_dir):
        """
        Announce that run_dir is complete.

        run_dir is reported relative to watch_dir when it lies below it;
        users get a chat message, runners a sequencingFinished rpc call.
        """
        logging.info("run.completed in " + str(run_dir))
        # Strip watch_dir as a literal leading prefix. Treating it as a
        # regular expression would misread characters such as "." or "+"
        # and would also remove matches in the middle of the path.
        prefix = self.watch_dir or ""
        if prefix and not prefix.endswith(os.path.sep):
            prefix += os.path.sep
        if prefix and run_dir.startswith(prefix):
            stripped_run_dir = run_dir[len(prefix):]
        else:
            stripped_run_dir = run_dir
        logging.debug("stripped to " + stripped_run_dir)
        if self.notify_users is not None:
            for u in self.notify_users:
                self.send(u, 'Sequencing run %s finished' % (stripped_run_dir))
        if self.notify_runner is not None:
            for r in self.notify_runner:
                self.rpc_send(r, (stripped_run_dir,), 'sequencingFinished')
+
def main(args=None):
    """
    Build a SpoolWatcher bot, run it with args and return its result.
    """
    watcher = SpoolWatcher()
    result = watcher.main(args)
    return result


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
+