import logging
import os
import re
+import shlex
import sys
import time
-#import glob
from htsworkflow.util import mount
+from htsworkflow.automation.solexa import is_runfolder, get_top_dir
# this uses pyinotify
import pyinotify
from pyinotify import EventsCodes
+IN_CREATE = EventsCodes.ALL_FLAGS['IN_CREATE']
+IN_UNMOUNT = EventsCodes.ALL_FLAGS['IN_UNMOUNT']
from benderjab import rpc
+LOGGER = logging.getLogger(__name__)
-class WatcherEvents(object):
- # two events need to be tracked
- # one to send startCopy
- # one to send OMG its broken
- # OMG its broken needs to stop when we've seen enough
- # cycles
- # this should be per runfolder.
- # read the xml files
- def __init__(self):
- pass
-
+class WatcherEvent(object):
+ """
+ Track information about a file event
+
+ Currently its time, and if it was an indication we've completed the run.
+ """
+ def __init__(self, event_root=None):
+ self.time = time.time()
+ self.event_root = event_root
+ self.complete = False
+
+ def __unicode__(self):
+ if self.complete:
+ complete = "(completed)"
+ else:
+ complete = ""
+ return "<WatchEvent: %s %s %s>" % (time.ctime(self.time), self.event_root, complete)
class Handler(pyinotify.ProcessEvent):
- def __init__(self, watchmanager, bot, ipar=False):
+ def __init__(self, watchmanager, bot, completion_files=None):
"""
- ipar flag indicates we should wait for ipar to finish, instead of
- just the run finishing
+ Completion file contains current "completion" filename
"""
- print 'ipar flag: ' + str(ipar)
- self.last_event_time = None
+ self.last_event = {}
self.watchmanager = watchmanager
self.bot = bot
- self.ipar_mode = ipar
- if self.ipar_mode:
- self.last_file = 'IPAR_Netcopy_Complete.txt'.lower()
- else:
- self.last_file = "run.completed".lower()
+ self.log = bot.log
+ if completion_files is not None:
+ completion_files = [ x.lower() for x in completion_files ]
+ self.completion_files = completion_files
def process_IN_CREATE(self, event):
- self.last_event_time = time.time()
- msg = "Create: %s" % os.path.join(event.path, event.name)
+ for wdd in self.bot.wdds:
+ for watch_path in self.bot.watchdirs:
+ run_already_complete = False
+ # I only care about things created inside the watch directory, so
+ # the event path needs to be longer than the watch path in addition to
+ # starting with the watch_path
+ if len(event.path) > len(watch_path) and event.path.startswith(watch_path):
+ # compute name of the top level directory that had an event
+ # in the current watch path
+ target = get_top_dir(watch_path, event.path)
+ runfolder = os.path.join(watch_path, target)
+
+ if not is_runfolder(target):
+ self.log.debug("Skipping %s, not a runfolder" % (target,))
+ continue
+
+ # grab the previous events for this watch path
+ watch_path_events = self.last_event.setdefault(watch_path, {})
+
+ # if we've already seen an event in this directory (AKA runfolder)
+ # keep track if its already hit the "completed" flag
+ if target in watch_path_events:
+ run_already_complete = watch_path_events[target].complete
+
+ watch_path_events[target] = WatcherEvent(target)
+ #self.last_event.setdefault(watch_path, {})[target] = WatcherEvent(target)
- if event.name.lower() == self.last_file:
- try:
- self.bot.sequencingFinished(event.path)
- except IOError, e:
- logging.error("Couldn't send sequencingFinished")
- logging.debug(msg)
+ msg = "Create: %s %s %s %s" % (watch_path, target, event.path, event.name)
+
+ # the ReadPrep step uses some of the same file completion flags as the
+ # main analysis, which means this completion code might get tripped because of it
+ # so we need to make sure we're getting the completion file in the root of the
+ # runfolder
+ event_name = event.name.lower()
+ if (event_name in self.completion_files and event.path == runfolder) \
+ or run_already_complete:
+ self.last_event[watch_path][target].complete = True
+ msg += "(completed)"
+
+ self.log.debug(msg)
def process_IN_DELETE(self, event):
- logging.debug("Remove: %s" % os.path.join(event.path, event.name))
+ self.log.debug("Remove: %s" % os.path.join(event.path, event.name))
+ pass
def process_IN_UNMOUNT(self, event):
pathname = os.path.join(event.path, event.name)
- logging.debug("IN_UNMOUNT: %s" % (pathname,))
- self.bot.unmount_watch()
+ self.log.debug("IN_UNMOUNT: %s" % (pathname,))
+ self.bot.unmount_watch(event.path)
class SpoolWatcher(rpc.XmlRpcBot):
"""
Watch a directory and send a message when another process is done writing.
-
+
This monitors a directory tree using inotify (linux specific) and
after some files having been written will send a message after <timeout>
seconds of no file writing.
-
+
(Basically when the solexa machine finishes dumping a round of data
this'll hopefully send out a message saying hey look theres data available
-
+
"""
# these params need to be in the config file
# I wonder where I should put the documentation
#:Parameters:
- # `watchdir` - which directory tree to monitor for modifications
+ # `watchdirs` - list of directories to monitor for modifications
# `profile` - specify which .htsworkflow profile to use
# `write_timeout` - how many seconds to wait for writes to finish to
# the spool
# `notify_timeout` - how often to timeout from notify
-
+ # `completion_files` - what files indicates we've finished sequencing
+ # defaults to: netcopy_complete.txt
+
def __init__(self, section=None, configfile=None):
#if configfile is None:
# self.configfile = "~/.htsworkflow"
super(SpoolWatcher, self).__init__(section, configfile)
-
- self.cfg['watchdir'] = None
+
+ self.cfg['watchdirs'] = None
self.cfg['write_timeout'] = 10
self.cfg['notify_users'] = None
self.cfg['notify_runner'] = None
- self.cfg['wait_for_ipar'] = 0
-
+ self.cfg['completion_files'] = 'ImageAnalysis_Netcopy_complete_READ2.txt ImageAnalysis_Netcopy_complete_SINGLEREAD.txt'
+
+ self.watchdirs = []
+ self.watchdir_url_map = {}
self.notify_timeout = 0.001
- self.wm = pyinotify.WatchManager()
- self.wdd = None
- self.mount_point = None
- self.mounted = True
-
+
+ self.wm = None
self.notify_users = None
self.notify_runner = None
-
+ self.wdds = []
+
+ # keep track if the specified mount point is currently mounted
+ self.mounted_points = {}
+ # keep track of which mount points tie to which watch directories
+ # so maybe we can remount them.
+ self.mounts_to_watches = {}
+
self.eventTasks.append(self.process_notify)
def read_config(self, section=None, configfile=None):
+ # Don't give in to the temptation to use logging functions here,
+ # need to wait until after we detach in start
super(SpoolWatcher, self).read_config(section, configfile)
-
- self.watch_dir = self._check_required_option('watchdir')
+
+ self.watchdirs = shlex.split(self._check_required_option('watchdirs'))
+ # see if there's an alternate url that should be used for the watchdir
+ for watchdir in self.watchdirs:
+ self.watchdir_url_map[watchdir] = self.cfg.get(watchdir, watchdir)
+
self.write_timeout = int(self.cfg['write_timeout'])
- self.wait_for_ipar = int(self.cfg['wait_for_ipar'])
- print 'wait for ipar: ' + str(self.cfg['wait_for_ipar'])
-
+ self.completion_files = shlex.split(self.cfg['completion_files'])
+
self.notify_users = self._parse_user_list(self.cfg['notify_users'])
try:
self.notify_runner = \
require_resource=True)
except bot.JIDMissingResource:
msg = 'need a full jabber ID + resource for xml-rpc destinations'
- logging.FATAL(msg)
raise bot.JIDMissingResource(msg)
- self.handler = Handler(self.wm, self, self.wait_for_ipar)
- self.notifier = pyinotify.Notifier(self.wm, self.handler)
+ self.handler = None
+ self.notifier = None
- def add_watch(self, watchdir=None):
+ def add_watch(self, watchdirs=None):
"""
- start watching watchdir or self.watch_dir
+ start watching watchdir or self.watchdir
we're currently limited to watching one directory tree.
"""
+ # create the watch managers if we need them
+ if self.wm is None:
+ self.wm = pyinotify.WatchManager()
+ self.handler = Handler(self.wm, self, self.completion_files)
+ self.notifier = pyinotify.Notifier(self.wm, self.handler)
+
# the one tree limit is mostly because self.wdd is a single item
# but managing it as a list might be a bit more annoying
- if watchdir is None:
- watchdir = self.watch_dir
- logging.info("Watching:"+str(watchdir))
-
- self.mount_point = mount.find_mount_point_for(watchdir)
+ if watchdirs is None:
+ watchdirs = self.watchdirs
- mask = EventsCodes.IN_CREATE | EventsCodes.IN_UNMOUNT
+ mask = IN_CREATE | IN_UNMOUNT
# rec traverses the tree and adds all the directories that are there
# at the start.
# auto_add will add in new directories as they are created
- self.wdd = self.wm.add_watch(watchdir, mask, rec=True, auto_add=True)
-
- def unmount_watch(self):
- if self.wdd is not None:
- self.wm.rm_watch(self.wdd.values())
- self.wdd = None
- self.mounted = False
-
+ for w in watchdirs:
+ mount_location = mount.find_mount_point_for(w)
+ self.mounted_points[mount_location] = True
+ mounts = self.mounts_to_watches.get(mount_location, [])
+ if w not in mounts:
+ mounts.append(w)
+ self.mounts_to_watches[mount_location] = mounts
+
+ self.log.info("Watching:"+str(w))
+ self.wdds.append(self.wm.add_watch(w, mask, rec=True, auto_add=True))
+
+ def unmount_watch(self, event_path):
+ # remove backwards so we don't get weirdness from
+ # the list getting shorter
+ for i in range(len(self.wdds),0, -1):
+ wdd = self.wdds[i]
+ self.log.info('unmounting: '+str(list(wdd.items())))
+ self.wm.rm_watch(list(wdd.values()))
+ del self.wdds[i]
+ self.mounted = False
+
+ def make_copy_url(self, watchdir, list_event_dir):
+ root_copy_url = self.watchdir_url_map[watchdir]
+ if root_copy_url[-1] != '/':
+ root_copy_url += '/'
+ copy_url = root_copy_url + list_event_dir
+ self.log.debug('Copy url: %s' % (copy_url,))
+ return copy_url
+
def process_notify(self, *args):
+ if self.notifier is None:
+ # nothing to do yet
+ return
# process the queue of events as explained above
self.notifier.process_events()
#check events waits timeout
self.notifier.read_events()
# should we do something?
# has something happened?
- last_event_time = self.handler.last_event_time
- if last_event_time is not None:
- time_delta = time.time() - last_event_time
- if time_delta > self.write_timeout:
- self.startCopy()
- self.handler.last_event_time = None
+ for watchdir, last_events in list(self.handler.last_event.items()):
+ for last_event_dir, last_event_detail in list(last_events.items()):
+ time_delta = time.time() - last_event_detail.time
+ if time_delta > self.write_timeout:
+ LOGGER.info("timeout: %s" % (str(last_event_detail),))
+ copy_url = self.make_copy_url(watchdir, last_event_dir)
+ self.startCopy(copy_url)
+ if last_event_detail.complete:
+ self.sequencingFinished(last_event_detail.event_root)
+
+ self.handler.last_event[watchdir] = {}
# handle unmounted filesystems
- if not self.mounted:
- if mount.is_mounted(self.mount_point):
+ for mount_point, was_mounted in list(self.mounted_points.items()):
+ if not was_mounted and mount.is_mounted(mount_point):
# we've been remounted. Huzzah!
# restart the watch
- self.add_watch()
- self.mounted = True
- logging.info(
- "%s was remounted, restarting watch" % \
- (self.mount_point)
- )
+ for watch in self.mounts_to_watches[mount_point]:
+ self.add_watch(watch)
+ self.logg.info(
+ "%s was remounted, restarting watch" % \
+ (mount_point)
+ )
+ self.mounted_points[mount_point] = True
def _parser(self, msg, who):
"""
Parse xmpp chat messages
"""
- help = u"I can send [copy] message, or squencer [finished]"
- if re.match(u"help", msg):
+ help = "I can send [copy] message, or squencer [finished]"
+ if re.match("help", msg):
reply = help
- elif re.match("copy", msg):
- self.startCopy()
- reply = u"sent copy message"
- elif re.match(u"finished", msg):
+ elif re.match("copy", msg):
+ self.startCopy(msg)
+ reply = "sent copy message"
+ elif re.match("finished", msg):
words = msg.split()
if len(words) == 2:
self.sequencingFinished(words[1])
- reply = u"sending sequencing finished for %s" % (words[1])
+ reply = "sending sequencing finished for %s" % (words[1])
else:
- reply = u"need runfolder name"
+ reply = "need runfolder name"
else:
- reply = u"I didn't understand '%s'" %(msg)
+ reply = "I didn't understand '%s'" %(msg)
return reply
-
- def start(self, daemonize):
+
+ def run(self):
"""
Start application
"""
+ # we have to configure pyinotify after BenderJab.start is called
+ # as weird things happen to pyinotify if the stdio is closed
+ # after it's initialized.
self.add_watch()
- super(SpoolWatcher, self).start(daemonize)
-
+ super(SpoolWatcher, self).run()
+
def stop(self):
"""
shutdown application
"""
# destroy the inotify's instance on this interrupt (stop monitoring)
- self.notifier.stop()
+ if self.notifier is not None:
+ self.notifier.stop()
super(SpoolWatcher, self).stop()
-
- def startCopy(self):
- logging.debug("writes seem to have stopped")
+
+ def startCopy(self, copy_url=None):
+ self.log.debug("writes seem to have stopped")
if self.notify_runner is not None:
for r in self.notify_runner:
- self.rpc_send(r, tuple(), 'startCopy')
-
+ self.rpc_send(r, tuple([copy_url]), 'startCopy')
+ if self.notify_users is not None:
+ for u in self.notify_users:
+ self.send(u, 'startCopy %s.' % (copy_url,))
+
def sequencingFinished(self, run_dir):
- # need to strip off self.watch_dir from rundir I suspect.
- logging.info("run.completed in " + str(run_dir))
- pattern = self.watch_dir
- if pattern[-1] != os.path.sep:
- pattern += os.path.sep
- stripped_run_dir = re.sub(pattern, "", run_dir)
- logging.debug("stripped to " + stripped_run_dir)
+ # need to strip off self.watchdirs from rundir I suspect.
+ self.log.info("run.completed in " + str(run_dir))
+ for watch in self.watchdirs:
+ if not run_dir.startswith(watch):
+ LOGGER.info("%s didn't start with %s" % (run_dir, watch))
+ continue
+ if watch[-1] != os.path.sep:
+ watch += os.path.sep
+ stripped_run_dir = re.sub(watch, "", run_dir)
+ else:
+ stripped_run_dir = run_dir
+
+ self.log.debug("stripped to " + stripped_run_dir)
if self.notify_users is not None:
for u in self.notify_users:
- self.send(u, 'Sequencing run %s finished' % (stripped_run_dir))
+ self.send(u, 'Sequencing run %s finished' % \
+ (stripped_run_dir))
if self.notify_runner is not None:
for r in self.notify_runner:
self.rpc_send(r, (stripped_run_dir,), 'sequencingFinished')
-
+
def main(args=None):
bot = SpoolWatcher()
return bot.main(args)
-
+
if __name__ == "__main__":
- sys.exit(main(sys.argv[1:]))
+ ret = main(sys.argv[1:])
+ #sys.exit(ret)
+# TODO:
+# send messages to copier specifying which mount to copy