import logging
import os
import re
+import shlex
import sys
import time
-#import glob
from htsworkflow.util import mount
from benderjab import rpc
def get_top_dir(root, path):
    """
    Return the directory in path that is the immediate subdirectory of root.

    Returns None when path is not inside root, and the empty string when
    path equals root.
    e.g.

    >>> print get_top_dir('/a/b/c', '/a/b/c/d/e/f')
    d
    >>> print get_top_dir('/a/b/c/', '/a/b/c/d/e/f')
    d
    >>> print get_top_dir('/a/b/c', '/g/e/f')
    None
    >>> print get_top_dir('/a/b/c', '/a/b/c')
    <BLANKLINE>
    """
    # normalize away a trailing separator so both '/a/b/c' and '/a/b/c/'
    # behave identically
    root = root.rstrip(os.path.sep)
    if path == root:
        return ""
    # require a separator right after root so that '/a/b/cd/e' is NOT
    # treated as being inside '/a/b/c' (plain startswith would match it)
    if path.startswith(root + os.path.sep):
        subpath = path[len(root) + 1:]
        return subpath.split(os.path.sep)[0]
    return None
class WatcherEvents(object):
# two events need to be tracked
class Handler(pyinotify.ProcessEvent):
    """
    Receive inotify events from the watch manager and track write activity
    per run directory, notifying the bot when a run finishes.
    """
    def __init__(self, watchmanager, bot, ipar=False):
        """
        ipar flag indicates we should wait for ipar to finish, instead of
        just the run finishing
        """
        # maps watch dir -> {top-level run dir -> timestamp of last event};
        # process_notify polls this to detect when writes have stopped
        self.last_event = {}
        self.watchmanager = watchmanager
        self.bot = bot
        self.ipar_mode = ipar
        if self.ipar_mode:
            self.last_file = 'IPAR_Netcopy_Complete.txt'.lower()
        else:
            self.last_file = "run.completed".lower()

    def process_IN_CREATE(self, event):
        # record the time of the newest event for the run directory the
        # created file belongs to, under whichever watch dir contains it
        for watch_path in self.bot.watchdirs:
            if event.path.startswith(watch_path):
                target = get_top_dir(watch_path, event.path)
                self.last_event.setdefault(watch_path, {})[target] = time.time()

        msg = "Create: %s %s" % (event.path, event.name)

        if event.name.lower() == self.last_file:
            try:
                self.bot.sequencingFinished(event.path)
            except IOError:
                # best-effort notification: log the failure but keep watching
                logging.error("Couldn't send sequencingFinished")
        logging.debug(msg)

    def process_IN_DELETE(self, event):
        logging.debug("Remove: %s" % os.path.join(event.path, event.name))

    def process_IN_UNMOUNT(self, event):
        pathname = os.path.join(event.path, event.name)
        logging.debug("IN_UNMOUNT: %s" % (pathname,))
        self.bot.unmount_watch(event.path)
class SpoolWatcher(rpc.XmlRpcBot):
"""
# these params need to be in the config file
# I wonder where I should put the documentation
#:Parameters:
- # `watchdir` - which directory tree to monitor for modifications
+ # `watchdirs` - list of directories to monitor for modifications
# `profile` - specify which .htsworkflow profile to use
# `write_timeout` - how many seconds to wait for writes to finish to
# the spool
# self.configfile = "~/.htsworkflow"
super(SpoolWatcher, self).__init__(section, configfile)
- self.cfg['watchdir'] = None
+ self.cfg['watchdirs'] = None
self.cfg['write_timeout'] = 10
self.cfg['notify_users'] = None
self.cfg['notify_runner'] = None
-
+ self.cfg['wait_for_ipar'] = 0
+
+ self.watchdirs = []
+ self.watchdir_url_map = {}
self.notify_timeout = 0.001
- self.wm = pyinotify.WatchManager()
- self.handler = Handler(self.wm, self)
- self.notifier = pyinotify.Notifier(self.wm, self.handler)
- self.wdd = None
- self.mount_point = None
- self.mounted = True
-
+
+ self.wm = None
self.notify_users = None
self.notify_runner = None
+ self.wdds = []
+
+ # keep track if the specified mount point is currently mounted
+ self.mounted_points = {}
+ # keep track of which mount points tie to which watch directories
+ # so maybe we can remount them.
+ self.mounts_to_watches = {}
self.eventTasks.append(self.process_notify)
def read_config(self, section=None, configfile=None):
super(SpoolWatcher, self).read_config(section, configfile)
- self.watch_dir = self._check_required_option('watchdir')
+ self.watchdirs = shlex.split(self._check_required_option('watchdirs'))
+ # see if there's an alternate url that should be used for the watchdir
+ for watchdir in self.watchdirs:
+ self.watchdir_url_map[watchdir] = self.cfg.get(watchdir, watchdir)
+
self.write_timeout = int(self.cfg['write_timeout'])
+ self.wait_for_ipar = int(self.cfg['wait_for_ipar'])
self.notify_users = self._parse_user_list(self.cfg['notify_users'])
try:
require_resource=True)
except bot.JIDMissingResource:
msg = 'need a full jabber ID + resource for xml-rpc destinations'
- logging.FATAL(msg)
raise bot.JIDMissingResource(msg)
- def add_watch(self, watchdir=None):
+ self.handler = None
+ self.notifier = None
+
+ def add_watch(self, watchdirs=None):
"""
- start watching watchdir or self.watch_dir
+ start watching watchdir or self.watchdir
we're currently limited to watching one directory tree.
"""
+ # create the watch managers if we need them
+ if self.wm is None:
+ self.wm = pyinotify.WatchManager()
+ self.handler = Handler(self.wm, self, self.wait_for_ipar)
+ self.notifier = pyinotify.Notifier(self.wm, self.handler)
+
# the one tree limit is mostly because self.wdd is a single item
# but managing it as a list might be a bit more annoying
- if watchdir is None:
- watchdir = self.watch_dir
- logging.info("Watching:"+str(watchdir))
-
- self.mount_point = mount.find_mount_point_for(watchdir)
+ if watchdirs is None:
+ watchdirs = self.watchdirs
mask = EventsCodes.IN_CREATE | EventsCodes.IN_UNMOUNT
# rec traverses the tree and adds all the directories that are there
# at the start.
# auto_add will add in new directories as they are created
- self.wdd = self.wm.add_watch(watchdir, mask, rec=True, auto_add=True)
+ for w in watchdirs:
+ mount_location = mount.find_mount_point_for(w)
+ self.mounted_points[mount_location] = True
+ mounts = self.mounts_to_watches.get(mount_location, [])
+ if w not in mounts:
+ mounts.append(w)
+ self.mounts_to_watches[mount_location] = mounts
+
+ logging.info(u"Watching:"+unicode(w))
+ self.wdds.append(self.wm.add_watch(w, mask, rec=True, auto_add=True))
- def unmount_watch(self):
- if self.wdd is not None:
- self.wm.rm_watch(self.wdd.values())
- self.wdd = None
- self.mounted = False
+ def unmount_watch(self, event_path):
+ # remove backwards so we don't get weirdness from
+ # the list getting shorter
+ for i in range(len(self.wdds),0, -1):
+ wdd = self.wdds[i]
+ logging.info(u'unmounting: '+unicode(wdd.items()))
+ self.wm.rm_watch(wdd.values())
+ del self.wdds[i]
+ self.mounted = False
def process_notify(self, *args):
+ if self.notifier is None:
+ # nothing to do yet
+ return
# process the queue of events as explained above
self.notifier.process_events()
#check events waits timeout
self.notifier.read_events()
# should we do something?
# has something happened?
- last_event_time = self.handler.last_event_time
- if last_event_time is not None:
- time_delta = time.time() - last_event_time
- if time_delta > self.write_timeout:
- self.startCopy()
- self.handler.last_event_time = None
+ for watchdir, last_events in self.handler.last_event.items():
+ #logging.debug('last_events: %s %s' % (watchdir, last_events))
+ for last_event_dir, last_event_time in last_events.items():
+ time_delta = time.time() - last_event_time
+ if time_delta > self.write_timeout:
+ self.startCopy(watchdir, last_event_dir)
+ self.handler.last_event[watchdir] = {}
# handle unmounted filesystems
- if not self.mounted:
- if mount.is_mounted(self.mount_point):
+ for mount_point, was_mounted in self.mounted_points.items():
+ if not was_mounted and mount.is_mounted(mount_point):
# we've been remounted. Huzzah!
# restart the watch
- self.add_watch()
- self.mounted = True
- logging.info(
- "%s was remounted, restarting watch" % \
- (self.mount_point)
- )
+ for watch in self.mounts_to_watches[mount_point]:
+ self.add_watch(watch)
+ logging.info(
+ "%s was remounted, restarting watch" % \
+ (mount_point)
+ )
+ self.mounted_points[mount_point] = True
def _parser(self, msg, who):
"""
reply = u"I didn't understand '%s'" %(msg)
return reply
- def start(self, daemonize):
+ def run(self):
"""
Start application
"""
+ # we have to configure pyinotify after BenderJab.start is called
+ # as weird things happen to pyinotify if the stdio is closed
+ # after it's initialized.
self.add_watch()
- super(SpoolWatcher, self).start(daemonize)
+ super(SpoolWatcher, self).run()
def stop(self):
"""
shutdown application
"""
# destroy the inotify's instance on this interrupt (stop monitoring)
- self.notifier.stop()
+ if self.notifier is not None:
+ self.notifier.stop()
super(SpoolWatcher, self).stop()
- def startCopy(self):
+ def startCopy(self, watchdir=None, event_path=None):
logging.debug("writes seem to have stopped")
if self.notify_runner is not None:
for r in self.notify_runner:
self.rpc_send(r, tuple(), 'startCopy')
+ if self.notify_users is not None:
+ for u in self.notify_users:
+ self.send(u, 'startCopy %s %s' % (watchdir, event_path))
def sequencingFinished(self, run_dir):
- # need to strip off self.watch_dir from rundir I suspect.
+ # need to strip off self.watchdirs from rundir I suspect.
logging.info("run.completed in " + str(run_dir))
pattern = self.watch_dir
if pattern[-1] != os.path.sep:
return bot.main(args)
if __name__ == "__main__":
- sys.exit(main(sys.argv[1:]))
+ ret = main(sys.argv[1:])
+ #sys.exit(ret)
+# TODO:
+# send messages to copier specifying which mount to copy