Adjust spoolwatcher's use of inotify to correspond to pyinotify 0.8.9
[htsworkflow.git] / htsworkflow / automation / spoolwatcher.py
#!/usr/bin/env python
import logging
import os
import re
import shlex
import sys
import time

from htsworkflow.util import mount

# this uses pyinotify
import pyinotify
from pyinotify import EventsCodes
IN_CREATE = EventsCodes.ALL_FLAGS['IN_CREATE']
IN_UNMOUNT = EventsCodes.ALL_FLAGS['IN_UNMOUNT']
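# (In pyinotify 0.8.x the event masks live in the EventsCodes.ALL_FLAGS
# dictionary, hence the lookups above; earlier releases apparently exposed
# them as plain attributes of EventsCodes.)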

from benderjab import bot, rpc

def is_runfolder(name):
    """
    Is it a runfolder?

    >>> print is_runfolder('090630_HWUSI-EAS999_0006_30LNFAAXX')
    True
    >>> print is_runfolder('hello')
    False
    """
    if re.match("[0-9]{6}_.*", name):
        return True
    else:
        return False

def get_top_dir(root, path):
    """
    Return the directory in path that is a subdirectory of root.
    e.g.

    >>> print get_top_dir('/a/b/c', '/a/b/c/d/e/f')
    d
    >>> print get_top_dir('/a/b/c/', '/a/b/c/d/e/f')
    d
    >>> print get_top_dir('/a/b/c', '/g/e/f')
    None
    >>> print get_top_dir('/a/b/c', '/a/b/c')
    <BLANKLINE>
    """
    if path.startswith(root):
        subpath = path[len(root):]
        if subpath.startswith('/'):
            subpath = subpath[1:]
        return subpath.split(os.path.sep)[0]
    else:
        return None

class WatcherEvent(object):
    """
    Track information about a file event.

    Currently this is the event time, and whether the event indicated that
    the run has completed.
    """
    def __init__(self, event_root=None):
        self.time = time.time()
        self.event_root = event_root
        self.complete = False

    def __unicode__(self):
        if self.complete:
            complete = "(completed)"
        else:
            complete = ""
        return u"<WatchEvent: %s %s %s>" % (time.ctime(self.time), self.event_root, complete)

class Handler(pyinotify.ProcessEvent):
    def __init__(self, watchmanager, bot, completion_files=None):
        """
        completion_files is the list of filenames that mark a run as complete
        """
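        # last_event maps each watch directory to a dict of
        # { runfolder name: WatcherEvent }, filled in by process_IN_CREATE
        # and drained by SpoolWatcher.process_notify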
        self.last_event = {}
        self.watchmanager = watchmanager
        self.bot = bot
        if completion_files is not None:
            completion_files = [ x.lower() for x in completion_files ]
        self.completion_files = completion_files

    def process_IN_CREATE(self, event):
        for wdd in self.bot.wdds:
            for watch_path in self.bot.watchdirs:
                run_already_complete = False
                # we only care about things created inside the watch directory,
                # so the event path needs to be longer than the watch path in
                # addition to starting with the watch_path
                if len(event.path) > len(watch_path) and event.path.startswith(watch_path):
                    # compute the name of the top level directory that had an
                    # event in the current watch path
                    target = get_top_dir(watch_path, event.path)
                    runfolder = os.path.join(watch_path, target)

                    if not is_runfolder(target):
                        logging.debug("Skipping %s, not a runfolder" % (target,))
                        continue

                    # grab the previous events for this watch path
                    watch_path_events = self.last_event.setdefault(watch_path, {})

                    # if we've already seen an event in this directory (AKA runfolder)
                    # keep track of whether it has already hit the "completed" flag
                    if watch_path_events.has_key(target):
                        run_already_complete = watch_path_events[target].complete

                    watch_path_events[target] = WatcherEvent(target)
                    #self.last_event.setdefault(watch_path, {})[target] = WatcherEvent(target)

                    msg = "Create: %s %s %s %s" % (watch_path, target, event.path, event.name)

                    # The ReadPrep step uses some of the same completion flag files as the
                    # main analysis, which means this completion code could be tripped by it,
                    # so make sure we're seeing the completion file in the root of the
                    # runfolder.
                    event_name = event.name.lower()
                    if (event_name in self.completion_files and event.path == runfolder) \
                      or run_already_complete:
                        self.last_event[watch_path][target].complete = True
                        msg += " (completed)"

                    logging.debug(msg)

    def process_IN_DELETE(self, event):
        logging.debug("Remove: %s" % os.path.join(event.path, event.name))

    def process_IN_UNMOUNT(self, event):
        pathname = os.path.join(event.path, event.name)
        logging.debug("IN_UNMOUNT: %s" % (pathname,))
        self.bot.unmount_watch(event.path)

class SpoolWatcher(rpc.XmlRpcBot):
    """
    Watch a directory and send a message when another process is done writing.

    This monitors a directory tree using inotify (linux specific) and,
    after some files have been written, will send a message after <timeout>
    seconds of no file writing.

    (Basically, when the solexa machine finishes dumping a round of data
    this will hopefully send out a message saying "hey, look, there's data
    available".)
    """
    # these params need to be in the config file
    # I wonder where I should put the documentation
    #:Parameters:
    #    `watchdirs` - list of directories to monitor for modifications
    #    `profile` - specify which .htsworkflow profile to use
    #    `write_timeout` - how many seconds to wait for writes to finish to
    #                      the spool
    #    `notify_timeout` - how often to timeout from notify
    #    `completion_files` - which files indicate we've finished sequencing
    #                         defaults to: ImageAnalysis_Netcopy_complete_READ2.txt
    #                         ImageAnalysis_Netcopy_complete_SINGLEREAD.txt
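    #
    # A hypothetical example of the matching section in the config file the
    # bot reads (key names follow self.cfg below; the actual section and file
    # names depend on how the bot is deployed):
    #
    #    [spoolwatcher]
    #    watchdirs = /mnt/sequencer/spool
    #    write_timeout = 10
    #    notify_users = someone@example.com
    #    completion_files = ImageAnalysis_Netcopy_complete_SINGLEREAD.txt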

    def __init__(self, section=None, configfile=None):
        #if configfile is None:
        #    self.configfile = "~/.htsworkflow"
        super(SpoolWatcher, self).__init__(section, configfile)

        self.cfg['watchdirs'] = None
        self.cfg['write_timeout'] = 10
        self.cfg['notify_users'] = None
        self.cfg['notify_runner'] = None
        self.cfg['completion_files'] = 'ImageAnalysis_Netcopy_complete_READ2.txt ImageAnalysis_Netcopy_complete_SINGLEREAD.txt'

        self.watchdirs = []
        self.watchdir_url_map = {}
        self.notify_timeout = 0.001

        self.wm = None
        self.notify_users = None
        self.notify_runner = None
        self.wdds = []

        # keep track of whether the specified mount point is currently mounted
        self.mounted_points = {}
        # keep track of which mount points tie to which watch directories
        # so maybe we can remount them.
        self.mounts_to_watches = {}

        self.eventTasks.append(self.process_notify)

    def read_config(self, section=None, configfile=None):
        # Don't give in to the temptation to use logging functions here;
        # we need to wait until after we detach in start.
        super(SpoolWatcher, self).read_config(section, configfile)

        self.watchdirs = shlex.split(self._check_required_option('watchdirs'))
        # see if there's an alternate url that should be used for the watchdir
        for watchdir in self.watchdirs:
            self.watchdir_url_map[watchdir] = self.cfg.get(watchdir, watchdir)

        self.write_timeout = int(self.cfg['write_timeout'])
        self.completion_files = shlex.split(self.cfg['completion_files'])

        self.notify_users = self._parse_user_list(self.cfg['notify_users'])
        try:
            self.notify_runner = \
                self._parse_user_list(self.cfg['notify_runner'],
                                      require_resource=True)
        except bot.JIDMissingResource:
            msg = 'need a full jabber ID + resource for xml-rpc destinations'
            raise bot.JIDMissingResource(msg)

        self.handler = None
        self.notifier = None

    def add_watch(self, watchdirs=None):
        """
        Start watching the directory trees in watchdirs, or self.watchdirs
        if none are given.
        """
        # create the watch managers if we need them
        if self.wm is None:
            self.wm = pyinotify.WatchManager()
            self.handler = Handler(self.wm, self, self.completion_files)
            self.notifier = pyinotify.Notifier(self.wm, self.handler)

        # each watched tree gets its own watch-descriptor dict in self.wdds
        if watchdirs is None:
            watchdirs = self.watchdirs

        mask = IN_CREATE | IN_UNMOUNT
        # rec traverses the tree and adds all the directories that are there
        # at the start.
        # auto_add will add in new directories as they are created
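        # (each call to WatchManager.add_watch returns a dict mapping the
        # watched paths to their watch descriptors; unmount_watch below uses
        # those dicts to remove the watches again)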
        for w in watchdirs:
            mount_location = mount.find_mount_point_for(w)
            self.mounted_points[mount_location] = True
            mounts = self.mounts_to_watches.get(mount_location, [])
            if w not in mounts:
                mounts.append(w)
                self.mounts_to_watches[mount_location] = mounts

            logging.info(u"Watching: " + unicode(w))
            self.wdds.append(self.wm.add_watch(w, mask, rec=True, auto_add=True))

    def unmount_watch(self, event_path):
        # remove backwards so we don't get weirdness from
        # the list getting shorter
        for i in range(len(self.wdds) - 1, -1, -1):
            wdd = self.wdds[i]
            logging.info(u'unmounting: ' + unicode(wdd.items()))
            self.wm.rm_watch(wdd.values())
            del self.wdds[i]
        # refresh the mounted state of the mount points we're tracking so
        # process_notify can restart the watches if they get remounted
        for mount_point in self.mounted_points:
            self.mounted_points[mount_point] = mount.is_mounted(mount_point)

    def make_copy_url(self, watchdir, list_event_dir):
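        """
        Build the copy url for a run directory under one of the watch dirs.

        A hypothetical example: if self.watchdir_url_map maps '/mnt/spool'
        to 'rsync://sequencer/spool' and list_event_dir is
        '090630_HWUSI-EAS999_0006_30LNFAAXX', the copy url would be
        'rsync://sequencer/spool/090630_HWUSI-EAS999_0006_30LNFAAXX'.
        """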
        root_copy_url = self.watchdir_url_map[watchdir]
        if root_copy_url[-1] != '/':
            root_copy_url += '/'
        copy_url = root_copy_url + list_event_dir
        logging.debug('Copy url: %s' % (copy_url,))
        return copy_url

    def process_notify(self, *args):
        if self.notifier is None:
            # nothing to do yet
            return
        # process the queue of events
        self.notifier.process_events()
        # check_events waits up to notify_timeout for something to arrive
        if self.notifier.check_events(self.notify_timeout):
            # read notified events and enqueue them
            self.notifier.read_events()
            # should we do something?
        # has something happened?
        for watchdir, last_events in self.handler.last_event.items():
            for last_event_dir, last_event_detail in last_events.items():
                time_delta = time.time() - last_event_detail.time
                if time_delta > self.write_timeout:
                    logging.debug("timeout %s" % (unicode(last_event_detail),))
                    copy_url = self.make_copy_url(watchdir, last_event_dir)
                    self.startCopy(copy_url)
                    if last_event_detail.complete:
                        self.sequencingFinished(last_event_detail.event_root)

                    self.handler.last_event[watchdir] = {}
        # handle unmounted filesystems
        for mount_point, was_mounted in self.mounted_points.items():
            if not was_mounted and mount.is_mounted(mount_point):
                # we've been remounted. Huzzah!
                # restart the watch
                for watch in self.mounts_to_watches[mount_point]:
                    self.add_watch([watch])
                    logging.info(
                        "%s was remounted, restarting watch" % \
                            (mount_point,)
                    )
                self.mounted_points[mount_point] = True

    def _parser(self, msg, who):
        """
        Parse xmpp chat messages
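
        Recognized commands (the runfolder name below is a hypothetical example):
            help     - show this usage summary
            copy     - send a startCopy message
            finished 090630_HWUSI-EAS999_0006_30LNFAAXX
                     - send sequencingFinished for that runfolder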
        """
        help = u"I can send a [copy] message, or a sequencer [finished] message"
        if re.match(u"help", msg):
            reply = help
        elif re.match("copy", msg):
            self.startCopy(msg)
            reply = u"sent copy message"
        elif re.match(u"finished", msg):
            words = msg.split()
            if len(words) == 2:
                self.sequencingFinished(words[1])
                reply = u"sending sequencing finished for %s" % (words[1])
            else:
                reply = u"need runfolder name"
        else:
            reply = u"I didn't understand '%s'" % (msg,)
        return reply

    def run(self):
        """
        Start the application
        """
        # we have to configure pyinotify after BenderJab.start is called,
        # as weird things happen to pyinotify if the stdio is closed
        # after it's initialized.
        self.add_watch()
        super(SpoolWatcher, self).run()

    def stop(self):
        """
        Shut down the application
        """
        # destroy this inotify instance on interrupt (stop monitoring)
        if self.notifier is not None:
            self.notifier.stop()
        super(SpoolWatcher, self).stop()

    def startCopy(self, copy_url=None):
        logging.debug("writes seem to have stopped")
        if self.notify_runner is not None:
            for r in self.notify_runner:
                self.rpc_send(r, tuple([copy_url]), 'startCopy')
        if self.notify_users is not None:
            for u in self.notify_users:
                self.send(u, 'startCopy %s.' % (copy_url,))

    def sequencingFinished(self, run_dir):
        # strip off the matching watchdir prefix from run_dir
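        # e.g. (hypothetical paths) a run_dir of
        # '/mnt/spool/090630_HWUSI-EAS999_0006_30LNFAAXX' under a watchdir of
        # '/mnt/spool' is reported as '090630_HWUSI-EAS999_0006_30LNFAAXX'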
        logging.info("run.completed in " + str(run_dir))
        stripped_run_dir = None
        for watch in self.watchdirs:
            if not run_dir.startswith(watch):
                logging.debug("%s didn't start with %s" % (run_dir, watch))
                continue
            if watch[-1] != os.path.sep:
                watch += os.path.sep
            stripped_run_dir = run_dir.replace(watch, "", 1)
            break
        if stripped_run_dir is None:
            stripped_run_dir = run_dir

        logging.debug("stripped to " + stripped_run_dir)
        if self.notify_users is not None:
            for u in self.notify_users:
                self.send(u, 'Sequencing run %s finished' % \
                          (stripped_run_dir))
        if self.notify_runner is not None:
            for r in self.notify_runner:
                self.rpc_send(r, (stripped_run_dir,), 'sequencingFinished')

def main(args=None):
    bot = SpoolWatcher()
    return bot.main(args)

if __name__ == "__main__":
    ret = main(sys.argv[1:])
    #sys.exit(ret)

# TODO:
# send messages to copier specifying which mount to copy