9 from htsworkflow.util import mount
13 from pyinotify import EventsCodes
15 from benderjab import rpc
17 def is_runfolder(name):
# Test whether `name` looks like a runfolder name: six digits followed by
# an underscore at the very start of the string (re.match only matches at
# the beginning). The two doctest calls below cover one matching and one
# non-matching name.
21 >>> print is_runfolder('090630_HWUSI-EAS999_0006_30LNFAAXX')
23 >>> print is_runfolder('hello')
# The trailing ".*" does not change whether the pattern matches; anything
# may follow the "NNNNNN_" prefix.
26 if re.match("[0-9]{6}_.*", name):
31 def get_top_dir(root, path):
# Given a `root` directory and a `path`, pick out the first path component
# that lies directly below `root`. The doctest calls below show the
# intended inputs (with and without a trailing slash on `root`, a path
# outside `root`, and a path equal to `root`).
33 Return the directory in path that is a subdirectory of root.
36 >>> print get_top_dir('/a/b/c', '/a/b/c/d/e/f')
38 >>> print get_top_dir('/a/b/c/', '/a/b/c/d/e/f')
40 >>> print get_top_dir('/a/b/c', '/g/e/f')
42 >>> print get_top_dir('/a/b/c', '/a/b/c')
# NOTE(review): this is a plain string-prefix test, so '/a/b/cd/x' also
# "starts with" '/a/b/c' -- confirm callers only pass real sub-paths.
45 if path.startswith(root):
# Drop the `root` prefix, leaving the part of `path` below it.
46 subpath = path[len(root):]
# NOTE(review): the separator is tested as a literal '/' here but split on
# os.path.sep below; the two agree on POSIX only.
47 if subpath.startswith('/'):
# The first separator-delimited component of the remainder is the answer.
49 return subpath.split(os.path.sep)[0]
53 class WatcherEvent(object):
# Small record describing one observed file event: the time it was seen
# and the `event_root` it was reported for. Handler.process_IN_CREATE also
# reads and sets a `complete` attribute on these objects.
# NOTE(review): no assignment to `complete` is visible in this class --
# confirm it is initialised in __init__.
55 Track information about a file event
57 Currently its time, and if it was an indication we've completed the run.
59 def __init__(self, event_root=None):
# Stamp the event with the current wall-clock time (seconds since epoch).
60 self.time = time.time()
61 self.event_root = event_root
64 def __unicode__(self):
# Human-readable summary: ctime-formatted timestamp, the event root, and
# the text held in the local `complete` variable.
# NOTE(review): the label reads "WatchEvent" although the class is named
# WatcherEvent; left as-is because it is runtime output.
66 complete = "(completed)"
69 return u"<WatchEvent: %s %s %s>" % (time.ctime(self.time), self.event_root, complete)
71 class Handler(pyinotify.ProcessEvent):
# pyinotify event handler. For every create event inside a watched
# directory it records, per watch path and per runfolder, the most recent
# WatcherEvent in self.last_event; SpoolWatcher.process_notify() later
# reads that mapping. Unmount events are forwarded to the owning bot.
72 def __init__(self, watchmanager, bot, completion_file=None):
# `completion_file` is compared against lower-cased event names in
# process_IN_CREATE, so it is stored lower-cased (None stays None).
74 Completion file contains current "completion" filename
77 self.watchmanager = watchmanager
79 if completion_file is not None:
80 completion_file = completion_file.lower()
81 self.completion_file = completion_file
83 def process_IN_CREATE(self, event):
# Record a create event against the runfolder it happened in, carrying
# forward the "complete" flag from any earlier event for that runfolder.
# NOTE(review): `wdd` from the outer loop is not used in the visible body,
# so the inner work appears to repeat once per entry in self.bot.wdds --
# confirm whether the outer loop is needed.
84 for wdd in self.bot.wdds:
85 for watch_path in self.bot.watchdirs:
86 run_already_complete = False
87 # I only care about things created inside the watch directory, so
88 # the event path needs to be longer than the watch path in addition to
89 # starting with the watch_path
90 if len(event.path) > len(watch_path) and event.path.startswith(watch_path):
91 # compute name of the top level directory that had an event
92 # in the current watch path
93 target = get_top_dir(watch_path, event.path)
95 if not is_runfolder(target):
96 logging.debug("Skipping %s, not a runfolder" % (target,))
99 # grab the previous events for this watch path
100 watch_path_events = self.last_event.setdefault(watch_path, {})
102 # if we've already seen an event in this directory (AKA runfolder)
103 # keep track if it's already hit the "completed" flag
# NOTE(review): dict.has_key() exists only in Python 2.
104 if watch_path_events.has_key(target):
105 run_already_complete = watch_path_events[target].complete
# Replace any earlier record for this runfolder with a freshly timestamped
# one; this restarts the quiet-period clock used by process_notify().
107 watch_path_events[target] = WatcherEvent(target)
108 #self.last_event.setdefault(watch_path, {})[target] = WatcherEvent(target)
110 msg = "Create: %s %s %s %s" % (watch_path, target, event.path, event.name)
# The run counts as complete when the created file is the completion file
# (case-insensitive) or when it had already been flagged before this event.
112 if self.completion_file == event.name.lower() or run_already_complete:
113 self.last_event[watch_path][target].complete = True
118 def process_IN_DELETE(self, event):
# Delete events are only logged at debug level.
119 logging.debug("Remove: %s" % os.path.join(event.path, event.name))
122 def process_IN_UNMOUNT(self, event):
# Log the unmount and let the bot drop its watches for event.path.
123 pathname = os.path.join(event.path, event.name)
124 logging.debug("IN_UNMOUNT: %s" % (pathname,))
125 self.bot.unmount_watch(event.path)
127 class SpoolWatcher(rpc.XmlRpcBot):
129 Watch a directory and send a message when another process is done writing.
131 This monitors a directory tree using inotify (linux specific) and
132 after some files having been written will send a message after <timeout>
133 seconds of no file writing.
135 (Basically when the solexa machine finishes dumping a round of data
136 this'll hopefully send out a message saying hey look there's data available
139 # these params need to be in the config file
140 # I wonder where I should put the documentation
142 # `watchdirs` - list of directories to monitor for modifications
143 # `profile` - specify which .htsworkflow profile to use
144 # `write_timeout` - how many seconds to wait for writes to finish to
146 # `notify_timeout` - how often to timeout from notify
147 # `completion_file` - what file indicates we've finished sequencing
148 # defaults to: netcopy_complete.txt
150 def __init__(self, section=None, configfile=None):
# Install default configuration values and empty bookkeeping state; the
# effective settings are derived from self.cfg later, in read_config().
151 #if configfile is None:
152 # self.configfile = "~/.htsworkflow"
153 super(SpoolWatcher, self).__init__(section, configfile)
# Defaults for the options this bot reads. 'watchdirs' defaults to None
# and read_config() fetches it via _check_required_option().
155 self.cfg['watchdirs'] = None
156 self.cfg['write_timeout'] = 10
157 self.cfg['notify_users'] = None
158 self.cfg['notify_runner'] = None
159 self.cfg['completion_file'] = 'netcopy_complete.txt'
# watch directory -> URL root used by make_copy_url(); filled in by
# read_config().
162 self.watchdir_url_map = {}
# Timeout value handed to notifier.check_events() in process_notify().
163 self.notify_timeout = 0.001
166 self.notify_users = None
167 self.notify_runner = None
170 # keep track if the specified mount point is currently mounted
171 self.mounted_points = {}
172 # keep track of which mount points tie to which watch directories
173 # so maybe we can remount them.
174 self.mounts_to_watches = {}
# Register process_notify as an event task; presumably the base bot calls
# the entries of eventTasks from its main loop -- confirm in benderjab.
176 self.eventTasks.append(self.process_notify)
178 def read_config(self, section=None, configfile=None):
# Load the configuration through the base class, then turn the raw
# self.cfg values into the attributes the watcher uses: watchdirs,
# watchdir_url_map, write_timeout, completion_file, notify_users and
# notify_runner.
179 # Don't give in to the temptation to use logging functions here,
180 # need to wait until after we detach in start
181 super(SpoolWatcher, self).read_config(section, configfile)
# 'watchdirs' is a single string split shell-style, so quoted paths may
# contain spaces.
183 self.watchdirs = shlex.split(self._check_required_option('watchdirs'))
184 # see if there's an alternate url that should be used for the watchdir
185 for watchdir in self.watchdirs:
# An option named exactly like the watch directory overrides the URL root;
# without one the directory path itself is used.
186 self.watchdir_url_map[watchdir] = self.cfg.get(watchdir, watchdir)
188 self.write_timeout = int(self.cfg['write_timeout'])
189 self.completion_file = self.cfg['completion_file']
191 self.notify_users = self._parse_user_list(self.cfg['notify_users'])
# Runner addresses are parsed with require_resource=True; a missing
# resource is re-raised below with a more descriptive message.
193 self.notify_runner = \
194 self._parse_user_list(self.cfg['notify_runner'],
195 require_resource=True)
196 except bot.JIDMissingResource:
197 msg = 'need a full jabber ID + resource for xml-rpc destinations'
198 raise bot.JIDMissingResource(msg)
203 def add_watch(self, watchdirs=None):
# Create the pyinotify watch manager, handler and notifier, then add a
# recursive watch for each requested directory, remembering which mount
# point each one lives on.
# NOTE(review): the default argument is the self.watchdirs list, while
# process_notify() passes a single entry from mounts_to_watches -- confirm
# that entry has the shape this method expects.
205 start watching watchdir or self.watchdir
206 we're currently limited to watching one directory tree.
208 # create the watch managers if we need them
210 self.wm = pyinotify.WatchManager()
211 self.handler = Handler(self.wm, self, self.completion_file)
212 self.notifier = pyinotify.Notifier(self.wm, self.handler)
214 # the one tree limit is mostly because self.wdd is a single item
215 # but managing it as a list might be a bit more annoying
216 if watchdirs is None:
217 watchdirs = self.watchdirs
# Only creation and unmount events are requested from inotify.
219 mask = EventsCodes.IN_CREATE | EventsCodes.IN_UNMOUNT
220 # rec traverses the tree and adds all the directories that are there
222 # auto_add will add in new directories as they are created
# Mark the mount point holding this directory as mounted and fetch the
# watch list already recorded for it (a new empty list if none).
224 mount_location = mount.find_mount_point_for(w)
225 self.mounted_points[mount_location] = True
226 mounts = self.mounts_to_watches.get(mount_location, [])
229 self.mounts_to_watches[mount_location] = mounts
231 logging.info(u"Watching:"+unicode(w))
# Keep what wm.add_watch() returns; unmount_watch() later calls .items()
# and .values() on these entries.
232 self.wdds.append(self.wm.add_watch(w, mask, rec=True, auto_add=True))
234 def unmount_watch(self, event_path):
# Called by Handler.process_IN_UNMOUNT: walk the stored watch entries from
# the end and remove their watches from the watch manager.
235 # remove backwards so we don't get weirdness from
236 # the list getting shorter
# NOTE(review): range(len(self.wdds), 0, -1) yields len..1, i.e. 1-based
# positions -- confirm the lookup that binds `wdd` compensates for that.
237 for i in range(len(self.wdds),0, -1):
# `wdd` is used as a mapping here (items()/values()); presumably path ->
# watch descriptor as returned by wm.add_watch() -- TODO confirm.
239 logging.info(u'unmounting: '+unicode(wdd.items()))
240 self.wm.rm_watch(wdd.values())
244 def make_copy_url(self, watchdir, list_event_dir):
# Build the copy URL for `list_event_dir` by appending it to the URL root
# configured for `watchdir` in self.watchdir_url_map.
245 root_copy_url = self.watchdir_url_map[watchdir]
# Check for a trailing '/' on the root before concatenating.
# NOTE(review): indexing [-1] raises IndexError if the root is an empty
# string.
246 if root_copy_url[-1] != '/':
248 copy_url = root_copy_url + list_event_dir
249 logging.debug('Copy url: %s' % (copy_url,))
252 def process_notify(self, *args):
# Event task registered in __init__. It drains pending inotify events,
# then for each recorded runfolder that has been quiet for longer than
# write_timeout it sends startCopy (and sequencingFinished if the run was
# flagged complete). Finally it re-adds watches for mount points that have
# come back.
# The notifier only exists once add_watch() has created it.
253 if self.notifier is None:
256 # process the queue of events as explained above
257 self.notifier.process_events()
258 #check events waits timeout
259 if self.notifier.check_events(self.notify_timeout):
260 # read notified events and enqueue them
261 self.notifier.read_events()
262 # should we do something?
263 # has something happened?
264 for watchdir, last_events in self.handler.last_event.items():
265 for last_event_dir, last_event_detail in last_events.items():
# Seconds elapsed since the last create event recorded for this runfolder.
266 time_delta = time.time() - last_event_detail.time
267 if time_delta > self.write_timeout:
# NOTE(review): bare print writes to stdout while the rest of the file
# uses logging -- confirm this is intended for a detached process.
268 print "timeout", unicode(last_event_detail)
269 copy_url = self.make_copy_url(watchdir, last_event_dir)
270 self.startCopy(copy_url)
271 if last_event_detail.complete:
272 self.sequencingFinished(last_event_detail.event_root)
# NOTE(review): this clears every recorded event for the watch directory,
# not only the runfolder just handled -- confirm other runfolders that
# have not yet timed out are not meant to be kept.
274 self.handler.last_event[watchdir] = {}
275 # handle unmounted filesystems
276 for mount_point, was_mounted in self.mounted_points.items():
277 if not was_mounted and mount.is_mounted(mount_point):
278 # we've been remounted. Huzzah!
280 for watch in self.mounts_to_watches[mount_point]:
281 self.add_watch(watch)
283 "%s was remounted, restarting watch" % \
286 self.mounted_points[mount_point] = True
288 def _parser(self, msg, who):
# Handle a chat command. Messages beginning with "help", "copy" or
# "finished" are recognised (re.match only anchors at the start, so any
# message with that prefix matches); anything else gets the
# "I didn't understand" reply.
290 Parse xmpp chat messages
# NOTE(review): the user-facing text says "squencer", likely meant
# "sequencer"; the local name `help` also shadows the builtin.
292 help = u"I can send [copy] message, or squencer [finished]"
293 if re.match(u"help", msg):
295 elif re.match("copy", msg):
297 reply = u"sent copy message"
298 elif re.match(u"finished", msg):
# words[1] is used as the runfolder name; presumably `words` is the
# message split on whitespace -- TODO confirm.
301 self.sequencingFinished(words[1])
302 reply = u"sending sequencing finished for %s" % (words[1])
304 reply = u"need runfolder name"
306 reply = u"I didn't understand '%s'" %(msg)
313 # we have to configure pyinotify after BenderJab.start is called
314 # as weird things happen to pyinotify if the stdio is closed
315 # after it's initialized.
317 super(SpoolWatcher, self).run()
323 # destroy the inotify's instance on this interrupt (stop monitoring)
324 if self.notifier is not None:
326 super(SpoolWatcher, self).stop()
328 def startCopy(self, copy_url=None):
# Announce that writes have stopped and copying can begin: an XML-RPC
# 'startCopy' call goes to every configured runner, and a chat message to
# every configured user. Either list may be None, in which case it is
# skipped.
329 logging.debug("writes seem to have stopped")
330 if self.notify_runner is not None:
331 for r in self.notify_runner:
# The RPC arguments are passed as a one-element tuple holding the URL.
332 self.rpc_send(r, tuple([copy_url]), 'startCopy')
333 if self.notify_users is not None:
334 for u in self.notify_users:
335 self.send(u, 'startCopy %s.' % (copy_url,))
337 def sequencingFinished(self, run_dir):
# Report that a sequencing run has finished: try to strip a watch
# directory prefix from `run_dir`, then send a chat message to every
# configured user and an XML-RPC 'sequencingFinished' call to every
# configured runner with the stripped name.
338 # need to strip off self.watchdirs from rundir I suspect.
339 logging.info("run.completed in " + str(run_dir))
340 for watch in self.watchdirs:
341 if not run_dir.startswith(watch):
# NOTE(review): bare print writes to stdout while the rest of the file
# uses logging.
342 print "%s didn't start with %s" % (run_dir, watch)
344 if watch[-1] != os.path.sep:
# NOTE(review): re.sub() treats `watch` as a regular expression and
# replaces every occurrence, not just the leading prefix; paths containing
# regex metacharacters (e.g. '.' or '+') may strip the wrong text.
346 stripped_run_dir = re.sub(watch, "", run_dir)
348 stripped_run_dir = run_dir
350 logging.debug("stripped to " + stripped_run_dir)
351 if self.notify_users is not None:
352 for u in self.notify_users:
353 self.send(u, 'Sequencing run %s finished' % \
355 if self.notify_runner is not None:
356 for r in self.notify_runner:
357 self.rpc_send(r, (stripped_run_dir,), 'sequencingFinished')
361 return bot.main(args)
# Script entry point: run main() with the command-line arguments, minus
# the program name, and keep its return value in `ret`.
363 if __name__ == "__main__":
364 ret = main(sys.argv[1:])
368 # send messages to copier specifying which mount to copy