9 from htsworkflow.util import mount
10 from htsworkflow.automation.solexa import is_runfolder, get_top_dir
14 from pyinotify import EventsCodes
15 IN_CREATE = EventsCodes.ALL_FLAGS['IN_CREATE']
16 IN_UNMOUNT = EventsCodes.ALL_FLAGS['IN_UNMOUNT']
18 from benderjab import rpc
20 LOGGER = logging.getLogger(__name__)
class WatcherEvent(object):
    """
    Track information about a file event

    Currently its time, and if it was an indication we've completed the run.

    Attributes:
      time       -- time.time() value captured when the event object was built
      event_root -- whatever the creator passed in (the handler passes the
                    name of the top level directory the event occurred in)
      complete   -- True once a completion file has been seen for event_root
    """
    def __init__(self, event_root=None):
        self.time = time.time()
        self.event_root = event_root
        # set to True by the inotify handler when a completion file shows up
        self.complete = False

    def __str__(self):
        """Return a short human readable description used in log messages."""
        complete = "(completed)" if self.complete else ""
        return "<WatchEvent: %s %s %s>" % (time.ctime(self.time), self.event_root, complete)

    # str() never consults __unicode__ on Python 3, so the formatting lives in
    # __str__; the old name is kept as an alias for existing callers.
    __unicode__ = __str__
class Handler(pyinotify.ProcessEvent):
    """
    pyinotify event handler that records activity inside runfolders.

    ``last_event`` maps each watch path to a dictionary of
    ``{runfolder name: WatcherEvent}`` holding the most recent create event
    seen for that runfolder.
    """
    def __init__(self, watchmanager, bot, completion_files=None):
        """
        Completion file contains current "completion" filename

        watchmanager     -- stored on the instance, not otherwise used here
        bot              -- owner object; must provide ``log``, ``wdds``,
                            ``watchdirs`` and ``unmount_watch``
        completion_files -- optional list of file names that mark a finished
                            run; compared case-insensitively
        """
        self.last_event = {}
        self.watchmanager = watchmanager
        self.bot = bot
        self.log = bot.log
        if completion_files is not None:
            completion_files = [x.lower() for x in completion_files]
        self.completion_files = completion_files

    def process_IN_CREATE(self, event):
        """Record a create event against the runfolder it happened in."""
        # Nothing is being watched (e.g. everything was just unmounted).
        # The previous code looped once per watch descriptor without using
        # it, which repeated identical work; one pass is sufficient.
        if not self.bot.wdds:
            return

        # tolerate a handler built without any completion file names
        completion_files = self.completion_files or ()
        event_name = event.name.lower()

        for watch_path in self.bot.watchdirs:
            # I only care about things created inside the watch directory, so
            # the event path needs to be longer than the watch path in addition to
            # starting with the watch_path
            if len(event.path) <= len(watch_path) or not event.path.startswith(watch_path):
                continue

            # compute name of the top level directory that had an event
            # in the current watch path
            target = get_top_dir(watch_path, event.path)
            runfolder = os.path.join(watch_path, target)

            if not is_runfolder(target):
                self.log.debug("Skipping %s, not a runfolder" % (target,))
                continue

            # grab the previous events for this watch path
            watch_path_events = self.last_event.setdefault(watch_path, {})

            # if we've already seen an event in this directory (AKA runfolder)
            # keep track if its already hit the "completed" flag
            previous = watch_path_events.get(target)
            run_already_complete = previous is not None and previous.complete

            current = WatcherEvent(target)
            watch_path_events[target] = current

            msg = "Create: %s %s %s %s" % (watch_path, target, event.path, event.name)

            # the ReadPrep step uses some of the same file completion flags as the
            # main analysis, which means this completion code might get tripped because of it
            # so we need to make sure we're getting the completion file in the root of the
            # runfolder
            if (event_name in completion_files and event.path == runfolder) \
               or run_already_complete:
                current.complete = True
                # NOTE(review): suffix and debug call reconstructed; confirm
                msg += "(completed)"

            self.log.debug(msg)

    def process_IN_DELETE(self, event):
        """Log removals; nothing else is done with them."""
        self.log.debug("Remove: %s" % os.path.join(event.path, event.name))

    def process_IN_UNMOUNT(self, event):
        """Tell the bot that the filesystem backing a watch went away."""
        pathname = os.path.join(event.path, event.name)
        self.log.debug("IN_UNMOUNT: %s" % (pathname,))
        self.bot.unmount_watch(event.path)
class SpoolWatcher(rpc.XmlRpcBot):
    """
    Watch a directory and send a message when another process is done writing.

    This monitors a directory tree using inotify (linux specific) and
    after some files having been written will send a message after <timeout>
    seconds of no file writing.

    (Basically when the solexa machine finishes dumping a round of data
    this'll hopefully send out a message saying hey look there's data
    available.)
    """
    # these params need to be in the config file
    # I wonder where I should put the documentation
    #
    #   `watchdirs` - list of directories to monitor for modifications
    #   `profile` - specify which .htsworkflow profile to use
    #   `write_timeout` - how many seconds to wait for writes to finish
    #   `notify_timeout` - how often to timeout from notify
    #   `completion_files` - what files indicates we've finished sequencing
    #                        defaults to the two ImageAnalysis_Netcopy_complete
    #                        file names set in __init__

    def __init__(self, section=None, configfile=None):
        """Set configuration defaults and empty watch bookkeeping."""
        #if configfile is None:
        #    self.configfile = "~/.htsworkflow"
        super(SpoolWatcher, self).__init__(section, configfile)

        self.cfg['watchdirs'] = None
        self.cfg['write_timeout'] = 10
        self.cfg['notify_users'] = None
        self.cfg['notify_runner'] = None
        self.cfg['completion_files'] = 'ImageAnalysis_Netcopy_complete_READ2.txt ImageAnalysis_Netcopy_complete_SINGLEREAD.txt'

        self.watchdirs = []
        self.watchdir_url_map = {}
        self.notify_timeout = 0.001

        # pyinotify objects are created lazily by add_watch()
        self.wm = None
        self.handler = None
        self.notifier = None

        self.notify_users = None
        self.notify_runner = None
        # results of every WatchManager.add_watch() call
        self.wdds = []

        # keep track if the specified mount point is currently mounted
        self.mounted_points = {}
        # keep track of which mount points tie to which watch directories
        # so maybe we can remount them.
        self.mounts_to_watches = {}

        self.eventTasks.append(self.process_notify)

    def read_config(self, section=None, configfile=None):
        """Load watch directories, timeouts and notification targets."""
        # Don't give in to the temptation to use logging functions here,
        # need to wait until after we detach in start
        super(SpoolWatcher, self).read_config(section, configfile)

        self.watchdirs = shlex.split(self._check_required_option('watchdirs'))
        # see if there's an alternate url that should be used for the watchdir
        for watchdir in self.watchdirs:
            self.watchdir_url_map[watchdir] = self.cfg.get(watchdir, watchdir)

        self.write_timeout = int(self.cfg['write_timeout'])
        self.completion_files = shlex.split(self.cfg['completion_files'])

        self.notify_users = self._parse_user_list(self.cfg['notify_users'])
        # NOTE(review): `bot` must be bound at module scope for this except
        # clause to work; confirm the file imports it.
        try:
            self.notify_runner = \
                self._parse_user_list(self.cfg['notify_runner'],
                                      require_resource=True)
        except bot.JIDMissingResource:
            msg = 'need a full jabber ID + resource for xml-rpc destinations'
            raise bot.JIDMissingResource(msg)

        self.handler = None
        self.notifier = None

    def add_watch(self, watchdirs=None):
        """
        start watching watchdirs or self.watchdirs

        watchdirs may be a list of directories or a single directory name.
        """
        # create the watch managers if we need them
        if self.wm is None:
            self.wm = pyinotify.WatchManager()
            self.handler = Handler(self.wm, self, self.completion_files)
            self.notifier = pyinotify.Notifier(self.wm, self.handler)

        if watchdirs is None:
            watchdirs = self.watchdirs
        elif isinstance(watchdirs, str):
            # a bare path would otherwise be iterated character by character
            watchdirs = [watchdirs]

        mask = IN_CREATE | IN_UNMOUNT
        # rec traverses the tree and adds all the directories that are there
        # at the start.
        # auto_add will add in new directories as they are created
        for w in watchdirs:
            mount_location = mount.find_mount_point_for(w)
            self.mounted_points[mount_location] = True
            mounts = self.mounts_to_watches.get(mount_location, [])
            if w not in mounts:
                mounts.append(w)
            self.mounts_to_watches[mount_location] = mounts

            self.log.info("Watching:"+str(w))
            self.wdds.append(self.wm.add_watch(w, mask, rec=True, auto_add=True))

    def unmount_watch(self, event_path):
        """Drop every registered watch after an unmount notification."""
        # remove backwards so we don't get weirdness from
        # the list getting shorter; pop() also avoids the old off-by-one
        # (the index loop started at len(self.wdds)).
        while self.wdds:
            wdd = self.wdds.pop()
            self.log.info('unmounting: '+str(list(wdd.items())))
            self.wm.rm_watch(list(wdd.values()))
        # NOTE(review): reconstructed assignment; confirm against upstream.
        self.mounted = False
        # NOTE(review): nothing visible ever sets self.mounted_points[...] to
        # False, so the remount branch in process_notify cannot fire; confirm
        # whether that should happen here.

    def make_copy_url(self, watchdir, list_event_dir):
        """Return the copy url for list_event_dir under watchdir's url root."""
        root_copy_url = self.watchdir_url_map[watchdir]
        if root_copy_url[-1] != '/':
            root_copy_url += '/'
        copy_url = root_copy_url + list_event_dir
        self.log.debug('Copy url: %s' % (copy_url,))
        return copy_url

    def process_notify(self, *args):
        """Pump inotify, then announce runfolders whose writes went quiet."""
        if self.notifier is None:
            # nothing to do yet
            return
        # process the queue of events as explained above
        self.notifier.process_events()
        #check events waits timeout
        if self.notifier.check_events(self.notify_timeout):
            # read notified events and enqeue them
            self.notifier.read_events()

        # has something happened?
        for watchdir, last_events in list(self.handler.last_event.items()):
            for last_event_dir, last_event_detail in list(last_events.items()):
                time_delta = time.time() - last_event_detail.time
                if time_delta > self.write_timeout:
                    LOGGER.info("timeout: %s" % (str(last_event_detail),))
                    copy_url = self.make_copy_url(watchdir, last_event_dir)
                    self.startCopy(copy_url)
                    if last_event_detail.complete:
                        self.sequencingFinished(last_event_detail.event_root)
                    # forget only the runfolder just announced; resetting the
                    # whole watchdir dropped pending events of its siblings
                    last_events.pop(last_event_dir, None)

        # handle unmounted filesystems
        for mount_point, was_mounted in list(self.mounted_points.items()):
            if not was_mounted and mount.is_mounted(mount_point):
                # we've been remounted. Huzzah!
                # restart the watch
                for watch in list(self.mounts_to_watches.get(mount_point, [])):
                    self.add_watch([watch])
                    self.log.info(
                        "%s was remounted, restarting watch" % \
                            (mount_point)
                    )
                self.mounted_points[mount_point] = True

    def _parser(self, msg, who):
        """
        Parse xmpp chat messages
        """
        help_text = "I can send [copy] message, or squencer [finished]"
        if re.match("help", msg):
            reply = help_text
        elif re.match("copy", msg):
            # NOTE(review): argument reconstructed; confirm what is passed
            self.startCopy(msg)
            reply = "sent copy message"
        elif re.match("finished", msg):
            words = msg.split()
            if len(words) == 2:
                self.sequencingFinished(words[1])
                reply = "sending sequencing finished for %s" % (words[1])
            else:
                reply = "need runfolder name"
        else:
            reply = "I didn't understand '%s'" %(msg)
        return reply

    def run(self):
        """
        Start application
        """
        # we have to configure pyinotify after BenderJab.start is called
        # as weird things happen to pyinotify if the stdio is closed
        # after it's initialized.
        self.add_watch()
        super(SpoolWatcher, self).run()

    def stop(self):
        """
        shutdown application
        """
        # destroy the inotify's instance on this interrupt (stop monitoring)
        if self.notifier is not None:
            self.notifier.stop()
        super(SpoolWatcher, self).stop()

    def startCopy(self, copy_url=None):
        """Send the startCopy message to runners (rpc) and users (chat)."""
        self.log.debug("writes seem to have stopped")
        if self.notify_runner is not None:
            for r in self.notify_runner:
                self.rpc_send(r, tuple([copy_url]), 'startCopy')
        if self.notify_users is not None:
            for u in self.notify_users:
                self.send(u, 'startCopy %s.' % (copy_url,))

    def sequencingFinished(self, run_dir):
        """Announce that run_dir finished, reported relative to its watch dir."""
        self.log.info("run.completed in " + str(run_dir))

        # Default to the name as given; strip a leading watch directory when
        # one matches. Plain prefix slicing replaces re.sub(), which treated
        # the path as a regular expression and matched anywhere in run_dir.
        stripped_run_dir = run_dir
        for watch in self.watchdirs:
            if not run_dir.startswith(watch):
                LOGGER.info("%s didn't start with %s" % (run_dir, watch))
                continue
            prefix = watch if watch.endswith(os.path.sep) else watch + os.path.sep
            if run_dir.startswith(prefix):
                stripped_run_dir = run_dir[len(prefix):]
                break

        self.log.debug("stripped to " + stripped_run_dir)
        if self.notify_users is not None:
            for u in self.notify_users:
                self.send(u, 'Sequencing run %s finished' % \
                          (stripped_run_dir))
        if self.notify_runner is not None:
            for r in self.notify_runner:
                self.rpc_send(r, (stripped_run_dir,), 'sequencingFinished')
def main(args=None):
    """
    Build the spool watcher and hand it the command line arguments.

    Returns whatever the bot's own main() returns.
    """
    # NOTE(review): construction reconstructed; confirm no arguments are needed
    watcher = SpoolWatcher()
    return watcher.main(args)


if __name__ == "__main__":
    ret = main(sys.argv[1:])

# TODO:
# send messages to copier specifying which mount to copy