56ad42f3c81e55fc89aa4491c08b5457f0026301
[htsworkflow.git] / gaworkflow / automation / spoolwatcher.py
1 #!/usr/bin/env python
2 import logging
3 import os
4 import re
5 import sys
6 import time
7 #import glob
8
9 from gaworkflow.util import mount
10
11 # this uses pyinotify
12 import pyinotify
13 from pyinotify import EventsCodes
14
15 from benderjab import rpc
16
17
class WatcherEvents(object):
    """
    Placeholder for per-runfolder event tracking (nothing implemented yet).

    Design notes carried over from the original author:
      * two events need to be tracked
          - one to send startCopy
          - one to send "OMG its broken"
      * "OMG its broken" needs to stop when we've seen enough cycles
      * this should be per runfolder
      * read the xml files
    """
    def __init__(self):
        """No state is set up yet."""
28         
29
class Handler(pyinotify.ProcessEvent):
    """
    Receive inotify events and forward the interesting ones to the bot.

    `last_event_time` records when the most recent create event was seen
    (None when no create event is pending) so the owner can detect when
    writing has gone quiet.
    """
    def __init__(self, watchmanager, bot):
        """
        :Parameters:
          `watchmanager` - the pyinotify watch manager this handler serves
          `bot` - object providing sequencingFinished(path) and
                  unmount_watch()
        """
        # NOTE(review): the base class initializer is not called here;
        # confirm that is fine for the pyinotify version in use.
        self.last_event_time = None
        self.watchmanager = watchmanager
        self.bot = bot

    def process_IN_CREATE(self, event):
        """
        Note the time of the create event and report finished runs.

        A file named run.completed (compared case-insensitively) triggers
        bot.sequencingFinished with the directory the file appeared in.
        """
        self.last_event_time = time.time()
        msg = "Create: %s" % os.path.join(event.path, event.name)
        if event.name.lower() == "run.completed":
            try:
                self.bot.sequencingFinished(event.path)
            except IOError:
                # best effort: a failed notification must not stop the watch,
                # but say which run folder was affected
                logging.error("Couldn't send sequencingFinished for %s",
                              event.path)
        logging.debug(msg)

    def process_IN_DELETE(self, event):
        """Log the removed path."""
        logging.debug("Remove: %s" % os.path.join(event.path, event.name))

    def process_IN_UNMOUNT(self, event):
        """Log the unmount and ask the bot to drop its watches."""
        pathname = os.path.join(event.path, event.name)
        logging.debug("IN_UNMOUNT: %s" % (pathname,))
        self.bot.unmount_watch()
53
class SpoolWatcher(rpc.XmlRpcBot):
    """
    Watch a directory and send a message when another process is done writing.

    This monitors a directory tree using inotify (linux specific) and,
    once files have been created, sends a startCopy message after
    <write_timeout> seconds in which no further files were created.

    (Basically when the solexa machine finishes dumping a round of data
    this'll hopefully send out a message saying hey look, there's data
    available.)
    """
    # these params need to be in the config file
    # I wonder where I should put the documentation
    #:Parameters:
    #    `watchdir` - which directory tree to monitor for modifications
    #    `profile` - specify which .gaworkflow profile to use
    #    `write_timeout` - how many seconds to wait for writes to finish to
    #                      the spool
    #    `notify_timeout` - how often to timeout from notify
    #    `notify_users` - who receives the chat message sent by
    #                     sequencingFinished
    #    `notify_runner` - who receives the startCopy and
    #                      sequencingFinished rpc messages

    def __init__(self, section=None, configfile=None):
        """
        Set configuration defaults and build the inotify machinery.

        :Parameters:
          `section` - passed through to the base class initializer
          `configfile` - passed through to the base class initializer
        """
        #if configfile is None:
        #    self.configfile = "~/.gaworkflow"
        super(SpoolWatcher, self).__init__(section, configfile)

        # defaults; watchdir has none and is demanded in read_config
        self.cfg['watchdir'] = None
        self.cfg['write_timeout'] = 10
        self.cfg['notify_users'] = None
        self.cfg['notify_runner'] = None

        # timeout handed to notifier.check_events in process_notify
        self.notify_timeout = 0.001
        self.wm = pyinotify.WatchManager()
        self.handler = Handler(self.wm, self)
        self.notifier = pyinotify.Notifier(self.wm, self.handler)
        # result of wm.add_watch, None while nothing is being watched
        self.wdd = None
        self.mount_point = None
        self.mounted = True

        self.notify_users = None
        self.notify_runner = None

        # presumably the base class runs these callables from its event
        # loop -- verify against rpc.XmlRpcBot
        self.eventTasks.append(self.process_notify)

    def read_config(self, section=None, configfile=None):
        """
        Load the configuration and cache the values this class needs.

        Raises bot.JIDMissingResource when a notify_runner entry is
        rejected by _parse_user_list(..., require_resource=True).
        """
        # The exception type used below lives in benderjab's bot module;
        # the file-level imports only bring in benderjab.rpc, so the name
        # `bot` was previously undefined here.
        # NOTE(review): assumes benderjab.bot exposes JIDMissingResource.
        from benderjab import bot

        super(SpoolWatcher, self).read_config(section, configfile)

        self.watch_dir = self._check_required_option('watchdir')
        self.write_timeout = int(self.cfg['write_timeout'])

        self.notify_users = self._parse_user_list(self.cfg['notify_users'])
        try:
            self.notify_runner = self._parse_user_list(
                self.cfg['notify_runner'], require_resource=True)
        except bot.JIDMissingResource:
            msg = 'need a full jabber ID + resource for xml-rpc destinations'
            # logging.FATAL is the numeric level constant, not a function
            logging.critical(msg)
            raise bot.JIDMissingResource(msg)

    def add_watch(self, watchdir=None):
        """
        start watching watchdir or self.watch_dir
        we're currently limited to watching one directory tree.
        """
        # the one tree limit is mostly because self.wdd is a single item
        # but managing it as a list might be a bit more annoying
        if watchdir is None:
            watchdir = self.watch_dir
        logging.info("Watching:"+str(watchdir))

        # remembered so process_notify can tell when it is mounted again
        self.mount_point = mount.find_mount_point_for(watchdir)

        mask = EventsCodes.IN_CREATE | EventsCodes.IN_UNMOUNT
        # rec traverses the tree and adds all the directories that are there
        # at the start.
        # auto_add will add in new directories as they are created
        self.wdd = self.wm.add_watch(watchdir, mask, rec=True, auto_add=True)

    def unmount_watch(self):
        """
        Drop all current watches and mark the filesystem as unmounted.

        Does nothing when no watch is active.
        """
        if self.wdd is not None:
            # hand rm_watch a real list of watch descriptors
            self.wm.rm_watch(list(self.wdd.values()))
            self.wdd = None
            self.mounted = False

    def process_notify(self, *args):
        """
        Pump inotify events, fire startCopy after a quiet period and
        restart the watch once an unmounted filesystem is back.
        """
        # process the queue of events as explained above
        self.notifier.process_events()
        #check events waits timeout
        if self.notifier.check_events(self.notify_timeout):
            # read notified events and enqeue them
            self.notifier.read_events()
            # should we do something?
        # has something happened?
        last_event_time = self.handler.last_event_time
        if last_event_time is not None:
            time_delta = time.time() - last_event_time
            if time_delta > self.write_timeout:
                self.startCopy()
                # clear it so one burst of writes sends one startCopy
                self.handler.last_event_time = None
        # handle unmounted filesystems
        if not self.mounted:
            if mount.is_mounted(self.mount_point):
                # we've been remounted. Huzzah!
                # restart the watch
                self.add_watch()
                self.mounted = True
                logging.info(
                    "%s was remounted, restarting watch" % \
                        (self.mount_point)
                )

    def _parser(self, msg, who):
        """
        Parse xmpp chat messages

        Commands are recognised by prefix: "help", "copy" and
        "finished <runfolder>". Returns the reply text.
        """
        help_text = u"I can send [copy] message, or squencer [finished]"
        if re.match(u"help", msg):
            reply = help_text
        elif re.match("copy", msg):
            self.startCopy()
            reply = u"sent copy message"
        elif re.match(u"finished", msg):
            words = msg.split()
            if len(words) == 2:
                self.sequencingFinished(words[1])
                reply = u"sending sequencing finished for %s" % (words[1])
            else:
                reply = u"need runfolder name"
        else:
            reply = u"I didn't understand '%s'" %(msg)
        return reply

    def start(self, daemonize):
        """
        Start application
        """
        self.add_watch()
        super(SpoolWatcher, self).start(daemonize)

    def stop(self):
        """
        shutdown application
        """
        # destroy the inotify's instance on this interrupt (stop monitoring)
        self.notifier.stop()
        super(SpoolWatcher, self).stop()

    def startCopy(self):
        """Send the startCopy rpc message to every notify_runner entry."""
        logging.debug("writes seem to have stopped")
        if self.notify_runner is not None:
            for r in self.notify_runner:
                self.rpc_send(r, tuple(), 'startCopy')

    def sequencingFinished(self, run_dir):
        """
        Announce that a sequencing run has finished.

        `run_dir` is reported relative to self.watch_dir when it lies
        underneath it, otherwise unchanged. notify_users get a chat
        message, notify_runner entries get a sequencingFinished rpc call.
        """
        logging.info("run.completed in " + str(run_dir))
        # Strip the watch directory as a literal leading prefix; treating
        # it as a regular expression would misread characters such as '.'
        # or '+' and could remove matches from the middle of the path.
        prefix = self.watch_dir
        if not prefix.endswith(os.path.sep):
            prefix += os.path.sep
        if run_dir.startswith(prefix):
            stripped_run_dir = run_dir[len(prefix):]
        else:
            stripped_run_dir = run_dir
        logging.debug("stripped to " + stripped_run_dir)
        if self.notify_users is not None:
            for u in self.notify_users:
                self.send(u, 'Sequencing run %s finished' % (stripped_run_dir))
        if self.notify_runner is not None:
            for r in self.notify_runner:
                self.rpc_send(r, (stripped_run_dir,), 'sequencingFinished')
222         
def main(args=None):
    """
    Build a SpoolWatcher and run it with the given argument list.

    Returns whatever SpoolWatcher.main returns.
    """
    watcher = SpoolWatcher()
    return watcher.main(args)
226     
if __name__ == "__main__":
    # run with the command line arguments (program name dropped) and use
    # main()'s result as the process exit status
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)
229