Add wait_for_ipar option to the spool watcher config which will wait until
[htsworkflow.git] / htsworkflow/automation/spoolwatcher.py
#!/usr/bin/env python
import logging
import os
import re
import sys
import time
#import glob

from htsworkflow.util import mount

# this uses pyinotify
import pyinotify
from pyinotify import EventsCodes

from benderjab import rpc
# bot provides JIDMissingResource, which read_config below catches and re-raises
from benderjab import bot


class WatcherEvents(object):
    # two events need to be tracked
    # one to send startCopy
    # one to send OMG its broken
    # OMG its broken needs to stop when we've seen enough
    #  cycles
    # this should be per runfolder.
    # read the xml files
    def __init__(self):
        pass


class Handler(pyinotify.ProcessEvent):
    def __init__(self, watchmanager, bot, ipar=False):
        """
        The ipar flag indicates we should wait for IPAR to finish,
        instead of just the run finishing.
        """
        logging.debug('ipar flag: ' + str(ipar))
        self.last_event_time = None
        self.watchmanager = watchmanager
        self.bot = bot
        self.ipar_mode = ipar
        # which file marks the end of the run depends on whether we are
        # waiting for IPAR's network copy or for the sequencer's run.completed
        if self.ipar_mode:
            self.last_file = 'IPAR_Netcopy_Complete.txt'.lower()
        else:
            self.last_file = "run.completed".lower()

    def process_IN_CREATE(self, event):
        self.last_event_time = time.time()
        msg = "Create: %s" % os.path.join(event.path, event.name)

        if event.name.lower() == self.last_file:
            try:
                self.bot.sequencingFinished(event.path)
            except IOError, e:
                logging.error("Couldn't send sequencingFinished: %s" % (e,))
        logging.debug(msg)
    def process_IN_DELETE(self, event):
        logging.debug("Remove: %s" % os.path.join(event.path, event.name))

    def process_IN_UNMOUNT(self, event):
        pathname = os.path.join(event.path, event.name)
        logging.debug("IN_UNMOUNT: %s" % (pathname,))
        self.bot.unmount_watch()

class SpoolWatcher(rpc.XmlRpcBot):
    """
    Watch a directory and send a message when another process is done writing.

    This monitors a directory tree using inotify (Linux specific) and,
    once some files have been written, sends a message after <write_timeout>
    seconds of no further file writes.

    (Basically, when the Solexa machine finishes dumping a round of data,
    this should send out a message saying there is new data available.)
    """
    # these params need to be in the config file
    # I wonder where I should put the documentation
    #:Parameters:
    #    `watchdir` - which directory tree to monitor for modifications
    #    `profile` - specify which .htsworkflow profile to use
    #    `write_timeout` - how many seconds to wait for writes to finish to
    #                      the spool
    #    `notify_timeout` - how often to timeout from notify
    #    `notify_users` - jabber IDs to message when a sequencing run finishes
    #    `notify_runner` - full jabber ID + resource to send xml-rpc
    #                      startCopy / sequencingFinished calls to
    #    `wait_for_ipar` - if non-zero, wait for IPAR_Netcopy_Complete.txt
    #                      instead of run.completed before reporting a
    #                      finished run

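    # A minimal sketch of what a matching config-file section might look
    # like, assuming ~/.htsworkflow uses standard ConfigParser ini syntax
    # (an assumption; only the option names come from this module, while the
    # section name, path, and jabber addresses are illustrative placeholders):
    #
    #   [spoolwatcher]
    #   watchdir = /var/spool/solexa
    #   write_timeout = 10
    #   notify_users = alice@example.com
    #   notify_runner = runner@example.com/copier
    #   wait_for_ipar = 1
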
    def __init__(self, section=None, configfile=None):
        #if configfile is None:
        #    self.configfile = "~/.htsworkflow"
        super(SpoolWatcher, self).__init__(section, configfile)

        self.cfg['watchdir'] = None
        self.cfg['write_timeout'] = 10
        self.cfg['notify_users'] = None
        self.cfg['notify_runner'] = None
        self.cfg['wait_for_ipar'] = 0

        self.notify_timeout = 0.001
        self.wm = pyinotify.WatchManager()
        self.wdd = None
        self.mount_point = None
        self.mounted = True

        self.notify_users = None
        self.notify_runner = None

        self.eventTasks.append(self.process_notify)

    def read_config(self, section=None, configfile=None):
        super(SpoolWatcher, self).read_config(section, configfile)

        self.watch_dir = self._check_required_option('watchdir')
        self.write_timeout = int(self.cfg['write_timeout'])
        self.wait_for_ipar = int(self.cfg['wait_for_ipar'])
        logging.debug('wait for ipar: ' + str(self.cfg['wait_for_ipar']))

        self.notify_users = self._parse_user_list(self.cfg['notify_users'])
        try:
            self.notify_runner = \
                self._parse_user_list(self.cfg['notify_runner'],
                                      require_resource=True)
        except bot.JIDMissingResource:
            msg = 'need a full jabber ID + resource for xml-rpc destinations'
            logging.critical(msg)
            raise bot.JIDMissingResource(msg)

        self.handler = Handler(self.wm, self, self.wait_for_ipar)
        self.notifier = pyinotify.Notifier(self.wm, self.handler)

    def add_watch(self, watchdir=None):
        """
        start watching watchdir or self.watch_dir
        we're currently limited to watching one directory tree.
        """
        # the one tree limit is mostly because self.wdd is a single item
        # but managing it as a list might be a bit more annoying
        if watchdir is None:
            watchdir = self.watch_dir
        logging.info("Watching: " + str(watchdir))

        self.mount_point = mount.find_mount_point_for(watchdir)

        mask = EventsCodes.IN_CREATE | EventsCodes.IN_UNMOUNT
        # rec traverses the tree and adds all the directories that are there
        # at the start.
        # auto_add will add in new directories as they are created
        self.wdd = self.wm.add_watch(watchdir, mask, rec=True, auto_add=True)

    def unmount_watch(self):
        if self.wdd is not None:
            self.wm.rm_watch(self.wdd.values())
            self.wdd = None
            self.mounted = False

    def process_notify(self, *args):
        # process the queue of events as explained above
        self.notifier.process_events()
        # check_events waits up to notify_timeout for new events
        if self.notifier.check_events(self.notify_timeout):
            # read notified events and enqueue them
            self.notifier.read_events()
            # should we do something?
        # has something happened?
        last_event_time = self.handler.last_event_time
        if last_event_time is not None:
            time_delta = time.time() - last_event_time
            if time_delta > self.write_timeout:
                self.startCopy()
                self.handler.last_event_time = None
        # handle unmounted filesystems
        if not self.mounted:
            if mount.is_mounted(self.mount_point):
                # we've been remounted. Huzzah!
                # restart the watch
                self.add_watch()
                self.mounted = True
                logging.info(
                    "%s was remounted, restarting watch" % \
                        (self.mount_point)
                )

    def _parser(self, msg, who):
        """
        Parse xmpp chat messages
        """
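        # Supported chat commands, summarized from the branches below
        # (<runfolder> is a placeholder, not a real directory name):
        #   "help"                 -> replies with the help text
        #   "copy"                 -> calls startCopy() and confirms
        #   "finished <runfolder>" -> calls sequencingFinished(<runfolder>)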
        help = u"I can send a [copy] message, or sequencer [finished]"
        if re.match(u"help", msg):
            reply = help
        elif re.match("copy", msg):
            self.startCopy()
            reply = u"sent copy message"
        elif re.match(u"finished", msg):
            words = msg.split()
            if len(words) == 2:
                self.sequencingFinished(words[1])
                reply = u"sending sequencing finished for %s" % (words[1])
            else:
                reply = u"need runfolder name"
        else:
            reply = u"I didn't understand '%s'" % (msg)
        return reply

    def start(self, daemonize):
        """
        Start application
        """
        self.add_watch()
        super(SpoolWatcher, self).start(daemonize)

    def stop(self):
        """
        Shutdown application
        """
        # destroy the inotify instance on this interrupt (stop monitoring)
        self.notifier.stop()
        super(SpoolWatcher, self).stop()

    def startCopy(self):
        logging.debug("writes seem to have stopped")
        if self.notify_runner is not None:
            for r in self.notify_runner:
                self.rpc_send(r, tuple(), 'startCopy')

    def sequencingFinished(self, run_dir):
        # need to strip off self.watch_dir from rundir I suspect.
        logging.info("sequencing finished in " + str(run_dir))
        pattern = self.watch_dir
        if pattern[-1] != os.path.sep:
            pattern += os.path.sep
        # escape the path so regex metacharacters in it are treated literally
        stripped_run_dir = re.sub(re.escape(pattern), "", run_dir)
        logging.debug("stripped to " + stripped_run_dir)
        if self.notify_users is not None:
            for u in self.notify_users:
                self.send(u, 'Sequencing run %s finished' % (stripped_run_dir))
        if self.notify_runner is not None:
            for r in self.notify_runner:
                self.rpc_send(r, (stripped_run_dir,), 'sequencingFinished')

def main(args=None):
    # use a name that doesn't shadow the module-level benderjab bot import
    watcher = SpoolWatcher()
    return watcher.main(args)

if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))