[project @ add a parser to spoolwatcher]
[htsworkflow.git] / gaworkflow / spoolwatcher.py
1 #!/usr/bin/env python
import logging
import os
import re
import sys
import time

# this uses pyinotify
import pyinotify
from pyinotify import EventsCodes

from benderjab import bot
from benderjab import rpc
13
14
class Handler(pyinotify.ProcessEvent):
    """
    Inotify event handler that relays spool directory activity to a bot.

    Remembers when the most recent file creation was seen, and tells the
    bot when a "run.completed" file appears or the watched tree is unmounted.
    """
    def __init__(self, watchmanager, bot):
        """
        :Parameters:
          `watchmanager` - the watch manager in use; only stored here
          `bot` - object providing sequencingFinished(path) and
                  unmount_watch()
        """
        # time.time() of the latest IN_CREATE event, or None when no
        # creation has been recorded (the owner resets it to None).
        self.last_event_time = None
        self.watchmanager = watchmanager
        self.bot = bot

    def process_IN_CREATE(self, event):
        """
        Record the creation time and report finished runs.

        A file named "run.completed" (compared case-insensitively) makes
        the bot announce that the run in event.path has finished.
        """
        self.last_event_time = time.time()
        msg = "Create: %s" % os.path.join(event.path, event.name)
        if event.name.lower() == "run.completed":
            try:
                self.bot.sequencingFinished(event.path)
            except IOError:
                # Sending is best effort: keep watching, but attach the
                # traceback so the reason for the failure is not lost.
                logging.error("Couldn't send sequencingFinished",
                              exc_info=True)
        logging.debug(msg)

    def process_IN_DELETE(self, event):
        """
        Log the removal of a file or directory.
        """
        logging.debug("Remove: %s" % os.path.join(event.path, event.name))

    def process_IN_UNMOUNT(self, event):
        """
        Ask the bot to drop its watch once the filesystem is unmounted.
        """
        self.bot.unmount_watch()
36
class SpoolWatcher(rpc.XmlRpcBot):
    """
    Watch a directory and send a message when another process is done writing.

    This monitors a directory tree using inotify (linux specific) and
    after some files having been written will send a message after
    <write_timeout> seconds of no file writing.

    (Basically when the solexa machine finishes dumping a round of data
    this'll hopefully send out a message saying hey look there's data
    available.)
    """
    # these params need to be in the config file
    # I wonder where I should put the documentation
    #:Parameters:
    #    `watchdir` - which directory tree to monitor for modifications
    #    `profile` - specify which .gaworkflow profile to use
    #    `write_timeout` - how many seconds to wait for writes to finish to
    #                      the spool
    #    `notify_users` - who gets a chat message when a run finishes
    #    `notify_runner` - who gets the xml-rpc messages; these need a
    #                      full jabber ID including the resource
    #
    # `notify_timeout` (how often to timeout from notify) is currently an
    # attribute set in __init__, it is not read from the config file.

    def __init__(self, section=None, configfile=None):
        """
        Set configuration defaults and create the inotify machinery.

        :Parameters:
          `section` - passed through to the base class constructor
          `configfile` - passed through to the base class constructor
        """
        #if configfile is None:
        #    self.configfile = "~/.gaworkflow"
        super(SpoolWatcher, self).__init__(section, configfile)

        # defaults for the options read_config() looks at
        self.cfg['watchdir'] = None
        self.cfg['write_timeout'] = 10
        self.cfg['notify_users'] = None
        self.cfg['notify_runner'] = None

        # timeout handed to notifier.check_events() on every poll
        self.notify_timeout = 0.001
        self.wm = pyinotify.WatchManager()
        self.handler = Handler(self.wm, self)
        self.notifier = pyinotify.Notifier(self.wm, self.handler)
        # whatever add_watch() returned for the current watch, or None
        self.wdd = None

        self.notify_users = None
        self.notify_runner = None

        # presumably the base class calls every entry in eventTasks from
        # its main loop -- confirm against rpc.XmlRpcBot
        self.eventTasks.append(self.process_notify)

    def read_config(self, section=None, configfile=None):
        """
        Load the base class configuration and then our own options.

        Raises bot.JIDMissingResource when a `notify_runner` entry lacks
        a resource.
        """
        super(SpoolWatcher, self).read_config(section, configfile)

        self.watch_dir = self._check_required_option('watchdir')
        self.write_timeout = int(self.cfg['write_timeout'])

        self.notify_users = self._parse_user_list(self.cfg['notify_users'])
        try:
            self.notify_runner = self._parse_user_list(
                self.cfg['notify_runner'], require_resource=True)
        except bot.JIDMissingResource:
            msg = 'need a full jabber ID + resource for xml-rpc destinations'
            # logging.FATAL is the numeric level, not a function; calling
            # it would raise TypeError and hide the real problem.
            logging.critical(msg)
            raise bot.JIDMissingResource(msg)

    def add_watch(self, watchdir=None):
        """
        start watching watchdir or self.watch_dir
        we're currently limited to watching one directory tree.
        """
        # the one tree limit is mostly because self.wdd is a single item
        # but managing it as a list might be a bit more annoying
        if watchdir is None:
            watchdir = self.watch_dir
        logging.info("Watching:"+str(watchdir))
        mask = EventsCodes.IN_CREATE | EventsCodes.IN_UNMOUNT
        # rec traverses the tree and adds all the directories that are there
        # at the start.
        # auto_add will add in new directories as they are created
        self.wdd = self.wm.add_watch(watchdir, mask, rec=True, auto_add=True)

    def unmount_watch(self):
        """
        Remove the watches created by add_watch(), if there are any.
        """
        if self.wdd is not None:
            logging.debug("disabling watch")
            logging.debug(str(self.wdd))
            # add_watch() hands back a {path: watch descriptor} mapping,
            # while rm_watch() wants the descriptors themselves.  Failed
            # watches are reported with a negative descriptor; skip those.
            descriptors = [wd for wd in self.wdd.values() if wd > 0]
            self.wm.rm_watch(descriptors)
            self.wdd = None

    def process_notify(self, *args):
        """
        Poll inotify once and send startCopy when the writes went quiet.

        Quiet means more than write_timeout seconds have passed since the
        handler saw its last create event.
        """
        # dispatch the events queued by the previous poll to the handler
        self.notifier.process_events()
        #check events waits timeout
        if self.notifier.check_events(self.notify_timeout):
            # read notified events and enqueue them
            self.notifier.read_events()
        last_event_time = self.handler.last_event_time
        if last_event_time is not None:
            time_delta = time.time() - last_event_time
            if time_delta > self.write_timeout:
                self.startCopy()
                # forget the event so we only announce once per burst
                self.handler.last_event_time = None

    def _parser(self, msg, who):
        """
        Parse xmpp chat messages

        :Parameters:
          `msg` - text of the message; only its start is matched
          `who` - sender of the message (unused)

        Returns the text to send back.
        """
        help_text = u"I can send [copy] message, or squencer [finished]"
        if re.match(u"help", msg):
            reply = help_text
        elif re.match("copy", msg):
            self.startCopy()
            reply = u"sent copy message"
        elif re.match(u"finished", msg):
            # expect exactly "finished <runfolder>"
            words = msg.split()
            if len(words) == 2:
                self.sequencingFinished(words[1])
                reply = u"sending sequencing finished for %s" % (words[1])
            else:
                reply = u"need runfolder name"
        else:
            reply = u"I didn't understand '%s'" % (msg)
        return reply

    def start(self, daemonize):
        """
        Start application
        """
        self.add_watch()
        super(SpoolWatcher, self).start(daemonize)

    def stop(self):
        """
        shutdown application
        """
        # destroy the inotify's instance on this interrupt (stop monitoring)
        self.notifier.stop()
        super(SpoolWatcher, self).stop()

    def startCopy(self):
        """
        Send the 'startCopy' rpc message to every notify_runner entry.
        """
        logging.debug("writes seem to have stopped")
        if self.notify_runner is not None:
            for r in self.notify_runner:
                self.rpc_send(r, tuple(), 'startCopy')

    def sequencingFinished(self, run_dir):
        """
        Announce that the run in run_dir has finished.

        The leading watch directory is removed from run_dir first; users
        get a chat message and runners a 'sequencingFinished' rpc message.
        """
        logging.info("run.completed in " + str(run_dir))
        prefix = self.watch_dir
        if not prefix.endswith(os.path.sep):
            prefix += os.path.sep
        # Plain prefix comparison: the watch directory is a path, not a
        # regular expression, and only a leading match should be removed.
        if run_dir.startswith(prefix):
            stripped_run_dir = run_dir[len(prefix):]
        else:
            stripped_run_dir = run_dir
        logging.debug("stripped to " + stripped_run_dir)
        if self.notify_users is not None:
            for u in self.notify_users:
                self.send(u, 'Sequencing run %s finished' % (stripped_run_dir))
        if self.notify_runner is not None:
            for r in self.notify_runner:
                self.rpc_send(r, (stripped_run_dir,), 'sequencingFinished')
189         
def main(args=None):
    """
    Create a SpoolWatcher and run it with the given arguments.

    Returns whatever SpoolWatcher.main(args) returns.
    """
    watcher = SpoolWatcher()
    status = watcher.main(args)
    return status
193     
if __name__ == "__main__":
    # run with the command line arguments (minus the program name) and
    # hand the result to the shell as the exit status
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)
196