change from hand coded formatting functions to the built in python
[htsworkflow.git] / htsworkflow / pipelines / run_status.py
1 import glob
2 import re
3 import os
4 import sys
5 import time
6 import threading
7
# Config file line matchers used by GARunStatus._process_config().
# A line whose first character is '#'.
s_comment = re.compile(r"^#")
# A run-wide setting: the line starts with 'READ_LENGTH '.
s_general_read_len = re.compile(r"^READ_LENGTH ")
# A lane specific setting: one or more lane digits (1-8), then ':READ_LENGTH '.
s_read_len = re.compile(r"^[1-8]+:READ_LENGTH ")

# Initialised to None; not referenced by the rest of this module.
s_firecrest = None
13
# File name templates, expanded by _p2f().  Each '%s' slot is filled, in
# order, with lane, then tile, then cycle.

# ------------------
# FIRECREST PATTERNS
# ------------------
# per lane/tile/cycle: _p2f(<pattern>, lane, tile, cycle)
PATTERN_FIRECREST_QCM = "s_%s_%s_%s_qcm.xml"

# per lane/tile: _p2f(<pattern>, lane, tile)
PATTERN_FIRECREST_INT = "s_%s_%s_02_int.txt"
PATTERN_FIRECREST_NSE = "s_%s_%s_nse.txt.gz"
PATTERN_FIRECREST_POS = "s_%s_%s_pos.txt"
PATTERN_FIRECREST_IDX = "s_%s_%s_idx.txt"
PATTERN_FIRECREST_CLU1 = "s_%s_%s_01_1_clu.txt"
PATTERN_FIRECREST_CLU2 = "s_%s_%s_01_2_clu.txt"
PATTERN_FIRECREST_CLU3 = "s_%s_%s_01_3_clu.txt"
PATTERN_FIRECREST_CLU4 = "s_%s_%s_01_4_clu.txt"


# ----------------
# BUSTARD PATTERNS
# ----------------
# per lane/tile: _p2f(<pattern>, lane, tile)
PATTERN_BUSTARD_SIG2 = "s_%s_%s_sig2.txt"
PATTERN_BUSTARD_PRB = "s_%s_%s_prb.txt"


# ---------------
# GERALD PATTERNS
# ---------------
# per lane/tile: _p2f(<pattern>, lane, tile)
PATTERN_GERALD_ALLTMP = "s_%s_%s_all.txt.tmp"
PATTERN_GERALD_QRAWTMP = "s_%s_%s_qraw.txt.tmp"
PATTERN_GERALD_ALLPNGTMP = "s_%s_%s_all.tmp.png"
PATTERN_GERALD_ALIGNTMP = "s_%s_%s_align.txt.tmp"
PATTERN_GERALD_QVALTMP = "s_%s_%s_qval.txt.tmp"
PATTERN_GERALD_SCORETMP = "s_%s_%s_score.txt.tmp"
PATTERN_GERALD_PREALIGNTMP = "s_%s_%s_prealign.txt.tmp"
PATTERN_GERALD_REALIGNTMP = "s_%s_%s_realign.txt.tmp"
PATTERN_GERALD_RESCORETMP = "s_%s_%s_rescore.txt.tmp"
PATTERN_GERALD_RESCOREPNG = "s_%s_%s_rescore.png"
PATTERN_GERALD_ERRORSTMPPNG = "s_%s_%s_errors.tmp.png"
PATTERN_GERALD_QCALTMP = "s_%s_%s_qcal.txt.tmp"
PATTERN_GERALD_QVAL = "s_%s_%s_qval.txt"

# per lane: _p2f(<pattern>, lane)
PATTERN_GERALD_SEQPRETMP = "s_%s_seqpre.txt.tmp"
PATTERN_GERALD_RESULTTMP = "s_%s_eland_result.txt.tmp"
PATTERN_GERALD_SIGMEANSTMP = "s_%s_Signal_Means.txt.tmp"
PATTERN_GERALD_CALLPNG = "s_%s_call.png"
PATTERN_GERALD_ALLPNG = "s_%s_all.png"
PATTERN_GERALD_PERCENTALLPNG = "s_%s_percent_all.png"
PATTERN_GERALD_PERCENTCALLPNG = "s_%s_percent_call.png"
PATTERN_GERALD_PERCENTBASEPNG = "s_%s_percent_base.png"
PATTERN_GERALD_FILTTMP = "s_%s_filt.txt.tmp"
PATTERN_GERALD_FRAGTMP = "s_%s_frag.txt.tmp"
PATTERN_GERALD_QREPORTTMP = "s_%s_qreport.txt.tmp"
PATTERN_GERALD_QTABLETMP = "s_%s_qtable.txt.tmp"
PATTERN_GERALD_QCALREPORTTMP = "s_%s_qcalreport.txt.tmp"
PATTERN_GERALD_SEQUENCETMP = "s_%s_sequence.txt.tmp"
PATTERN_GERALD_LANEFINISHED = "s_%s_finished.txt"
68
69
70
71 def _p2f(pattern, lane, tile=None, cycle=None):
72   """
73   Converts a pattern plus info into file names
74   """
75
76   # lane, and cycle provided (INVALID)
77   if tile is None and cycle is not None:
78     msg = "Handling of cycle without tile is not currently implemented."
79     raise ValueError, msg
80
81   # lane, tile, cycle provided
82   elif cycle:
83     return pattern % (lane,
84                       "%04d" % (tile,),
85                       "%02d" % (cycle,))
86   
87   # lane, tile provided
88   elif tile:
89     return pattern % (lane, "%04d" % (tile,))
90
91   # lane provided
92   else:
93     return pattern % (lane)
94     
95
class GARunStatus(object):
  """
  Predicts the files a run will generate for the firecrest, bustard
  and gerald steps and keeps a completed / not completed flag for each.

  Attributes:
    lanes            -- sorted list of lane numbers named in the config
    lane_read_length -- dict of lane number -> read length
    tiles            -- number of tiles counted under Images/L001/C1.1
    cycles           -- highest cycle number found under Images/L001
    status           -- dict of step name -> {file name: completed flag}
  """

  def __init__(self, conf_filepath):
    """
    Given an eland config file in the top level directory
    of a run, predicts the files that will be generated
    during a run and provides methods for retrieving
    (completed, total) for each step or entire run.
    """
    self._conf_filepath = conf_filepath
    self._base_dir = os.path.dirname(conf_filepath)
    self._image_dir = os.path.join(self._base_dir, 'Images')

    self.lanes = []
    self.lane_read_length = {}
    self.tiles = None
    self.cycles = None

    self.status = {}
    self.status['firecrest'] = {}
    self.status['bustard'] = {}
    self.status['gerald'] = {}

    # Order matters: the expected file list is built from the lanes,
    # tile count and cycle count gathered by the first three calls.
    self._process_config()
    self._count_tiles()
    self._count_cycles()
    self._generate_expected()


  def _process_config(self):
    """
    Grabs lane and read length info from self._conf_filepath

    Fills in self.lanes and self.lane_read_length.
    """
    # 'with' guarantees the config file is closed, even when a line
    # fails to parse and an exception propagates.
    with open(self._conf_filepath, 'r') as f:
      for line in f:

        #Skip comment lines for now.
        if s_comment.search(line):
          continue

        mo = s_general_read_len.search(line)
        if mo:
          read_length = int(line[mo.end():])
          #Handle general READ_LENGTH
          for i in range(1, 9):
            self.lane_read_length[i] = read_length

        mo = s_read_len.search(line)
        if mo:
          read_length = int(line[mo.end():])
          # Split on the first ':' only, so a later colon on the line
          # cannot break the two-value unpacking.
          lanes, junk = line.split(':', 1)

          #Convert lanes from string of lanes to list of lane #s.
          lanes = [int(i) for i in lanes]

          for lane in lanes:

            #Keep track of which lanes are being run.
            if lane not in self.lanes:
              self.lanes.append(lane)

            #Update with lane specific read lengths
            self.lane_read_length[lane] = read_length

          self.lanes.sort()


  def _count_tiles(self):
    """
    Count the number of tiles being used

    Sets self.tiles to the number of files matching
    Images/L001/C1.1/s_1_*_a.tif (0 when nothing matches).
    """
    self.tiles = len(glob.glob(os.path.join(self._image_dir,
                                            'L001',
                                            'C1.1',
                                            's_1_*_a.tif')))

  def _count_cycles(self):
    """
    Figures out the number of cycles that are available

    Sets self.cycles to the highest <n> among the Images/L001/C<n>.1
    directories, or to 0 when there are none.
    """
    cycle_dirs = glob.glob(os.path.join(self._image_dir, 'L001', 'C*.1'))
    cycle_list = []
    for cycle_dir in cycle_dirs:
      c = os.path.basename(cycle_dir)
      # 'C12.1' -> 12 : the digits between the leading 'C' and the '.'
      cycle_list.append(int(c[1:c.find('.')]))

    # max() raises on an empty list; report 0 cycles instead, in the
    # same way _count_tiles() reports 0 tiles when nothing matches.
    if cycle_list:
      self.cycles = max(cycle_list)
    else:
      self.cycles = 0


  def _generate_expected(self):
    """
    generates a list of files we expect to find.

    Every expected file name is stored in the matching self.status
    dictionary with a value of False (not completed yet).  The
    commented out entries are patterns that are not tracked.
    """

    firecrest = self.status['firecrest']
    bustard = self.status['bustard']
    gerald = self.status['gerald']


    for lane in self.lanes:
      for tile in range(1, self.tiles + 1):
        for cycle in range(1, self.cycles + 1):

          ##########################
          # LANE, TILE, CYCLE LAYER

          # FIRECREST
          firecrest[_p2f(PATTERN_FIRECREST_QCM, lane, tile, cycle)] = False


        ###################
        # LANE, TILE LAYER

        # FIRECREST
        firecrest[_p2f(PATTERN_FIRECREST_INT, lane, tile)] = False
        firecrest[_p2f(PATTERN_FIRECREST_NSE, lane, tile)] = False
        firecrest[_p2f(PATTERN_FIRECREST_POS, lane, tile)] = False
        firecrest[_p2f(PATTERN_FIRECREST_IDX, lane, tile)] = False
        firecrest[_p2f(PATTERN_FIRECREST_CLU1, lane, tile)] = False
        firecrest[_p2f(PATTERN_FIRECREST_CLU2, lane, tile)] = False
        firecrest[_p2f(PATTERN_FIRECREST_CLU3, lane, tile)] = False
        firecrest[_p2f(PATTERN_FIRECREST_CLU4, lane, tile)] = False


        # BUSTARD
        bustard[_p2f(PATTERN_BUSTARD_SIG2, lane, tile)] = False
        bustard[_p2f(PATTERN_BUSTARD_PRB, lane, tile)] = False


        # GERALD
        #gerald[_p2f(PATTERN_GERALD_ALLTMP, lane, tile)] = False
        #gerald[_p2f(PATTERN_GERALD_QRAWTMP, lane, tile)] = False
        #gerald[_p2f(PATTERN_GERALD_ALLPNGTMP, lane, tile)] = False
        #gerald[_p2f(PATTERN_GERALD_ALIGNTMP, lane, tile)] = False
        #gerald[_p2f(PATTERN_GERALD_QVALTMP, lane, tile)] = False
        #gerald[_p2f(PATTERN_GERALD_SCORETMP, lane, tile)] = False
        #gerald[_p2f(PATTERN_GERALD_PREALIGNTMP, lane, tile)] = False
        #gerald[_p2f(PATTERN_GERALD_REALIGNTMP, lane, tile)] = False
        #gerald[_p2f(PATTERN_GERALD_RESCORETMP, lane, tile)] = False
        gerald[_p2f(PATTERN_GERALD_RESCOREPNG, lane, tile)] = False
        #gerald[_p2f(PATTERN_GERALD_ERRORSTMPPNG, lane, tile)] = False
        #gerald[_p2f(PATTERN_GERALD_QCALTMP, lane, tile)] = False
        #gerald[_p2f(PATTERN_GERALD_QVAL, lane, tile)] = False

      ###################
      # LANE LAYER

      # GERALD
      #gerald[_p2f(PATTERN_GERALD_SEQPRETMP, lane)] = False
      #gerald[_p2f(PATTERN_GERALD_RESULTTMP, lane)] = False
      #gerald[_p2f(PATTERN_GERALD_SIGMEANSTMP, lane)] = False
      gerald[_p2f(PATTERN_GERALD_CALLPNG, lane)] = False
      gerald[_p2f(PATTERN_GERALD_ALLPNG, lane)] = False
      gerald[_p2f(PATTERN_GERALD_PERCENTALLPNG, lane)] = False
      gerald[_p2f(PATTERN_GERALD_PERCENTCALLPNG, lane)] = False
      gerald[_p2f(PATTERN_GERALD_PERCENTBASEPNG, lane)] = False
      #gerald[_p2f(PATTERN_GERALD_FILTTMP, lane)] = False
      #gerald[_p2f(PATTERN_GERALD_FRAGTMP, lane)] = False
      #gerald[_p2f(PATTERN_GERALD_QREPORTTMP, lane)] = False
      #gerald[_p2f(PATTERN_GERALD_QTABLETMP, lane)] = False
      #gerald[_p2f(PATTERN_GERALD_QCALREPORTTMP, lane)] = False
      #gerald[_p2f(PATTERN_GERALD_SEQUENCETMP, lane)] = False
      gerald[_p2f(PATTERN_GERALD_LANEFINISHED, lane)] = False



    #################
    # LOOPS FINISHED

    # FIRECREST
    firecrest['offsets_finished.txt'] = False
    firecrest['finished.txt'] = False

    # BUSTARD
    bustard['finished.txt'] = False

    # GERALD
    gerald['tiles.txt'] = False
    gerald['FullAll.htm'] = False
    #gerald['All.htm.tmp'] = False
    #gerald['Signal_Means.txt.tmp'] = False
    #gerald['plotIntensity_for_IVC'] = False
    #gerald['IVC.htm.tmp'] = False
    gerald['FullError.htm'] = False
    gerald['FullPerfect.htm'] = False
    #gerald['Error.htm.tmp'] = False
    #gerald['Perfect.htm.tmp'] = False
    #gerald['Summary.htm.tmp'] = False
    #gerald['Tile.htm.tmp'] = False
    gerald['finished.txt'] = False

  def _step_status(self, step):
    """
    returns (<completed>, <total>) for the named step of self.status
    """
    flags = self.status[step]
    # list() is needed on Python 3, where dict.values() returns a view
    # object that has no count() method.
    return (list(flags.values()).count(True), len(flags))


  def statusFirecrest(self):
    """
    returns (<completed>, <total>)
    """
    return self._step_status('firecrest')


  def statusBustard(self):
    """
    returns (<completed>, <total>)
    """
    return self._step_status('bustard')


  def statusGerald(self):
    """
    returns (<completed>, <total>)
    """
    return self._step_status('gerald')


  def statusTotal(self):
    """
    returns (<completed>, <total>)
    """
    #f = firecrest  c = completed
    #b = bustard    t = total
    #g = gerald
    fc, ft = self.statusFirecrest()
    bc, bt = self.statusBustard()
    gc, gt = self.statusGerald()

    return (fc+bc+gc, ft+bt+gt)


  def statusReport(self):
    """
    Generate the basic percent complete report

    Returns a list of five strings: one line per step, a separator
    line and a line for the overall total.
    """
    def _percentCompleted(completed, total):
      """
      Returns percent completed as float
      """
      # Nothing expected means nothing done; avoid dividing by zero.
      if not total:
        return 0.0
      return (completed / float(total)) * 100

    fc, ft = self.statusFirecrest()
    bc, bt = self.statusBustard()
    gc, gt = self.statusGerald()
    tc, tt = self.statusTotal()

    fp = _percentCompleted(fc, ft)
    bp = _percentCompleted(bc, bt)
    gp = _percentCompleted(gc, gt)
    tp = _percentCompleted(tc, tt)

    report = ['Firecrest: %s%% (%s/%s)' % (fp, fc, ft),
              '  Bustard: %s%% (%s/%s)' % (bp, bc, bt),
              '   Gerald: %s%% (%s/%s)' % (gp, gc, gt),
              '-----------------------',
              '    Total: %s%% (%s/%s)' % (tp, tc, tt),
             ]
    return report

  def updateFirecrest(self, filename):
    """
    Marks firecrest filename as being completed.
    """
    self.status['firecrest'][filename] = True


  def updateBustard(self, filename):
    """
    Marks bustard filename as being completed.
    """
    self.status['bustard'][filename] = True


  def updateGerald(self, filename):
    """
    Marks gerald filename as being completed.
    """
    self.status['gerald'][filename] = True
388
389
390
391 ##################################################
392 # Functions to be called by Thread(target=<func>)
393 def _cmdLineStatusMonitorFunc(conf_info):
394   """
395   Given a ConfigInfo object, provides status to stdout.
396
397   You should probably use startCmdLineStatusMonitor()
398   instead of ths function.
399
400   Use with:
401     t = threading.Thread(target=_cmdLineStatusMonitorFunc,
402                          args=[conf_info])
403     t.setDaemon(True)
404     t.start()
405   """
406   SLEEP_AMOUNT = 30
407
408   while 1:
409     if conf_info.status is None:
410       print "No status object yet."
411       time.sleep(SLEEP_AMOUNT)
412       continue
413
414     report = conf_info.status.statusReport()
415     print os.linesep.join(report)
416     print
417
418     time.sleep(SLEEP_AMOUNT)
419
420
421 #############################################
422 # Start monitor thread convenience functions
def startCmdLineStatusMonitor(conf_info):
  """
  Runs _cmdLineStatusMonitorFunc(conf_info) in a newly started
  daemon thread.
  """
  monitor = threading.Thread(
    target=_cmdLineStatusMonitorFunc,
    args=[conf_info],
  )
  # Flagged as a daemon thread before it is started.
  monitor.setDaemon(True)
  monitor.start()
430
431 from optparse import OptionParser
def make_parser():
  """
  Builds the command line parser for this script.

  Returns an OptionParser with no options of its own; the single
  positional argument is checked by main().
  """
  usage = "%prog: config file"

  # Hand the usage text to the parser; it was previously built and then
  # dropped, so the default "%prog [options]" usage was shown instead.
  parser = OptionParser(usage=usage)
  return parser
437   
def main(cmdline=None):
  """
  Prints the status report for the run described by a config file.

  cmdline -- list of command line arguments (without the program name);
             None makes the parser fall back to sys.argv[1:].

  Exactly one positional argument, the config file name, is required;
  otherwise parser.error() reports the problem and exits.
  Returns 0 after the report has been printed.
  """
  parser = make_parser()
  opt, args = parser.parse_args(cmdline)

  if len(args) != 1:
    parser.error("need name of configuration file")

  status = GARunStatus(args[0])
  # print(<single argument>) works as a statement on Python 2 and as a
  # function call on Python 3.
  print(os.linesep.join(status.statusReport()))
  return 0
448
# When run as a script, use main()'s return value as the exit status.
if __name__ == "__main__":
  sys.exit(main(sys.argv[1:]))
451