39dc54cb5adc435fa1e59c8aeda945269a655937
[htsworkflow.git] / htsworkflow / pipelines / run_status.py
1 import glob
2 import re
3 import os
4 import sys
5 import time
6 import threading
7
# Configuration lines that are comments (start with '#').
s_comment = re.compile(r'^#')
# Run-wide read length line: "READ_LENGTH <n>".
s_general_read_len = re.compile(r'^READ_LENGTH ')
# Lane-specific read length line: "<lanes>:READ_LENGTH <n>", where
# <lanes> is one or more digits in the range 1-8.
s_read_len = re.compile(r'^[1-8]+:READ_LENGTH ')

# Not assigned or read anywhere else in the code visible here.
s_firecrest = None
13
def _four_digit_num_in_string(num):
  """
  Returns num as a string left-padded with zeros to four characters.

  num is expected to be an integer in the range 0..9999,
  e.g. 7 -> '0007' and 1234 -> '1234'.

  Raises ValueError if num is negative or greater than 9999.
  """
  if 0 <= num < 10000:
    return str(num).zfill(4)

  msg = 'Invalid number: %s' % (num,)
  # Call-style raise is valid on both Python 2 and Python 3.
  raise ValueError(msg)
28
def _two_digit_num_in_string(num):
  """
  Returns num as a string left-padded with zeros to two characters.

  num is expected to be an integer in the range 0..99,
  e.g. 3 -> '03' and 42 -> '42'.

  Raises ValueError if num is negative or greater than 99.
  """
  if 0 <= num < 100:
    return str(num).zfill(2)

  msg = 'Invalid number: %s' % (num,)
  # Call-style raise is valid on both Python 2 and Python 3.
  raise ValueError(msg)
39
40
# FIRECREST FILE NAME PATTERNS
#
# Three %s slots -- fill with _p2f(<pattern>, lane, tile, cycle)
PATTERN_FIRECREST_QCM = "s_%s_%s_%s_qcm.xml"

# Two %s slots -- fill with _p2f(<pattern>, lane, tile)
PATTERN_FIRECREST_INT = "s_%s_%s_02_int.txt"
PATTERN_FIRECREST_NSE = "s_%s_%s_nse.txt.gz"
PATTERN_FIRECREST_POS = "s_%s_%s_pos.txt"
PATTERN_FIRECREST_IDX = "s_%s_%s_idx.txt"
PATTERN_FIRECREST_CLU1 = "s_%s_%s_01_1_clu.txt"
PATTERN_FIRECREST_CLU2 = "s_%s_%s_01_2_clu.txt"
PATTERN_FIRECREST_CLU3 = "s_%s_%s_01_3_clu.txt"
PATTERN_FIRECREST_CLU4 = "s_%s_%s_01_4_clu.txt"
54
55
# BUSTARD FILE NAME PATTERNS
#
# Two %s slots -- fill with _p2f(<pattern>, lane, tile)
PATTERN_BUSTARD_SIG2 = "s_%s_%s_sig2.txt"
PATTERN_BUSTARD_PRB = "s_%s_%s_prb.txt"
60
61
62
# GERALD FILE NAME PATTERNS
#
# Two %s slots -- fill with _p2f(<pattern>, lane, tile)
PATTERN_GERALD_ALLTMP = "s_%s_%s_all.txt.tmp"
PATTERN_GERALD_QRAWTMP = "s_%s_%s_qraw.txt.tmp"
PATTERN_GERALD_ALLPNGTMP = "s_%s_%s_all.tmp.png"
PATTERN_GERALD_ALIGNTMP = "s_%s_%s_align.txt.tmp"
PATTERN_GERALD_QVALTMP = "s_%s_%s_qval.txt.tmp"
PATTERN_GERALD_SCORETMP = "s_%s_%s_score.txt.tmp"
PATTERN_GERALD_PREALIGNTMP = "s_%s_%s_prealign.txt.tmp"
PATTERN_GERALD_REALIGNTMP = "s_%s_%s_realign.txt.tmp"
PATTERN_GERALD_RESCORETMP = "s_%s_%s_rescore.txt.tmp"
PATTERN_GERALD_RESCOREPNG = "s_%s_%s_rescore.png"
PATTERN_GERALD_ERRORSTMPPNG = "s_%s_%s_errors.tmp.png"
PATTERN_GERALD_QCALTMP = "s_%s_%s_qcal.txt.tmp"
PATTERN_GERALD_QVAL = "s_%s_%s_qval.txt"

# One %s slot -- fill with _p2f(<pattern>, lane)
PATTERN_GERALD_SEQPRETMP = "s_%s_seqpre.txt.tmp"
PATTERN_GERALD_RESULTTMP = "s_%s_eland_result.txt.tmp"
PATTERN_GERALD_SIGMEANSTMP = "s_%s_Signal_Means.txt.tmp"
PATTERN_GERALD_CALLPNG = "s_%s_call.png"
PATTERN_GERALD_ALLPNG = "s_%s_all.png"
PATTERN_GERALD_PERCENTALLPNG = "s_%s_percent_all.png"
PATTERN_GERALD_PERCENTCALLPNG = "s_%s_percent_call.png"
PATTERN_GERALD_PERCENTBASEPNG = "s_%s_percent_base.png"
PATTERN_GERALD_FILTTMP = "s_%s_filt.txt.tmp"
PATTERN_GERALD_FRAGTMP = "s_%s_frag.txt.tmp"
PATTERN_GERALD_QREPORTTMP = "s_%s_qreport.txt.tmp"
PATTERN_GERALD_QTABLETMP = "s_%s_qtable.txt.tmp"
PATTERN_GERALD_QCALREPORTTMP = "s_%s_qcalreport.txt.tmp"
PATTERN_GERALD_SEQUENCETMP = "s_%s_sequence.txt.tmp"
PATTERN_GERALD_LANEFINISHED = "s_%s_finished.txt"
95
96
97
def _p2f(pattern, lane, tile=None, cycle=None):
  """
  Converts a pattern plus info into a file name.

  pattern -- %-format string (one of the PATTERN_* constants) with one
             %s slot per value supplied: lane, then tile, then cycle.
  lane    -- lane identifier, substituted as given.
  tile    -- optional tile number; zero-padded to four digits.
  cycle   -- optional cycle number; zero-padded to two digits.
             Only valid together with tile.

  Raises ValueError if cycle is given without tile, or if tile/cycle
  is rejected by the zero-padding helpers.
  """

  # lane and cycle provided (INVALID)
  if tile is None and cycle is not None:
    msg = "Handling of cycle without tile is not currently implemented."
    raise ValueError(msg)

  # The checks below compare against None instead of relying on
  # truthiness, so a tile or cycle of 0 is formatted rather than being
  # silently treated as "not provided".

  # lane, tile, cycle provided
  if cycle is not None:
    return pattern % (lane,
                      _four_digit_num_in_string(tile),
                      _two_digit_num_in_string(cycle))

  # lane, tile provided
  if tile is not None:
    return pattern % (lane, _four_digit_num_in_string(tile))

  # lane provided; explicit one-element tuple so that a tuple-valued
  # lane is not unpacked by the % operator.
  return pattern % (lane,)
121     
122
class GARunStatus(object):
  """
  Predicts the files a run will generate and tracks which are done.

  Attributes:
    lanes            -- sorted list of lane numbers named by
                        lane-specific READ_LENGTH config lines
    lane_read_length -- dict mapping lane number -> read length
    tiles            -- number of Images/L001/C1.1/s_1_*_a.tif files
    cycles           -- highest cycle number among Images/L001/C*.1
    status           -- dict with keys 'firecrest', 'bustard' and
                        'gerald'; each value maps an expected file
                        name to False (pending) or True (completed)
  """

  def __init__(self, conf_filepath):
    """
    Given an eland config file in the top level directory
    of a run, predicts the files that will be generated
    during a run and provides methods for retrieving
    (completed, total) for each step or entire run.

    Raises ValueError if no cycle directories are found next to the
    config file (see _count_cycles).
    """
    self._conf_filepath = conf_filepath
    self._base_dir = os.path.dirname(conf_filepath)
    self._image_dir = os.path.join(self._base_dir, 'Images')

    self.lanes = []
    self.lane_read_length = {}
    self.tiles = None
    self.cycles = None

    self.status = {}
    self.status['firecrest'] = {}
    self.status['bustard'] = {}
    self.status['gerald'] = {}

    self._process_config()
    self._count_tiles()
    self._count_cycles()
    self._generate_expected()


  def _process_config(self):
    """
    Grabs lane and read length info from self._conf_filepath.

    A general "READ_LENGTH <n>" line sets the read length of lanes 1-8.
    A "<lanes>:READ_LENGTH <n>" line sets the read length of each
    listed lane and records those lanes in self.lanes.
    """
    f = open(self._conf_filepath, 'r')
    # try/finally guarantees the file is closed even if a line fails
    # to parse.
    try:
      for line in f:

        # Skip comment lines for now.
        if s_comment.search(line):
          continue

        # Handle general READ_LENGTH
        mo = s_general_read_len.search(line)
        if mo:
          read_length = int(line[mo.end():])
          for lane in range(1, 9):
            self.lane_read_length[lane] = read_length
          # A general line can never also match the lane-specific
          # pattern, which must start with a digit.
          continue

        mo = s_read_len.search(line)
        if mo:
          read_length = int(line[mo.end():])

          # Everything before the ':' is a string of single digit lane
          # numbers, e.g. '1234' -> [1, 2, 3, 4].
          lane_digits = line.split(':', 1)[0]

          for lane in [int(digit) for digit in lane_digits]:

            # Keep track of which lanes are being run.
            if lane not in self.lanes:
              self.lanes.append(lane)

            # Update with lane specific read lengths
            self.lane_read_length[lane] = read_length
    finally:
      f.close()

    self.lanes.sort()


  def _count_tiles(self):
    """
    Count the number of tiles being used, taken as the number of
    s_1_*_a.tif images in the first cycle directory of lane 1.
    """
    first_cycle_images = os.path.join(self._image_dir,
                                      'L001',
                                      'C1.1',
                                      's_1_*_a.tif')
    self.tiles = len(glob.glob(first_cycle_images))


  def _count_cycles(self):
    """
    Figures out the number of cycles that are available, taken as the
    highest <n> among the Images/L001/C<n>.1 directories.

    Raises ValueError if there are no such directories or if <n> is
    not an integer.
    """
    lane_dir = os.path.join(self._image_dir, 'L001')
    cycle_list = []
    for cycle_dir in glob.glob(os.path.join(lane_dir, 'C*.1')):
      name = os.path.basename(cycle_dir)
      # 'C12.1' -> 12: drop the leading 'C', stop at the first '.'
      cycle_list.append(int(name[1:name.find('.')]))

    if not cycle_list:
      # max() of an empty list also raises ValueError, but with a
      # message that says nothing about the missing directories.
      msg = 'No cycle directories (C*.1) found in %s' % (lane_dir,)
      raise ValueError(msg)

    self.cycles = max(cycle_list)


  def _generate_expected(self):
    """
    Generates the file names we expect to find and registers each of
    them as not yet completed (False) in self.status.
    """
    firecrest = self.status['firecrest']
    bustard = self.status['bustard']
    gerald = self.status['gerald']

    # Patterns expanded once per (lane, tile).
    firecrest_tile_patterns = (PATTERN_FIRECREST_INT,
                               PATTERN_FIRECREST_NSE,
                               PATTERN_FIRECREST_POS,
                               PATTERN_FIRECREST_IDX,
                               PATTERN_FIRECREST_CLU1,
                               PATTERN_FIRECREST_CLU2,
                               PATTERN_FIRECREST_CLU3,
                               PATTERN_FIRECREST_CLU4)
    bustard_tile_patterns = (PATTERN_BUSTARD_SIG2,
                             PATTERN_BUSTARD_PRB)
    # Of the per-tile PATTERN_GERALD_* constants only the rescore
    # image is tracked; the *.tmp ones and PATTERN_GERALD_QVAL are not.
    gerald_tile_patterns = (PATTERN_GERALD_RESCOREPNG,)

    # Patterns expanded once per lane. The per-lane *.tmp
    # PATTERN_GERALD_* constants are not tracked.
    gerald_lane_patterns = (PATTERN_GERALD_CALLPNG,
                            PATTERN_GERALD_ALLPNG,
                            PATTERN_GERALD_PERCENTALLPNG,
                            PATTERN_GERALD_PERCENTCALLPNG,
                            PATTERN_GERALD_PERCENTBASEPNG,
                            PATTERN_GERALD_LANEFINISHED)

    for lane in self.lanes:
      for tile in range(1, self.tiles + 1):

        ##########################
        # LANE, TILE, CYCLE LAYER
        for cycle in range(1, self.cycles + 1):
          firecrest[_p2f(PATTERN_FIRECREST_QCM, lane, tile, cycle)] = False

        ###################
        # LANE, TILE LAYER
        for pattern in firecrest_tile_patterns:
          firecrest[_p2f(pattern, lane, tile)] = False
        for pattern in bustard_tile_patterns:
          bustard[_p2f(pattern, lane, tile)] = False
        for pattern in gerald_tile_patterns:
          gerald[_p2f(pattern, lane, tile)] = False

      ###################
      # LANE LAYER
      for pattern in gerald_lane_patterns:
        gerald[_p2f(pattern, lane)] = False

    #################
    # LOOPS FINISHED -- files that exist once per run

    # FIRECREST
    firecrest['offsets_finished.txt'] = False
    firecrest['finished.txt'] = False

    # BUSTARD
    bustard['finished.txt'] = False

    # GERALD (the run level *.tmp files are not tracked)
    gerald['tiles.txt'] = False
    gerald['FullAll.htm'] = False
    gerald['FullError.htm'] = False
    gerald['FullPerfect.htm'] = False
    gerald['finished.txt'] = False


  def _status_counts(self, step):
    """
    Returns (<completed>, <total>) for one key of self.status.
    """
    files = self.status[step]
    # list() is needed because on Python 3 dict.values() returns a
    # view object that has no count() method.
    completed = list(files.values()).count(True)
    return (completed, len(files))


  def statusFirecrest(self):
    """
    returns (<completed>, <total>)
    """
    return self._status_counts('firecrest')


  def statusBustard(self):
    """
    returns (<completed>, <total>)
    """
    return self._status_counts('bustard')


  def statusGerald(self):
    """
    returns (<completed>, <total>)
    """
    return self._status_counts('gerald')


  def statusTotal(self):
    """
    returns (<completed>, <total>) summed over all three steps
    """
    #f = firecrest  c = completed
    #b = bustard    t = total
    #g = gerald
    fc, ft = self.statusFirecrest()
    bc, bt = self.statusBustard()
    gc, gt = self.statusGerald()

    return (fc+bc+gc, ft+bt+gt)


  def statusReport(self):
    """
    Generate the basic percent complete report.

    Returns a list of five strings: one line per step, a separator
    line and a line for the total.
    """
    def _percentCompleted(completed, total):
      """
      Returns percent completed as float
      """
      return (completed / float(total)) * 100

    fc, ft = self.statusFirecrest()
    bc, bt = self.statusBustard()
    gc, gt = self.statusGerald()
    tc, tt = self.statusTotal()

    fp = _percentCompleted(fc, ft)
    bp = _percentCompleted(bc, bt)
    gp = _percentCompleted(gc, gt)
    tp = _percentCompleted(tc, tt)

    report = ['Firecrest: %s%% (%s/%s)' % (fp, fc, ft),
              '  Bustard: %s%% (%s/%s)' % (bp, bc, bt),
              '   Gerald: %s%% (%s/%s)' % (gp, gc, gt),
              '-----------------------',
              '    Total: %s%% (%s/%s)' % (tp, tc, tt),
             ]
    return report

  def updateFirecrest(self, filename):
    """
    Marks firecrest filename as being completed.
    """
    self.status['firecrest'][filename] = True


  def updateBustard(self, filename):
    """
    Marks bustard filename as being completed.
    """
    self.status['bustard'][filename] = True


  def updateGerald(self, filename):
    """
    Marks gerald filename as being completed.
    """
    self.status['gerald'][filename] = True
415
416
417
418 ##################################################
419 # Functions to be called by Thread(target=<func>)
def _cmdLineStatusMonitorFunc(conf_info):
  """
  Given a ConfigInfo object, provides status to stdout.

  Loops forever: every SLEEP_AMOUNT seconds it prints the lines of
  conf_info.status.statusReport() followed by a blank line, or a
  placeholder message while conf_info.status is None.

  You should probably use startCmdLineStatusMonitor()
  instead of this function.

  Use with:
    t = threading.Thread(target=_cmdLineStatusMonitorFunc,
                         args=[conf_info])
    t.setDaemon(True)
    t.start()
  """
  SLEEP_AMOUNT = 30

  while True:
    # Read the attribute once per pass so the None check and the
    # report use the same object even if another thread replaces it.
    status = conf_info.status

    # print(...) with a single argument produces the same output under
    # the Python 2 print statement and the Python 3 print function.
    if status is None:
      print("No status object yet.")
    else:
      print(os.linesep.join(status.statusReport()))
      print('')

    time.sleep(SLEEP_AMOUNT)
446
447
448 #############################################
449 # Start monitor thread convenience functions
def startCmdLineStatusMonitor(conf_info):
  """
  Runs _cmdLineStatusMonitorFunc(conf_info) on a new daemon thread
  and returns without waiting for it.
  """
  monitor = threading.Thread(
    target=_cmdLineStatusMonitorFunc,
    args=[conf_info],
  )
  # Mark as daemon before starting the thread.
  monitor.setDaemon(True)
  monitor.start()
457
458 from optparse import OptionParser
def make_parser():
  """
  Builds the command line parser for this script.

  Returns an optparse.OptionParser that defines no options of its own;
  the single positional argument (the config file) is checked in main().
  """
  usage = "%prog: config file"

  # Pass the usage text on to the parser; it was previously built but
  # never used, so the help output showed optparse's default usage.
  parser = OptionParser(usage=usage)
  return parser
464   
def main(cmdline=None):
  """
  Command line entry point: prints the status report for one run.

  cmdline -- list of command line arguments without the program name.
             None is handed to parse_args() unchanged, which makes
             optparse read sys.argv[1:].

  Exactly one positional argument, the configuration file, is
  required; otherwise parser.error() reports the problem and exits.

  Returns 0 after the report has been printed.
  """
  parser = make_parser()
  opt, args = parser.parse_args(cmdline)

  if len(args) != 1:
    parser.error("need name of configuration file")

  status = GARunStatus(args[0])
  # print(...) with a single argument produces the same output under
  # the Python 2 print statement and the Python 3 print function.
  print(os.linesep.join(status.statusReport()))
  return 0
475
# When executed as a script, run main() on the command line arguments
# and use its return value as the process exit status.
if __name__ == "__main__":
  exit_status = main(sys.argv[1:])
  sys.exit(exit_status)
478