Initial port to python3
[htsworkflow.git] / htsworkflow / pipelines / run_status.py
1 __docformat__ = "restructuredtext en"
2
3 import glob
4 import re
5 import os
6 import sys
7 import time
8 import threading
9
# Regular expressions used while reading the config file.
# A line starting with '#' is a comment.
s_comment = re.compile(r"^#")
# "READ_LENGTH <n>": read length given without a lane prefix.
s_general_read_len = re.compile(r"^READ_LENGTH ")
# "<lane digits>:READ_LENGTH <n>": read length for the listed lanes (1-8).
s_read_len = re.compile(r"^[1-8]+:READ_LENGTH ")

# Placeholder; nothing in this part of the file assigns a real value.
s_firecrest = None
15
# ---------------------------------------------------------------------
# File name templates.  Each template is expanded with
# _p2f(<pattern>, lane[, tile[, cycle]]); the number of %s placeholders
# determines how many of those values the template needs.
# ---------------------------------------------------------------------

# FIRECREST -- per lane, tile and cycle: _p2f(<pattern>, lane, tile, cycle)
PATTERN_FIRECREST_QCM = "s_%s_%s_%s_qcm.xml"

# FIRECREST -- per lane and tile: _p2f(<pattern>, lane, tile)
PATTERN_FIRECREST_INT = "s_%s_%s_02_int.txt"
PATTERN_FIRECREST_NSE = "s_%s_%s_nse.txt.gz"
PATTERN_FIRECREST_POS = "s_%s_%s_pos.txt"
PATTERN_FIRECREST_IDX = "s_%s_%s_idx.txt"
PATTERN_FIRECREST_CLU1 = "s_%s_%s_01_1_clu.txt"
PATTERN_FIRECREST_CLU2 = "s_%s_%s_01_2_clu.txt"
PATTERN_FIRECREST_CLU3 = "s_%s_%s_01_3_clu.txt"
PATTERN_FIRECREST_CLU4 = "s_%s_%s_01_4_clu.txt"


# BUSTARD -- per lane and tile: _p2f(<pattern>, lane, tile)
PATTERN_BUSTARD_SIG2 = "s_%s_%s_sig2.txt"
PATTERN_BUSTARD_PRB = "s_%s_%s_prb.txt"


# GERALD -- per lane and tile: _p2f(<pattern>, lane, tile)
PATTERN_GERALD_ALLTMP = "s_%s_%s_all.txt.tmp"
PATTERN_GERALD_QRAWTMP = "s_%s_%s_qraw.txt.tmp"
PATTERN_GERALD_ALLPNGTMP = "s_%s_%s_all.tmp.png"
PATTERN_GERALD_ALIGNTMP = "s_%s_%s_align.txt.tmp"
PATTERN_GERALD_QVALTMP = "s_%s_%s_qval.txt.tmp"
PATTERN_GERALD_SCORETMP = "s_%s_%s_score.txt.tmp"
PATTERN_GERALD_PREALIGNTMP = "s_%s_%s_prealign.txt.tmp"
PATTERN_GERALD_REALIGNTMP = "s_%s_%s_realign.txt.tmp"
PATTERN_GERALD_RESCORETMP = "s_%s_%s_rescore.txt.tmp"
PATTERN_GERALD_RESCOREPNG = "s_%s_%s_rescore.png"
PATTERN_GERALD_ERRORSTMPPNG = "s_%s_%s_errors.tmp.png"
PATTERN_GERALD_QCALTMP = "s_%s_%s_qcal.txt.tmp"
PATTERN_GERALD_QVAL = "s_%s_%s_qval.txt"

# GERALD -- per lane: _p2f(<pattern>, lane)
PATTERN_GERALD_SEQPRETMP = "s_%s_seqpre.txt.tmp"
PATTERN_GERALD_RESULTTMP = "s_%s_eland_result.txt.tmp"
PATTERN_GERALD_SIGMEANSTMP = "s_%s_Signal_Means.txt.tmp"
PATTERN_GERALD_CALLPNG = "s_%s_call.png"
PATTERN_GERALD_ALLPNG = "s_%s_all.png"
PATTERN_GERALD_PERCENTALLPNG = "s_%s_percent_all.png"
PATTERN_GERALD_PERCENTCALLPNG = "s_%s_percent_call.png"
PATTERN_GERALD_PERCENTBASEPNG = "s_%s_percent_base.png"
PATTERN_GERALD_FILTTMP = "s_%s_filt.txt.tmp"
PATTERN_GERALD_FRAGTMP = "s_%s_frag.txt.tmp"
PATTERN_GERALD_QREPORTTMP = "s_%s_qreport.txt.tmp"
PATTERN_GERALD_QTABLETMP = "s_%s_qtable.txt.tmp"
PATTERN_GERALD_QCALREPORTTMP = "s_%s_qcalreport.txt.tmp"
PATTERN_GERALD_SEQUENCETMP = "s_%s_sequence.txt.tmp"
PATTERN_GERALD_LANEFINISHED = "s_%s_finished.txt"
70
71
72
73 def _p2f(pattern, lane, tile=None, cycle=None):
74   """
75   Converts a pattern plus info into file names
76   """
77
78   # lane, and cycle provided (INVALID)
79   if tile is None and cycle is not None:
80     msg = "Handling of cycle without tile is not currently implemented."
81     raise ValueError(msg)
82
83   # lane, tile, cycle provided
84   elif cycle:
85     return pattern % (lane,
86                       "%04d" % (tile,),
87                       "%02d" % (cycle,))
88   
89   # lane, tile provided
90   elif tile:
91     return pattern % (lane, "%04d" % (tile,))
92
93   # lane provided
94   else:
95     return pattern % (lane)
96     
97
class GARunStatus(object):
  """
  Predicts the files a run is expected to produce and tracks completion.

  The expected file names live in ``self.status``, a dictionary with the
  keys ``'firecrest'``, ``'bustard'`` and ``'gerald'``.  Each value maps an
  expected file name to ``False`` (not seen yet) or ``True`` (marked as
  completed through the matching ``update*`` method).
  """

  def __init__(self, conf_filepath):
    """
    Given an eland config file in the top level directory
    of a run, predicts the files that will be generated
    during a run and provides methods for retrieving
    (completed, total) for each step or entire run.

    :param conf_filepath: path of the config file.  The ``Images``
        directory is looked up in the directory containing this file.
    """
    self._conf_filepath = conf_filepath
    self._base_dir = os.path.dirname(conf_filepath)
    self._image_dir = os.path.join(self._base_dir, 'Images')

    # Lane numbers named by lane-specific READ_LENGTH lines, sorted.
    self.lanes = []
    # lane number -> read length
    self.lane_read_length = {}
    # Number of tiles / highest cycle number; filled in by the
    # _count_* methods below.
    self.tiles = None
    self.cycles = None

    # step name -> {expected file name: completed?}
    self.status = {}
    self.status['firecrest'] = {}
    self.status['bustard'] = {}
    self.status['gerald'] = {}

    self._process_config()
    self._count_tiles()
    self._count_cycles()
    self._generate_expected()

  def _process_config(self):
    """
    Grabs lane and read length info from self._conf_filepath.

    A general ``READ_LENGTH <n>`` line sets the read length of lanes 1-8
    but does not add any lane to ``self.lanes``; only lane-specific
    ``<lanes>:READ_LENGTH <n>`` lines do.
    """
    # The context manager closes the file even if a line fails to parse.
    with open(self._conf_filepath, 'r') as conf_file:
      for line in conf_file:

        # Skip comment lines for now.
        if s_comment.search(line):
          continue

        mo = s_general_read_len.search(line)
        if mo:
          read_length = int(line[mo.end():])
          # Handle general READ_LENGTH
          for lane in range(1, 9):
            self.lane_read_length[lane] = read_length

        mo = s_read_len.search(line)
        if mo:
          read_length = int(line[mo.end():])

          # Everything before the first ':' is a string of lane digits;
          # split once so a later ':' on the line cannot break unpacking.
          lane_digits = line.split(':', 1)[0]

          for lane in (int(digit) for digit in lane_digits):
            # Keep track of which lanes are being run.
            if lane not in self.lanes:
              self.lanes.append(lane)

            # Update with lane specific read lengths
            self.lane_read_length[lane] = read_length

    self.lanes.sort()

  def _count_tiles(self):
    """
    Count the number of tiles being used.

    Counts the ``s_1_*_a.tif`` files in ``Images/L001/C1.1``; the result
    is 0 when none are found.
    """
    tile_glob = os.path.join(self._image_dir, 'L001', 'C1.1', 's_1_*_a.tif')
    self.tiles = len(glob.glob(tile_glob))

  def _count_cycles(self):
    """
    Figures out the number of cycles that are available.

    Uses the highest number N among the ``Images/L001/C<N>.1``
    directories, or 0 when no such directory exists (mirroring
    _count_tiles, which also reports 0 for a missing image directory).
    """
    cycle_dirs = glob.glob(os.path.join(self._image_dir, 'L001', 'C*.1'))

    cycle_numbers = []
    for cycle_dir in cycle_dirs:
      name = os.path.basename(cycle_dir)
      # 'C12.1' -> 12: the digits between the leading 'C' and the first '.'
      cycle_numbers.append(int(name[1:name.find('.')]))

    self.cycles = max(cycle_numbers, default=0)

  def _generate_expected(self):
    """
    Generates the set of files we expect to find, all marked incomplete.
    """
    firecrest = self.status['firecrest']
    bustard = self.status['bustard']
    gerald = self.status['gerald']

    # Only the patterns listed here are tracked.  The other
    # PATTERN_GERALD_* templates (mostly temporary files) are not.
    firecrest_tile_patterns = (PATTERN_FIRECREST_INT,
                               PATTERN_FIRECREST_NSE,
                               PATTERN_FIRECREST_POS,
                               PATTERN_FIRECREST_IDX,
                               PATTERN_FIRECREST_CLU1,
                               PATTERN_FIRECREST_CLU2,
                               PATTERN_FIRECREST_CLU3,
                               PATTERN_FIRECREST_CLU4)
    bustard_tile_patterns = (PATTERN_BUSTARD_SIG2,
                             PATTERN_BUSTARD_PRB)
    gerald_tile_patterns = (PATTERN_GERALD_RESCOREPNG,)
    gerald_lane_patterns = (PATTERN_GERALD_CALLPNG,
                            PATTERN_GERALD_ALLPNG,
                            PATTERN_GERALD_PERCENTALLPNG,
                            PATTERN_GERALD_PERCENTCALLPNG,
                            PATTERN_GERALD_PERCENTBASEPNG,
                            PATTERN_GERALD_LANEFINISHED)

    for lane in self.lanes:
      for tile in range(1, self.tiles + 1):

        # LANE, TILE, CYCLE LAYER
        for cycle in range(1, self.cycles + 1):
          firecrest[_p2f(PATTERN_FIRECREST_QCM, lane, tile, cycle)] = False

        # LANE, TILE LAYER
        for pattern in firecrest_tile_patterns:
          firecrest[_p2f(pattern, lane, tile)] = False
        for pattern in bustard_tile_patterns:
          bustard[_p2f(pattern, lane, tile)] = False
        for pattern in gerald_tile_patterns:
          gerald[_p2f(pattern, lane, tile)] = False

      # LANE LAYER
      for pattern in gerald_lane_patterns:
        gerald[_p2f(pattern, lane)] = False

    # RUN LAYER: files that exist once per step, independent of the lanes.
    for name in ('offsets_finished.txt', 'finished.txt'):
      firecrest[name] = False

    bustard['finished.txt'] = False

    for name in ('tiles.txt',
                 'FullAll.htm',
                 'FullError.htm',
                 'FullPerfect.htm',
                 'finished.txt'):
      gerald[name] = False

  def _step_status(self, step):
    """
    Returns (<completed>, <total>) for the files of one step.
    """
    files = self.status[step]
    completed = list(files.values()).count(True)
    return (completed, len(files))

  def statusFirecrest(self):
    """
    returns (<completed>, <total>)
    """
    return self._step_status('firecrest')

  def statusBustard(self):
    """
    returns (<completed>, <total>)
    """
    return self._step_status('bustard')

  def statusGerald(self):
    """
    returns (<completed>, <total>)
    """
    return self._step_status('gerald')

  def statusTotal(self):
    """
    returns (<completed>, <total>) summed over all three steps
    """
    #f = firecrest  c = completed
    #b = bustard    t = total
    #g = gerald
    fc, ft = self.statusFirecrest()
    bc, bt = self.statusBustard()
    gc, gt = self.statusGerald()

    return (fc+bc+gc, ft+bt+gt)

  def statusReport(self):
    """
    Generate the basic percent complete report.

    :returns: list of report lines (one per step, a separator, the total).
    """
    def _percentCompleted(completed, total):
      """
      Returns percent completed as float (0.0 when nothing is expected)
      """
      if not total:
        # Nothing expected: report 0% instead of dividing by zero.
        return 0.0
      return (completed / float(total)) * 100

    fc, ft = self.statusFirecrest()
    bc, bt = self.statusBustard()
    gc, gt = self.statusGerald()
    tc, tt = self.statusTotal()

    fp = _percentCompleted(fc, ft)
    bp = _percentCompleted(bc, bt)
    gp = _percentCompleted(gc, gt)
    tp = _percentCompleted(tc, tt)

    report = ['Firecrest: %s%% (%s/%s)' % (fp, fc, ft),
              '  Bustard: %s%% (%s/%s)' % (bp, bc, bt),
              '   Gerald: %s%% (%s/%s)' % (gp, gc, gt),
              '-----------------------',
              '    Total: %s%% (%s/%s)' % (tp, tc, tt),
             ]
    return report

  def updateFirecrest(self, filename):
    """
    Marks firecrest filename as being completed.
    """
    self.status['firecrest'][filename] = True

  def updateBustard(self, filename):
    """
    Marks bustard filename as being completed.
    """
    self.status['bustard'][filename] = True

  def updateGerald(self, filename):
    """
    Marks gerald filename as being completed.
    """
    self.status['gerald'][filename] = True
390
391
392
393 ##################################################
394 # Functions to be called by Thread(target=<func>)
def _cmdLineStatusMonitorFunc(conf_info):
  """
  Prints the status report of ``conf_info.status`` to stdout forever.

  Every 30 seconds the current report is printed, followed by an empty
  line.  While ``conf_info.status`` is still None a placeholder message
  is printed instead.  The function never returns, so it is meant to run
  in a daemon thread; you should probably use
  startCmdLineStatusMonitor() instead of calling this function directly.

  Example launch::

    t = threading.Thread(target=_cmdLineStatusMonitorFunc,
                         args=[conf_info])
    t.setDaemon(True)
    t.start()
  """
  SLEEP_AMOUNT = 30

  while True:
    if conf_info.status is None:
      print("No status object yet.")
    else:
      lines = conf_info.status.statusReport()
      print(os.linesep.join(lines))
      print()

    # Both branches wait the same amount before polling again.
    time.sleep(SLEEP_AMOUNT)
422
423
424 #############################################
425 # Start monitor thread convenience functions
def startCmdLineStatusMonitor(conf_info):
  """
  Starts a command line status monitor given a conf_info object.

  The monitor runs _cmdLineStatusMonitorFunc in a daemon thread, so it
  does not keep the interpreter alive once the main thread exits.

  :param conf_info: object whose ``status`` attribute is polled by the
      monitor function.
  """
  # Pass daemon=True to the constructor; Thread.setDaemon() is deprecated
  # since Python 3.10.
  t = threading.Thread(target=_cmdLineStatusMonitorFunc,
                       args=[conf_info],
                       daemon=True)
  t.start()
433
434 from optparse import OptionParser
def make_parser():
  """
  Builds the command line parser for this script.

  :returns: an OptionParser whose usage message names the single
      expected positional argument, the config file.
  """
  usage = "%prog: config file"

  # The usage text was previously built but never handed to the parser,
  # so help and error output fell back to optparse's default usage.
  parser = OptionParser(usage=usage)
  return parser
440   
def main(cmdline=None):
  """
  Prints the status report for the config file named on the command line.

  :param cmdline: list of command line arguments; None is passed through
      to the parser unchanged.
  :returns: 0 after the report has been printed.  A wrong number of
      positional arguments is reported through ``parser.error``.
  """
  _options, positional = make_parser().parse_args(cmdline)

  # Exactly one positional argument is expected: the config file.
  if len(positional) != 1:
    make_parser().error("need name of configuration file")

  run_status = GARunStatus(positional[0])
  report_lines = run_status.statusReport()
  print(os.linesep.join(report_lines))
  return 0
451
# Script entry point: main()'s return value becomes the exit status.
if __name__ == "__main__":
  exit_code = main(sys.argv[1:])
  sys.exit(exit_code)
454