convert several not covered by unit-test modules to use print function
[htsworkflow.git] / htsworkflow / pipelines / run_status.py
1 from __future__ import print_function
2
3 __docformat__ = "restructuredtext en"
4
5 import glob
6 import re
7 import os
8 import sys
9 import time
10 import threading
11
# Line classifiers used while reading the run's config file.
# A line whose first character is '#' is a comment.
s_comment = re.compile(r"^#")
# "READ_LENGTH <n>" with no lane prefix.
s_general_read_len = re.compile(r"^READ_LENGTH ")
# "<lanes>:READ_LENGTH <n>", where <lanes> is a run of digits 1-8.
s_read_len = re.compile(r"^[1-8]+:READ_LENGTH ")

# Placeholder; nothing in the visible code assigns another value to it.
s_firecrest = None
17
# File name templates for the outputs of each pipeline step.  Every
# template is expanded through _p2f(); the comment above each group
# lists the values that must be handed to _p2f() for that group.

# ---- Firecrest ----
# _p2f(<pattern>, lane, tile, cycle)
PATTERN_FIRECREST_QCM = "s_%s_%s_%s_qcm.xml"

# _p2f(<pattern>, lane, tile)
PATTERN_FIRECREST_INT = "s_%s_%s_02_int.txt"
PATTERN_FIRECREST_NSE = "s_%s_%s_nse.txt.gz"
PATTERN_FIRECREST_POS = "s_%s_%s_pos.txt"
PATTERN_FIRECREST_IDX = "s_%s_%s_idx.txt"
PATTERN_FIRECREST_CLU1 = "s_%s_%s_01_1_clu.txt"
PATTERN_FIRECREST_CLU2 = "s_%s_%s_01_2_clu.txt"
PATTERN_FIRECREST_CLU3 = "s_%s_%s_01_3_clu.txt"
PATTERN_FIRECREST_CLU4 = "s_%s_%s_01_4_clu.txt"

# ---- Bustard ----
# _p2f(<pattern>, lane, tile)
PATTERN_BUSTARD_SIG2 = "s_%s_%s_sig2.txt"
PATTERN_BUSTARD_PRB = "s_%s_%s_prb.txt"

# ---- Gerald ----
# _p2f(<pattern>, lane, tile)
PATTERN_GERALD_ALLTMP = "s_%s_%s_all.txt.tmp"
PATTERN_GERALD_QRAWTMP = "s_%s_%s_qraw.txt.tmp"
PATTERN_GERALD_ALLPNGTMP = "s_%s_%s_all.tmp.png"
PATTERN_GERALD_ALIGNTMP = "s_%s_%s_align.txt.tmp"
PATTERN_GERALD_QVALTMP = "s_%s_%s_qval.txt.tmp"
PATTERN_GERALD_SCORETMP = "s_%s_%s_score.txt.tmp"
PATTERN_GERALD_PREALIGNTMP = "s_%s_%s_prealign.txt.tmp"
PATTERN_GERALD_REALIGNTMP = "s_%s_%s_realign.txt.tmp"
PATTERN_GERALD_RESCORETMP = "s_%s_%s_rescore.txt.tmp"
PATTERN_GERALD_RESCOREPNG = "s_%s_%s_rescore.png"
PATTERN_GERALD_ERRORSTMPPNG = "s_%s_%s_errors.tmp.png"
PATTERN_GERALD_QCALTMP = "s_%s_%s_qcal.txt.tmp"
PATTERN_GERALD_QVAL = "s_%s_%s_qval.txt"

# _p2f(<pattern>, lane)
PATTERN_GERALD_SEQPRETMP = "s_%s_seqpre.txt.tmp"
PATTERN_GERALD_RESULTTMP = "s_%s_eland_result.txt.tmp"
PATTERN_GERALD_SIGMEANSTMP = "s_%s_Signal_Means.txt.tmp"
PATTERN_GERALD_CALLPNG = "s_%s_call.png"
PATTERN_GERALD_ALLPNG = "s_%s_all.png"
PATTERN_GERALD_PERCENTALLPNG = "s_%s_percent_all.png"
PATTERN_GERALD_PERCENTCALLPNG = "s_%s_percent_call.png"
PATTERN_GERALD_PERCENTBASEPNG = "s_%s_percent_base.png"
PATTERN_GERALD_FILTTMP = "s_%s_filt.txt.tmp"
PATTERN_GERALD_FRAGTMP = "s_%s_frag.txt.tmp"
PATTERN_GERALD_QREPORTTMP = "s_%s_qreport.txt.tmp"
PATTERN_GERALD_QTABLETMP = "s_%s_qtable.txt.tmp"
PATTERN_GERALD_QCALREPORTTMP = "s_%s_qcalreport.txt.tmp"
PATTERN_GERALD_SEQUENCETMP = "s_%s_sequence.txt.tmp"
PATTERN_GERALD_LANEFINISHED = "s_%s_finished.txt"
72
73
74
75 def _p2f(pattern, lane, tile=None, cycle=None):
76   """
77   Converts a pattern plus info into file names
78   """
79
80   # lane, and cycle provided (INVALID)
81   if tile is None and cycle is not None:
82     msg = "Handling of cycle without tile is not currently implemented."
83     raise ValueError, msg
84
85   # lane, tile, cycle provided
86   elif cycle:
87     return pattern % (lane,
88                       "%04d" % (tile,),
89                       "%02d" % (cycle,))
90   
91   # lane, tile provided
92   elif tile:
93     return pattern % (lane, "%04d" % (tile,))
94
95   # lane provided
96   else:
97     return pattern % (lane)
98     
99
class GARunStatus(object):
  """
  Predicts the output files of a run and tracks which have appeared.

  self.status maps each step name ('firecrest', 'bustard', 'gerald')
  to a dict of {expected file name: completed flag}.  Flags start as
  False and are switched to True through the update*() methods.
  """

  def __init__(self, conf_filepath):
    """
    Given an eland config file in the top level directory
    of a run, predicts the files that will be generated
    during a run and provides methods for retrieving
    (completed, total) for each step or entire run.

    conf_filepath -- path of the config file; the 'Images' directory
                     is looked up next to it.
    """
    self._conf_filepath = conf_filepath
    self._base_dir, junk = os.path.split(conf_filepath)
    self._image_dir = os.path.join(self._base_dir, 'Images')

    self.lanes = []
    self.lane_read_length = {}
    self.tiles = None
    self.cycles = None

    self.status = {}
    self.status['firecrest'] = {}
    self.status['bustard'] = {}
    self.status['gerald'] = {}

    self._process_config()
    self._count_tiles()
    self._count_cycles()
    self._generate_expected()


  def _process_config(self):
    """
    Grabs info from self._conf_filepath

    Fills self.lane_read_length from READ_LENGTH lines and records in
    self.lanes (kept sorted) every lane named by a lane specific
    '<lanes>:READ_LENGTH <n>' line.
    """
    # Context manager so the handle is closed even if parsing fails.
    with open(self._conf_filepath, 'r') as f:
      for line in f:

        #Skip comment lines for now.
        if s_comment.search(line):
          continue

        mo = s_general_read_len.search(line)
        if mo:
          read_length = int(line[mo.end():])
          #Handle general READ_LENGTH: applies to lanes 1 through 8
          for i in range(1, 9):
            self.lane_read_length[i] = read_length

        mo = s_read_len.search(line)
        if mo:
          read_length = int(line[mo.end():])
          lane_chars = line.split(':', 1)[0]

          #Each character before the colon is one lane number.
          for lane in [int(c) for c in lane_chars]:

            #Keep track of which lanes are being run.
            if lane not in self.lanes:
              self.lanes.append(lane)

            #Update with lane specific read lengths
            self.lane_read_length[lane] = read_length

          self.lanes.sort()


  def _count_tiles(self):
    """
    Count the number of tiles being used

    Counts the 's_1_*_a.tif' files in Images/L001/C1.1; the result is
    0 when none are present.
    """
    self.tiles = len(glob.glob(os.path.join(self._image_dir,
                                            'L001',
                                            'C1.1',
                                            's_1_*_a.tif')))

  def _count_cycles(self):
    """
    Figures out the number of cycles that are available

    Takes the highest <n> among the Images/L001/C<n>.1 entries.  When
    there are none the count is 0, matching how _count_tiles behaves
    for a run that has no images yet.
    """
    cycle_dirs = glob.glob(os.path.join(self._image_dir, 'L001', 'C*.1'))
    cycle_list = []
    for cycle_dir in cycle_dirs:
      c = os.path.basename(cycle_dir)
      # 'C12.1' -> 12: the digits between the leading 'C' and first '.'
      cycle_list.append(int(c[1:c.find('.')]))

    # max() of an empty list raises ValueError, so guard the empty case.
    self.cycles = max(cycle_list) if cycle_list else 0


  def _generate_expected(self):
    """
    generates a list of files we expect to find.

    Every expected file name is entered into the matching
    self.status[<step>] dict with a value of False.
    """
    firecrest = self.status['firecrest']
    bustard = self.status['bustard']
    gerald = self.status['gerald']

    # Only a subset of the PATTERN_GERALD_* constants is tracked; the
    # remaining ones (mostly *.tmp intermediates) are not used here.
    firecrest_tile_patterns = (PATTERN_FIRECREST_INT,
                               PATTERN_FIRECREST_NSE,
                               PATTERN_FIRECREST_POS,
                               PATTERN_FIRECREST_IDX,
                               PATTERN_FIRECREST_CLU1,
                               PATTERN_FIRECREST_CLU2,
                               PATTERN_FIRECREST_CLU3,
                               PATTERN_FIRECREST_CLU4)
    bustard_tile_patterns = (PATTERN_BUSTARD_SIG2,
                             PATTERN_BUSTARD_PRB)
    gerald_tile_patterns = (PATTERN_GERALD_RESCOREPNG,)
    gerald_lane_patterns = (PATTERN_GERALD_CALLPNG,
                            PATTERN_GERALD_ALLPNG,
                            PATTERN_GERALD_PERCENTALLPNG,
                            PATTERN_GERALD_PERCENTCALLPNG,
                            PATTERN_GERALD_PERCENTBASEPNG,
                            PATTERN_GERALD_LANEFINISHED)

    for lane in self.lanes:
      for tile in range(1, self.tiles + 1):

        # LANE, TILE, CYCLE LAYER
        for cycle in range(1, self.cycles + 1):
          firecrest[_p2f(PATTERN_FIRECREST_QCM, lane, tile, cycle)] = False

        # LANE, TILE LAYER
        for pattern in firecrest_tile_patterns:
          firecrest[_p2f(pattern, lane, tile)] = False
        for pattern in bustard_tile_patterns:
          bustard[_p2f(pattern, lane, tile)] = False
        for pattern in gerald_tile_patterns:
          gerald[_p2f(pattern, lane, tile)] = False

      # LANE LAYER
      for pattern in gerald_lane_patterns:
        gerald[_p2f(pattern, lane)] = False

    # RUN LAYER (one entry per run, independent of lane/tile/cycle)
    for name in ('offsets_finished.txt', 'finished.txt'):
      firecrest[name] = False

    bustard['finished.txt'] = False

    # Not tracked: All.htm.tmp, Signal_Means.txt.tmp,
    # plotIntensity_for_IVC, IVC.htm.tmp, Error.htm.tmp,
    # Perfect.htm.tmp, Summary.htm.tmp, Tile.htm.tmp
    for name in ('tiles.txt', 'FullAll.htm', 'FullError.htm',
                 'FullPerfect.htm', 'finished.txt'):
      gerald[name] = False


  def _count_status(self, step):
    """
    returns (<completed>, <total>) for self.status[step]
    """
    entries = self.status[step]
    # On Python 3 dict.values() is a view without .count(); copying to
    # a list works on both Python 2 and Python 3.
    completed = list(entries.values()).count(True)
    return (completed, len(entries))


  def statusFirecrest(self):
    """
    returns (<completed>, <total>)
    """
    return self._count_status('firecrest')


  def statusBustard(self):
    """
    returns (<completed>, <total>)
    """
    return self._count_status('bustard')


  def statusGerald(self):
    """
    returns (<completed>, <total>)
    """
    return self._count_status('gerald')


  def statusTotal(self):
    """
    returns (<completed>, <total>) summed over all three steps
    """
    #f = firecrest  c = completed
    #b = bustard    t = total
    #g = gerald
    fc, ft = self.statusFirecrest()
    bc, bt = self.statusBustard()
    gc, gt = self.statusGerald()

    return (fc+bc+gc, ft+bt+gt)


  def statusReport(self):
    """
    Generate the basic percent complete report

    Returns a list of five strings: one line per step, a separator
    and a total line.
    """
    def _percentCompleted(completed, total):
      """
      Returns percent completed as float
      """
      return (completed / float(total)) * 100

    fc, ft = self.statusFirecrest()
    bc, bt = self.statusBustard()
    gc, gt = self.statusGerald()
    tc, tt = self.statusTotal()

    fp = _percentCompleted(fc, ft)
    bp = _percentCompleted(bc, bt)
    gp = _percentCompleted(gc, gt)
    tp = _percentCompleted(tc, tt)

    report = ['Firecrest: %s%% (%s/%s)' % (fp, fc, ft),
              '  Bustard: %s%% (%s/%s)' % (bp, bc, bt),
              '   Gerald: %s%% (%s/%s)' % (gp, gc, gt),
              '-----------------------',
              '    Total: %s%% (%s/%s)' % (tp, tc, tt),
             ]
    return report

  def updateFirecrest(self, filename):
    """
    Marks firecrest filename as being completed.

    A filename that was not predicted is added as a new completed
    entry, which also increases the firecrest total.
    """
    self.status['firecrest'][filename] = True


  def updateBustard(self, filename):
    """
    Marks bustard filename as being completed.

    A filename that was not predicted is added as a new completed
    entry, which also increases the bustard total.
    """
    self.status['bustard'][filename] = True


  def updateGerald(self, filename):
    """
    Marks gerald filename as being completed.

    A filename that was not predicted is added as a new completed
    entry, which also increases the gerald total.
    """
    self.status['gerald'][filename] = True
392
393
394
395 ##################################################
396 # Functions to be called by Thread(target=<func>)
def _cmdLineStatusMonitorFunc(conf_info):
  """
  Print the run's status report to stdout every 30 seconds, forever.

  conf_info -- object with a `status` attribute.  While that attribute
               is None a placeholder message is printed; otherwise its
               statusReport() lines are printed followed by a blank
               line.

  This function never returns, so it is meant to run in its own
  thread.  You should probably use startCmdLineStatusMonitor()
  instead of this function.

  Example launch:

      t = threading.Thread(target=_cmdLineStatusMonitorFunc,
                           args=[conf_info])
      t.setDaemon(True)
      t.start()
  """
  poll_seconds = 30

  while True:
    if conf_info.status is not None:
      lines = conf_info.status.statusReport()
      print(os.linesep.join(lines))
      print()
    else:
      print("No status object yet.")

    # Both branches wait the same amount before polling again.
    time.sleep(poll_seconds)
424
425
426 #############################################
427 # Start monitor thread convenience functions
def startCmdLineStatusMonitor(conf_info):
  """
  Starts a command line status monitor given a conf_info object.

  Runs _cmdLineStatusMonitorFunc(conf_info) in a new daemon thread, so
  the monitor does not keep the interpreter alive once the main
  thread ends.
  """
  t = threading.Thread(target=_cmdLineStatusMonitorFunc, args=[conf_info])
  # The daemon attribute replaces the deprecated Thread.setDaemon().
  t.daemon = True
  t.start()
435
436 from optparse import OptionParser
def make_parser():
  """
  Build the command line parser for this script.

  Returns an OptionParser with no options defined; the single
  positional argument (the config file) is checked by main().
  """
  usage = "%prog: config file"

  # Hand the usage text to the parser; without it optparse falls back
  # to its default "%prog [options]" message.
  parser = OptionParser(usage=usage)
  return parser
442   
def main(cmdline=None):
  """
  Print the status report for the run described by one config file.

  cmdline -- list of command line arguments, passed to the parser's
             parse_args(); exactly one positional argument (the
             config file path) is required, otherwise the parser's
             error() is invoked.

  Returns 0 after the report has been printed.
  """
  option_parser = make_parser()
  _options, positional = option_parser.parse_args(cmdline)

  if len(positional) != 1:
    option_parser.error("need name of configuration file")

  config_path = positional[0]
  run_status = GARunStatus(config_path)
  report_lines = run_status.statusReport()
  print(os.linesep.join(report_lines))
  return 0
453
# Script entry point: main()'s return value becomes the exit status.
if __name__ == "__main__":
  exit_status = main(sys.argv[1:])
  sys.exit(exit_status)