[project @ Added GARunStatus class for tracking percent complete through each step...
[htsworkflow.git] / gaworkflow / pipeline / run_status.py
1 import glob
2 import re
3 import os
4
# Line classifiers for the run configuration file.  All three are
# anchored at the start of the line.

# A line whose first character is '#'.
s_comment = re.compile(r'^#')
# 'READ_LENGTH <value>' with no lane prefix.
s_general_read_len = re.compile(r'^READ_LENGTH ')
# '<lanes>:READ_LENGTH <value>' where <lanes> is a run of digits 1-8.
s_read_len = re.compile(r'^[1-8]+:READ_LENGTH ')

# No pattern is assigned here; only the name is reserved.
s_firecrest = None
10
11 def _four_digit_num_in_string(num):
12   if num < 0:
13     pass
14   elif num < 10:
15     return '000' + str(num)
16   elif num < 100:
17     return '00' + str(num)
18   elif num < 1000:
19     return '0' + str(num)
20   elif num < 10000:
21     return str(num)
22
23   msg = 'Invalid number: %s' % (num)
24   raise ValueError, msg
25
26 def _two_digit_num_in_string(num):
27   if num < 0:
28     pass
29   elif num < 10:
30     return '0' + str(num)
31   elif num < 100:
32     return str(num)
33
34   msg = 'Invalid number: %s' % (num)
35   raise ValueError, msg
36
37
# Every PATTERN_* value below is a '%s' template that _p2f() expands
# into a file name.  The number of '%s' slots decides how _p2f() has
# to be called: lane only, lane + tile, or lane + tile + cycle.
# _p2f() zero-pads the tile to four digits and the cycle to two.

# FIRECREST PATTERNS
# Three slots -- _p2f(<pattern>, lane, tile, cycle)
PATTERN_FIRECREST_INT = 's_%s_%s_%s_int.txt'
PATTERN_FIRECREST_QCM = 's_%s_%s_%s_qcm.xml'
PATTERN_FIRECREST_CLU1 = 's_%s_%s_%s_1_clu.txt'
PATTERN_FIRECREST_CLU2 = 's_%s_%s_%s_2_clu.txt'
PATTERN_FIRECREST_CLU3 = 's_%s_%s_%s_3_clu.txt'
PATTERN_FIRECREST_CLU4 = 's_%s_%s_%s_4_clu.txt'

# Two slots -- _p2f(<pattern>, lane, tile)
PATTERN_FIRECREST_NSE = 's_%s_%s_nse.txt.gz'
PATTERN_FIRECREST_POS = 's_%s_%s_pos.txt'
PATTERN_FIRECREST_IDX = 's_%s_%s_idx.txt'


# BUSTARD PATTERNS
# Two slots -- _p2f(<pattern>, lane, tile)
PATTERN_BUSTARD_SIG2 = 's_%s_%s_sig2.txt'
PATTERN_BUSTARD_PRB = 's_%s_%s_prb.txt'



# GERALD PATTERNS
# Two slots -- _p2f(<pattern>, lane, tile)
PATTERN_GERALD_ALL = 's_%s_%s_all.txt.tmp'
PATTERN_GERALD_QRAW = 's_%s_%s_qraw.txt.tmp'
PATTERN_GERALD_ALLPNGTMP = 's_%s_%s_all.tmp.png'
PATTERN_GERALD_ALIGNTMP = 's_%s_%s_align.txt.tmp'
PATTERN_GERALD_QVALTMP = 's_%s_%s_qval.txt.tmp'
PATTERN_GERALD_SCORETMP = 's_%s_%s_score.txt.tmp'
PATTERN_GERALD_PREALIGNTMP = 's_%s_%s_prealign.txt.tmp'
PATTERN_GERALD_REALIGNTMP = 's_%s_%s_realign.txt.tmp'
PATTERN_GERALD_RESCORETMP = 's_%s_%s_rescore.txt.tmp'
PATTERN_GERALD_RESCOREPNG = 's_%s_%s_rescore.png'
PATTERN_GERALD_ERRORSTMPPNG = 's_%s_%s_errors.tmp.png'
PATTERN_GERALD_QCALTMP = 's_%s_%s_qcal.txt.tmp'
PATTERN_GERALD_QVAL = 's_%s_%s_qval.txt'

# One slot -- _p2f(<pattern>, lane)
PATTERN_GERALD_SEQPRE = 's_%s_seqpre.txt.tmp'
PATTERN_GERALD_RESULTTMP = 's_%s_eland_result.txt.tmp'
PATTERN_GERALD_SIGMEANS = 's_%s_Signal_Means.txt.tmp'
PATTERN_GERALD_CALLPNG = 's_%s_call.png'
PATTERN_GERALD_ALLPNG = 's_%s_all.png'
PATTERN_GERALD_PERCENTALLPNG = 's_%s_percent_all.png'
PATTERN_GERALD_PERCENTCALLPNG = 's_%s_percent_call.png'
PATTERN_GERALD_PERCENTBASEPNG = 's_%s_percent_base.png'
PATTERN_GERALD_FILTTMP = 's_%s_filt.txt.tmp'
PATTERN_GERALD_FRAGTMP = 's_%s_frag.txt.tmp'
PATTERN_GERALD_QREPORTTMP = 's_%s_qreport.txt.tmp'
PATTERN_GERALD_QTABLETMP = 's_%s_qtable.txt.tmp'
PATTERN_GERALD_QCALREPORTTMP = 's_%s_qcalreport.txt.tmp'
PATTERN_GERALD_SEQUENCETMP = 's_%s_sequence.txt.tmp'
PATTERN_GERALD_LANEFINISHED = 's_%s_finished.txt'
92
93
94
95 def _p2f(pattern, lane, tile=None, cycle=None):
96   """
97   Converts a pattern plus info into file names
98   """
99
100   # lane, and cycle provided (INVALID)
101   if tile is None and cycle is not None:
102     msg = "Handling of cycle without tile is not currently implemented."
103     raise ValueError, msg
104
105   # lane, tile, cycle provided
106   elif cycle:
107     return pattern % (lane,
108                       _four_digit_num_in_string(tile),
109                       _two_digit_num_in_string(cycle))
110   
111   # lane, tile provided
112   elif tile:
113     return pattern % (lane, _four_digit_num_in_string(tile))
114
115   # lane provided
116   else:
117     return pattern % (lane)
118     
119
120
121 class GARunStatus(object):
122
123   def __init__(self, conf_filepath):
124     """
125     Given an eland config file in the top level directory
126     of a run, predicts the files that will be generated
127     during a run and provides methods for retrieving
128     (completed, total) for each step or entire run.
129     """
130
131     self._conf_filepath = conf_filepath
132     self._base_dir, junk = os.path.split(conf_filepath)
133     self._image_dir = os.path.join(self._base_dir, 'Images')
134     
135     self.lanes = []
136     self.lane_read_length = {}
137     self.tiles = None
138     self.cycles = None
139     
140     self.status = {}
141     self.status['firecrest'] = {}
142     self.status['bustard'] = {}
143     self.status['gerald'] = {}
144     
145     self._process_config()
146     self._count_tiles()
147     self._count_cycles()
148     self._generate_expected()
149
150
151   def _process_config(self):
152     """
153     Grabs info from self._conf_filepath
154     """
155     f = open(self._conf_filepath, 'r')
156
157     for line in f:
158
159       #Skip comment lines for now.
160       if s_comment.search(line):
161         continue
162
163       mo =  s_general_read_len.search(line)
164       if mo:
165         read_length = int(line[mo.end():])
166         #Handle general READ_LENGTH
167         for i in range(1,9):
168           self.lane_read_length[i] = read_length
169       
170       mo = s_read_len.search(line)
171       if mo:
172         read_length = int(line[mo.end():])
173         lanes, junk = line.split(':')
174
175         #Convert lanes from string of lanes to list of lane #s.
176         lanes = [ int(i) for i in lanes ]
177
178         
179         for lane in lanes:
180
181           #Keep track of which lanes are being run.
182           if lane not in self.lanes:
183             self.lanes.append(lane)
184
185           #Update with lane specific read lengths
186           self.lane_read_length[lane] = read_length
187
188         self.lanes.sort()
189
190
191   def _count_tiles(self):
192     """
193     Count the number of tiles being used
194     """
195     self.tiles = len(glob.glob(os.path.join(self._image_dir,
196                                             'L001',
197                                             'C1.1',
198                                             's_1_*_a.tif')))
199
200   def _count_cycles(self):
201     """
202     Figures out the number of cycles that are available
203     """
204     cycle_dirs = glob.glob(os.path.join(self._image_dir, 'L001', 'C*.1'))
205     cycle_list = []
206     for cycle_dir in cycle_dirs:
207       junk, c = os.path.split(cycle_dir)
208       cycle_list.append(int(c[1:c.find('.')]))
209
210     self.cycles = max(cycle_list)
211     
212
213
214
215   def _generate_expected(self):
216     """
217     generates a list of files we expect to find.
218     """
219
220     firecrest = self.status['firecrest']
221     bustard = self.status['bustard']
222     gerald = self.status['gerald']
223     
224     
225     for lane in self.lanes:
226       for tile in range(1,self.tiles+1):
227         for cycle in range(1, self.cycles+1):
228
229           ##########################
230           # LANE, TILE, CYCLE LAYER
231
232           # FIRECREST
233           firecrest[_p2f(PATTERN_FIRECREST_INT, lane, tile, cycle)] = False
234                          
235           firecrest[_p2f(PATTERN_FIRECREST_QCM, lane, tile, cycle)] = False
236
237           firecrest[_p2f(PATTERN_FIRECREST_CLU1, lane, tile, cycle)] = False
238           firecrest[_p2f(PATTERN_FIRECREST_CLU2, lane, tile, cycle)] = False
239           firecrest[_p2f(PATTERN_FIRECREST_CLU3, lane, tile, cycle)] = False
240           firecrest[_p2f(PATTERN_FIRECREST_CLU4, lane, tile, cycle)] = False
241
242         ###################
243         # LANE, TILE LAYER
244
245         # FIRECREST
246         
247         firecrest[_p2f(PATTERN_FIRECREST_NSE, lane, tile)] = False
248         firecrest[_p2f(PATTERN_FIRECREST_POS, lane, tile)] = False
249         firecrest[_p2f(PATTERN_FIRECREST_IDX, lane, tile)] = False
250
251
252         # BUSTARD
253         bustard[_p2f(PATTERN_BUSTARD_SIG2, lane, tile)] = False
254         bustard[_p2f(PATTERN_BUSTARD_PRB, lane, tile)] = False
255
256
257         # GERALD
258         gerald[_p2f(PATTERN_GERALD_ALL, lane, tile)] = False
259         gerald[_p2f(PATTERN_GERALD_QRAW, lane, tile)] = False
260         gerald[_p2f(PATTERN_GERALD_ALLPNGTMP, lane, tile)] = False
261         gerald[_p2f(PATTERN_GERALD_ALIGNTMP, lane, tile)] = False
262         gerald[_p2f(PATTERN_GERALD_QVALTMP, lane, tile)] = False
263         gerald[_p2f(PATTERN_GERALD_SCORETMP, lane, tile)] = False
264         gerald[_p2f(PATTERN_GERALD_PREALIGNTMP, lane, tile)] = False
265         gerald[_p2f(PATTERN_GERALD_REALIGNTMP, lane, tile)] = False
266         gerald[_p2f(PATTERN_GERALD_RESCORETMP, lane, tile)] = False
267         gerald[_p2f(PATTERN_GERALD_RESCOREPNG, lane, tile)] = False
268         gerald[_p2f(PATTERN_GERALD_ERRORSTMPPNG, lane, tile)] = False
269         gerald[_p2f(PATTERN_GERALD_QCALTMP, lane, tile)] = False
270         gerald[_p2f(PATTERN_GERALD_QVAL, lane, tile)] = False
271
272       ###################
273       # LANE LAYER
274
275       # GERALD
276       gerald[_p2f(PATTERN_GERALD_SEQPRE, lane)] = False
277       gerald[_p2f(PATTERN_GERALD_RESULTTMP, lane)] = False
278       gerald[_p2f(PATTERN_GERALD_SIGMEANS, lane)] = False
279       gerald[_p2f(PATTERN_GERALD_CALLPNG, lane)] = False
280       gerald[_p2f(PATTERN_GERALD_ALLPNG, lane)] = False
281       gerald[_p2f(PATTERN_GERALD_PERCENTALLPNG, lane)] = False
282       gerald[_p2f(PATTERN_GERALD_PERCENTCALLPNG, lane)] = False
283       gerald[_p2f(PATTERN_GERALD_PERCENTBASEPNG, lane)] = False
284       gerald[_p2f(PATTERN_GERALD_FILTTMP, lane)] = False
285       gerald[_p2f(PATTERN_GERALD_FRAGTMP, lane)] = False
286       gerald[_p2f(PATTERN_GERALD_QREPORTTMP, lane)] = False
287       gerald[_p2f(PATTERN_GERALD_QTABLETMP, lane)] = False
288       gerald[_p2f(PATTERN_GERALD_QCALREPORTTMP, lane)] = False
289       gerald[_p2f(PATTERN_GERALD_SEQUENCETMP, lane)] = False
290       gerald[_p2f(PATTERN_GERALD_LANEFINISHED, lane)] = False
291       
292       
293
294     #################
295     # LOOPS FINISHED
296
297     # FIRECREST
298     firecrest['offsets_finished.txt'] = False
299     firecrest['finished.txt'] = False
300
301     # BUSTARD
302     bustard['finished.txt'] = False
303
304     # GERALD
305     gerald['tiles.txt'] = False
306     gerald['FullAll.htm'] = False
307     gerald['All.htm.tmp'] = False
308     gerald['Signal_Means.txt.tmp'] = False
309     gerald['plotIntensity_for_IVC'] = False
310     gerald['IVC.htm.tmp'] = False
311     gerald['FullError.htm'] = False
312     gerald['FullPerfect.htm'] = False
313     gerald['Error.htm.tmp'] = False
314     gerald['Perfect.htm.tmp'] = False
315     gerald['Summary.htm.tmp'] = False
316     gerald['Tile.htm.tmp'] = False
317     gerald['finished.txt'] = False
318     
319     
320
321
322   def statusFirecrest(self):
323     """
324     returns (<completed>, <total>)
325     """
326     firecrest = self.status['firecrest']
327     total = len(firecrest)
328     completed = firecrest.values().count(True)
329
330     return (completed, total)
331
332
333   def statusBustard(self):
334     """
335     returns (<completed>, <total>)
336     """
337     bustard = self.status['bustard']
338     total = len(bustard)
339     completed = bustard.values().count(True)
340
341     return (completed, total)
342
343
344   def statusGerald(self):
345     """
346     returns (<completed>, <total>)
347     """
348     gerald = self.status['gerald']
349     total = len(gerald)
350     completed = gerald.values().count(True)
351
352     return (completed, total)
353
354
355   def statusTotal(self):
356     """
357     returns (<completed>, <total>)
358     """
359     #f = firecrest  c = completed
360     #b = bustard    t = total
361     #g = gerald
362     fc, ft = self.statusFirecrest()
363     bc, bt = self.statusBustard()
364     gc, gt = self.statusGerald()
365
366     return (fc+bc+gc, ft+bt+gt)
367
368
369   def updateFirecrest(self, filename):
370     """
371     Marks firecrest filename as being completed.
372     """
373     self.status['firecrest'][filename] = True
374     
375
376   def updateBustard(self, filename):
377     """
378     Marks bustard filename as being completed.
379     """
380     self.status['bustard'][filename] = True
381
382
383   def updateGerald(self, filename):
384     """
385     Marks gerald filename as being completed.
386     """
387     self.status['gerald'][filename] = True