[project @ run_status correct firecrest expected file estimate]
[htsworkflow.git] / gaworkflow / pipeline / run_status.py
1 import glob
2 import re
3 import os
4
# Regular expressions used by GARunStatus._process_config to classify the
# lines of the config file.

# A line whose first character is '#' (skipped as a comment).
s_comment = re.compile('^#')
# A run-wide "READ_LENGTH <n>" line; the number is whatever follows the match.
s_general_read_len = re.compile('^READ_LENGTH ')
# A lane-specific "<lanes>:READ_LENGTH <n>" line, where <lanes> is one or more
# of the digits 1-8 written together (e.g. "1234:READ_LENGTH 32").
s_read_len = re.compile('^[1-8]+:READ_LENGTH ')

# NOTE(review): not referenced anywhere in the visible part of this file.
s_firecrest = None
10
def _four_digit_num_in_string(num):
  """
  Returns num as a zero-padded, four character string (e.g. 7 -> '0007').

  num is expected to be an integer in the range 0-9999.

  Raises ValueError if num is negative or has more than four digits.
  """
  if 0 <= num < 10000:
    return '%04d' % num

  # Call-style raise is valid on both Python 2 and Python 3; the 1-tuple
  # keeps the formatting safe whatever type num happens to be.
  msg = 'Invalid number: %s' % (num,)
  raise ValueError(msg)
25
def _two_digit_num_in_string(num):
  """
  Returns num as a zero-padded, two character string (e.g. 7 -> '07').

  num is expected to be an integer in the range 0-99.

  Raises ValueError if num is negative or has more than two digits.
  """
  if 0 <= num < 100:
    return '%02d' % num

  # Call-style raise is valid on both Python 2 and Python 3; the 1-tuple
  # keeps the formatting safe whatever type num happens to be.
  msg = 'Invalid number: %s' % (num,)
  raise ValueError(msg)
36
37
# FIRECREST PATTERNS
# File name templates that GARunStatus._generate_expected expands through
# _p2f() to build the list of expected firecrest files.
#
# _p2f(<pattern>, lane, tile, cycle)
#   slots, in order: lane, tile (zero-padded to four digits),
#   cycle (zero-padded to two digits)
PATTERN_FIRECREST_QCM = 's_%s_%s_%s_qcm.xml'

# _p2f(<pattern>, lane, tile)
#   slots, in order: lane, tile (zero-padded to four digits)
PATTERN_FIRECREST_INT = 's_%s_%s_02_int.txt'
PATTERN_FIRECREST_NSE = 's_%s_%s_nse.txt.gz'
PATTERN_FIRECREST_POS = 's_%s_%s_pos.txt'
PATTERN_FIRECREST_IDX = 's_%s_%s_idx.txt'
PATTERN_FIRECREST_CLU1 = 's_%s_%s_01_1_clu.txt'
PATTERN_FIRECREST_CLU2 = 's_%s_%s_01_2_clu.txt'
PATTERN_FIRECREST_CLU3 = 's_%s_%s_01_3_clu.txt'
PATTERN_FIRECREST_CLU4 = 's_%s_%s_01_4_clu.txt'
51
52
# BUSTARD PATTERNS
# File name templates that GARunStatus._generate_expected expands through
# _p2f() to build the list of expected bustard files.
#
# _p2f(<pattern>, lane, tile)
#   slots, in order: lane, tile (zero-padded to four digits)
PATTERN_BUSTARD_SIG2 = 's_%s_%s_sig2.txt'
PATTERN_BUSTARD_PRB = 's_%s_%s_prb.txt'
57
58
59
# GERALD PATTERNS
# File name templates that GARunStatus._generate_expected expands through
# _p2f() to build the list of expected gerald files.
#
# _p2f(<pattern>, lane, tile)
#   slots, in order: lane, tile (zero-padded to four digits)
PATTERN_GERALD_ALL = 's_%s_%s_all.txt.tmp'
PATTERN_GERALD_QRAW = 's_%s_%s_qraw.txt.tmp'
PATTERN_GERALD_ALLPNGTMP = 's_%s_%s_all.tmp.png'
PATTERN_GERALD_ALIGNTMP = 's_%s_%s_align.txt.tmp'
PATTERN_GERALD_QVALTMP = 's_%s_%s_qval.txt.tmp'
PATTERN_GERALD_SCORETMP = 's_%s_%s_score.txt.tmp'
PATTERN_GERALD_PREALIGNTMP = 's_%s_%s_prealign.txt.tmp'
PATTERN_GERALD_REALIGNTMP = 's_%s_%s_realign.txt.tmp'
PATTERN_GERALD_RESCORETMP = 's_%s_%s_rescore.txt.tmp'
PATTERN_GERALD_RESCOREPNG = 's_%s_%s_rescore.png'
PATTERN_GERALD_ERRORSTMPPNG = 's_%s_%s_errors.tmp.png'
PATTERN_GERALD_QCALTMP = 's_%s_%s_qcal.txt.tmp'
PATTERN_GERALD_QVAL = 's_%s_%s_qval.txt'

# _p2f(<pattern>, lane)
#   single slot: lane
PATTERN_GERALD_SEQPRE = 's_%s_seqpre.txt.tmp'
PATTERN_GERALD_RESULTTMP = 's_%s_eland_result.txt.tmp'
PATTERN_GERALD_SIGMEANS = 's_%s_Signal_Means.txt.tmp'
PATTERN_GERALD_CALLPNG = 's_%s_call.png'
PATTERN_GERALD_ALLPNG = 's_%s_all.png'
PATTERN_GERALD_PERCENTALLPNG = 's_%s_percent_all.png'
PATTERN_GERALD_PERCENTCALLPNG = 's_%s_percent_call.png'
PATTERN_GERALD_PERCENTBASEPNG = 's_%s_percent_base.png'
PATTERN_GERALD_FILTTMP = 's_%s_filt.txt.tmp'
PATTERN_GERALD_FRAGTMP = 's_%s_frag.txt.tmp'
PATTERN_GERALD_QREPORTTMP = 's_%s_qreport.txt.tmp'
PATTERN_GERALD_QTABLETMP = 's_%s_qtable.txt.tmp'
PATTERN_GERALD_QCALREPORTTMP = 's_%s_qcalreport.txt.tmp'
PATTERN_GERALD_SEQUENCETMP = 's_%s_sequence.txt.tmp'
PATTERN_GERALD_LANEFINISHED = 's_%s_finished.txt'
92
93
94
def _p2f(pattern, lane, tile=None, cycle=None):
  """
  Converts a pattern plus info into a file name.

  pattern -- %-style template with one slot per supplied value
  lane    -- substituted as-is
  tile    -- optional; zero-padded to four digits
  cycle   -- optional; zero-padded to two digits (requires tile)

  tile and cycle count as supplied whenever they are not None, so a
  value of 0 is formatted rather than silently dropped.

  Raises ValueError if cycle is given without tile, or if tile/cycle
  fall outside the range the padding helpers accept.
  """

  # lane and cycle provided (INVALID)
  if tile is None and cycle is not None:
    msg = "Handling of cycle without tile is not currently implemented."
    raise ValueError(msg)

  # lane provided
  if tile is None:
    # 1-tuple so the lane value is always treated as a single argument.
    return pattern % (lane,)

  padded_tile = _four_digit_num_in_string(tile)

  # lane, tile provided
  if cycle is None:
    return pattern % (lane, padded_tile)

  # lane, tile, cycle provided
  return pattern % (lane, padded_tile, _two_digit_num_in_string(cycle))
118     
119
120
class GARunStatus(object):
  """
  Tracks which of the files expected from a run have been produced.

  Expected file names are kept per step in self.status, a dict with the
  keys 'firecrest', 'bustard' and 'gerald', each mapping
  {filename: True once reported complete, False otherwise}.
  """

  def __init__(self, conf_filepath):
    """
    Given an eland config file in the top level directory
    of a run, predicts the files that will be generated
    during a run and provides methods for retrieving
    (completed, total) for each step or entire run.
    """

    self._conf_filepath = conf_filepath
    self._base_dir = os.path.dirname(conf_filepath)
    self._image_dir = os.path.join(self._base_dir, 'Images')

    # Lanes named by lane-specific READ_LENGTH lines, kept sorted.
    self.lanes = []
    # {lane number: read length}
    self.lane_read_length = {}
    self.tiles = None
    self.cycles = None

    self.status = {}
    self.status['firecrest'] = {}
    self.status['bustard'] = {}
    self.status['gerald'] = {}

    self._process_config()
    self._count_tiles()
    self._count_cycles()
    self._generate_expected()


  def _process_config(self):
    """
    Grabs info from self._conf_filepath

    Fills self.lane_read_length from the READ_LENGTH lines and
    self.lanes from the lane-specific ones.
    """
    f = open(self._conf_filepath, 'r')

    # try/finally (rather than 'with') closes the file on every path
    # without requiring a newer interpreter than the rest of the module.
    try:
      for line in f:

        #Skip comment lines for now.
        if s_comment.search(line):
          continue

        mo = s_general_read_len.search(line)
        if mo:
          read_length = int(line[mo.end():])
          #Handle general READ_LENGTH
          for lane in range(1, 9):
            self.lane_read_length[lane] = read_length

        mo = s_read_len.search(line)
        if mo:
          read_length = int(line[mo.end():])

          # Only the text before the first ':' names the lanes; limiting
          # the split keeps a later ':' on the line from breaking this.
          lane_digits = line.split(':', 1)[0]

          #Convert lanes from string of lanes to list of lane #s.
          for lane in [int(digit) for digit in lane_digits]:

            #Keep track of which lanes are being run.
            if lane not in self.lanes:
              self.lanes.append(lane)

            #Update with lane specific read lengths
            self.lane_read_length[lane] = read_length

          self.lanes.sort()
    finally:
      f.close()


  def _count_tiles(self):
    """
    Count the number of tiles being used

    The count is the number of files matching s_1_*_a.tif in
    Images/L001/C1.1 (0 if there are none).
    """
    tile_images = glob.glob(os.path.join(self._image_dir,
                                         'L001',
                                         'C1.1',
                                         's_1_*_a.tif'))
    self.tiles = len(tile_images)


  def _count_cycles(self):
    """
    Figures out the number of cycles that are available

    Uses the highest N among the Images/L001/C<N>.1 entries, or 0 when
    there are none (matching how _count_tiles reports missing images).
    """
    cycle_dirs = glob.glob(os.path.join(self._image_dir, 'L001', 'C*.1'))

    cycle_list = []
    for cycle_dir in cycle_dirs:
      name = os.path.basename(cycle_dir)
      # 'C12.1' -> 12: the digits between the leading 'C' and the first '.'
      cycle_list.append(int(name[1:name.find('.')]))

    if cycle_list:
      self.cycles = max(cycle_list)
    else:
      self.cycles = 0


  def _generate_expected(self):
    """
    generates a list of files we expect to find.

    Every expected file name is entered into the matching step of
    self.status with the value False.
    """

    firecrest = self.status['firecrest']
    bustard = self.status['bustard']
    gerald = self.status['gerald']

    # Patterns expanded once per (lane, tile).
    firecrest_tile_patterns = (PATTERN_FIRECREST_INT,
                               PATTERN_FIRECREST_NSE,
                               PATTERN_FIRECREST_POS,
                               PATTERN_FIRECREST_IDX,
                               PATTERN_FIRECREST_CLU1,
                               PATTERN_FIRECREST_CLU2,
                               PATTERN_FIRECREST_CLU3,
                               PATTERN_FIRECREST_CLU4)

    bustard_tile_patterns = (PATTERN_BUSTARD_SIG2,
                             PATTERN_BUSTARD_PRB)

    gerald_tile_patterns = (PATTERN_GERALD_ALL,
                            PATTERN_GERALD_QRAW,
                            PATTERN_GERALD_ALLPNGTMP,
                            PATTERN_GERALD_ALIGNTMP,
                            PATTERN_GERALD_QVALTMP,
                            PATTERN_GERALD_SCORETMP,
                            PATTERN_GERALD_PREALIGNTMP,
                            PATTERN_GERALD_REALIGNTMP,
                            PATTERN_GERALD_RESCORETMP,
                            PATTERN_GERALD_RESCOREPNG,
                            PATTERN_GERALD_ERRORSTMPPNG,
                            PATTERN_GERALD_QCALTMP,
                            PATTERN_GERALD_QVAL)

    # Patterns expanded once per lane.
    gerald_lane_patterns = (PATTERN_GERALD_SEQPRE,
                            PATTERN_GERALD_RESULTTMP,
                            PATTERN_GERALD_SIGMEANS,
                            PATTERN_GERALD_CALLPNG,
                            PATTERN_GERALD_ALLPNG,
                            PATTERN_GERALD_PERCENTALLPNG,
                            PATTERN_GERALD_PERCENTCALLPNG,
                            PATTERN_GERALD_PERCENTBASEPNG,
                            PATTERN_GERALD_FILTTMP,
                            PATTERN_GERALD_FRAGTMP,
                            PATTERN_GERALD_QREPORTTMP,
                            PATTERN_GERALD_QTABLETMP,
                            PATTERN_GERALD_QCALREPORTTMP,
                            PATTERN_GERALD_SEQUENCETMP,
                            PATTERN_GERALD_LANEFINISHED)

    # Files expected once per run.
    gerald_run_files = ('tiles.txt',
                        'FullAll.htm',
                        'All.htm.tmp',
                        'Signal_Means.txt.tmp',
                        'plotIntensity_for_IVC',
                        'IVC.htm.tmp',
                        'FullError.htm',
                        'FullPerfect.htm',
                        'Error.htm.tmp',
                        'Perfect.htm.tmp',
                        'Summary.htm.tmp',
                        'Tile.htm.tmp',
                        'finished.txt')

    for lane in self.lanes:
      for tile in range(1, self.tiles + 1):

        ##########################
        # LANE, TILE, CYCLE LAYER

        # FIRECREST
        for cycle in range(1, self.cycles + 1):
          firecrest[_p2f(PATTERN_FIRECREST_QCM, lane, tile, cycle)] = False

        ###################
        # LANE, TILE LAYER

        # FIRECREST
        for pattern in firecrest_tile_patterns:
          firecrest[_p2f(pattern, lane, tile)] = False

        # BUSTARD
        for pattern in bustard_tile_patterns:
          bustard[_p2f(pattern, lane, tile)] = False

        # GERALD
        for pattern in gerald_tile_patterns:
          gerald[_p2f(pattern, lane, tile)] = False

      ###################
      # LANE LAYER

      # GERALD
      for pattern in gerald_lane_patterns:
        gerald[_p2f(pattern, lane)] = False

    #################
    # LOOPS FINISHED

    # FIRECREST
    firecrest['offsets_finished.txt'] = False
    firecrest['finished.txt'] = False

    # BUSTARD
    bustard['finished.txt'] = False

    # GERALD
    for filename in gerald_run_files:
      gerald[filename] = False


  def _step_status(self, step):
    """
    returns (<completed>, <total>) for the named step of self.status
    """
    files = self.status[step]
    # list() is needed because dict views have no count() on Python 3.
    completed = list(files.values()).count(True)

    return (completed, len(files))


  def statusFirecrest(self):
    """
    returns (<completed>, <total>)
    """
    return self._step_status('firecrest')


  def statusBustard(self):
    """
    returns (<completed>, <total>)
    """
    return self._step_status('bustard')


  def statusGerald(self):
    """
    returns (<completed>, <total>)
    """
    return self._step_status('gerald')


  def statusTotal(self):
    """
    returns (<completed>, <total>)
    """
    #f = firecrest  c = completed
    #b = bustard    t = total
    #g = gerald
    fc, ft = self.statusFirecrest()
    bc, bt = self.statusBustard()
    gc, gt = self.statusGerald()

    return (fc+bc+gc, ft+bt+gt)


  def updateFirecrest(self, filename):
    """
    Marks firecrest filename as being completed.

    A filename that was not among the expected files is added, so it
    counts toward both completed and total.
    """
    self.status['firecrest'][filename] = True


  def updateBustard(self, filename):
    """
    Marks bustard filename as being completed.

    A filename that was not among the expected files is added, so it
    counts toward both completed and total.
    """
    self.status['bustard'][filename] = True


  def updateGerald(self, filename):
    """
    Marks gerald filename as being completed.

    A filename that was not among the expected files is added, so it
    counts toward both completed and total.
    """
    self.status['gerald'][filename] = True