# Provenance: gaworkflow/pipeline/run_status.py, introduced by commit
# 062fa6cf5ace5bb7cc670e6bb0744af8efd15c0b (Brandon King,
# Tue, 20 Nov 2007 23:01:53 +0000):
#   "Added GARunStatus class for tracking percent complete through each
#    step or the run and/or entire run!"
"""
Predicts the files a pipeline run is expected to produce in its
firecrest, bustard and gerald steps, and tracks which of them have
been reported as completed.
"""
import glob
import re
import os

s_comment = re.compile('^#')
s_general_read_len = re.compile('^READ_LENGTH ')
s_read_len = re.compile('^[1-8]+:READ_LENGTH ')

s_firecrest = None


def _zero_padded(num, width):
    """
    Returns num as a string left-padded with zeros to width characters.

    Raises ValueError if num is negative or needs more than width digits.
    """
    if 0 <= num < 10 ** width:
        return str(num).zfill(width)

    msg = 'Invalid number: %s' % (num,)
    raise ValueError(msg)


def _four_digit_num_in_string(num):
    """
    Returns num (0-9999) as a four character, zero padded string.

    Raises ValueError for any number outside that range.
    """
    return _zero_padded(num, 4)


def _two_digit_num_in_string(num):
    """
    Returns num (0-99) as a two character, zero padded string.

    Raises ValueError for any number outside that range.
    """
    return _zero_padded(num, 2)


# FIRECREST PATTERNS
# _p2f(pattern, lane, tile, cycle)
PATTERN_FIRECREST_INT = 's_%s_%s_%s_int.txt'
PATTERN_FIRECREST_QCM = 's_%s_%s_%s_qcm.xml'
PATTERN_FIRECREST_CLU1 = 's_%s_%s_%s_1_clu.txt'
PATTERN_FIRECREST_CLU2 = 's_%s_%s_%s_2_clu.txt'
PATTERN_FIRECREST_CLU3 = 's_%s_%s_%s_3_clu.txt'
PATTERN_FIRECREST_CLU4 = 's_%s_%s_%s_4_clu.txt'

# _p2f(pattern, lane, tile)
PATTERN_FIRECREST_NSE = 's_%s_%s_nse.txt.gz'
PATTERN_FIRECREST_POS = 's_%s_%s_pos.txt'
PATTERN_FIRECREST_IDX = 's_%s_%s_idx.txt'


# BUSTARD PATTERNS
# _p2f(pattern, lane, tile)
PATTERN_BUSTARD_SIG2 = 's_%s_%s_sig2.txt'
PATTERN_BUSTARD_PRB = 's_%s_%s_prb.txt'


# GERALD PATTERNS
# _p2f(pattern, lane, tile)
PATTERN_GERALD_ALL = 's_%s_%s_all.txt.tmp'
PATTERN_GERALD_QRAW = 's_%s_%s_qraw.txt.tmp'
PATTERN_GERALD_ALLPNGTMP = 's_%s_%s_all.tmp.png'
PATTERN_GERALD_ALIGNTMP = 's_%s_%s_align.txt.tmp'
PATTERN_GERALD_QVALTMP = 's_%s_%s_qval.txt.tmp'
PATTERN_GERALD_SCORETMP = 's_%s_%s_score.txt.tmp'
PATTERN_GERALD_PREALIGNTMP = 's_%s_%s_prealign.txt.tmp'
PATTERN_GERALD_REALIGNTMP = 's_%s_%s_realign.txt.tmp'
PATTERN_GERALD_RESCORETMP = 's_%s_%s_rescore.txt.tmp'
PATTERN_GERALD_RESCOREPNG = 's_%s_%s_rescore.png'
PATTERN_GERALD_ERRORSTMPPNG = 's_%s_%s_errors.tmp.png'
PATTERN_GERALD_QCALTMP = 's_%s_%s_qcal.txt.tmp'
PATTERN_GERALD_QVAL = 's_%s_%s_qval.txt'

# _p2f(pattern, lane)
PATTERN_GERALD_SEQPRE = 's_%s_seqpre.txt.tmp'
PATTERN_GERALD_RESULTTMP = 's_%s_eland_result.txt.tmp'
PATTERN_GERALD_SIGMEANS = 's_%s_Signal_Means.txt.tmp'
PATTERN_GERALD_CALLPNG = 's_%s_call.png'
PATTERN_GERALD_ALLPNG = 's_%s_all.png'
PATTERN_GERALD_PERCENTALLPNG = 's_%s_percent_all.png'
PATTERN_GERALD_PERCENTCALLPNG = 's_%s_percent_call.png'
PATTERN_GERALD_PERCENTBASEPNG = 's_%s_percent_base.png'
PATTERN_GERALD_FILTTMP = 's_%s_filt.txt.tmp'
PATTERN_GERALD_FRAGTMP = 's_%s_frag.txt.tmp'
PATTERN_GERALD_QREPORTTMP = 's_%s_qreport.txt.tmp'
PATTERN_GERALD_QTABLETMP = 's_%s_qtable.txt.tmp'
PATTERN_GERALD_QCALREPORTTMP = 's_%s_qcalreport.txt.tmp'
PATTERN_GERALD_SEQUENCETMP = 's_%s_sequence.txt.tmp'
PATTERN_GERALD_LANEFINISHED = 's_%s_finished.txt'


# Pattern groups used by GARunStatus._generate_expected, one tuple per
# (step, layer) combination.
_FIRECREST_CYCLE_PATTERNS = (
    PATTERN_FIRECREST_INT,
    PATTERN_FIRECREST_QCM,
    PATTERN_FIRECREST_CLU1,
    PATTERN_FIRECREST_CLU2,
    PATTERN_FIRECREST_CLU3,
    PATTERN_FIRECREST_CLU4,
)

_FIRECREST_TILE_PATTERNS = (
    PATTERN_FIRECREST_NSE,
    PATTERN_FIRECREST_POS,
    PATTERN_FIRECREST_IDX,
)

_BUSTARD_TILE_PATTERNS = (
    PATTERN_BUSTARD_SIG2,
    PATTERN_BUSTARD_PRB,
)

_GERALD_TILE_PATTERNS = (
    PATTERN_GERALD_ALL,
    PATTERN_GERALD_QRAW,
    PATTERN_GERALD_ALLPNGTMP,
    PATTERN_GERALD_ALIGNTMP,
    PATTERN_GERALD_QVALTMP,
    PATTERN_GERALD_SCORETMP,
    PATTERN_GERALD_PREALIGNTMP,
    PATTERN_GERALD_REALIGNTMP,
    PATTERN_GERALD_RESCORETMP,
    PATTERN_GERALD_RESCOREPNG,
    PATTERN_GERALD_ERRORSTMPPNG,
    PATTERN_GERALD_QCALTMP,
    PATTERN_GERALD_QVAL,
)

_GERALD_LANE_PATTERNS = (
    PATTERN_GERALD_SEQPRE,
    PATTERN_GERALD_RESULTTMP,
    PATTERN_GERALD_SIGMEANS,
    PATTERN_GERALD_CALLPNG,
    PATTERN_GERALD_ALLPNG,
    PATTERN_GERALD_PERCENTALLPNG,
    PATTERN_GERALD_PERCENTCALLPNG,
    PATTERN_GERALD_PERCENTBASEPNG,
    PATTERN_GERALD_FILTTMP,
    PATTERN_GERALD_FRAGTMP,
    PATTERN_GERALD_QREPORTTMP,
    PATTERN_GERALD_QTABLETMP,
    PATTERN_GERALD_QCALREPORTTMP,
    PATTERN_GERALD_SEQUENCETMP,
    PATTERN_GERALD_LANEFINISHED,
)

# Files expected once per run, after all lanes/tiles/cycles are done.
_FIRECREST_RUN_FILES = (
    'offsets_finished.txt',
    'finished.txt',
)

_BUSTARD_RUN_FILES = (
    'finished.txt',
)

_GERALD_RUN_FILES = (
    'tiles.txt',
    'FullAll.htm',
    'All.htm.tmp',
    'Signal_Means.txt.tmp',
    'plotIntensity_for_IVC',
    'IVC.htm.tmp',
    'FullError.htm',
    'FullPerfect.htm',
    'Error.htm.tmp',
    'Perfect.htm.tmp',
    'Summary.htm.tmp',
    'Tile.htm.tmp',
    'finished.txt',
)


def _p2f(pattern, lane, tile=None, cycle=None):
    """
    Converts a pattern plus info into a file name.

    pattern -- one of the PATTERN_* format strings
    lane    -- lane number, substituted as is
    tile    -- optional tile number, rendered as four digits
    cycle   -- optional cycle number, rendered as two digits

    Raises ValueError if cycle is given without tile, or if tile/cycle
    are outside the range the zero padded helpers accept.
    """
    # Compare against None (not truthiness) so that a tile or cycle of 0
    # is formatted instead of silently selecting the wrong branch.
    if cycle is not None:
        # lane, and cycle provided (INVALID)
        if tile is None:
            msg = ("Handling of cycle without tile is not currently "
                   "implemented.")
            raise ValueError(msg)

        # lane, tile, cycle provided
        return pattern % (lane,
                          _four_digit_num_in_string(tile),
                          _two_digit_num_in_string(cycle))

    # lane, tile provided
    if tile is not None:
        return pattern % (lane, _four_digit_num_in_string(tile))

    # lane provided
    return pattern % (lane,)


class GARunStatus(object):

    def __init__(self, conf_filepath):
        """
        Given an eland config file in the top level directory
        of a run, predicts the files that will be generated
        during a run and provides methods for retrieving
        (completed, total) for each step or entire run.

        The 'Images' directory is looked up next to conf_filepath.
        """

        self._conf_filepath = conf_filepath
        self._base_dir = os.path.dirname(conf_filepath)
        self._image_dir = os.path.join(self._base_dir, 'Images')

        self.lanes = []
        self.lane_read_length = {}
        self.tiles = None
        self.cycles = None

        # status[step] maps an expected file name to True once that
        # file has been reported as completed.
        self.status = {}
        self.status['firecrest'] = {}
        self.status['bustard'] = {}
        self.status['gerald'] = {}

        self._process_config()
        self._count_tiles()
        self._count_cycles()
        self._generate_expected()

    def _process_config(self):
        """
        Grabs info from self._conf_filepath

        A general 'READ_LENGTH n' line sets the read length of lanes
        1-8.  A 'lanes:READ_LENGTH n' line overrides the read length of
        the listed lanes and is the only kind of line that adds lanes to
        self.lanes.
        """
        # 'with' guarantees the config file is closed, even if a line
        # fails to parse.
        with open(self._conf_filepath, 'r') as conf_file:
            for line in conf_file:

                # Skip comment lines for now.
                if s_comment.search(line):
                    continue

                mo = s_general_read_len.search(line)
                if mo:
                    read_length = int(line[mo.end():])
                    # Handle general READ_LENGTH
                    for lane in range(1, 9):
                        self.lane_read_length[lane] = read_length

                mo = s_read_len.search(line)
                if mo:
                    read_length = int(line[mo.end():])

                    # Convert the string of lane digits before the first
                    # ':' into a list of lane #s.
                    lane_digits = line.split(':', 1)[0]
                    lanes = [int(digit) for digit in lane_digits]

                    for lane in lanes:

                        # Keep track of which lanes are being run.
                        if lane not in self.lanes:
                            self.lanes.append(lane)

                        # Update with lane specific read lengths
                        self.lane_read_length[lane] = read_length

        self.lanes.sort()

    def _count_tiles(self):
        """
        Count the number of tiles being used

        Counts the 's_1_*_a.tif' files of Images/L001/C1.1.
        """
        self.tiles = len(glob.glob(os.path.join(self._image_dir,
                                                'L001',
                                                'C1.1',
                                                's_1_*_a.tif')))

    def _count_cycles(self):
        """
        Figures out the number of cycles that are available

        Uses the highest cycle number among the Images/L001/C*.1
        directories, or 0 when there are none.
        """
        cycle_dirs = glob.glob(os.path.join(self._image_dir, 'L001', 'C*.1'))

        # Directory names look like 'C<cycle>.1'; keep the number
        # between the leading 'C' and the first '.'.
        cycle_list = []
        for cycle_dir in cycle_dirs:
            name = os.path.basename(cycle_dir)
            cycle_list.append(int(name[1:name.find('.')]))

        # max() of an empty list raises ValueError; report 0 cycles
        # instead, the same way _count_tiles reports 0 tiles.
        if cycle_list:
            self.cycles = max(cycle_list)
        else:
            self.cycles = 0

    def _generate_expected(self):
        """
        generates a list of files we expect to find.

        Every expected file name is registered in self.status under its
        step with the value False (not completed yet).
        """

        firecrest = self.status['firecrest']
        bustard = self.status['bustard']
        gerald = self.status['gerald']

        for lane in self.lanes:
            for tile in range(1, self.tiles + 1):

                ##########################
                # LANE, TILE, CYCLE LAYER
                for cycle in range(1, self.cycles + 1):
                    for pattern in _FIRECREST_CYCLE_PATTERNS:
                        firecrest[_p2f(pattern, lane, tile, cycle)] = False

                ###################
                # LANE, TILE LAYER
                for pattern in _FIRECREST_TILE_PATTERNS:
                    firecrest[_p2f(pattern, lane, tile)] = False

                for pattern in _BUSTARD_TILE_PATTERNS:
                    bustard[_p2f(pattern, lane, tile)] = False

                for pattern in _GERALD_TILE_PATTERNS:
                    gerald[_p2f(pattern, lane, tile)] = False

            ###################
            # LANE LAYER
            for pattern in _GERALD_LANE_PATTERNS:
                gerald[_p2f(pattern, lane)] = False

        #################
        # LOOPS FINISHED
        for filename in _FIRECREST_RUN_FILES:
            firecrest[filename] = False

        for filename in _BUSTARD_RUN_FILES:
            bustard[filename] = False

        for filename in _GERALD_RUN_FILES:
            gerald[filename] = False

    def _step_status(self, step):
        """
        returns (completed, total) for the given key of self.status
        """
        files = self.status[step]
        total = len(files)
        # list() keeps this working where dict.values() is a view
        # without a count() method.
        completed = list(files.values()).count(True)

        return (completed, total)

    def statusFirecrest(self):
        """
        returns (completed, total) for the firecrest step
        """
        return self._step_status('firecrest')

    def statusBustard(self):
        """
        returns (completed, total) for the bustard step
        """
        return self._step_status('bustard')

    def statusGerald(self):
        """
        returns (completed, total) for the gerald step
        """
        return self._step_status('gerald')

    def statusTotal(self):
        """
        returns (completed, total) summed over all three steps
        """
        #f = firecrest  c = completed
        #b = bustard    t = total
        #g = gerald
        fc, ft = self.statusFirecrest()
        bc, bt = self.statusBustard()
        gc, gt = self.statusGerald()

        return (fc + bc + gc, ft + bt + gt)

    def updateFirecrest(self, filename):
        """
        Marks firecrest filename as being completed.

        A filename that was not predicted is added to the step, which
        also raises its total.
        """
        self.status['firecrest'][filename] = True

    def updateBustard(self, filename):
        """
        Marks bustard filename as being completed.

        A filename that was not predicted is added to the step, which
        also raises its total.
        """
        self.status['bustard'][filename] = True

    def updateGerald(self, filename):
        """
        Marks gerald filename as being completed.

        A filename that was not predicted is added to the step, which
        also raises its total.
        """
        self.status['gerald'][filename] = True