From: Diane Trout Date: Wed, 14 May 2008 23:00:47 +0000 (+0000) Subject: separate computing the sample/lane_id names from calculating read counts X-Git-Tag: stanford.caltech-merged-database-2009-jan-15~62 X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=htsworkflow.git;a=commitdiff_plain;h=f47bccba22fd258ed432da35ed694eb3ac0e49cc separate computing the sample/lane_id names from calculating read counts. The read count computation takes a long time, and if we just want to quickly access some information from the gerald directory, it is really annoying to wait for it to finish. --- diff --git a/gaworkflow/pipeline/gerald.py b/gaworkflow/pipeline/gerald.py index 5480f55..e7e09de 100644 --- a/gaworkflow/pipeline/gerald.py +++ b/gaworkflow/pipeline/gerald.py @@ -389,17 +389,15 @@ class ElandLane(object): def __init__(self, pathname=None, genome_map=None, xml=None): self.pathname = pathname - self.sample_name = None - self.lane_id = None + self._sample_name = None + self._lane_id = None self._reads = None - self._mapped_reads = {} - self._match_codes = {} + self._mapped_reads = None + self._match_codes = None if genome_map is None: genome_map = {} self.genome_map = genome_map - if pathname is not None: - self._update() if xml is not None: self.set_elements(xml) @@ -411,12 +409,6 @@ class ElandLane(object): if self.pathname is None: return - # extract the sample name - path, name = os.path.split(self.pathname) - split_name = name.split('_') - self.sample_name = split_name[0] - self.lane_id = split_name[1] - if os.stat(self.pathname)[stat.ST_SIZE] == 0: raise RuntimeError("Eland isn't done, try again later.") @@ -443,6 +435,28 @@ class ElandLane(object): self._mapped_reads = mapped_reads self._reads = reads + def _update_name(self): + # extract the sample name + if self.pathname is None: + return + + path, name = os.path.split(self.pathname) + split_name = name.split('_') + self._sample_name = split_name[0] + self._lane_id = split_name[1] + + def _get_sample_name(self): + if 
self._sample_name is None: + self._update_name() + return self._sample_name + sample_name = property(_get_sample_name) + + def _get_lane_id(self): + if self._lane_id is None: + self._update_name() + return self._lane_id + lane_id = property(_get_lane_id) + def _get_reads(self): if self._reads is None: self._update() @@ -492,12 +506,17 @@ class ElandLane(object): def set_elements(self, tree): if tree.tag != ElandLane.LANE: raise ValueError('Exptecting %s' % (ElandLane.LANE,)) + + # reset dictionaries + self._mapped_reads = {} + self._match_codes = {} + for element in tree: tag = element.tag.lower() if tag == ElandLane.SAMPLE_NAME.lower(): - self.sample_name = element.text + self._sample_name = element.text elif tag == ElandLane.LANE_ID.lower(): - self.lane_id = element.text + self._lane_id = element.text elif tag == ElandLane.GENOME_MAP.lower(): for child in element: name = child.attrib['name']