From: Diane Trout Date: Sat, 7 Jul 2012 00:34:27 +0000 (-0700) Subject: Use sample keys when looking up lane parameters. X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=htsworkflow.git;a=commitdiff_plain;h=1f6c3769677a102963402ebf0690ab9d39c1cf1f Use sample keys when looking up lane parameters. And as a bonus feature I decided to test for an incoming SampleKey and, if it's not one, convert the older lane number to a sample key internally. One downside is that, since I'm storing SampleKeys that are not fully specified, I have to do a brute-force lookup of the key. --- diff --git a/htsworkflow/pipelines/eland.py b/htsworkflow/pipelines/eland.py index 873dbef..19905bc 100644 --- a/htsworkflow/pipelines/eland.py +++ b/htsworkflow/pipelines/eland.py @@ -716,7 +716,7 @@ class ELAND(collections.MutableMapping): if genome_maps is not None: genome_map = genome_maps[key.lane] elif gerald is not None: - genome_dir = gerald.lanes[key.lane].eland_genome + genome_dir = gerald.lanes[key].eland_genome if genome_dir is not None: genome_map = build_genome_fasta_map(genome_dir) diff --git a/htsworkflow/pipelines/gerald.py b/htsworkflow/pipelines/gerald.py index d9cf473..aca994f 100644 --- a/htsworkflow/pipelines/gerald.py +++ b/htsworkflow/pipelines/gerald.py @@ -10,6 +10,7 @@ import time from htsworkflow.pipelines.summary import Summary, SummaryGA, SummaryHiSeq from htsworkflow.pipelines.eland import eland, ELAND +from htsworkflow.pipelines.samplekey import SampleKey from htsworkflow.pipelines.runfolder import \ ElementTree, \ @@ -377,16 +378,18 @@ class LaneSpecificRunParameters(collections.MutableMapping): # those consistently. 
for element in analysis: sample, lane_id = element.tag.split('_') - self._lanes[int(lane_id)] = LaneParametersGA( + key = SampleKey(lane=int(lane_id), sample=sample) + self._lanes[key] = LaneParametersGA( self._gerald, lane_id) def _extract_hiseq_analysis_type(self, analysis): """Extract from HiSeq style multiplexed analysis types""" for element in analysis: name = element.attrib['name'] - self._lanes[name] = LaneParametersHiSeq(self._gerald, - name, - element) + key = SampleKey(sample=name) + self._lanes[key] = LaneParametersHiSeq(self._gerald, + name, + element) def __iter__(self): if self._lanes is None: @@ -396,19 +399,49 @@ class LaneSpecificRunParameters(collections.MutableMapping): def __getitem__(self, key): if self._lanes is None: self._initialize_lanes() - return self._lanes[key] + value = self._lanes.get(key, None) + if value is not None: + return value + real_key = self._find_key(key) + if real_key is not None: + return self._lanes[real_key] + raise KeyError("%s not found" % (repr(key),)) def __setitem__(self, key, value): + if len(self._lanes) > 100: + LOGGER.warn("many projects loaded, consider improving dictionary") + real_key = self._find_key(key) + if real_key is not None: + key = real_key self._lanes[key] = value def __delitem__(self, key): - del self._lanes[key] + if key in self._lanes: + del self._lanes[key] + else: + real_key = self._find_key(key) + if real_key is not None: + del self._lanes[real_key] def __len__(self): if self._lanes is None: self._initialize_lanes() return len(self._lanes) + def _find_key(self, lookup_key): + if not isinstance(lookup_key, SampleKey): + lookup_key = SampleKey(lane=lookup_key) + + results = [] + for k in self._lanes: + if k.matches(lookup_key): + results.append(k) + if len(results) > 1: + raise ValueError("More than one key matched query %s" % (str(lookup_key),)) + elif len(results) == 1: + return results[0] + else: + return None def gerald(pathname): LOGGER.info("Parsing gerald config.xml") diff --git 
a/htsworkflow/pipelines/test/test_runfolder_rta1_12.py b/htsworkflow/pipelines/test/test_runfolder_rta1_12.py index 28863c1..c6233d9 100644 --- a/htsworkflow/pipelines/test/test_runfolder_rta1_12.py +++ b/htsworkflow/pipelines/test/test_runfolder_rta1_12.py @@ -102,12 +102,12 @@ class RunfolderTests(unittest.TestCase): # instead of pythons default 0..8 # test lane specific parameters from gerald config file - undetermined = g.lanes['Undetermined_indices'] + undetermined = g.lanes[SampleKey(sample='Undetermined_indices')] self.failUnlessEqual(undetermined.analysis, 'none') self.failUnlessEqual(undetermined.read_length, None) self.failUnlessEqual(undetermined.use_bases, None) - project = g.lanes['12383'] + project = g.lanes[SampleKey(sample='11115')] self.failUnlessEqual(project.analysis, 'eland_extended') self.failUnlessEqual(project.eland_genome, '/g/hg18/chromosomes/') self.failUnlessEqual(project.read_length, '49') diff --git a/htsworkflow/pipelines/test/testdata/1_12/aligned_config_1_12.xml b/htsworkflow/pipelines/test/testdata/1_12/aligned_config_1_12.xml index 99da400..1a5471f 100644 --- a/htsworkflow/pipelines/test/testdata/1_12/aligned_config_1_12.xml +++ b/htsworkflow/pipelines/test/testdata/1_12/aligned_config_1_12.xml @@ -203,6 +203,105 @@ Y*n yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyn + + eland_extended + fileName + /g/hg18/chromosomes/ + *.fa + 32 + 1 + 49 + Y*n + yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyn + + + eland_extended + fileName + /g/hg18/chromosomes/ + *.fa + 32 + 1 + 49 + Y*n + yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyn + + + eland_extended + fileName + /g/hg18/chromosomes/ + *.fa + 32 + 1 + 49 + Y*n + yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyn + + + eland_extended + fileName + /g/hg18/chromosomes/ + *.fa + 32 + 1 + 49 + Y*n + yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyn + + + eland_extended + fileName + /g/hg18/chromosomes/ + *.fa + 32 + 1 + 49 + Y*n + yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyn + 
+ + eland_extended + fileName + /g/hg18/chromosomes/ + *.fa + 32 + 1 + 49 + Y*n + yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyn + + + eland_extended + fileName + /g/hg18/chromosomes/ + *.fa + 32 + 1 + 49 + Y*n + yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyn + + + eland_extended + fileName + /g/hg18/chromosomes/ + *.fa + 32 + 1 + 49 + Y*n + yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyn + + + eland_extended + fileName + /g/hg18/chromosomes/ + *.fa + 32 + 1 + 49 + Y*n + yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyn + none