Use sample keys when looking up lane parameters.
author Diane Trout <diane@caltech.edu>
Sat, 7 Jul 2012 00:34:27 +0000 (17:34 -0700)
committer Diane Trout <diane@caltech.edu>
Sat, 7 Jul 2012 00:34:27 +0000 (17:34 -0700)
As a bonus feature, I decided to test whether the incoming key is a SampleKey
and, if it's not, convert the older lane number to a sample key internally.

One downside is that, since I'm storing SampleKeys that are not fully
specified, I have to do a brute-force lookup of the key.

htsworkflow/pipelines/eland.py
htsworkflow/pipelines/gerald.py
htsworkflow/pipelines/test/test_runfolder_rta1_12.py
htsworkflow/pipelines/test/testdata/1_12/aligned_config_1_12.xml

index 873dbefeb8d2e633a80500c632119b8451e26aed..19905bc8da526996924f592d15120865b5c1747b 100644 (file)
@@ -716,7 +716,7 @@ class ELAND(collections.MutableMapping):
         if genome_maps is not None:
             genome_map = genome_maps[key.lane]
         elif gerald is not None:
-            genome_dir = gerald.lanes[key.lane].eland_genome
+            genome_dir = gerald.lanes[key].eland_genome
             if genome_dir is not None:
                 genome_map = build_genome_fasta_map(genome_dir)
 
index d9cf47334c1dca5c128e22d5ef1bb364ab5f3a13..aca994f4a8ac9c3e4cc24ccc674e8cf91ebef6df 100644 (file)
@@ -10,6 +10,7 @@ import time
 
 from htsworkflow.pipelines.summary import Summary, SummaryGA, SummaryHiSeq
 from htsworkflow.pipelines.eland import eland, ELAND
+from htsworkflow.pipelines.samplekey import SampleKey
 
 from htsworkflow.pipelines.runfolder import \
    ElementTree, \
@@ -377,16 +378,18 @@ class LaneSpecificRunParameters(collections.MutableMapping):
         # those consistently.
         for element in analysis:
             sample, lane_id = element.tag.split('_')
-            self._lanes[int(lane_id)] = LaneParametersGA(
+            key = SampleKey(lane=int(lane_id), sample=sample)
+            self._lanes[key] = LaneParametersGA(
                                           self._gerald, lane_id)
 
     def _extract_hiseq_analysis_type(self, analysis):
         """Extract from HiSeq style multiplexed analysis types"""
         for element in analysis:
             name = element.attrib['name']
-            self._lanes[name] = LaneParametersHiSeq(self._gerald,
-                                                    name,
-                                                    element)
+            key = SampleKey(sample=name)
+            self._lanes[key] = LaneParametersHiSeq(self._gerald,
+                                                   name,
+                                                   element)
 
     def __iter__(self):
         if self._lanes is None:
@@ -396,19 +399,49 @@ class LaneSpecificRunParameters(collections.MutableMapping):
     def __getitem__(self, key):
         if self._lanes is None:
             self._initialize_lanes()
-        return self._lanes[key]
+        value = self._lanes.get(key, None)
+        if value is not None:
+            return value
+        real_key = self._find_key(key)
+        if real_key is not None:
+            return self._lanes[real_key]
+        raise KeyError("%s not found" % (repr(key),))
 
     def __setitem__(self, key, value):
+        if len(self._lanes) > 100:
+            LOGGER.warn("many projects loaded, consider improving dictionary")
+        real_key = self._find_key(key)
+        if real_key is not None:
+            key = real_key
         self._lanes[key] = value
 
     def __delitem__(self, key):
-        del self._lanes[key]
+        if key in self._lanes:
+            del self._lanes[key]
+        else:
+            real_key = self._find_key(key)
+            if real_key is not None:
+                del self._lanes[real_key]
 
     def __len__(self):
         if self._lanes is None:
             self._initialize_lanes()
         return len(self._lanes)
 
+    def _find_key(self, lookup_key):
+        if not isinstance(lookup_key, SampleKey):
+            lookup_key = SampleKey(lane=lookup_key)
+
+        results = []
+        for k in self._lanes:
+            if k.matches(lookup_key):
+                results.append(k)
+        if len(results) > 1:
+            raise ValueError("More than one key matched query %s" % (str(lookup_key),))
+        elif len(results) == 1:
+            return results[0]
+        else:
+            return None
 
 def gerald(pathname):
     LOGGER.info("Parsing gerald config.xml")
index 28863c1c3be98ba6e71c72e2d0fcc25a89f5db87..c6233d99a7aa21043873122f3771c421696434c1 100644 (file)
@@ -102,12 +102,12 @@ class RunfolderTests(unittest.TestCase):
         # instead of pythons default 0..8
         # test lane specific parameters from gerald config file
 
-        undetermined = g.lanes['Undetermined_indices']
+        undetermined = g.lanes[SampleKey(sample='Undetermined_indices')]
         self.failUnlessEqual(undetermined.analysis, 'none')
         self.failUnlessEqual(undetermined.read_length, None)
         self.failUnlessEqual(undetermined.use_bases, None)
 
-        project = g.lanes['12383']
+        project = g.lanes[SampleKey(sample='11115')]
         self.failUnlessEqual(project.analysis, 'eland_extended')
         self.failUnlessEqual(project.eland_genome, '/g/hg18/chromosomes/')
         self.failUnlessEqual(project.read_length, '49')
index 99da400a268372523d1d1da92c2d57611a050928..1a5471fa8662b7fc5b897f2493fe7bc4de2316df 100644 (file)
       <USE_BASES>Y*n</USE_BASES>
       <USE_BASES1>yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyn</USE_BASES1>
     </Project>
+    <Project name="11112">
+      <ANALYSIS>eland_extended</ANALYSIS>
+      <CHROM_NAME_SOURCE>fileName</CHROM_NAME_SOURCE>
+      <ELAND_GENOME>/g/hg18/chromosomes/</ELAND_GENOME>
+      <ELAND_GENOME_MASK>*.fa</ELAND_GENOME_MASK>
+      <ELAND_SEED_LENGTH1>32</ELAND_SEED_LENGTH1>
+      <READS>1</READS>
+      <READ_LENGTH1>49</READ_LENGTH1>
+      <USE_BASES>Y*n</USE_BASES>
+      <USE_BASES1>yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyn</USE_BASES1>
+    </Project>
+    <Project name="11113">
+      <ANALYSIS>eland_extended</ANALYSIS>
+      <CHROM_NAME_SOURCE>fileName</CHROM_NAME_SOURCE>
+      <ELAND_GENOME>/g/hg18/chromosomes/</ELAND_GENOME>
+      <ELAND_GENOME_MASK>*.fa</ELAND_GENOME_MASK>
+      <ELAND_SEED_LENGTH1>32</ELAND_SEED_LENGTH1>
+      <READS>1</READS>
+      <READ_LENGTH1>49</READ_LENGTH1>
+      <USE_BASES>Y*n</USE_BASES>
+      <USE_BASES1>yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyn</USE_BASES1>
+    </Project>
+    <Project name="11114">
+      <ANALYSIS>eland_extended</ANALYSIS>
+      <CHROM_NAME_SOURCE>fileName</CHROM_NAME_SOURCE>
+      <ELAND_GENOME>/g/hg18/chromosomes/</ELAND_GENOME>
+      <ELAND_GENOME_MASK>*.fa</ELAND_GENOME_MASK>
+      <ELAND_SEED_LENGTH1>32</ELAND_SEED_LENGTH1>
+      <READS>1</READS>
+      <READ_LENGTH1>49</READ_LENGTH1>
+      <USE_BASES>Y*n</USE_BASES>
+      <USE_BASES1>yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyn</USE_BASES1>
+    </Project>
+    <Project name="11115">
+      <ANALYSIS>eland_extended</ANALYSIS>
+      <CHROM_NAME_SOURCE>fileName</CHROM_NAME_SOURCE>
+      <ELAND_GENOME>/g/hg18/chromosomes/</ELAND_GENOME>
+      <ELAND_GENOME_MASK>*.fa</ELAND_GENOME_MASK>
+      <ELAND_SEED_LENGTH1>32</ELAND_SEED_LENGTH1>
+      <READS>1</READS>
+      <READ_LENGTH1>49</READ_LENGTH1>
+      <USE_BASES>Y*n</USE_BASES>
+      <USE_BASES1>yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyn</USE_BASES1>
+    </Project>
+    <Project name="11116">
+      <ANALYSIS>eland_extended</ANALYSIS>
+      <CHROM_NAME_SOURCE>fileName</CHROM_NAME_SOURCE>
+      <ELAND_GENOME>/g/hg18/chromosomes/</ELAND_GENOME>
+      <ELAND_GENOME_MASK>*.fa</ELAND_GENOME_MASK>
+      <ELAND_SEED_LENGTH1>32</ELAND_SEED_LENGTH1>
+      <READS>1</READS>
+      <READ_LENGTH1>49</READ_LENGTH1>
+      <USE_BASES>Y*n</USE_BASES>
+      <USE_BASES1>yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyn</USE_BASES1>
+    </Project>
+    <Project name="11117">
+      <ANALYSIS>eland_extended</ANALYSIS>
+      <CHROM_NAME_SOURCE>fileName</CHROM_NAME_SOURCE>
+      <ELAND_GENOME>/g/hg18/chromosomes/</ELAND_GENOME>
+      <ELAND_GENOME_MASK>*.fa</ELAND_GENOME_MASK>
+      <ELAND_SEED_LENGTH1>32</ELAND_SEED_LENGTH1>
+      <READS>1</READS>
+      <READ_LENGTH1>49</READ_LENGTH1>
+      <USE_BASES>Y*n</USE_BASES>
+      <USE_BASES1>yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyn</USE_BASES1>
+    </Project>
+    <Project name="11118">
+      <ANALYSIS>eland_extended</ANALYSIS>
+      <CHROM_NAME_SOURCE>fileName</CHROM_NAME_SOURCE>
+      <ELAND_GENOME>/g/hg18/chromosomes/</ELAND_GENOME>
+      <ELAND_GENOME_MASK>*.fa</ELAND_GENOME_MASK>
+      <ELAND_SEED_LENGTH1>32</ELAND_SEED_LENGTH1>
+      <READS>1</READS>
+      <READ_LENGTH1>49</READ_LENGTH1>
+      <USE_BASES>Y*n</USE_BASES>
+      <USE_BASES1>yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyn</USE_BASES1>
+    </Project>
+    <Project name="11119">
+      <ANALYSIS>eland_extended</ANALYSIS>
+      <CHROM_NAME_SOURCE>fileName</CHROM_NAME_SOURCE>
+      <ELAND_GENOME>/g/hg18/chromosomes/</ELAND_GENOME>
+      <ELAND_GENOME_MASK>*.fa</ELAND_GENOME_MASK>
+      <ELAND_SEED_LENGTH1>32</ELAND_SEED_LENGTH1>
+      <READS>1</READS>
+      <READ_LENGTH1>49</READ_LENGTH1>
+      <USE_BASES>Y*n</USE_BASES>
+      <USE_BASES1>yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyn</USE_BASES1>
+    </Project>
+    <Project name="11120">
+      <ANALYSIS>eland_extended</ANALYSIS>
+      <CHROM_NAME_SOURCE>fileName</CHROM_NAME_SOURCE>
+      <ELAND_GENOME>/g/hg18/chromosomes/</ELAND_GENOME>
+      <ELAND_GENOME_MASK>*.fa</ELAND_GENOME_MASK>
+      <ELAND_SEED_LENGTH1>32</ELAND_SEED_LENGTH1>
+      <READS>1</READS>
+      <READ_LENGTH1>49</READ_LENGTH1>
+      <USE_BASES>Y*n</USE_BASES>
+      <USE_BASES1>yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyn</USE_BASES1>
+    </Project>
     <Project name="Undetermined_indices">
       <ANALYSIS>none</ANALYSIS>
     </Project>