import glob
import sys
import os
+import re
import logging
+from gaworkflow.util.alphanum import alphanum
+
class DuplicateGenome(Exception):
    """
    Error type for duplicate-genome conditions.

    NOTE(review): the raise sites are outside this section of the file;
    presumably raised when the same genome is found more than once --
    confirm against the callers.
    """
return d
def constructMapperDict(genome_dict):
    """
    Flatten a nested species -> build -> path dictionary.

    Each entry of the result is keyed by '<species>|<build>' and holds
    the value stored at genome_dict[species][build], so names from the
    eland config generator output can be looked up directly.

    ie. 'Homo sapiens|hg18' -> <genome_dir>
    """
    return dict(
        (species + '|' + build, genome_dict[species][build])
        for species in genome_dict.keys()
        for build in genome_dict[species]
    )


class constructMapperDict(object):
    """
    Emulate a read-only dictionary to map genome|build names to paths.

    It uses the dictionary generated by getAvailableGenomes, which is
    indexed as genome_dict[species][build] -> path.

    Two key forms are accepted by lookups:

      'species|build' -> the path stored for exactly that build
      'species'       -> the path of the build whose name sorts last
                         under the alphanum ("human") ordering

    keys(), values() and items() enumerate only the fully qualified
    'species|build' form.
    """
    def __init__(self, genome_dict):
        """
        Store the nested species -> build -> path dictionary to wrap.

        The dictionary is referenced, not copied, so later changes to
        it are visible through this object.
        """
        self.genome_dict = genome_dict

    def __getitem__(self, key):
        """
        Return the best match for key

        Raises KeyError when the species or build is unknown, when the
        species has no builds at all, or when key has more than one
        '|' separator.
        """
        # Imported locally so this class does not depend on a new
        # module-level import; cmp_to_key adapts the cmp-style alphanum
        # function to the key= interface, which works on Python 2.7 and 3.
        from functools import cmp_to_key

        elements = re.split(r"\|", key)

        if len(elements) == 1:
            # we just have the species name
            # get the set of builds
            builds = self.genome_dict[elements[0]]
            if not builds:
                # a mapping lookup should fail with KeyError rather than
                # the IndexError that indexing an empty list would give
                raise KeyError(key)

            # sort build names the way humans would
            build_names = sorted(builds, key=cmp_to_key(alphanum))

            # return the path from the 'last' build name
            return builds[build_names[-1]]

        if len(elements) == 2:
            # we have species, and build name
            return self.genome_dict[elements[0]][elements[1]]

        raise KeyError("Unrecognized key")

    def __contains__(self, key):
        """
        Return True if looking up key would succeed.
        """
        try:
            self[key]
        except KeyError:
            return False
        return True

    def get(self, key, default=None):
        """
        Return the best match for key, or default if there is none.
        """
        try:
            return self[key]
        except KeyError:
            return default

    def keys(self):
        """
        Return a list of every 'species|build' key string.
        """
        return [species + '|' + build
                for species in self.genome_dict.keys()
                for build in self.genome_dict[species]]

    def values(self):
        """
        Return a list of every path, in the same order as keys().
        """
        return [self.genome_dict[species][build]
                for species in self.genome_dict.keys()
                for build in self.genome_dict[species]]

    def items(self):
        """
        Return a list of ('species|build', path) tuples.
        """
        return [(species + '|' + build, self.genome_dict[species][build])
                for species in self.genome_dict.keys()
                for build in self.genome_dict[species]]

if __name__ == '__main__':
if len(sys.argv) != 2: