from htsworkflow.util.alphanum import alphanum
+LOGGER = logging.getLogger(__name__)
class DuplicateGenome(Exception):
    """Raised when the same species|build combination is found twice."""
"""
raises IOError (on genome_base_dir not found)
raises DuplicateGenome on duplicate genomes found.
-
+
returns a double dictionary (i.e. d[species][build] = path)
"""
# Need valid directory
if not os.path.exists(genome_base_dir):
msg = "Directory does not exist: %s" % (genome_base_dir)
- raise IOError, msg
+ raise IOError(msg)
# Find all subdirectories
filepath_list = glob.glob(os.path.join(genome_base_dir, '*'))
try:
species, build = line.split('|')
except:
- logging.warning('Skipping: Invalid metafile (%s) line: %s' \
- % (metafile, line))
+ LOGGER.warning('Skipping: Invalid metafile (%s) line: %s' \
+ % (metafile, line))
continue
build_dict = d.setdefault(species, {})
if build in build_dict:
msg = "Duplicate genome for %s|%s" % (species, build)
- raise DuplicateGenome, msg
+ raise DuplicateGenome(msg)
build_dict[build] = genome_dir
return d
-
+
class constructMapperDict(object):
    """
    Emulate a dictionary to map genome|build names to paths.

    It uses the dictionary generated by getAvailableGenomes, i.e. a
    two-level mapping of the form genome_dict[species][build] = path.

    Keys are strings of the form 'species|build'.  A bare 'species' key
    is also accepted by lookups and resolves to the build whose name
    sorts last under the alphanum comparison.
    """

    def __init__(self, genome_dict):
        # The nested dictionary is kept by reference, not copied.
        self.genome_dict = genome_dict

    def __getitem__(self, key):
        """
        Return the best match for key

        key is either 'species' or 'species|build'.

        raises KeyError if the species or build is unknown, if the
        species has no builds, or if key contains more than one '|'.
        """
        # Imported here so the class does not rely on a module-level
        # functools import being present.
        from functools import cmp_to_key

        elements = re.split(r"\|", key)

        if len(elements) == 1:
            # we just have the species name
            # get the set of builds
            builds = self.genome_dict[elements[0]]
            if not builds:
                # Nothing to choose from; report it like any other
                # missing key so that get() can fall back to its default.
                raise KeyError(key)

            # sort build names the way humans would.  alphanum is a
            # two-argument comparison function; list.sort() no longer
            # accepts cmp= on Python 3, so adapt it with cmp_to_key.
            ordered = sorted(builds, key=cmp_to_key(alphanum))

            # return the path from the 'last' build name
            return builds[ordered[-1]]

        elif len(elements) == 2:
            # we have species, and build name
            return self.genome_dict[elements[0]][elements[1]]
        else:
            raise KeyError("Unrecognized key")

    def get(self, key, default=None):
        """
        Return the path for key, or default if the lookup raises KeyError.
        """
        try:
            return self[key]
        except KeyError:
            return default

    def keys(self):
        """
        Return a list of every 'species|build' key.

        Each key is a plain string, so it can be passed straight back
        to __getitem__.
        """
        return [species + '|' + build
                for species, builds in self.genome_dict.items()
                for build in builds]

    def values(self):
        """
        Return a list of every path, in the same order as keys().
        """
        return [builds[build]
                for builds in self.genome_dict.values()
                for build in builds]

    def items(self):
        """
        Return a list of ('species|build', path) tuples.
        """
        return [(species + '|' + build, builds[build])
                for species, builds in self.genome_dict.items()
                for build in builds]
-
+
if __name__ == '__main__':
    # Command-line entry point: takes exactly one argument, the base genome
    # directory, and prints each key/value pair that constructMapperDict
    # reports for the genomes found there.
    if len(sys.argv) != 2:
        print('useage: %s <base_genome_dir>' % (sys.argv[0]))
        sys.exit(1)

    d = getAvailableGenomes(sys.argv[1])
    d2 = constructMapperDict(d)

    # items() already returns a list, so no extra list() copy is needed.
    for k, v in d2.items():
        print('%s: %s' % (k, v))
+
+