9 from htsworkflow.util.alphanum import alphanum
11 class DuplicateGenome(Exception): pass
14 def _has_metainfo(genome_dir):
15 metapath = os.path.join(genome_dir, '_metainfo_')
16 if os.path.isfile(metapath):
21 def getAvailableGenomes(genome_base_dir):
23 raises IOError (on genome_base_dir not found)
24 raises DuplicateGenome on duplicate genomes found.
26 returns a double dictionary (i.e. d[species][build] = path)
29 # Need valid directory
30 if not os.path.exists(genome_base_dir):
31 msg = "Directory does not exist: %s" % (genome_base_dir)
34 # Find all subdirectories
35 filepath_list = glob.glob(os.path.join(genome_base_dir, '*'))
36 potential_genome_dirs = \
37 [ filepath for filepath in filepath_list if os.path.isdir(filepath)]
39 # Get list of metadata files
42 for dirpath in potential_genome_dirs \
43 if _has_metainfo(dirpath) ]
45 # Genome double dictionary
48 for genome_dir in genome_dir_list:
49 line = open(os.path.join(genome_dir, '_metainfo_'), 'r').readline().strip()
51 # Get species, build... log and skip on failure
53 species, build = line.split('|')
55 logging.warning('Skipping: Invalid metafile (%s) line: %s' \
59 build_dict = d.setdefault(species, {})
60 if build in build_dict:
61 msg = "Duplicate genome for %s|%s" % (species, build)
62 raise DuplicateGenome, msg
64 build_dict[build] = genome_dir
69 class constructMapperDict(object):
71 Emulate a dictionary to map genome|build names to paths.
73 It uses the dictionary generated by getAvailableGenomes.
75 def __init__(self, genome_dict):
76 self.genome_dict = genome_dict
78 def __getitem__(self, key):
80 Return the best match for key
82 elements = re.split("\|", key)
84 if len(elements) == 1:
85 # we just the species name
86 # get the set of builds
87 builds = self.genome_dict[elements[0]]
89 # sort build names the way humans would
91 keys.sort(cmp=alphanum)
93 # return the path from the 'last' build name
94 return builds[keys[-1]]
96 elif len(elements) == 2:
97 # we have species, and build name
98 return self.genome_dict[elements[0]][elements[1]]
100 raise KeyError("Unrecognized key")
102 def get(self, key, default=None):
110 for species in self.genome_dict.keys():
111 for build in self.genome_dict[species]:
112 keys.append([species+'|'+build])
117 for species in self.genome_dict.keys():
118 for build in self.genome_dict[species]:
119 values.append(self.genome_dict[species][build])
124 for species in self.genome_dict.keys():
125 for build in self.genome_dict[species]:
126 key = [species+'|'+build]
127 value = self.genome_dict[species][build]
128 items.append((key, value))
131 if __name__ == '__main__':
133 if len(sys.argv) != 2:
134 print 'useage: %s <base_genome_dir>' % (sys.argv[0])
137 d = getAvailableGenomes(sys.argv[1])
138 d2 = constructMapperDict(d)
140 for k,v in d2.items():
141 print '%s: %s' % (k,v)