9 from htsworkflow.util.alphanum import alphanum
11 LOGGER = logging.getLogger(__name__)
12 class DuplicateGenome(Exception): pass
15 def _has_metainfo(genome_dir):
16 metapath = os.path.join(genome_dir, '_metainfo_')
17 if os.path.isfile(metapath):
22 def getAvailableGenomes(genome_base_dir):
24 raises IOError (on genome_base_dir not found)
25 raises DuplicateGenome on duplicate genomes found.
27 returns a double dictionary (i.e. d[species][build] = path)
30 # Need valid directory
31 if not os.path.exists(genome_base_dir):
32 msg = "Directory does not exist: %s" % (genome_base_dir)
35 # Find all subdirectories
36 filepath_list = glob.glob(os.path.join(genome_base_dir, '*'))
37 potential_genome_dirs = \
38 [ filepath for filepath in filepath_list if os.path.isdir(filepath)]
40 # Get list of metadata files
43 for dirpath in potential_genome_dirs \
44 if _has_metainfo(dirpath) ]
46 # Genome double dictionary
49 for genome_dir in genome_dir_list:
50 line = open(os.path.join(genome_dir, '_metainfo_'), 'r').readline().strip()
52 # Get species, build... log and skip on failure
54 species, build = line.split('|')
56 LOGGER.warning('Skipping: Invalid metafile (%s) line: %s' \
60 build_dict = d.setdefault(species, {})
61 if build in build_dict:
62 msg = "Duplicate genome for %s|%s" % (species, build)
63 raise DuplicateGenome, msg
65 build_dict[build] = genome_dir
70 class constructMapperDict(object):
72 Emulate a dictionary to map genome|build names to paths.
74 It uses the dictionary generated by getAvailableGenomes.
76 def __init__(self, genome_dict):
77 self.genome_dict = genome_dict
79 def __getitem__(self, key):
81 Return the best match for key
83 elements = re.split("\|", key)
85 if len(elements) == 1:
86 # we just the species name
87 # get the set of builds
88 builds = self.genome_dict[elements[0]]
90 # sort build names the way humans would
92 keys.sort(cmp=alphanum)
94 # return the path from the 'last' build name
95 return builds[keys[-1]]
97 elif len(elements) == 2:
98 # we have species, and build name
99 return self.genome_dict[elements[0]][elements[1]]
101 raise KeyError("Unrecognized key")
103 def get(self, key, default=None):
111 for species in self.genome_dict.keys():
112 for build in self.genome_dict[species]:
113 keys.append([species+'|'+build])
118 for species in self.genome_dict.keys():
119 for build in self.genome_dict[species]:
120 values.append(self.genome_dict[species][build])
125 for species in self.genome_dict.keys():
126 for build in self.genome_dict[species]:
127 key = [species+'|'+build]
128 value = self.genome_dict[species][build]
129 items.append((key, value))
132 if __name__ == '__main__':
134 if len(sys.argv) != 2:
135 print 'useage: %s <base_genome_dir>' % (sys.argv[0])
138 d = getAvailableGenomes(sys.argv[1])
139 d2 = constructMapperDict(d)
141 for k,v in d2.items():
142 print '%s: %s' % (k,v)