8 class DuplicateGenome(Exception): pass
11 def _has_metainfo(genome_dir):
12 metapath = os.path.join(genome_dir, '_metainfo_')
13 if os.path.isfile(metapath):
18 def getAvailableGenomes(genome_base_dir):
20 raises IOError (on genome_base_dir not found)
21 raises DuplicateGenome on duplicate genomes found.
23 returns a double dictionary (i.e. d[species][build] = path)
26 # Need valid directory
27 if not os.path.exists(genome_base_dir):
28 msg = "Directory does not exist: %s" % (genome_base_dir)
31 # Find all subdirectories
32 filepath_list = glob.glob(os.path.join(genome_base_dir, '*'))
33 potential_genome_dirs = \
34 [ filepath for filepath in filepath_list if os.path.isdir(filepath)]
36 # Get list of metadata files
39 for dirpath in potential_genome_dirs \
40 if _has_metainfo(dirpath) ]
42 # Genome double dictionary
45 for genome_dir in genome_dir_list:
46 line = open(os.path.join(genome_dir, '_metainfo_'), 'r').readline().strip()
48 # Get species, build... log and skip on failure
50 species, build = line.split('|')
52 logging.warning('Skipping: Invalid metafile (%s) line: %s' \
56 build_dict = d.setdefault(species, {})
57 if build in build_dict:
58 msg = "Duplicate genome for %s|%s" % (species, build)
59 raise DuplicateGenome, msg
61 build_dict[build] = genome_dir
66 def constructMapperDict(genome_dict):
68 Creates a dictionary which can map the genome
69 in the eland config generator output to a local
72 ie. 'Homo sapiens|hg18' -> <genome_dir>
75 for species in genome_dict.keys():
76 for build in genome_dict[species]:
77 mapper_dict[species+'|'+build] = genome_dict[species][build]
82 if __name__ == '__main__':
84 if len(sys.argv) != 2:
85 print 'useage: %s <base_genome_dir>' % (sys.argv[0])
88 d = getAvailableGenomes(sys.argv[1])
89 d2 = constructMapperDict(d)
91 for k,v in d2.items():
92 print '%s: %s' % (k,v)