1 """Convert chromosome names to genome/chromsome names
7 vldInfo = collections.namedtuple('vldInfo', 'name is_link')
9 class GenomeMap(collections.MutableMapping):
10 def __init__(self, items=None):
13 self._contigs.update(items)
16 return len(self._contigs)
19 return self._contigs.iterkeys()
21 def __getitem__(self, name):
22 return self._contigs[name]
24 def __setitem__(self, name, value):
25 self._contigs[name] = value
27 def __delitem__(self, name):
28 del self._contigs[name]
30 def scan_genome_dir(self, genome_dir):
31 """Build map from a genome directory"""
32 genome = genome_dir.split(os.path.sep)[-1]
33 vld_files = glob(os.path.join(genome_dir, '*.vld'))
34 vld = [ vldInfo(f, os.path.islink(f)) for f in vld_files ]
35 self.build_map_from_dir(genome, vld)
37 def build_map_from_dir(self, genome_name, vld_list):
38 """Initialize contig genome map from list of vldInfo tuples
40 # build fasta to fasta file map
41 for vld_file, is_link in vld_list:
42 vld_file = os.path.realpath(vld_file)
43 path, vld_name = os.path.split(vld_file)
44 name, ext = os.path.splitext(vld_name)
46 self._contigs[name] = name
48 self._contigs[name] = genome_name + '/' + name
50 def parse_genomesize(self, pathname):
51 """Parse genomesize.xml file
53 tree = ElementTree.parse(pathname)
54 self.build_map_from_element(tree.getroot())
56 def build_map_from_element(self, root):
57 """Build genome map from a parsed HiSeq genomesizes.xml file
62 contig = element.attrib['contigName']
63 filename = element.attrib['fileName']
64 filenames[contig] = filename
65 bases = int(element.attrib['totalBases'])
68 genome = guess_genome(sizes)
70 for contig, basese in sizes.items():
71 name = filenames[contig]
72 self._contigs[name] = genome + '/' + name
74 def guess_genome(contig_sizes):
75 """Guess what genome we're using"""
78 genomes = {'chr1': {197195432: 'mm9',
81 'chrI': {230218, 'sacCer3'},
85 size = contig_sizes.get(key, 0)
86 if size in genomes[key]:
87 return genomes[key][size]
89 if len(contig_sizes) == 1:
90 return os.path.splitext(contig_sizes.keys()[0])[0]
92 raise RuntimeError("Unrecognized genome type, update detection code.")