import glob
import sys
import os
+import re
import logging
+from gaworkflow.util.alphanum import alphanum
+
class DuplicateGenome(Exception): pass
return d
-def constructMapperDict(genome_dict):
- """
- Creates a dictionary which can map the genome
- in the eland config generator output to a local
- genome path
-
- ie. 'Homo sapiens|hg18' -> <genome_dir>
- """
- mapper_dict = {}
- for species in genome_dict.keys():
- for build in genome_dict[species]:
- mapper_dict[species+'|'+build] = genome_dict[species][build]
-
- return mapper_dict
-
-
+class constructMapperDict(object):
+    """
+    Emulate a dictionary to map genome|build names to paths.
+
+    It uses the dictionary generated by getAvailableGenomes, which is
+    indexed as genome_dict[species][build] -> path.
+
+    Two key forms are understood:
+      'species|build'  the path of exactly that build
+      'species'        the path of the build whose name sorts last
+                       in alphanum (human) order
+
+    ie. 'Homo sapiens|hg18' -> path of the hg18 genome directory
+    """
+    def __init__(self, genome_dict):
+        # Stored by reference: later changes to genome_dict show up here.
+        self.genome_dict = genome_dict
+
+    def __getitem__(self, key):
+        """
+        Return the best match for key
+
+        Raises KeyError for an unknown species or build, for a species
+        that has no builds, and for a key with more than one '|'.
+        """
+        elements = re.split(r"\|", key)
+
+        if len(elements) == 1:
+            # we just have the species name
+            # get the set of builds
+            builds = self.genome_dict[elements[0]]
+            if not builds:
+                # behave like a dictionary instead of leaking an IndexError
+                raise KeyError(key)
+
+            # sort build names the way humans would
+            names = list(builds.keys())
+            names.sort(cmp=alphanum)
+
+            # return the path from the 'last' build name
+            return builds[names[-1]]
+
+        elif len(elements) == 2:
+            # we have species, and build name
+            return self.genome_dict[elements[0]][elements[1]]
+        else:
+            raise KeyError("Unrecognized key")
+
+    def _entries(self):
+        """
+        Yield a ('species|build', path) pair for every known build.
+        """
+        for species in self.genome_dict.keys():
+            for build in self.genome_dict[species]:
+                yield species + '|' + build, self.genome_dict[species][build]
+
+    def keys(self):
+        """
+        Return every 'species|build' key as a list of strings.
+        """
+        return [name for name, path in self._entries()]
+
+    def values(self):
+        """
+        Return the path of every build, in the same order as keys().
+        """
+        return [path for name, path in self._entries()]
+
+    def items(self):
+        """
+        Return ('species|build', path) tuples, in the same order as keys().
+        """
+        return list(self._entries())
+
if __name__ == '__main__':
if len(sys.argv) != 2:
--- /dev/null
+import unittest
+
+from StringIO import StringIO
+from gaworkflow.pipeline import genome_mapper
+
+class testGenomeMapper(unittest.TestCase):
+    """
+    Exercise the dictionary emulation of genome_mapper.constructMapperDict.
+    """
+    def test_construct_mapper(self):
+        """
+        Check %-format lookups and the sizes of keys/values/items.
+        """
+        available = {
+            'Arabidopsis thaliana': {'v01212004': '/arabidopsis'},
+            'Homo sapiens': {'hg18': '/hg18'},
+            'Mus musculus': {'mm8': '/mm8',
+                             'mm9': '/mm9',
+                             'mm10': '/mm10'},
+            'Phage': {'174': '/phi'},
+        }
+        mapper = genome_mapper.constructMapperDict(available)
+
+        # (format template, expected path); a bare species name is
+        # expected to resolve to mm10, not mm9.
+        lookups = [
+            ("%(Mus musculus|mm8)s", "/mm8"),
+            ("%(Phage|174)s", "/phi"),
+            ("%(Mus musculus)s", "/mm10"),
+            ("%(Mus musculus|mm8)s", "/mm8"),
+            ("%(Mus musculus|mm10)s", "/mm10"),
+        ]
+        for template, expected in lookups:
+            self.failUnlessEqual(template % mapper, expected)
+
+        # six species|build combinations are defined above
+        for flatten in (mapper.keys, mapper.values, mapper.items):
+            self.failUnlessEqual(len(flatten()), 6)
+
+
+def suite():
+    """
+    Build a suite from the test* methods of testGenomeMapper.
+    """
+    return unittest.makeSuite(testGenomeMapper, prefix='test')
+
+# When executed as a script, run the tests returned by suite().
+if __name__ == "__main__":
+ unittest.main(defaultTest="suite")
--- /dev/null
+#\r
+# The Alphanum Algorithm is an improved sorting algorithm for strings\r
+# containing numbers. Instead of sorting numbers in ASCII order like\r
+# a standard sort, this algorithm sorts numbers in numeric order.\r
+#\r
+# The Alphanum Algorithm is discussed at http://www.DaveKoelle.com\r
+#\r
+#* Python implementation provided by Chris Hulan (chris.hulan@gmail.com)\r
+#* Distributed under same license as original\r
+#\r
+# This library is free software; you can redistribute it and/or\r
+# modify it under the terms of the GNU Lesser General Public\r
+# License as published by the Free Software Foundation; either\r
+# version 2.1 of the License, or any later version.\r
+#\r
+# This library is distributed in the hope that it will be useful,\r
+# but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\r
+# Lesser General Public License for more details.\r
+#\r
+# You should have received a copy of the GNU Lesser General Public\r
+# License along with this library; if not, write to the Free Software\r
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA\r
+#\r
+\r
+import re\r
+\r
+#\r
+# TODO: Make decimal points be considered in the same class as digits\r
+#\r
+\r
+def chunkify(str):\r
+    """return a list of numbers and non-numeric substrings of +str+\r
+\r
+    the numeric substrings are converted to integer, non-numeric are left as is,\r
+    so "mm10" gives ["mm", 10] and "a0" gives ["a", 0]\r
+    """\r
+    chunks = []\r
+    for piece in re.findall(r"(\d+|\D+)", str):\r
+        # A piece is either all digits or has none, so testing its first\r
+        # character is enough.  A plain if/else is used because the\r
+        # "test and int(x) or x" idiom hands back the string whenever\r
+        # int(x) is 0.\r
+        if re.match(r'\d', piece):\r
+            chunks.append(int(piece))\r
+        else:\r
+            chunks.append(piece)\r
+    return chunks\r
+\r
+def alphanum(a,b):\r
+    """cmp-style comparison of +a+ and +b+ piece by piece, left to right\r
+\r
+    +a+ and +b+ are expected to be strings (for example file names) mixing digits and other characters.\r
+    chunkify splits each one into integers and non-numeric sub-strings, so runs of digits are\r
+    compared numerically and everything else lexicographically.\r
+    The result is whatever cmp gives for the two chunk lists.\r
+    """\r
+    # the built in comparison does the work once both strings are chunked\r
+    return cmp(chunkify(a), chunkify(b))\r
+\r
+\r
+\r
+if __name__ == "__main__":\r
+    # Demo: print a sample list of names in alphanum order.\r
+    unsorted = [\r
+        "1000X Radonius Maximus","10X Radonius","200X Radonius","20X Radonius",\r
+        "20X Radonius Prime","30X Radonius","40X Radonius",\r
+        "Allegia 50 Clasteron","Allegia 500 Clasteron","Allegia 51 Clasteron",\r
+        "Allegia 51B Clasteron","Allegia 52 Clasteron","Allegia 60 Clasteron",\r
+        "Alpha 100","Alpha 2","Alpha 200","Alpha 2A","Alpha 2A-8000","Alpha 2A-900",\r
+        "Callisto Morphamax","Callisto Morphamax 500","Callisto Morphamax 5000",\r
+        "Callisto Morphamax 600","Callisto Morphamax 700","Callisto Morphamax 7000",\r
+        "Callisto Morphamax 7000 SE","Callisto Morphamax 7000 SE2",\r
+        "QRS-60 Intrinsia Machine","QRS-60F Intrinsia Machine",\r
+        "QRS-62 Intrinsia Machine","QRS-62F Intrinsia Machine",\r
+        "Xiph Xlater 10000","Xiph Xlater 2000","Xiph Xlater 300","Xiph Xlater 40",\r
+        "Xiph Xlater 5","Xiph Xlater 50","Xiph Xlater 500","Xiph Xlater 5000",\r
+        "Xiph Xlater 58",\r
+    ]\r
+    # Sort a copy; it is called "ordered" so the builtin sorted() stays usable.\r
+    ordered = unsorted[:]\r
+    ordered.sort(alphanum)\r
+    print '+++++Sorted...++++'\r
+    print '\n'.join(ordered)\r