X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=getallgenes.py;h=9433e2e1dd2ffeb0e299d378eb7ea46b3e10fc9b;hp=addba3653451a96a2db445e239ec31726a84a49b;hb=a3c0e60eb30c924232d7baaa348a15c5554e3864;hpb=5e4ae21098dba3d1edcf11e7279da0d84c3422e4 diff --git a/getallgenes.py b/getallgenes.py index addba36..9433e2e 100755 --- a/getallgenes.py +++ b/getallgenes.py @@ -4,12 +4,12 @@ try: except: print 'psyco not running' -import sys, optparse -from cistematic.core import genesIntersecting, featuresIntersecting, cacheGeneDB, uncacheGeneDB -from cistematic.core.geneinfo import geneinfoDB -from cistematic.genomes import Genome +import sys +import optparse +from cistematic.core import genesIntersecting, featuresIntersecting +from commoncode import getConfigParser, getConfigIntOption, getConfigOption, getConfigBoolOption, getGeneInfoDict, getGeneAnnotDict, getExtendedGeneAnnotDict -print "%prog: version 5.5" +print "getallgenes: version 5.6" def main(argv=None): @@ -18,6 +18,24 @@ def main(argv=None): usage = "usage: python %prog genome regionfile outfile [--radius bp] [--nomatch nomatchfile] --trackfar --stranded --cache --compact [--step dist] [--startField colID]" + parser = getParser(usage) + (options, args) = parser.parse_args(argv[1:]) + + if len(args) < 3: + print usage + sys.exit(2) + + genome = args[0] + infilename = args[1] + outfilename = args[2] + + getallgenes(genome, infilename, outfilename, options.maxRadius, + options.nomatchfilename, options.step, options.trackFar, + options.trackStrand, options.compact, options.colID, + options.doCache, options.extendGenome, options.replaceModels) + + +def getParser(usage): parser = optparse.OptionParser(usage=usage) parser.add_option("--radius", type="int", dest="maxRadius") parser.add_option("--nomatch", dest="nomatchfilename") @@ -29,34 +47,31 @@ def main(argv=None): parser.add_option("--startField", type="int", dest="colID") parser.add_option("--models", dest="extendGenome") parser.add_option("--replacemodels", action="store_true", dest="replaceModels") - parser.set_defaults(maxRadius=20002, nomatchfilename="", step=None, trackFar=False, - trackStrand=False, compact=False, colID=1, doCache=False, - extendGenome="", replaceModels=False) - (options, args) = parser.parse_args(argv[1:]) - if len(args) < 3: - print usage - sys.exit(2) + configParser = getConfigParser() + section = "getallgenes" + maxRadius = getConfigIntOption(configParser, section, "maxRadius", 20002) + nomatchfilename = getConfigOption(configParser, section, "nomatchfilename", "") + step = getConfigOption(configParser, section, "step", None) + trackFar = getConfigBoolOption(configParser, section, "trackFar", False) + trackStrand = getConfigBoolOption(configParser, section, "trackStrand", False) + compact = getConfigBoolOption(configParser, section, "compact", False) + colID = getConfigIntOption(configParser, section, "colID", 1) + doCache = getConfigBoolOption(configParser, section, "doCache", False) + extendGenome = getConfigOption(configParser, section, "extendGenome", "") + replaceModels = getConfigBoolOption(configParser, section, "replaceModels", False) - genome = args[0] - infilename = args[1] - outfilename = args[2] + parser.set_defaults(maxRadius=maxRadius, nomatchfilename=nomatchfilename, step=step, trackFar=trackFar, + trackStrand=trackStrand, compact=compact, colID=colID, doCache=doCache, + extendGenome=extendGenome, replaceModels=replaceModels) - getallgenes(genome, infilename, outfilename, options.maxRadius, - options.nomatchfilename, options.step, options.trackFar, - options.trackStrand, options.compact, options.colID, - options.doCache, options.extendgenome, options.replaceModels) + return parser def getallgenes(genome, infilename, outfilename, maxRadius=20002, nomatchfilename="", step=None, trackFar=False, trackStrand=False, compact=False, colID=1, doCache=False, extendGenome="", replaceModels=False): - if doCache: - idb = geneinfoDB(cache=True) - else: - idb = geneinfoDB() - if not step: step = maxRadius - 2 @@ -68,10 +83,7 @@ def getallgenes(genome, infilename, outfilename, maxRadius=20002, nomatchfilenam infile = open(infilename) outfile = open(outfilename,"w") - if genome == "dmelanogaster": - geneinfoDict = idb.getallGeneInfo(genome, infoKey="locus") - else: - geneinfoDict = idb.getallGeneInfo(genome) + geneinfoDict = getGeneInfoDict(genome, cache=doCache) posList = [] altPosDict = {} @@ -120,11 +132,10 @@ def getallgenes(genome, infilename, outfilename, maxRadius=20002, nomatchfilenam if maxRadius < step: step = maxRadius - 2 - hg = Genome(genome, inRAM=True) if extendGenome != "": - hg.extendFeatures(extendGenome, replace = replaceModels) - - geneannotDict = hg.allAnnotInfo() + geneannotDict = getExtendedGeneAnnotDict(genome, extendGenome, replace=replaceModels, inRAM=True) + else: + geneannotDict = getGeneAnnotDict(genome, inRAM=True) for radius in range(1, maxRadius, step): print "radius %d" % radius