X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=getallgenes.py;fp=getallgenes.py;h=a717fe86f0881268f8745791dc94d97f6d161d09;hp=addba3653451a96a2db445e239ec31726a84a49b;hb=0d3e3112fd04c2e6b44a25cacef1d591658ad181;hpb=5e4ae21098dba3d1edcf11e7279da0d84c3422e4 diff --git a/getallgenes.py b/getallgenes.py index addba36..a717fe8 100755 --- a/getallgenes.py +++ b/getallgenes.py @@ -4,12 +4,12 @@ try: except: print 'psyco not running' -import sys, optparse -from cistematic.core import genesIntersecting, featuresIntersecting, cacheGeneDB, uncacheGeneDB -from cistematic.core.geneinfo import geneinfoDB -from cistematic.genomes import Genome +import sys +import optparse +from cistematic.core import genesIntersecting, featuresIntersecting +from commoncode import getConfigParser, getConfigIntOption, getConfigOption, getConfigBoolOption, getGeneInfoDict, getGeneAnnotDict, getExtendedGeneAnnotDict -print "%prog: version 5.5" +print "getallgenes: version 5.6" def main(argv=None): @@ -18,20 +18,7 @@ def main(argv=None): usage = "usage: python %prog genome regionfile outfile [--radius bp] [--nomatch nomatchfile] --trackfar --stranded --cache --compact [--step dist] [--startField colID]" - parser = optparse.OptionParser(usage=usage) - parser.add_option("--radius", type="int", dest="maxRadius") - parser.add_option("--nomatch", dest="nomatchfilename") - parser.add_option("--trackfar", action="store_true", dest="trackFar") - parser.add_option("--stranded", action="store_true", dest="trackStrand") - parser.add_option("--cache", action="store_true", dest="cachePages") - parser.add_option("--compact", action="store_true", dest="compact") - parser.add_option("--step", type="int", dest="step") - parser.add_option("--startField", type="int", dest="colID") - parser.add_option("--models", dest="extendGenome") - parser.add_option("--replacemodels", action="store_true", dest="replaceModels") - parser.set_defaults(maxRadius=20002, nomatchfilename="", step=None, trackFar=False, - trackStrand=False, compact=False, colID=1, doCache=False, - extendGenome="", replaceModels=False) + parser = getParser(usage) (options, args) = parser.parse_args(argv[1:]) if len(args) < 3: @@ -48,15 +35,43 @@ def main(argv=None): options.doCache, options.extendgenome, options.replaceModels) +def getParser(usage): + parser = optparse.OptionParser(usage=usage) + parser.add_option("--radius", type="int", dest="maxRadius") + parser.add_option("--nomatch", dest="nomatchfilename") + parser.add_option("--trackfar", action="store_true", dest="trackFar") + parser.add_option("--stranded", action="store_true", dest="trackStrand") + parser.add_option("--cache", action="store_true", dest="cachePages") + parser.add_option("--compact", action="store_true", dest="compact") + parser.add_option("--step", type="int", dest="step") + parser.add_option("--startField", type="int", dest="colID") + parser.add_option("--models", dest="extendGenome") + parser.add_option("--replacemodels", action="store_true", dest="replaceModels") + + configParser = getConfigParser() + section = "getallgenes" + maxRadius = getConfigIntOption(configParser, section, "maxRadius", 20002) + nomatchfilename = getConfigOption(configParser, section, "nomatchfilename", "") + step = getConfigOption(configParser, section, "step", None) + trackFar = getConfigBoolOption(configParser, section, "trackFar", False) + trackStrand = getConfigBoolOption(configParser, section, "trackStrand", False) + compact = getConfigBoolOption(configParser, section, "compact", False) + colID = getConfigIntOption(configParser, section, "colID", 1) + doCache = getConfigBoolOption(configParser, section, "doCache", False) + extendGenome = getConfigOption(configParser, section, "extendGenome", "") + replaceModels = getConfigBoolOption(configParser, section, "replaceModels", False) + + parser.set_defaults(maxRadius=maxRadius, nomatchfilename=nomatchfilename, step=step, trackFar=trackFar, + trackStrand=trackStrand, compact=compact, colID=colID, doCache=doCache, + extendGenome=extendGenome, replaceModels=replaceModels) + + return parser + + def getallgenes(genome, infilename, outfilename, maxRadius=20002, nomatchfilename="", step=None, trackFar=False, trackStrand=False, compact=False, colID=1, doCache=False, extendGenome="", replaceModels=False): - if doCache: - idb = geneinfoDB(cache=True) - else: - idb = geneinfoDB() - if not step: step = maxRadius - 2 @@ -68,10 +83,7 @@ def getallgenes(genome, infilename, outfilename, maxRadius=20002, nomatchfilenam infile = open(infilename) outfile = open(outfilename,"w") - if genome == "dmelanogaster": - geneinfoDict = idb.getallGeneInfo(genome, infoKey="locus") - else: - geneinfoDict = idb.getallGeneInfo(genome) + geneinfoDict = getGeneInfoDict(genome, cache=doCache) posList = [] altPosDict = {} @@ -120,11 +132,10 @@ def getallgenes(genome, infilename, outfilename, maxRadius=20002, nomatchfilenam if maxRadius < step: step = maxRadius - 2 - hg = Genome(genome, inRAM=True) if extendGenome != "": - hg.extendFeatures(extendGenome, replace = replaceModels) - - geneannotDict = hg.allAnnotInfo() + geneannotDict = getExtendedGeneAnnotDict(genome, extendGenome, replace=replaceModels, inRAM=True) + else: + geneannotDict = getGeneAnnotDict(genome, inRAM=True) for radius in range(1, maxRadius, step): print "radius %d" % radius