X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=geneNeighbors.py;h=221ffe215e2ca1c9b21560334329e7b0fbe1ba66;hp=8ec363c77fdbacaa39534d8debe1a65abb1962bb;hb=0d3e3112fd04c2e6b44a25cacef1d591658ad181;hpb=5e4ae21098dba3d1edcf11e7279da0d84c3422e4 diff --git a/geneNeighbors.py b/geneNeighbors.py index 8ec363c..221ffe2 100755 --- a/geneNeighbors.py +++ b/geneNeighbors.py @@ -9,12 +9,13 @@ try: except: pass -import sys, optparse -from commoncode import getMergedRegions, getLocusByChromDict +import sys +import optparse +from commoncode import getMergedRegions, getLocusByChromDict, getConfigParser, getConfigIntOption, getConfigBoolOption, getConfigOption from cistematic.genomes import Genome -from cistematic.core.geneinfo import geneinfoDB +from commoncode import getGeneInfoDict -print "%prog: version 2.4" +print "geneNeighbors: version 2.5" % sys.argv[0] def main(argv=None): @@ -23,16 +24,7 @@ def main(argv=None): usage = "usage: python %prog genome outfilename [--regions acceptfile] [--downstream bp] [--upstream bp] [--mindist bp] [--minlocus bp] [--maxlocus bp] [--samesense]" - parser = optparse.OptionParser(usage=usage) - parser.add_option("--regions", dest="acceptFile") - parser.add_option("--downstream", type="int", dest="downMax") - parser.add_option("--upstream", type="int", dest="upMax") - parser.add_option("--mindist", type="int", dest="minDist") - parser.add_option("--minlocus", type="int", dest="minLocus") - parser.add_option("--maxlocus", type="int", dest="maxLocus") - parser.add_option("--samesense", action="store_true", dest="checkSense") - parser.set_defaults(acceptfile="", checkSense=False, downMax=10000000, - upMax=10000000, minDist=0, minLocus=-1, maxLocus=10000000) + parser = getParser(usage) (options, args) = parser.parse_args(argv[1:]) if len(args) < 2: @@ -49,6 +41,32 @@ def main(argv=None): print "\n%d genes matched" % index +def getParser(usage): + parser = optparse.OptionParser(usage=usage) + parser.add_option("--regions", dest="acceptFile") + parser.add_option("--downstream", type="int", dest="downMax") + parser.add_option("--upstream", type="int", dest="upMax") + parser.add_option("--mindist", type="int", dest="minDist") + parser.add_option("--minlocus", type="int", dest="minLocus") + parser.add_option("--maxlocus", type="int", dest="maxLocus") + parser.add_option("--samesense", action="store_true", dest="checkSense") + + configParser = getConfigParser() + section = "geneNeighbors" + acceptfile = getConfigOption(configParser, section, "acceptfile", "") + checkSense = getConfigBoolOption(configParser, section, "checkSense", False) + downMax = getConfigIntOption(configParser, section, "downMax", 10000000) + upMax = getConfigIntOption(configParser, section, "upMax", 10000000) + minDist = getConfigIntOption(configParser, section, "minDist", 0) + minLocus = getConfigIntOption(configParser, section, "minLocus", -1) + maxLocus = getConfigIntOption(configParser, section, "maxLocus", 10000000) + + parser.set_defaults(acceptfile=acceptfile, checkSense=checkSense, downMax=downMax, + upMax=upMax, minDist=minDist, minLocus=minLocus, maxLocus=maxLocus) + + return parser + + def geneNeighbors(genome, outfilename, acceptfile="", checkSense=False, downMax=10000000, upMax=10000000, minDist=0, minLocus=-1, maxLocus=10000000): @@ -58,17 +76,15 @@ def geneNeighbors(genome, outfilename, acceptfile="", checkSense=False, acceptDict = getMergedRegions(acceptfile, maxDist=0, keepLabel=True, verbose=True) hg = Genome(genome) - idb = geneinfoDB(cache=True) - - geneinfoDict = idb.getallGeneInfo(genome) + geneinfoDict = getGeneInfoDict(genome, cache=True) locusByChromDict = getLocusByChromDict(hg, additionalRegionsDict=acceptDict, keepSense=True) gidList = hg.allGIDs() gidList.sort() for chrom in acceptDict: - for (label, start, stop, length) in acceptDict[chrom]: - if label not in gidList: - gidList.append(label) + for region in acceptDict[chrom]: + if region.label not in gidList: + gidList.append(region.label) index = 0 outfile = open(outfilename,"w")