14 from commoncode import getMergedRegions, getLocusByChromDict, getConfigParser, getConfigIntOption, getConfigBoolOption, getConfigOption
15 from cistematic.genomes import Genome
16 from commoncode import getGeneInfoDict
18 print "geneNeighbors: version 2.5" % sys.argv[0]
25 usage = "usage: python %prog genome outfilename [--regions acceptfile] [--downstream bp] [--upstream bp] [--mindist bp] [--minlocus bp] [--maxlocus bp] [--samesense]"
27 parser = getParser(usage)
28 (options, args) = parser.parse_args(argv[1:])
37 index = geneNeighbors(genome, outfilename, options.acceptFile, options.checkSense,
38 options.downMax, options.upMax, options.minDist, options.minLocus,
41 print "\n%d genes matched" % index
45 parser = optparse.OptionParser(usage=usage)
46 parser.add_option("--regions", dest="acceptFile")
47 parser.add_option("--downstream", type="int", dest="downMax")
48 parser.add_option("--upstream", type="int", dest="upMax")
49 parser.add_option("--mindist", type="int", dest="minDist")
50 parser.add_option("--minlocus", type="int", dest="minLocus")
51 parser.add_option("--maxlocus", type="int", dest="maxLocus")
52 parser.add_option("--samesense", action="store_true", dest="checkSense")
54 configParser = getConfigParser()
55 section = "geneNeighbors"
56 acceptfile = getConfigOption(configParser, section, "acceptfile", "")
57 checkSense = getConfigBoolOption(configParser, section, "checkSense", False)
58 downMax = getConfigIntOption(configParser, section, "downMax", 10000000)
59 upMax = getConfigIntOption(configParser, section, "upMax", 10000000)
60 minDist = getConfigIntOption(configParser, section, "minDist", 0)
61 minLocus = getConfigIntOption(configParser, section, "minLocus", -1)
62 maxLocus = getConfigIntOption(configParser, section, "maxLocus", 10000000)
64 parser.set_defaults(acceptfile=acceptfile, checkSense=checkSense, downMax=downMax,
65 upMax=upMax, minDist=minDist, minLocus=minLocus, maxLocus=maxLocus)
70 def geneNeighbors(genome, outfilename, acceptfile="", checkSense=False,
71 downMax=10000000, upMax=10000000, minDist=0, minLocus=-1,
76 acceptDict = getMergedRegions(acceptfile, maxDist=0, keepLabel=True, verbose=True)
79 geneinfoDict = getGeneInfoDict(genome, cache=True)
80 locusByChromDict = getLocusByChromDict(hg, additionalRegionsDict=acceptDict, keepSense=True)
82 gidList = hg.allGIDs()
84 for chrom in acceptDict:
85 for region in acceptDict[chrom]:
86 if region.label not in gidList:
87 gidList.append(region.label)
90 outfile = open(outfilename,"w")
91 chromList = locusByChromDict.keys()
93 for chrom in chromList:
94 if len(locusByChromDict[chrom]) < 3 or "NT" in chrom or "MT" in chrom:
99 prevStop = locusByChromDict[chrom][0][1]
100 prevGID = locusByChromDict[chrom][0][2]
101 if "FAR" not in prevGID:
102 symbol = "LOC" + prevGID
105 geneinfo = geneinfoDict[prevGID]
106 symbol = geneinfo[0][0]
113 prevSense = locusByChromDict[chrom][0][4]
115 currentStart = locusByChromDict[chrom][1][0]
116 currentStop = locusByChromDict[chrom][1][1]
117 currentGID = locusByChromDict[chrom][1][2]
118 if "FAR" not in currentGID:
119 symbol = "LOC" + currentGID
122 geneinfo = geneinfoDict[currentGID]
123 symbol = geneinfo[0][0]
130 currentGlen = locusByChromDict[chrom][1][3]
131 currentSense = locusByChromDict[chrom][1][4]
133 for (nextStart, nextStop, nextGID, nextGlen, nextSense) in locusByChromDict[chrom][2:]:
134 if "FAR" not in nextGID:
135 symbol = "LOC" + nextGID
138 geneinfo = geneinfoDict[nextGID]
139 symbol = geneinfo[0][0]
146 leftDist = currentStart - prevStop
147 rightDist = nextStart - currentStop
148 if (currentSense == "F" and minDist < leftDist < upMax and minDist < rightDist < downMax) or (currentSense == "R" and minDist < rightDist < upMax and minDist < leftDist < downMax):
149 if not checkSense or currentSense == nextSense:
150 if minLocus <= currentGlen <= maxLocus:
151 outfile.write("%s\t%s\t%s\t%s\t%d\t%s\t%s\t%d\n" % (currentGID, currentSense, prevGID, prevSense, leftDist, nextGID, nextSense, rightDist))
154 prevStop = currentStop
156 prevSense = currentSense
157 currentStart = nextStart
158 currentStop = nextStop
160 currentGlen = nextGlen
161 currentSense = nextSense
167 if __name__ == "__main__":