13 from commoncode import getMergedRegions, getLocusByChromDict
14 from cistematic.genomes import Genome
15 from cistematic.core.geneinfo import geneinfoDB
17 print "%prog: version 2.4"
24 usage = "usage: python %prog genome outfilename [--regions acceptfile] [--downstream bp] [--upstream bp] [--mindist bp] [--minlocus bp] [--maxlocus bp] [--samesense]"
26 parser = optparse.OptionParser(usage=usage)
27 parser.add_option("--regions", dest="acceptFile")
28 parser.add_option("--downstream", type="int", dest="downMax")
29 parser.add_option("--upstream", type="int", dest="upMax")
30 parser.add_option("--mindist", type="int", dest="minDist")
31 parser.add_option("--minlocus", type="int", dest="minLocus")
32 parser.add_option("--maxlocus", type="int", dest="maxLocus")
33 parser.add_option("--samesense", action="store_true", dest="checkSense")
34 parser.set_defaults(acceptfile="", checkSense=False, downMax=10000000,
35 upMax=10000000, minDist=0, minLocus=-1, maxLocus=10000000)
36 (options, args) = parser.parse_args(argv[1:])
45 index = geneNeighbors(genome, outfilename, options.acceptFile, options.checkSense,
46 options.downMax, options.upMax, options.minDist, options.minLocus,
49 print "\n%d genes matched" % index
52 def geneNeighbors(genome, outfilename, acceptfile="", checkSense=False,
53 downMax=10000000, upMax=10000000, minDist=0, minLocus=-1,
58 acceptDict = getMergedRegions(acceptfile, maxDist=0, keepLabel=True, verbose=True)
61 idb = geneinfoDB(cache=True)
63 geneinfoDict = idb.getallGeneInfo(genome)
64 locusByChromDict = getLocusByChromDict(hg, additionalRegionsDict=acceptDict, keepSense=True)
66 gidList = hg.allGIDs()
68 for chrom in acceptDict:
69 for (label, start, stop, length) in acceptDict[chrom]:
70 if label not in gidList:
74 outfile = open(outfilename,"w")
75 chromList = locusByChromDict.keys()
77 for chrom in chromList:
78 if len(locusByChromDict[chrom]) < 3 or "NT" in chrom or "MT" in chrom:
83 prevStop = locusByChromDict[chrom][0][1]
84 prevGID = locusByChromDict[chrom][0][2]
85 if "FAR" not in prevGID:
86 symbol = "LOC" + prevGID
89 geneinfo = geneinfoDict[prevGID]
90 symbol = geneinfo[0][0]
97 prevSense = locusByChromDict[chrom][0][4]
99 currentStart = locusByChromDict[chrom][1][0]
100 currentStop = locusByChromDict[chrom][1][1]
101 currentGID = locusByChromDict[chrom][1][2]
102 if "FAR" not in currentGID:
103 symbol = "LOC" + currentGID
106 geneinfo = geneinfoDict[currentGID]
107 symbol = geneinfo[0][0]
114 currentGlen = locusByChromDict[chrom][1][3]
115 currentSense = locusByChromDict[chrom][1][4]
117 for (nextStart, nextStop, nextGID, nextGlen, nextSense) in locusByChromDict[chrom][2:]:
118 if "FAR" not in nextGID:
119 symbol = "LOC" + nextGID
122 geneinfo = geneinfoDict[nextGID]
123 symbol = geneinfo[0][0]
130 leftDist = currentStart - prevStop
131 rightDist = nextStart - currentStop
132 if (currentSense == "F" and minDist < leftDist < upMax and minDist < rightDist < downMax) or (currentSense == "R" and minDist < rightDist < upMax and minDist < leftDist < downMax):
133 if not checkSense or currentSense == nextSense:
134 if minLocus <= currentGlen <= maxLocus:
135 outfile.write("%s\t%s\t%s\t%s\t%d\t%s\t%s\t%d\n" % (currentGID, currentSense, prevGID, prevSense, leftDist, nextGID, nextSense, rightDist))
138 prevStop = currentStop
140 prevSense = currentSense
141 currentStart = nextStart
142 currentStop = nextStop
144 currentGlen = nextGlen
145 currentSense = nextSense
151 if __name__ == "__main__":