14 from commoncode import getMergedRegions, findPeak, getLocusByChromDict
16 from cistematic.genomes import Genome
17 from commoncode import getGeneInfoDict, getConfigParser, getConfigOption, getConfigIntOption, getConfigBoolOption
20 print "geneLocusPeaks: version 2.1"
26 usage = "usage: python %prog genome rdsfile outfilename [--up upstream] [--down downstream] [--regions acceptfile] [--raw]"
28 parser = makeParser(usage)
29 (options, args) = parser.parse_args(argv[1:])
33 print "\twhere upstream and downstream are in bp and and optional"
40 geneLocusPeaks(genome, hitfile, outfilename, options.upstream, options.downstream, options.acceptfile, options.normalize, options.doCache)
43 def makeParser(usage=""):
44 parser = optparse.OptionParser(usage=usage)
45 parser.add_option("--up", type="int", dest="upstream")
46 parser.add_option("--down", type="int", dest="downstream")
47 parser.add_option("--regions", dest="acceptfile")
48 parser.add_option("--raw", action="store_false", dest="normalize")
49 parser.add_option("--cache", action="store_true", dest="doCache")
51 configParser = getConfigParser()
52 section = "geneLocusPeaks"
53 upstream = getConfigIntOption(configParser, section, "upstream", 0)
54 downstream = getConfigIntOption(configParser, section, "downstream", 0)
55 acceptfile = getConfigOption(configParser, section, "acceptfile", "")
56 normalize = getConfigBoolOption(configParser, section, "normalize", True)
57 doCache = getConfigBoolOption(configParser, section, "doCache", False)
59 parser.set_defaults(upstream=upstream, downstream=downstream, acceptfile=acceptfile, normalize=normalize, doCache=doCache)
64 def geneLocusPeaks(genome, hitfile, outfilename, upstream=0, downstream=0, acceptfile="", normalize=True, doCache=False):
68 acceptDict = getMergedRegions(acceptfile, maxDist=0, keepLabel=True, verbose=True)
70 print "upstream = %d downstream = %d" % (upstream, downstream)
72 hitRDS = ReadDataset.ReadDataset(hitfile, verbose = True, cache=doCache)
73 readlen = hitRDS.getReadSize()
74 normalizationFactor = 1.0
76 totalCount = len(hitRDS)
77 normalizationFactor = totalCount / 1000000.
79 hitDict = hitRDS.getReadsDict(doMulti=True, findallOptimize=True)
84 geneinfoDict = getGeneInfoDict(genome, cache=True)
85 locusByChromDict = getLocusByChromDict(hg, upstream, downstream, useCDS=True, additionalRegionsDict=acceptDict)
87 gidList = hg.allGIDs()
89 for chrom in acceptDict:
90 for region in acceptDict[chrom]:
91 if region.label not in gidList:
92 gidList.append(region.label)
98 if chrom not in locusByChromDict:
102 for (start, stop, gid, glen) in locusByChromDict[chrom]:
104 peak = findPeak(hitDict[chrom], start, glen, readlen)
105 if len(peak.topPos) > 0:
106 gidCount[gid] = peak.smoothArray[peak.topPos[0]]
107 gidPos[gid] = (chrom, start + peak.topPos[0])
109 gidPos[gid] = (chrom, start)
111 outfile = open(outfilename, "w")
118 geneinfo = geneinfoDict[gid]
119 symbol = geneinfo[0][0]
125 if gid in gidCount and gid in gidPos:
126 (chrom, pos) = gidPos[gid]
127 outfile.write("%s\t%s\tchr%s\t%d\t%.2f\n" % (gid, symbol, chrom, pos, gidCount[gid]))
132 if __name__ == "__main__":