X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=geneLocusPeaks.py;h=00c77d812b3029eaacb294b94377b771bc7d4444;hp=fdfddf95ccb821187b887876ea0aee2c5b283d48;hb=0d3e3112fd04c2e6b44a25cacef1d591658ad181;hpb=5e4ae21098dba3d1edcf11e7279da0d84c3422e4 diff --git a/geneLocusPeaks.py b/geneLocusPeaks.py index fdfddf9..00c77d8 100755 --- a/geneLocusPeaks.py +++ b/geneLocusPeaks.py @@ -9,12 +9,15 @@ try: except: pass -from commoncode import readDataset, getMergedRegions, findPeak, getLocusByChromDict +import sys +import optparse +from commoncode import getMergedRegions, findPeak, getLocusByChromDict +import ReadDataset from cistematic.genomes import Genome -from cistematic.core.geneinfo import geneinfoDB -import sys, optparse +from commoncode import getGeneInfoDict, getConfigParser, getConfigOption, getConfigIntOption, getConfigBoolOption -print "%prog: version 2.0" + +print "geneLocusPeaks: version 2.1" def main(argv=None): if not argv: @@ -22,13 +25,7 @@ def main(argv=None): usage = "usage: python %prog genome rdsfile outfilename [--up upstream] [--down downstream] [--regions acceptfile] [--raw]" - parser = optparse.OptionParser(usage=usage) - parser.add_option("--up", type="int", dest="upstream") - parser.add_option("--down", type="int", dest="downstream") - parser.add_option("--regions", dest="acceptfile") - parser.add_option("--raw", action="store_false", dest="normalize") - parser.add_option("--cache", action="store_true", dest="doCache") - parser.set_defaults(upstream=0, downstream=0, acceptfile="", normalize=True, doCache=False) + parser = makeParser(usage) (options, args) = parser.parse_args(argv[1:]) if len(args) < 3: @@ -43,6 +40,27 @@ def main(argv=None): geneLocusPeaks(genome, hitfile, outfilename, options.upstream, options.downstream, options.acceptfile, options.normalize, options.doCache) +def makeParser(usage=""): + parser = optparse.OptionParser(usage=usage) + parser.add_option("--up", type="int", dest="upstream") + parser.add_option("--down", type="int", dest="downstream") + parser.add_option("--regions", dest="acceptfile") + parser.add_option("--raw", action="store_false", dest="normalize") + parser.add_option("--cache", action="store_true", dest="doCache") + + configParser = getConfigParser() + section = "geneLocusPeaks" + upstream = getConfigIntOption(configParser, section, "upstream", 0) + downstream = getConfigIntOption(configParser, section, "downstream", 0) + acceptfile = getConfigOption(configParser, section, "acceptfile", "") + normalize = getConfigBoolOption(configParser, section, "normalize", True) + doCache = getConfigBoolOption(configParser, section, "doCache", False) + + parser.set_defaults(upstream=upstream, downstream=downstream, acceptfile=acceptfile, normalize=normalize, doCache=doCache) + + return parser + + def geneLocusPeaks(genome, hitfile, outfilename, upstream=0, downstream=0, acceptfile="", normalize=True, doCache=False): acceptDict = {} @@ -51,7 +69,7 @@ def geneLocusPeaks(genome, hitfile, outfilename, upstream=0, downstream=0, accep print "upstream = %d downstream = %d" % (upstream, downstream) - hitRDS = readDataset(hitfile, verbose = True, cache=doCache) + hitRDS = ReadDataset.ReadDataset(hitfile, verbose = True, cache=doCache) readlen = hitRDS.getReadSize() normalizationFactor = 1.0 if normalize: @@ -61,19 +79,17 @@ def geneLocusPeaks(genome, hitfile, outfilename, upstream=0, downstream=0, accep hitDict = hitRDS.getReadsDict(doMulti=True, findallOptimize=True) hg = Genome(genome) - idb = geneinfoDB(cache=True) - gidCount = {} gidPos = {} - geneinfoDict = idb.getallGeneInfo(genome) + geneinfoDict = getGeneInfoDict(genome, cache=True) locusByChromDict = getLocusByChromDict(hg, upstream, downstream, useCDS=True, additionalRegionsDict=acceptDict) gidList = hg.allGIDs() gidList.sort() for chrom in acceptDict: - for (label, start, stop, length) in acceptDict[chrom]: - if label not in gidList: - gidList.append(label) + for region in acceptDict[chrom]: + if region.label not in gidList: + gidList.append(region.label) for gid in gidList: gidCount[gid] = 0 @@ -85,10 +101,10 @@ def geneLocusPeaks(genome, hitfile, outfilename, upstream=0, downstream=0, accep print chrom for (start, stop, gid, glen) in locusByChromDict[chrom]: gidCount[gid] = 0. - (topPos, numHits, smoothArray, numPlus) = findPeak(hitDict[chrom], start, glen, readlen) - if len(topPos) > 0: - gidCount[gid] = smoothArray[topPos[0]] - gidPos[gid] = (chrom, start + topPos[0]) + peak = findPeak(hitDict[chrom], start, glen, readlen) + if len(peak.topPos) > 0: + gidCount[gid] = peak.smoothArray[peak.topPos[0]] + gidPos[gid] = (chrom, start + peak.topPos[0]) else: gidPos[gid] = (chrom, start)