X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=geneUpstreamBins.py;h=3bdd1dd773c94372692ff066995445c680bd8055;hp=e8554161c6fe18417dd227a2bed985a0c826038c;hb=0d3e3112fd04c2e6b44a25cacef1d591658ad181;hpb=5e4ae21098dba3d1edcf11e7279da0d84c3422e4 diff --git a/geneUpstreamBins.py b/geneUpstreamBins.py index e855416..3bdd1dd 100755 --- a/geneUpstreamBins.py +++ b/geneUpstreamBins.py @@ -9,12 +9,13 @@ try: except: pass -import sys, optparse -from commoncode import readDataset +import sys +import optparse +import ReadDataset from cistematic.genomes import Genome -from cistematic.core.geneinfo import geneinfoDB +from commoncode import getGeneInfoDict, getConfigParser, getConfigBoolOption, getConfigIntOption -print "%prog: version 2.0" +print "geneUpstreamBins: version 2.1" def main(argv=None): if not argv: @@ -22,12 +23,7 @@ def main(argv=None): usage = "usage: python %prog genome rdsfile outfilename [--max regionSize] [--raw] [--cache]" - parser = optparse.OptionParser(usage=usage) - parser.add_option("--raw", action="store_false", dest="normalize", - help="maximum region in bp") - parser.add_option("--max", type="int", dest="standardMinDist") - parser.add_option("--cache", action="store_true", dest="doCache") - parser.set_defaults(standardMinDist=3000, normalize=True, doCache=False) + parser = makeParser(usage) (options, args) = parser.parse_args(argv[1:]) if len(args) < 3: @@ -41,11 +37,29 @@ def main(argv=None): geneUpstreamBins(genome, hitfile, outfilename, options.standardMinDist, options.normalize, options.doCache) +def makeParser(usage=""): + parser = optparse.OptionParser(usage=usage) + parser.add_option("--raw", action="store_false", dest="normalize", + help="maximum region in bp") + parser.add_option("--max", type="int", dest="standardMinDist") + parser.add_option("--cache", action="store_true", dest="doCache") + + configParser = getConfigParser() + section = "geneUpstreamBins" + standardMinDist = getConfigIntOption(configParser, section, "regionSize", 3000) + normalize = getConfigBoolOption(configParser, section, "normalize", True) + doCache = getConfigBoolOption(configParser, section, "doCache", False) + + parser.set_defaults(standardMinDist=standardMinDist, normalize=normalize, doCache=doCache) + + return parser + + def geneUpstreamBins(genome, hitfile, outfilename, standardMinDist=3000, normalize=True, doCache=False): bins = 10 standardMinThresh = standardMinDist / bins - hitRDS = readDataset(hitfile, verbose = True, cache=doCache) + hitRDS = ReadDataset.ReadDataset(hitfile, verbose = True, cache=doCache) normalizationFactor = 1.0 if normalize: totalCount = len(hitRDS) @@ -54,9 +68,7 @@ def geneUpstreamBins(genome, hitfile, outfilename, standardMinDist=3000, normali hitDict = hitRDS.getReadsDict(doMulti=True, findallOptimize=True) hg = Genome(genome) - idb = geneinfoDB(cache=True) - - geneinfoDict = idb.getallGeneInfo(genome) + geneinfoDict = getGeneInfoDict(genome, cache=True) featuresDict = hg.getallGeneFeatures() outfile = open(outfilename,"w") @@ -117,7 +129,9 @@ def geneUpstreamBins(genome, hitfile, outfilename, standardMinDist=3000, normali continue binList = [0] * bins - for (tagStart, sense, weight) in hitDict[chrom]: + for read in hitDict[chrom]: + tagStart = read["start"] + weight = read["weight"] tagStart -= gstart if tagStart >= glen: break