X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=geneDownstreamBins.py;fp=geneDownstreamBins.py;h=4ce97e4fc1380517447153e59fba80e5531e3029;hp=058ad8236a4f7df6d08a6ca293d0a27ab090c9bd;hb=0d3e3112fd04c2e6b44a25cacef1d591658ad181;hpb=5e4ae21098dba3d1edcf11e7279da0d84c3422e4 diff --git a/geneDownstreamBins.py b/geneDownstreamBins.py index 058ad82..4ce97e4 100755 --- a/geneDownstreamBins.py +++ b/geneDownstreamBins.py @@ -10,12 +10,13 @@ except: pass # originally from version 1.3 of geneDnaDownstreamCounts.py -import sys, optparse -from commoncode import readDataset +import sys +import optparse +import ReadDataset from cistematic.genomes import Genome -from cistematic.core.geneinfo import geneinfoDB +from commoncode import getGeneInfoDict, getConfigParser, getConfigIntOption -print "%prog: version 2.0" +print "geneDownstreamBins: version 2.1" def main(argv=None): if not argv: @@ -23,10 +24,7 @@ def main(argv=None): usage = "usage: %prog genome rdsfile outfilename [--max regionSize]" - parser = optparse.OptionParser(usage=usage) - parser.add_option("--max", type="int", dest="standardMinDist", - help="maximum region in bp") - parser.set_defaults(standardMinDist=3000) + parser = makeParser(usage) (options, args) = parser.parse_args(argv[1:]) if len(args) < 3: @@ -40,22 +38,33 @@ def main(argv=None): geneDownstreamBins(genome, hitfile, outfilename, options.standardMinDist) +def makeParser(usage=""): + parser = optparse.OptionParser(usage=usage) + parser.add_option("--max", type="int", dest="standardMinDist", + help="maximum region in bp") + + configParser = getConfigParser() + section = "geneDownstreamBins" + standardMinDist = getConfigIntOption(configParser, section, "regionSize", 3000) + + parser.set_defaults(standardMinDist=standardMinDist) + + return parser + + def geneDownstreamBins(genome, hitfile, outfilename, standardMinDist=3000, doCache=False, normalize=False): bins = 10 standardMinThresh = standardMinDist / bins - hitRDS = readDataset(hitfile, verbose=True, cache=doCache) + hitRDS = ReadDataset.ReadDataset(hitfile, verbose=True, cache=doCache) normalizationFactor = 1.0 if normalize: hitDictSize = len(hitRDS) normalizationFactor = hitDictSize / 1000000. hitDict = hitRDS.getReadsDict(doMulti=True, findallOptimize=True) - + geneinfoDict = getGeneInfoDict(genome, cache=True) hg = Genome(genome) - idb = geneinfoDB(cache=True) - - geneinfoDict = idb.getallGeneInfo(genome) featuresDict = hg.getallGeneFeatures() outfile = open(outfilename, "w") @@ -115,7 +124,9 @@ def geneDownstreamBins(genome, hitfile, outfilename, standardMinDist=3000, doCac continue binList = [0.] * bins - for (tagStart, sense, weight) in hitDict[chrom]: + for read in hitDict[chrom]: + tagStart = read["start"] + weight = read["weight"] tagStart -= gstart if tagStart >= glen: break