except:
pass
-import sys, optparse
-from commoncode import readDataset
+import sys
+import optparse
+import ReadDataset
from cistematic.genomes import Genome
-from cistematic.core.geneinfo import geneinfoDB
+from commoncode import getGeneInfoDict, getConfigParser, getConfigBoolOption, getConfigIntOption
-print "%prog: version 2.0"
+print "geneUpstreamBins: version 2.1"
def main(argv=None):
if not argv:
usage = "usage: python %prog genome rdsfile outfilename [--max regionSize] [--raw] [--cache]"
- parser = optparse.OptionParser(usage=usage)
- parser.add_option("--raw", action="store_false", dest="normalize",
- help="maximum region in bp")
- parser.add_option("--max", type="int", dest="standardMinDist")
- parser.add_option("--cache", action="store_true", dest="doCache")
- parser.set_defaults(standardMinDist=3000, normalize=True, doCache=False)
+ parser = makeParser(usage)
(options, args) = parser.parse_args(argv[1:])
if len(args) < 3:
geneUpstreamBins(genome, hitfile, outfilename, options.standardMinDist, options.normalize, options.doCache)
+def makeParser(usage=""):
+ parser = optparse.OptionParser(usage=usage)
+ parser.add_option("--raw", action="store_false", dest="normalize",
+ help="maximum region in bp")
+ parser.add_option("--max", type="int", dest="standardMinDist")
+ parser.add_option("--cache", action="store_true", dest="doCache")
+
+ configParser = getConfigParser()
+ section = "geneUpstreamBins"
+ standardMinDist = getConfigIntOption(configParser, section, "regionSize", 3000)
+ normalize = getConfigBoolOption(configParser, section, "normalize", True)
+ doCache = getConfigBoolOption(configParser, section, "doCache", False)
+
+ parser.set_defaults(standardMinDist=standardMinDist, normalize=normalize, doCache=doCache)
+
+ return parser
+
+
def geneUpstreamBins(genome, hitfile, outfilename, standardMinDist=3000, normalize=True, doCache=False):
bins = 10
standardMinThresh = standardMinDist / bins
- hitRDS = readDataset(hitfile, verbose = True, cache=doCache)
+ hitRDS = ReadDataset.ReadDataset(hitfile, verbose = True, cache=doCache)
normalizationFactor = 1.0
if normalize:
totalCount = len(hitRDS)
hitDict = hitRDS.getReadsDict(doMulti=True, findallOptimize=True)
hg = Genome(genome)
- idb = geneinfoDB(cache=True)
-
- geneinfoDict = idb.getallGeneInfo(genome)
+ geneinfoDict = getGeneInfoDict(genome, cache=True)
featuresDict = hg.getallGeneFeatures()
outfile = open(outfilename,"w")
continue
binList = [0] * bins
- for (tagStart, sense, weight) in hitDict[chrom]:
+ for read in hitDict[chrom]:
+ tagStart = read["start"]
+ weight = read["weight"]
tagStart -= gstart
if tagStart >= glen:
break