except:
pass
-import sys, optparse
-from commoncode import readDataset, getMergedRegions, computeRegionBins, getLocusByChromDict
+import sys
+import optparse
+from commoncode import getMergedRegions, computeRegionBins, getLocusByChromDict, getConfigParser, getConfigBoolOption, getConfigIntOption, getConfigOption
+import ReadDataset
from cistematic.genomes import Genome
-from cistematic.core.geneinfo import geneinfoDB
+from commoncode import getGeneInfoDict
-print "%prog: version 1.3"
+print "geneStallingBins: version 1.4"
def main(argv=None):
usage = "usage: python %s genome rdsfile controlrdsfile outfilename [--upstream bp] [--downstream bp] [--regions acceptfile] [--cache] [--normalize] [--tagCount]"
- parser = optparse.OptionParser(usage=usage)
- parser.add_option("--upstream", type="int", dest="upstreamBp")
- parser.add_option("--downstream", type="int", dest="downstreamBp")
- parser.add_option("--regions", dest="acceptfile")
- parser.add_option("--cache", action="store_true", dest="doCache")
- parser.add_option("--normalize", action="store_true", dest="normalize")
- parser.add_option("--tagCount", action="store_true", dest="doTagCount")
- parser.add_option("--bins", type="int", dest="bins")
- parser.set_defaults(upstreamBp=300, downstreamBp=0, acceptfile="",
- doCache=False, normalize=False, doTagCount=False, bins=4)
+ parser = getParser(usage)
(options, args) = parser.parse_args(argv[1:])
if len(args) < 4:
options.normalize, options.doTagCount, options.bins)
+def getParser(usage):
+ parser = optparse.OptionParser(usage=usage)
+ parser.add_option("--upstream", type="int", dest="upstreamBp")
+ parser.add_option("--downstream", type="int", dest="downstreamBp")
+ parser.add_option("--regions", dest="acceptfile")
+ parser.add_option("--cache", action="store_true", dest="doCache")
+ parser.add_option("--normalize", action="store_true", dest="normalize")
+ parser.add_option("--tagCount", action="store_true", dest="doTagCount")
+ parser.add_option("--bins", type="int", dest="bins")
+
+ configParser = getConfigParser()
+ section = "geneStallingBins"
+ upstreamBp = getConfigIntOption(configParser, section, "upstreamBp", 300)
+ downstreamBp = getConfigIntOption(configParser, section, "downstreamBp", 0)
+ acceptfile = getConfigOption(configParser, section, "acceptfile", "")
+ doCache = getConfigBoolOption(configParser, section, "doCache", False)
+ normalize = getConfigBoolOption(configParser, section, "normalize", False)
+ doTagCount = getConfigBoolOption(configParser, section, "doTagCount", False)
+ bins = getConfigIntOption(configParser, section, "bins", 4)
+
+ parser.set_defaults(upstreamBp=upstreamBp, downstreamBp=downstreamBp, acceptfile=acceptfile,
+ doCache=doCache, normalize=normalize, doTagCount=doTagCount, bins=bins)
+
+ return parser
+
+
def geneStallingBins(genome, hitfile, controlfile, outfilename, upstreamBp=300,
downstreamBp=0, acceptfile="", doCache=False, normalize=False,
doTagCount=False, bins=4):
doCDS = True
limitNeighbor = False
- hitRDS = readDataset(hitfile, verbose=True, cache=doCache)
+ hitRDS = ReadDataset.ReadDataset(hitfile, verbose=True, cache=doCache)
readlen = hitRDS.getReadSize()
hitNormalizationFactor = 1.0
if normalize:
hitDictSize = len(hitRDS)
hitNormalizationFactor = hitDictSize / 1000000.
- controlRDS = readDataset(hitfile, verbose=True, cache=doCache)
+ controlRDS = ReadDataset.ReadDataset(hitfile, verbose=True, cache=doCache)
controlNormalizationFactor = 1.0
if normalize:
controlDictSize = len(hitRDS)
controlDict = controlRDS.getReadsDict(doMulti=True, findallOptimize=True)
hg = Genome(genome)
- idb = geneinfoDB(cache=doCache)
-
- geneinfoDict = idb.getallGeneInfo(genome)
+ geneinfoDict = getGeneInfoDict(genome, cache=doCache)
locusByChromDict = getLocusByChromDict(hg, upstream=upstreamBp, downstream=downstreamBp, useCDS=doCDS, additionalRegionsDict=acceptDict, keepSense=True, adjustToNeighbor=limitNeighbor)
gidList = hg.allGIDs()
gidList.sort()
for chrom in acceptDict:
- for (label, start, stop, length) in acceptDict[chrom]:
- if label not in gidList:
- gidList.append(label)
+ for region in acceptDict[chrom]:
+ if region.label not in gidList:
+ gidList.append(region.label)
(gidBins, gidLen) = computeRegionBins(locusByChromDict, hitDict, bins, readlen, gidList, hitNormalizationFactor, defaultRegionFormat=False, binLength=upstreamBp)
(controlBins, gidLen) = computeRegionBins(locusByChromDict, controlDict, bins, readlen, gidList, controlNormalizationFactor, defaultRegionFormat=False, binLength=upstreamBp)