except:
pass
-import sys, optparse
-from commoncode import readDataset, getMergedRegions, getLocusByChromDict, computeRegionBins
+import sys
+import optparse
+from commoncode import getMergedRegions, getLocusByChromDict, computeRegionBins, getConfigParser, getConfigIntOption, getConfigOption, getConfigBoolOption
+import ReadDataset
from cistematic.genomes import Genome
-from cistematic.core.geneinfo import geneinfoDB
+from commoncode import getGeneInfoDict
-print '%s: version 2.1' % sys.argv[0]
+print "geneLocusBins: version 2.2"
def main(argv=None):
if not argv:
usage = "usage: python %prog genome rdsfile outfilename [--bins numbins] [--flank bp] [--upstream bp] [--downstream bp] [--nocds] [--regions acceptfile] [--cache] [--raw] [--force]"
- parser = optparse.OptionParser(usage=usage)
- parser.add_option("--bins", type="int", dest="bins",
- help="number of bins to use [default: 10]")
- parser.add_option("--flank", type="int", dest="flankBP",
- help="number of flanking BP on both upstream and downstream [default: 0]")
- parser.add_option("--upstream", type="int", dest="upstreamBP",
- help="number of upstream flanking BP [default: 0]")
- parser.add_option("--downstream", type="int", dest="downstreamBP",
- help="number of downstream flanking BP [default: 0]")
- parser.add_option("--nocds", action="store_false", dest="doCDS",
- help="do not CDS")
- parser.add_option("--raw", action="store_false", dest="normalizeBins",
- help="do not normalize results")
- parser.add_option("--force", action="store_false", dest="limitNeighbor",
- help="limit neighbor region")
- parser.add_option("--regions", dest="acceptfile")
- parser.add_option("--cache", action="store_true", dest="doCache",
- help="use cache")
- parser.set_defaults(normalizeBins=True, doCache=False, bins=10, flankBP=None, upstreamBP=None, downstreamBP=None, doCDS=True, limitNeighbor=True)
+ parser = getParser(usage)
(options, args) = parser.parse_args(argv[1:])
if len(args) < 3:
geneLocusBins(genome, hitfile, outfilename, upstreamBp, downstreamBp, doFlank, options.normalizeBins, options.doCache, options.bins, options.doCDS, options.limitNeighbor, options.acceptfile)
-def geneLocusBins(genome, hitfile, outfilename, upstreamBp=0, downstreamBp=0, doFlank=False, normalizeBins=True, doCache=False, bins=10, doCDS=True, limitNeighbor=True, acceptfile=None):
+def getParser(usage):
+ parser = optparse.OptionParser(usage=usage)
+ parser.add_option("--bins", type="int", dest="bins",
+ help="number of bins to use [default: 10]")
+ parser.add_option("--flank", type="int", dest="flankBP",
+ help="number of flanking BP on both upstream and downstream [default: 0]")
+ parser.add_option("--upstream", type="int", dest="upstreamBP",
+ help="number of upstream flanking BP [default: 0]")
+ parser.add_option("--downstream", type="int", dest="downstreamBP",
+ help="number of downstream flanking BP [default: 0]")
+ parser.add_option("--nocds", action="store_false", dest="doCDS",
+ help="do not CDS")
+ parser.add_option("--raw", action="store_false", dest="normalizeBins",
+ help="do not normalize results")
+ parser.add_option("--force", action="store_false", dest="limitNeighbor",
+ help="limit neighbor region")
+ parser.add_option("--regions", dest="acceptfile")
+ parser.add_option("--cache", action="store_true", dest="doCache",
+ help="use cache")
+
+ configParser = getConfigParser()
+ section = "geneLocusBins"
+ normalizeBins = getConfigBoolOption(configParser, section, "normalizeBins", True)
+ doCache = getConfigBoolOption(configParser, section, "doCache", False)
+ bins = getConfigIntOption(configParser, section, "bins", 10)
+ flankBP = getConfigOption(configParser, section, "flankBP", None)
+ upstreamBP = getConfigOption(configParser, section, "upstreamBP", None)
+ downstreamBP = getConfigOption(configParser, section, "downstreamBP", None)
+ doCDS = getConfigBoolOption(configParser, section, "doCDS", True)
+ limitNeighbor = getConfigBoolOption(configParser, section, "limitNeighbor", True)
+
+ parser.set_defaults(normalizeBins=normalizeBins, doCache=doCache, bins=bins, flankBP=flankBP,
+ upstreamBP=upstreamBP, downstreamBP=downstreamBP, doCDS=doCDS,
+ limitNeighbor=limitNeighbor)
+
+ return parser
+
+def geneLocusBins(genome, hitfile, outfilename, upstreamBp=0, downstreamBp=0, doFlank=False,
+ normalizeBins=True, doCache=False, bins=10, doCDS=True, limitNeighbor=True,
+ acceptfile=None):
+
if acceptfile is None:
acceptDict = {}
else:
acceptDict = getMergedRegions(acceptfile, maxDist=0, keepLabel=True, verbose=True)
- hitRDS = readDataset(hitfile, verbose = True, cache=doCache)
+
+ hitRDS = ReadDataset.ReadDataset(hitfile, verbose = True, cache=doCache)
readlen = hitRDS.getReadSize()
normalizationFactor = 1.0
if normalizeBins:
hitDict = hitRDS.getReadsDict(doMulti=True, findallOptimize=True)
hg = Genome(genome)
- idb = geneinfoDB(cache=doCache)
-
- geneinfoDict = idb.getallGeneInfo(genome)
+ geneinfoDict = getGeneInfoDict(genome, cache=doCache)
if doFlank:
locusByChromDict = getLocusByChromDict(hg, upstream=upstreamBp, downstream=downstreamBp, useCDS=doCDS, additionalRegionsDict=acceptDict, keepSense=True, adjustToNeighbor = limitNeighbor)
else:
gidList = hg.allGIDs()
gidList.sort()
for chrom in acceptDict:
- for (label, start, stop, length) in acceptDict[chrom]:
- if label not in gidList:
- gidList.append(label)
+ for region in acceptDict[chrom]:
+ if region.label not in gidList:
+ gidList.append(region.label)
(gidBins, gidLen) = computeRegionBins(locusByChromDict, hitDict, bins, readlen, gidList, normalizationFactor, defaultRegionFormat=False)