except:
print 'psyco not running'
-import sys, string, optparse
-from commoncode import readDataset, getMergedRegions, findPeak, writeLog
+import sys
+import string
+import optparse
+from commoncode import getMergedRegions, findPeak, writeLog, getConfigParser, getConfigOption, getConfigIntOption, getConfigBoolOption
+import ReadDataset
-versionString = "%prog: version 3.9"
+versionString = "regionCounts: version 3.10"
print versionString
def main(argv=None):
usage = "usage: python %prog regionfile rdsfile outfilename [options]"
+ parser = getParser(usage)
+ (options, args) = parser.parse_args(argv[1:])
+
+ if len(args) < 3:
+ print usage
+ sys.exit(1)
+
+ regionfilename = args[0]
+ hitfile = args[1]
+ outfilename = args[2]
+
+ regionCounts(regionfilename, hitfile, outfilename, options.flagRDS, options.cField,
+ options.useFullchrom, options.normalize, options.padregion,
+ options.mergeregion, options.merging, options.doUniqs, options.doMulti,
+ options.doSplices, options.usePeak, options.cachePages, options.logfilename,
+ options.doRPKM, options.doLength, options.forceRegion)
+
+
+def getParser(usage):
parser = optparse.OptionParser(usage=usage)
parser.add_option("--markRDS", action="store_true", dest="flagRDS")
parser.add_option("--chromField", type="int", dest="cField")
parser.add_option("--rpkm", action="store_true", dest="doRPKM")
parser.add_option("--length", action="store_true", dest="doLength")
parser.add_option("--force", action="store_true", dest="forceRegion")
- parser.set_defaults(flagRDS=False, cField=1, useFullchrom=False, normalize=True,
- padregion=0, mergeregion=0, merging=True, doUniqs=True,
- doMulti=True, doSplices=False, usePeak=False, cachePages=-1,
- logfilename="regionCounts.log", doRPKM=False, doLength=False,
- forceRegion=False)
-
- (options, args) = parser.parse_args(argv[1:])
- if len(args) < 3:
- print usage
- sys.exit(1)
-
- regionfilename = args[0]
- hitfile = args[1]
- outfilename = args[2]
-
- regionCounts(regionfilename, hitfile, outfilename, options.flagRDS, options.cField,
- options.useFullchrom, options.normalize, options.padregion,
- options.mergeregion, options.merging, options.doUniqs, options.doMulti,
- options.doSplices, options.usePeak, options.cachePages, options.logfilename,
- options.doRPKM, options.doLength, options.forceRegion)
+ configParser = getConfigParser()
+ section = "regionCounts"
+ flagRDS = getConfigBoolOption(configParser, section, "flagRDS", False)
+ cField = getConfigIntOption(configParser, section, "cField", 1)
+ useFullchrom = getConfigBoolOption(configParser, section, "useFullchrom", False)
+ normalize = getConfigBoolOption(configParser, section, "normalize", True)
+ padregion = getConfigIntOption(configParser, section, "padregion", 0)
+ mergeregion = getConfigIntOption(configParser, section, "mergeregion", 0)
+ merging = getConfigBoolOption(configParser, section, "merging", True)
+ doUniqs = getConfigBoolOption(configParser, section, "doUniqs", True)
+ doMulti = getConfigBoolOption(configParser, section, "doMulti", True)
+ doSplices = getConfigBoolOption(configParser, section, "doSplices", False)
+ usePeak = getConfigBoolOption(configParser, section, "usePeak", False)
+ cachePages = getConfigIntOption(configParser, section, "cachePages", -1)
+ logfilename = getConfigOption(configParser, section, "logfilename", "regionCounts.log")
+ doRPKM = getConfigBoolOption(configParser, section, "doRPKM", False)
+ doLength = getConfigBoolOption(configParser, section, "doLength", False)
+ forceRegion = getConfigBoolOption(configParser, section, "forceRegion", False)
+
+ parser.set_defaults(flagRDS=flagRDS, cField=cField, useFullchrom=useFullchrom, normalize=normalize,
+ padregion=padregion, mergeregion=mergeregion, merging=merging, doUniqs=doUniqs,
+ doMulti=doMulti, doSplices=doSplices, usePeak=usePeak, cachePages=cachePages,
+ logfilename=logfilename, doRPKM=doRPKM, doLength=doLength,
+ forceRegion=forceRegion)
+
+ return parser
def regionCounts(regionfilename, hitfile, outfilename, flagRDS=False, cField=1,
labeltoRegionDict = {}
regionCount = {}
- hitRDS = readDataset(hitfile, verbose=True, cache=doCache)
+ hitRDS = ReadDataset.ReadDataset(hitfile, verbose=True, cache=doCache)
readlen = hitRDS.getReadSize()
if cachePages > hitRDS.getDefaultCacheSize():
hitRDS.setDBcache(cachePages)
for rchrom in regionDict:
if forceRegion and rchrom not in chromList:
print rchrom
- for (label, start, stop, length) in regionDict[rchrom]:
- regionCount[label] = 0
- labelList.append(label)
- labeltoRegionDict[label] = (rchrom, start, stop)
+ for region in regionDict[rchrom]:
+ regionCount[region.label] = 0
+ labelList.append(region.label)
+ labeltoRegionDict[region.label] = (rchrom, region.start, region.stop)
for rchrom in chromList:
regionList = []
rindex = 0
dictLen = len(readDict[fullchrom])
- for (label, start, stop, length) in regionDict[rchrom]:
+ for region in regionDict[rchrom]:
+ label = region.label
+ start = region.start
+ stop = region.stop
regionCount[label] = 0
labelList.append(label)
labeltoRegionDict[label] = (rchrom, start, stop)
-
- if useFullchrom:
- fullchrom = rchrom
- else:
- fullchrom = "chr%s" % rchrom
-
- for (label, rstart, rstop, length) in regionDict[rchrom]:
- regionList.append((label, fullchrom, rstart, rstop))
+ regionList.append((label, fullchrom, start, stop))
if usePeak:
readList = []
for localIndex in xrange(rindex, dictLen):
read = readDict[fullchrom][localIndex]
- if read[0] < rstart:
+ if read["start"] < start:
rindex += 1
- elif rstart <= read[0] <= rstop:
+ elif start <= read["start"] <= stop:
readList.append(read)
else:
break
continue
readList.sort()
- (topPos, numHits, smoothArray, numPlus) = findPeak(readList, rstart, rstop - rstart, readlen, doWeight=True)
+ peak = findPeak(readList, start, stop - start, readlen, doWeight=True)
try:
- topValue = smoothArray[topPos[0]]
+ topValue = peak.smoothArray[peak.topPos[0]]
except:
- print "problem with %s %s" % (str(topPos), str(smoothArray))
+ print "problem with %s %s" % (str(peak.topPos), str(peak.smoothArray))
continue
regionCount[label] += topValue
else:
- regionCount[label] += hitRDS.getCounts(fullchrom, rstart, rstop, uniqs=doUniqs, multi=doMulti, splices=doSplices)
+ regionCount[label] += hitRDS.getCounts(fullchrom, start, stop, uniqs=doUniqs, multi=doMulti, splices=doSplices)
if flagRDS:
hitRDS.flagReads(regionList, uniqs=doUniqs, multi=doMulti, splices=doSplices)