strandfilter=None, combine5p=False):
shiftValue = determineShiftValue(autoshift, shift, noshift, rnaSettings)
-
- if trimValue is not None:
- trimValue = float(trimValue) / 100.
- trimString = "%2.1f%s" % ((100. * trimValue), "%")
- else:
- trimValue = 0.1
- trimString = "10%"
-
- if not doTrim:
- trimString = "none"
+ doControl = False
+ if mockfile is not None:
+ doControl = True
if doRevBackground:
print "Swapping IP and background to calculate FDR"
pValueType = "back"
- doControl = False
- if mockfile is not None:
- doControl = True
-
doPvalue = True
if ptype is not None:
ptype = ptype.upper()
else:
pValueType = "self"
- if cachePages is not None:
- doCache = True
- else:
- doCache = False
- cachePages = -1
-
if withFlag != "":
print "restrict to flag = %s" % withFlag
stringency = max(stringency, 1.0)
writeLog(logfilename, versionString, string.join(sys.argv[1:]))
+ if cachePages is not None:
+ doCache = True
+ else:
+ doCache = False
+ cachePages = -1
+
+ mockRDS = None
if doControl:
print "\ncontrol:"
mockRDS = ReadDataset.ReadDataset(mockfile, verbose=True, cache=doCache)
if cachePages > hitRDS.getDefaultCacheSize():
hitRDS.setDBcache(cachePages)
+ if trimValue is not None:
+ trimValue = float(trimValue) / 100.
+ trimString = "%2.1f%s" % ((100. * trimValue), "%")
+ else:
+ trimValue = 0.1
+ trimString = "10%"
+
+ if not doTrim:
+ trimString = "none"
+
if doAppend:
fileMode = "a"
else:
fileMode = "w"
outfile = open(outfilename, fileMode)
-
outfile.write("#ERANGE %s\n" % versionString)
if doControl:
mockRDSsize = len(mockRDS) / 1000000.
controlSampleString = "\t%s (%.1f M reads)" % (mockfile, mockRDSsize)
else:
+ mockRDSsize = 0
controlSampleString = " none"
hitRDSsize = len(hitRDS) / 1000000.
hitChromList = hitRDS.getChromosomes()
if doControl:
mockChromList = mockRDS.getChromosomes()
+ else:
+ mockChromList = []
+ mockSampleSize = 0
+ hitSampleSize = 0
if normalize:
if doControl:
mockSampleSize = mockRDSsize
continue
#now do background swapping the two samples around
- print "calculating background..."
- backgroundTrimValue = 1/20.
- backgroundRegionStats, backgroundRegionWeights = locateRegions(mockRDS, mockSampleSize, hitRDS, hitSampleSize,
- chromosome, useMulti, normalize, maxSpacing,
- doDirectionality, doTrim, minHits, minRatio,
- readlen, shiftValue, minPeak, minPlusRatio,
- maxPlusRatio, leftPlusRatio, listPeak, noMulti,
- doControl, factor, backgroundTrimValue)
-
- statistics["mIndex"] += backgroundRegionStats["index"]
- statistics["mTotal"] += backgroundRegionStats["total"]
- statistics["failed"] += backgroundRegionStats["failed"]
- print statistics["mIndex"], statistics["mTotal"]
- if doPvalue:
- if pValueType == "self":
- p, poissonmean = calculatePValue(allRegionWeights)
- else:
- p, poissonmean = calculatePValue(backgroundRegionWeights)
+ if mockRDS is not None:
+ print "calculating background..."
+ backgroundTrimValue = 1/20.
+ backgroundRegionStats, backgroundRegionWeights = locateRegions(mockRDS, mockSampleSize, hitRDS, hitSampleSize,
+ chromosome, useMulti, normalize, maxSpacing,
+ doDirectionality, doTrim, minHits, minRatio,
+ readlen, shiftValue, minPeak, minPlusRatio,
+ maxPlusRatio, leftPlusRatio, listPeak, noMulti,
+ doControl, factor, backgroundTrimValue)
+
+ statistics["mIndex"] += backgroundRegionStats["index"]
+ statistics["mTotal"] += backgroundRegionStats["total"]
+ statistics["failed"] += backgroundRegionStats["failed"]
+ print statistics["mIndex"], statistics["mTotal"]
+ if doPvalue:
+ if pValueType == "self":
+ p, poissonmean = calculatePValue(allRegionWeights)
+ else:
+ p, poissonmean = calculatePValue(backgroundRegionWeights)
- print headline
- shiftModeValue = writeRegionsToFile(outfile, outregions, doPvalue, p, poissonmean, reportshift, shiftValue)
+ print headline
+ shiftModeValue = writeRegionsToFile(outfile, outregions, doPvalue, p, poissonmean, reportshift, shiftValue)
footer = getFooter(statistics, doDirectionality, doRevBackground, shiftValue, reportshift, shiftModeValue)
print footer
foldRatio = getFoldRatio(mockRDS, chrom, regionStart, regionStop, doControl, useMulti, normalize, mockSampleSize, sumAll, minRatio)
if foldRatio >= minRatio:
- localshift = getBestShiftForRegion(readList, regionStart, regionLength, doWeight=True)
+ localshift = getBestShiftForRegion(readList, regionStart, regionLength, useWeight=True)
try:
shiftDict[localshift] += 1
except KeyError:
def getFoldRatio(rds, chrom, start, stop, doControl, useMulti, normalize, sampleSize, sumAll, minRatio):
- if doControl:
+ if doControl and rds is not None:
foldRatio = getFoldRatioFromRDS(rds, chrom, start, stop, useMulti, normalize, sampleSize, sumAll)
else:
foldRatio = minRatio
def locateRegions(rds, rdsSampleSize, referenceRDS, referenceSampleSize, chrom, useMulti,
- normalize, maxSpacing, doDirectionality, doTrim, minHits, minRatio, readlen,
+ normalizeToRPM, maxSpacing, doDirectionality, doTrim, minHits, minRatio, readlen,
shiftValue, minPeak, minPlusRatio, maxPlusRatio, leftPlusRatio, listPeak,
noMulti, doControl, factor, trimValue, outputRegionList=False):
weight = read["weight"]
if abs(pos - previousHit) > maxSpacing or pos == maxCoord:
sumAll = currentTotalWeight
- if normalize:
+ if normalizeToRPM:
sumAll /= rdsSampleSize
regionStart = currentHitList[0]
regionWeights.append(int(sumAll))
if sumAll >= minHits and numStarts > minRatio and (regionStop - regionStart) > readlen:
sumMulti = 0.
- #first pass uses getFoldRatio on mockRDS as there may not be control
- foldRatio = getFoldRatioFromRDS(referenceRDS, chrom, regionStart, regionStop, useMulti, normalize, referenceSampleSize, sumAll)
+ foldRatio = getFoldRatio(referenceRDS, chrom, regionStart, regionStop, doControl, useMulti, normalizeToRPM, referenceSampleSize, sumAll, minRatio)
if foldRatio >= minRatio:
# first pass, with absolute numbers
peak = findPeak(currentReadList, regionStart, regionStop - regionStart, readlen, doWeight=True, leftPlus=doDirectionality, shift=shiftValue)
peakScore = peak.smoothArray[bestPos]
numPlus = peak.numPlus
shift = peak.shift
- numLeft = peak.numLeft
- if normalize:
+ numLeft = peak.numLeftPlus
+ if normalizeToRPM:
peakScore /= rdsSampleSize
if doTrim:
sumAll = trimPeak.numHits
numPlus = trimPeak.numPlus
- numLeft = trimPeak.numLeft
- if normalize:
+ numLeft = trimPeak.numLeftPlus
+ if normalizeToRPM:
sumAll /= rdsSampleSize
- foldRatio = getFoldRatio(referenceRDS, chrom, regionStart, regionStop, doControl, useMulti, normalize, referenceSampleSize, sumAll, minRatio)
+ foldRatio = getFoldRatio(referenceRDS, chrom, regionStart, regionStop, doControl, useMulti, normalizeToRPM, referenceSampleSize, sumAll, minRatio)
if outputRegionList:
sumMulti = rds.getCounts(chrom, regionStart, regionStop, uniqs=False, multi=useMulti, splices=False, reportCombined=True)
# just in case it changed, use latest data
except:
continue
- # normalize to RPM
- if normalize:
+ if normalizeToRPM:
peakScore /= rdsSampleSize
elif outputRegionList:
sumMulti = currentTotalWeight - currentUniqReadCount
if outputRegionList:
- # normalize to RPM
- if normalize:
+ if normalizeToRPM:
sumMulti /= rdsSampleSize
try:
pValue *= poissonmean
pValue /= i+1
- outputList.append("%1.2f" % pValue)
+ outputList.append("%1.2g" % pValue)
outline = string.join(outputList, "\t")
print outline
return footer
if __name__ == "__main__":
- main(sys.argv)
\ No newline at end of file
+ main(sys.argv)