+class RegionDirectionError(Exception):
+ pass
+
+
+class RegionFinder():
+ def __init__(self, label, minRatio=4.0, minPeak=0.5, minPlusRatio=0.25, maxPlusRatio=0.75, leftPlusRatio=0.3, strandfilter="",
+ minHits=4.0, trimValue=0.1, doTrim=True, doDirectionality=True, shiftValue=0, maxSpacing=50, withFlag="",
+ normalize=True, listPeak=False, reportshift=False, stringency=1.0, controlfile=None, doRevBackground=False):
+
+ self.statistics = {"index": 0,
+ "total": 0,
+ "mIndex": 0,
+ "mTotal": 0,
+ "failed": 0,
+ "badRegionTrim": 0
+ }
+
+ self.regionLabel = label
+ self.rnaSettings = False
+ self.controlRDSsize = 1.0
+ self.sampleRDSsize = 1.0
+ self.minRatio = minRatio
+ self.minPeak = minPeak
+ self.leftPlusRatio = leftPlusRatio
+ self.stranded = "both"
+ if strandfilter == "plus":
+ self.stranded = "+"
+ minPlusRatio = 0.9
+ maxPlusRatio = 1.0
+ elif strandfilter == "minus":
+ self.stranded = "-"
+ minPlusRatio = 0.0
+ maxPlusRatio = 0.1
+
+ if minRatio < minPeak:
+ self.minPeak = minRatio
+
+ self.minPlusRatio = minPlusRatio
+ self.maxPlusRatio = maxPlusRatio
+ self.strandfilter = strandfilter
+ self.minHits = minHits
+ self.trimValue = trimValue
+ self.doTrim = doTrim
+ self.doDirectionality = doDirectionality
+
+ if self.doTrim:
+ self.trimString = string.join(["%2.1f" % (100. * self.trimValue), "%"], "")
+ else:
+ self.trimString = "none"
+
+ self.shiftValue = shiftValue
+ self.maxSpacing = maxSpacing
+ self.withFlag = withFlag
+ self.normalize = normalize
+ self.listPeak = listPeak
+ self.reportshift = reportshift
+ self.stringency = max(stringency, 1.0)
+ self.controlfile = controlfile
+ self.doControl = self.controlfile is not None
+ self.doPvalue = False
+ self.doRevBackground = doRevBackground
+
+
+ def useRNASettings(self, readlen):
+ self.rnaSettings = True
+ self.shiftValue = 0
+ self.doTrim = False
+ self.doDirectionality = False
+ self.maxSpacing = readlen
+
+
+ def getHeader(self):
+ if self.normalize:
+ countType = "RPM"
+ else:
+ countType = "COUNT"
+
+ headerFields = ["#regionID\tchrom\tstart\tstop", countType, "fold\tmulti%"]
+
+ if self.doDirectionality:
+ headerFields.append("plus%\tleftPlus%")
+
+ if self.listPeak:
+ headerFields.append("peakPos\tpeakHeight")
+
+ if self.reportshift:
+ headerFields.append("readShift")
+
+ if self.doPvalue:
+ headerFields.append("pValue")
+
+ return string.join(headerFields, "\t")
+
+
+ def printSettings(self, ptype, useMulti, pValueType):
+ print
+ self.printStatusMessages(ptype, useMulti)
+ self.printOptionsSummary(useMulti, pValueType)
+
+
+ def printStatusMessages(self, ptype, useMulti):
+ if self.shiftValue == "learn":
+ print "Will try to learn shift"
+
+ if self.normalize:
+ print "Normalizing to RPM"
+
+ if self.doRevBackground:
+ print "Swapping IP and background to calculate FDR"
+
+ if ptype != "":
+ if ptype in ["NONE", "SELF"]:
+ pass
+ elif ptype == "BACK":
+ if self.doControl and self.doRevBackground:
+ pass
+ else:
+ print "must have a control dataset and -revbackground for pValue type 'back'"
+ else:
+ print "could not use pValue type : %s" % ptype
+
+ if self.withFlag != "":
+ print "restrict to flag = %s" % self.withFlag
+
+ if not useMulti:
+ print "using unique reads only"
+
+ if self.rnaSettings:
+ print "using settings appropriate for RNA: -nodirectionality -notrim -noshift"
+
+ if self.strandfilter == "plus":
+ print "only analyzing reads on the plus strand"
+ elif self.strandfilter == "minus":
+ print "only analyzing reads on the minus strand"
+
+
+ def printOptionsSummary(self, useMulti, pValueType):
+
+ print "\nenforceDirectionality=%s listPeak=%s nomulti=%s " % (self.doDirectionality, self.listPeak, not useMulti)
+ print "spacing<%d minimum>%.1f ratio>%.1f minPeak=%.1f\ttrimmed=%s\tstrand=%s" % (self.maxSpacing, self.minHits, self.minRatio, self.minPeak, self.trimString, self.stranded)
+ try:
+ print "minPlus=%.2f maxPlus=%.2f leftPlus=%.2f shift=%d pvalue=%s" % (self.minPlusRatio, self.maxPlusRatio, self.leftPlusRatio, self.shiftValue, pValueType)
+ except:
+ print "minPlus=%.2f maxPlus=%.2f leftPlus=%.2f shift=%s pvalue=%s" % (self.minPlusRatio, self.maxPlusRatio, self.leftPlusRatio, self.shiftValue, pValueType)
+
+
+ def getAnalysisDescription(self, hitfile, useMulti, pValueType):
+
+ description = ["#ERANGE %s" % versionString]
+ if self.doControl:
+ description.append("#enriched sample:\t%s (%.1f M reads)\n#control sample:\t%s (%.1f M reads)" % (hitfile, self.sampleRDSsize, self.controlfile, self.controlRDSsize))
+ else:
+ description.append("#enriched sample:\t%s (%.1f M reads)\n#control sample: none" % (hitfile, self.sampleRDSsize))
+
+ if self.withFlag != "":
+ description.append("#restrict to Flag = %s" % self.withFlag)
+
+ description.append("#enforceDirectionality=%s listPeak=%s nomulti=%s" % (self.doDirectionality, self.listPeak, not useMulti))
+ description.append("#spacing<%d minimum>%.1f ratio>%.1f minPeak=%.1f trimmed=%s strand=%s" % (self.maxSpacing, self.minHits, self.minRatio, self.minPeak, self.trimString, self.stranded))
+ try:
+ description.append("#minPlus=%.2f maxPlus=%.2f leftPlus=%.2f shift=%d pvalue=%s" % (self.minPlusRatio, self.maxPlusRatio, self.leftPlusRatio, self.shiftValue, pValueType))
+ except:
+ description.append("#minPlus=%.2f maxPlus=%.2f leftPlus=%.2f shift=%s pvalue=%s" % (self.minPlusRatio, self.maxPlusRatio, self.leftPlusRatio, self.shiftValue, pValueType))
+
+ return string.join(description, "\n")
+
+
+ def getFooter(self, bestShift):
+ index = self.statistics["index"]
+ mIndex = self.statistics["mIndex"]
+ footerLines = ["#stats:\t%.1f RPM in %d regions" % (self.statistics["total"], index)]
+ if self.doDirectionality:
+ footerLines.append("#\t\t%d additional regions failed directionality filter" % self.statistics["failed"])
+
+ if self.doRevBackground:
+ try:
+ percent = min(100. * (float(mIndex)/index), 100.)
+ except ZeroDivisionError:
+ percent = 0.
+
+ footerLines.append("#%d regions (%.1f RPM) found in background (FDR = %.2f percent)" % (mIndex, self.statistics["mTotal"], percent))
+
+ if self.shiftValue == "auto" and self.reportshift:
+
+ footerLines.append("#mode of shift values: %d" % bestShift)
+
+ if self.statistics["badRegionTrim"] > 0:
+ footerLines.append("#%d regions discarded due to trimming problems" % self.statistics["badRegionTrim"])
+
+ return string.join(footerLines, "\n")
+
+
+ def updateControlStatistics(self, peak, sumAll, peakScore):
+
+ plusRatio = float(peak.numPlus)/peak.numHits
+ if peakScore >= self.minPeak and self.minPlusRatio <= plusRatio <= self.maxPlusRatio:
+ if self.doDirectionality:
+ if self.leftPlusRatio < peak.numLeftPlus / peak.numPlus:
+ self.statistics["mIndex"] += 1
+ self.statistics["mTotal"] += sumAll
+ else:
+ self.statistics["failed"] += 1
+ else:
+ # we have a region, but didn't check for directionality
+ self.statistics["mIndex"] += 1
+ self.statistics["mTotal"] += sumAll
+
+