+class RegionDirectionError(Exception):
+ pass
+
+
+class RegionFinder():
+ def __init__(self, label, minRatio=4.0, minPeak=0.5, minPlusRatio=0.25, maxPlusRatio=0.75, leftPlusRatio=0.3, strandfilter="",
+ minHits=4.0, trimValue=0.1, doTrim=True, doDirectionality=True, shiftValue=0, maxSpacing=50, withFlag="",
+ normalize=True, listPeak=False, reportshift=False, stringency=1.0):
+
+ self.statistics = {"index": 0,
+ "total": 0,
+ "mIndex": 0,
+ "mTotal": 0,
+ "failed": 0,
+ "badRegionTrim": 0
+ }
+
+ self.regionLabel = label
+ self.rnaSettings = False
+ self.controlRDSsize = 1
+ self.sampleRDSsize = 1
+ self.minRatio = minRatio
+ self.minPeak = minPeak
+ self.leftPlusRatio = leftPlusRatio
+ self.stranded = "both"
+ if strandfilter == "plus":
+ self.stranded = "+"
+ minPlusRatio = 0.9
+ maxPlusRatio = 1.0
+ elif strandfilter == "minus":
+ self.stranded = "-"
+ minPlusRatio = 0.0
+ maxPlusRatio = 0.1
+
+ if minRatio < minPeak:
+ self.minPeak = minRatio
+
+ self.minPlusRatio = minPlusRatio
+ self.maxPlusRatio = maxPlusRatio
+ self.strandfilter = strandfilter
+ self.minHits = minHits
+ self.trimValue = trimValue
+ self.doTrim = doTrim
+ self.doDirectionality = doDirectionality
+
+ if self.doTrim:
+ self.trimString = string.join(["%2.1f" % (100. * self.trimValue), "%"], "")
+ else:
+ self.trimString = "none"
+
+ self.shiftValue = shiftValue
+ self.maxSpacing = maxSpacing
+ self.withFlag = withFlag
+ self.normalize = normalize
+ self.listPeak = listPeak
+ self.reportshift = reportshift
+ self.stringency = max(stringency, 1.0)
+
+
+ def useRNASettings(self, readlen):
+ self.rnaSettings = True
+ self.shiftValue = 0
+ self.doTrim = False
+ self.doDirectionality = False
+ self.maxSpacing = readlen
+
+
+ def getHeader(self, doPvalue):
+ if self.normalize:
+ countType = "RPM"
+ else:
+ countType = "COUNT"
+
+ headerFields = ["#regionID\tchrom\tstart\tstop", countType, "fold\tmulti%"]
+
+ if self.doDirectionality:
+ headerFields.append("plus%\tleftPlus%")
+
+ if self.listPeak:
+ headerFields.append("peakPos\tpeakHeight")
+
+ if self.reportshift:
+ headerFields.append("readShift")
+
+ if doPvalue:
+ headerFields.append("pValue")
+
+ return string.join(headerFields, "\t")
+
+
+ def printSettings(self, doRevBackground, ptype, doControl, useMulti, doCache, pValueType):
+ print
+ self.printStatusMessages(doRevBackground, ptype, doControl, useMulti)
+ self.printOptionsSummary(useMulti, doCache, pValueType)
+
+
+ def printStatusMessages(self, doRevBackground, ptype, doControl, useMulti):
+ if self.shiftValue == "learn":
+ print "Will try to learn shift"
+
+ if self.normalize:
+ print "Normalizing to RPM"
+
+ if doRevBackground:
+ print "Swapping IP and background to calculate FDR"
+
+ if ptype != "":
+ if ptype in ["NONE", "SELF"]:
+ pass
+ elif ptype == "BACK":
+ if doControl and doRevBackground:
+ pass
+ else:
+ print "must have a control dataset and -revbackground for pValue type 'back'"
+ else:
+ print "could not use pValue type : %s" % ptype
+
+ if self.withFlag != "":
+ print "restrict to flag = %s" % self.withFlag
+
+ if not useMulti:
+ print "using unique reads only"
+
+ if self.rnaSettings:
+ print "using settings appropriate for RNA: -nodirectionality -notrim -noshift"
+
+ if self.strandfilter == "plus":
+ print "only analyzing reads on the plus strand"
+ elif self.strandfilter == "minus":
+ print "only analyzing reads on the minus strand"
+
+
+ def printOptionsSummary(self, useMulti, doCache, pValueType):
+
+ print "\nenforceDirectionality=%s listPeak=%s nomulti=%s cache=%s " % (self.doDirectionality, self.listPeak, not useMulti, doCache)
+ print "spacing<%d minimum>%.1f ratio>%.1f minPeak=%.1f\ttrimmed=%s\tstrand=%s" % (self.maxSpacing, self.minHits, self.minRatio, self.minPeak, self.trimString, self.stranded)
+ try:
+ print "minPlus=%.2f maxPlus=%.2f leftPlus=%.2f shift=%d pvalue=%s" % (self.minPlusRatio, self.maxPlusRatio, self.leftPlusRatio, self.shiftValue, pValueType)
+ except:
+ print "minPlus=%.2f maxPlus=%.2f leftPlus=%.2f shift=%s pvalue=%s" % (self.minPlusRatio, self.maxPlusRatio, self.leftPlusRatio, self.shiftValue, pValueType)
+
+
+ def getAnalysisDescription(self, hitfile, useMulti, doCache, pValueType, controlfile, doControl):
+
+ description = ["#ERANGE %s" % versionString]
+ if doControl:
+ description.append("#enriched sample:\t%s (%.1f M reads)\n#control sample:\t%s (%.1f M reads)" % (hitfile, self.sampleRDSsize, controlfile, self.controlRDSsize))
+ else:
+ description.append("#enriched sample:\t%s (%.1f M reads)\n#control sample: none" % (hitfile, self.sampleRDSsize))
+
+ if self.withFlag != "":
+ description.append("#restrict to Flag = %s" % self.withFlag)
+
+ description.append("#enforceDirectionality=%s listPeak=%s nomulti=%s cache=%s" % (self.doDirectionality, self.listPeak, not useMulti, doCache))
+ description.append("#spacing<%d minimum>%.1f ratio>%.1f minPeak=%.1f trimmed=%s strand=%s" % (self.maxSpacing, self.minHits, self.minRatio, self.minPeak, self.trimString, self.stranded))
+ try:
+ description.append("#minPlus=%.2f maxPlus=%.2f leftPlus=%.2f shift=%d pvalue=%s" % (self.minPlusRatio, self.maxPlusRatio, self.leftPlusRatio, self.shiftValue, pValueType))
+ except:
+ description.append("#minPlus=%.2f maxPlus=%.2f leftPlus=%.2f shift=%s pvalue=%s" % (self.minPlusRatio, self.maxPlusRatio, self.leftPlusRatio, self.shiftValue, pValueType))
+
+ return string.join(description, "\n")
+
+
+ def updateControlStatistics(self, peak, sumAll, peakScore):
+
+ plusRatio = float(peak.numPlus)/peak.numHits
+ if peakScore >= self.minPeak and self.minPlusRatio <= plusRatio <= self.maxPlusRatio:
+ if self.doDirectionality:
+ if self.leftPlusRatio < peak.numLeft / peak.numPlus:
+ self.statistics["mIndex"] += 1
+ self.statistics["mTotal"] += sumAll
+ else:
+ self.statistics["failed"] += 1
+ else:
+ # we have a region, but didn't check for directionality
+ self.statistics["mIndex"] += 1
+ self.statistics["mTotal"] += sumAll
+
+