X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=findall.py;h=aa820374b994d497fbc4dd336a98b795489f5900;hp=ec75efb54f3680c892578a38811c1bd241c37241;hb=a3c0e60eb30c924232d7baaa348a15c5554e3864;hpb=77dccd7c98d8cdb60caaf178b1123df71ea662c9 diff --git a/findall.py b/findall.py index ec75efb..aa82037 100755 --- a/findall.py +++ b/findall.py @@ -231,6 +231,7 @@ def findall(factor, hitfile, outfilename, minHits=4.0, minRatio=4.0, maxSpacing= doCache = False cachePages = -1 + mockRDS = None if doControl: print "\ncontrol:" mockRDS = ReadDataset.ReadDataset(mockfile, verbose=True, cache=doCache) @@ -268,6 +269,7 @@ def findall(factor, hitfile, outfilename, minHits=4.0, minRatio=4.0, maxSpacing= mockRDSsize = len(mockRDS) / 1000000. controlSampleString = "\t%s (%.1f M reads)" % (mockfile, mockRDSsize) else: + mockRDSsize = 0 controlSampleString = " none" hitRDSsize = len(hitRDS) / 1000000. @@ -318,7 +320,11 @@ def findall(factor, hitfile, outfilename, minHits=4.0, minRatio=4.0, maxSpacing= hitChromList = hitRDS.getChromosomes() if doControl: mockChromList = mockRDS.getChromosomes() + else: + mockChromList = [] + mockSampleSize = 0 + hitSampleSize = 0 if normalize: if doControl: mockSampleSize = mockRDSsize @@ -360,27 +366,28 @@ def findall(factor, hitfile, outfilename, minHits=4.0, minRatio=4.0, maxSpacing= continue #now do background swapping the two samples around - print "calculating background..." - backgroundTrimValue = 1/20. - backgroundRegionStats, backgroundRegionWeights = locateRegions(mockRDS, mockSampleSize, hitRDS, hitSampleSize, - chromosome, useMulti, normalize, maxSpacing, - doDirectionality, doTrim, minHits, minRatio, - readlen, shiftValue, minPeak, minPlusRatio, - maxPlusRatio, leftPlusRatio, listPeak, noMulti, - doControl, factor, backgroundTrimValue) - - statistics["mIndex"] += backgroundRegionStats["index"] - statistics["mTotal"] += backgroundRegionStats["total"] - statistics["failed"] += backgroundRegionStats["failed"] - print statistics["mIndex"], statistics["mTotal"] - if doPvalue: - if pValueType == "self": - p, poissonmean = calculatePValue(allRegionWeights) - else: - p, poissonmean = calculatePValue(backgroundRegionWeights) + if mockRDS is not None: + print "calculating background..." + backgroundTrimValue = 1/20. + backgroundRegionStats, backgroundRegionWeights = locateRegions(mockRDS, mockSampleSize, hitRDS, hitSampleSize, + chromosome, useMulti, normalize, maxSpacing, + doDirectionality, doTrim, minHits, minRatio, + readlen, shiftValue, minPeak, minPlusRatio, + maxPlusRatio, leftPlusRatio, listPeak, noMulti, + doControl, factor, backgroundTrimValue) + + statistics["mIndex"] += backgroundRegionStats["index"] + statistics["mTotal"] += backgroundRegionStats["total"] + statistics["failed"] += backgroundRegionStats["failed"] + print statistics["mIndex"], statistics["mTotal"] + if doPvalue: + if pValueType == "self": + p, poissonmean = calculatePValue(allRegionWeights) + else: + p, poissonmean = calculatePValue(backgroundRegionWeights) - print headline - shiftModeValue = writeRegionsToFile(outfile, outregions, doPvalue, p, poissonmean, reportshift, shiftValue) + print headline + shiftModeValue = writeRegionsToFile(outfile, outregions, doPvalue, p, poissonmean, reportshift, shiftValue) footer = getFooter(statistics, doDirectionality, doRevBackground, shiftValue, reportshift, shiftModeValue) print footer @@ -514,7 +521,7 @@ def learnShift(hitDict, hitSampleSize, mockRDS, chrom, doControl, useMulti, norm def getFoldRatio(rds, chrom, start, stop, doControl, useMulti, normalize, sampleSize, sumAll, minRatio): - if doControl: + if doControl and rds is not None: foldRatio = getFoldRatioFromRDS(rds, chrom, start, stop, useMulti, normalize, sampleSize, sumAll) else: foldRatio = minRatio @@ -564,8 +571,7 @@ def locateRegions(rds, rdsSampleSize, referenceRDS, referenceSampleSize, chrom, regionWeights.append(int(sumAll)) if sumAll >= minHits and numStarts > minRatio and (regionStop - regionStart) > readlen: sumMulti = 0. - #first pass uses getFoldRatio on mockRDS as there may not be control - foldRatio = getFoldRatioFromRDS(referenceRDS, chrom, regionStart, regionStop, useMulti, normalizeToRPM, referenceSampleSize, sumAll) + foldRatio = getFoldRatio(referenceRDS, chrom, regionStart, regionStop, doControl, useMulti, normalizeToRPM, referenceSampleSize, sumAll, minRatio) if foldRatio >= minRatio: # first pass, with absolute numbers peak = findPeak(currentReadList, regionStart, regionStop - regionStart, readlen, doWeight=True, leftPlus=doDirectionality, shift=shiftValue) @@ -719,7 +725,7 @@ def writeRegionsToFile(outfile, outregions, doPvalue, pValue, poissonmean, repor pValue *= poissonmean pValue /= i+1 - outputList.append("%1.2f" % pValue) + outputList.append("%1.2g" % pValue) outline = string.join(outputList, "\t") print outline