development checkpoint
[erange.git] / findall.py
index ec75efb54f3680c892578a38811c1bd241c37241..aa820374b994d497fbc4dd336a98b795489f5900 100755 (executable)
@@ -231,6 +231,7 @@ def findall(factor, hitfile, outfilename, minHits=4.0, minRatio=4.0, maxSpacing=
         doCache = False
         cachePages = -1
 
+    mockRDS = None
     if doControl:
         print "\ncontrol:" 
         mockRDS = ReadDataset.ReadDataset(mockfile, verbose=True, cache=doCache)
@@ -268,6 +269,7 @@ def findall(factor, hitfile, outfilename, minHits=4.0, minRatio=4.0, maxSpacing=
         mockRDSsize = len(mockRDS) / 1000000.
         controlSampleString = "\t%s (%.1f M reads)" % (mockfile, mockRDSsize)
     else:
+        mockRDSsize = 0
         controlSampleString = " none"
 
     hitRDSsize = len(hitRDS) / 1000000.
@@ -318,7 +320,11 @@ def findall(factor, hitfile, outfilename, minHits=4.0, minRatio=4.0, maxSpacing=
     hitChromList = hitRDS.getChromosomes()
     if doControl:
         mockChromList = mockRDS.getChromosomes()
+    else:
+        mockChromList = []
 
+    mockSampleSize = 0
+    hitSampleSize = 0
     if normalize:
         if doControl:
             mockSampleSize = mockRDSsize
@@ -360,27 +366,28 @@ def findall(factor, hitfile, outfilename, minHits=4.0, minRatio=4.0, maxSpacing=
             continue
 
         #now do background swapping the two samples around
-        print "calculating background..."
-        backgroundTrimValue = 1/20.
-        backgroundRegionStats, backgroundRegionWeights = locateRegions(mockRDS, mockSampleSize, hitRDS, hitSampleSize,
-                                                                       chromosome, useMulti, normalize, maxSpacing,
-                                                                       doDirectionality, doTrim, minHits, minRatio,
-                                                                       readlen, shiftValue, minPeak, minPlusRatio,
-                                                                       maxPlusRatio, leftPlusRatio, listPeak, noMulti,
-                                                                       doControl, factor, backgroundTrimValue)
-
-        statistics["mIndex"] += backgroundRegionStats["index"]
-        statistics["mTotal"] += backgroundRegionStats["total"]
-        statistics["failed"] += backgroundRegionStats["failed"]
-        print statistics["mIndex"], statistics["mTotal"]
-        if doPvalue:
-            if pValueType == "self":
-                p, poissonmean = calculatePValue(allRegionWeights)
-            else:
-                p, poissonmean = calculatePValue(backgroundRegionWeights)
+        if mockRDS is not None:
+            print "calculating background..."
+            backgroundTrimValue = 1/20.
+            backgroundRegionStats, backgroundRegionWeights = locateRegions(mockRDS, mockSampleSize, hitRDS, hitSampleSize,
+                                                                           chromosome, useMulti, normalize, maxSpacing,
+                                                                           doDirectionality, doTrim, minHits, minRatio,
+                                                                           readlen, shiftValue, minPeak, minPlusRatio,
+                                                                           maxPlusRatio, leftPlusRatio, listPeak, noMulti,
+                                                                           doControl, factor, backgroundTrimValue)
+
+            statistics["mIndex"] += backgroundRegionStats["index"]
+            statistics["mTotal"] += backgroundRegionStats["total"]
+            statistics["failed"] += backgroundRegionStats["failed"]
+            print statistics["mIndex"], statistics["mTotal"]
+            if doPvalue:
+                if pValueType == "self":
+                    p, poissonmean = calculatePValue(allRegionWeights)
+                else:
+                    p, poissonmean = calculatePValue(backgroundRegionWeights)
 
-        print headline
-        shiftModeValue = writeRegionsToFile(outfile, outregions, doPvalue, p, poissonmean, reportshift, shiftValue)
+            print headline
+            shiftModeValue = writeRegionsToFile(outfile, outregions, doPvalue, p, poissonmean, reportshift, shiftValue)
 
     footer = getFooter(statistics, doDirectionality, doRevBackground, shiftValue, reportshift, shiftModeValue)
     print footer
@@ -514,7 +521,7 @@ def learnShift(hitDict, hitSampleSize, mockRDS, chrom, doControl, useMulti, norm
 
 
 def getFoldRatio(rds, chrom, start, stop, doControl, useMulti, normalize, sampleSize, sumAll, minRatio):
-    if doControl:
+    if doControl and rds is not None:
         foldRatio = getFoldRatioFromRDS(rds, chrom, start, stop, useMulti, normalize, sampleSize, sumAll)
     else:
         foldRatio = minRatio
@@ -564,8 +571,7 @@ def locateRegions(rds, rdsSampleSize, referenceRDS, referenceSampleSize, chrom,
             regionWeights.append(int(sumAll))
             if sumAll >= minHits and numStarts > minRatio and (regionStop - regionStart) > readlen:
                 sumMulti = 0.
-                #first pass uses getFoldRatio on mockRDS as there may not be control
-                foldRatio = getFoldRatioFromRDS(referenceRDS, chrom, regionStart, regionStop, useMulti, normalizeToRPM, referenceSampleSize, sumAll)
+                foldRatio = getFoldRatio(referenceRDS, chrom, regionStart, regionStop, doControl, useMulti, normalizeToRPM, referenceSampleSize, sumAll, minRatio)
                 if foldRatio >= minRatio:
                     # first pass, with absolute numbers
                     peak = findPeak(currentReadList, regionStart, regionStop - regionStart, readlen, doWeight=True, leftPlus=doDirectionality, shift=shiftValue)
@@ -719,7 +725,7 @@ def writeRegionsToFile(outfile, outregions, doPvalue, pValue, poissonmean, repor
                 pValue *= poissonmean
                 pValue /= i+1
 
-            outputList.append("%1.2f" % pValue)
+            outputList.append("%1.2g" % pValue)
 
         outline = string.join(outputList, "\t")
         print outline