development checkpoint

author Sean Upchurch <sau@caltech.edu>

Mon, 28 Mar 2011 23:40:47 +0000 (16:40 -0700)

committer Sean Upchurch <sau@caltech.edu>

Mon, 28 Mar 2011 23:40:47 +0000 (16:40 -0700)
author Sean Upchurch <sau@caltech.edu>
Mon, 28 Mar 2011 23:40:47 +0000 (16:40 -0700)
committer Sean Upchurch <sau@caltech.edu>
Mon, 28 Mar 2011 23:40:47 +0000 (16:40 -0700)
diff --git a/ReadDataset.py b/ReadDataset.py

index 56e28a8995b734735ace06d1d990ac9b46753580..850a5ec602a7b6d7a9a22ac98c4bf3eef4b873e5 100644 (file)
--- a/ReadDataset.py
+++ b/ReadDataset.py
@@ -764,6 +764,7 @@ class ReadDataset():
      def getSplicesCount(self, chrom="", rmin="", rmax="", restrict="", distinct=False):
          """ returns the number of row in the splices table.
          """
+        # TODO: if the rds type is DNA should this just return zero?
          return self.getTableEntryCount("splices", chrom, rmin, rmax, restrict, distinct, startField="startL")
  
  
@@ -1221,5 +1222,5 @@ class ReadDataset():
              destinationEntries.append((row["readID"], row["chrom"], int(row["startL"]), int(row["stopL"]), int(row["startR"]), int(row["stopR"]), row["sense"],
                                         row["weight"], row["flag"], row["mismatch"]))
  
-        self.memcon.executemany("insert into splices(ID, readID, chrom, startL, stopL, startR, stopR, sense, weight, flag, mismatch) values (NULL,?,?,?,?,?,?,?,?)", destinationEntries)
+        self.memcon.executemany("insert into splices(ID, readID, chrom, startL, stopL, startR, stopR, sense, weight, flag, mismatch) values (NULL,?,?,?,?,?,?,?,?,?,?)", destinationEntries)
  
diff --git a/featureIntersects.py b/featureIntersects.py

index c24b79edb312fae6c0b19f0d7e3d3ffb45d12984..733849902899ceb75017995ea50bd209aa66c2f9 100755 (executable)
--- a/featureIntersects.py
+++ b/featureIntersects.py
@@ -54,7 +54,7 @@ def makeParser(usage=""):
  def featureIntersects(tabFileName, cistype="TFBSCONSSITES", radius=100):
      
      posList = getPositionList(tabFileName)
-    feats = featuresIntersecting("human", posList, radius, cistype)
+    feats = featuresIntersecting("hsapiens", posList, radius, cistype)
      featkeys = feats.keys()
      featkeys.sort()
      for (chrom, pos) in featkeys:
diff --git a/findall.py b/findall.py

index ec75efb54f3680c892578a38811c1bd241c37241..aa820374b994d497fbc4dd336a98b795489f5900 100755 (executable)
--- a/findall.py
+++ b/findall.py
@@ -231,6 +231,7 @@ def findall(factor, hitfile, outfilename, minHits=4.0, minRatio=4.0, maxSpacing=
          doCache = False
          cachePages = -1
  
+    mockRDS = None
      if doControl:
          print "\ncontrol:" 
          mockRDS = ReadDataset.ReadDataset(mockfile, verbose=True, cache=doCache)
@@ -268,6 +269,7 @@ def findall(factor, hitfile, outfilename, minHits=4.0, minRatio=4.0, maxSpacing=
          mockRDSsize = len(mockRDS) / 1000000.
          controlSampleString = "\t%s (%.1f M reads)" % (mockfile, mockRDSsize)
      else:
+        mockRDSsize = 0
          controlSampleString = " none"
  
      hitRDSsize = len(hitRDS) / 1000000.
@@ -318,7 +320,11 @@ def findall(factor, hitfile, outfilename, minHits=4.0, minRatio=4.0, maxSpacing=
      hitChromList = hitRDS.getChromosomes()
      if doControl:
          mockChromList = mockRDS.getChromosomes()
+    else:
+        mockChromList = []
  
+    mockSampleSize = 0
+    hitSampleSize = 0
      if normalize:
          if doControl:
              mockSampleSize = mockRDSsize
@@ -360,27 +366,28 @@ def findall(factor, hitfile, outfilename, minHits=4.0, minRatio=4.0, maxSpacing=
              continue
  
          #now do background swapping the two samples around
-        print "calculating background..."
-        backgroundTrimValue = 1/20.
-        backgroundRegionStats, backgroundRegionWeights = locateRegions(mockRDS, mockSampleSize, hitRDS, hitSampleSize,
-                                                                       chromosome, useMulti, normalize, maxSpacing,
-                                                                       doDirectionality, doTrim, minHits, minRatio,
-                                                                       readlen, shiftValue, minPeak, minPlusRatio,
-                                                                       maxPlusRatio, leftPlusRatio, listPeak, noMulti,
-                                                                       doControl, factor, backgroundTrimValue)
-
-        statistics["mIndex"] += backgroundRegionStats["index"]
-        statistics["mTotal"] += backgroundRegionStats["total"]
-        statistics["failed"] += backgroundRegionStats["failed"]
-        print statistics["mIndex"], statistics["mTotal"]
-        if doPvalue:
-            if pValueType == "self":
-                p, poissonmean = calculatePValue(allRegionWeights)
-            else:
-                p, poissonmean = calculatePValue(backgroundRegionWeights)
+        if mockRDS is not None:
+            print "calculating background..."
+            backgroundTrimValue = 1/20.
+            backgroundRegionStats, backgroundRegionWeights = locateRegions(mockRDS, mockSampleSize, hitRDS, hitSampleSize,
+                                                                           chromosome, useMulti, normalize, maxSpacing,
+                                                                           doDirectionality, doTrim, minHits, minRatio,
+                                                                           readlen, shiftValue, minPeak, minPlusRatio,
+                                                                           maxPlusRatio, leftPlusRatio, listPeak, noMulti,
+                                                                           doControl, factor, backgroundTrimValue)
+
+            statistics["mIndex"] += backgroundRegionStats["index"]
+            statistics["mTotal"] += backgroundRegionStats["total"]
+            statistics["failed"] += backgroundRegionStats["failed"]
+            print statistics["mIndex"], statistics["mTotal"]
+            if doPvalue:
+                if pValueType == "self":
+                    p, poissonmean = calculatePValue(allRegionWeights)
+                else:
+                    p, poissonmean = calculatePValue(backgroundRegionWeights)
  
-        print headline
-        shiftModeValue = writeRegionsToFile(outfile, outregions, doPvalue, p, poissonmean, reportshift, shiftValue)
+            print headline
+            shiftModeValue = writeRegionsToFile(outfile, outregions, doPvalue, p, poissonmean, reportshift, shiftValue)
  
      footer = getFooter(statistics, doDirectionality, doRevBackground, shiftValue, reportshift, shiftModeValue)
      print footer
@@ -514,7 +521,7 @@ def learnShift(hitDict, hitSampleSize, mockRDS, chrom, doControl, useMulti, norm
  
  
  def getFoldRatio(rds, chrom, start, stop, doControl, useMulti, normalize, sampleSize, sumAll, minRatio):
-    if doControl:
+    if doControl and rds is not None:
          foldRatio = getFoldRatioFromRDS(rds, chrom, start, stop, useMulti, normalize, sampleSize, sumAll)
      else:
          foldRatio = minRatio
@@ -564,8 +571,7 @@ def locateRegions(rds, rdsSampleSize, referenceRDS, referenceSampleSize, chrom,
              regionWeights.append(int(sumAll))
              if sumAll >= minHits and numStarts > minRatio and (regionStop - regionStart) > readlen:
                  sumMulti = 0.
-                #first pass uses getFoldRatio on mockRDS as there may not be control
-                foldRatio = getFoldRatioFromRDS(referenceRDS, chrom, regionStart, regionStop, useMulti, normalizeToRPM, referenceSampleSize, sumAll)
+                foldRatio = getFoldRatio(referenceRDS, chrom, regionStart, regionStop, doControl, useMulti, normalizeToRPM, referenceSampleSize, sumAll, minRatio)
                  if foldRatio >= minRatio:
                      # first pass, with absolute numbers
                      peak = findPeak(currentReadList, regionStart, regionStop - regionStart, readlen, doWeight=True, leftPlus=doDirectionality, shift=shiftValue)
@@ -719,7 +725,7 @@ def writeRegionsToFile(outfile, outregions, doPvalue, pValue, poissonmean, repor
                  pValue *= poissonmean
                  pValue /= i+1
  
-            outputList.append("%1.2f" % pValue)
+            outputList.append("%1.2g" % pValue)
  
          outline = string.join(outputList, "\t")
          print outline
diff --git a/getallgenes.py b/getallgenes.py

index a717fe86f0881268f8745791dc94d97f6d161d09..9433e2e1dd2ffeb0e299d378eb7ea46b3e10fc9b 100755 (executable)
--- a/getallgenes.py
+++ b/getallgenes.py
@@ -32,7 +32,7 @@ def main(argv=None):
      getallgenes(genome, infilename, outfilename, options.maxRadius,
                  options.nomatchfilename, options.step, options.trackFar,
                  options.trackStrand, options.compact, options.colID,
-                options.doCache, options.extendgenome, options.replaceModels)
+                options.doCache, options.extendGenome, options.replaceModels)
  
  
  def getParser(usage):
diff --git a/getsplicefa.py b/getsplicefa.py

index cbca805a2426ace12e5121668cffecf2c646d3d5..197800e09fab70bf2824bcf3e9caa1ecfb364ef1 100755 (executable)
--- a/getsplicefa.py
+++ b/getsplicefa.py
@@ -19,7 +19,7 @@ def main(argv=None):
      print verstring
      delimiter = "|"
  
-    usage = "usage: python %prog genome ucscModels outfilename maxBorder [--verbose] [--spacer num]\
+    usage = "usage: python getsplicefa.py genome ucscModels outfilename maxBorder [--verbose] [--spacer num]\
              \n\twhere spacer is by default 2, and maxBorder should be readlen - (2 * spacer)\
              \n\tdelimiter is set to %s - edit the code to change it, if necessary\n" % delimiter
  
@@ -31,7 +31,7 @@ def main(argv=None):
          genome = args[0]
          datafilename = args[1]
          outfilename = args[2]
-        maxBorder = args[3]
+        maxBorder = int(args[3])
      except IndexError:
          print usage
          sys.exit(1)
diff --git a/normalizeFinalExonic.py b/normalizeFinalExonic.py

index d5a6b4b954d4a9df6cb7b749c97c513e9394dafb..6c1ae3323555722deea8bf91c5254ceb310560cb 100755 (executable)
--- a/normalizeFinalExonic.py
+++ b/normalizeFinalExonic.py
@@ -24,8 +24,8 @@ def main(argv=None):
          print usage
          sys.exit(1)
  
-    rdsfilename = argv[1]
-    expandedRPKMfile = args[3]
+    rdsfilename = args[0]
+    expandedRPKMfile = args[1]
      multicountfile = args[2]
      outfilename = args[3]
author	Sean Upchurch <sau@caltech.edu>
	Mon, 28 Mar 2011 23:40:47 +0000 (16:40 -0700)
committer	Sean Upchurch <sau@caltech.edu>
	Mon, 28 Mar 2011 23:40:47 +0000 (16:40 -0700)
ReadDataset.py		patch \| blob \| history
featureIntersects.py		patch \| blob \| history
findall.py		patch \| blob \| history
getallgenes.py		patch \| blob \| history
getsplicefa.py		patch \| blob \| history
normalizeFinalExonic.py		patch \| blob \| history