erange version 4.0a dev release

[erange.git] / geneUpstreamBins.py
diff --git a/geneUpstreamBins.py b/geneUpstreamBins.py

index e8554161c6fe18417dd227a2bed985a0c826038c..3bdd1dd773c94372692ff066995445c680bd8055 100755 (executable)
--- a/geneUpstreamBins.py
+++ b/geneUpstreamBins.py
@@ -9,12 +9,13 @@ try:
  except:
      pass
  
-import sys, optparse
-from commoncode import readDataset
+import sys
+import optparse
+import ReadDataset
  from cistematic.genomes import Genome
-from cistematic.core.geneinfo import geneinfoDB
+from commoncode import getGeneInfoDict, getConfigParser, getConfigBoolOption, getConfigIntOption
  
-print "%prog: version 2.0"
+print "geneUpstreamBins: version 2.1"
  
  def main(argv=None):
      if not argv:
@@ -22,12 +23,7 @@ def main(argv=None):
  
      usage = "usage: python %prog genome rdsfile outfilename [--max regionSize] [--raw] [--cache]"
  
-    parser = optparse.OptionParser(usage=usage)
-    parser.add_option("--raw", action="store_false", dest="normalize",
-                       help="maximum region in bp")
-    parser.add_option("--max", type="int", dest="standardMinDist")
-    parser.add_option("--cache", action="store_true", dest="doCache")
-    parser.set_defaults(standardMinDist=3000, normalize=True, doCache=False)
+    parser = makeParser(usage)
      (options, args) = parser.parse_args(argv[1:])
  
      if len(args) < 3:
@@ -41,11 +37,29 @@ def main(argv=None):
      geneUpstreamBins(genome, hitfile, outfilename, options.standardMinDist, options.normalize, options.doCache)
  
  
+def makeParser(usage=""):
+    parser = optparse.OptionParser(usage=usage)
+    parser.add_option("--raw", action="store_false", dest="normalize",
+                       help="maximum region in bp")
+    parser.add_option("--max", type="int", dest="standardMinDist")
+    parser.add_option("--cache", action="store_true", dest="doCache")
+
+    configParser = getConfigParser()
+    section = "geneUpstreamBins"
+    standardMinDist = getConfigIntOption(configParser, section, "regionSize", 3000)
+    normalize = getConfigBoolOption(configParser, section, "normalize", True)
+    doCache = getConfigBoolOption(configParser, section, "doCache", False)
+
+    parser.set_defaults(standardMinDist=standardMinDist, normalize=normalize, doCache=doCache)
+
+    return parser
+
+
  def geneUpstreamBins(genome, hitfile, outfilename, standardMinDist=3000, normalize=True, doCache=False):
      bins = 10
      standardMinThresh = standardMinDist / bins
  
-    hitRDS = readDataset(hitfile, verbose = True, cache=doCache)
+    hitRDS = ReadDataset.ReadDataset(hitfile, verbose = True, cache=doCache)
      normalizationFactor = 1.0
      if normalize:
          totalCount = len(hitRDS)
@@ -54,9 +68,7 @@ def geneUpstreamBins(genome, hitfile, outfilename, standardMinDist=3000, normali
      hitDict = hitRDS.getReadsDict(doMulti=True, findallOptimize=True)
  
      hg = Genome(genome)
-    idb = geneinfoDB(cache=True)
-
-    geneinfoDict = idb.getallGeneInfo(genome)
+    geneinfoDict = getGeneInfoDict(genome, cache=True)
      featuresDict = hg.getallGeneFeatures()
  
      outfile = open(outfilename,"w")
@@ -117,7 +129,9 @@ def geneUpstreamBins(genome, hitfile, outfilename, standardMinDist=3000, normali
              continue
  
          binList = [0] * bins
-        for (tagStart, sense, weight) in hitDict[chrom]:
+        for read in hitDict[chrom]:
+            tagStart = read["start"]
+            weight = read["weight"]
              tagStart -= gstart
              if tagStart >= glen:
                  break