erange version 4.0a dev release
[erange.git] / geneUpstreamBins.py
index e8554161c6fe18417dd227a2bed985a0c826038c..3bdd1dd773c94372692ff066995445c680bd8055 100755 (executable)
@@ -9,12 +9,13 @@ try:
 except:
     pass
 
-import sys, optparse
-from commoncode import readDataset
+import sys
+import optparse
+import ReadDataset
 from cistematic.genomes import Genome
-from cistematic.core.geneinfo import geneinfoDB
+from commoncode import getGeneInfoDict, getConfigParser, getConfigBoolOption, getConfigIntOption
 
-print "%prog: version 2.0"
+print "geneUpstreamBins: version 2.1"
 
 def main(argv=None):
     if not argv:
@@ -22,12 +23,7 @@ def main(argv=None):
 
     usage = "usage: python %prog genome rdsfile outfilename [--max regionSize] [--raw] [--cache]"
 
-    parser = optparse.OptionParser(usage=usage)
-    parser.add_option("--raw", action="store_false", dest="normalize",
-                       help="maximum region in bp")
-    parser.add_option("--max", type="int", dest="standardMinDist")
-    parser.add_option("--cache", action="store_true", dest="doCache")
-    parser.set_defaults(standardMinDist=3000, normalize=True, doCache=False)
+    parser = makeParser(usage)
     (options, args) = parser.parse_args(argv[1:])
 
     if len(args) < 3:
@@ -41,11 +37,29 @@ def main(argv=None):
     geneUpstreamBins(genome, hitfile, outfilename, options.standardMinDist, options.normalize, options.doCache)
 
 
+def makeParser(usage=""):
+    parser = optparse.OptionParser(usage=usage)
+    parser.add_option("--raw", action="store_false", dest="normalize",
+                       help="maximum region in bp")
+    parser.add_option("--max", type="int", dest="standardMinDist")
+    parser.add_option("--cache", action="store_true", dest="doCache")
+
+    configParser = getConfigParser()
+    section = "geneUpstreamBins"
+    standardMinDist = getConfigIntOption(configParser, section, "regionSize", 3000)
+    normalize = getConfigBoolOption(configParser, section, "normalize", True)
+    doCache = getConfigBoolOption(configParser, section, "doCache", False)
+
+    parser.set_defaults(standardMinDist=standardMinDist, normalize=normalize, doCache=doCache)
+
+    return parser
+
+
 def geneUpstreamBins(genome, hitfile, outfilename, standardMinDist=3000, normalize=True, doCache=False):
     bins = 10
     standardMinThresh = standardMinDist / bins
 
-    hitRDS = readDataset(hitfile, verbose = True, cache=doCache)
+    hitRDS = ReadDataset.ReadDataset(hitfile, verbose = True, cache=doCache)
     normalizationFactor = 1.0
     if normalize:
         totalCount = len(hitRDS)
@@ -54,9 +68,7 @@ def geneUpstreamBins(genome, hitfile, outfilename, standardMinDist=3000, normali
     hitDict = hitRDS.getReadsDict(doMulti=True, findallOptimize=True)
 
     hg = Genome(genome)
-    idb = geneinfoDB(cache=True)
-
-    geneinfoDict = idb.getallGeneInfo(genome)
+    geneinfoDict = getGeneInfoDict(genome, cache=True)
     featuresDict = hg.getallGeneFeatures()
 
     outfile = open(outfilename,"w")
@@ -117,7 +129,9 @@ def geneUpstreamBins(genome, hitfile, outfilename, standardMinDist=3000, normali
             continue
 
         binList = [0] * bins
-        for (tagStart, sense, weight) in hitDict[chrom]:
+        for read in hitDict[chrom]:
+            tagStart = read["start"]
+            weight = read["weight"]
             tagStart -= gstart
             if tagStart >= glen:
                 break