erange version 4.0a dev release
[erange.git] / geneDownstreamBins.py
index 058ad8236a4f7df6d08a6ca293d0a27ab090c9bd..4ce97e4fc1380517447153e59fba80e5531e3029 100755 (executable)
@@ -10,12 +10,13 @@ except:
     pass
 
 # originally from version 1.3 of geneDnaDownstreamCounts.py
-import sys, optparse
-from commoncode import readDataset
+import sys
+import optparse
+import ReadDataset
 from cistematic.genomes import Genome
-from cistematic.core.geneinfo import geneinfoDB
+from commoncode import getGeneInfoDict, getConfigParser, getConfigIntOption
 
-print "%prog: version 2.0"
+print "geneDownstreamBins: version 2.1"
 
 def main(argv=None):
     if not argv:
@@ -23,10 +24,7 @@ def main(argv=None):
 
     usage = "usage: %prog genome rdsfile outfilename [--max regionSize]"
 
-    parser = optparse.OptionParser(usage=usage)
-    parser.add_option("--max", type="int", dest="standardMinDist",
-                      help="maximum region in bp")
-    parser.set_defaults(standardMinDist=3000)
+    parser = makeParser(usage)
     (options, args) = parser.parse_args(argv[1:])
 
     if len(args) < 3:
@@ -40,22 +38,33 @@ def main(argv=None):
     geneDownstreamBins(genome, hitfile, outfilename, options.standardMinDist)
 
 
+def makeParser(usage=""):
+    parser = optparse.OptionParser(usage=usage)
+    parser.add_option("--max", type="int", dest="standardMinDist",
+                      help="maximum region in bp")
+
+    configParser = getConfigParser()
+    section = "geneDownstreamBins"
+    standardMinDist = getConfigIntOption(configParser, section, "regionSize", 3000)
+
+    parser.set_defaults(standardMinDist=standardMinDist)
+
+    return parser
+
+
 def geneDownstreamBins(genome, hitfile, outfilename, standardMinDist=3000, doCache=False, normalize=False):
     bins = 10
     standardMinThresh = standardMinDist / bins
 
-    hitRDS = readDataset(hitfile, verbose=True, cache=doCache)
+    hitRDS = ReadDataset.ReadDataset(hitfile, verbose=True, cache=doCache)
     normalizationFactor = 1.0
     if normalize:
         hitDictSize = len(hitRDS)
         normalizationFactor = hitDictSize / 1000000.
 
     hitDict = hitRDS.getReadsDict(doMulti=True, findallOptimize=True)
-
+    geneinfoDict = getGeneInfoDict(genome, cache=True)
     hg = Genome(genome)
-    idb = geneinfoDB(cache=True)
-
-    geneinfoDict = idb.getallGeneInfo(genome)
     featuresDict = hg.getallGeneFeatures()
 
     outfile = open(outfilename, "w")
@@ -115,7 +124,9 @@ def geneDownstreamBins(genome, hitfile, outfilename, standardMinDist=3000, doCac
             continue
 
         binList = [0.] * bins
-        for (tagStart, sense, weight) in hitDict[chrom]:
+        for read in hitDict[chrom]:
+            tagStart = read["start"]
+            weight = read["weight"]
             tagStart -= gstart
             if tagStart >= glen:
                 break