first pass cleanup of cistematic/genomes; change bamPreprocessing

[erange.git] / altSpliceCounts.py
diff --git a/altSpliceCounts.py b/altSpliceCounts.py

index 1517ef888c917bd92d4b22fabde6018c470c9581..c59dc9249b6c82ded0466f872e98a9900e702c46 100755 (executable)
--- a/altSpliceCounts.py
+++ b/altSpliceCounts.py
@@ -2,12 +2,14 @@ try:
      import psyco
      psyco.full()
  except:
-    print 'psyco not running'
+    print "psyco not running"
  
-print 'version 3.6'
+print "altSpliceCounts: version 3.7"
  
-import sys, optparse
-from commoncode import readDataset
+import sys
+import optparse
+import ReadDataset
+from commoncode import getConfigParser, getConfigOption
  
  def main(argv=None):
      if not argv:
@@ -15,10 +17,7 @@ def main(argv=None):
  
      usage = "usage: python %s rdsfile outfilename [--cache pages]"
  
-    parser = optparse.OptionParser(usage=usage)
-    parser.add_option("--cache", type="int", dest="numCachePages",
-                      help="number of cache pages to use [default: 100000]")
-    parser.set_defaults(numCachePages=None)
+    parser = makeParser(usage)
      (options, args) = parser.parse_args(argv[1:])
  
      if len(args) < 2:
@@ -38,18 +37,32 @@ def main(argv=None):
      altSpliceCounts(hitfile, outfilename, doCache, cachePages)
  
  
+def makeParser(usage=""):  # build the optparse parser used by main(); usage is passed through to OptionParser
+    parser = optparse.OptionParser(usage=usage)
+    parser.add_option("--cache", type="int", dest="numCachePages",
+                      help="number of cache pages to use [default: 100000]")  # NOTE(review): help advertises 100000, but the default set below comes from the config lookup (None passed as its last argument) - confirm
+
+    configParser = getConfigParser()  # helper imported from commoncode
+    section = "altSpliceCounts"  # config section name used for this script's options
+    numCachePages = getConfigOption(configParser, section, "numCachePages", None)  # presumably None is the fallback when the option is not configured - confirm against commoncode
+
+    parser.set_defaults(numCachePages=numCachePages)  # the looked-up value becomes the default when --cache is not given
+
+    return parser  # caller is expected to run parse_args() on the returned parser
+
+
  def altSpliceCounts(hitfile, outfilename, doCache=False, cachePages=100000):
      startDict = {}
      stopDict = {}
      resultDict = {}
  
-    hitRDS = readDataset(hitfile, verbose = True, cache=doCache)
+    hitRDS = ReadDataset.ReadDataset(hitfile, verbose=True, cache=doCache)
      if cachePages > hitRDS.getDefaultCacheSize():
          hitRDS.setDBcache(cachePages)
  
      readlen = hitRDS.getReadSize()
      hitDict = hitRDS.getSplicesDict(noSense=True)
-    outfile = open(outfilename,'w')
+    outfile = open(outfilename, "w")
  
      for chrom in hitDict:
          startDict[chrom] = []
@@ -58,7 +71,9 @@ def altSpliceCounts(hitfile, outfilename, doCache=False, cachePages=100000):
  
      index = 0
      for chrom in hitDict:
-        for (tagStart, lstop, rstart, tagStop) in hitDict[chrom]:
+        for read in hitDict[chrom]:
+            tagStart = read["startL"]
+            tagStop = read["stopR"]
              index += 1
              length = tagStop - tagStart
              if length < readlen + 5:
@@ -140,7 +155,7 @@ def altSpliceCounts(hitfile, outfilename, doCache=False, cachePages=100000):
  
          resultDict[chrom].sort()
          for line in resultDict[chrom]:
-            outfile.write('alt%d' % alternative + '\tchr%s\t%d\t%d\tchr%s\t%d\t%d\n'  % line)
+            outfile.write("alt%d" % alternative + "\tchr%s\t%d\t%d\tchr%s\t%d\t%d\n"  % line)
              alternative += 1
  
          print chrom, maxIndex, spliceEvent, altSpliceEvent