first pass cleanup of cistematic/genomes; change bamPreprocessing

[erange.git] / normalizeFinalExonic.py
diff --git a/normalizeFinalExonic.py b/normalizeFinalExonic.py

index 6053e8077865c2cfbe3142bf8d3231da70d8688f..6b47a208122ed7d24638dfab16cf3862ffaf214f 100755 (executable)
--- a/normalizeFinalExonic.py
+++ b/normalizeFinalExonic.py
@@ -4,43 +4,65 @@ try:
  except:
      pass
  
-import sys, optparse
-from commoncode import readDataset
+import sys
+import optparse
+import ReadDataset
+from commoncode import getConfigParser, getConfigBoolOption, getConfigFloatOption
  
-print "%prog: version 3.5" % sys.argv[0]
+print "normalizeFinalExonic: version 3.6"
  
  def main(argv=None):
      if not argv:
          argv = sys.argv
  
-    usage = "usage: python %prog rdsfile expandedRPKMfile multicountfile outfile [--multifraction] [--multifold] [--minrpkm minThreshold] [--cache] [--withGID]"
-
-    parser = optparse.OptionParser(usage=usage)
-    parser.add_option("--multifraction", action="store_true", dest="reportfraction")
-    parser.add_option("--multifold", action="store_true", dest="reportFold")
-    parser.add_option("--minrpkm", type="float", dest="minThreshold")
-    parser.add_option("--cache", action="store_true", dest="doCache")
-    parser.add_option("--withGID", action="store_true", dest="writeGID")
-    parser.set_defaults(reportFraction=False, reportFold=False, minThreshold=0.,
-                        doCache=False, writeGID=False)
+    usage = "usage: python normalizeFinalExonic rdsfile expandedRPKMfile multicountfile outfile [--multifraction] [--multifold] [--minrpkm minThreshold] [--cache] [--withGID]"
  
+    parser = makeParser(usage)
      (options, args) = parser.parse_args(argv[1:])
  
      if len(args) < 4:
          print usage
          sys.exit(1)
  
-    rdsfilename = argv[1]
-    expandedRPKMfile = args[3]
+    rdsfilename = args[0]
+    expandedRPKMfile = args[1]
      multicountfile = args[2]
      outfilename = args[3]
  
-    normalizeFinalExonic(rdsfilename, expandedRPKMfile, multicountfile, outfilename,
+    readCounts = {}
+    RDS = ReadDataset.ReadDataset(rdsfilename, verbose=True, cache=options.doCache, reportCount=False)
+    readCounts["uniq"] = RDS.getUniqsCount()
+    readCounts["splice"] = RDS.getSplicesCount()
+    readCounts["multi"] = RDS.getMultiCount()
+
+    normalizeFinalExonic(readCounts, expandedRPKMfile, multicountfile, outfilename,
                           options.reportFraction, options.reportFold, options.minThreshold,
                           options.doCache, options.writeGID)
  
  
-def normalizeFinalExonic(rdsfilename, expandedRPKMfilename, multicountfilename, outfilename,
+def makeParser(usage=""):  # build and return the optparse.OptionParser used by main()
+    parser = optparse.OptionParser(usage=usage)
+    parser.add_option("--multifraction", action="store_true", dest="reportFraction")
+    parser.add_option("--multifold", action="store_true", dest="reportFold")
+    parser.add_option("--minrpkm", type="float", dest="minThreshold")
+    parser.add_option("--cache", action="store_true", dest="doCache")
+    parser.add_option("--withGID", action="store_true", dest="writeGID")
+    # every dest above must match a set_defaults() keyword below and the options.* attribute read in main()
+    configParser = getConfigParser()
+    section = "normalizeFinalExonic"
+    reportFraction = getConfigBoolOption(configParser, section, "multifraction", False)
+    reportFold = getConfigBoolOption(configParser, section, "reportFold", False)
+    minThreshold = getConfigFloatOption(configParser, section, "minThreshold", 0.)
+    doCache = getConfigBoolOption(configParser, section, "doCache", False)
+    writeGID = getConfigBoolOption(configParser, section, "writeGID", False)
+    # defaults are read through the commoncode config helpers; the last argument is presumably the fallback - TODO confirm
+    parser.set_defaults(reportFraction=reportFraction, reportFold=reportFold, minThreshold=minThreshold,
+                        doCache=doCache, writeGID=writeGID)
+
+    return parser
+
+
+def normalizeFinalExonic(readCounts, expandedRPKMfilename, multicountfilename, outfilename,
                           reportFraction=False, reportFold=False, minThreshold=0., doCache=False,
                           writeGID=False):
  
@@ -53,10 +75,9 @@ def normalizeFinalExonic(rdsfilename, expandedRPKMfilename, multicountfilename,
      elif reportFold:
          print "reporting fold contribution of multireads"
  
-    RDS = readDataset(rdsfilename, verbose=True, cache=doCache, reportCount=False)
-    uniqcount = RDS.getUniqsCount()
-    splicecount = RDS.getSplicesCount()
-    multicount = RDS.getMultiCount()
+    uniqcount = readCounts["uniq"]
+    splicecount = readCounts["splice"]
+    multicount = readCounts["multi"]
      countDict = {}
      multicountDict = {}
      lengthDict = {}