first pass cleanup of cistematic/genomes; change bamPreprocessing
[erange.git] / normalizeFinalExonic.py
index 6053e8077865c2cfbe3142bf8d3231da70d8688f..6b47a208122ed7d24638dfab16cf3862ffaf214f 100755 (executable)
@@ -4,43 +4,65 @@ try:
 except:
     pass
 
-import sys, optparse
-from commoncode import readDataset
+import sys
+import optparse
+import ReadDataset
+from commoncode import getConfigParser, getConfigBoolOption, getConfigFloatOption
 
-print "%prog: version 3.5" % sys.argv[0]
+print "normalizeFinalExonic: version 3.6"  # module-level statement: runs when the file is loaded, before main()
 
 def main(argv=None):
     if not argv:
         argv = sys.argv
 
-    usage = "usage: python %prog rdsfile expandedRPKMfile multicountfile outfile [--multifraction] [--multifold] [--minrpkm minThreshold] [--cache] [--withGID]"
-
-    parser = optparse.OptionParser(usage=usage)
-    parser.add_option("--multifraction", action="store_true", dest="reportfraction")
-    parser.add_option("--multifold", action="store_true", dest="reportFold")
-    parser.add_option("--minrpkm", type="float", dest="minThreshold")
-    parser.add_option("--cache", action="store_true", dest="doCache")
-    parser.add_option("--withGID", action="store_true", dest="writeGID")
-    parser.set_defaults(reportFraction=False, reportFold=False, minThreshold=0.,
-                        doCache=False, writeGID=False)
+    usage = "usage: python normalizeFinalExonic rdsfile expandedRPKMfile multicountfile outfile [--multifraction] [--multifold] [--minrpkm minThreshold] [--cache] [--withGID]"
 
+    parser = makeParser(usage)  # option definitions and their defaults now live in makeParser()
     (options, args) = parser.parse_args(argv[1:])
 
     if len(args) < 4:
         print usage
         sys.exit(1)
 
-    rdsfilename = argv[1]
-    expandedRPKMfile = args[3]
+    rdsfilename = args[0]  # positionals are read from optparse's leftover args, not raw argv
+    expandedRPKMfile = args[1]
     multicountfile = args[2]
     outfilename = args[3]
 
-    normalizeFinalExonic(rdsfilename, expandedRPKMfile, multicountfile, outfilename,
+    readCounts = {}  # read totals passed to normalizeFinalExonic() under the keys "uniq", "splice", "multi"
+    RDS = ReadDataset.ReadDataset(rdsfilename, verbose=True, cache=options.doCache, reportCount=False)
+    readCounts["uniq"] = RDS.getUniqsCount()
+    readCounts["splice"] = RDS.getSplicesCount()
+    readCounts["multi"] = RDS.getMultiCount()
+
+    normalizeFinalExonic(readCounts, expandedRPKMfile, multicountfile, outfilename,
                          options.reportFraction, options.reportFold, options.minThreshold,
                          options.doCache, options.writeGID)
 
 
-def normalizeFinalExonic(rdsfilename, expandedRPKMfilename, multicountfilename, outfilename,
+def makeParser(usage=""):
+    """Return the OptionParser for this script; defaults come from getConfig*Option lookups in the 'normalizeFinalExonic' section."""
+    parser = optparse.OptionParser(usage=usage)
+    parser.add_option("--multifraction", action="store_true", dest="reportFraction")  # dest must match the set_defaults() key and the options.reportFraction read in main()
+    parser.add_option("--multifold", action="store_true", dest="reportFold")
+    parser.add_option("--minrpkm", type="float", dest="minThreshold")
+    parser.add_option("--cache", action="store_true", dest="doCache")
+    parser.add_option("--withGID", action="store_true", dest="writeGID")
+
+    configParser = getConfigParser()
+    section = "normalizeFinalExonic"
+    reportFraction = getConfigBoolOption(configParser, section, "multifraction", False)  # last argument is presumably the fallback value - confirm in commoncode
+    reportFold = getConfigBoolOption(configParser, section, "reportFold", False)
+    minThreshold = getConfigFloatOption(configParser, section, "minThreshold", 0.)
+    doCache = getConfigBoolOption(configParser, section, "doCache", False)
+    writeGID = getConfigBoolOption(configParser, section, "writeGID", False)
+
+    parser.set_defaults(reportFraction=reportFraction, reportFold=reportFold, minThreshold=minThreshold, doCache=doCache, writeGID=writeGID)
+
+    return parser
+
+
+def normalizeFinalExonic(readCounts, expandedRPKMfilename, multicountfilename, outfilename,
                          reportFraction=False, reportFold=False, minThreshold=0., doCache=False,
                          writeGID=False):
 
@@ -53,10 +75,9 @@ def normalizeFinalExonic(rdsfilename, expandedRPKMfilename, multicountfilename,
     elif reportFold:
         print "reporting fold contribution of multireads"
 
-    RDS = readDataset(rdsfilename, verbose=True, cache=doCache, reportCount=False)
-    uniqcount = RDS.getUniqsCount()
-    splicecount = RDS.getSplicesCount()
-    multicount = RDS.getMultiCount()
+    uniqcount = readCounts["uniq"]
+    splicecount = readCounts["splice"]
+    multicount = readCounts["multi"]
     countDict = {}
     multicountDict = {}
     lengthDict = {}