X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=normalizeFinalExonic.py;h=6b47a208122ed7d24638dfab16cf3862ffaf214f;hp=6053e8077865c2cfbe3142bf8d3231da70d8688f;hb=HEAD;hpb=5e4ae21098dba3d1edcf11e7279da0d84c3422e4 diff --git a/normalizeFinalExonic.py b/normalizeFinalExonic.py index 6053e80..6b47a20 100755 --- a/normalizeFinalExonic.py +++ b/normalizeFinalExonic.py @@ -4,43 +4,65 @@ try: except: pass -import sys, optparse -from commoncode import readDataset +import sys +import optparse +import ReadDataset +from commoncode import getConfigParser, getConfigBoolOption, getConfigFloatOption -print "%prog: version 3.5" % sys.argv[0] +print "normalizeFinalExonic: version 3.6" def main(argv=None): if not argv: argv = sys.argv - usage = "usage: python %prog rdsfile expandedRPKMfile multicountfile outfile [--multifraction] [--multifold] [--minrpkm minThreshold] [--cache] [--withGID]" - - parser = optparse.OptionParser(usage=usage) - parser.add_option("--multifraction", action="store_true", dest="reportfraction") - parser.add_option("--multifold", action="store_true", dest="reportFold") - parser.add_option("--minrpkm", type="float", dest="minThreshold") - parser.add_option("--cache", action="store_true", dest="doCache") - parser.add_option("--withGID", action="store_true", dest="writeGID") - parser.set_defaults(reportFraction=False, reportFold=False, minThreshold=0., - doCache=False, writeGID=False) + usage = "usage: python normalizeFinalExonic rdsfile expandedRPKMfile multicountfile outfile [--multifraction] [--multifold] [--minrpkm minThreshold] [--cache] [--withGID]" + parser = makeParser(usage) (options, args) = parser.parse_args(argv[1:]) if len(args) < 4: print usage sys.exit(1) - rdsfilename = argv[1] - expandedRPKMfile = args[3] + rdsfilename = args[0] + expandedRPKMfile = args[1] multicountfile = args[2] outfilename = args[3] - normalizeFinalExonic(rdsfilename, expandedRPKMfile, multicountfile, outfilename, + readCounts = {} + RDS = ReadDataset.ReadDataset(rdsfilename, verbose=True, cache=options.doCache, reportCount=False) + readCounts["uniq"] = RDS.getUniqsCount() + readCounts["splice"] = RDS.getSplicesCount() + readCounts["multi"] = RDS.getMultiCount() + + normalizeFinalExonic(readCounts, expandedRPKMfile, multicountfile, outfilename, options.reportFraction, options.reportFold, options.minThreshold, options.doCache, options.writeGID) -def normalizeFinalExonic(rdsfilename, expandedRPKMfilename, multicountfilename, outfilename, +def makeParser(usage=""): + parser = optparse.OptionParser(usage=usage) + parser.add_option("--multifraction", action="store_true", dest="reportfraction") + parser.add_option("--multifold", action="store_true", dest="reportFold") + parser.add_option("--minrpkm", type="float", dest="minThreshold") + parser.add_option("--cache", action="store_true", dest="doCache") + parser.add_option("--withGID", action="store_true", dest="writeGID") + + configParser = getConfigParser() + section = "normalizeFinalExonic" + reportFraction = getConfigBoolOption(configParser, section, "multifraction", False) + reportFold = getConfigBoolOption(configParser, section, "reportFold", False) + minThreshold = getConfigFloatOption(configParser, section, "minThreshold", 0.) + doCache = getConfigBoolOption(configParser, section, "doCache", False) + writeGID = getConfigBoolOption(configParser, section, "writeGID", False) + + parser.set_defaults(reportFraction=reportFraction, reportFold=reportFold, minThreshold=minThreshold, + doCache=doCache, writeGID=writeGID) + + return parser + + +def normalizeFinalExonic(readCounts, expandedRPKMfilename, multicountfilename, outfilename, reportFraction=False, reportFold=False, minThreshold=0., doCache=False, writeGID=False): @@ -53,10 +75,9 @@ def normalizeFinalExonic(rdsfilename, expandedRPKMfilename, multicountfilename, elif reportFold: print "reporting fold contribution of multireads" - RDS = readDataset(rdsfilename, verbose=True, cache=doCache, reportCount=False) - uniqcount = RDS.getUniqsCount() - splicecount = RDS.getSplicesCount() - multicount = RDS.getMultiCount() + uniqcount = readCounts["uniq"] + splicecount = readCounts["splice"] + multicount = readCounts["multi"] countDict = {} multicountDict = {} lengthDict = {}