X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=combineRPKMs.py;h=77f30d6fc2acb2a9424634a138a03ee30e314f91;hp=8fd8f9f56b80d6af257cccdfdbad8f2e8fef1c9b;hb=47bd897210cb85e042f11d7400b46d94400cc428;hpb=5e4ae21098dba3d1edcf11e7279da0d84c3422e4 diff --git a/combineRPKMs.py b/combineRPKMs.py index 8fd8f9f..77f30d6 100755 --- a/combineRPKMs.py +++ b/combineRPKMs.py @@ -3,14 +3,17 @@ # ENRAGE # -print 'version 1.0' +print "combineRPKMs: version 1.1" try: import psyco psyco.full() except: pass -import sys, optparse +import sys +import optparse +import string +from commoncode import getConfigParser, getConfigBoolOption def main(argv=None): @@ -18,9 +21,7 @@ def main(argv=None): argv = sys.argv usage = "usage: python %prog firstRPKM expandedRPKM finalRPKM combinedOutfile [--withmultifraction]" - parser = optparse.OptionParser(usage=usage) - parser.add_option("--withmultifraction", action="store_true", dest="doFraction") - parser.set_defaults(doFraction=False) + parser = makeParser(usage) (options, args) = parser.parse_args(argv[1:]) if len(args) < 3: @@ -35,53 +36,69 @@ def main(argv=None): combineRPKMs(firstfile, expandedfile, finalfile, outfile, options.doFraction) -def combineRPKMs(firstfileName, expandedfileName, finalfileName, outfileName, doFraction=False): - firstfile = open(firstfileName) - expandedfile = open(expandedfileName) - finalfile = open(finalfileName) - outfile = open(outfileName, "w") +def makeParser(usage=""): + parser = optparse.OptionParser(usage=usage) + parser.add_option("--withmultifraction", action="store_true", dest="doFraction") - firstDict = {} - gidDict = {} - expandedDict = {} + configParser = getConfigParser() + section = "combineRPKMs" + doFraction = getConfigBoolOption(configParser, section, "doFraction", False) - for line in firstfile: - fields = line.strip().split() - firstDict[fields[1]] = fields[-1] + parser.set_defaults(doFraction=doFraction) - firstfile.close() + return parser - for line in expandedfile: - fields = line.strip().split() - expandedDict[fields[1]] = fields[-1] - gidDict[fields[1]] = fields[0] - expandedfile.close() +def combineRPKMs(firstfileName, expandedfileName, finalfileName, outfileName, doFraction=False): + + firstDict = getRPKMDict(firstfileName) + gidDict, expandedDict = getRPKMDict(expandedfileName, getGIDDict=True) if doFraction: header = "gid\tRNAkb\tgene\tfirstRPKM\texpandedRPKM\tfinalRPKM\tfractionMulti\n" else: header = "gid\tRNAkb\tgene\tfirstRPKM\texpandedRPKM\tfinalRPKM\n" + outfile = open(outfileName, "w") outfile.write(header) + finalfile = open(finalfileName) for line in finalfile: fields = line.strip().split() gene = fields[0] rnakb = fields[1] finalRPKM = fields[2] firstRPKM = firstDict.get(gene, "") - outline = "%s\t%s\t%s\t%s\t%s\t%s" % (gidDict[gene], rnakb, gene, firstRPKM, expandedDict[gene], finalRPKM) + outputFields = [gidDict[gene], rnakb, gene, firstRPKM, expandedDict[gene], finalRPKM] if doFraction: fraction = fields[3] - outline += "\t%s" % fraction - - outfile.write(outline + '\n') + outputFields.append(fraction) + + outline = "%s\n" % string.join(outputFields, "\t") + outfile.write(outline) finalfile.close() outfile.close() +def getRPKMDict(rpkmFileName, getGIDDict=False): + gidDict = {} + rpkmDict = {} + rpkmFile = open(rpkmFileName) + for line in rpkmFile: + fields = line.strip().split() + rpkmDict[fields[1]] = fields[-1] + if getGIDDict: + gidDict[fields[1]] = fields[0] + + rpkmFile.close() + + if getGIDDict: + return gidDict, rpkmDict + else: + return rpkmDict + + if __name__ == "__main__": - main(sys.argv) \ No newline at end of file + main(sys.argv)