# ENRAGE
#
-print 'version 1.0'
+print "combineRPKMs: version 1.1"
try:
import psyco
psyco.full()
except:
pass
-import sys, optparse
+import sys
+import optparse
+import string
+from commoncode import getConfigParser, getConfigBoolOption
def main(argv=None):
argv = sys.argv
usage = "usage: python %prog firstRPKM expandedRPKM finalRPKM combinedOutfile [--withmultifraction]"
- parser = optparse.OptionParser(usage=usage)
- parser.add_option("--withmultifraction", action="store_true", dest="doFraction")
- parser.set_defaults(doFraction=False)
+ parser = makeParser(usage)
(options, args) = parser.parse_args(argv[1:])
if len(args) < 3:
combineRPKMs(firstfile, expandedfile, finalfile, outfile, options.doFraction)
-def combineRPKMs(firstfileName, expandedfileName, finalfileName, outfileName, doFraction=False):
- firstfile = open(firstfileName)
- expandedfile = open(expandedfileName)
- finalfile = open(finalfileName)
- outfile = open(outfileName, "w")
+def makeParser(usage=""):
+ parser = optparse.OptionParser(usage=usage)
+ parser.add_option("--withmultifraction", action="store_true", dest="doFraction")
- firstDict = {}
- gidDict = {}
- expandedDict = {}
+ configParser = getConfigParser()
+ section = "combineRPKMs"
+ doFraction = getConfigBoolOption(configParser, section, "doFraction", False)
- for line in firstfile:
- fields = line.strip().split()
- firstDict[fields[1]] = fields[-1]
+ parser.set_defaults(doFraction=doFraction)
- firstfile.close()
+ return parser
- for line in expandedfile:
- fields = line.strip().split()
- expandedDict[fields[1]] = fields[-1]
- gidDict[fields[1]] = fields[0]
- expandedfile.close()
+def combineRPKMs(firstfileName, expandedfileName, finalfileName, outfileName, doFraction=False):
+
+ firstDict = getRPKMDict(firstfileName)
+ gidDict, expandedDict = getRPKMDict(expandedfileName, getGIDDict=True)
if doFraction:
header = "gid\tRNAkb\tgene\tfirstRPKM\texpandedRPKM\tfinalRPKM\tfractionMulti\n"
else:
header = "gid\tRNAkb\tgene\tfirstRPKM\texpandedRPKM\tfinalRPKM\n"
+ outfile = open(outfileName, "w")
outfile.write(header)
+ finalfile = open(finalfileName)
+ #TODO: the output lines are driven by finalfile. Genes present in the first or expanded
+ # RPKM file but absent from finalfile are silently omitted from the combined output.
for line in finalfile:
fields = line.strip().split()
gene = fields[0]
rnakb = fields[1]
finalRPKM = fields[2]
firstRPKM = firstDict.get(gene, "")
- outline = "%s\t%s\t%s\t%s\t%s\t%s" % (gidDict[gene], rnakb, gene, firstRPKM, expandedDict[gene], finalRPKM)
+ outputFields = [gidDict[gene], rnakb, gene, firstRPKM, expandedDict[gene], finalRPKM]
if doFraction:
fraction = fields[3]
- outline += "\t%s" % fraction
-
- outfile.write(outline + '\n')
+ outputFields.append(fraction)
+
+ outline = "%s\n" % string.join(outputFields, "\t")
+ outfile.write(outline)
finalfile.close()
outfile.close()
+def getRPKMDict(rpkmFileName, getGIDDict=False):
+ gidDict = {}
+ rpkmDict = {}
+ rpkmFile = open(rpkmFileName)
+ for line in rpkmFile:
+ fields = line.strip().split()
+ rpkmDict[fields[1]] = fields[-1]
+ if getGIDDict:
+ gidDict[fields[1]] = fields[0]
+
+ rpkmFile.close()
+
+ if getGIDDict:
+ return gidDict, rpkmDict
+ else:
+ return rpkmDict
+
+
if __name__ == "__main__":
main(sys.argv)
\ No newline at end of file