# Version banner written with the Python 2 print statement.
6 print "combineRPKMs: version 1.1"
16 from commoncode import getConfigParser, getConfigBoolOption
# Command-line driver fragment: builds the usage text, parses the arguments and
# passes the file names on to combineRPKMs().
# NOTE(review): the enclosing definition and the assignments of firstfile,
# finalfile and outfile are not visible in this excerpt -- confirm against the
# full file.
23 usage = "usage: python %prog firstRPKM expandedRPKM finalRPKM combinedOutfile [--withmultifraction]"
# makeParser() registers the --withmultifraction flag and its default.
24 parser = makeParser(usage)
# The first element of argv is dropped before parsing.
25 (options, args) = parser.parse_args(argv[1:])
# Second positional argument; the usage string calls it expandedRPKM.
32 expandedfile = args[1]
# options.doFraction comes from --withmultifraction or the parser default.
36 combineRPKMs(firstfile, expandedfile, finalfile, outfile, options.doFraction)
# Build the optparse parser used by this script.
#
# usage -- usage text handed straight to optparse.OptionParser.
#
# NOTE(review): the caller uses the result as the parser
# (parser = makeParser(usage)), but the return statement is not visible in
# this excerpt -- confirm.
39 def makeParser(usage=""):
40 parser = optparse.OptionParser(usage=usage)
# --withmultifraction is a plain flag: when given, options.doFraction is True.
41 parser.add_option("--withmultifraction", action="store_true", dest="doFraction")
# The default for doFraction is read from the "combineRPKMs" config section
# through the commoncode helpers.
43 configParser = getConfigParser()
44 section = "combineRPKMs"
# False is the last argument; presumably the fallback when the option is
# absent from the config -- confirm against commoncode.
45 doFraction = getConfigBoolOption(configParser, section, "doFraction", False)
# Used when --withmultifraction is not given on the command line.
47 parser.set_defaults(doFraction=doFraction)
# Merge per-gene RPKM values from three input tables into one tab-separated file.
#
# firstfileName    -- table loaded with getRPKMDict(); a gene missing from it
#                     gets an empty firstRPKM column.
# expandedfileName -- table loaded with getRPKMDict(..., getGIDDict=True); it
#                     supplies both the expandedRPKM column and the gid column.
# finalfileName    -- table iterated line by line to produce the output rows.
# outfileName      -- opened with mode "w".
# doFraction       -- presumably selects the header with the extra
#                     fractionMulti column and the appended fraction value;
#                     the branch lines are not visible in this excerpt -- confirm.
#
# Raises KeyError when a gene from the final table is absent from the expanded
# table, because gidDict and expandedDict are indexed directly.
52 def combineRPKMs(firstfileName, expandedfileName, finalfileName, outfileName, doFraction=False):
54 firstDict = getRPKMDict(firstfileName)
55 gidDict, expandedDict = getRPKMDict(expandedfileName, getGIDDict=True)
# Header variant with the trailing fractionMulti column.
58 header = "gid\tRNAkb\tgene\tfirstRPKM\texpandedRPKM\tfinalRPKM\tfractionMulti\n"
# Header variant without the fraction column.
60 header = "gid\tRNAkb\tgene\tfirstRPKM\texpandedRPKM\tfinalRPKM\n"
62 outfile = open(outfileName, "w")
65 finalfile = open(finalfileName)
66 #TODO: the output lines are driven by finalfile. If there are genes in the first 2 that
67 # are not in the finalfile then they will be lost.
68 for line in finalfile:
# Whitespace split: columns are separated by any run of blanks or tabs.
69 fields = line.strip().split()
# NOTE(review): gene, rnakb and finalRPKM are assigned on lines not shown here.
# .get() with "" keeps the row when the gene is absent from the first table.
73 firstRPKM = firstDict.get(gene, "")
# Same column order as the header: gid, RNAkb, gene, firstRPKM, expandedRPKM, finalRPKM.
74 outputFields = [gidDict[gene], rnakb, gene, firstRPKM, expandedDict[gene], finalRPKM]
# NOTE(review): fraction is defined on a line not shown here.
78 outputFields.append(fraction)
# Tab-join the columns and end the row with a newline.
80 outline = "%s\n" % string.join(outputFields, "\t")
81 outfile.write(outline)
# Read an RPKM table into lookup dictionaries.
#
# rpkmFileName -- path of the file opened for reading.
# getGIDDict   -- flag; combineRPKMs() passes True when it also needs gidDict.
#
# Both dictionaries are keyed by the second whitespace-separated field of a line.
# NOTE(review): the visible return yields (gidDict, rpkmDict), yet combineRPKMs()
# also calls this without getGIDDict and uses the result as a single dict, so a
# second return path presumably exists outside this excerpt -- confirm.
87 def getRPKMDict(rpkmFileName, getGIDDict=False):
90 rpkmFile = open(rpkmFileName)
# NOTE(review): the loop that produces `line` is not visible in this excerpt.
92 fields = line.strip().split()
# The value is the last field, whatever the number of columns.
93 rpkmDict[fields[1]] = fields[-1]
# The first field is stored under the same key; the caller writes it to the gid column.
95 gidDict[fields[1]] = fields[0]
100 return gidDict, rpkmDict
# Script entry guard: true only when the file is run directly, not when imported.
# NOTE(review): the guarded statement is not visible in this excerpt.
105 if __name__ == "__main__":