rewrite of findall.py and MakeRdsFromBam to fix bugs resulting from poor initial...
[erange.git] / combineRPKMs.py
1 #
2 #  combineRPKMS.py
3 #  ENRAGE
4 #
5
6 print "combineRPKMs: version 1.1"
7 try:
8     import psyco
9     psyco.full()
10 except:
11     pass
12
13 import sys
14 import optparse
15 import string
16 from commoncode import getConfigParser, getConfigBoolOption
17
18
19 def main(argv=None):
20     if not argv:
21         argv = sys.argv
22
23     usage = "usage: python %prog firstRPKM expandedRPKM finalRPKM combinedOutfile [--withmultifraction]"
24     parser = makeParser(usage)
25     (options, args) = parser.parse_args(argv[1:])
26
27     if len(args) < 3:
28         print usage
29         sys.exit(1)
30
31     firstfile = args[0]
32     expandedfile = args[1]
33     finalfile = args[2]
34     outfile = args[3]
35
36     combineRPKMs(firstfile, expandedfile, finalfile, outfile, options.doFraction)
37
38
39 def makeParser(usage=""):
40     parser = optparse.OptionParser(usage=usage)
41     parser.add_option("--withmultifraction", action="store_true", dest="doFraction")
42
43     configParser = getConfigParser()
44     section = "combineRPKMs"
45     doFraction = getConfigBoolOption(configParser, section, "doFraction", False)
46
47     parser.set_defaults(doFraction=doFraction)
48
49     return parser
50
51
52 def combineRPKMs(firstfileName, expandedfileName, finalfileName, outfileName, doFraction=False):
53
54     firstDict = getRPKMDict(firstfileName)
55     gidDict, expandedDict = getRPKMDict(expandedfileName, getGIDDict=True)
56
57     if doFraction:
58         header = "gid\tRNAkb\tgene\tfirstRPKM\texpandedRPKM\tfinalRPKM\tfractionMulti\n"
59     else:
60         header = "gid\tRNAkb\tgene\tfirstRPKM\texpandedRPKM\tfinalRPKM\n"
61
62     outfile = open(outfileName, "w")
63     outfile.write(header)
64
65     finalfile = open(finalfileName)
66     for line in finalfile:
67         fields = line.strip().split()
68         gene = fields[0]
69         rnakb = fields[1]
70         finalRPKM = fields[2]
71         firstRPKM = firstDict.get(gene, "")
72         outputFields = [gidDict[gene], rnakb, gene, firstRPKM, expandedDict[gene], finalRPKM]
73
74         if doFraction:
75             fraction = fields[3]
76             outputFields.append(fraction)
77
78         outline = "%s\n" % string.join(outputFields, "\t")
79         outfile.write(outline)
80
81     finalfile.close()
82     outfile.close()
83
84
85 def getRPKMDict(rpkmFileName, getGIDDict=False):
86     gidDict = {}
87     rpkmDict = {}
88     rpkmFile = open(rpkmFileName)
89     for line in rpkmFile:
90         fields = line.strip().split()
91         rpkmDict[fields[1]] = fields[-1]
92         if getGIDDict:
93             gidDict[fields[1]] = fields[0]
94
95     rpkmFile.close()
96
97     if getGIDDict:
98         return gidDict, rpkmDict
99     else:
100         return rpkmDict
101
102
103 if __name__ == "__main__":
104     main(sys.argv)