erange version 4.0a dev release
[erange.git] / combineRPKMs.py
#
#  combineRPKMs.py
#  ERANGE
#
5
# Announce the tool version on stdout whenever the module is loaded.
print("combineRPKMs: version 1.1")

# psyco is an optional accelerator: use it when it can be imported and
# enabled, otherwise carry on without it.  Only ordinary errors are
# swallowed so that KeyboardInterrupt/SystemExit still propagate.
try:
    import psyco
    psyco.full()
except Exception:
    pass
12
13 import sys
14 import optparse
15 import string
16 from commoncode import getConfigParser, getConfigBoolOption
17
18
def main(argv=None):
    """Command-line entry point: parse the arguments and combine the RPKM files.

    argv -- full argument vector, program name first; sys.argv is used when
            argv is None or empty.

    Prints the usage string and exits with status 1 unless the four
    positional arguments (firstRPKM, expandedRPKM, finalRPKM,
    combinedOutfile) are all present.
    """
    if not argv:
        argv = sys.argv

    usage = "usage: python %prog firstRPKM expandedRPKM finalRPKM combinedOutfile [--withmultifraction]"
    parser = makeParser(usage)
    (options, args) = parser.parse_args(argv[1:])

    # args[3] (the output file) is read below, so four positionals are
    # required; checking for only three would end in an IndexError.
    if len(args) < 4:
        print(usage)
        sys.exit(1)

    firstfile = args[0]
    expandedfile = args[1]
    finalfile = args[2]
    outfile = args[3]

    combineRPKMs(firstfile, expandedfile, finalfile, outfile, options.doFraction)
37
38
def makeParser(usage=""):
    """Build the optparse parser for the combineRPKMs command line.

    usage -- usage string handed to optparse.OptionParser.

    The parser has a single flag, --withmultifraction, stored as
    options.doFraction.  Its default is whatever getConfigBoolOption returns
    for option "doFraction" in section "combineRPKMs" of the parser obtained
    from getConfigParser(); False is passed as the last argument
    (presumably the fallback value -- confirm against commoncode).
    """
    configSection = "combineRPKMs"
    fractionDefault = getConfigBoolOption(getConfigParser(), configSection, "doFraction", False)

    optionParser = optparse.OptionParser(usage=usage)
    optionParser.add_option("--withmultifraction", action="store_true", dest="doFraction")
    optionParser.set_defaults(doFraction=fractionDefault)

    return optionParser
50
51
def combineRPKMs(firstfileName, expandedfileName, finalfileName, outfileName, doFraction=False):
    """Merge per-gene RPKM values from three files into one tab-delimited table.

    firstfileName    -- whitespace-delimited file; the second column is the
                        gene and the last column is used as firstRPKM.
    expandedfileName -- whitespace-delimited file; the first column is the
                        gid, the second the gene, and the last column is
                        used as expandedRPKM.
    finalfileName    -- whitespace-delimited file with columns gene, RNAkb
                        and finalRPKM, plus a fourth column that is copied
                        to fractionMulti when doFraction is true.
    outfileName      -- path of the table to write (overwritten).
    doFraction       -- when true, add the fractionMulti column.

    A header line is written first, then one row per non-blank line of the
    final file, in that file's order.  A gene missing from the first file
    gets an empty firstRPKM field.  Blank lines in any input are skipped.

    Raises KeyError when a gene of the final file does not occur in the
    expanded file, and IndexError when a non-blank line has too few columns.
    """
    firstDict = {}
    with open(firstfileName) as firstfile:
        for line in firstfile:
            fields = line.split()
            if not fields:
                continue

            firstDict[fields[1]] = fields[-1]

    expandedDict = {}
    gidDict = {}
    with open(expandedfileName) as expandedfile:
        for line in expandedfile:
            fields = line.split()
            if not fields:
                continue

            expandedDict[fields[1]] = fields[-1]
            gidDict[fields[1]] = fields[0]

    if doFraction:
        header = "gid\tRNAkb\tgene\tfirstRPKM\texpandedRPKM\tfinalRPKM\tfractionMulti\n"
    else:
        header = "gid\tRNAkb\tgene\tfirstRPKM\texpandedRPKM\tfinalRPKM\n"

    # The output is opened (and the header written) before the final file is
    # opened; the context managers close both handles even if a row fails.
    with open(outfileName, "w") as outfile:
        outfile.write(header)

        with open(finalfileName) as finalfile:
            for line in finalfile:
                fields = line.split()
                if not fields:
                    continue

                gene = fields[0]
                rnakb = fields[1]
                finalRPKM = fields[2]
                firstRPKM = firstDict.get(gene, "")
                outputFields = [gidDict[gene], rnakb, gene, firstRPKM, expandedDict[gene], finalRPKM]

                if doFraction:
                    outputFields.append(fields[3])

                # str.join instead of string.join, which newer Pythons lack.
                outfile.write("%s\n" % "\t".join(outputFields))
98
99
# Run as a script: hand the process argument vector to main().
if __name__ == "__main__":
    main(argv=sys.argv)