snapshot of 4.0a development. initial git repo commit
[erange.git] / combineRPKMs.py
1 #
2 #  combineRPKMs.py
3 #  ERANGE
4 #
5
# Announce the script version whenever the module is run or imported.
# The parenthesised single-argument form prints the same text under both
# the Python 2 print statement and the Python 3 print() function.
print('version 1.0')

# psyco is an optional accelerator: use it when it is installed, otherwise
# run unaccelerated.  Only ImportError is ignored so that Ctrl-C,
# SystemExit and genuine errors are no longer silently swallowed.
try:
    import psyco
    psyco.full()
except ImportError:
    pass
12
13 import sys, optparse
14
15
def main(argv=None):
    """Parse the command line and write the combined RPKM table.

    Args:
        argv: Full argument vector with the program name at index 0.
            ``sys.argv`` is used when this is None or empty.

    Positional arguments (after the program name) are, in order: the first
    RPKM file, the expanded RPKM file, the final RPKM file and the combined
    output file.  ``--withmultifraction`` additionally copies the
    multi-read fraction column into the output.

    Prints the usage string and exits with status 1 when fewer than four
    positional arguments are given.
    """
    if not argv:
        argv = sys.argv

    usage = "usage: python %prog firstRPKM expandedRPKM finalRPKM combinedOutfile [--withmultifraction]"
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("--withmultifraction", action="store_true", dest="doFraction")
    parser.set_defaults(doFraction=False)
    (options, args) = parser.parse_args(argv[1:])

    # All four positionals are required.  Checking for only three let a
    # missing combinedOutfile fall through to an IndexError on args[3].
    if len(args) < 4:
        # Parenthesised form prints identically on Python 2 and Python 3.
        print(usage)
        sys.exit(1)

    firstfile = args[0]
    expandedfile = args[1]
    finalfile = args[2]
    outfile = args[3]

    combineRPKMs(firstfile, expandedfile, finalfile, outfile, options.doFraction)
36
37
def _parseGeneTable(infile):
    """Return (gidByGene, lastFieldByGene) for the non-blank lines of infile.

    Each line is split on whitespace; field 0 is stored as the gid and the
    last field as the value, both keyed by field 1 (the gene).
    """
    gidByGene = {}
    lastFieldByGene = {}
    for line in infile:
        fields = line.split()
        if not fields:
            # Blank line (e.g. a trailing newline at end of file).
            continue

        gene = fields[1]
        gidByGene[gene] = fields[0]
        lastFieldByGene[gene] = fields[-1]

    return gidByGene, lastFieldByGene


def combineRPKMs(firstfileName, expandedfileName, finalfileName, outfileName, doFraction=False):
    """Merge the first, expanded and final RPKM files into one tab-separated table.

    All input files are split on whitespace and blank lines are skipped.

    Args:
        firstfileName: file whose second field is the gene and whose last
            field is written as firstRPKM.
        expandedfileName: file whose first field is the gid, second field
            is the gene and last field is written as expandedRPKM.
        finalfileName: file whose fields are gene, RNAkb, finalRPKM and,
            when doFraction is set, the multi-read fraction.  One output
            row is written per line of this file, in the same order.
        outfileName: path of the table to write (overwritten if present).
        doFraction: when True, append the fourth field of each final-file
            line as a "fractionMulti" column.

    Raises:
        KeyError: a gene in the final file is absent from the expanded
            file.  A gene absent from the first file is not an error; its
            firstRPKM column is left empty.
        IndexError: a non-blank line has fewer fields than required.
    """
    handles = []
    # try/finally (rather than "with") closes every opened file on any
    # error without requiring a newer interpreter than the rest of the
    # file does.
    try:
        # Same open order as before: all inputs first, then the output.
        for name, mode in ((firstfileName, "r"), (expandedfileName, "r"),
                           (finalfileName, "r"), (outfileName, "w")):
            handles.append(open(name, mode))

        firstfile, expandedfile, finalfile, outfile = handles

        # Only the RPKM values are needed from the first file.
        _, firstDict = _parseGeneTable(firstfile)
        gidDict, expandedDict = _parseGeneTable(expandedfile)

        columns = ["gid", "RNAkb", "gene", "firstRPKM", "expandedRPKM", "finalRPKM"]
        if doFraction:
            columns.append("fractionMulti")

        outfile.write("\t".join(columns) + "\n")

        for line in finalfile:
            fields = line.split()
            if not fields:
                continue

            gene = fields[0]
            rnakb = fields[1]
            finalRPKM = fields[2]
            firstRPKM = firstDict.get(gene, "")
            outfields = [gidDict[gene], rnakb, gene, firstRPKM, expandedDict[gene], finalRPKM]

            if doFraction:
                outfields.append(fields[3])

            outfile.write("\t".join(outfields) + "\n")
    finally:
        for handle in handles:
            handle.close()
84
85
# Run the command-line driver only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main(argv=sys.argv)