first pass cleanup of cistematic/genomes; change bamPreprocessing
[erange.git] / plotnomogram.py
1 #
2 #  plotnomogram.py
3 #  ENRAGE
4 #
5
6 import sys
7
8 import matplotlib
9 from pylab import *
10 import matplotlib.axes 
11
12 try:
13     import psyco
14     psyco.full()
15 except:
16     pass
17
18 print "plotnomogram: version 1.2"
19
20
21 def main(argv=None):
22     if not argv:
23         argv = sys.argv
24
25     if len(argv) < 5:
26         print "usage: python %s maxdev xreads infile outpng" % argv[0]
27         sys.exit(1)
28
29     maxdev = float(argv[1])
30     xreads = float(argv[2])
31     infilename = argv[3]
32     outfilename = argv[4]
33
34     plotnomogram(maxdev, xreads, infilename, outfilename)
35
36
37 def plotnomogram(maxdev, xreads, infilename, outfilename):
38     matplotlib.use("Agg")
39     infile = open(infilename)
40     line = infile.readline().strip()
41
42     percentages = line.split()
43     del percentages[0]
44
45     listWidth = len(percentages)
46
47     geneValues = {}
48
49     for line in infile:
50         fields = line.strip().split()
51         geneValues[fields[0]] = []
52         for pos in range(listWidth):
53             geneValues[fields[0]].append(float(fields[1 + pos]))
54
55     # categories here are: 3000+, 2999-300, 299-30, 29-3
56     genes3000p = []
57     genes300p = []
58     genes30p = []
59     genes3p = []
60
61     for gene in geneValues:
62         finalLevel = geneValues[gene][0]
63         if finalLevel >= 3000:
64             genes3000p.append(gene)
65         elif finalLevel >= 300:
66             genes300p.append(gene)
67         elif finalLevel >= 30:
68             genes30p.append(gene)
69         elif finalLevel >= 3:
70             genes3p.append(gene)
71
72     organizedList = [genes3000p, genes300p, genes30p, genes3p]
73     listNames = ["3000+ RPKM     ", "300-2999 RPKM", "30-299 RPKM    ", "3-29 RPKM        "]
74     listColors = ["k", "c", "m", "r"]
75     geneCounts = {}
76     oldscores = [0.]
77     newscores = {}
78     for name in listNames:
79         newscores[name] = [0.]
80
81     index = 0
82     for percent in percentages[1:]:
83         oldscores.append(xreads * float(percent) / 100.)
84         index += 1
85         listindex = 0
86         for geneList in organizedList:
87             geneCount = len(geneList)
88             numOver = 0.
89             for gene in geneList:
90                 finalVal = geneValues[gene][0]
91                 currentVal = geneValues[gene][index]
92                 if abs((currentVal - finalVal) / finalVal) > maxdev:
93                     numOver += 1.
94
95             fraction = 1. - numOver / geneCount
96             print "%s %s %d %.2f" % (percent, listNames[listindex], geneCount, fraction)
97             newscores[listNames[listindex]].append(fraction)
98             geneCounts[listNames[listindex]] = geneCount        
99             listindex += 1
100
101     matplotlib.axes._process_plot_var_args.defaultColors = ["k", "y", "m", "c", "b", "g", "r"]
102
103     oldscores.append(xreads)
104     index = 0
105     plots = []
106     plotsColors = []
107     plotsLegend = []
108     for name in listNames:
109         newscores[name].append(1.0)
110         plots.append(plot(oldscores, newscores[name], listColors[index], linewidth=2))
111         plot(oldscores[1:-1], newscores[name][1:-1], listColors[index] + "^")
112         plotsColors.append(plots[-1][0])
113         plotsLegend.append("%s n = %d" % (name, geneCounts[name]))
114         index += 1
115
116     legend(plotsColors, plotsLegend, loc=0)
117     xticks(oldscores)
118     locs, labels = xticks()
119     setp(labels, rotation="vertical")
120     ylim(0, 1.03)
121     xlim(-0.1, xreads + .1)
122     savefig(outfilename)
123
124
125 if __name__ == "__main__":
126     main(sys.argv)