snapshot of 4.0a development. initial git repo commit
[erange.git] / plotbardist.py
1 #
2 #  plotbardist.py
3 #  ERANGE
4 #
5 #  Created by Ali Mortazavi on 12/13/07.
6
7 try:
8     import psyco
9     psyco.full()
10 except:
11     pass
12
13 import sys
14 import optparse
15 import matplotlib
16 from pylab import *
17 from math import *
18
19
# Announce the script version on startup.  "%prog" is only substituted by
# optparse in strings it formats itself, so a plain print would emit the
# placeholder verbatim; the program name is spelled out instead.
print("plotbardist: version 3.2")
21
22
def main(argv=None):
    """Parse the command line and hand the settings to plotbardist().

    argv is a full argument vector including the program name in position 0;
    when it is None or empty, sys.argv is used.  The positional arguments are
    one to three input files followed by the output image name.

    Exits with status 1 after printing the usage when the number of
    positional arguments is not between 2 and 4.
    """
    if not argv:
        argv = sys.argv

    usage = "usage: python %prog infile1 [infile2] [infile3] [options] outfile.png"

    parser = optparse.OptionParser(usage=usage)
    parser.add_option("--bins", type="int", dest="bins",
                      help="number of bins")
    parser.add_option("--field", type="int", dest="binnedField",
                      help="index of the whitespace-delimited field to bin; -1 is the last field")
    parser.add_option("--binSize", type="float", dest="binLength",
                      help="bin width; a negative value derives it from the data range")
    parser.add_option("--doLog", type="int", dest="logBase",
                      help="take the logarithm of each value in this base before binning")
    parser.add_option("--ymax", type="int", dest="maxY",
                      help="upper limit of the y axis; only applied when greater than 0")
    parser.add_option("--xlabel", dest="xLabel")
    parser.add_option("--ylabel", dest="yLabel")
    parser.add_option("--binLabels", dest="binLabels", help="comma separated list")
    parser.add_option("--title", dest="figTitle")
    parser.add_option("--legend", dest="barsLegend", help="comma separated list")
    parser.add_option("--xoffset", type="float", dest="pointOffset",
                      help="constant added to every value before binning")
    parser.add_option("--figsize", dest="figSizes", help="x,y pair")
    parser.set_defaults(bins=10, binnedField=-1, binLength=-1, logBase=None, maxY=0,
                        xLabel="bins", yLabel="count", binLabels=None, figTitle="",
                        barsLegend=None, pointOffset=0., figSizes=None)

    (options, args) = parser.parse_args(argv[1:])

    if len(args) < 2 or len(args) > 4:
        # Let optparse print the usage so that %prog is replaced by the
        # program name; printing the raw usage string leaves it unexpanded.
        parser.print_usage()
        print("where labelList and legendList are comma delimited strings of the form 'labelA,labelB,...,labelN'")
        sys.exit(1)

    # Everything but the last positional argument is an input file.
    fileList = args[:-1]
    pngfilename = args[-1]

    plotbardist(fileList, pngfilename, options.bins, options.binnedField, options.binLength,
                options.logBase, options.maxY, options.xLabel, options.yLabel, options.binLabels,
                options.figTitle, options.barsLegend, options.pointOffset, options.figSizes)
60
61
def _readPoints(fileName, binnedField, pointOffset, doLog, logBase):
    """Return the sorted values parsed from one field of every line of fileName."""
    dataList = []
    aFile = open(fileName)
    try:
        for line in aFile:
            fields = line.strip().split()
            try:
                point = float(fields[binnedField]) + pointOffset
                if doLog:
                    # Values below 1 are raised to 1 so the logarithm is never negative.
                    if point < 1:
                        point = 1

                    point = log(point, logBase)
            except (IndexError, ValueError, ZeroDivisionError):
                # Skip lines that are too short, hold a non-numeric field,
                # or whose logarithm cannot be taken in the requested base.
                continue

            dataList.append(point)
    finally:
        # Always release the file handle, even if reading fails part-way.
        aFile.close()

    dataList.sort()
    return dataList


def _rawBinIndex(point, binLength):
    """Return the unclamped bin number of point, or None if it cannot be computed."""
    try:
        return int(round(point / binLength))
    except (OverflowError, ValueError, ZeroDivisionError):
        # Zero bin width, or an infinite / not-a-number data point.
        return None


def _binPoints(dataList, binLength, bins):
    """Return a list of bins counts; out-of-range points go to the edge bins."""
    distbin = bins * [0]
    for point in dataList:
        rawIndex = _rawBinIndex(point, binLength)
        if rawIndex is None or rawIndex >= bins:
            # The last bin doubles as the overflow bin.
            distbin[-1] += 1
        elif rawIndex < 0:
            # Clamp instead of letting a negative index wrap around to the
            # far end of the histogram.
            distbin[0] += 1
        else:
            distbin[rawIndex] += 1

    return distbin


def _medianBin(distbin):
    """Return the smallest number of leading bins holding at least half the total count."""
    halfCount = sum(distbin) // 2
    runningTotal = 0
    for median, count in enumerate(distbin):
        if runningTotal >= halfCount:
            return median

        runningTotal += count

    return len(distbin)


def plotbardist(fileList, pngfilename, bins=10, binnedField=-1, binLength=-1, logBase=None,
                maxY=0, xLabel="bins", yLabel="count", binLabels=None, figTitle="",
                barsLegend=None, pointOffset=0., figSizes=None):
    """Draw side-by-side bar histograms of one field from up to three files.

    fileList     -- one to three input file names; each line is split on
                    whitespace and the field at index binnedField is read.
    pngfilename  -- name of the image file passed to savefig().
    bins         -- number of bins.  Bin i collects the points whose value
                    divided by binLength rounds to i; larger values are
                    counted in the last bin and negative ones in the first.
    binnedField  -- index of the field to read (-1 is the last field).
    binLength    -- bin width.  A negative value means "derive it": the
                    data range of the first file that has data, divided by
                    bins, is then used for that file and all later ones.
    logBase      -- when not None, each value (raised to at least 1) is
                    replaced by its logarithm in this base and the x label
                    is wrapped accordingly.
    maxY         -- upper y limit, applied only when greater than 0.
    xLabel, yLabel, figTitle -- axis labels and optional title.
    binLabels    -- optional comma separated x tick labels.
    barsLegend   -- optional comma separated legend entries, one per file.
    pointOffset  -- constant added to every value before the optional log.
    figSizes     -- optional "x,y" string giving the figure size.

    Lines whose field is missing or not numeric are skipped.  A file with
    no usable values is drawn as empty bars.  Raises KeyError when fileList
    does not hold one, two or three names.
    """
    # NOTE(review): pylab is star-imported at the top of this file before this
    # call runs, so the backend may already be fixed by then -- confirm that
    # selecting "Agg" here still takes effect with the matplotlib in use.
    matplotlib.use("Agg")
    plotParameters = {1: {"width": 0.5,
                          "offset": [-0.25]},
                      2: {"width": 0.3,
                          "offset": [-0.3, 0]},
                      3: {"width": 0.2,
                          "offset": [-0.2, 0., 0.2]}
    }

    colorList = ["b", "r", "c"]
    width = plotParameters[len(fileList)]["width"]
    offset = plotParameters[len(fileList)]["offset"]

    doLog = logBase is not None
    if doLog:
        print("taking log%d of x datapoints" % logBase)
        xLabel = "log%d(%s)" % (logBase, xLabel)

    if figSizes is not None:
        sizes = figSizes.strip().split(",")
        figure(figsize=(float(sizes[0]), float(sizes[1])))

    doLabels = binLabels is not None
    if doLabels:
        binLabels = binLabels.strip().split(",")

    if barsLegend is not None:
        barsLegend = barsLegend.strip().split(",")
    else:
        barsLegend = []

    ind2 = arange(bins)

    barsColors = []
    for index, fileName in enumerate(fileList):
        dataList = _readPoints(fileName, binnedField, pointOffset, doLog, logBase)
        print("%d data points" % len(dataList))

        if dataList:
            print("low = %f high = %f" % (dataList[0], dataList[-1]))
            if binLength < 0:
                # Derived once and then shared by the remaining files.
                binLength = abs(dataList[-1] - dataList[0]) / bins

        distbin = _binPoints(dataList, binLength, bins)

        if dataList:
            # Diagnostic: bin width and the unclamped bin of the largest value
            # (None when it cannot be computed, e.g. for a zero bin width).
            print("%s %s" % (binLength, _rawBinIndex(dataList[-1], binLength)))

        rectangles = bar(ind2 + offset[index], distbin, width, color=colorList[index])
        # One rectangle per data set is enough to build the legend.
        barsColors.append(rectangles[0])

        print(distbin)
        print(_medianBin(distbin))

    xlim(-1 * width - 0.2, bins + 0.2)

    if barsLegend:
        legend(barsColors, barsLegend)

    ylabel(yLabel)
    xlabel(xLabel)

    if doLabels:
        setp(gca(), "xticklabels", binLabels)

    if maxY > 0:
        ylim(0, maxY)

    if figTitle:
        title(figTitle)

    gca().get_xaxis().tick_bottom()
    gca().get_yaxis().tick_left()

    savefig(pngfilename)
180
181
182 if __name__ == "__main__":
183     main(sys.argv)