2 usage: python scatterfields.py infilename xaxisLabel xField yaxisLabel yField outImageName [--xmin xMin] [--ymin yMin]
3 [--xmax xMax] [--ymax yMax] [--doLogF1] [--doLogF2] [--arcsinh] [--order polyOrder] [--base logBase]
4 [--markGenes geneFile] [--markfold times] [--noregression] [--large] [--markdiag] [--title text] [--verbose]
6 Do a scatter plot of 2 fields from an input file.
7 fields are counted from 0.
8 use [-order polyOrder] to specify polynomial fits > 1
9 Supports very rudimentary compound fields for X value
10 using python's lambda functions (omit the keyword lambda)
23 print "%prog: version 3.1"
31 parser = optparse.OptionParser(usage=usage)
32 parser.add_option("--xmin", type="float", dest="forcexmin")
33 parser.add_option("--ymin", type="float", dest="forceymin")
34 parser.add_option("--xmax", type="float", dest="forcexmax")
35 parser.add_option("--ymax", type="float", dest="forceymax")
36 parser.add_option("--doLogF1", action="store_true", dest="doLogF1")
37 parser.add_option("--doLogF2", action="store_true", dest="doLogF2")
38 parser.add_option("--arcsinh", action="store_true", dest="doArcsinh")
39 parser.add_option("--order", type="int", dest="fitOrder")
40 parser.add_option("--base", type="int", dest="base")
41 parser.add_option("--markGenes", dest="markFile")
42 parser.add_option("--markfold", type="float", dest="foldChange")
43 parser.add_option("--noregression", action="store_false", dest="doRegression")
44 parser.add_option("--large", action="store_true", dest="plotLarge")
45 parser.add_option("--markdiag", action="store_true", dest="markDiag")
46 parser.add_option("--title", type="int", dest="figtitle")
47 parser.add_option("--verbose", action="store_true", dest="verbose")
48 parser.set_defaults(forcexmin=0.0, forceymin=0.0, forcexmax=-1, forceymax=-1, doLogF1=False,
49 doLogF2=False, doArcsinh=False, fitOrder=1, base=10, markFile=None,
50 foldChange=None, doRegression=True, plotLarge=False, markDiag=False,
51 figtitle="", verbose=False)
53 (options, args) = parser.parse_args(argv[1:])
59 infile = open(args[0])
66 scatterfields(infile, xaxis, xField, yaxis, yField, outfilename, options.forcexmin,
67 options.forceymin, options.forcexmax, options.forceymax, options.doLogF1,
68 options.doLogF2, options.doArcsinh, options.fitOrder, options.base,
69 options.markFile, options.foldChange, options.doRegression, options.plotLarge,
70 options.markDiag, options.figtitle, options.verbose)
73 def scatterfields(infilename, xaxis, xField, yaxis, yField, outfilename, forcexmin=0.0, forceymin=0.0,
74 forcexmax=-1, forceymax=-1, doLogF1=False, doLogF2=False, doArcsinh=False, fitOrder=1,
75 base=10, markFile=None, foldChange=None, doRegression=True, plotLarge=False,
76 markDiag=False, figtitle="", verbose=False):
78 infile = open(infilename)
84 compoundOp = "lambda %s" % xField
85 operator = eval(compoundOp)
87 print "compound field %s" % xField
92 print "expression %s not supported" % xField
97 if markFile is not None:
100 markedGenes.append(line.strip().split()[0].upper())
102 markedGenes.append(line.strip().upper())
108 if foldChange is not None:
117 markedfoldnewscores = []
118 markedfoldoldscores = []
123 fields = line.strip().split()
127 score = operator(fields)
129 score = float(fields[xField])
131 newscore = float(fields[yField])
135 foldMarkThisScore = False
141 tempratio = newscore / tempscore
143 tempratio2 = tempscore / 0.03
145 tempratio2 = 1. / tempratio
147 if tempratio > foldChange or tempratio2 > foldChange:
148 foldMarkThisScore = True
151 score = abs(cmath.asinh(score))
154 score = math.log(score, base)
162 newscore = abs(cmath.asinh(newscore))
165 newscore = math.log(newscore, base)
172 oldscores.append(score)
173 newscores.append(newscore)
174 if foldMarkThisScore:
175 markedfoldoldscores.append(score)
176 markedfoldnewscores.append(newscore)
177 if marking and gene.upper() not in markedGenes:
178 print gene, score, newscore, "unmarked"
180 if gene.upper() in markedGenes:
181 print gene, score, newscore, "overfold"
184 print len(markedfoldoldscores), line.strip()
186 if gene.upper() in markedGenes:
187 if not foldMarkThisScore:
188 print gene, score, newscore
190 markedoldscores.append(score)
191 markednewscores.append(newscore)
193 print score, newscore
196 if plotLarge and markFold:
197 plot(oldscores, newscores, "^", markersize=10., color="0.75", alpha=alphaVal)
199 plot(oldscores, newscores, "b^", markersize=10., alpha=alphaVal)
201 plot(oldscores, newscores, ",", color="0.75", alpha=alphaVal)
203 plot(oldscores, newscores, "b,", alpha=alphaVal)
205 if len(markedfoldoldscores) > 0:
207 plot(markedfoldoldscores, markedfoldnewscores, "b^", markersize=10., alpha=alphaVal)
209 plot(markedfoldoldscores, markedfoldnewscores, "b,", alpha=alphaVal)
211 if len(markedoldscores) > 0:
213 plot(markedoldscores, markednewscores, "r^", color="red", markersize=10., alpha=alphaVal)
215 plot(markedoldscores, markednewscores, ".", color="red", markersize=4., alpha=alphaVal)
217 fitvalues = polyfit(oldscores, newscores, fitOrder)
221 meanObserved = float(sum(newscores)) / len(newscores)
222 if len(fitvalues) == 2:
223 predicted = [(fitvalues[0] * x + fitvalues[1]) for x in oldscores]
225 predicted = [(fitvalues[0] * x**2 + fitvalues[1] * x + fitvalues[2]) for x in oldscores]
230 for index in range(len(newscores)):
231 SSt += (newscores[index] - meanObserved) ** 2
232 SSe += (newscores[index] - predicted[index]) ** 2
234 rSquared = 1. - SSe / SSt
235 print "R**2 = %f" % rSquared
238 if len(fitvalues) == 2:
239 predicted = [(fitvalues[0] * x + fitvalues[1]) for x in oldscores]
241 predicted = [(fitvalues[0] * x**2 + fitvalues[1] * x + fitvalues[2]) for x in oldscores]
244 plot(oldscores, predicted, "-k", linewidth=2)
247 figtitle = "%s vs %s (R^2: %.2f)" % (yaxis, xaxis, rSquared)
266 plot([min,max], [min,max], "-g", linewidth=2)
268 print forcexmin, forceymin
271 ylabel("log%s(%s)" % (str(base), yaxis))
276 xlabel("log%s(%s)" % (str(base), xaxis))
281 xlim(forcexmin - 0.05, xmax)
284 ylim(forceymin - 0.05, ymax)
286 if forcexmax > 0 and forceymax > 0:
287 xlim(forcexmin - 0.05, forcexmax)
288 ylim(forceymin - 0.05, forceymax)
290 gca().get_xaxis().tick_bottom()
291 gca().get_yaxis().tick_left()
293 savefig(outfilename, dpi=100)
296 if __name__ == "__main__":