2 usage: python scatterfields.py infilename xaxisLabel xField yaxisLabel yField outImageName [--xmin xMin] [--ymin yMin]
3 [--xmax xMax] [--ymax yMax] [--doLogF1] [--doLogF2] [--arcsinh] [--order polyOrder] [--base logBase]
4 [--markGenes geneFile] [--markfold times] [--noregression] [--large] [--markdiag] [--title text] [--verbose]
6 Do a scatter plot of 2 fields from an input file.
7 fields are counted from 0.
8 use [-order polyOrder] to specify polynomial fits > 1
9 Supports very rudimentary compound fields for X value
10 using python's lambda functions (omit the keyword lambda)
20 from commoncode import getConfigParser, getConfigOption, getConfigIntOption, getConfigFloatOption, getConfigBoolOption
24 print "scatterfields: version 3.2"
32 parser = getParser(usage)
33 (options, args) = parser.parse_args(argv[1:])
39 infile = open(args[0])
46 scatterfields(infile, xaxis, xField, yaxis, yField, outfilename, options.forcexmin,
47 options.forceymin, options.forcexmax, options.forceymax, options.doLogF1,
48 options.doLogF2, options.doArcsinh, options.fitOrder, options.base,
49 options.markFile, options.foldChange, options.doRegression, options.plotLarge,
50 options.markDiag, options.figtitle, options.verbose)
54 parser = optparse.OptionParser(usage=usage)
55 parser.add_option("--xmin", type="float", dest="forcexmin")
56 parser.add_option("--ymin", type="float", dest="forceymin")
57 parser.add_option("--xmax", type="float", dest="forcexmax")
58 parser.add_option("--ymax", type="float", dest="forceymax")
59 parser.add_option("--doLogF1", action="store_true", dest="doLogF1")
60 parser.add_option("--doLogF2", action="store_true", dest="doLogF2")
61 parser.add_option("--arcsinh", action="store_true", dest="doArcsinh")
62 parser.add_option("--order", type="int", dest="fitOrder")
63 parser.add_option("--base", type="int", dest="base")
64 parser.add_option("--markGenes", dest="markFile")
65 parser.add_option("--markfold", type="float", dest="foldChange")
66 parser.add_option("--noregression", action="store_false", dest="doRegression")
67 parser.add_option("--large", action="store_true", dest="plotLarge")
68 parser.add_option("--markdiag", action="store_true", dest="markDiag")
69 parser.add_option("--title", type="int", dest="figtitle")
70 parser.add_option("--verbose", action="store_true", dest="verbose")
72 configParser = getConfigParser()
73 section = "scatterfields"
74 forcexmin = getConfigFloatOption(configParser, section, "forcexmin", 0.0)
75 forceymin = getConfigFloatOption(configParser, section, "forceymin", 0.0)
76 forcexmax = getConfigIntOption(configParser, section, "forcexmax", -1)
77 forceymax = getConfigIntOption(configParser, section, "forceymax", -1)
78 doLogF1 = getConfigBoolOption(configParser, section, "doLogF1", False)
79 doLogF2 = getConfigBoolOption(configParser, section, "doLogF2", False)
80 doArcsinh = getConfigBoolOption(configParser, section, "doArcsinh", False)
81 fitOrder = getConfigIntOption(configParser, section, "fitOrder", 1)
82 base = getConfigIntOption(configParser, section, "base", 10)
83 markFile = getConfigOption(configParser, section, "markFile", None)
84 foldChange = getConfigOption(configParser, section, "foldChange", None)
85 doRegression = getConfigBoolOption(configParser, section, "doRegression", True)
86 plotLarge = getConfigBoolOption(configParser, section, "plotLarge", False)
87 markDiag = getConfigBoolOption(configParser, section, "markDiag", False)
88 figtitle = getConfigOption(configParser, section, "figtitle", "")
89 verbose = getConfigBoolOption(configParser, section, "verbose", False)
91 parser.set_defaults(forcexmin=forcexmin, forceymin=forceymin, forcexmax=forcexmax, forceymax=forceymax, doLogF1=doLogF1,
92 doLogF2=doLogF2, doArcsinh=doArcsinh, fitOrder=fitOrder, base=base, markFile=markFile,
93 foldChange=foldChange, doRegression=doRegression, plotLarge=plotLarge, markDiag=markDiag,
94 figtitle=figtitle, verbose=verbose)
99 def scatterfields(infilename, xaxis, xField, yaxis, yField, outfilename, forcexmin=0.0, forceymin=0.0,
100 forcexmax=-1, forceymax=-1, doLogF1=False, doLogF2=False, doArcsinh=False, fitOrder=1,
101 base=10, markFile=None, foldChange=None, doRegression=True, plotLarge=False,
102 markDiag=False, figtitle="", verbose=False):
104 infile = open(infilename)
105 compoundField = False
110 compoundOp = "lambda %s" % xField
111 operator = eval(compoundOp)
113 print "compound field %s" % xField
117 if not compoundField:
118 print "expression %s not supported" % xField
123 if markFile is not None:
124 for line in markFile:
126 markedGenes.append(line.strip().split()[0].upper())
128 markedGenes.append(line.strip().upper())
134 if foldChange is not None:
143 markedfoldnewscores = []
144 markedfoldoldscores = []
149 fields = line.strip().split()
153 score = operator(fields)
155 score = float(fields[xField])
157 newscore = float(fields[yField])
161 foldMarkThisScore = False
167 tempratio = newscore / tempscore
169 tempratio2 = tempscore / 0.03
171 tempratio2 = 1. / tempratio
173 if tempratio > foldChange or tempratio2 > foldChange:
174 foldMarkThisScore = True
177 score = abs(cmath.asinh(score))
180 score = math.log(score, base)
188 newscore = abs(cmath.asinh(newscore))
191 newscore = math.log(newscore, base)
198 oldscores.append(score)
199 newscores.append(newscore)
200 if foldMarkThisScore:
201 markedfoldoldscores.append(score)
202 markedfoldnewscores.append(newscore)
203 if marking and gene.upper() not in markedGenes:
204 print gene, score, newscore, "unmarked"
206 if gene.upper() in markedGenes:
207 print gene, score, newscore, "overfold"
210 print len(markedfoldoldscores), line.strip()
212 if gene.upper() in markedGenes:
213 if not foldMarkThisScore:
214 print gene, score, newscore
216 markedoldscores.append(score)
217 markednewscores.append(newscore)
219 print score, newscore
222 if plotLarge and markFold:
223 plot(oldscores, newscores, "^", markersize=10., color="0.75", alpha=alphaVal)
225 plot(oldscores, newscores, "b^", markersize=10., alpha=alphaVal)
227 plot(oldscores, newscores, ",", color="0.75", alpha=alphaVal)
229 plot(oldscores, newscores, "b,", alpha=alphaVal)
231 if len(markedfoldoldscores) > 0:
233 plot(markedfoldoldscores, markedfoldnewscores, "b^", markersize=10., alpha=alphaVal)
235 plot(markedfoldoldscores, markedfoldnewscores, "b,", alpha=alphaVal)
237 if len(markedoldscores) > 0:
239 plot(markedoldscores, markednewscores, "r^", color="red", markersize=10., alpha=alphaVal)
241 plot(markedoldscores, markednewscores, ".", color="red", markersize=4., alpha=alphaVal)
243 fitvalues = polyfit(oldscores, newscores, fitOrder)
247 meanObserved = float(sum(newscores)) / len(newscores)
248 if len(fitvalues) == 2:
249 predicted = [(fitvalues[0] * x + fitvalues[1]) for x in oldscores]
251 predicted = [(fitvalues[0] * x**2 + fitvalues[1] * x + fitvalues[2]) for x in oldscores]
256 for index in range(len(newscores)):
257 SSt += (newscores[index] - meanObserved) ** 2
258 SSe += (newscores[index] - predicted[index]) ** 2
260 rSquared = 1. - SSe / SSt
261 print "R**2 = %f" % rSquared
264 if len(fitvalues) == 2:
265 predicted = [(fitvalues[0] * x + fitvalues[1]) for x in oldscores]
267 predicted = [(fitvalues[0] * x**2 + fitvalues[1] * x + fitvalues[2]) for x in oldscores]
270 plot(oldscores, predicted, "-k", linewidth=2)
273 figtitle = "%s vs %s (R^2: %.2f)" % (yaxis, xaxis, rSquared)
292 plot([min,max], [min,max], "-g", linewidth=2)
294 print forcexmin, forceymin
297 ylabel("log%s(%s)" % (str(base), yaxis))
302 xlabel("log%s(%s)" % (str(base), xaxis))
307 xlim(forcexmin - 0.05, xmax)
310 ylim(forceymin - 0.05, ymax)
312 if forcexmax > 0 and forceymax > 0:
313 xlim(forcexmin - 0.05, forcexmax)
314 ylim(forceymin - 0.05, forceymax)
316 gca().get_xaxis().tick_bottom()
317 gca().get_yaxis().tick_left()
319 savefig(outfilename, dpi=100)
322 if __name__ == "__main__":