7 from cistematic.genomes import Genome
14 from commoncode import getConfigParser, getConfigIntOption
17 print "getgosig: version 2.2"
23 usage = "usage: python %prog genome outimage gofileroot1 title1 cohortsize1 [gofileroot2 title2 cohortsize2 ...] [--fontsize pts] [--length in] [--width in]"
25 parser = makeParser(usage)
26 (options, args) = parser.parse_args(argv[1:])
35 conditionList = args[2:]
36 conditions = len(conditionList) / 3
39 for index in range(conditions):
40 conditionIndex = index * 3
41 fileroots.append(conditionList[conditionIndex])
42 titles.append((conditionList[conditionIndex + 1], "(%s)" % conditionList[conditionIndex + 2]))
44 getgosig(genome, imagename, fileroots, titles, options.fontSize, options.length, options.width)
47 def makeParser(usage=""):
48 parser = optparse.OptionParser(usage=usage)
49 parser.add_option("--fontsize", type="int", dest="fontSize")
50 parser.add_option("--length", type="int", dest="length")
51 parser.add_option("--width", type="int", dest="width")
53 configParser = getConfigParser()
55 fontSize = getConfigIntOption(configParser, section, "fontSize", 5)
56 length = getConfigIntOption(configParser, section, "length", 10)
57 width = getConfigIntOption(configParser, section, "width", 7)
59 parser.set_defaults(fontSize=fontSize, length=length, width=width)
64 def getgosig(genome, imagename, fileroots=[], titles=[], fontSize=5, length=10, width=7):
66 allgodesc = hg.allGOterms()
74 htmlname = imagename[:-4] + ".html"
85 for sigfile in fileroots:
86 infile = open(rootdir + sigfile + ".gosig", "r")
88 if "depleted" in line:
91 fields = line.split("\t")
92 if fields[0] not in goterms:
93 goterms.append(fields[0])
94 goscores[fields[0]] = []
95 numgenes[fields[0]] = []
96 possiblegenes[fields[0]] = 0
98 if float(fields[3]) > highestPval:
99 highestPval = float(fields[3])
101 if float(fields[3]) < lowestPval:
102 lowestPval = float(fields[3])
107 boundaryScore = score = -1 * log(highestPval) / (2.0 * ceiling) + 0.49
110 cdict = {"red": ((0.0, 1.0, 1.0), (boundaryScore, 1.0, 0.1), (1.0, 1.0, 1.0)),
111 "green": ((0.0, 1.0, 1.0), (boundaryScore, 1.0, 0.1), (1.0, 1.0, 1.0)),
112 "blue": ((0.0, 1.0, 1.0), (boundaryScore, 1.0, 0.75), (1.0, 0.0, 0.0))
115 mymap = matplotlib.colors.LinearSegmentedColormap("my_colormap", cdict, 1024)
118 for zfile in fileroots:
119 infile = open(rootdir + zfile + ".gozscore", "r")
121 fields = line.split()
123 if fields[0] not in goterms:
126 score = -1 * log(float(fields[7])) / (2.0 * ceiling)
137 goscores[fields[0]].append(score)
138 numgenes[fields[0]].append(fields[1])
139 possiblegenes[fields[0]] = int(fields[4])
141 goindex /= len(fileroots)
143 gokeys = goscores.keys()
146 gosortarray.append(goscores[term] + [term])
150 htmlfile = open(htmlname, "w")
151 htmlfile.write('<html><head><title>GO Analysis</title></head><body><table border="1">')
152 htmlfile.write("<tr><th>Description</th><th>possible</th>")
154 htmlfile.write("<th>%s<br>%s</th>" % entry)
156 htmlfile.write("</tr>\n")
159 for entry in gosortarray:
161 outline = "%s:\t" % term
162 for entry in goscores[term]:
163 outline += str(round(entry, 4)) + "\t"
166 htmlLine = "<tr><th>%s</th><th>%d</th>" % (allgodesc[term], possiblegenes[term])
168 for fileroot in fileroots:
169 gofile = fileroot + "." + term[3:]
170 ngene = numgenes[term][index]
171 if os.path.exists(gofile):
172 htmlLine += '<td><a href="%s">%s</a></td>' % (gofile, ngene)
174 htmlLine += "<td>%s</td>" % (ngene)
178 tableLines.append(htmlLine + "</tr>\n")
179 flatArray.append(goscores[term])
180 godesc.append(allgodesc[term])
183 for line in tableLines:
186 htmlfile.write("<tr><th>Cohort Size:</th>")
187 htmlfile.write("</tr>\n")
188 htmlfile.write("</table></body></html>")
190 figure(figsize=(length, width))
191 myaxe = axes([0.3, 0.1, 0.55, 0.75])
196 c = pcolor(Z, cmap=cm.gray, vmin=0.0, vmax=1.0)
198 c = pcolor(Z, cmap=mymap, vmin=0.0, vmax=1.0)
203 ind = arange(len(fileroots))
210 pcolorAxes = c.get_axes()
211 for entry in gosortarray:
214 for genenum in numgenes[term]:
215 if len(genenum) == 1:
216 genenum = " " + genenum
217 elif len(genenum) == 2:
218 genenum = " " + genenum
220 pcolorAxes.text(coordx, coordy, genenum, fontsize=fontSize)
226 for (line1,line2) in titles:
227 pcolorAxes.text(coordx + 0.1, coordy + 3 * deltaY + 0.5, line1, fontsize=int(fontSize*1.5))
228 pcolorAxes.text(coordx + 0.1, coordy + deltaY, line2, fontsize=int(fontSize*1.5))
231 setp(gca(), "xticks", [])
232 setp(gca(), "xticklabels", [])
233 setp(gca(), "yticks", arange(len(godesc)))
234 setp(gca(), "yticklabels", godesc)
235 locs, labels = yticks()
236 setp(labels, fontsize=fontSize)
237 setp(labels, verticalalignment="bottom")
238 setp(gca(), "ylim", [0, len(godesc)])
240 figtext(0.3,0.02, str(goindex - len(gokeys)) + " additional GO Terms below threshold of significance", fontsize=fontSize*2)
242 d = colorbar(orientation="vertical", drawedges=False)
243 for t in d.ax.get_yticklabels():
246 locs, labels = yticks()
247 setp(labels, fontsize=5)
248 pcolorAxes.text(conditions + 1,len(godesc), str(lowestPval), fontsize=fontSize*2)
249 pcolorAxes.text(conditions + 1,boundaryScore * len(godesc), str(highestPval), fontsize=fontSize*2)
251 savefig(imagename, dpi=250)
255 if __name__ == "__main__":