+ gidDict = getGeneData(uniquefilecount, splicefilecount)
+ gidList = gidDict.keys()
+ gidList.sort()
+
+ outfile = open(outfilename, "w")
+ for gid in gidList:
+ featureList = hg.getGeneFeatures((genome, gid))
+ featuresizesum, splicearea = getStats(featureList, splicelead)
+ fractionCoverage = featuresizesum / float(splicearea + featuresizesum)
+ geneData = gidDict[gid]
+ uniqueCount = geneData.uniqueCount
+ expectedSpliceCount = int(round(uniqueCount/fractionCoverage)) - uniqueCount
+
+ # this p-value is based on the observed unique count, not the expected total count
+ # nor the multi-read adjusted count
+ pvalue = 1 - pow(1 - float(splicelead)/featuresizesum, uniqueCount)
+ symbol = geneData.symbol
+ spliceCount = geneData.spliceCount
+ print '%s %s %f %d %d' % (gid, symbol, pvalue, expectedSpliceCount, spliceCount)
+ outfile.write('%s\t%s\t%f\t%d\t%d\n' % (gid, symbol, pvalue, expectedSpliceCount, spliceCount))