5 print 'psyco not running'
8 from cistematic.genomes import Genome
11 class GeneSymbolAndCount():
13 def __init__(self, symbol="", uniqueCount=0, spliceCount=0):
15 self.uniqueCount = uniqueCount
16 self.spliceCount = spliceCount
23 print "predictSpliceCount: version 1.2"
26 print 'usage: python %s genome maxBorder uniquecountfile splicecountfile outfile' % argv[0]
30 # number of nucleotides at the end of each exon that is affected by splicing
31 splicelead = int(argv[2])
32 uniquefilecount = argv[3]
33 splicefilecount = argv[4]
36 predictSpliceCount(genome, splicelead, uniquefilecount, splicefilecount, outfilename)
39 def predictSpliceCount(genome, splicelead, uniquefilecount, splicefilecount, outfilename):
42 gidDict = getGeneData(uniquefilecount, splicefilecount)
43 gidList = gidDict.keys()
46 outfile = open(outfilename, "w")
48 featureList = hg.getGeneFeatures((genome, gid))
49 featuresizesum, splicearea = getStats(featureList, splicelead)
50 fractionCoverage = featuresizesum / float(splicearea + featuresizesum)
51 geneData = gidDict[gid]
52 uniqueCount = geneData.uniqueCount
53 expectedSpliceCount = int(round(uniqueCount/fractionCoverage)) - uniqueCount
55 # this p-value is based on the observed unique count, not the expected total count
56 # nor the multi-read adjusted count
57 pvalue = 1 - pow(1 - float(splicelead)/featuresizesum, uniqueCount)
58 symbol = geneData.symbol
59 spliceCount = geneData.spliceCount
60 print '%s %s %f %d %d' % (gid, symbol, pvalue, expectedSpliceCount, spliceCount)
61 outfile.write('%s\t%s\t%f\t%d\t%d\n' % (gid, symbol, pvalue, expectedSpliceCount, spliceCount))
66 def getGeneData(uniquefilecount, splicefilecount):
68 uniquefile = open(uniquefilecount)
69 for line in uniquefile:
70 fields = line.strip().split()
71 geneData = GeneSymbolAndCount(symbol=fields[1], uniqueCount=int(fields[2]))
72 gidDict[fields[0]] = geneData
75 splicefile = open(splicefilecount)
76 for line in splicefile:
77 fields = line.strip().split()
78 gidDict[fields[0]].spliceCount = int(fields[2])
85 def getStats(featureList, splicelead):
88 for (ftype, chrom, start, stop, sense) in featureList:
89 if (start, stop) not in newfeatureList:
90 newfeatureList.append((start, stop))
91 featuresizesum += stop - start + 1
93 if featuresizesum < 1:
96 splicearea = (len(newfeatureList) - 1) * splicelead
97 if splicearea < splicelead:
100 return featuresizesum, splicearea
103 if __name__ == "__main__":