5 print 'psyco not running'
8 from cistematic.genomes import Genome
15 print "predictSpliceCount: version 1.2"
18 print 'usage: python %s genome maxBorder uniquecountfile splicecountfile outfile' % argv[0]
22 # number of nucleotides at the end of each exon that is affected by splicing
23 splicelead = int(argv[2])
24 uniquefilecount = argv[3]
25 splicefilecount = argv[4]
28 predictSpliceCount(genome, splicelead, uniquefilecount, splicefilecount, outfilename)
31 def predictSpliceCount(genome, splicelead, uniquefilecount, splicefilecount, outfilename):
39 uniquefile = open(uniquefilecount)
40 for line in uniquefile:
41 fields = line.strip().split()
42 gidDict[fields[0]] = fields[1]
43 gidList.append(fields[0])
44 uniqueCountDict[fields[0]] = int(fields[2])
46 splicefile = open(splicefilecount)
47 for line in splicefile:
48 fields = line.strip().split()
49 spliceCountDict[fields[0]] = int(fields[2])
51 outfile = open(outfilename,'w')
56 featureList = hg.getGeneFeatures((genome, gid))
59 for (ftype, chrom, start, stop, sense) in featureList:
60 if (start, stop) not in newfeatureList:
61 newfeatureList.append((start, stop))
62 featuresizesum += stop - start + 1
64 if featuresizesum < 1:
67 splicearea = (len(newfeatureList) - 1) * splicelead
68 if splicearea < splicelead:
71 fractionCoverage = featuresizesum / float(splicearea + featuresizesum)
72 expectedSpliceCount = int(round(uniqueCountDict[gid]/fractionCoverage)) - uniqueCountDict[gid]
74 # this p-value is based on the observed unique count, not the expected total count
75 # nor the multi-read adjusted count
76 pvalue = 1 - pow(1 - float(splicelead)/featuresizesum, uniqueCountDict[gid])
77 print '%s %s %f %d %d' % (gid, symbol, pvalue, expectedSpliceCount, spliceCountDict[gid])
78 outfile.write('%s\t%s\t%f\t%d\t%d\n' % (gid, symbol, pvalue, expectedSpliceCount, spliceCountDict[gid]))
83 if __name__ == "__main__":