15 print "%prog: version 1.1"
22 usage = "usage: python %prog stalledPercentFile1 stalledPercentFile2 transcriptFile [--out oufile] [--statout statoutfile] [--expression level]"
24 parser = optparse.OptionParser(usage=usage)
25 parser.add_option("--out", dest="outFileName")
26 parser.add_option("--statout", dest="statOutFileName")
27 parser.add_option("--expression", type="float", dest="expressionLevel")
28 parser.set_defaults(outFileName=None, statOutFileName=None, expressionLevel=0.9)
29 (options, args) = parser.parse_args(argv[1:])
37 transcriptFile = args[2]
39 stallCategory(infile1, infile2, transcriptFile, options.outFileName, options.statOutFileName, options.expressionLevel)
42 def stallCategory(inFile1Name, inFile2Name, transcriptFileName, outFileName=None, statOutFileName=None, expressionLevel=0.9):
44 infile1 = open(inFile1Name)
45 infile2 = open(inFile2Name)
46 transcriptFile = open(transcriptFileName)
49 if outFileName is not None:
50 outfile = open(outFileName, "w")
51 outfile.write("gene\texpression\tratio1\tpromAmount1\ttotal1\trestRPKM1\tratio2\tpromAmount2\ttotal2\trestRPKM2\n")
55 if statOutFileName is not None:
56 statoutfile = open(statOutFileName, "w")
57 statoutfile.write("ExpressionR1R2Stalled1Stalled2\tCount\n")
68 fields = line.strip().split()
69 promAmount = float(fields[4]) + float(fields[5])
70 genelen = float(fields[3])/100
71 total = float(fields[2])
75 restRPKM = (total * (1. - promAmount/100.))/ (genelen - 0.6)
76 ratio = float(fields[-1])
77 dictOne[fields[1]] = (ratio, promAmount, total, restRPKM)
83 fields = line.strip().split()
84 promAmount = float(fields[4]) + float(fields[5])
85 genelen = float(fields[3])/100
89 total = float(fields[2])
93 restRPKM = (total * (1. - promAmount/100.))/ (genelen - 0.6)
94 ratio = float(fields[-1])
95 dictTwo[fields[1]] = (ratio, promAmount, total, restRPKM)
97 for line in transcriptFile:
98 (gene, transc, transcpercell) = line.strip().split()
99 expressionDict[gene] = float(transcpercell)
103 for atype in ["HH", "HL", "LH", "LL"]:
104 for expression in ["E", "N"]:
105 for cat1 in ["Y", "N"]:
106 for cat2 in ["Y", "N"]:
107 category = expression + cat1 + cat2 + atype
108 categoryList.append(category)
109 categoryDict[category] = []
112 if gene not in expressionDict:
114 print "%s is not in expressionDict - skipping" % gene
118 expression = expressionDict[gene]
119 (ratio1, promAmount1, total1, restRPKM1) = dictOne[gene]
120 (ratio2, promAmount2, total2, restRPKM2) = dictTwo[gene]
122 if expression > expressionLevel:
147 categoryDict[category].append(gene)
149 outfile.write("%s\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%s\n" % (gene, expression, ratio1, promAmount1, total1, restRPKM1, ratio2, promAmount2, total2, restRPKM2, category)
152 print "%s %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %s" % (gene, expression, ratio1, promAmount1, total1, restRPKM1, ratio2, promAmount2, total2, restRPKM2, category)
157 for category in categoryList:
159 statoutfile.write("%s\t%d\n" % (category, len(categoryDict[category])))
161 print "%s %d" % (category, len(categoryDict[category]))
164 if __name__ == "__main__":