14 from commoncode import getConfigParser, getConfigOption, getConfigFloatOption
16 print "stallCategory: version 1.2"
23 usage = "usage: python %prog stalledPercentFile1 stalledPercentFile2 transcriptFile [--out oufile] [--statout statoutfile] [--expression level]"
25 parser = makeParser(usage)
26 (options, args) = parser.parse_args(argv[1:])
34 transcriptFile = args[2]
36 stallCategory(infile1, infile2, transcriptFile, options.outFileName, options.statOutFileName, options.expressionLevel)
39 def makeParser(usage=""):
40 parser = optparse.OptionParser(usage=usage)
41 parser.add_option("--out", dest="outFileName")
42 parser.add_option("--statout", dest="statOutFileName")
43 parser.add_option("--expression", type="float", dest="expressionLevel")
45 configParser = getConfigParser()
46 section = "stallCategory"
47 outFileName = getConfigOption(configParser, section, "outFileName", None)
48 statOutFileName = getConfigOption(configParser, section, "statOutFileName", None)
49 expressionLevel = getConfigFloatOption(configParser, section, "expressionLevel", 0.9)
51 parser.set_defaults(outFileName=outFileName, statOutFileName=statOutFileName, expressionLevel=expressionLevel)
56 def stallCategory(inFile1Name, inFile2Name, transcriptFileName, outFileName=None, statOutFileName=None, expressionLevel=0.9):
58 infile1 = open(inFile1Name)
59 infile2 = open(inFile2Name)
60 transcriptFile = open(transcriptFileName)
63 if outFileName is not None:
64 outfile = open(outFileName, "w")
65 outfile.write("gene\texpression\tratio1\tpromAmount1\ttotal1\trestRPKM1\tratio2\tpromAmount2\ttotal2\trestRPKM2\n")
69 if statOutFileName is not None:
70 statoutfile = open(statOutFileName, "w")
71 statoutfile.write("ExpressionR1R2Stalled1Stalled2\tCount\n")
82 fields = line.strip().split()
83 promAmount = float(fields[4]) + float(fields[5])
84 genelen = float(fields[3])/100
85 total = float(fields[2])
89 restRPKM = (total * (1. - promAmount/100.))/ (genelen - 0.6)
90 ratio = float(fields[-1])
91 dictOne[fields[1]] = (ratio, promAmount, total, restRPKM)
97 fields = line.strip().split()
98 promAmount = float(fields[4]) + float(fields[5])
99 genelen = float(fields[3])/100
103 total = float(fields[2])
107 restRPKM = (total * (1. - promAmount/100.))/ (genelen - 0.6)
108 ratio = float(fields[-1])
109 dictTwo[fields[1]] = (ratio, promAmount, total, restRPKM)
111 for line in transcriptFile:
112 (gene, transc, transcpercell) = line.strip().split()
113 expressionDict[gene] = float(transcpercell)
117 for atype in ["HH", "HL", "LH", "LL"]:
118 for expression in ["E", "N"]:
119 for cat1 in ["Y", "N"]:
120 for cat2 in ["Y", "N"]:
121 category = expression + cat1 + cat2 + atype
122 categoryList.append(category)
123 categoryDict[category] = []
126 if gene not in expressionDict:
128 print "%s is not in expressionDict - skipping" % gene
132 expression = expressionDict[gene]
133 (ratio1, promAmount1, total1, restRPKM1) = dictOne[gene]
134 (ratio2, promAmount2, total2, restRPKM2) = dictTwo[gene]
136 if expression > expressionLevel:
161 categoryDict[category].append(gene)
163 outfile.write("%s\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%s\n" % (gene, expression, ratio1, promAmount1, total1, restRPKM1, ratio2, promAmount2, total2, restRPKM2, category)
166 print "%s %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %s" % (gene, expression, ratio1, promAmount1, total1, restRPKM1, ratio2, promAmount2, total2, restRPKM2, category)
171 for category in categoryList:
173 statoutfile.write("%s\t%d\n" % (category, len(categoryDict[category])))
175 print "%s %d" % (category, len(categoryDict[category]))
178 if __name__ == "__main__":