first pass cleanup of cistematic/genomes; change bamPreprocessing
[erange.git] / profilebins.py
1 #
2 #  profilebins.py
3 #  ENRAGE
4 #
5
6 try:
7     import psyco
8     psyco.full()
9 except:
10     pass
11
12 import sys
13 import optparse
14 from commoncode import getConfigParser, getConfigOption, getConfigBoolOption, getConfigFloatOption
15
16 print "profilebins: version 2.3"
17
18
19 def main(argv=None):
20     if not argv:
21         argv = sys.argv
22
23     usage = "usage: python %prog label infile1 [--upstream infile2] [--downstream infile3] [--uplength kb] [--downlength kb] [--gene geneName] [--genes genefile] [--append] outfile"
24
25     parser = makeParser(usage)
26     (options, args) = parser.parse_args(argv[1:])
27
28     if len(args) < 3:
29         print usage
30         sys.exit(1)
31
32     label = args[0]
33     infilename = args[1]
34     outfilename = args[2]
35
36     profilebins(label, infilename, outfilename, options.upfilename, options.downfilename,
37                 options.uplength, options.downlength, options.gene, options.genefile,
38                 options.doAppend)
39
40
41 def makeParser(usage=""):
42     parser = optparse.OptionParser(usage=usage)
43     parser.add_option("--upstream", dest="upfilename")
44     parser.add_option("--downstream", dest="downfilename")
45     parser.add_option("--uplength", type="float", dest="uplength")
46     parser.add_option("--downlength", type="int", dest="")
47     parser.add_option("--gene", dest="gene")
48     parser.add_option("--genes", dest="genefile")
49     parser.add_option("--append", action="store_true", dest="doAppend")
50
51     configParser = getConfigParser()
52     section = "profilebins"
53     upfilename = getConfigOption(configParser, section, "upfilename", None)
54     downfilename = getConfigOption(configParser, section, "downfilename", None)
55     uplength = getConfigFloatOption(configParser, section, "uplength", 0.0)
56     downlength = getConfigFloatOption(configParser, section, "downlength", 0.0)
57     gene = getConfigOption(configParser, section, "gene", None)
58     genefile = getConfigOption(configParser, section, "genefile", None)
59     doAppend = getConfigBoolOption(configParser, section, "doAppend", False)
60
61     parser.set_defaults(upfilename=upfilename, downfilename=downfilename, uplength=uplength, downlength=downlength,
62                         gene=gene, genefile=genefile, doAppend=doAppend)
63
64     return parser
65
66
67 def profilebins(label, infilename, outfilename, upfilename=None, downfilename=None,
68                 uplength=0.0, downlength=0.0, gene=None, genefile=None, doAppend=False):
69
70     fileList = [infilename]
71     geneList = []
72     restrictGenes = False
73     if gene is not None:
74         geneList.append(gene)
75         restrictGenes = True
76
77     if genefile is not None:
78         for line in genefile:
79             fields = line.strip().split()
80             if len(fields) > 1:
81                 geneList.append(fields[0])
82             else:
83                 geneList.append(line.strip())
84
85         restrictGenes = True
86
87     if upfilename is not None:
88         fileList = [upfilename, infilename]
89
90     if downfilename is not None:
91         fileList.append(downfilename)
92
93     partLength = [10.]
94     partOffset = [0.]
95
96     if uplength:
97         partLength = [uplength, 10.]
98         partOffset = [-1. * uplength, 0.]
99
100     if downlength:
101         partLength.append(downlength)
102         partOffset.append(10.)
103
104     totalWeight = 0.
105     totalBins = []
106     for afile in fileList:   
107         infile = open(afile)
108
109         line = infile.readline()
110         fields = line.strip().split()
111         numBins = len(fields) - 4
112
113         geneName = fields[1]
114         weight = float(fields[2])
115         if restrictGenes and geneName in geneList:
116             totalWeight += weight
117
118         totalBins.append([])
119         for myBin in fields[4:]:
120             if not restrictGenes or (restrictGenes and geneName in geneList):
121                 totalBins[-1].append(weight * float(myBin))
122             else:
123                 totalBins[-1].append(0.)
124
125         for line in infile:
126             fields = line.strip().split()
127             geneName = fields[1]
128             if restrictGenes and geneName not in geneList:
129                 continue
130
131             weight = float(fields[2])
132             index = 0
133             for myBin in fields[4:]:
134                 totalBins[-1][index] += weight * float(myBin)
135                 index += 1
136
137             totalWeight += weight
138
139     sumWeight = 0.
140     totalPercent = 0.
141     if doAppend:
142         outfile = open(outfilename, "a")
143     else:
144         outfile = open(outfilename, "w")
145         outfile.write("x-axis")
146         partIndex = 0
147         for partBins in totalBins:
148             partLen = partLength[partIndex]
149             numBins = len(partBins)
150             for binIndex in range(numBins):
151                 outfile.write("\t%.2f" % (partOffset[partIndex] + (binIndex * partLen/numBins)))
152
153             partIndex += 1
154
155         outfile.write("\tweight\n")
156
157     outfile.write(label)
158     for partBins in totalBins:
159         for aBin in partBins:
160             percent = aBin / totalWeight
161             outfile.write("\t%.1f" % percent)
162             sumWeight += aBin
163             totalPercent += percent
164
165     outfile.write("\t%.1f\n" % totalWeight)
166     outfile.close()
167
168     print sumWeight
169     print totalPercent
170
171
172 if __name__ == "__main__":
173     main(sys.argv)