snapshot of 4.0a development. initial git repo commit
[erange.git] / profilebins.py
1 #
2 #  profilebins.py
3 #  ENRAGE
4 #
5
6 try:
7     import psyco
8     psyco.full()
9 except:
10     pass
11
12 import sys, optparse
13 print "%prog: version 2.2"
14
15
16 def main(argv=None):
17     if not argv:
18         argv = sys.argv
19
20     usage = "usage: python %prog label infile1 [--upstream infile2] [--downstream infile3] [--uplength kb] [--downlength kb] [--gene geneName] [--genes genefile] [--append] outfile"
21
22     parser = optparse.OptionParser(usage=usage)
23     parser.add_option("--upstream", dest="upfilename")
24     parser.add_option("--downstream", dest="downfilename")
25     parser.add_option("--uplength", type="float", dest="uplength")
26     parser.add_option("--downlength", type="int", dest="")
27     parser.add_option("--gene", dest="gene")
28     parser.add_option("--genes", dest="genefile")
29     parser.add_option("--append", action="store_true", dest="doAppend")
30     parser.set_defaults(upfilename=None, downfilename=None, uplength=0.0, downlength=0.0,
31                         gene=None, genefile=None, doAppend=False)
32
33     (options, args) = parser.parse_args(argv[1:])
34
35     if len(args) < 3:
36         print usage
37         sys.exit(1)
38
39     label = args[0]
40     infilename = args[1]
41     outfilename = args[2]
42
43     profilebins(label, infilename, outfilename, options.upfilename, options.downfilename,
44                 options.uplength, options.downlength, options.gene, options.genefile,
45                 options.doAppend)
46
47
48 def profilebins(label, infilename, outfilename, upfilename=None, downfilename=None,
49                 uplength=0.0, downlength=0.0, gene=None, genefile=None, doAppend=False):
50
51     fileList = [infilename]
52     geneList = []
53     restrictGenes = False
54     if gene is not None:
55         geneList.append(gene)
56         restrictGenes = True
57
58     if genefile is not None:
59         for line in genefile:
60             fields = line.strip().split()
61             if len(fields) > 1:
62                 geneList.append(fields[0])
63             else:
64                 geneList.append(line.strip())
65
66         restrictGenes = True
67
68     if upfilename is not None:
69         fileList = [upfilename, infilename]
70
71     if downfilename is not None:
72         fileList.append(downfilename)
73
74     partLength = [10.]
75     partOffset = [0.]
76
77     if uplength:
78         partLength = [uplength, 10.]
79         partOffset = [-1. * uplength, 0.]
80
81     if downlength:
82         partLength.append(downlength)
83         partOffset.append(10.)
84
85     totalWeight = 0.
86     totalBins = []
87     for afile in fileList:   
88         infile = open(afile)
89
90         line = infile.readline()
91         fields = line.strip().split()
92         numBins = len(fields) - 4
93
94         geneName = fields[1]
95         weight = float(fields[2])
96         if restrictGenes and geneName in geneList:
97             totalWeight += weight
98
99         totalBins.append([])
100         for myBin in fields[4:]:
101             if not restrictGenes or (restrictGenes and geneName in geneList):
102                 totalBins[-1].append(weight * float(myBin))
103             else:
104                 totalBins[-1].append(0.)
105
106         for line in infile:
107             fields = line.strip().split()
108             geneName = fields[1]
109             if restrictGenes and geneName not in geneList:
110                 continue
111
112             weight = float(fields[2])
113             index = 0
114             for myBin in fields[4:]:
115                 totalBins[-1][index] += weight * float(myBin)
116                 index += 1
117
118             totalWeight += weight
119
120     sumWeight = 0.
121     totalPercent = 0.
122     if doAppend:
123         outfile = open(outfilename, "a")
124     else:
125         outfile = open(outfilename, "w")
126         outfile.write("x-axis")
127         partIndex = 0
128         for partBins in totalBins:
129             partLen = partLength[partIndex]
130             numBins = len(partBins)
131             for binIndex in range(numBins):
132                 outfile.write("\t%.2f" % (partOffset[partIndex] + (binIndex * partLen/numBins)))
133
134             partIndex += 1
135
136         outfile.write("\tweight\n")
137
138     outfile.write(label)
139     for partBins in totalBins:
140         for aBin in partBins:
141             percent = aBin / totalWeight
142             outfile.write("\t%.1f" % percent)
143             sumWeight += aBin
144             totalPercent += percent
145
146     outfile.write("\t%.1f\n" % totalWeight)
147     outfile.close()
148
149     print sumWeight
150     print totalPercent
151
152
153 if __name__ == "__main__":
154     main(sys.argv)