snapshot of 4.0a development. initial git repo commit
[erange.git] / makewiggle.py
1 #
2 #  makewiggle.py
3 #  ENRAGE
4 #
5 import sys, optparse
6 from commoncode import readDataset
7
8 print "%prog: version 6.7"
9
10 try:
11     import psyco
12     psyco.full()
13 except:
14     print 'psyco not running'
15
16
17 def main(argv=None):
18     if not argv:
19         argv = sys.argv
20
21     usage = "usage: python %s name rdsfile outfilename [options]"
22
23     parser = optparse.OptionParser(usage=usage)
24     parser.add_option("--raw", action="store_false", dest="doNormalize")
25     parser.add_option("--color", dest="color")
26     parser.add_option("--altcolor", dest="altColor")
27     parser.add_option("--chrom", dest="limitChrom")
28     parser.add_option("--shift", type="int", dest="shift")
29     parser.add_option("--split", action="store_true", dest="doSplit")
30     parser.add_option("--listfile", dest="listfilename")
31     parser.add_option("--listprefix", dest="listPrefix")
32     parser.add_option("--group", dest="group")
33     parser.add_option("--startPriority", type="float", dest="startPriority")
34     parser.add_option("--skiprandom", action="store_true", dest="skipRandom")
35     parser.add_option("--nomulti", action="store_false", dest="withMulti")
36     parser.add_option("--splices", action="store_true", dest="withSplices")
37     parser.add_option("--singlebase", action="store_true", dest="doSingle")
38     parser.add_option("--cache", type="int", dest="cachePages")
39     parser.add_option("--enforceChr", action="store_true", dest="enforceChr")
40     parser.add_option("--stranded", dest="strand")
41     parser.add_option("--maxchunk", type="int", dest="chunk")
42     parser.set_defaults(doNormalize=True, color=None, altColor="", limitChrom=None,
43                         shift=0, doSplit=False, listfilename=None, listPrefix="",
44                         group="", startPriority=0.01, skipRandom=False, withMulti=True,
45                         withSplices=False, doSingle=False, cachePages=-1, enforceChr=False,
46                         strand=None, chunk=20)
47
48     (options, args) = parser.parse_args(argv[1:])
49
50     if len(args) < 3:
51         print usage
52         sys.exit(1)
53
54     name = args[0]
55     hitfilename = args[1]
56     outfilename = args[2]
57
58     makewiggle(name, hitfilename, outfilename, options.doNormalize, options.color, options.altColor,
59                options.limitChrom, options.shift, options.doSplit, options.listfilename, options.listPrefix,
60                options.group, options.startPriority, options.skipRandom, options.withMulti,
61                options.withSplices, options.doSingle, options.cachePages, options.enforceChr, options.strand,
62                options.chunk)
63
64
65 def makewiggle(name, hitfilename, outfilename, doNormalize=True, color=None, altColor="",
66                limitChrom=None, shift=0, doSplit=False, listfilename=None, listPrefix="",
67                group="", startPriority=0.01, skipRandom=False, withMulti=True, withSplices=False,
68                doSingle=False, cachePages=-1, enforceChr=False, strand=None, chunk=20):
69
70     priorityIncrement = 0.01
71     wigType = "bedGraph"
72
73     if color is not None:
74         colorString = " color=%s" % color
75     else:
76         colorString = ""
77
78     if altColor:
79         colorString += " altcolor=%s" % altColor
80
81     doList = False
82     if listfilename is not None:
83         doList = True
84     
85     chromLimit = False
86     if limitChrom is not None:
87         chromLimit = True
88
89     if group:
90         groupName = "group=%s" % group
91
92     doCache = False
93     if cachePages > 0:
94         doCache = True
95
96     maxSpan = chunk * 1000000
97
98     isStranded = False
99     strandedDirection = "both"
100     if strand is not None:
101         isStranded = True
102         if strand == "plus":
103             strandedDirection = "plusOnly"
104         elif strand == "minus":
105             strandedDirection = "minusOnly"
106
107         print "will keep track of %s strand(s)" % strandedDirection
108
109     if shift:
110         print "Will shift reads by +/- %d bp according to their sense" % shift
111         name += "shift=%d" % shift
112     
113     hitRDS = readDataset(hitfilename, verbose=True, cache=doCache)
114
115     if cachePages > hitRDS.getDefaultCacheSize():
116         hitRDS.setDBcache(cachePages)
117
118     readlen = hitRDS.getReadSize()
119
120     if doNormalize:
121         normalizeBy = len(hitRDS) / 1000000.
122     else:
123         normalizeBy = 1.
124
125     if doList:
126         listfile = open(listfilename, "w")
127
128     priority = startPriority    
129     if not doSplit:
130         outfile = open(outfilename, "w")
131         if doList:
132             listfile.write("%s%s\n" % (listPrefix, outfilename))
133
134         outfile.write('track type=%s name="%s" %s priority=%.3f visibility=full%s\n' % (wigType, name, groupName, priority, colorString)) 
135
136     chromList = hitRDS.getChromosomes()
137     chromList.sort()
138     for achrom in chromList:
139         if enforceChr and ("chr" not in achrom):
140             continue
141
142         if chromLimit and achrom != limitChrom:
143             continue
144
145         if skipRandom and "random" in achrom:
146             continue
147
148         if doSplit:
149             outfile = open("%s.%s" % (outfilename, achrom), "w")
150             if doList:
151                 listfile.write("%s%s.%s\n" % (listPrefix, outfilename, achrom))
152
153             outfile.write('track type=%s name="%s %s" %s priority=%.3f visibility=full%s\n' % (wigType, name, achrom, groupName, priority, colorString))   
154             priority += priorityIncrement  
155
156         lastNT = hitRDS.getMaxCoordinate(achrom, doMulti=withMulti, doSplices=withSplices) + readlen
157         spanStart = 0
158
159         previousVal = 0
160         previousStart = 1
161         lineIndex = 0
162         for spanStop in xrange(maxSpan, lastNT+maxSpan, maxSpan):
163             if spanStop > lastNT:
164                 spanStop = lastNT
165
166             print achrom, spanStart, spanStop
167             chromModel = hitRDS.getChromProfile(achrom, spanStart, spanStop, withMulti, withSplices, normalizeBy, isStranded, strandedDirection, shiftValue=shift)
168
169             for index in xrange(len(chromModel)):
170                 currentVal = chromModel[index]
171                 if doSingle:
172                     outline = "%s %d %.4f\n" % (achrom, spanStart + index, currentVal)
173                     outfile.write(outline)
174                     continue
175
176                 if currentVal == previousVal:
177                     continue
178
179                 if currentVal != previousVal:
180                     if previousVal != 0:
181                         lastpos = index + spanStart
182                         outline = "%s %d %d %.4f\n" % (achrom, previousStart, lastpos, previousVal)
183                         outfile.write(outline)
184                         lineIndex += 1
185
186                     previousVal = currentVal
187                     previousStart = index + spanStart
188
189             currentVal = 0
190             del chromModel
191             spanStart = spanStop + 1
192
193         if doSplit:
194             outfile.close()
195
196         if doSingle:
197             print index + 1
198         else:
199             print lineIndex
200
201     if not doSplit:
202         outfile.close()
203
204     if doList:
205         listfile.close()
206
207
208 if __name__ == "__main__":
209     main(sys.argv)