5 # Created by Ali Mortazavi on 7/19/08.
17 from commoncode import getConfigParser, getConfigOption, getConfigBoolOption, getConfigIntOption
19 PLUS_COLOR = "0,0,255"
20 MINUS_COLOR = "255,0,0"
21 MULTI_PLUS_COLOR = "64,64,64"
22 MULTI_MINUS_COLOR = "192,192,192"
23 SPLICE_COLOR = "255,0,0"
24 UNIQUE_COLOR = "0,0,0"
25 MULTI_COLOR = "128,128,128"
32 verstring = "makebedfromrds: version 3.2"
37 usage = "usage: %prog trackLabel rdsFile bamFile [options]"
39 parser = getParser(usage)
40 (options, args) = parser.parse_args(argv[1:])
45 print "no track specified - see --help for usage"
51 print "no RDS file specified - see --help for usage"
57 print "no output file specified - see --help for usage"
60 if options.pairDist is not None:
64 options.allChrom = False
66 outputBedFromRds(trackType, rdsfile, outfilename, options.withUniqs, options.withMulti,
67 options.doSplices, options.doSpliceColor, doPairs, options.pairDist,
68 options.withFlag, options.useFlagLike, options.enforceChr, options.senseStrand,
69 options.allChrom, options.doCache, options.cachePages, options.chromList)
73 parser = optparse.OptionParser(usage=usage)
74 parser.add_option("--nouniq", action="store_false", dest="withUniqs")
75 parser.add_option("--nomulti", action="store_false", dest="withMulti")
76 parser.add_option("--splices", action="store_true", dest="doSplices")
77 parser.add_option("--spliceColor", action="store_true", dest="doSpliceColor")
78 parser.add_option("--flag", dest="withFlag")
79 parser.add_option("--flaglike", action="store_true", dest="useFlagLike")
80 parser.add_option("--pairs", type="int", dest="pairDist")
81 parser.add_option("--cache", type="int", dest="cachePages")
82 parser.add_option("--enforceChr", action="store_true", dest="enforceChr")
83 parser.add_option("--chrom", action="append", dest="chromList")
84 parser.add_option("--strand", dest="strand")
86 configParser = getConfigParser()
87 section = "makebedfromrds"
88 withUniqs = getConfigBoolOption(configParser, section, "withUniqs", True)
89 withMulti = getConfigBoolOption(configParser, section, "withMulti", False)
90 doSplices = getConfigBoolOption(configParser, section, "doSplices", False)
91 doSpliceColor = getConfigBoolOption(configParser, section, "doSpliceColor", False)
92 pairDist = getConfigOption(configParser, section, "pairDist", None)
93 withFlag = getConfigOption(configParser, section, "withFlag", "")
94 useFlagLike = getConfigBoolOption(configParser, section, "useFlagLike", False)
95 enforceChr = getConfigBoolOption(configParser, section, "enforceChr", False)
96 senseStrand = getConfigOption(configParser, section, "senseStrand", "")
97 allChrom = getConfigBoolOption(configParser, section, "allChrom", True)
98 doCache = getConfigBoolOption(configParser, section, "doCache", False)
99 cachePages = getConfigOption(configParser, section, "cachePages", 100000)
101 parser.set_defaults(withUniqs=withUniqs, withMulti=withMulti, doSplices=doSplices, doSpliceColor=doSpliceColor,
102 pairDist=pairDist, withFlag=withFlag, useFlagLike=useFlagLike, enforceChr=enforceChr,
103 senseStrand=senseStrand, allChrom=allChrom, doCache=doCache, cachePages=cachePages,
109 def outputBedFromRds(trackType, rdsfile, outfilename, withUniqs=True, withMulti=True,
110 doSplices=False, doSpliceColor=False, doPairs=False, pairDist=1000000,
111 withFlag="", useFlagLike=False, enforceChr=False, senseStrand="",
112 allChrom=True, doCache=False, cachePages=100000, chromList=[]):
114 if not withUniqs and not withMulti and not doSplices:
115 print "must be outputing at least one of uniqs, multi, or -splices - exiting"
119 RDS = ReadDataset.ReadDataset(rdsfile, verbose = True, cache=doCache)
121 #check that this is better than the dataset's default cache size
122 if cachePages > RDS.getDefaultCacheSize():
123 RDS.setDBcache(cachePages)
125 readlength = RDS.getReadSize()
126 minDist = -1 * readlength
130 chromList = RDS.getChromosomes()
132 chromList = RDS.getChromosomes(table="multi")
134 chromList = RDS.getChromosomes(table="splices")
138 outfile = open(outfilename, "w")
139 outfile.write('track name="%s" visibility=4 itemRgb="On"\n' % (trackType))
141 if withUniqs or withMulti:
142 for achrom in chromList:
144 if doNotOutputChromosome(achrom, enforceChr):
147 print "chromosome %s" % (achrom)
150 hitDict = RDS.getReadsDict(fullChrom=True, chrom=achrom, flag=withFlag,
151 withWeight=True, withPairID=True, doUniqs=withUniqs,
152 doMulti=withMulti, readIDDict=True,
153 flagLike=useFlagLike, strand=senseStrand)
155 readIDList = hitDict.keys()
157 spliceDict = RDS.getSplicesDict(fullChrom=True, chrom=achrom, flag=withFlag,
158 withPairID=True, readIDDict=True,
159 flagLike=useFlagLike, strand=senseStrand)
161 spliceIDList = spliceDict.keys()
163 for readID in readIDList:
166 for readID in spliceIDList:
169 combinedIDList = combDict.keys()
171 combinedIDList = readIDList
173 for readID in combinedIDList:
176 localList = hitDict[readID]
182 localList += spliceDict[readID]
187 listLen = len(localList) - 1
189 while localIndex <= listLen:
190 read = localList[localIndex]
192 leftpos = read["start"]
193 leftsense = read["sense"]
194 leftweight = read["weight"]
195 lPairID = read["pairID"]
196 leftstop = leftpos + readlength - 1
198 startList = [leftpos]
199 stopList = [leftstop]
201 leftpos = read["startL"]
202 LLstop = read["stopL"]
203 LRstart = read["startR"]
204 leftstop = read["stopL"]
205 leftsense = read["sense"]
206 lPairID = read["pairID"]
209 startList = [leftpos, LRstart]
210 stopList = [LLstop, leftstop]
212 if localIndex < listLen:
213 read = localList[localIndex + 1]
215 rightpos = read["start"]
216 rightsense = read["sense"]
217 rightweight = read["weight"]
218 rPairID= read["pairID"]
219 rightstop = rightpos + readlength - 1
221 rstartList = [rightpos]
222 rstopList = [rightstop]
224 rightpos = read["startL"]
225 RLstop = read["stopL"]
226 RRstart = read["startR"]
227 rightstop = read["stopR"]
228 rightsense = read["sense"]
229 rPairID = read["pairID"]
232 rstartList = [rightpos, RRstart]
233 rstopList = [RLstop, rightstop]
240 if leftsense == "+" and rightsense == "-" and minDist < (rightpos - leftstop) < pairDist and lPairID != rPairID:
242 plusSenseColor, minusSenseColor = getSpliceColor(lpart, rpart, leftweight, rightweight, hackType="1")
243 elif leftweight == 1.0 or rightweight == 1.0:
244 plusSenseColor = "0,0,0"
245 minusSenseColor = MINUS_COLOR
247 plusSenseColor = "128,128,128"
248 minusSenseColor = MULTI_MINUS_COLOR
250 splitReadWrite(outfile, achrom, lpart + rpart, startList + rstartList, stopList + rstopList, "+", readID, plusSenseColor, minusSenseColor)
255 plusSenseColor, minusSenseColor = getSpliceColor(lpart, rpart, leftweight, rightweight)
257 elif leftweight == 1.0:
258 plusSenseColor = PLUS_COLOR
259 minusSenseColor = MINUS_COLOR
260 outputSense = leftsense
262 plusSenseColor = PLUS_COLOR
263 minusSenseColor = MINUS_COLOR
264 outputSense = leftsense
266 splitReadWrite(outfile, achrom, lpart, startList, stopList, outputSense, readID, plusSenseColor, minusSenseColor)
270 hitDict = RDS.getReadsDict(fullChrom=True, chrom=achrom, flag=withFlag, withWeight=True, withID=True, doUniqs=withUniqs, doMulti=withMulti, readIDDict=False, flagLike=useFlagLike)
272 for read in hitDict[achrom]:
274 sense = read["sense"]
275 readID = read["readID"]
276 splitReadWrite(outfile, achrom, 1, [pos], [pos + readlength - 1], sense, readID, PLUS_COLOR, MINUS_COLOR)
282 spliceDict = RDS.getSplicesDict(fullChrom=True, chrom=achrom, flag=withFlag, withID=True, flagLike=useFlagLike)
283 if achrom not in spliceDict:
285 for read in spliceDict[achrom]:
286 readstart = read["startL"]
287 Lstop = read["stopL"]
288 Rstart = read["startR"]
289 readstop = read["stopR"]
290 rsense = read["sense"]
291 readName = read["readID"]
292 splitReadWrite(outfile, achrom, 2, [readstart, Rstart], [Lstop, readstop], rsense, readName, PLUS_COLOR, MINUS_COLOR)
296 for achrom in chromList:
298 if doNotOutputChromosome(achrom, enforceChr):
301 print "chromosome %s" % (achrom)
303 spliceDict = RDS.getSplicesDict(fullChrom=True, chrom=achrom, flag=withFlag, withID=True, flagLike=useFlagLike)
304 if achrom not in spliceDict:
306 for read in spliceDict[achrom]:
307 readstart = read["startL"]
308 Lstop = read["stopL"]
309 Rstart = read["startR"]
310 readstop = read["stopR"]
311 rsense = read["sense"]
312 readName = read["readID"]
313 splitReadWrite(outfile, achrom, 2, [readstart, Rstart], [Lstop, readstop], rsense, readName, PLUS_COLOR, MINUS_COLOR)
321 def singleReadWrite(chrom, pos, sense, weight, readID, readlength, outfile):
323 stop = pos + readlength - 1
324 senseColor = getSenseColor(sense, weight)
325 outfile.write("%s %d %d %s %.1f %s 0 0 %s\n" % (chrom, start, stop, readID, weight, sense, senseColor))
328 def getSenseColor(sense, weight):
330 senseColor = getMultiSenseColor(sense)
332 senseColor = getSingleSenseColor(sense)
337 def getMultiSenseColor(sense):
339 senseColor = MULTI_PLUS_COLOR
341 senseColor = MULTI_MINUS_COLOR
346 def getSingleSenseColor(sense):
348 senseColor = PLUS_COLOR
350 senseColor = MINUS_COLOR
355 def splitReadWrite(outfile, chrom, numPieces, startList, stopList, rsense, readName, plusSense, minusSense):
356 readSizes = getReadSizes(numPieces, startList, stopList)
357 readCoords = getReadCoords(numPieces, startList)
358 leftStart = startList[0]
359 rightStop = stopList[-1]
362 senseCode = plusSense
364 senseCode = minusSense
366 outline = "%s\t%d\t%d\t%s\t1000\t%s\t0\t0\t%s\t%d\t%s\t%s\n" % (chrom, leftStart, rightStop, readName, rsense, senseCode, numPieces, readSizes, readCoords)
367 outfile.write(outline)
370 def getReadSizes(numPieces, startList, stopList):
371 readSizes = "%d" % (stopList[0] - startList[0])
372 for index in range(1, numPieces):
373 readSizes += ',%d' % (stopList[index] - startList[index])
378 def getReadCoords(numPieces, startList):
380 for index in range(1, numPieces):
381 readCoords += ",%d" % (startList[index] - startList[0])
386 def getSpliceColor(lpart, rpart, leftweight, rightweight, hackType=None):
388 if (lpart + rpart) > 2:
389 aColor = SPLICE_COLOR
390 bColor = SPLICE_COLOR
391 elif leftweight == 1.0 or rightweight == 1.0:
392 aColor = UNIQUE_COLOR
393 bColor = UNIQUE_COLOR
399 aColor = SPLICE_COLOR
400 bColor = SPLICE_COLOR
401 elif leftweight == 1.0:
402 aColor = UNIQUE_COLOR
403 bColor = UNIQUE_COLOR
408 return aColor, bColor
411 def doNotOutputChromosome(achrom, enforceChr):
417 if enforceChr and ("chr" not in achrom):
423 if __name__ == "__main__":