5 # Created by Ali Mortazavi on 7/19/08.
15 from commoncode import readDataset
17 PLUS_COLOR = "0,0,255"
18 MINUS_COLOR = "255,0,0"
19 MULTI_PLUS_COLOR = "64,64,64"
20 MULTI_MINUS_COLOR = "192,192,192"
21 SPLICE_COLOR = "255,0,0"
22 UNIQUE_COLOR = "0,0,0"
23 MULTI_COLOR = "128,128,128"
30 verstring = "%prog: version 3.1"
35 usage = "usage: %prog trackLabel rdsFile bamFile [options]"
37 parser = optparse.OptionParser(usage=usage)
38 parser.add_option("--nouniq", action="store_false", dest="withUniqs")
39 parser.add_option("--nomulti", action="store_false", dest="withMulti")
40 parser.add_option("--splices", action="store_true", dest="doSplices")
41 parser.add_option("--spliceColor", action="store_true", dest="doSpliceColor")
42 parser.add_option("--flag", dest="withFlag")
43 parser.add_option("--flaglike", action="store_true", dest="useFlagLike")
44 parser.add_option("--pairs", type="int", dest="pairDist")
45 parser.add_option("--cache", type="int", dest="cachePages")
46 parser.add_option("--enforceChr", action="store_true", dest="enforceChr")
47 parser.add_option("--chrom", action="append", dest="chromList")
48 parser.add_option("--strand", dest="strand")
49 parser.add_option("-r", "--region", dest="region", type="string",
50 help="samtools region string")
51 parser.set_defaults(withUniqs=True, withMulti=True, doSplices=False, doSpliceColor=False,
52 pairDist=None, withFlag="", useFlagLike=False, enforceChr=False,
53 senseStrand="", allChrom=True, doCache=False, cachePages=100000,
55 (options, args) = parser.parse_args(argv[1:])
60 print "no track specified - see --help for usage"
66 print "no RDS file specified - see --help for usage"
72 print "no output file specified - see --help for usage"
75 if options.pairDist is not None:
79 options.allChrom = False
81 outputBedFromRds(trackType, rdsfile, outfilename, options.withUniqs, options.withMulti,
82 options.doSplices, options.doSpliceColor, doPairs, options.pairDist,
83 options.withFlag, options.useFlagLike, options.enforceChr, options.senseStrand,
84 options.allChrom, options.doCache, options.cachePages, options.chromList)
87 def outputBedFromRds(trackType, rdsfile, outfilename, withUniqs=True, withMulti=True,
88 doSplices=False, doSpliceColor=False, doPairs=False, pairDist=1000000,
89 withFlag="", useFlagLike=False, enforceChr=False, senseStrand="",
90 allChrom=True, doCache=False, cachePages=100000, chromList=[]):
92 if not withUniqs and not withMulti and not doSplices:
93 print "must be outputing at least one of uniqs, multi, or -splices - exiting"
97 RDS = readDataset(rdsfile, verbose = True, cache=doCache)
99 #check that this is better than the dataset's default cache size
100 if cachePages > RDS.getDefaultCacheSize():
101 RDS.setDBcache(cachePages)
103 readlength = RDS.getReadSize()
104 minDist = -1 * readlength
108 chromList = RDS.getChromosomes()
110 chromList = RDS.getChromosomes(table="multi")
112 chromList = RDS.getChromosomes(table="splices")
116 outfile = open(outfilename, "w")
117 outfile.write('track name="%s" visibility=4 itemRgb="On"\n' % (trackType))
119 if withUniqs or withMulti:
120 for achrom in chromList:
122 if doNotOutputChromosome(achrom, enforceChr):
125 print "chromosome %s" % (achrom)
128 hitDict = RDS.getReadsDict(fullChrom=True, chrom=achrom, flag=withFlag,
129 withWeight=True, withPairID=True, doUniqs=withUniqs,
130 doMulti=withMulti, readIDDict=True,
131 flagLike=useFlagLike, strand=senseStrand)
133 readIDList = hitDict.keys()
135 spliceDict = RDS.getSplicesDict(fullChrom=True, chrom=achrom, flag=withFlag,
136 withPairID=True, readIDDict=True,
137 flagLike=useFlagLike, strand=senseStrand)
139 spliceIDList = spliceDict.keys()
141 for readID in readIDList:
144 for readID in spliceIDList:
147 combinedIDList = combDict.keys()
149 combinedIDList = readIDList
151 for readID in combinedIDList:
154 localList = hitDict[readID]
160 localList += spliceDict[readID]
165 listLen = len(localList) - 1
167 while localIndex <= listLen:
169 (leftpos, leftsense, leftweight, lPairID) = localList[localIndex]
170 leftstop = leftpos + readlength - 1
172 startList = [leftpos]
173 stopList = [leftstop]
175 (leftpos, LLstop, LRstart, leftstop, leftsense, lPairID) = localList[localIndex]
178 startList = [leftpos, LRstart]
179 stopList = [LLstop, leftstop]
181 if localIndex < listLen:
183 (rightpos, rightsense, rightweight, rPairID) = localList[localIndex + 1]
184 rightstop = rightpos + readlength - 1
186 rstartList = [rightpos]
187 rstopList = [rightstop]
189 (rightpos, RLstop, RRstart, rightstop, rightsense, rPairID) = localList[localIndex + 1]
192 rstartList = [rightpos, RRstart]
193 rstopList = [RLstop, rightstop]
200 if leftsense == "+" and rightsense == "-" and minDist < (rightpos - leftstop) < pairDist and lPairID != rPairID:
202 plusSenseColor, minusSenseColor = getSpliceColor(lpart, rpart, leftweight, rightweight, hackType="1")
203 elif leftweight == 1.0 or rightweight == 1.0:
204 plusSenseColor = "0,0,0"
205 minusSenseColor = MINUS_COLOR
207 plusSenseColor = "128,128,128"
208 minusSenseColor = MULTI_MINUS_COLOR
210 splitReadWrite(outfile, achrom, lpart + rpart, startList + rstartList, stopList + rstopList, "+", readID, plusSenseColor, minusSenseColor)
215 plusSenseColor, minusSenseColor = getSpliceColor(lpart, rpart, leftweight, rightweight)
217 elif leftweight == 1.0:
218 plusSenseColor = PLUS_COLOR
219 minusSenseColor = MINUS_COLOR
220 outputSense = leftsense
222 plusSenseColor = PLUS_COLOR
223 minusSenseColor = MINUS_COLOR
224 outputSense = leftsense
226 splitReadWrite(outfile, achrom, lpart, startList, stopList, outputSense, readID, plusSenseColor, minusSenseColor)
230 hitDict = RDS.getReadsDict(fullChrom=True, chrom=achrom, flag=withFlag, withWeight=True, withID=True, doUniqs=withUniqs, doMulti=withMulti, readIDDict=False, flagLike=useFlagLike)
232 for (pos, sense, weight, readID) in hitDict[achrom]:
233 splitReadWrite(outfile, achrom, 1, [pos], [pos + readlength - 1], sense, readID, PLUS_COLOR, MINUS_COLOR)
239 spliceDict = RDS.getSplicesDict(fullChrom=True, chrom=achrom, flag=withFlag, withID=True, flagLike=useFlagLike)
240 if achrom not in spliceDict:
242 for (readstart, Lstop, Rstart, readstop, rsense, readName) in spliceDict[achrom]:
243 splitReadWrite(outfile, achrom, 2, [readstart, Rstart], [Lstop, readstop], rsense, readName, PLUS_COLOR, MINUS_COLOR)
247 for achrom in chromList:
249 if doNotOutputChromosome(achrom, enforceChr):
252 print "chromosome %s" % (achrom)
254 spliceDict = RDS.getSplicesDict(fullChrom=True, chrom=achrom, flag=withFlag, withID=True, flagLike=useFlagLike)
255 if achrom not in spliceDict:
257 for (readstart, Lstop, Rstart, readstop, rsense, readName) in spliceDict[achrom]:
258 splitReadWrite(outfile, achrom, 2, [readstart, Rstart], [Lstop, readstop], rsense, readName, PLUS_COLOR, MINUS_COLOR)
266 def singleReadWrite(chrom, pos, sense, weight, readID, readlength, outfile):
268 stop = pos + readlength - 1
269 senseColor = getSenseColor(sense, weight)
270 outfile.write("%s %d %d %s %.1f %s 0 0 %s\n" % (chrom, start, stop, readID, weight, sense, senseColor))
273 def getSenseColor(sense, weight):
275 senseColor = getMultiSenseColor(sense)
277 senseColor = getSingleSenseColor(sense)
282 def getMultiSenseColor(sense):
284 senseColor = MULTI_PLUS_COLOR
286 senseColor = MULTI_MINUS_COLOR
291 def getSingleSenseColor(sense):
293 senseColor = PLUS_COLOR
295 senseColor = MINUS_COLOR
300 def splitReadWrite(outfile, chrom, numPieces, startList, stopList, rsense, readName, plusSense, minusSense):
301 readSizes = getReadSizes(numPieces, startList, stopList)
302 readCoords = getReadCoords(numPieces, startList)
303 leftStart = startList[0]
304 rightStop = stopList[-1]
307 senseCode = plusSense
309 senseCode = minusSense
311 outline = "%s\t%d\t%d\t%s\t1000\t%s\t0\t0\t%s\t%d\t%s\t%s\n" % (chrom, leftStart, rightStop, readName, rsense, senseCode, numPieces, readSizes, readCoords)
312 outfile.write(outline)
315 def getReadSizes(numPieces, startList, stopList):
316 readSizes = "%d" % (stopList[0] - startList[0])
317 for index in range(1, numPieces):
318 readSizes += ',%d' % (stopList[index] - startList[index])
323 def getReadCoords(numPieces, startList):
325 for index in range(1, numPieces):
326 readCoords += ",%d" % (startList[index] - startList[0])
331 def getSpliceColor(lpart, rpart, leftweight, rightweight, hackType=None):
333 if (lpart + rpart) > 2:
334 aColor = SPLICE_COLOR
335 bColor = SPLICE_COLOR
336 elif leftweight == 1.0 or rightweight == 1.0:
337 aColor = UNIQUE_COLOR
338 bColor = UNIQUE_COLOR
344 aColor = SPLICE_COLOR
345 bColor = SPLICE_COLOR
346 elif leftweight == 1.0:
347 aColor = UNIQUE_COLOR
348 bColor = UNIQUE_COLOR
353 return aColor, bColor
356 def doNotOutputChromosome(achrom, enforceChr):
362 if enforceChr and ("chr" not in achrom):
368 if __name__ == "__main__":