X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=makebedfromrds.py;h=a0f141699b947f99bc48a8518bce2f8e41ef36e8;hp=924bc7e9f163ef4c9efe282cd82a643a3faaa310;hb=0d3e3112fd04c2e6b44a25cacef1d591658ad181;hpb=5e4ae21098dba3d1edcf11e7279da0d84c3422e4 diff --git a/makebedfromrds.py b/makebedfromrds.py index 924bc7e..a0f1416 100755 --- a/makebedfromrds.py +++ b/makebedfromrds.py @@ -11,8 +11,10 @@ try: except: pass -import sys, optparse -from commoncode import readDataset +import sys +import optparse +import ReadDataset +from commoncode import getConfigParser, getConfigOption, getConfigBoolOption, getConfigIntOption PLUS_COLOR = "0,0,255" MINUS_COLOR = "255,0,0" @@ -27,31 +29,14 @@ def main(argv=None): if not argv: argv = sys.argv - verstring = "%prog: version 3.1" + verstring = "makebedfromrds: version 3.2" print verstring doPairs = False usage = "usage: %prog trackLabel rdsFile bamFile [options]" - parser = optparse.OptionParser(usage=usage) - parser.add_option("--nouniq", action="store_false", dest="withUniqs") - parser.add_option("--nomulti", action="store_false", dest="withMulti") - parser.add_option("--splices", action="store_true", dest="doSplices") - parser.add_option("--spliceColor", action="store_true", dest="doSpliceColor") - parser.add_option("--flag", dest="withFlag") - parser.add_option("--flaglike", action="store_true", dest="useFlagLike") - parser.add_option("--pairs", type="int", dest="pairDist") - parser.add_option("--cache", type="int", dest="cachePages") - parser.add_option("--enforceChr", action="store_true", dest="enforceChr") - parser.add_option("--chrom", action="append", dest="chromList") - parser.add_option("--strand", dest="strand") - parser.add_option("-r", "--region", dest="region", type="string", - help="samtools region string") - parser.set_defaults(withUniqs=True, withMulti=True, doSplices=False, doSpliceColor=False, - pairDist=None, withFlag="", useFlagLike=False, enforceChr=False, - senseStrand="", allChrom=True, 
def getParser(usage):
    """Build the optparse parser for the makebedfromrds command line.

    The command-line flags are registered first.  Their defaults are then
    looked up in the "makebedfromrds" section of the parser returned by
    getConfigParser(); the last argument of every getConfig*Option call is
    the fallback handed to that helper.

    Args:
        usage: usage string passed straight to optparse.OptionParser.

    Returns:
        An optparse.OptionParser with all options and defaults installed.
    """
    parser = optparse.OptionParser(usage=usage)

    # Flags that switch a read class off.
    parser.add_option("--nouniq", action="store_false", dest="withUniqs")
    parser.add_option("--nomulti", action="store_false", dest="withMulti")

    # Flags that switch a feature on.
    parser.add_option("--splices", action="store_true", dest="doSplices")
    parser.add_option("--spliceColor", action="store_true", dest="doSpliceColor")
    parser.add_option("--flaglike", action="store_true", dest="useFlagLike")
    parser.add_option("--enforceChr", action="store_true", dest="enforceChr")

    # Options that carry a value.
    parser.add_option("--flag", dest="withFlag")
    parser.add_option("--pairs", type="int", dest="pairDist")
    parser.add_option("--cache", type="int", dest="cachePages")
    parser.add_option("--chrom", action="append", dest="chromList")
    # NOTE(review): --strand stores into options.strand, while the default
    # installed below is named senseStrand -- confirm which one main() reads.
    parser.add_option("--strand", dest="strand")

    configParser = getConfigParser()
    section = "makebedfromrds"

    withUniqs = getConfigBoolOption(configParser, section, "withUniqs", True)
    # Fallback is True: --nomulti is a store_false flag, so a False fallback
    # would turn the flag into a no-op and would disagree with the
    # withMulti=True default of outputBedFromRds().
    withMulti = getConfigBoolOption(configParser, section, "withMulti", True)
    doSplices = getConfigBoolOption(configParser, section, "doSplices", False)
    doSpliceColor = getConfigBoolOption(configParser, section, "doSpliceColor", False)
    # NOTE(review): --pairs is type="int"; a value coming from the config
    # file through getConfigOption may not be an int -- confirm and convert
    # if needed.
    pairDist = getConfigOption(configParser, section, "pairDist", None)
    withFlag = getConfigOption(configParser, section, "withFlag", "")
    useFlagLike = getConfigBoolOption(configParser, section, "useFlagLike", False)
    enforceChr = getConfigBoolOption(configParser, section, "enforceChr", False)
    senseStrand = getConfigOption(configParser, section, "senseStrand", "")
    allChrom = getConfigBoolOption(configParser, section, "allChrom", True)
    doCache = getConfigBoolOption(configParser, section, "doCache", False)
    # cachePages is compared numerically later and --cache is type="int", so
    # read it with the integer helper.
    # NOTE(review): assumes getConfigIntOption takes the same
    # (parser, section, option, default) arguments as its siblings -- confirm
    # in commoncode.
    cachePages = getConfigIntOption(configParser, section, "cachePages", 100000)

    parser.set_defaults(withUniqs=withUniqs, withMulti=withMulti,
                        doSplices=doSplices, doSpliceColor=doSpliceColor,
                        pairDist=pairDist, withFlag=withFlag,
                        useFlagLike=useFlagLike, enforceChr=enforceChr,
                        senseStrand=senseStrand, allChrom=allChrom,
                        doCache=doCache, cachePages=cachePages,
                        chromList=[])

    return parser
read["weight"] + rPairID= read["pairID"] rightstop = rightpos + readlength - 1 rpart = 1 rstartList = [rightpos] rstopList = [rightstop] - except: - (rightpos, RLstop, RRstart, rightstop, rightsense, rPairID) = localList[localIndex + 1] + except KeyError: + rightpos = read["startL"] + RLstop = read["stopL"] + RRstart = read["startR"] + rightstop = read["stopR"] + rightsense = read["sense"] + rPairID = read["pairID"] rightweight = 1.0 rpart = 2 rstartList = [rightpos, RRstart] @@ -229,7 +269,10 @@ def outputBedFromRds(trackType, rdsfile, outfilename, withUniqs=True, withMulti= else: hitDict = RDS.getReadsDict(fullChrom=True, chrom=achrom, flag=withFlag, withWeight=True, withID=True, doUniqs=withUniqs, doMulti=withMulti, readIDDict=False, flagLike=useFlagLike) try: - for (pos, sense, weight, readID) in hitDict[achrom]: + for read in hitDict[achrom]: + pos = read["start"] + sense = read["sense"] + readID = read["readID"] splitReadWrite(outfile, achrom, 1, [pos], [pos + readlength - 1], sense, readID, PLUS_COLOR, MINUS_COLOR) index += 1 except: @@ -239,7 +282,13 @@ def outputBedFromRds(trackType, rdsfile, outfilename, withUniqs=True, withMulti= spliceDict = RDS.getSplicesDict(fullChrom=True, chrom=achrom, flag=withFlag, withID=True, flagLike=useFlagLike) if achrom not in spliceDict: continue - for (readstart, Lstop, Rstart, readstop, rsense, readName) in spliceDict[achrom]: + for read in spliceDict[achrom]: + readstart = read["startL"] + Lstop = read["stopL"] + Rstart = read["startR"] + readstop = read["stopR"] + rsense = read["sense"] + readName = read["readID"] splitReadWrite(outfile, achrom, 2, [readstart, Rstart], [Lstop, readstop], rsense, readName, PLUS_COLOR, MINUS_COLOR) index += 1 @@ -254,7 +303,13 @@ def outputBedFromRds(trackType, rdsfile, outfilename, withUniqs=True, withMulti= spliceDict = RDS.getSplicesDict(fullChrom=True, chrom=achrom, flag=withFlag, withID=True, flagLike=useFlagLike) if achrom not in spliceDict: continue - for (readstart, Lstop, Rstart, 
readstop, rsense, readName) in spliceDict[achrom]: + for read in spliceDict[achrom]: + readstart = read["startL"] + Lstop = read["stopL"] + Rstart = read["startR"] + readstop = read["stopR"] + rsense = read["sense"] + readName = read["readID"] splitReadWrite(outfile, achrom, 2, [readstart, Rstart], [Lstop, readstop], rsense, readName, PLUS_COLOR, MINUS_COLOR) index += 1