erange version 4.0a dev release
[erange.git] / makebedfromrds.py
index 924bc7e9f163ef4c9efe282cd82a643a3faaa310..a0f141699b947f99bc48a8518bce2f8e41ef36e8 100755 (executable)
@@ -11,8 +11,10 @@ try:
 except:
     pass
 
-import sys, optparse
-from commoncode import readDataset
+import sys
+import optparse
+import ReadDataset
+from commoncode import getConfigParser, getConfigOption, getConfigBoolOption, getConfigIntOption
 
 PLUS_COLOR = "0,0,255"
 MINUS_COLOR = "255,0,0"
@@ -27,31 +29,14 @@ def main(argv=None):
     if not argv:
         argv = sys.argv
 
-    verstring = "%prog: version 3.1"
+    verstring = "makebedfromrds: version 3.2"
     print verstring
 
     doPairs = False
     
     usage = "usage:  %prog trackLabel rdsFile bamFile [options]"
 
-    parser = optparse.OptionParser(usage=usage)
-    parser.add_option("--nouniq", action="store_false", dest="withUniqs")
-    parser.add_option("--nomulti", action="store_false", dest="withMulti")
-    parser.add_option("--splices", action="store_true", dest="doSplices")
-    parser.add_option("--spliceColor", action="store_true", dest="doSpliceColor")
-    parser.add_option("--flag", dest="withFlag")
-    parser.add_option("--flaglike", action="store_true", dest="useFlagLike")
-    parser.add_option("--pairs", type="int", dest="pairDist")
-    parser.add_option("--cache", type="int", dest="cachePages")
-    parser.add_option("--enforceChr", action="store_true", dest="enforceChr")
-    parser.add_option("--chrom", action="append", dest="chromList")
-    parser.add_option("--strand", dest="strand")
-    parser.add_option("-r", "--region", dest="region", type="string",
-                      help="samtools region string")
-    parser.set_defaults(withUniqs=True, withMulti=True, doSplices=False, doSpliceColor=False,
-                        pairDist=None, withFlag="", useFlagLike=False, enforceChr=False,
-                        senseStrand="", allChrom=True, doCache=False, cachePages=100000,
-                        chromList=[])
+    parser = getParser(usage)
     (options, args) = parser.parse_args(argv[1:])
 
     try:
@@ -84,6 +69,43 @@ def main(argv=None):
                      options.allChrom, options.doCache, options.cachePages, options.chromList)
 
 
+def getParser(usage):
+    parser = optparse.OptionParser(usage=usage)
+    parser.add_option("--nouniq", action="store_false", dest="withUniqs")
+    parser.add_option("--nomulti", action="store_false", dest="withMulti")
+    parser.add_option("--splices", action="store_true", dest="doSplices")
+    parser.add_option("--spliceColor", action="store_true", dest="doSpliceColor")
+    parser.add_option("--flag", dest="withFlag")
+    parser.add_option("--flaglike", action="store_true", dest="useFlagLike")
+    parser.add_option("--pairs", type="int", dest="pairDist")
+    parser.add_option("--cache", type="int", dest="cachePages")
+    parser.add_option("--enforceChr", action="store_true", dest="enforceChr")
+    parser.add_option("--chrom", action="append", dest="chromList")
+    parser.add_option("--strand", dest="strand")
+
+    configParser = getConfigParser()
+    section = "makebedfromrds"
+    withUniqs = getConfigBoolOption(configParser, section, "withUniqs", True)
+    withMulti = getConfigBoolOption(configParser, section, "withMulti", False)
+    doSplices = getConfigBoolOption(configParser, section, "doSplices", False)
+    doSpliceColor = getConfigBoolOption(configParser, section, "doSpliceColor", False)
+    pairDist = getConfigOption(configParser, section, "pairDist", None)
+    withFlag = getConfigOption(configParser, section, "withFlag", "")
+    useFlagLike = getConfigBoolOption(configParser, section, "useFlagLike", False)
+    enforceChr = getConfigBoolOption(configParser, section, "enforceChr", False)
+    senseStrand = getConfigOption(configParser, section, "senseStrand", "")
+    allChrom = getConfigBoolOption(configParser, section, "allChrom", True)
+    doCache = getConfigBoolOption(configParser, section, "doCache", False)
+    cachePages = getConfigOption(configParser, section, "cachePages", 100000)
+
+    parser.set_defaults(withUniqs=withUniqs, withMulti=withMulti, doSplices=doSplices, doSpliceColor=doSpliceColor,
+                        pairDist=pairDist, withFlag=withFlag, useFlagLike=useFlagLike, enforceChr=enforceChr,
+                        senseStrand=senseStrand, allChrom=allChrom, doCache=doCache, cachePages=cachePages,
+                        chromList=[])
+
+    return parser
+
+
 def outputBedFromRds(trackType, rdsfile, outfilename, withUniqs=True, withMulti=True,
                      doSplices=False, doSpliceColor=False, doPairs=False, pairDist=1000000,
                      withFlag="", useFlagLike=False, enforceChr=False, senseStrand="",
@@ -94,7 +116,7 @@ def outputBedFromRds(trackType, rdsfile, outfilename, withUniqs=True, withMulti=
         sys.exit(1)
 
     print "\nsample:"
-    RDS = readDataset(rdsfile, verbose = True, cache=doCache)
+    RDS = ReadDataset.ReadDataset(rdsfile, verbose = True, cache=doCache)
 
     #check that this is better than the dataset's default cache size
     if cachePages > RDS.getDefaultCacheSize():
@@ -165,28 +187,46 @@ def outputBedFromRds(trackType, rdsfile, outfilename, withUniqs=True, withMulti=
                     listLen = len(localList) - 1
                     localIndex = 0
                     while localIndex <= listLen:
+                        read = localList[localIndex]
                         try:
-                            (leftpos, leftsense, leftweight, lPairID) = localList[localIndex]
+                            leftpos = read["start"]
+                            leftsense = read["sense"]
+                            leftweight = read["weight"]
+                            lPairID = read["pairID"]
                             leftstop = leftpos + readlength - 1
                             lpart = 1
                             startList = [leftpos]
                             stopList = [leftstop]
-                        except:
-                            (leftpos, LLstop, LRstart, leftstop, leftsense, lPairID) = localList[localIndex]
+                        except KeyError:
+                            leftpos = read["startL"]
+                            LLstop = read["stopL"]
+                            LRstart = read["startR"]
+                            leftstop = read["stopL"]
+                            leftsense = read["sense"]
+                            lPairID = read["pairID"]
                             leftweight = 1.0
                             lpart = 2
                             startList = [leftpos, LRstart]
                             stopList = [LLstop, leftstop]
 
                         if localIndex < listLen:
+                            read = localList[localIndex + 1]
                             try:
-                                (rightpos, rightsense, rightweight, rPairID) = localList[localIndex + 1]
+                                rightpos = read["start"]
+                                rightsense = read["sense"]
+                                rightweight = read["weight"]
+                                rPairID= read["pairID"]
                                 rightstop = rightpos + readlength - 1
                                 rpart = 1
                                 rstartList = [rightpos]
                                 rstopList = [rightstop]
-                            except:
-                                (rightpos, RLstop, RRstart, rightstop, rightsense, rPairID) = localList[localIndex + 1]
+                            except KeyError:
+                                rightpos = read["startL"]
+                                RLstop = read["stopL"]
+                                RRstart = read["startR"]
+                                rightstop = read["stopR"]
+                                rightsense = read["sense"]
+                                rPairID = read["pairID"]
                                 rightweight = 1.0
                                 rpart = 2
                                 rstartList = [rightpos, RRstart]
@@ -229,7 +269,10 @@ def outputBedFromRds(trackType, rdsfile, outfilename, withUniqs=True, withMulti=
             else:
                 hitDict = RDS.getReadsDict(fullChrom=True, chrom=achrom, flag=withFlag, withWeight=True, withID=True, doUniqs=withUniqs, doMulti=withMulti, readIDDict=False, flagLike=useFlagLike)
                 try:
-                    for (pos, sense, weight, readID) in hitDict[achrom]:
+                    for read in hitDict[achrom]:
+                        pos = read["start"]
+                        sense = read["sense"]
+                        readID = read["readID"]
                         splitReadWrite(outfile, achrom, 1, [pos], [pos + readlength - 1], sense, readID, PLUS_COLOR, MINUS_COLOR)
                         index += 1
                 except:
@@ -239,7 +282,13 @@ def outputBedFromRds(trackType, rdsfile, outfilename, withUniqs=True, withMulti=
                     spliceDict = RDS.getSplicesDict(fullChrom=True, chrom=achrom, flag=withFlag, withID=True, flagLike=useFlagLike)
                     if achrom not in spliceDict:
                         continue
-                    for (readstart, Lstop, Rstart, readstop, rsense, readName) in spliceDict[achrom]:
+                    for read in spliceDict[achrom]:
+                        readstart = read["startL"]
+                        Lstop = read["stopL"]
+                        Rstart = read["startR"]
+                        readstop = read["stopR"]
+                        rsense = read["sense"]
+                        readName = read["readID"]
                         splitReadWrite(outfile, achrom, 2, [readstart, Rstart], [Lstop, readstop], rsense, readName, PLUS_COLOR, MINUS_COLOR)
                         index += 1
 
@@ -254,7 +303,13 @@ def outputBedFromRds(trackType, rdsfile, outfilename, withUniqs=True, withMulti=
             spliceDict = RDS.getSplicesDict(fullChrom=True, chrom=achrom, flag=withFlag, withID=True, flagLike=useFlagLike)
             if achrom not in spliceDict:
                 continue
-            for (readstart, Lstop, Rstart, readstop, rsense, readName) in spliceDict[achrom]:
+            for read in spliceDict[achrom]:
+                readstart = read["startL"]
+                Lstop = read["stopL"]
+                Rstart = read["startR"]
+                readstop = read["stopR"]
+                rsense = read["sense"]
+                readName = read["readID"]
                 splitReadWrite(outfile, achrom, 2, [readstart, Rstart], [Lstop, readstop], rsense, readName, PLUS_COLOR, MINUS_COLOR)
                 index += 1