except:
pass
-from commoncode import readDataset
-import sys, time, optparse
+import sys
+import time
+import optparse
+import ReadDataset
+from commoncode import getConfigParser, getConfigOption, getConfigIntOption, getConfigBoolOption
def main(argv=None):
if not argv:
argv = sys.argv
- print "%prog: version 3.3"
+ print "distalPairs: version 3.4"
print "looks at all chromosomes simultaneously: is both slow and takes up large amount of RAM"
usage = "usage: python %prog minDist rdsfile outfile [--sameChrom] [--splices] [--maxDist bp] [--verbose] [--cache cachepages]"
distalPairs(minDist, rdsfile, outfilename, options.sameChromOnly, options.doSplices, options.doVerbose, options.maxDist, options.cachePages)
+def makeParser(usage=""):
+ parser = optparse.OptionParser(usage=usage)
+ parser.add_option("--sameChrom", action="store_true", dest="sameChromOnly")
+ parser.add_option("--splices", action="store_true", dest="doSplices")
+ parser.add_option("--verbose", action="store_true", dest="doVerbose")
+ parser.add_option("--maxDist", type="int", dest="maxDist")
+ parser.add_option("--cache", type="int", dest="cachePages")
+
+ configParser = getConfigParser()
+ section = "distalPairs"
+ sameChromOnly = getConfigBoolOption(configParser, section, "sameChromOnly", False)
+ doSplices = getConfigBoolOption(configParser, section, "doSplices", False)
+ doVerbose = getConfigBoolOption(configParser, section, "doVerbose", False)
+ maxDist = getConfigIntOption(configParser, section, "maxDist", 1000000000)
+ cachePages = getConfigOption(configParser, section, "cachePages", None)
+
+ parser.set_defaults(sameChromOnly=sameChromOnly, doSplices=doSplices, doVerbose=doVerbose, maxDist=maxDist, cachePages=cachePages)
+
+ return parser
+
+
def distalPairs(minDist, rdsfile, outfilename, sameChromOnly=False, doSplices=False, doVerbose=False, maxDist=1000000000, cachePages=None):
if cachePages is not None:
doCache = True
doCache = False
cachePages = -1
- RDS = readDataset(rdsfile, verbose = True, cache=doCache)
+ RDS = ReadDataset.ReadDataset(rdsfile, verbose = True, cache=doCache)
if not RDS.hasIndex():
print "Will not attempt to run on unIndexed dataset - please index with rdsmetadata.py and rerun"
sys.exit(1)
print time.ctime()
- if doSplices:
- print "getting splices"
- splicesDict = RDS.getSplicesDict(withChrom=True, withPairID=True, readIDDict=True, splitRead=True)
- print "got splices"
-
print "getting uniq reads"
uniqDict = RDS.getReadsDict(withChrom=True, withPairID=True, doUniqs=True, readIDDict=True)
print "got uniqs"
if doSplices:
- for readID in splicesDict:
- theRead = splicesDict[readID]
- read0 = theRead[0]
- del read0[1]
- try:
- uniqDict[readID].append(read0)
- except:
- if len(theRead) == 4:
- read2 = theRead[2]
- del read2[1]
- uniqDict[readID] = [read0,read2]
+ addSplicesToUniqReads(RDS, uniqDict)
if doVerbose:
print len(uniqDict), time.ctime()
outfile = open(outfilename,"w")
-
diffChrom = 0
distal = 0
total = 0
readList = uniqDict[readID]
if len(readList) == 2:
total += 1
- (start1, sense1, chrom1, pair1) = readList[0]
- (start2, sense2, chrom2, pair2) = readList[1]
+ start1 = readList[0]["start"]
+ sense1 = readList[0]["sense"]
+ chrom1 = readList[0]["chrom"]
+ start2 = readList[1]["start"]
+ sense2 = readList[1]["sense"]
+ chrom2 = readList[1]["chrom"]
if chrom1 != chrom2:
diffChrom += 1
continue
else:
outline = "%s\t%s\t%d\t%s\t%s\t%d\t%s" % (readID, chrom1, start1, sense1, chrom2, start2, sense2)
- outfile.write(outline + "\n")
+ print >> outfile, outline
if doVerbose:
print diffChrom, outline
else:
dist = abs(start1 - start2)
-
if minDist < dist < maxDist:
distal += 1
outline = "%s\t%s\t%d\t%s\t%d\t%s\t%d" % (readID, chrom1, start1, sense1, start2, sense2, dist)
- outfile.write(outline + "\n")
+ print >> outfile, outline
if doVerbose:
print distal, outline
print time.ctime()
+def addSplicesToUniqReads(RDS, uniqDict):
+ print "getting splices"
+ splicesDict = RDS.getSplicesDict(withChrom=True, withPairID=True, readIDDict=True, splitRead=True)
+ print "got splices"
+ for readID in splicesDict:
+ theRead = splicesDict[readID]
+ read0 = theRead[0]
+ read0["start"] = read0["startL"]
+ del read0["stopL"]
+ try:
+ uniqDict[readID].append(read0)
+ except:
+ if len(theRead) == 4:
+ read2 = theRead[2]
+ read2["start"] = read2["startL"]
+ del read2["stopL"]
+ uniqDict[readID] = [read0,read2]
+
+
if __name__ == "__main__":
main(sys.argv)
\ No newline at end of file