X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=distalPairs.py;h=5bc25321b25b8a30e9b09511cabbee4355010b71;hp=d24781a0db7c0ee3702e71cf97f6c2d62ea21c41;hb=0d3e3112fd04c2e6b44a25cacef1d591658ad181;hpb=5e4ae21098dba3d1edcf11e7279da0d84c3422e4 diff --git a/distalPairs.py b/distalPairs.py index d24781a..5bc2532 100755 --- a/distalPairs.py +++ b/distalPairs.py @@ -12,15 +12,18 @@ try: except: pass -from commoncode import readDataset -import sys, time, optparse +import sys +import time +import optparse +import ReadDataset +from commoncode import getConfigParser, getConfigOption, getConfigIntOption, getConfigBoolOption def main(argv=None): if not argv: argv = sys.argv - print "%prog: version 3.3" + print "distalPairs: version 3.4" print "looks at all chromosomes simultaneously: is both slow and takes up large amount of RAM" usage = "usage: python %prog minDist rdsfile outfile [--sameChrom] [--splices] [--maxDist bp] [--verbose] [--cache cachepages]" @@ -44,6 +47,27 @@ def main(argv=None): distalPairs(minDist, rdsfile, outfilename, options.sameChromOnly, options.doSplices, options.doVerbose, options.maxDist, options.cachePages) +def makeParser(usage=""): + parser = optparse.OptionParser(usage=usage) + parser.add_option("--sameChrom", action="store_true", dest="sameChromOnly") + parser.add_option("--splices", action="store_true", dest="doSplices") + parser.add_option("--verbose", action="store_true", dest="doVerbose") + parser.add_option("--maxDist", type="int", dest="maxDist") + parser.add_option("--cache", type="int", dest="cachePages") + + configParser = getConfigParser() + section = "distalPairs" + sameChromOnly = getConfigBoolOption(configParser, section, "sameChromOnly", False) + doSplices = getConfigBoolOption(configParser, section, "doSplices", False) + doVerbose = getConfigBoolOption(configParser, section, "doVerbose", False) + maxDist = getConfigIntOption(configParser, section, "maxDist", 1000000000) + cachePages = getConfigOption(configParser, section, "cachePages", None) + + parser.set_defaults(sameChromOnly=sameChromOnly, doSplices=doSplices, doVerbose=doVerbose, maxDist=maxDist, cachePages=cachePages) + + return parser + + def distalPairs(minDist, rdsfile, outfilename, sameChromOnly=False, doSplices=False, doVerbose=False, maxDist=1000000000, cachePages=None): if cachePages is not None: doCache = True @@ -51,7 +75,7 @@ def distalPairs(minDist, rdsfile, outfilename, sameChromOnly=False, doSplices=Fa doCache = False cachePages = -1 - RDS = readDataset(rdsfile, verbose = True, cache=doCache) + RDS = ReadDataset.ReadDataset(rdsfile, verbose = True, cache=doCache) if not RDS.hasIndex(): print "Will not attempt to run on unIndexed dataset - please index with rdsmetadata.py and rerun" sys.exit(1) @@ -95,8 +119,12 @@ def distalPairs(minDist, rdsfile, outfilename, sameChromOnly=False, doSplices=Fa readList = uniqDict[readID] if len(readList) == 2: total += 1 - (start1, sense1, chrom1, pair1) = readList[0] - (start2, sense2, chrom2, pair2) = readList[1] + start1 = readList[0]["start"] + sense1 = readList[0]["sense"] + chrom1 = readList[0]["chrom"] + start2 = readList[1]["start"] + sense2 = readList[1]["sense"] + chrom2 = readList[1]["chrom"] if chrom1 != chrom2: diffChrom += 1