X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=getSNPs.py;h=f6071ae444be9ca294ac4c4ff39b50a74e3d0cb4;hp=0adde42c0b09a921f45c9edf01070077e68bf805;hb=0d3e3112fd04c2e6b44a25cacef1d591658ad181;hpb=5e4ae21098dba3d1edcf11e7279da0d84c3422e4 diff --git a/getSNPs.py b/getSNPs.py index 0adde42..f6071ae 100755 --- a/getSNPs.py +++ b/getSNPs.py @@ -19,10 +19,12 @@ totalRatioMin = total # of reads supporting a base change at position S / total # reads that pass through position S """ -import sys, optparse -from commoncode import readDataset, writeLog +import sys +import optparse +from commoncode import writeLog, getConfigParser, getConfigBoolOption, getConfigIntOption +import ReadDataset -print "%prog: version 3.5" +print "getSNPs: version 3.6" try: import psyco @@ -41,11 +43,7 @@ def main(argv=None): usage = __doc__ - parser = optparse.OptionParser(usage=usage) - parser.add_option("--nosplices", action="store_false", dest="doSplices") - parser.add_option("--enforceChr", action="store_true", dest="forceChr") - parser.add_option("--cache", type="int", dest="cachePages") - parser.set_defaults(doSplices=True, forceChr=False, cachePages=0) + parser = makeParser(usage) (options, args) = parser.parse_args(argv[1:]) if len(args) < 4: @@ -65,6 +63,23 @@ def main(argv=None): writeSNPsToFile(hitfile, uniqStartMin, totalRatioMin, outfilename, doCache, options.cachePages, options.doSplices, options.forceChr) +def makeParser(usage=""): + parser = optparse.OptionParser(usage=usage) + parser.add_option("--nosplices", action="store_false", dest="doSplices") + parser.add_option("--enforceChr", action="store_true", dest="forceChr") + parser.add_option("--cache", type="int", dest="cachePages") + + configParser = getConfigParser() + section = "getSNPs" + doSplices = getConfigBoolOption(configParser, section, "doSplices", True) + forceChr = getConfigBoolOption(configParser, section, "forceChr", False) + cachePages = getConfigIntOption(configParser, section, "cachePages", 0) + + parser.set_defaults(doSplices=True, forceChr=False, cachePages=0) + + return parser + + def writeSNPsToFile(hitfile, uniqStartMin, totalRatioMin, outfilename, doCache, cachePages=0, doSplices=True, forceChr=False): writeLog("snp.log", sys.argv[0], "rdsfile: %s uniqStartMin: %1.2f totalRatioMin: %1.2f" % (hitfile, uniqStartMin, totalRatioMin)) @@ -86,7 +101,7 @@ def writeSNPsToFile(hitfile, uniqStartMin, totalRatioMin, outfilename, doCache, def getSNPs(hitfile, uniqStartMin, totalRatioMin, doCache, cachePages=0, doSplices=True, forceChr=False): - hitRDS = readDataset(hitfile, verbose=True, cache=doCache) + hitRDS = ReadDataset.ReadDataset(hitfile, verbose=True, cache=doCache) if cachePages > 20000: hitRDS.setDBcache(cachePages) @@ -157,7 +172,9 @@ def getMatchDict(rds, chrom, withSplices=True): except: readDict[chrom] = [] - for (start, stop) in readDict[chrom]: + for read in readDict[chrom]: + start = read["start"] + stop = read["stop"] if finalDict.has_key(start): finalDict[start].append(stop) else: @@ -169,7 +186,14 @@ def getMatchDict(rds, chrom, withSplices=True): except: spliceDict[chrom] = [] - for (start, stop) in spliceDict[chrom]: + for read in spliceDict[chrom]: + try: + start = read["startL"] + stop = read["stopL"] + except KeyError: + start = read["startR"] + stop = read["stopR"] + if finalDict.has_key(start): finalDict[start].append(stop) else: