X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=getSNPs.py;h=64948d405c523146e378c00fa4e204ca3ec75a4b;hp=0adde42c0b09a921f45c9edf01070077e68bf805;hb=HEAD;hpb=5e4ae21098dba3d1edcf11e7279da0d84c3422e4 diff --git a/getSNPs.py b/getSNPs.py index 0adde42..64948d4 100755 --- a/getSNPs.py +++ b/getSNPs.py @@ -19,10 +19,12 @@ totalRatioMin = total # of reads supporting a base change at position S / total # reads that pass through position S """ -import sys, optparse -from commoncode import readDataset, writeLog +import sys +import optparse +from commoncode import writeLog, getConfigParser, getConfigBoolOption, getConfigIntOption +import ReadDataset -print "%prog: version 3.5" +print "getSNPs: version 3.6" try: import psyco @@ -41,11 +43,7 @@ def main(argv=None): usage = __doc__ - parser = optparse.OptionParser(usage=usage) - parser.add_option("--nosplices", action="store_false", dest="doSplices") - parser.add_option("--enforceChr", action="store_true", dest="forceChr") - parser.add_option("--cache", type="int", dest="cachePages") - parser.set_defaults(doSplices=True, forceChr=False, cachePages=0) + parser = makeParser(usage) (options, args) = parser.parse_args(argv[1:]) if len(args) < 4: @@ -65,6 +63,23 @@ def main(argv=None): writeSNPsToFile(hitfile, uniqStartMin, totalRatioMin, outfilename, doCache, options.cachePages, options.doSplices, options.forceChr) +def makeParser(usage=""): + parser = optparse.OptionParser(usage=usage) + parser.add_option("--nosplices", action="store_false", dest="doSplices") + parser.add_option("--enforceChr", action="store_true", dest="forceChr") + parser.add_option("--cache", type="int", dest="cachePages") + + configParser = getConfigParser() + section = "getSNPs" + doSplices = getConfigBoolOption(configParser, section, "doSplices", True) + forceChr = getConfigBoolOption(configParser, section, "forceChr", False) + cachePages = getConfigIntOption(configParser, section, "cachePages", 0) + + parser.set_defaults(doSplices=doSplices, forceChr=forceChr, cachePages=cachePages) + + return parser + + def writeSNPsToFile(hitfile, uniqStartMin, totalRatioMin, outfilename, doCache, cachePages=0, doSplices=True, forceChr=False): writeLog("snp.log", sys.argv[0], "rdsfile: %s uniqStartMin: %1.2f totalRatioMin: %1.2f" % (hitfile, uniqStartMin, totalRatioMin)) @@ -75,8 +90,7 @@ def writeSNPsToFile(hitfile, uniqStartMin, totalRatioMin, outfilename, doCache, snpPropertiesList = getSNPs(hitfile, uniqStartMin, totalRatioMin, doCache, cachePages, doSplices, forceChr) for snpEntry in snpPropertiesList: outline = "%1.2f\t%1.2f\t%s\t%d\t%d\t%d\t\t%d\t%s\n" % snpEntry - print outline - outfile.write(outline + "\n") + outfile.write(outline) outfile.flush() outfile.close() @@ -86,7 +100,7 @@ def writeSNPsToFile(hitfile, uniqStartMin, totalRatioMin, outfilename, doCache, def getSNPs(hitfile, uniqStartMin, totalRatioMin, doCache, cachePages=0, doSplices=True, forceChr=False): - hitRDS = readDataset(hitfile, verbose=True, cache=doCache) + hitRDS = ReadDataset.ReadDataset(hitfile, verbose=True, cache=doCache) if cachePages > 20000: hitRDS.setDBcache(cachePages) @@ -157,8 +171,10 @@ def getMatchDict(rds, chrom, withSplices=True): except: readDict[chrom] = [] - for (start, stop) in readDict[chrom]: - if finalDict.has_key(start): + for read in readDict[chrom]: + start = read["start"] + stop = read["stop"] + if start in finalDict: finalDict[start].append(stop) else: finalDict[start] = [stop] @@ -169,8 +185,15 @@ def getMatchDict(rds, chrom, withSplices=True): except: spliceDict[chrom] = [] - for (start, stop) in spliceDict[chrom]: - if finalDict.has_key(start): + for read in spliceDict[chrom]: + try: + start = read["startL"] + stop = read["stopL"] + except KeyError: + start = read["startR"] + stop = read["stopR"] + + if start in finalDict: finalDict[start].append(stop) else: finalDict[start] = [stop] @@ -188,16 +211,20 @@ def getMismatchDict(rds, chrom, withSplices=True): back = "%s:%s" % (str(start), change) uniqBaseDict = {change: 1} totalBaseDict = {change: 1} - if mismatchDict.has_key(change_at): - (uniqueReadCount, totalCount, back, uniqBaseDict, totalBaseDict) = mismatchDict[change_at] + if change_at in mismatchDict: pos = "%s:%s" % (str(start), change) + totalCount = mismatchDict[change_at]["totalCount"] totalCount += 1 - if totalBaseDict.has_key(change): + totalBaseDict = mismatchDict[change_at]["totalBaseDict"] + if change in totalBaseDict: totalBaseDict[change] += 1 + uniqBaseDict = mismatchDict[change_at]["uniqBaseDict"] + uniqueReadCount = mismatchDict[change_at]["uniqueReadCount"] + back = mismatchDict[change_at]["back"] if pos not in back: uniqueReadCount += 1 - if uniqBaseDict.has_key(change): + if change in uniqBaseDict: uniqBaseDict[change] += 1 # dict contains total unique read counts back = "%s,%s" % (back, pos)