totalRatioMin = total # of reads supporting a base change at position S / total # reads that pass through position S
"""
-import sys, optparse
-from commoncode import readDataset, writeLog
+import sys
+import optparse
+from commoncode import writeLog, getConfigParser, getConfigBoolOption, getConfigIntOption
+import ReadDataset
-print "%prog: version 3.5"
+print "getSNPs: version 3.6"
try:
import psyco
usage = __doc__
- parser = optparse.OptionParser(usage=usage)
- parser.add_option("--nosplices", action="store_false", dest="doSplices")
- parser.add_option("--enforceChr", action="store_true", dest="forceChr")
- parser.add_option("--cache", type="int", dest="cachePages")
- parser.set_defaults(doSplices=True, forceChr=False, cachePages=0)
+ parser = makeParser(usage)
(options, args) = parser.parse_args(argv[1:])
if len(args) < 4:
writeSNPsToFile(hitfile, uniqStartMin, totalRatioMin, outfilename, doCache, options.cachePages, options.doSplices, options.forceChr)
+def makeParser(usage=""):
+ parser = optparse.OptionParser(usage=usage)
+ parser.add_option("--nosplices", action="store_false", dest="doSplices")
+ parser.add_option("--enforceChr", action="store_true", dest="forceChr")
+ parser.add_option("--cache", type="int", dest="cachePages")
+
+ configParser = getConfigParser()
+ section = "getSNPs"
+ doSplices = getConfigBoolOption(configParser, section, "doSplices", True)
+ forceChr = getConfigBoolOption(configParser, section, "forceChr", False)
+ cachePages = getConfigIntOption(configParser, section, "cachePages", 0)
+
+ parser.set_defaults(doSplices=doSplices, forceChr=forceChr, cachePages=cachePages)
+
+ return parser
+
+
def writeSNPsToFile(hitfile, uniqStartMin, totalRatioMin, outfilename, doCache, cachePages=0, doSplices=True, forceChr=False):
writeLog("snp.log", sys.argv[0], "rdsfile: %s uniqStartMin: %1.2f totalRatioMin: %1.2f" % (hitfile, uniqStartMin, totalRatioMin))
snpPropertiesList = getSNPs(hitfile, uniqStartMin, totalRatioMin, doCache, cachePages, doSplices, forceChr)
for snpEntry in snpPropertiesList:
outline = "%1.2f\t%1.2f\t%s\t%d\t%d\t%d\t\t%d\t%s\n" % snpEntry
- print outline
- outfile.write(outline + "\n")
+ outfile.write(outline)
outfile.flush()
outfile.close()
def getSNPs(hitfile, uniqStartMin, totalRatioMin, doCache, cachePages=0, doSplices=True, forceChr=False):
- hitRDS = readDataset(hitfile, verbose=True, cache=doCache)
+ hitRDS = ReadDataset.ReadDataset(hitfile, verbose=True, cache=doCache)
if cachePages > 20000:
hitRDS.setDBcache(cachePages)
except:
readDict[chrom] = []
- for (start, stop) in readDict[chrom]:
- if finalDict.has_key(start):
+ for read in readDict[chrom]:
+ start = read["start"]
+ stop = read["stop"]
+ if start in finalDict:
finalDict[start].append(stop)
else:
finalDict[start] = [stop]
except:
spliceDict[chrom] = []
- for (start, stop) in spliceDict[chrom]:
- if finalDict.has_key(start):
+ for read in spliceDict[chrom]:
+ try:
+ start = read["startL"]
+ stop = read["stopL"]
+ except KeyError:
+ start = read["startR"]
+ stop = read["stopR"]
+
+ if start in finalDict:
finalDict[start].append(stop)
else:
finalDict[start] = [stop]
back = "%s:%s" % (str(start), change)
uniqBaseDict = {change: 1}
totalBaseDict = {change: 1}
- if mismatchDict.has_key(change_at):
- (uniqueReadCount, totalCount, back, uniqBaseDict, totalBaseDict) = mismatchDict[change_at]
+ if change_at in mismatchDict:
pos = "%s:%s" % (str(start), change)
+ totalCount = mismatchDict[change_at]["totalCount"]
totalCount += 1
- if totalBaseDict.has_key(change):
+ totalBaseDict = mismatchDict[change_at]["totalBaseDict"]
+ if change in totalBaseDict:
totalBaseDict[change] += 1
+ uniqBaseDict = mismatchDict[change_at]["uniqBaseDict"]
+ uniqueReadCount = mismatchDict[change_at]["uniqueReadCount"]
+ back = mismatchDict[change_at]["back"]
if pos not in back:
uniqueReadCount += 1
- if uniqBaseDict.has_key(change):
+ if change in uniqBaseDict:
uniqBaseDict[change] += 1 # dict contains total unique read counts
back = "%s,%s" % (back, pos)