12 import sqlite3 as sqlite
13 from commoncode import getConfigParser, getConfigOption, getConfigBoolOption
# Module-level setup: read the shared configuration and point the tempfile
# module at the directory named by the "cistematic_temp" option of the
# "general" section ("/tmp" is passed as the default value).
15 configParser = getConfigParser()
16 cisTemp = getConfigOption(configParser, "general", "cistematic_temp", default="/tmp")
17 tempfile.tempdir = cisTemp
# Version banner, emitted with the Python 2 print statement.
19 print "chkSNPrmask: version 3.4"
# Command-line handling (the enclosing definition's header is not visible in
# this excerpt).
# NOTE(review): no visible line substitutes the %s placeholder in the usage
# text -- confirm it is filled in (e.g. with the script name) before display.
26 usage = "usage: python %s dbfile snpsfile nr_snps_outfile [--cache numPages] [--repeats]"
# Build the option parser and parse every argument after argv[0].
28 parser = makeParser(usage)
29 (options, args) = parser.parse_args(argv[1:])
# dbfile, filename and outfile are assigned on lines not shown here
# (presumably from the positional args -- confirm). The parsed --repeats and
# --cache values are forwarded unchanged.
39 chkSNPrmask(dbfile, filename, outfile, options.repeats, options.cachePages)
# Build the optparse parser for this script.
#   usage -- usage string handed to optparse.OptionParser.
# Registers --repeats (store_true flag, dest "repeats") and --cache (int,
# dest "cachePages"), then installs defaults for both options taken from the
# "checkSNPrmask" section of the configuration.
42 def makeParser(usage=""):
43 parser = optparse.OptionParser(usage=usage)
44 parser.add_option("--repeats", action="store_true", dest="repeats")
45 parser.add_option("--cache", type="int", dest="cachePages")
# Configuration-backed defaults; False and None are passed as fallback values.
47 configParser = getConfigParser()
48 section = "checkSNPrmask"
49 repeats = getConfigBoolOption(configParser, section, "repeats", False)
# NOTE(review): --cache is parsed as an int on the command line, but the type
# getConfigOption returns for "cachePages" is not visible here -- confirm it
# is numeric, since chkSNPrmask compares it with 250000 and formats it with %d.
50 cachePages = getConfigOption(configParser, section, "cachePages", None)
52 parser.set_defaults(repeats=repeats, cachePages=cachePages)
# NOTE(review): no return statement is visible in this excerpt, yet the caller
# calls parse_args() on the result -- confirm the parser is returned.
# Look up each SNP position from a tab-delimited input file in the "repeats"
# table of an SQLite database and write the resulting lines to an output file.
#   dbfile     -- path of the SQLite database (must contain a "repeats" table
#                 with chrom, start, stop and family columns, per the query below)
#   filename   -- path of the tab-delimited SNP input file
#   outfile    -- path of the file the retained lines are written to
#   repeats    -- when true, repeat information is appended to an entry
#                 (see the inline comment further down)
#   cachePages -- page count used for PRAGMA CACHE_SIZE; None skips the
#                 visible caching branch
# Many statements of this function (branch bodies, cursor creation, loop
# headers, column extraction) are not visible in this excerpt; the comments
# below describe only the visible lines.
57 def chkSNPrmask(dbfile, filename, outfile, repeats=False, cachePages=None):
# Caching path: 250000 acts as a threshold for cachePages (the statement it
# guards is not visible).
60 if cachePages is not None:
61 if cachePages < 250000:
# Copy the database to a temporary ".db" file and connect to the copy.
# NOTE(review): tempfile.mktemp() is documented as deprecated because the
# returned name can be claimed by another process before use; consider
# tempfile.mkstemp() / NamedTemporaryFile.
64 print "caching locally..."
65 cachefile = tempfile.mktemp() + ".db"
66 shutil.copyfile(dbfile, cachefile)
67 db = sqlite.connect(cachefile)
# Connect to the original database file directly (presumably the non-caching
# branch -- the surrounding control flow is not visible).
73 db = sqlite.connect(dbfile)
# "sql" is created on a line not shown (presumably a cursor on db).
# Tune SQLite: page cache size, in-memory temp storage, refresh statistics.
76 sql.execute("PRAGMA CACHE_SIZE = %d" % cachePages)
77 sql.execute("PRAGMA temp_store = MEMORY")
78 sql.execute("ANALYZE")
# Read the SNP file. NOTE(review): no close() of infile is visible.
80 infile = open(filename)
# Lines starting with "#" are treated as comments (action taken is not visible).
85 if doNotProcessLine(line):
88 fields = line.strip().split("\t")
# chrom and pos are derived from fields on lines not shown. Each SNP is kept
# both in an ordered list and in a dict keyed by (chrom, pos) whose value is
# the stripped input line.
91 featureList.append((chrom,pos))
92 featureDict[(chrom, pos)] = line.strip()
# Query the repeats table once per SNP.
98 for (chrom, pos) in featureList:
100 if chrom != currentChrom:
# NOTE(review): the SQL text is built by string interpolation with values
# read from the input file; a parameterized query ("?" placeholders) would
# avoid quoting problems and injection.
106 sql.execute("select family from repeats where chrom = '%s' and %d between start and stop" % (chrom, pos))
107 results = sql.fetchall()
# The conditions choosing between annotating and deleting an entry are on
# lines not visible here.
111 if repeats: # if user wants to keep track of the SNPs in repeats
# NOTE(review): "\A" is not a recognized escape, so this appends a tab, "N",
# a literal backslash and "A"; possibly "N/A" was intended -- confirm before
# changing, since downstream consumers may depend on the current text.
112 featureDict[(chrom,pos)] += "\tN\A"
# x is bound on a line not shown (presumably an item from results).
114 featureDict[(chrom,pos)] += "\t" + str(x)
118 del featureDict[(chrom,pos)]
127 print "removing cache"
# Write the remaining entries, one per line. Output order is the dict's
# iteration order, not the order of featureList. iteritems() is Python 2 only.
# NOTE(review): no close() of outFile is visible.
131 outFile = open(outfile, "w")
132 for key, value in featureDict.iteritems():
133 outStr = str(value) + "\n"
134 outFile.write(outStr)
def doNotProcessLine(line):
    """Return True when *line* is a comment line that should be skipped.

    A line counts as a comment when its first character is "#".  An empty
    string is not a comment and yields False instead of raising IndexError.
    """
    # startswith() is safe on an empty string, unlike indexing line[0].
    return line.startswith("#")
# Script entry guard: the guarded statement(s) follow on lines not visible in
# this excerpt.
143 if __name__ == "__main__":