13 import sqlite3 as sqlite
14 from commoncode import getConfigParser, getConfigOption
16 print "chksnp: version 3.7"
23 usage = "usage: python %prog dbfile snpsfile dbsnp_outfile [--cache numPages] [--snpDB dbfile]"
25 parser = optparse.OptionParser(usage=usage)
26 parser.add_option("--cache", type="int", dest="cachePages")
27 parser.add_option("--snpDB", action="append", dest="snpDBList",
28 help="additional snp db files to check will be searched in order given")
29 parser.set_defaults(cachePages=None, snpDBList=[])
30 (options, args) = parser.parse_args(argv[1:])
40 chkSNPFile(dbfile, infile, outfile, options.cachePages, options.snpDBList)
43 def chkSNPFile(dbfile, inputFileName, outputFileName, cachePages=None, snpDBList=[]):
45 snpInputFile = open(inputFileName)
46 snpLocationList, snpDict = getSNPLocationInfo(snpInputFile)
49 for dbFileName in snpDBList:
50 dbList.append(dbFileName)
52 annotatedSnpDict = annotateSNPFromDBList(snpLocationList, snpDict, dbList, cachePages)
54 outputFile = open(outputFileName, "w")
56 outputFile.write(outputLine)
57 for key,value in annotatedSnpDict.iteritems():
58 outputLine = "%s\n" % str(value)
59 outputFile.write(outputLine)
64 def chkSNP(dbList, snpPropertiesList, cachePages=None):
66 snpLocationList, snpDict = getSNPLocationInfo(snpPropertiesList)
67 return annotateSNPFromDBList(snpLocationList, snpDict, dbList, cachePages)
70 def getSNPLocationInfo(snpPropertiesList):
74 for line in snpPropertiesList:
75 if doNotProcessLine(line):
78 fields = line.strip().split("\t")
79 chromosome = fields[2][3:]
80 position = int(fields[3])
81 snpLocation = (chromosome, position)
82 snpLocationList.append(snpLocation)
83 snpDict[snpLocation] = line.strip()
85 snpLocationList.sort()
87 return snpLocationList, snpDict
90 def doNotProcessLine(line):
94 def annotateSNPFromDB(snpLocationList, snpDict, dbFileName, cachePages=None):
95 return annotateSNPFromDBList(snpLocationList, snpDict, [dbFileName], cachePages)
98 def annotateSNPFromDBList(snpLocationList, snpDict, dbList, cachePages=None):
100 configParser = getConfigParser()
101 cisTemp = getConfigOption(configParser, "general", "cistematic_temp", default="/tmp")
102 tempfile.tempdir = cisTemp
104 for dbFileName in dbList:
105 if cachePages is not None:
106 print "caching locally..."
107 cachefile = "%s.db" % tempfile.mktemp()
108 shutil.copyfile(dbFileName, cachefile)
109 db = sqlite.connect(cachefile)
113 db = sqlite.connect(dbFileName)
116 cacheSize = max(cachePages, 500000)
118 sql.execute("PRAGMA CACHE_SIZE = %d" % cacheSize)
119 sql.execute("PRAGMA temp_store = MEMORY")
123 for chromosomePosition in snpLocationList:
124 (chromosome, position) = chromosomePosition
128 startPosition = position - 1
129 sql.execute("select func, name from snp where chrom = '%s' and start = %d and stop = %d" % (chromosome, startPosition, position))
130 results = sql.fetchall()
132 (func, name) = results[0]
135 sql.execute("select func, name from snp where chrom = '%s' and start <= %d and stop >= %d" % (chromosome, startPosition, position))
136 results = sql.fetchall()
138 (func, name) = results[0]
144 snpEntry = snpDict[chromosomePosition]
145 snpDict[chromosomePosition] = string.join([snpEntry, str(name), str(func)], "\t")
146 foundEntries.append(chromosomePosition)
152 for chromosomePosition in foundEntries:
153 del snpLocationList[snpLocationList.index(chromosomePosition)]
156 print "\nremoving cache"
160 for chromosomePosition in snpLocationList:
161 snpEntry = snpDict[chromosomePosition]
162 snpDict[chromosomePosition] = string.join([snpEntry, "N\A", "N\A"], "\t")
167 if __name__ == "__main__":