13 import sqlite3 as sqlite
# Version banner: printed to stdout (Python 2 print statement) together with
# the script path taken from sys.argv[0].
15 print "version 3.6: %s" % sys.argv[0]
# Command-line handling.  The bare name `argv` used below, and the names
# dbfile / infile / outfile passed to chkSNPFile, are bound on lines that are
# not visible in this excerpt -- presumably inside a main(argv) function that
# takes the three file names from the positional `args`; TODO confirm.
# optparse expands %prog in the usage text to the program name.
22 usage = "usage: python %prog dbfile snpsfile dbsnp_outfile [--cache numPages] [--snpDB dbfile]"
24 parser = optparse.OptionParser(usage=usage)
# --cache takes an integer and stores it as options.cachePages.
25 parser.add_option("--cache", type="int", dest="cachePages")
# --snpDB may be given several times; each value is appended to
# options.snpDBList in command-line order.
26 parser.add_option("--snpDB", action="append", dest="snpDBList",
27 help="additional snp db files to check will be searched in order given")
# Defaults when the options are absent: no cache size, no extra databases.
28 parser.set_defaults(cachePages=None, snpDBList=[])
# argv[0] is skipped so that only the real arguments are parsed.
29 (options, args) = parser.parse_args(argv[1:])
# Run the annotation with the parsed settings.
39 chkSNPFile(dbfile, infile, outfile, options.cachePages, options.snpDBList)
# Annotate the SNPs listed in a file against a list of SNP databases and write
# the annotated records to an output file.
#   dbfile         - not referenced on any visible line; presumably seeds
#                    dbList on a line missing from this excerpt (TODO confirm)
#   inputFileName  - path of the SNP file; opened for reading and parsed by
#                    getSNPLocationInfo
#   outputFileName - path opened with mode "w", so an existing file is replaced
#   cachePages     - forwarded unchanged to annotateSNPFromDBList
#   snpDBList      - extra database file names, appended to dbList in the
#                    order given
# NOTE(review): snpDBList=[] is a mutable default argument; the visible code
# only iterates over it and never modifies it.
42 def chkSNPFile(dbfile, inputFileName, outputFileName, cachePages=None, snpDBList=[]):
# NOTE(review): no close() for snpInputFile is visible in this excerpt.
44 snpInputFile = open(inputFileName)
45 snpLocationList, snpDict = getSNPLocationInfo(snpInputFile)
# dbList is created on a line not shown here.
48 for dbFileName in snpDBList:
49 dbList.append(dbFileName)
51 annotatedSnpDict = annotateSNPFromDBList(snpLocationList, snpDict, dbList, cachePages)
53 outputFile = open(outputFileName, "w")
# outputLine is first assigned on a line not shown here; whatever it holds is
# written once, ahead of the per-SNP records.
55 outputFile.write(outputLine)
# One record per line, in the iteration order of the returned mapping (not in
# the sorted order of snpLocationList).  iteritems() exists only in Python 2.
56 for key,value in annotatedSnpDict.iteritems():
57 outputLine = "%s\n" % str(value)
58 outputFile.write(outputLine)
def chkSNP(dbList, snpPropertiesList, cachePages=None):
    """Annotate SNP records that are already in memory.

    snpPropertiesList is an iterable of SNP text lines; it is indexed with
    getSNPLocationInfo() and the result is looked up in the databases named
    in dbList.  cachePages is passed through unchanged.

    Returns whatever annotateSNPFromDBList() returns.
    """
    locations, recordsByLocation = getSNPLocationInfo(snpPropertiesList)
    annotated = annotateSNPFromDBList(locations, recordsByLocation, dbList, cachePages)
    return annotated
def getSNPLocationInfo(snpPropertiesList):
    """Index SNP records by genomic location.

    snpPropertiesList is any iterable of text lines (an open file works).
    Lines for which doNotProcessLine() returns a true value are skipped.
    Every other line must be tab-separated with at least four columns:
    column 3 holds the chromosome name, whose first three characters are
    dropped (presumably a "chr" prefix -- confirm against the input format),
    and column 4 holds an integer position.

    Returns (snpLocationList, snpDict): the sorted list of
    (chromosome, position) tuples, one per processed line, and a dict mapping
    each tuple to its whitespace-stripped source line.  When two lines share
    a location the list keeps both entries and the dict keeps the later line.

    Raises IndexError for a processed line with fewer than four columns and
    ValueError when column 4 is not an integer.
    """
    snpLocationList = []
    snpDict = {}

    for line in snpPropertiesList:
        if doNotProcessLine(line):
            continue

        # Strip once; the same text is both parsed and stored.
        record = line.strip()
        fields = record.split("\t")
        snpLocation = (fields[2][3:], int(fields[3]))
        snpLocationList.append(snpLocation)
        snpDict[snpLocation] = record

    # Chromosome names are strings, so they sort lexicographically ("10"
    # before "2"); positions sort numerically within one chromosome.
    snpLocationList.sort()

    return snpLocationList, snpDict
# Predicate called by getSNPLocationInfo once per input line, before the line
# is parsed.  The body is not visible in this excerpt; presumably a true
# result means the line is skipped -- TODO confirm.
89 def doNotProcessLine(line):
def annotateSNPFromDB(snpLocationList, snpDict, dbFileName, cachePages=None):
    """Single-database convenience wrapper around annotateSNPFromDBList().

    Wraps dbFileName in a one-element list and forwards every other argument
    unchanged; the return value is that of annotateSNPFromDBList().
    """
    singleDbList = [dbFileName]
    return annotateSNPFromDBList(snpLocationList, snpDict, singleDbList, cachePages)
# Look up every location of snpLocationList in the databases of dbList, in the
# order given, and extend the matching snpDict records with two tab-separated
# columns (name, func).  Locations still listed after the lookups get two
# "N\A" columns instead.
#   snpLocationList - list of (chromosome, position) tuples; locations that
#                     were found are deleted from this list in place
#   snpDict         - maps each location to its text record; updated in place
#   dbList          - sqlite database file names
#   cachePages      - when not None, each database is first copied to a
#                     temporary file and the copy is opened; also feeds the
#                     PRAGMA CACHE_SIZE value below
# Several lines of this function (else branches, cursor creation, the handling
# of empty query results, cache removal, the return statement) are not
# visible in this excerpt.
97 def annotateSNPFromDBList(snpLocationList, snpDict, dbList, cachePages=None):
# Temporary files go under $CISTEMATIC_TEMP when it is set to a non-empty
# value; the fallback assigned to cisTemp otherwise is on lines not shown.
98 if os.environ.get("CISTEMATIC_TEMP"):
99 cisTemp = os.environ.get("CISTEMATIC_TEMP")
# tempfile.tempdir is module-wide state, so this also redirects every later
# tempfile call made elsewhere in the process.
103 tempfile.tempdir = cisTemp
105 for dbFileName in dbList:
106 if cachePages is not None:
107 print "caching locally..."
# NOTE(review): tempfile.mktemp() only returns a name and is documented as
# unsafe, because another process can create the file first; consider mkstemp().
108 cachefile = "%s.db" % tempfile.mktemp()
109 shutil.copyfile(dbFileName, cachefile)
110 db = sqlite.connect(cachefile)
# Direct connection to the original file; presumably the else branch of the
# cachePages test (the `else:` line itself is not shown).
114 db = sqlite.connect(dbFileName)
# max() makes 500000 a floor rather than a default.
# NOTE(review): with cachePages=None this relies on Python 2 ordering None
# below every int; Python 3 raises TypeError for the same comparison.
117 cacheSize = max(cachePages, 500000)
# `sql` is presumably a cursor on db, created on a line not shown here.
119 sql.execute("PRAGMA CACHE_SIZE = %d" % cacheSize)
120 sql.execute("PRAGMA temp_store = MEMORY")
124 for chromosomePosition in snpLocationList:
125 (chromosome, position) = chromosomePosition
# First lookup: rows with start == position - 1 and stop == position.
# NOTE(review): the statement is built by string interpolation and chromosome
# originates from the input file; "?" parameter binding would be safer.
129 startPosition = position - 1
130 sql.execute("select func, name from snp where chrom = '%s' and start = %d and stop = %d" % (chromosome, startPosition, position))
131 results = sql.fetchall()
# Only the first row is used; what happens when results is empty is decided
# on lines not shown here.
133 (func, name) = results[0]
# Second lookup: rows with start <= position - 1 and stop >= position.
# Presumably reached only when the first lookup found nothing -- TODO confirm.
136 sql.execute("select func, name from snp where chrom = '%s' and start <= %d and stop >= %d" % (chromosome, startPosition, position))
137 results = sql.fetchall()
139 (func, name) = results[0]
# Append name and func to the record as two extra tab-separated columns and
# remember the location as found.  Presumably guarded by a "found" test on
# lines not shown here.  string.join() exists only in Python 2.
145 snpEntry = snpDict[chromosomePosition]
146 snpDict[chromosomePosition] = string.join([snpEntry, str(name), str(func)], "\t")
147 foundEntries.append(chromosomePosition)
# Found locations are removed from snpLocationList (presumably once per
# database, so that later databases skip them -- TODO confirm).
# NOTE(review): index() followed by del is a linear scan for every entry.
153 for chromosomePosition in foundEntries:
154 del snpLocationList[snpLocationList.index(chromosomePosition)]
157 print "\nremoving cache"
# Whatever is still listed was not matched above.  "\A" is not a recognised
# escape sequence, so each marker is the three characters N, backslash, A.
161 for chromosomePosition in snpLocationList:
162 snpEntry = snpDict[chromosomePosition]
163 snpDict[chromosomePosition] = string.join([snpEntry, "N\A", "N\A"], "\t")
168 if __name__ == "__main__":