+import sys
+import optparse
+from commoncode import writeLog
+from makeSNPtrack import makeSNPtrack
+from getNovelSNPs import getNovelSNPsFromFile
+from getSNPGeneInfo import writeSNPGeneInfo
+from chksnp import chkSNPFile
+from chkSNPrmask import chkSNPrmask
+from getSNPs import writeSNPsToFile
+
+VERSION = "1.0"
+
+def main(argv=None):
+ if not argv:
+ argv = sys.argv
+
+ print "runSNPAnalysis: version %s" % VERSION
+ usage = "usage: python runSNPAnalysis.py genome rdsfile label rmaskdbfile dbsnpfile uniqStartMin totalRatio rpkmfile [cachepages]"
+
+ parser = getParser(usage)
+ (options, args) = parser.parse_args(argv[1:])
+
+ if len(args) < 4:
+ print usage
+ sys.exit(1)
+
+ genome = args[0]
+ rdsfile = args[1]
+ label = args[2]
+ rmaskdbfile = args[3]
+ dbsnpfile = args[4]
+ uniqStartMin = args[5]
+ totalRatio = args[6]
+ rpkmfile = args[7]
+ try:
+ cachepages = args[8]
+ except IndexError:
+ cachepages = None
+
+ runSNPAnalysis(genome, rdsfile, label, rmaskdbfile, dbsnpfile, uniqStartMin, totalRatio, rpkmfile, cachepages)
+
+
+def getParser(usage):
+ parser = optparse.OptionParser(usage=usage)
+
+ return parser
+
+
+def runSNPAnalysis(genome, rdsfile, label, rmaskdbfile, dbsnpfile, uniqStartMin, totalRatio, rpkmfile, cachepages=None):
+ """ based on original script runSNPAnalysis.sh
+ usage: runSNPAnalysis.sh genome rdsfile label rmaskdbfile dbsnpfile uniqStartMin totalRatio rpkmfile [cachepages]
+ where for each position S:
+ uniqStartMin = # independent reads supporting base change at S
+ totalRatio = total # reads supporting base change at S / total # reads that pass through S
+ """
+
+ # log the parameters
+ message = "with parameters: %s %s %s %s %s %s %s %s" % (genome, rdsfile, label, rmaskdbfile, dbsnpfile, uniqStartMin, totalRatio, rpkmfile)
+ writeLog("rna.log", "runStandardAnalysis.py", message)
+
+ # get all SNPs by extracting it from the RDS
+ fullsnpfilename = "%s.snps.txt" % label
+ try:
+ snpCache = int(cachepages)
+ except TypeError, ValueError:
+ snpCache = 0
+
+ doCache = cachepages is not None
+ writeSNPsToFile(rdsfile, uniqStartMin, totalRatio, fullsnpfilename, doCache, cachePages=snpCache, forceChr=True)
+
+ # get SNPs in non-repeat regions only
+ snpfilename = "%s.nr_snps.txt" % label
+ chkSNPrmask(rmaskdbfile, fullsnpfilename, snpfilename, repeats=False, cachePages=cachepages)
+
+ # Check to see if SNPs are found in dbSNP
+ # if dbSNP128.db is not built yet, build it by running buildsnpdb.py - build snp database using the dbSNP database file downloaded from UCSC
+ # usage: python2.5 buildsnpdb.py snpdbdir snpdbname
+ # the database flat file must be in the snpdbdir directory
+ # To build dbSNP database file, run the following command
+ # python2.5 buildsnpdb.py snp128.txt dbSNP128
+
+ # get dbSNP info for SNPs that are found in the dbSNP database
+ dbsnpFileName = "%s.nr_dbsnp.txt" % label
+ chkSNPFile(dbsnpfile, snpfilename, dbsnpFileName, cachePages=cachepages)
+
+ # get gene info for the snps found in dbSNP
+ genefilename = "%s.nr_dbsnp_geneinfo.txt" % label
+ writeSNPGeneInfo(genome, dbsnpFileName, rpkmfile, genefilename, doCache=doCache)
+
+ # get gene info for snps that are not found in dbSNP
+ outfilename = "%s.nr_final.txt" % label
+ getNovelSNPsFromFile(genome, genefilename, outfilename)
+
+ # make bed file for displaying the snps on UCSC genome browser
+ bedfilename = "%s.nr_snps.bed" % label
+ makeSNPtrack(snpfilename, label, bedfilename)
+
+
+if __name__ == "__main__":
+ main(sys.argv)
\ No newline at end of file