3 from commoncode import writeLog
4 from makeSNPtrack import makeSNPtrack
5 from getNovelSNPs import getNovelSNPsFromFile
6 from getSNPGeneInfo import writeSNPGeneInfo
7 from chksnp import chkSNPFile
8 from chkSNPrmask import chkSNPrmask
9 from getSNPs import writeSNPsToFile
17 print "runSNPAnalysis: version %s" % VERSION
18 usage = "usage: python runSNPAnalysis.py genome rdsfile label rmaskdbfile dbsnpfile uniqStartMin totalRatio rpkmfile [cachepages]"
20 parser = getParser(usage)
21 (options, args) = parser.parse_args(argv[1:])
32 uniqStartMin = args[5]
40 runSNPAnalysis(genome, rdsfile, label, rmaskdbfile, dbsnpfile, uniqStartMin, totalRatio, rpkmfile, cachepages)
44 parser = optparse.OptionParser(usage=usage)
49 def runSNPAnalysis(genome, rdsfile, label, rmaskdbfile, dbsnpfile, uniqStartMin, totalRatio, rpkmfile, cachepages=None):
50 """ based on original script runSNPAnalysis.sh
51 usage: runSNPAnalysis.sh genome rdsfile label rmaskdbfile dbsnpfile uniqStartMin totalRatio rpkmfile [cachepages]
52 where for each position S:
53 uniqStartMin = # independent reads supporting base change at S
54 totalRatio = total # reads supporting base change at S / total # reads that pass through S
58 message = "with parameters: %s %s %s %s %s %s %s %s" % (genome, rdsfile, label, rmaskdbfile, dbsnpfile, uniqStartMin, totalRatio, rpkmfile)
59 writeLog("rna.log", "runStandardAnalysis.py", message)
61 # get all SNPs by extracting it from the RDS
62 fullsnpfilename = "%s.snps.txt" % label
64 snpCache = int(cachepages)
65 except TypeError, ValueError:
68 doCache = cachepages is not None
69 writeSNPsToFile(rdsfile, uniqStartMin, totalRatio, fullsnpfilename, doCache, cachePages=snpCache, forceChr=True)
71 # get SNPs in non-repeat regions only
72 snpfilename = "%s.nr_snps.txt" % label
73 chkSNPrmask(rmaskdbfile, fullsnpfilename, snpfilename, repeats=False, cachePages=cachepages)
75 # Check to see if SNPs are found in dbSNP
76 # if dbSNP128.db is not built yet, build it by running buildsnpdb.py - build snp database using the dbSNP database file downloaded from UCSC
77 # usage: python2.5 buildsnpdb.py snpdbdir snpdbname
78 # the database flat file must be in the snpdbdir directory
79 # To build dbSNP database file, run the following command
80 # python2.5 buildsnpdb.py snp128.txt dbSNP128
82 # get dbSNP info for SNPs that are found in the dbSNP database
83 dbsnpFileName = "%s.nr_dbsnp.txt" % label
84 chkSNPFile(dbsnpfile, snpfilename, dbsnpFileName, cachePages=cachepages)
86 # get gene info for the snps found in dbSNP
87 genefilename = "%s.nr_dbsnp_geneinfo.txt" % label
88 writeSNPGeneInfo(genome, dbsnpFileName, rpkmfile, genefilename, doCache=doCache)
90 # get gene info for snps that are not found in dbSNP
91 outfilename = "%s.nr_final.txt" % label
92 getNovelSNPsFromFile(genome, genefilename, outfilename)
94 # make bed file for displaying the snps on UCSC genome browser
95 bedfilename = "%s.nr_snps.bed" % label
96 makeSNPtrack(snpfilename, label, bedfilename)
99 if __name__ == "__main__":