# Usage: $ERANGEPATH/runSNPAnalysis.sh genome rdsfile label rmaskdbfile dbsnpfile uniqStartMin totalRatio rpkmfile [cachepages]
# Example: $ERANGEPATH/runSNPAnalysis.sh mouse /woldlab/trog/sdc/alim/24T4spike_10212/24T4spike.rds 24Tspike /woldlab/trog/data1/wlee/db/rmask.db /woldlab/trog/data1/wlee/db/dbSNP128.db 5 0.75 ~/proj/c2c12rna24R/c2c12rna.24R.final.rpkm 5000000
8 # set ERANGEPATH to the absolute or relative path to ERANGE, if it's not in the environment
# Setup and argument handling for the SNP analysis run.
# NOTE(review): every line in this section begins with a bare number (10, 12,
# 15, ...) that looks like line-number residue from a listing, and the gaps in
# that numbering suggest that lines such as `then`, `else` and `fi` (and
# possibly guards around the usage text) are missing here. Confirm against the
# original script before relying on the control flow shown below.

# Fall back to a relative default location when ERANGEPATH is unset or empty.
10 if [ -z "$ERANGEPATH" ]
12 ERANGEPATH='../commoncode'
# Announce the script name and version.
15 echo 'runSNPAnalysis.sh: version 3.1'
# Build the cache option text from the 9th positional argument.
19 cachepages="-cache "$9
# Exactly 10 positional arguments turns on the -nosplices flag, which is later
# appended to the getSNPs.py command line.
23 if [ $# -eq 10 ]; then
24 nosplices=" -nosplices "
# Usage text: the positional parameters and the meaning of the two thresholds.
# NOTE(review): no condition guarding these echoes is visible here -- TODO confirm.
28 echo 'runSNPAnalysis.sh genome rdsfile label rmaskdbfile dbsnpfile uniqStartMin totalRatio rpkmfile [cachepages]'
29 echo 'where for each position S:'
30 echo ' uniqStartMin = # independent reads supporting base change at S'
31 echo ' totalRatio = total # reads supporting base change at S / total # reads that pass through S'
# Join the first eight arguments and the optional flags into one settings
# string, print it, and pass it to recordLog.py together with snp.log
# (presumably the log file that recordLog.py writes to -- verify).
34 arguments=$1' '$2' '$3' '$4' '$5' '$6' '$7' '$8' '$cachepages$nosplices
35 echo 'running with settings: ' $arguments
36 python $ERANGEPATH/recordLog.py snp.log runSNPAnalysis.sh "with parameters: $arguments"
# SNP analysis pipeline. Most steps consume a file written by an earlier step,
# all named after the label given as $3.
# Positional parameters, per the usage text: $1 genome, $2 rdsfile, $3 label,
# $4 rmaskdbfile, $5 dbsnpfile, $6 uniqStartMin, $7 totalRatio, $8 rpkmfile.
#
# Path and label expansions are double-quoted so that values containing
# whitespace or glob characters reach the Python tools as single arguments.
# $cachepages and $nosplices are intentionally left unquoted: they carry
# option text assembled earlier in the script (e.g. "-cache <n>") and must be
# word-split by the shell into separate arguments, or vanish when empty.

# get all SNPs by extracting them from the RDS
# shellcheck disable=SC2086
python "$ERANGEPATH/getSNPs.py" "$2" "$6" "$7" "${3}.snps.txt" -enforceChr $cachepages $nosplices

# get SNPs in non-repeat regions only
# shellcheck disable=SC2086
python "$ERANGEPATH/chkSNPrmask.py" "$4" "${3}.snps.txt" "${3}.nr_snps.txt" $cachepages

# Check to see if SNPs are found in dbSNP
# if dbSNP128.db is not built yet, build it by running buildsnpdb.py - build snp database using the dbSNP database file downloaded from UCSC
# usage: python2.5 buildsnpdb.py snpdbdir snpdbname
# the database flat file must be in the snpdbdir directory
# To build dbSNP database file, run the following command
# python2.5 buildsnpdb.py snp128.txt dbSNP128

# get dbSNP info for SNPs that are found in the dbSNP database
# shellcheck disable=SC2086
python "$ERANGEPATH/chksnp.py" "$5" "${3}.nr_snps.txt" "${3}.nr_dbsnp.txt" $cachepages

# get gene info for the snps found in dbSNP
# shellcheck disable=SC2086
python "$ERANGEPATH/getSNPGeneInfo.py" "$1" "${3}.nr_dbsnp.txt" "$8" "${3}.nr_dbsnp_geneinfo.txt" $cachepages

# get gene info for snps that are not found in dbSNP
python "$ERANGEPATH/getNovelSNPs.py" "$1" "${3}.nr_dbsnp_geneinfo.txt" "${3}.nr.final.txt"

# make bed file for displaying the snps on UCSC genome browser
python "$ERANGEPATH/makeSNPtrack.py" "${3}.nr_snps.txt" "$3" "${3}.nr_snps.bed"