3 # runStrandedAnalysis.sh
6 # example: . ../commoncode/runStrandedAnalysis.sh mouse c2c12rna ../mm9repeats/rmask.db 20000
8 # assuming that we have rds database with the prefix c2c12rna.24R.
10 # set ERANGEPATH to the absolute or relative path to ERANGE, if it's not in the environment
12 if [ -z "$ERANGEPATH" ]
14 ERANGEPATH='../erange'
17 echo 'runStrandedAnalysis.sh: version 4.2'
22 echo 'usage:runStrandedAnalysis.sh genome rdsprefix repeatmaskdb bpradius'
24 echo 'where rdsprefix is the name of the rds file without the .rds extension'
25 echo 'use "none" for the repeatmaskdb if you do not have one'
30 arguments=$1' '$2' '$3' '$4
31 echo 'running with settings: ' $arguments
32 python $ERANGEPATH/recordLog.py rna.log runStrandedAnalysis.sh "with parameters: $arguments"
34 # count the unique reads falling on the gene models ; the nomatch files are
35 # mappable reads that fell outside of the Cistematic gene models and not the
36 # unmappable of Eland (i.e, the "NM" reads)
37 python $ERANGEPATH/geneMrnaCounts.py $1 $2.rds $2.uniqs.count --stranded --markGID --cache 1
39 # calculate a first-pass RPKM to re-weigh the unique reads,
40 # using 'none' for the splice count
41 python $ERANGEPATH/normalizeExpandedExonic.py $1 $2.rds $2.uniqs.count none $2.firstpass.rpkm --cache
43 # recount the unique reads with weights calculated during the first pass
44 python $ERANGEPATH/geneMrnaCountsWeighted.py $1 $2.rds $2.firstpass.rpkm $2.uniqs.recount --stranded --uniq --cache 1
47 python $ERANGEPATH/geneMrnaCounts.py $1 $2.rds $2.splices.count --stranded --splices --noUniqs --cache 1
49 # find new regions outside of gene models with reads piled up
50 python $ERANGEPATH/findall.py RNAFARP $2.rds $2.newregions.txt --RNA --minimum 1 --nomulti --flag NM --strandfilter plus --log rna.log --cache 1
51 python $ERANGEPATH/findall.py RNAFARM $2.rds $2.newregions.txt --RNA --minimum 1 --nomulti --flag NM --strandfilter minus --log rna.log --cache 1 --append
53 # filter out new regions that overlap repeats more than a certain fraction
54 python $ERANGEPATH/checkrmask.py $3 $2.newregions.txt $2.newregions.repstatus $2.newregions.good --startField 1 --log rna.log --cache 1
56 # Alternative 2: use a precomputed list of "new" regions (outside of gene models)
57 #python $ERANGEPATH/regionCounts.py $3 $2.nomatch.bed $2.newregions.good $2.stillnomatch.bed
58 #python $ERANGEPATH/regionCounts.py $3 $2.rds $2.newregions.good
60 # map all candidate regions that are within a given radius of a gene in bp
61 python $ERANGEPATH/getallgenes.py $1 $2.newregions.good $2.candidates.txt --radius $4 --trackfar --stranded --cache
63 # calculate expanded exonic read density
64 python $ERANGEPATH/normalizeExpandedExonic.py $1 $2.rds $2.uniqs.recount $2.splices.count $2.expanded.rpkm $2.candidates.txt $2.accepted.rpkm --cache
67 python $ERANGEPATH/geneMrnaCountsWeighted.py $1 $2.rds $2.expanded.rpkm $2.multi.count --accept $2.accepted.rpkm --stranded --multi --cache 1
69 # calculate final exonic read density
70 python $ERANGEPATH/normalizeFinalExonic.py $2.rds $2.expanded.rpkm $2.multi.count $2.final.rpkm --multifraction --withGID --cache