3 # This is no longer supported. It is recommended that the pythin script of the same name be used instead.
5 # runStrandedAnalysis.sh
8 # example: . ../commoncode/runStrandedAnalysis.sh mouse c2c12rna ../mm9repeats/rmask.db 20000
10 # assuming that we have rds database with the prefix c2c12rna.24R.
12 # set ERANGEPATH to the absolute or relative path to ERANGE, if it's not in the environment
14 if [ -z "$ERANGEPATH" ]
16 ERANGEPATH='../erange'
19 echo 'runStrandedAnalysis.sh: version 4.2'
24 echo 'usage:runStrandedAnalysis.sh genome rdsprefix repeatmaskdb bpradius'
26 echo 'where rdsprefix is the name of the rds file without the .rds extension'
27 echo 'use "none" for the repeatmaskdb if you do not have one'
32 arguments=$1' '$2' '$3' '$4
33 echo 'running with settings: ' $arguments
34 python $ERANGEPATH/recordLog.py rna.log runStrandedAnalysis.sh "with parameters: $arguments"
36 # count the unique reads falling on the gene models ; the nomatch files are
37 # mappable reads that fell outside of the Cistematic gene models and not the
38 # unmappable of Eland (i.e, the "NM" reads)
39 python $ERANGEPATH/geneMrnaCounts.py $1 $2.rds $2.uniqs.count --stranded --markGID --cache 1
41 # calculate a first-pass RPKM to re-weigh the unique reads,
42 # using 'none' for the splice count
43 python $ERANGEPATH/normalizeExpandedExonic.py $1 $2.rds $2.uniqs.count none $2.firstpass.rpkm --cache
45 # recount the unique reads with weights calculated during the first pass
46 python $ERANGEPATH/geneMrnaCountsWeighted.py $1 $2.rds $2.firstpass.rpkm $2.uniqs.recount --stranded --uniq --cache 1
49 python $ERANGEPATH/geneMrnaCounts.py $1 $2.rds $2.splices.count --stranded --splices --noUniqs --cache 1
51 # find new regions outside of gene models with reads piled up
52 python $ERANGEPATH/findall.py RNAFARP $2.rds $2.newregions.txt --RNA --minimum 1 --nomulti --flag NM --strandfilter plus --log rna.log --cache 1
53 python $ERANGEPATH/findall.py RNAFARM $2.rds $2.newregions.txt --RNA --minimum 1 --nomulti --flag NM --strandfilter minus --log rna.log --cache 1 --append
55 # filter out new regions that overlap repeats more than a certain fraction
56 python $ERANGEPATH/checkrmask.py $3 $2.newregions.txt $2.newregions.repstatus $2.newregions.good --startField 1 --log rna.log --cache 1
58 # Alternative 2: use a precomputed list of "new" regions (outside of gene models)
59 #python $ERANGEPATH/regionCounts.py $3 $2.nomatch.bed $2.newregions.good $2.stillnomatch.bed
60 #python $ERANGEPATH/regionCounts.py $3 $2.rds $2.newregions.good
62 # map all candidate regions that are within a given radius of a gene in bp
63 python $ERANGEPATH/getallgenes.py $1 $2.newregions.good $2.candidates.txt --radius $4 --trackfar --stranded --cache
65 # calculate expanded exonic read density
66 python $ERANGEPATH/normalizeExpandedExonic.py $1 $2.rds $2.uniqs.recount $2.splices.count $2.expanded.rpkm $2.candidates.txt $2.accepted.rpkm --cache
69 python $ERANGEPATH/geneMrnaCountsWeighted.py $1 $2.rds $2.expanded.rpkm $2.multi.count --accept $2.accepted.rpkm --stranded --multi --cache 1
71 # calculate final exonic read density
72 python $ERANGEPATH/normalizeFinalExonic.py $2.rds $2.expanded.rpkm $2.multi.count $2.final.rpkm --multifraction --withGID --cache