3 # runRNAPairedAnalysis.sh
6 # example: . ../commoncode/runRNAPairedAnalysis.sh mouse c2c12rna ../mm9repeats/rmask.db
8 # assuming that we have rds database with the prefix c2c12rna.24R and that an RNAFAR analysis has already been run.
10 # set ERANGEPATH to the absolute or relative path to ERANGE, if it's not in the environment
12 if [ -z "$ERANGEPATH" ]
14 ERANGEPATH='../commoncode'
17 echo 'runRNAPairedAnalysis.sh: version 3.7'
26 replacemodels=" -models $5 -replacemodels "
32 echo 'usage:runRNAPairedAnalysis.sh genome rdsprefix repeatmaskdb [modelfile] [-replacemodels]'
34 echo 'where rdsprefix is the name of the rds file without the .rds extension'
35 echo 'use "none" for the repeatmaskdb if you do not have one'
40 arguments=$1' '$2' '$3' '$models' '$5
41 echo 'running with settings: ' $arguments
42 python $ERANGEPATH/recordLog.py rna.log runRNAPairedAnalysis.sh "with parameters: $arguments"
44 # count the unique reads falling on the gene models ; the nomatch files are
45 # mappable reads that fell outside of the Cistematic gene models and not the
46 # unmappable of Eland (i.e, the "NM" reads)
47 echo "python $ERANGEPATH/geneMrnaCounts.py $1 $2.rds $2.uniqs.count -markGID -cache 1 $models $replacemodels"
48 python $ERANGEPATH/geneMrnaCounts.py $1 $2.rds $2.uniqs.count -markGID -cache 1 $models $replacemodels
50 # calculate a first-pass RPKM to re-weigh the unique reads,
51 # using 'none' for the splice count
52 python $ERANGEPATH/normalizeExpandedExonic.py $1 $2.rds $2.uniqs.count none $2.firstpass.rpkm -cache $models $replacemodels
54 # recount the unique reads with weights calculated during the first pass
55 python $ERANGEPATH/geneMrnaCountsWeighted.py $1 $2.rds $2.firstpass.rpkm $2.uniqs.recount -uniq -cache 1 $models $replacemodels
58 python $ERANGEPATH/geneMrnaCounts.py $1 $2.rds $2.splices.count -splices -noUniqs -markGID -cache 1 $models $replacemodels
60 # find new regions outside of gene models with reads piled up
61 python $ERANGEPATH/findall.py RNAFAR $2.rds $2.newregions.txt -RNA -minimum 1 -nomulti -flag NM -log rna.log -cache 1
63 # filter out new regions that overlap repeats more than a certain fraction
64 python $ERANGEPATH/checkrmask.py $3 $2.newregions.txt $2.newregions.repstatus $2.newregions.checked -startField 1 -log rna.log -cache 1
66 # calculate the read densities
67 python $ERANGEPATH/regionCounts.py $2.newregions.checked $2.rds $2.newregions.good -markRDS -cache -log rna.log
69 # map all candidate regions that have paired ends overlapping with known genes
70 python $ERANGEPATH/rnafarPairs.py $1 $2.newregions.good $2.rds $2.candidates.txt -cache $models $replacemodels
72 # calculate expanded exonic read density
73 python $ERANGEPATH/normalizeExpandedExonic.py $1 $2.rds $2.uniqs.recount $2.splices.count $2.expanded.rpkm $2.candidates.txt $2.accepted.rpkm -cache $models $replacemodels
76 python $ERANGEPATH/geneMrnaCountsWeighted.py $1 $2.rds $2.expanded.rpkm $2.multi.count -accept $2.accepted.rpkm -multi -cache 1 $models $replacemodels
78 # calculate final exonic read density
79 python $ERANGEPATH/normalizeFinalExonic.py $2.rds $2.expanded.rpkm $2.multi.count $2.final.rpkm -multifraction -withGID -cache