docs/runRNAPairedAnalysis.sh

   1 #!/bin/bash
   2 #
   3 # runRNAPairedAnalysis.sh
   4 # ENRAGE
   5 #
   6 # example: . ../commoncode/runRNAPairedAnalysis.sh mouse c2c12rna ../mm9repeats/rmask.db
   7 #
   8 #          assuming that we have rds database with the prefix c2c12rna.24R and that an RNAFAR analysis has already been run.
   9
  10 # set ERANGEPATH to the absolute or relative path to ERANGE, if it's not in the environment
  11
  12 if [ -z "$ERANGEPATH" ]
  13 then
  14     ERANGEPATH='../erange'
  15 fi
  16
  17 echo 'runRNAPairedAnalysis.sh: version 3.8'
  18
  19 models=""
  20 if [ $# -eq 5 ]; then
  21     models=" --models "$5
  22 fi
  23
  24 replacemodels=""
  25 if [ $# -eq 6 ]; then
  26     replacemodels=" --models $5 --replacemodels "
  27 fi
  28
  29 if [ -z "$1" ]
  30 then
  31     echo
  32     echo 'usage:runRNAPairedAnalysis.sh genome rdsprefix repeatmaskdb [modelfile] [--replacemodels]'
  33     echo
  34     echo 'where rdsprefix is the name of the rds file without the .rds extension'
  35     echo 'use "none" for the repeatmaskdb if you do not have one'
  36     echo
  37 else
  38
  39 # log the parameters
  40 arguments=$1' '$2' '$3' '$models' '$5
  41 echo 'running with settings: ' $arguments
  42 python $ERANGEPATH/recordLog.py rna.log runRNAPairedAnalysis.sh "with parameters: $arguments"
  43
  44 # count the unique reads falling on the gene models ; the nomatch files are
  45 # mappable reads that fell outside of the Cistematic gene models and not the
  46 # unmappable of Eland (i.e, the "NM" reads)
  47 echo "python $ERANGEPATH/geneMrnaCounts.py $1 $2.rds $2.uniqs.count -markGID -cache 1 $models $replacemodels"
  48 python $ERANGEPATH/geneMrnaCounts.py $1 $2.rds $2.uniqs.count --markGID --cache 1 $models $replacemodels
  49
  50 # calculate a first-pass RPKM to re-weigh the unique reads,
  51 # using 'none' for the splice count
  52 python $ERANGEPATH/normalizeExpandedExonic.py $1 $2.rds $2.uniqs.count none $2.firstpass.rpkm --cache $models $replacemodels
  53
  54 # recount the unique reads with weights calculated during the first pass
  55 python $ERANGEPATH/geneMrnaCountsWeighted.py $1 $2.rds $2.firstpass.rpkm $2.uniqs.recount --uniq --cache 1 $models $replacemodels
  56
  57 # count splice reads
  58 python $ERANGEPATH/geneMrnaCounts.py $1 $2.rds $2.splices.count --splices --noUniqs --markGID --cache 1 $models $replacemodels
  59
  60 # find new regions outside of gene models with reads piled up
  61 python $ERANGEPATH/findall.py RNAFAR $2.rds $2.newregions.txt --RNA --minimum 1 --nomulti --flag NM --log rna.log --cache 1
  62
  63 # filter out new regions that overlap repeats more than a certain fraction
  64 python $ERANGEPATH/checkrmask.py $3 $2.newregions.txt $2.newregions.repstatus $2.newregions.checked --startField 1 --log rna.log --cache 1
  65
  66 # calculate the read densities
  67 python $ERANGEPATH/regionCounts.py $2.newregions.checked $2.rds $2.newregions.good --markRDS --cache --log rna.log
  68
  69 # map all candidate regions that have paired ends overlapping with known genes
  70 python $ERANGEPATH/rnafarPairs.py $1 $2.newregions.good $2.rds $2.candidates.txt --cache $models $replacemodels
  71
  72 # calculate expanded exonic read density
  73 python $ERANGEPATH/normalizeExpandedExonic.py $1 $2.rds $2.uniqs.recount $2.splices.count $2.expanded.rpkm $2.candidates.txt $2.accepted.rpkm --cache $models $replacemodels
  74
  75 # weigh multi-reads
  76 python $ERANGEPATH/geneMrnaCountsWeighted.py $1 $2.rds $2.expanded.rpkm $2.multi.count --accept $2.accepted.rpkm --multi --cache 1 $models $replacemodels
  77
  78 # calculate final exonic read density
  79 python $ERANGEPATH/normalizeFinalExonic.py $2.rds $2.expanded.rpkm $2.multi.count $2.final.rpkm --multifraction --withGID --cache
  80
  81 fi