docs/runStandardAnalysis.sh

   1 #!/bin/bash
   2 #
   3 # runStandardAnalysis.sh
   4 # ENRAGE
   5 #
   6 # example: . $ERANGEPATH/runStandardAnalysis.sh mouse c2c12rna ../mm9repeats/rmask.db 20000
   7 #
   8 #          assuming that we have rds database with the prefix c2c12rna.24R and that an RNAFAR analysis has already been run.
   9
  10 # set ERANGEPATH to the absolute or relative path to ERANGE, if it's not in the environment
  11
  12 if [ -z "$ERANGEPATH" ]
  13 then
  14     ERANGEPATH='../erange'
  15 fi
  16
  17 echo 'runStandardAnalysis.sh: version 4.3'
  18
  19 models=""
  20 if [ $# -eq 5 ]; then
  21     models=" --models "$5
  22 fi
  23
  24 replacemodels=""
  25 if [ $# -eq 6 ]; then
  26     replacemodels=" --models $5 --replacemodels "
  27 fi
  28
  29 if [ -z "$1" ]
  30 then
  31     echo
  32     echo 'usage:runStandardAnalysis.sh genome rdsprefix repeatmaskdb bpradius [modelfile] [--replacemodels]'
  33     echo
  34     echo 'where rdsprefix is the name of the rds file without the .rds extension'
  35     echo 'use "none" for the repeatmaskdb if you do not have one'
  36     echo
  37 else
  38
  39 # log the parameters
  40 arguments=$1' '$2' '$3' '$4' '$models' '$6
  41 echo 'running with settings: ' $arguments
  42 python $ERANGEPATH/recordLog.py rna.log runStandardAnalysis.sh "with parameters: $arguments"
  43
  44 # count the unique reads falling on the gene models ; the nomatch files are
  45 # mappable reads that fell outside of the Cistematic gene models and not the
  46 # unmappable of Eland (i.e, the "NM" reads)
  47 echo "python $ERANGEPATH/geneMrnaCounts.py $1 $2.rds $2.uniqs.count --markGID --cache 1 $models $replacemodels"
  48 python $ERANGEPATH/geneMrnaCounts.py $1 $2.rds $2.uniqs.count --markGID --cache 1 $models $replacemodels
  49
  50 # calculate a first-pass RPKM to re-weigh the unique reads,
  51 # using 'none' for the splice count
  52 echo "python $ERANGEPATH/normalizeExpandedExonic.py $1 $2.rds $2.uniqs.count none $2.firstpass.rpkm --cache  $models $replacemodels"
  53 python $ERANGEPATH/normalizeExpandedExonic.py $1 $2.rds $2.uniqs.count none $2.firstpass.rpkm --cache  $models $replacemodels
  54
  55 # recount the unique reads with weights calculated during the first pass
  56 echo "python $ERANGEPATH/geneMrnaCountsWeighted.py $1 $2.rds $2.firstpass.rpkm $2.uniqs.recount --uniq --cache 1  $models $replacemodels"
  57 python $ERANGEPATH/geneMrnaCountsWeighted.py $1 $2.rds $2.firstpass.rpkm $2.uniqs.recount --uniq --cache 1  $models $replacemodels
  58
  59 # count splice reads
  60 echo "python $ERANGEPATH/geneMrnaCounts.py $1 $2.rds $2.splices.count --splices --noUniqs --cache 1  $models $replacemodels"
  61 python $ERANGEPATH/geneMrnaCounts.py $1 $2.rds $2.splices.count --splices --noUniqs --cache 1  $models $replacemodels
  62
  63 # Alternative 1: find new regions outside of gene models with reads piled up
  64 echo "python $ERANGEPATH/findall.py RNAFAR $2.rds $2.newregions.txt --RNA --minimum 1 --nomulti --flag NM --log rna.log --cache 1"
  65 python $ERANGEPATH/findall.py RNAFAR $2.rds $2.newregions.txt --RNA --minimum 1 --nomulti --flag NM --log rna.log --cache 1
  66
  67 # Alternative 1: filter out new regions that overlap repeats more than a certain fraction
  68 echo "python $ERANGEPATH/checkrmask.py $3 $2.newregions.txt $2.newregions.repstatus $2.newregions.good --log rna.log --startField 1 --cache 1"
  69 python $ERANGEPATH/checkrmask.py $3 $2.newregions.txt $2.newregions.repstatus $2.newregions.good --log rna.log --startField 1 --cache 1
  70
  71 # map all candidate regions that are within a given radius of a gene in bp
  72 echo "python $ERANGEPATH/getallgenes.py $1 $2.newregions.good $2.candidates.txt --radius $4 --trackfar --cache  $models $replacemodels"
  73 python $ERANGEPATH/getallgenes.py $1 $2.newregions.good $2.candidates.txt --radius $4 --trackfar --cache  $models $replacemodels
  74
  75 # make sure candidates.txt file exists
  76 echo "touch $2.candidates.txt"
  77 touch $2.candidates.txt
  78
  79 # calculate expanded exonic read density
  80 echo "python $ERANGEPATH/normalizeExpandedExonic.py $1 $2.rds $2.uniqs.recount $2.splices.count $2.expanded.rpkm $2.candidates.txt $2.accepted.rpkm --cache  $models $replacemodels"
  81 python $ERANGEPATH/normalizeExpandedExonic.py $1 $2.rds $2.uniqs.recount $2.splices.count $2.expanded.rpkm $2.candidates.txt $2.accepted.rpkm --cache  $models $replacemodels
  82
  83 # weigh multi-reads
  84 echo "python $ERANGEPATH/geneMrnaCountsWeighted.py $1 $2.rds $2.expanded.rpkm $2.multi.count --accept $2.accepted.rpkm --multi --cache 1  $models $replacemodels"
  85 python $ERANGEPATH/geneMrnaCountsWeighted.py $1 $2.rds $2.expanded.rpkm $2.multi.count --accept $2.accepted.rpkm --multi --cache 1  $models $replacemodels
  86
  87 # calculate final exonic read density
  88 echo "python $ERANGEPATH/normalizeFinalExonic.py $2.rds $2.expanded.rpkm $2.multi.count $2.final.rpkm --multifraction --withGID --cache"
  89 python $ERANGEPATH/normalizeFinalExonic.py $2.rds $2.expanded.rpkm $2.multi.count $2.final.rpkm --multifraction --withGID --cache
  90
  91 fi