docs/runStandardAnalysis.sh

   1 #!/bin/bash
   2 #
   3 # This is no longer supported.  It is recommended that the pythin script of the same name be used instead.
   4 #
   5 # runStandardAnalysis.sh
   6 # ENRAGE
   7 #
   8 # example: . $ERANGEPATH/runStandardAnalysis.sh mouse c2c12rna ../mm9repeats/rmask.db 20000
   9 #
  10 #          assuming that we have rds database with the prefix c2c12rna.24R and that an RNAFAR analysis has already been run.
  11
  12 # set ERANGEPATH to the absolute or relative path to ERANGE, if it's not in the environment
  13
  14 if [ -z "$ERANGEPATH" ]
  15 then
  16     ERANGEPATH='../erange'
  17 fi
  18
  19 echo 'runStandardAnalysis.sh: version 4.3'
  20
  21 models=""
  22 if [ $# -eq 5 ]; then
  23     models=" --models "$5
  24 fi
  25
  26 replacemodels=""
  27 if [ $# -eq 6 ]; then
  28     replacemodels=" --models $5 --replacemodels "
  29 fi
  30
  31 if [ -z "$1" ]
  32 then
  33     echo
  34     echo 'usage:runStandardAnalysis.sh genome rdsprefix repeatmaskdb bpradius [modelfile] [--replacemodels]'
  35     echo
  36     echo 'where rdsprefix is the name of the rds file without the .rds extension'
  37     echo 'use "none" for the repeatmaskdb if you do not have one'
  38     echo
  39 else
  40
  41 # log the parameters
  42 arguments=$1' '$2' '$3' '$4' '$models' '$6
  43 echo 'running with settings: ' $arguments
  44 python $ERANGEPATH/recordLog.py rna.log runStandardAnalysis.sh "with parameters: $arguments"
  45
  46 # count the unique reads falling on the gene models ; the nomatch files are
  47 # mappable reads that fell outside of the Cistematic gene models and not the
  48 # unmappable of Eland (i.e, the "NM" reads)
  49 echo "python $ERANGEPATH/geneMrnaCounts.py $1 $2.rds $2.uniqs.count --markGID --cache 1 $models $replacemodels"
  50 python $ERANGEPATH/geneMrnaCounts.py $1 $2.rds $2.uniqs.count --markGID --cache 1 $models $replacemodels
  51
  52 # calculate a first-pass RPKM to re-weigh the unique reads,
  53 # using 'none' for the splice count
  54 echo "python $ERANGEPATH/normalizeExpandedExonic.py $1 $2.rds $2.uniqs.count none $2.firstpass.rpkm --cache  $models $replacemodels"
  55 python $ERANGEPATH/normalizeExpandedExonic.py $1 $2.rds $2.uniqs.count none $2.firstpass.rpkm --cache  $models $replacemodels
  56
  57 # recount the unique reads with weights calculated during the first pass
  58 echo "python $ERANGEPATH/geneMrnaCountsWeighted.py $1 $2.rds $2.firstpass.rpkm $2.uniqs.recount --uniq --cache 1  $models $replacemodels"
  59 python $ERANGEPATH/geneMrnaCountsWeighted.py $1 $2.rds $2.firstpass.rpkm $2.uniqs.recount --uniq --cache 1  $models $replacemodels
  60
  61 # count splice reads
  62 echo "python $ERANGEPATH/geneMrnaCounts.py $1 $2.rds $2.splices.count --splices --noUniqs --cache 1  $models $replacemodels"
  63 python $ERANGEPATH/geneMrnaCounts.py $1 $2.rds $2.splices.count --splices --noUniqs --cache 1  $models $replacemodels
  64
  65 # Alternative 1: find new regions outside of gene models with reads piled up
  66 echo "python $ERANGEPATH/findall.py RNAFAR $2.rds $2.newregions.txt --RNA --minimum 1 --nomulti --flag NM --log rna.log --cache 1"
  67 python $ERANGEPATH/findall.py RNAFAR $2.rds $2.newregions.txt --RNA --minimum 1 --nomulti --flag NM --log rna.log --cache 1
  68
  69 # Alternative 1: filter out new regions that overlap repeats more than a certain fraction
  70 echo "python $ERANGEPATH/checkrmask.py $3 $2.newregions.txt $2.newregions.repstatus $2.newregions.good --log rna.log --startField 1 --cache 1"
  71 python $ERANGEPATH/checkrmask.py $3 $2.newregions.txt $2.newregions.repstatus $2.newregions.good --log rna.log --startField 1 --cache 1
  72
  73 # map all candidate regions that are within a given radius of a gene in bp
  74 echo "python $ERANGEPATH/getallgenes.py $1 $2.newregions.good $2.candidates.txt --radius $4 --trackfar --cache  $models $replacemodels"
  75 python $ERANGEPATH/getallgenes.py $1 $2.newregions.good $2.candidates.txt --radius $4 --trackfar --cache  $models $replacemodels
  76
  77 # make sure candidates.txt file exists
  78 echo "touch $2.candidates.txt"
  79 touch $2.candidates.txt
  80
  81 # calculate expanded exonic read density
  82 echo "python $ERANGEPATH/normalizeExpandedExonic.py $1 $2.rds $2.uniqs.recount $2.splices.count $2.expanded.rpkm $2.candidates.txt $2.accepted.rpkm --cache  $models $replacemodels"
  83 python $ERANGEPATH/normalizeExpandedExonic.py $1 $2.rds $2.uniqs.recount $2.splices.count $2.expanded.rpkm $2.candidates.txt $2.accepted.rpkm --cache  $models $replacemodels
  84
  85 # weigh multi-reads
  86 echo "python $ERANGEPATH/geneMrnaCountsWeighted.py $1 $2.rds $2.expanded.rpkm $2.multi.count --accept $2.accepted.rpkm --multi --cache 1  $models $replacemodels"
  87 python $ERANGEPATH/geneMrnaCountsWeighted.py $1 $2.rds $2.expanded.rpkm $2.multi.count --accept $2.accepted.rpkm --multi --cache 1  $models $replacemodels
  88
  89 # calculate final exonic read density
  90 echo "python $ERANGEPATH/normalizeFinalExonic.py $2.rds $2.expanded.rpkm $2.multi.count $2.final.rpkm --multifraction --withGID --cache"
  91 python $ERANGEPATH/normalizeFinalExonic.py $2.rds $2.expanded.rpkm $2.multi.count $2.final.rpkm --multifraction --withGID --cache
  92
  93 fi