docs/runRNAPairedAnalysis.sh

   1 #!/bin/bash
   2 #
   3 # This is no longer supported.  It is recommended that the pythin script of the same name be used instead.
   4 #
   5 # runRNAPairedAnalysis.sh
   6 # ENRAGE
   7 #
   8 # example: . ../commoncode/runRNAPairedAnalysis.sh mouse c2c12rna ../mm9repeats/rmask.db
   9 #
  10 #          assuming that we have rds database with the prefix c2c12rna.24R and that an RNAFAR analysis has already been run.
  11
  12 # set ERANGEPATH to the absolute or relative path to ERANGE, if it's not in the environment
  13
  14 if [ -z "$ERANGEPATH" ]
  15 then
  16     ERANGEPATH='../erange'
  17 fi
  18
  19 echo 'runRNAPairedAnalysis.sh: version 3.8'
  20
  21 models=""
  22 if [ $# -eq 5 ]; then
  23     models=" --models "$5
  24 fi
  25
  26 replacemodels=""
  27 if [ $# -eq 6 ]; then
  28     replacemodels=" --models $5 --replacemodels "
  29 fi
  30
  31 if [ -z "$1" ]
  32 then
  33     echo
  34     echo 'usage:runRNAPairedAnalysis.sh genome rdsprefix repeatmaskdb [modelfile] [--replacemodels]'
  35     echo
  36     echo 'where rdsprefix is the name of the rds file without the .rds extension'
  37     echo 'use "none" for the repeatmaskdb if you do not have one'
  38     echo
  39 else
  40
  41 # log the parameters
  42 arguments=$1' '$2' '$3' '$models' '$5
  43 echo 'running with settings: ' $arguments
  44 python $ERANGEPATH/recordLog.py rna.log runRNAPairedAnalysis.sh "with parameters: $arguments"
  45
  46 # count the unique reads falling on the gene models ; the nomatch files are
  47 # mappable reads that fell outside of the Cistematic gene models and not the
  48 # unmappable of Eland (i.e, the "NM" reads)
  49 echo "python $ERANGEPATH/geneMrnaCounts.py $1 $2.rds $2.uniqs.count -markGID -cache 1 $models $replacemodels"
  50 python $ERANGEPATH/geneMrnaCounts.py $1 $2.rds $2.uniqs.count --markGID --cache 1 $models $replacemodels
  51
  52 # calculate a first-pass RPKM to re-weigh the unique reads,
  53 # using 'none' for the splice count
  54 python $ERANGEPATH/normalizeExpandedExonic.py $1 $2.rds $2.uniqs.count none $2.firstpass.rpkm --cache $models $replacemodels
  55
  56 # recount the unique reads with weights calculated during the first pass
  57 python $ERANGEPATH/geneMrnaCountsWeighted.py $1 $2.rds $2.firstpass.rpkm $2.uniqs.recount --uniq --cache 1 $models $replacemodels
  58
  59 # count splice reads
  60 python $ERANGEPATH/geneMrnaCounts.py $1 $2.rds $2.splices.count --splices --noUniqs --markGID --cache 1 $models $replacemodels
  61
  62 # find new regions outside of gene models with reads piled up
  63 python $ERANGEPATH/findall.py RNAFAR $2.rds $2.newregions.txt --RNA --minimum 1 --nomulti --flag NM --log rna.log --cache 1
  64
  65 # filter out new regions that overlap repeats more than a certain fraction
  66 python $ERANGEPATH/checkrmask.py $3 $2.newregions.txt $2.newregions.repstatus $2.newregions.checked --startField 1 --log rna.log --cache 1
  67
  68 # calculate the read densities
  69 python $ERANGEPATH/regionCounts.py $2.newregions.checked $2.rds $2.newregions.good --markRDS --cache --log rna.log
  70
  71 # map all candidate regions that have paired ends overlapping with known genes
  72 python $ERANGEPATH/rnafarPairs.py $1 $2.newregions.good $2.rds $2.candidates.txt --cache $models $replacemodels
  73
  74 # calculate expanded exonic read density
  75 python $ERANGEPATH/normalizeExpandedExonic.py $1 $2.rds $2.uniqs.recount $2.splices.count $2.expanded.rpkm $2.candidates.txt $2.accepted.rpkm --cache $models $replacemodels
  76
  77 # weigh multi-reads
  78 python $ERANGEPATH/geneMrnaCountsWeighted.py $1 $2.rds $2.expanded.rpkm $2.multi.count --accept $2.accepted.rpkm --multi --cache 1 $models $replacemodels
  79
  80 # calculate final exonic read density
  81 python $ERANGEPATH/normalizeFinalExonic.py $2.rds $2.expanded.rpkm $2.multi.count $2.final.rpkm --multifraction --withGID --cache
  82
  83 fi