docs/runStrandedAnalysis.sh

   1 #!/bin/bash
   2 #
   3 # This is no longer supported.  It is recommended that the pythin script of the same name be used instead.
   4 #
   5 # runStrandedAnalysis.sh
   6 # ENRAGE
   7 #
   8 # example: . ../commoncode/runStrandedAnalysis.sh mouse c2c12rna ../mm9repeats/rmask.db 20000
   9 #
  10 #          assuming that we have rds database with the prefix c2c12rna.24R.
  11
  12 # set ERANGEPATH to the absolute or relative path to ERANGE, if it's not in the environment
  13
  14 if [ -z "$ERANGEPATH" ]
  15 then
  16     ERANGEPATH='../erange'
  17 fi
  18
  19 echo 'runStrandedAnalysis.sh: version 4.2'
  20
  21 if [ -z "$1" ]
  22 then
  23     echo
  24     echo 'usage:runStrandedAnalysis.sh genome rdsprefix repeatmaskdb bpradius'
  25     echo
  26     echo 'where rdsprefix is the name of the rds file without the .rds extension'
  27     echo 'use "none" for the repeatmaskdb if you do not have one'
  28     echo
  29 else
  30
  31 # log the parameters
  32 arguments=$1' '$2' '$3' '$4
  33 echo 'running with settings: ' $arguments
  34 python $ERANGEPATH/recordLog.py rna.log runStrandedAnalysis.sh "with parameters: $arguments"
  35
  36 # count the unique reads falling on the gene models ; the nomatch files are
  37 # mappable reads that fell outside of the Cistematic gene models and not the
  38 # unmappable of Eland (i.e, the "NM" reads)
  39 python $ERANGEPATH/geneMrnaCounts.py $1 $2.rds $2.uniqs.count --stranded --markGID --cache 1
  40
  41 # calculate a first-pass RPKM to re-weigh the unique reads,
  42 # using 'none' for the splice count
  43 python $ERANGEPATH/normalizeExpandedExonic.py $1 $2.rds $2.uniqs.count none $2.firstpass.rpkm --cache
  44
  45 # recount the unique reads with weights calculated during the first pass
  46 python $ERANGEPATH/geneMrnaCountsWeighted.py $1 $2.rds $2.firstpass.rpkm $2.uniqs.recount --stranded --uniq --cache 1
  47
  48 # count splice reads
  49 python $ERANGEPATH/geneMrnaCounts.py $1 $2.rds $2.splices.count --stranded --splices --noUniqs --cache 1
  50
  51 # find new regions outside of gene models with reads piled up
  52 python $ERANGEPATH/findall.py RNAFARP $2.rds $2.newregions.txt --RNA --minimum 1 --nomulti --flag NM --strandfilter plus --log rna.log --cache 1
  53 python $ERANGEPATH/findall.py RNAFARM $2.rds $2.newregions.txt --RNA --minimum 1 --nomulti --flag NM --strandfilter minus --log rna.log --cache 1 --append
  54
  55 # filter out new regions that overlap repeats more than a certain fraction
  56 python $ERANGEPATH/checkrmask.py $3 $2.newregions.txt $2.newregions.repstatus $2.newregions.good --startField 1 --log rna.log --cache 1
  57
  58 # Alternative 2: use a precomputed list of "new" regions (outside of gene models)
  59 #python $ERANGEPATH/regionCounts.py $3 $2.nomatch.bed $2.newregions.good $2.stillnomatch.bed
  60 #python $ERANGEPATH/regionCounts.py $3 $2.rds $2.newregions.good
  61
  62 # map all candidate regions that are within a given radius of a gene in bp
  63 python $ERANGEPATH/getallgenes.py $1 $2.newregions.good $2.candidates.txt --radius $4 --trackfar --stranded --cache
  64
  65 # calculate expanded exonic read density
  66 python $ERANGEPATH/normalizeExpandedExonic.py $1 $2.rds $2.uniqs.recount $2.splices.count $2.expanded.rpkm $2.candidates.txt $2.accepted.rpkm --cache
  67
  68 # weigh multi-reads
  69 python $ERANGEPATH/geneMrnaCountsWeighted.py $1 $2.rds $2.expanded.rpkm $2.multi.count --accept $2.accepted.rpkm --stranded --multi --cache 1
  70
  71 # calculate final exonic read density
  72 python $ERANGEPATH/normalizeFinalExonic.py $2.rds $2.expanded.rpkm $2.multi.count $2.final.rpkm --multifraction --withGID --cache
  73
  74 fi