# Analysis steps for an ERANGE analysis of RNA-seq data.
#
# This is an example of the command-line settings used to run each of the
# scripts in runStandardAnalysis.sh. Each step is one command on its own line;
# file names (c2c12rna.24R.*) are the example dataset's and are meant to be
# replaced with your own.
#
# preliminary: create splice file using getsplicefa.py
# preliminary: build expanded genome using Eland's squashGenome
# preliminary: build repeatmask database using buildrmaskdb.py

# run eland2 on expanded genome, i.e. eland_25
/proj/genome/experiments/eland64/ElandPL03/Eland/eland_25 FC11048.s1.25mer.txt /woldlab/myod/data1/alim/genomes/mm9sp25 FC11048.s1.25mer.mm9.eland2 --multi

# create bed file of unique reads
python2.5 ../commoncode/maketrackfromeland2.py c2c12rna24R.comb.eland2 c2c12rna24R c2c12rna.24R.uniqs.bed

# create bed file of multi reads
python2.5 ../commoncode/maketrackmulti.py c2c12rna24R.comb.eland2 c2c12rna24Rmulti c2c12rna.24R.multi.bed

# create bed file of spliced reads
python2.5 ../commoncode/remapSplicesEland2.py ../mm9splices/knownGene.txt c2c12rna24R.comb.eland2 c2c12rna24Rsplices c2c12rna.24R.splices.bed

# count the unique reads falling on the gene models; the nomatch files are
# mappable reads that fell outside of the Cistematic gene models, not the
# reads that Eland could not map (i.e., the "NM" reads)
python2.5 ../commoncode/geneMrnaCounts.py mouse c2c12rna.24R.uniqs.bed c2c12rna.24R.uniqs.count c2c12rna.24R.nomatch.bed

# count splice reads
python2.5 ../commoncode/geneMrnaCounts.py mouse c2c12rna.24R.splices.bed c2c12rna.24R.splices.count c2c12rna.24R.nomatchsplices.bed

# calculate a first-pass RPKM to re-weigh the unique reads,
# using 'none' for the splice count
python2.5 ../commoncode/normalizeExpandedExonic.py mouse c2c12rna.24R.uniqs.bed c2c12rna.24R.uniqs.count none c2c12rna.24R.firstpass.rpkm -cache

# recount the unique reads with weights calculated during the first pass
python2.5 ../commoncode/geneMrnaCountsWeighted.py mouse c2c12rna.24R.uniqs.bed c2c12rna.24R.firstpass.rpkm c2c12rna.24R.uniqs.recount -cache

# There is a choice of either identifying new regions from the data alone
# (Alternative 1), or using a pre-computed list of new regions (presumably
# pooled from multiple nomatch.bed files, or literature) against the nomatch.bed
# file (Alternative 2).
# NOTE(review): both alternatives name c2c12rna.24R.newregions.good, and as
# listed here both would run in sequence -- presumably only one of them should
# be left enabled; confirm before running this file as a script.

# Alternative 1: find new regions outside of gene models with reads piled up
python2.5 ../commoncode/findallnocontrol.py RNA24R c2c12rna.24R.nomatch.bed c2c12rna.24R.newregions.txt 25 40

# Alternative 1: filter out new regions that overlap repeats more than a certain fraction
python2.5 ../commoncode/checkrmask.py ../mm9repeats/rmask.db c2c12rna.24R.newregions.txt c2c12rna.24R.newregions.repstatus c2c12rna.24R.newregions.good 1

# Alternative 2: use a precomputed list of "new" regions (outside of gene models)
python2.5 ../commoncode/regionCounts.py ../RNAFAR/all.newregions.good c2c12rna.24R.nomatch.bed c2c12rna.24R.newregions.good c2c12rna.24R.stillnomatch.bed

# map all candidate regions that are within a 20kb radius of a gene
# (radius given in bp); take out -cache if running locally
python2.5 ../commoncode/getallgenes.py mouse c2c12rna.24R.newregions.good c2c12rna.24R.candidates.txt 20001 -trackfar -cache

# calculate expanded exonic read density
python2.5 ../commoncode/normalizeExpandedExonic.py mouse c2c12rna.24R.uniqs.bed c2c12rna.24R.uniqs.recount c2c12rna.24R.splices.count c2c12rna.24R.expanded.rpkm c2c12rna.24R.candidates.txt c2c12rna.24R.accepted.rpkm -cache

# create bed file of accepted candidate regions
python2.5 ../commoncode/regiontobed.py RNAFAR c2c12rna.24R.accepted.rpkm RNAFAR.bed 255,0,0

# weigh multi-reads
python2.5 ../commoncode/geneMrnaCountsWeighted.py mouse c2c12rna.24R.multi.bed c2c12rna.24R.expanded.rpkm c2c12rna.24R.accepted.rpkm c2c12rna.24R.multi.count -cache

# calculate final exonic read density
python2.5 ../commoncode/normalizeFinalExonic.py mouse c2c12rna.24R.uniqs.bed c2c12rna.24R.splices.bed c2c12rna.24R.multi.bed c2c12rna.24R.expanded.rpkm c2c12rna.24R.multi.count c2c12rna.24R.final.rpkm