Commit patch to not break on spaces.
[bowtie.git] / scripts / make_h_sapiens_ncbi37.sh
1 #!/bin/sh
2
3 #
4 # Downloads sequence for H. sapiens (human) from NCBI.  This script was
5 # used to build the Bowtie index for H. sapiens 37.
6 #
7 # From README_CURRENT_BUILD:
8 # Organism: Homo sapiens (human)
9 # NCBI Build Number: 37    
10 # Version: 1
11 # Release date: 04 August 2009
12 #
13
# Names of the assembled chromosome sequences to fetch, space-separated:
# chr1 through chr22 followed by chrX and chrY.
BASE_CHRS="chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12"
BASE_CHRS="${BASE_CHRS} chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20"
BASE_CHRS="${BASE_CHRS} chr21 chr22 chrX chrY"

# Chromosomes that are downloaded and passed to bowtie-build
# (all of BASE_CHRS).
CHRS_TO_INDEX="${BASE_CHRS}"

# NCBI FTP locations: the genome root, the directory holding the assembled
# chromosomes, and the directory holding the mitochondrial sequence.
FTP_BASE="ftp://ftp.ncbi.nih.gov/genomes/H_sapiens"
FTP_ASM_BASE="${FTP_BASE}/Assembled_chromosomes"
FTP_MT_BASE="${FTP_BASE}/CHR_MT"
45
#
# get URL
#
# Downloads URL into the current directory, using wget when it is available
# and falling back to curl (saving under the URL's basename) otherwise.
#
# Arguments: $1 - URL of the file to download
# Returns:   the exit status of the download tool that was run
# Exits:     terminates the script with status 1 if neither wget nor curl
#            can be run
#
get() {
        # The URL is always passed quoted so that spaces or glob characters
        # in it are not re-split or expanded by the shell.
        if wget --version >/dev/null 2>&1 ; then
                wget "$1"
                return $?
        fi
        if curl --version >/dev/null 2>&1 ; then
                # curl writes to stdout by default; name the output file
                # after the last path component of the URL, as wget does.
                curl -o "$(basename "$1")" "$1"
                return $?
        fi
        echo "Please install wget or curl somewhere in your PATH" >&2
        exit 1
}
60
# Locate bowtie-build: prefer an executable in the current directory and
# otherwise fall back to whatever the shell finds in PATH.
BOWTIE_BUILD_EXE=./bowtie-build
if [ ! -x "$BOWTIE_BUILD_EXE" ] ; then
        # command -v is the POSIX lookup; unlike which, its exit status is
        # reliable and it prints nothing when the command is missing.
        if ! BOWTIE_BUILD_EXE=$(command -v bowtie-build) ; then
                echo "Could not find bowtie-build in current directory or in PATH" >&2
                exit 1
        fi
fi
70
# Download and unpack every chromosome FASTA that is not already present in
# the current directory, collecting the file names into INPUTS as the
# comma-separated list that is later handed to bowtie-build.
INPUTS=
BASE_NAME=hs_ref_GRCh37_
# CHRS_TO_INDEX is deliberately unquoted: it is a space-separated list.
for c in $CHRS_TO_INDEX ; do
        c=${BASE_NAME}${c}
        if [ ! -f "$c.fa" ] ; then
                F=$c.fa.gz
                # Use { ...; } and not ( ... ) for the error path: an exit
                # inside a subshell only ends the subshell, so the script
                # would carry on after a failed download or unzip.
                get "${FTP_ASM_BASE}/$F" || { echo "Error getting $F" >&2 ; exit 1 ; }
                gunzip "$F" || { echo "Error unzipping $F" >&2 ; exit 1 ; }
        fi
        # Append with a leading comma only when the list is non-empty.
        INPUTS=${INPUTS:+$INPUTS,}$c.fa
done
83
# Special case: the mitochondrial sequence is fetched from FTP_MT_BASE
# rather than from the assembled-chromosomes directory.
if [ ! -f hs_ref_chrMT.fa ] ; then
        F=hs_ref_chrMT.fa.gz
        # { ...; } keeps the exit in the current shell so that a failure
        # really aborts the script (a ( ... ) subshell would swallow it).
        get "${FTP_MT_BASE}/$F" || { echo "Error getting $F" >&2 ; exit 1 ; }
        gunzip "$F" || { echo "Error unzipping $F" >&2 ; exit 1 ; }
fi

# The mitochondrial FASTA is always the last entry in the input list.
INPUTS=$INPUTS,hs_ref_chrMT.fa
92
# Build the index. Any arguments given to this script are forwarded to
# bowtie-build ahead of the input list; "$@" keeps each argument intact even
# when it contains spaces.
echo Running "${BOWTIE_BUILD_EXE}" "$@" "${INPUTS}" h_sapiens_37_asm
if "${BOWTIE_BUILD_EXE}" "$@" "${INPUTS}" h_sapiens_37_asm ; then
        echo "h_sapiens_37_asm index built:"
        echo "   h_sapiens_37_asm.1.ebwt h_sapiens_37_asm.2.ebwt"
        echo "   h_sapiens_37_asm.3.ebwt h_sapiens_37_asm.4.ebwt"
        echo "   h_sapiens_37_asm.rev.1.ebwt h_sapiens_37_asm.rev.2.ebwt"
        echo "You may remove hs_ref_chr*.fa"
else
        # Propagate bowtie-build's failure status instead of ending with
        # the exit status of echo, which would report success.
        rc=$?
        echo "Index building failed; see error message" >&2
        exit "$rc"
fi