Commit patch to not break on spaces.
[bowtie.git] / scripts / make_hg18.sh
1 #!/bin/sh
2
3 #
4 # Downloads sequence for the hg18 version of H. sapiens (human) from
5 # UCSC.
6 #
7 # Note that UCSC's hg18 build has three categories of compressed fasta
8 # files:
9 #
10 # 1. The base files, named chr??.fa.gz
11 # 2. The unplaced-sequence files, named chr??_random.fa.gz
12 # 3. The alternative-haplotype files, named chr??_???_hap?.fa.gz
13 #
14 # By default, this script builds an index for just the base files,
15 # since alignments to those sequences are the most useful.  To change
16 # which categories are built by this script, edit the CHRS_TO_INDEX
17 # variable below.
18 #
19
# Base chromosome sequences: the numbered chromosomes plus X, Y and M.
# Each list below is one space-separated string; the trailing " \" keeps
# a single space between names while joining the physical lines.
BASE_CHRS="chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 \
chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 \
chr21 chr22 chrX chrY chrM"

# Unplaced-sequence entries.  Note the list is not contiguous: there is
# no entry for chr12, chr14, chr20, chrY or chrM.
RANDOM_CHRS="chr1_random chr2_random chr3_random chr4_random \
chr5_random chr6_random chr7_random chr8_random \
chr9_random chr10_random chr11_random chr13_random \
chr15_random chr16_random chr17_random chr18_random \
chr19_random chr21_random chr22_random chrX_random"

# Alternative-haplotype entries.
ALT_HAP_CHRS="chr22_h2_hap1 chr5_h2_hap1 chr6_cox_hap1 chr6_qbl_hap2"

# The set of sequences that actually gets downloaded and indexed.
# Only the base chromosomes are selected here.
CHRS_TO_INDEX="$BASE_CHRS"

# Directory URL under which each <name>.fa.gz file is fetched.
UCSC_HG18_BASE="ftp://hgdownload.cse.ucsc.edu/goldenPath/hg18/chromosomes"
78
#
# get URL
#
# Fetches URL with wget if it can be run, otherwise with curl.
#
# Arguments: $1 - the URL to download
# Outputs:   an error message on stderr when neither tool is usable
# Returns:   the exit status of the download tool that was used
# Exits:     the whole script with status 1 when neither wget nor curl
#            can be run
#
get() {
        # Probe each tool by asking for its version; output is discarded.
        if wget --version >/dev/null 2>&1 ; then
                # Quoted so a URL containing whitespace stays one argument.
                wget "$1"
                return $?
        fi
        if curl --version >/dev/null 2>&1 ; then
                # Name the output file after the last path component of
                # the URL.
                curl -o "$(basename "$1")" "$1"
                return $?
        fi
        echo "Please install wget or curl somewhere in your PATH" >&2
        exit 1
}
93
# Locate the bowtie-build executable.  A copy in the current directory
# takes precedence; otherwise the name is resolved through PATH.  The
# script exits with status 1 if neither lookup succeeds.
BOWTIE_BUILD_EXE=./bowtie-build
if [ ! -x "$BOWTIE_BUILD_EXE" ] ; then
        # command -v is the POSIX way to resolve a command name: it prints
        # the resolved name on success and returns non-zero when the
        # command is unknown.  Capturing its output in a single call also
        # keeps the resolved path off this script's stdout.
        if ! BOWTIE_BUILD_EXE=$(command -v bowtie-build) ; then
                echo "Could not find bowtie-build in current directory or in PATH" >&2
                exit 1
        fi
fi
103
# Make sure an uncompressed <name>.fa file exists for every entry in
# CHRS_TO_INDEX, downloading and unzipping the ones that are missing,
# and collect the file names into INPUTS as a comma-separated list.
INPUTS=
# $CHRS_TO_INDEX is deliberately unquoted: it is a space-separated list
# and must be split into one word per chromosome name.
for c in $CHRS_TO_INDEX ; do
        if [ ! -f "${c}.fa" ] ; then
                F=${c}.fa.gz
                # The failure handlers use { ...; } rather than ( ... ):
                # an exit inside a subshell would end only the subshell
                # and the script would carry on with a missing file.
                get "${UCSC_HG18_BASE}/$F" || { echo "Error getting $F" >&2 ; exit 1 ; }
                gunzip "$F" || { echo "Error unzipping $F" >&2 ; exit 1 ; }
        fi
        # Prefix a comma only when INPUTS already holds an entry.
        INPUTS=${INPUTS:+$INPUTS,}${c}.fa
done
114
# Build the index: run bowtie-build on the collected FASTA files,
# forwarding any options given to this script, with "hg18" as the last
# argument.
echo "Running ${BOWTIE_BUILD_EXE}${*:+ $*} ${INPUTS} hg18"
# The command is invoked directly with "$@" instead of through a
# flattened string, so each script argument (and the executable path)
# stays a single word even when it contains spaces.
if "$BOWTIE_BUILD_EXE" "$@" "$INPUTS" hg18 ; then
        echo "hg18 index built; you may remove fasta files"
else
        echo "Index building failed; see error message"
        # Report the failure to the caller instead of exiting 0.
        exit 1
fi