6 # Calculate mapability of each reference position assuming reference is
22 my $btargs = "-t --norc -S --sam-nohead -M 1 --mm";
25 if(defined($ENV{BOWTIE_HOME})) {
26 $bowtie = "$ENV{BOWTIE_HOME}/bowtie";
27 unless(-x $bowtie) { $bowtie = "" };
30 $bowtie = `which bowtie 2>/dev/null`;
32 unless(-x $bowtie) { $bowtie = "" };
34 $bowtie = "./bowtie" if ($bowtie eq "" && -x "./bowtie");
39 "frequency=i" => \$freq,
40 "bowtie=s" => \$bowtie_arg,
47 print STDERR "Bowtie: found: $bowtie; given: $bowtie_arg\n";
48 print STDERR "Input fasta: $fa\n";
49 print STDERR "FW index: $fwidx\n";
50 print STDERR "RC index: $rcidx\n";
51 print STDERR "Alignment policy: $pol\n";
52 print STDERR "Window size: $win\n";
53 print STDERR "Frequency: $freq\n";
55 $fa ne "" || die "Must specify -fasta\n";
56 $fwidx ne "" || die "Must specify -fwidx\n";
57 $rcidx ne "" || die "Must specify -rcidx\n";
58 -f "$fwidx.1.ebwt" || die "Could not find -fwidx index file $fwidx.1.ebwt\n";
59 -f "$rcidx.1.ebwt" || die "Could not find -rcidx index file $rcidx.1.ebwt\n";
61 $bowtie = $bowtie_arg if $bowtie_arg ne "";
64 if($bowtie_arg ne "") {
65 die "Specified -bowtie, \"$bowtie\" doesn't exist or isn't executable\n";
67 die "bowtie couldn't be found in BOWTIE_HOME, PATH, or current directory; please specify -bowtie\n";
72 my $name = ""; # name of sequence currently being processed
78 # Read lengths of all the entries in all the input fasta files.
82 my @is = split(/[,]/, $ins);
84 open IN, "$i" || die "Could not open $i\n";
88 if(substr($_, 0, 1) eq '>') {
89 next if /\?[0-9]*$/; # Skip >?50000 lines
91 $name = substr($name, 1); # Chop off >
92 if($name =~ /^FW:/ || $name =~ /^RC:/) {
93 $name = substr($name, 3); # Chop off FW:/RC:
95 my @ns = split(/\s+/, $name);
96 $name = $ns[0]; # Get short name
98 print STDERR "Saw name $name\n";
101 $lens{$name} += length($_); # Update length
102 $totlen += length($_);
108 print STDERR "Reading fasta lengths\n";
110 print STDERR " read ".scalar(keys %lens)." sequences with total length $totlen\n";
113 for(my $i = 0; $i < $win; $i++) { push @last, 0 };
115 for(my $i = 0; $i < $win; $i++) { $last[$i] = 0 };
118 print STDERR "Opening bowtie pipes\n";
119 my $fwcmd = "$bowtie -F $win,$freq $btargs $pol $fwidx $fa";
120 my $rccmd = "$bowtie -F $win,$freq $btargs $pol $rcidx $fa";
121 print STDERR "Forward command: $fwcmd\n";
122 print STDERR "Reverse comp command: $rccmd\n";
123 open BTFW, "$fwcmd |" || die "Couldn't open pipe '$fwcmd |'\n";
124 open BTRC, "$rccmd |" || die "Couldn't open pipe '$rccmd |'\n";
126 # 10_554 4 * 0 0 * * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII XM:i:0
127 # 10_555 4 * 0 0 * * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII XM:i:0
128 # 10_556 4 * 0 0 * * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII XM:i:0
129 # 10_557 4 * 0 0 * * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII XM:i:0
130 # 10_558 4 * 0 0 * * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII XM:i:0
131 # 10_559 4 * 0 0 * * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII XM:i:0
133 print STDERR "Reading...\n";
141 last unless defined($fwl) && defined($rcl);
144 my @fws = split(/\t/, $fwl);
145 my @fws1 = split(/_/, $fws[0]);
146 my @rcs = split(/\t/, $rcl);
147 my @rcs1 = split(/_/, $rcs[0]);
149 my $cname = join("_", @fws1[0..$#fws1-1]);
150 $cname eq join("_", @fws1[0..$#fws1-1]) || die "Name mismatch on line $ln:\n$fwl\n$rcl\n";
152 $off eq $rcs1[-1] || die "Offset mismatch on line $ln:\n$fwl\n$rcl\n";
153 # If a read aligns, XM:i is not printed
154 # If a read fails to align, XM:i:0 is printed
155 # If a read aligns multiple places, XM:i:N is printed where N>0
156 my $fwxm0 = $fws[-1] =~ /XM:i:0/;
157 my $rcxm0 = $rcs[-1] =~ /XM:i:0/;
158 my $fwxm = $fws[-1] =~ /XM:i:/;
159 my $rcxm = $rcs[-1] =~ /XM:i:/;
160 # For mapable to be true, we need the read to have failed to align
161 # in one index and aligned uniquely in the other
162 my $mapable = (($fwxm0 && !$rcxm) || ($rcxm0 && !$fwxm)) ? 1 : 0;
164 $cname =~ s/\s.*//; # trim everything after first whitespace to get short name
165 if($cname =~ /^FW:/ || $cname =~ /^RC:/) {
166 $cname = substr($cname, 3);
168 if($name ne $cname) {
170 # Flush remaining characters from previous name
171 defined($lens{$name}) || die;
172 $cur == $lens{$name} - $win + 1 || die "name is $name, cur is $cur, len is $lens{$name}, win is $win\n";
173 for(; $cur < $lens{$name}; $cur++) {
174 $running -= $last[$cur % $win];
175 $last[$cur % $win] = 0;
176 $lastc = chr($running + 64);
178 if((($cur+1) % 60) == 0) {
185 defined($lens{$name}) || die "No such name as \"$name\"\n";
186 print "\n" unless $lastc eq "\n";
194 $running -= $last[$cur % $win];
195 $last[$cur % $win] = $mapable;
196 $running += $last[$cur % $win];
198 $running <= $win || die "running counter $running got higher than window size $win\n";
199 $lastc = chr($running + 64);
201 if((($cur+1) % 60) == 0) {
208 defined($lens{$name}) || die;
209 for(; $cur < $lens{$name}; $cur++) {
210 $running -= $last[$cur % $win];
211 $last[$cur % $win] = 0;
212 $lastc = chr($running + 64);
214 if((($cur+1) % 60) == 0) {
220 $? == 0 || die "Bad exitlevel from forward bowtie: $?\n";
222 $? == 0 || die "Bad exitlevel from reverse-comp bowtie: $?\n";