4 # Generate and run a series of random (but non-trivial) test cases for
5 # the Bowtie suite of tools, including bowtie, bowtie-build and
6 # bowtie-inspect. Most of the problems turned up this way are in the
7 # form of assertions in the tools. However, we also do some sanity-
8 # checking of the results; e.g. we use the pe_verify.pl script to
9 # verify that paired-end alignments are consistent with matched-up
10 # single-end alignments.
12 # Usage: perl random_tester.pl [rand seed] \
15 # [min # text bases] \
16 # [max text bases to add to min] \
17 # [min # read bases] \
18 # [max read bases to add to min]
21 # -n don't attempt to compile binaries; use existing binaries
22 # -p "<pol>" use only the specified alignment policy (e.g. "-n 3")
25 use List::Util qw[min max];
32 my $bowtie = "./bowtie"; # path to version of 'bowtie' binary to use
33 my $bowtie_build = "./bowtie-build"; # path to version of 'bowtie-build' binary to use
34 my $bowtie_build_old = "./bowtie-build"; # path to old version of 'bowtie-build' binary to use (for inspect.pl)
35 my $bowtie_inspect = "./bowtie-inspect"; # path to version of 'bowtie-build' binary to use
38 "bowtie=s" => \$bowtie,
39 "bowtie-build=s" => \$bowtie_build,
40 "bowtie-build-old=s" => \$bowtie_build_old,
41 "bowtie-inspect=s" => \$bowtie_inspect,
43 "n|no-compile" => \$noCompile,
44 "p|policy" => \$setPolicy, # overrides pickPolicy()
45 "e|pairs-only" => \$pairedEndOnly) || die "Bad arguments";
48 print "Usage: perl random_bowtie_tests.pl [options]* seed outer inner tbase trand pbase prand\n";
53 $seed = int $ARGV[0] if defined($ARGV[0]);
56 # make all the relevant binaries, unless we were asked not to
58 run("make bowtie bowtie-debug bowtie-build-debug ".
59 "bowtie-inspect-debug bowtie-inspect bowtie-build") == 0 || die "Error building";
64 "-n 3", "-n 2", "-n 1", "-n 0",
65 "-v 3", "-v 2", "-v 1", "-v 0"
70 my $r = int(rand($#policies + 1));
71 my $pol = $policies[$r];
72 $pol = $setPolicy if defined($setPolicy);
73 if($pe && int(rand(2)) == 0) {
74 my $min = int(rand(200))+10;
76 $pol .= " -X ".int(rand(30)+$min);
78 if(!$pe && int(rand(2)) == 0) {
79 # Possibly ask for best alignments
82 if($pol =~ /-n/ && int(rand(2)) == 0) {
83 $pol .= " --nomaqround";
85 if($pol =~ /-n/ && int(rand(2)) == 0) {
86 $pol .= " -l ".int(rand(30)+8);
88 if($pol =~ /-n/ && int(rand(2)) == 0) {
89 $pol .= " -e ".int(rand(120)+40);
95 $outer = int $ARGV[1] if defined($ARGV[1]);
99 $inner = int $ARGV[2] if defined($ARGV[2]);
102 $tbase = int $ARGV[3] if defined($ARGV[3]);
104 $trand = int $ARGV[4] if defined($ARGV[4]);
107 $pbase = int $ARGV[5] if defined($ARGV[5]);
109 $prand = int $ARGV[6] if defined($ARGV[6]);
112 $ibase = int $ARGV[7] if defined($ARGV[7]);
114 $irand = int $ARGV[8] if defined($ARGV[8]);
118 my @dnaMap = ('A', 'T', 'C', 'G',
120 'M', 'R', 'W', 'S', 'Y', 'K', 'V', 'H', 'D', 'B');
124 $t =~ tr/-NnMmRrWwSsYyKkVvHhDdBbXx/N/;
125 $t =~ /[ACGTN]+/ || die "Bad N-ized DNA string: $t";
130 my $or = int(rand(4));
133 my $ir = int(rand(100))+1;
135 for(my $i = 0; $i < $ir; $i++) {
139 for(my $i = 0; $i < $ir; $i++) {
147 # Generates a random DNA string of the given length
152 my $noAmbigs = int(rand(3)) == 0;
153 for($i = 0; $i < $num; $i++) {
154 my $or = int(rand(50));
155 if($or == 0 && !$noAmbigs) {
156 # Add a random, possibly ambiguous character
157 $t .= $dnaMap[int(rand($#dnaMap+1))];
158 } elsif($or == 1 && !$noAmbigs) {
159 # Add a random-length streak of Ns (max: 20)
160 my $streak = int(rand(20))+1;
161 for(my $j = $i; $j < $num && $j < $streak; $j++) {
165 # Add a random non-ambiguous character
166 $t .= $dnaMap[int(rand(4))];
173 my %cmap = ("A" => "0", "C" => "1", "G" => "2", "T" => "3", "N" => ".");
174 my $s = nonACGTtoN(randDna(shift));
175 for(my $i = 0; $i < length($s); $i++) {
176 defined($cmap{substr($s, $i, 1)}) || die "No entry for \"".substr($s, $i, 1)."\"\n";
177 substr($s, $i, 1) = $cmap{substr($s, $i, 1)};
182 # Utility function that returns the reverse complement of its argument
185 my $c = shift; # may be undef
187 $r =~ tr/aAcCgGtT/tTgGcCaA/ unless (defined($c) && $c);
191 # Utility function that returns the complement of its argument
195 $r =~ tr/aAcCgGtT/tTgGcCaA/ unless (defined($c) && $c);
199 # Insert $src just before the $off'th character of $dst
201 my ($dst, $src, $off) = @_;
202 $off < length($dst) || die "ins offset $off is not less than dst length ".length($dst)."\n";
203 my $s = substr($dst, 0, $off) . $src . substr($dst, $off);
207 # Delete the $off'th character of $dst
209 my ($dst, $off) = @_;
210 my $s = substr($dst, 0, $off).substr($dst, $off+1);
214 # Substitute $src for the $off'th character of $dst
216 my ($dst, $src, $off) = @_;
217 my $s = substr($dst, 0, $off).$src.substr($dst, $off+1);
222 # Sanity check whether a read and a list of edits corresponds correctly
223 # to a substring of the reference.
225 sub checkAlignmentRef {
226 my ($ref, $read, $fw, $off, $edits, $alnuc) = @_;
227 return unless $alnuc;
229 $off-- unless $alnuc;
230 if($edits ne '-' && $edits ne "") {
232 my @es = split(/[,]/, $edits);
234 my $colonoff = index($e, ":");
235 my $caratoff = index($e, ">");
236 my $pos = substr($e, 0, $colonoff);
237 my $ref = substr($e, $colonoff+1, $caratoff-$colonoff-1);
238 my $qry = substr($e, $caratoff+1);
239 length($qry) == 1 || die "Query char in edit $e, \"$qry\", isn't 1 char\n";
240 $ref = comp($ref, $alnuc) unless $fw;
243 #my $inspos = ($fw ? $pos + $adjust : length($read) - $pos - $adjust);
244 my $inspos = $pos + $adjust;
245 $read = ins($read, $ref, $inspos);
246 $adjust += length($ref);
247 } elsif($ref eq "-") {
249 #my $delpos = ($fw ? $pos + $adjust : length($read) - $pos - $adjust);
250 my $delpos = $pos + $adjust;
251 $read = del($read, $delpos);
255 #my $mmpos = ($fw ? $pos + $adjust : length($read) - $pos - $adjust);
256 my $mmpos = $pos + $adjust;
257 $read = subst($read, $ref, $mmpos);
261 $read = revcomp($read, $alnuc) unless $fw;
262 my $rstr = substr($ref, $off, length($read));
263 $fw = $fw ? '+' : '-';
264 $read eq $rstr || die "\n\nAlignment failed to pass sanity check:\n".
265 "FW: $fw, Off: $off, Edits: $edits\n".
272 # Given a string in nucleotide space, convert to colorspace.
278 "AA" => "0", "CC" => "0", "GG" => "0", "TT" => "0",
279 "AC" => "1", "CA" => "1", "GT" => "1", "TG" => "1",
280 "AG" => "2", "GA" => "2", "CT" => "2", "TC" => "2",
281 "AT" => "3", "TA" => "3", "CG" => "3", "GC" => "3",
282 "NA" => ".", "NC" => ".", "NG" => ".", "NT" => ".",
283 "AN" => ".", "CN" => ".", "GN" => ".", "TN" => ".",
286 my %nmap = ("0" => "A", "1" => "C", "2" => "G", "3" => "T", "." => "N");
288 for(my $i = 0; $i < length($s)-1; $i++) {
289 my $di = uc substr($s, $i, 2);
290 $di =~ tr/-NnMmRrWwSsYyKkVvHhDdBbXx/N/;
291 defined($cmap{$di}) || die "Bad dinuc: $di\n";
292 $ret .= ($nucs ? $nmap{$cmap{$di}} : $cmap{$di});
298 # Encode colors as proxy nucleotides
302 my %nmap = ("0" => "A", "1" => "C", "2" => "G", "3" => "T", "." => "N",
303 "A" => "A", "C" => "C", "G" => "G", "T" => "T", "N" => "N");
305 for(my $i = 0; $i < length($s); $i++) {
306 my $c = uc substr($s, $i, 1);
307 defined($nmap{$c}) || die;
313 # Add some random quality values to encourage excercising the
317 my $len = length($r);
319 for(my $i = 0; $i < $len; $i++) {
321 while(not $c =~ /[0-9A-Z\/=@%]/) {
322 $c = chr(33 + int(rand(41)));
329 # Trim whitespace from a string argument
339 open CMDTMP, ">.randtmp$seed.cmd" || die;
340 print CMDTMP "$cmd\n";
347 open CMDTMP, ">.randtmp$seed.cmd" || die;
348 print CMDTMP "$cmd\n";
353 # Build an Ebwt based on given arguments
355 my($t, $color, $offRate, $ftabChars) = @_;
359 my $file2 = "\"$t\"";
360 if(substr($t, 0, 1) eq '-') {
361 # Add backslash to escape first dash
365 $color = ($color ? "-C" : "");
367 # Write reference sequences to a FASTA file
368 open FA, ">.randtmp$seed.fa" || die "Could not open temporary fasta file";
369 my @seqs = split(/,/, $t);
370 for(my $i = 0; $i <= $#seqs; $i++) {
372 print FA "$seqs[$i]\n";
376 $cmd = "perl $Bin/test/inspect.pl --debug --seed $seed --bowtie-build2=$bowtie_build_old --ref=.randtmp$seed.fa";
379 || die "inspect.pl died with exitlevel $?";
381 # Make a version of the FASTA file where all non-A/C/G/T characters
382 # are Ns. This is useful if we'd like to compare to the output of
385 # We make one version (FAN) that's colorized when the index is
386 # colorspace so that we can compare the output of bowtie-inspect -e,
387 # and another version (FANO) that's always in nucleotide space, so
388 # we can test default bowtie-inspect.
390 open FAN, ">.randtmp$seed.ns.fa" || die "Could not open temporary fasta file";
391 open FANO, ">.randtmp$seed.ns.orig.fa" || die "Could not open temporary fasta file";
392 for(my $i = 0; $i <= $#seqs; $i++) {
395 my $t = nonACGTtoN($seqs[$i]);
398 $t = colorize($t, 1) if $color;
404 my $fasta = int(rand(2)) == 0;
406 # Use the FASTA file as input
408 $file2 = ".randtmp$seed.fa";
412 my $bucketRand = int(rand(3));
413 if($bucketRand == 0) {
414 $bucketArg = "--bmaxdivn ";
415 $bucketArg .= (int(rand(30))+1);
416 } elsif($bucketRand == 1) {
420 $offRate = "--offrate $offRate" if $offRate ne "";
421 $ftabChars = "--ftabchars $ftabChars" if $ftabChars ne "";
423 $noauto = "-a" if $offRate eq "" && $ftabChars eq "";
424 my $args = "-q --sanity $color $file1 $noauto $offRate $ftabChars $bucketArg $file2";
426 # Do unpacked version
427 my $cmd = "${bowtie_build}-debug $args .tmp$seed";
428 run("echo \"$cmd\" > .tmp$seed.cmd");
430 my $out = trim(runBacktick("$cmd 2>&1"));
431 $out =~ s/Warning: Encountered reference sequence with only gaps//g;
436 print "Expected no output, got:\n$out\n";
442 # Do packed version and assert that it matches unpacked version
443 # (sometimes, but not all the time because it takes a while)
444 if(int(rand(4)) == 0) {
445 $cmd = "${bowtie_build}-debug -a -p $args .tmp$seed.packed";
447 $out = trim(runBacktick("$cmd 2>&1"));
448 $out =~ s/Warning: Encountered reference sequence with only gaps//g;
451 if(run("diff .tmp$seed.1.ebwt .tmp$seed.packed.1.ebwt") != 0) {
453 } elsif(run("diff .tmp$seed.2.ebwt .tmp$seed.packed.2.ebwt") != 0) {
459 print "Expected no output, got:\n$out\n";
469 sub deleteReadParts {
470 run("rm -f .tmp.un$seed". ".* .tmp.un$seed". "_1.* .tmp.un$seed". "_2.*");
471 run("rm -f .tmp.max$seed".".* .tmp.max$seed"."_1.* .tmp.max$seed"."_2.*");
472 run("rm -f .tmp.al$seed". ".* .tmp.al$seed". "_1.* .tmp.al$seed". "_2.*");
475 sub search($$$$$$$$$$) {
476 my($tstr, $cstr, $pe, $color, $p1, $p2, $policy, $oneHit, $requireResult, $offRate) = @_;
477 my $ret = doSearch($tstr, $cstr, $pe, $color, $p1, $p2, $policy, $oneHit, $requireResult, $offRate);
483 # Make sure that a verbose alignment is consistent with the content of
486 # $alnuc: Alignments are in nucleotides (not colors, as with -C --col-cseq)
488 sub checkRefVerbose($$$$) {
489 my ($l, $textsr, $ctextsr, $alnuc) = @_;
490 my @texts = @{$textsr};
491 my @ctexts = @{$ctextsr};
492 scalar(@texts) > 0 || die;
493 my @ls = split(/\t/, $l);
494 my ($fw, $ref, $off, $seq, $mms) = ($ls[1] eq '+', $ls[2], $ls[3], $ls[4], $ls[7]);
495 $mms = "-" unless(defined($mms));
496 defined($ref) || die "Malformed verbose output line: $l\n";
497 $ref == int($ref) || die;
498 $ref <= $#texts || die "Ref idx $ref exceeds number of texts ".($#texts+1)."\n";
500 $orig = revcomp($orig, $alnuc) unless $fw;
501 print STDERR "Checking alignemnt: $l\n";
502 checkAlignmentRef($alnuc ? $texts[$ref] : $ctexts[$ref],
503 $orig, $fw, $off, $mms, $alnuc);
507 # Search for a pattern in an existing Ebwt
508 sub doSearch($$$$$$$$$$) {
509 my($nstr, $cstr, $pe, $color, $p1, $p2, $policy, $oneHit, $requireResult, $offRate) = @_;
511 my @nts = split(/,/, $nstr); # nucleotide texts
512 my @cts = split(/,/, $cstr); # color texts
515 my $patstr = "\"$p1\"";
516 $patstr = "-1 \"$p1\" -2 \"$p2\"" if $pe;
517 my $outfile = ".tmp$seed.out";
523 $color .= " --col-cseq ";
526 $color .= " --col-cqual" if int(rand(3)) == 0;
530 my $format = int(rand(5));
533 open FA, ">.randread$seed.fa" || die "Could not open temporary fasta file";
535 my @cs = split /[,]/, $p1;
537 foreach my $c (@cs) {
538 my @cms = split /[:]/, $c;
539 print FA ">$idx\n$cms[0]\n";
544 $patstr = ".randread$seed.fa";
546 run("mv .randread$seed.fa .randread$seed"."_1.fa");
547 $patstr = "-1 .randread$seed"."_1.fa";
548 open FA, ">.randread$seed"."_2.fa" || die "Could not open temporary fasta file";
549 @cs = split /[,]/, $p2;
551 foreach my $c (@cs) {
552 my @cms = split /[:]/, $c;
553 print FA ">$idx\n$cms[0]\n";
557 $patstr .= " -2 .randread$seed"."_2.fa";
559 } elsif($format == 1) {
560 # FASTQ with ASCII qualities
561 open FQ, ">.randread$seed.fq" || die "Could not open temporary fastq file";
563 my @cs = split /[,]/, $p1;
565 foreach my $c (@cs) {
566 my @cms = split /[:]/, $c;
567 $#cms == 1 || die "Should be pair with : separating seq from quals: $c\n$p1\n";
568 print FQ "\@$idx\n$cms[0]\n+\n$cms[1]\n";
573 $patstr = ".randread$seed.fq";
575 run("mv .randread$seed.fq .randread$seed"."_1.fq");
576 $patstr = "-1 .randread$seed"."_1.fq";
577 open FQ, ">.randread$seed"."_2.fq" || die "Could not open temporary fastq file";
578 @cs = split /[,]/, $p2;
580 foreach my $c (@cs) {
581 my @cms = split /[:]/, $c;
582 $#cms == 1 || die "Should be pair with : separating seq from quals: $c\n$p2\n";
583 print FQ "\@$idx\n$cms[0]\n+\n$cms[1]\n";
587 $patstr .= " -2 .randread$seed"."_2.fq";
589 } elsif($format == 2) {
590 # FASTQ with integer qualities
591 open FQ, ">.randread$seed.integer.fq" || die "Could not open temporary solexa fastq file"; my $ext = ".fa";
593 my @cs = split /[,]/, $p1;
595 foreach my $c (@cs) {
596 my @cms = split /[:]/, $c;
597 print FQ "\@$idx\n$cms[0]\n+\n";
598 for(my $i = 0; $i < length($cms[1]); $i++) {
599 my $q = substr($cms[1], $i, 1);
607 $patarg = "-q --integer-quals";
608 $patstr = ".randread$seed.integer.fq";
610 run("mv .randread$seed.integer.fq .randread$seed.integer_1.fq");
611 $patstr = "-1 .randread$seed.integer_1.fq";
612 open FQ, ">.randread$seed.integer_2.fq" || die "Could not open temporary fastq file";
613 @cs = split /[,]/, $p2;
615 foreach my $c (@cs) {
616 my @cms = split /[:]/, $c;
617 print FQ "\@$idx\n$cms[0]\n+\n";
618 for(my $i = 0; $i < length($cms[1]); $i++) {
619 my $q = substr($cms[1], $i, 1);
627 $patstr .= " -2 .randread$seed.integer_2.fq";
629 } elsif($format == 3) {
631 open RAW, ">.randread$seed.raw" || die "Could not open temporary raw file";
633 my @cs = split /[,]/, $p1;
634 foreach my $c (@cs) {
635 my @cms = split /[:]/, $c;
636 print RAW "$cms[0]\n";
640 $patstr = ".randread$seed.raw";
642 run("mv .randread$seed.raw .randread$seed"."_1.raw");
643 $patstr = "-1 .randread$seed"."_1.raw";
644 open RAW, ">.randread$seed"."_2.raw" || die "Could not open temporary fastq file";
645 @cs = split /[,]/, $p2;
646 foreach my $c (@cs) {
647 my @cms = split /[:]/, $c;
648 print RAW "$cms[0]\n";
651 $patstr .= " -2 .randread$seed"."_2.raw";
655 # Perhaps dump unaligned reads using --un argument
657 my $unalignReconArg = "";
658 my $unalign = int(rand(5));
659 if($unalign == 0 || $unalign == 2) {
660 $unalignArg .= "--un .tmp.un$seed$ext ";
662 $unalignReconArg .= " .tmp.un$seed"."_1$ext .tmp.un$seed"."_2$ext";
664 $unalignReconArg .= " .tmp.un$seed$ext";
667 if($unalign == 1 || $unalign == 2) {
668 $unalignArg .= "--max .tmp.max$seed$ext ";
671 $unalignReconArg .= " .tmp.max$seed"."_1$ext .tmp.max$seed"."_2$ext";
673 $unalignReconArg .= " .tmp.max$seed$ext";
677 if($unalign == 2 || $unalign == 3) {
678 $unalignArg .= "--al .tmp.al$seed$ext ";
681 $unalignReconArg .= " .tmp.al$seed"."_1$ext .tmp.al$seed"."_2$ext";
683 $unalignReconArg .= " .tmp.al$seed$ext";
690 if(int(rand(2)) == 0) {
693 $khits = "-k " . (int(rand(20))+2);
695 if(int(rand(3)) == 0) {
696 $khits .= " --strata --best";
698 if(int(rand(2)) == 0) {
700 $mhits = (int(rand(20))+2);
704 if(int(rand(2)) == 0) {
705 $khits .= " -m $mhits";
707 $khits .= " -M $mhits";
712 if(int(rand(4)) == 0) {
713 if(int(rand(2)) == 0) {
714 # Reverse-complement reference strand only
717 # Forward reference strand only
728 if($offRate ne "" && int(rand(3)) == 0) {
729 $offRate == int($offRate) || die "Bad offrate: $offRate\n";
730 $offRateStr = "--offrate " . ($offRate + 1 + int(rand(4)));
732 defined($policy) || die;
733 defined($color) || die;
734 defined($strand) || die;
735 defined($unalignArg) || die;
736 defined($khits) || die;
737 defined($offRateStr) || die;
738 defined($nstr) || die;
739 my $cmd = "${bowtie}-debug $policy $color $strand $unalignArg $khits $offRateStr --cost --orig \"$nstr\" $oneHit --sanity $patarg .tmp$seed $patstr";
741 my $out = trim(runBacktick("$cmd 2>.tmp$seed.stderr | tee .tmp$seed.stdout"));
745 print "Exitlevel: $?\n";
747 my $err = `cat .tmp$seed.stderr 2> /dev/null`;
748 print "Stdout:\n$out\nStderr:\n$err\n";
753 my $err = `cat .tmp$seed.stderr 2> /dev/null`;
756 if($out eq "" && $requireResult) {
757 print "Expected results but got \"No Results\"\n";
759 print "Stdout:\n$out\nStderr:\n$err\n";
765 # Parse output to see if any of it is bad
767 my @outlines = split(/[\r\n]+/, $out);
770 my %readStratum = (); # for unpaired reads
771 my %readStratum1 = (); # for mate 1
772 my %readStratum2 = (); # for mate 2
775 for(my $i = 0; $i <= $#outlines; $i++) {
776 # Get the alignment for the first mate (or the unpaired read)
777 my $l = $outlines[$i];
779 checkRefVerbose($l, \@nts, \@cts, $alnuc);
783 # Get the alignment for the second mate
784 $l2 = $outlines[++$i];
785 defined($l2) || die "Odd number of output lines";
787 checkRefVerbose($l2, \@nts, \@cts, $alnuc);
791 # No two results should be the same
793 !defined($outhash{$key}) || die "Result $key appears in output twice";
796 # Parse out the read id
797 my @ls = split(/[\t]/, $l);
798 my ($mate1, $fw1, $stratum1) = ($ls[0], $ls[1], $ls[8]);
799 my ($mate2, $fw2, $stratum2) = (undef, undef, undef);
801 my @l2s = split(/[\t]/, $l2);
802 ($mate2, $fw2, $stratum2) = ($l2s[0], $l2s[1], $l2s[8]);
804 my $peor = ($color ? "ff" : "fr");
805 my $m1fw = ($peor eq 'fr' || $peor eq 'ff') ? '+' : '-';
806 my $m2fw = ($peor eq 'ff') ? '+' : '-';
807 if($pe && $peor eq 'ff') {
808 $fw1 eq $fw2 || die "Saw different orientations for mates\n";
810 $fw1 ne $fw2 || die "Saw same orientation for mates\n";
812 if($strand =~ /nofw/) {
814 my $m = substr($mate1, index($mate1, "/")+1);
816 ($fw1 eq '-' && $fw2 eq '-') ||
817 die "Saw non-rc alignment with --nofw specified\n";
820 die "Saw a forward alignment on line ".($i+1)." when --nofw was specified";
824 die "Saw a forward alignment on line ".($i+1)." when --nofw was specified";
826 } elsif($strand =~ /norc/) {
829 ($fw1 eq '+' && $fw2 eq '+') ||
830 die "Saw non-fw alignment with --nofw specified\n";
832 my $m = substr($mate1, index($mate1, "/")+1);
834 die "Saw a rev-comp alignment on line ".($i+1)." when --norc was specified";
838 die "Saw a rev-comp alignment on line ".($i+1)." when --norc was specified";
841 if($mate1 ne $lastread && !$pe) {
842 die "Read $mate1 appears multiple times non-consecutively" if defined($readcount{$mate1});
844 if(!$pe && $policy =~ /--strata /) {
845 !defined($readStratum{$mate1}) ||
846 $readStratum{$mate1} == $stratum1 ||
847 die "Incompatible strata: old: $readStratum{$mate1}, cur: $stratum1";
848 $readStratum{$mate1} = $stratum1;
849 } elsif($policy =~ /--strata /) {
850 # Paired-end case; use minimum of 2 mates
851 if($mate2 =~ /\/1$/) {
853 my $tmp = $mate1; $mate1 = $mate2; $mate2 = $tmp;
854 $tmp = $stratum1; $stratum1 = $stratum2; $stratum2 = $tmp;
856 if(defined($readStratum1{$mate1})) {
857 # One or ther other mate has to have the same stratum as we recorded previously
858 defined($readStratum2{$mate2}) || die "$mate1 was seen before, but not $mate2";
859 $readStratum1{$mate1} == $stratum1 ||
860 $readStratum2{$mate2} == $stratum2 ||
861 $readStratum1{$mate1} == $stratum2 ||
862 $readStratum2{$mate2} == $stratum1 ||
863 die "Incompatible strata: old: <$readStratum1{$mate1}, $readStratum2{$mate2}> cur: <$stratum1, $stratum2>";
865 $readStratum1{$mate1} = $stratum1;
866 $readStratum2{$mate2} = $stratum2;
869 $readcount{$mate1}++;
871 $readcount{$mate1} <= $mhits || die "Read $mate1 matched more than $mhits times";
876 $cmd = "${bowtie} $policy $color $strand $unalignArg $khits $offRateStr --cost --orig \"$nstr\" $oneHit --sanity $patarg .tmp$seed $patstr";
878 my $out2 = trim(runBacktick("$cmd 2>.tmp$seed.stderr"));
879 $out2 eq $out || die "Normal bowtie output did not match debug bowtie output";
881 $cmd = "${bowtie} $policy $color $strand $unalignArg $khits --orig \"$nstr\" $oneHit --sanity $patarg .tmp$seed $patstr $outfile";
883 my $out3 = trim(runBacktick("$cmd 2>.tmp$seed.stderr"));
885 $cmd = "${bowtie} --mm $policy $color $strand $unalignArg $khits --orig \"$nstr\" $oneHit --sanity $patarg .tmp$seed $patstr $outfile";
887 my $out4 = trim(runBacktick("$cmd 2>.tmp$seed.stderr"));
888 $out3 eq $out4 || die "Normal bowtie output did not match memory-mapped bowtie output";
891 # Now do another run with verbose output so that we can check the
893 $cmd = "${bowtie}-debug $policy $color $strand $unalignArg $khits $offRateStr --orig \"$nstr\" $oneHit --sanity $patarg .tmp$seed $patstr";
895 $out = trim(runBacktick("$cmd 2>.tmp$seed.stderr"));
896 # Parse output to see if any of it is bad
897 @outlines = split('\n', $out);
898 for my $l (@outlines) {
899 checkRefVerbose($l, \@nts, \@cts, $alnuc);
903 # If search was for paired-end alignments, then verify the
904 # outcome using pe_verify.pl
907 my $patstr2 = $patstr;
910 my $col = ($color ? "-C" : "");
911 open TMP, ">.tmp$seed.pe_verify.cmd" || die;
912 $cmd = "perl scripts/pe_verify.pl --args=\"--quiet\" $col -d $pol .tmp$seed $patstr2";
915 $out = trim(runBacktick("$cmd 2>.tmp$seed.pe_verify.stderr"));
919 print "scripts/pe_verify.pl exitlevel: $?\n";
921 my $err = `cat .tmp$seed.pe_verify.stderr 2> /dev/null`;
922 print "Stdout:\n$out\nStderr:\n$err\n";
929 if(!$pe && $policy =~ /--best/) {
930 my $col = ($color ? "-C" : "");
931 $cmd = "perl scripts/best_verify.pl -d $policy $col .tmp$seed $patstr";
933 $out = trim(runBacktick("$cmd 2>.tmp$seed.best_verify.stderr"));
936 print "scripts/best_verify.pl exitlevel: $?\n";
938 my $err = `cat .tmp$seed.best_verify.stderr 2> /dev/null`;
939 print "Stdout:\n$out\nStderr:\n$err\n";
946 # Run the reconciler to ensure that --un, --max, and --al had
948 # .tmp$seed.verbose.out
949 if($format >= 0 && $format <= 2 && $unalignReconArg ne "") {
951 my $cmd = "${bowtie} $policy $color $strand $unalignArg $khits $offRateStr --orig \"$nstr\" $oneHit --sanity $patarg .tmp$seed $patstr .tmp$seed.verbose.out";
953 run($cmd) == 0 || die "Error performing reconciler run\n";
954 $khits =~ s/--strata --best//;
955 $patarg =~ s/--integer-quals//;
956 $unalignReconArg =~ /\.tmp\.un/ || die;
960 $cmd = "perl scripts/reconcile_alignments_pe.pl $color $patarg $khits $patstr .tmp$seed.verbose.out $unalignReconArg";
962 run($cmd) == 0 || die "Failed to reconcile";
964 $cmd = "perl scripts/reconcile_alignments.pl $color $patarg $khits $patstr .tmp$seed.verbose.out $unalignReconArg";
966 run($cmd) == 0 || die "Failed to reconcile";
978 for(; $outer > 0; $outer--) {
980 # Generate random parameters
982 if(int(rand(2)) == 0) {
983 $offRate = int(rand(16));
986 if(int(rand(2)) == 0) {
987 $ftabChars = 1 + int(rand(8));
990 # Generate random text(s)
991 my $nt = int(rand(10)) + 1;
992 my $tstr = '', $cstr = '';
996 # For each reference sequence...
997 for(my $i = 0; $i < $nt; $i++) {
998 my $tlen = $tbase + int(rand($trand));
999 # Add some all-gap references to try to confuse bowtie-build
1000 # and bowtie-inspect
1005 push(@cts, colorize($tt, 1));
1006 my $newt = randGap() . $tt . randGap();
1008 $cstr .= colorize($newt, 1);
1012 $tt = randDna($tlen); # add text meat
1014 push(@cts, colorize($tt, 1));
1015 my $newt = randGap() . $tt . randGap();
1017 $cstr .= colorize($newt, 1);
1024 my $color = (int(rand(2)) == 0);
1026 # Run the command to build the Ebwt from the random text
1027 $pass += build($tstr, $color, $offRate, $ftabChars);
1028 last if(++$tests > $limit);
1031 for(; $in >= 0; $in--) {
1033 my $pe = (int(rand(2)) == 0);
1034 $pe = 1 if $pairedEndOnly;
1035 # Generate random pattern(s) based on text
1038 # Decide number of patterns to generate
1039 my $np = int(rand(30)) + 1;
1040 for(my $i = 0; $i < $np; $i++) {
1042 my $tt = $nts[int(rand($#nts))];
1050 # Pick a left-hand offset for the insert
1051 my $il = int(rand(length($tt))) - 10;
1052 # Pick a length for the insert
1053 my $ilen = int(rand($irand)) + $ibase;
1054 $il = min($il, length($tt)-$ilen);
1055 my $ir = min($il + $ilen, length($tt));
1056 my $is = substr $tt, $il, $ir - $il;
1057 # Pick a length for the #1 mate
1058 my $plen1 = int(rand($prand)) + $pbase;
1059 # Pick a length for the #2 mate
1060 my $plen2 = int(rand($prand)) + $pbase;
1061 $p1 = substr $is, 0, $plen1;
1063 $p2 = substr $is, 0, $plen2;
1067 # Pick a length for the read
1068 $plen = int(rand($prand)) + $pbase;
1069 $pl = int(rand(length($tt))) - 10;
1070 $pl = max($pl, $color ? 5 : 4);
1071 $pl = min($pl, length($tt));
1072 $pr = min($pl + $plen, length($tt));
1073 $p1 = substr $tt, $pl, $pr - $pl;
1075 # Check for empty pattern or pattern that spans a comma
1076 if(length($p1) < ($color ? 5 : 4) || index($p1, ",") != -1) {
1079 if($pe && (length($p2) < ($color ? 5 : 4) || index($p2, ",") != -1)) {
1082 # Optionally add nucleotide changes to pattern
1084 my $nummms = int(rand($color ? 3 : 5));
1085 for(my $j = 0; $j < $nummms; $j++) {
1086 substr($p1, int(rand(length($p1))), 1) = randDna(1);
1089 $nummms = int(rand(4));
1090 for(my $j = 0; $j < $nummms; $j++) {
1091 substr($p2, int(rand(length($p2))), 1) = randDna(1);
1095 # Possibly reverse complement it
1096 if((int(rand(2)) == 0)) {
1105 $p1 =~ tr/MRWSYKVHDBX/N/;
1107 defined($p1) || die;
1108 !$pe || defined($p2) || die;
1109 my ($nize1, $nize2) = (int(rand(2)) == 0, int(rand(2)) == 0);
1110 $p1 = colorize($p1, $nize1);
1111 $p2 = colorize($p2, $nize2) if $pe;
1112 # Optionally add color changes to pattern
1114 my $nummms = int(rand(3));
1115 for(my $j = 0; $j < $nummms; $j++) {
1116 my $r = randColor(1);
1117 $r = nucencode($r) if $nize1;
1118 substr($p1, int(rand(length($p1))), 1) = $r;
1121 $nummms = int(rand(4));
1122 for(my $j = 0; $j < $nummms; $j++) {
1123 my $r = randColor(1);
1124 $r = nucencode($r) if $nize2;
1125 substr($p2, int(rand(length($p2))), 1) = $r;
1130 # Add valid random quality values
1133 $pfinal1 .= "," if($i < $np-1);
1135 $p2 =~ tr/MRWSYKVHDBX/N/;
1138 $pfinal2 .= "," if($i < $np-1);
1142 # Run the command to search for the pattern from the Ebwt
1143 my $oneHit = (int(rand(3)) == 0);
1144 my $policy = pickPolicy($pe);
1145 my $expectResult = 1;
1147 for(my $i = 0; $i < length($pfinal1); $i++) {
1148 my $c = substr($pfinal1, $i, 1);
1150 if($c ne 'A' && $c ne 'C' && $c ne 'G' && $c ne 'T') {
1158 $pass += search($tstr, $cstr, $pe, $color, $pfinal1, $pfinal2, $policy, $oneHit, $expectResult, $offRate); # require 1 or more results
1159 last if(++$tests > $limit);
1163 for(; $in >= 0; $in--) {
1164 my $pe = (int(rand(2)) == 0);
1165 # Generate random pattern *not* based on text
1168 my $np = int(rand(10)) + 1;
1169 for(my $i = 0; $i < $np; $i++) {
1170 my $plen = int(rand($prand)) + $pbase;
1171 my $p1 = randDna($plen);
1172 $plen = int(rand($prand)) + $pbase;
1173 my $p2 = randDna($plen);
1174 $p1 =~ tr/MRWSYKVHDBX/N/;
1175 $p1 = colorize($p1, int(rand(2)) == 0) if $color;
1177 $p2 =~ tr/MRWSYKVHDBX/N/ if $pe;
1178 $p2 = colorize($p2, int(rand(2)) == 0) if $color && $pe;
1179 $p2 = addQual($p2) if $pe;
1181 $pfinal2 .= $p2 if $pe;
1184 $pfinal2 .= "," if $pe;
1188 # Run the command to search for the pattern from the Ebwt
1189 my $oneHit = (int(rand(3)) == 0);
1190 my $policy = pickPolicy($pe);
1191 $pass += search($tstr, $cstr, $pe, $color, $pfinal1, $pfinal2, $policy, $oneHit, 0, $offRate); # do not require any results
1192 last if(++$tests > $limit);
1196 print "$pass tests passed, $fail failed\n";
1197 exit 1 if $fail > 0;