4 # Generate a lookup table that, given two packed DNA bytes (eight bases)
5 # and a character (A, C, G or T), returns how many times that character
6 # occurs in that packed pair of bytes. Useful for quickly counting
7 # character occurrences in long strings. The LUT is indexed first by
8 # character (0 - 3), then by the packed 16-bit value (0 - 2^16-1).
18 # Compile character arrays
# For every possible 16-bit value (0 .. 256*256-1), count how many of its
# eight 2-bit fields hold each of the four codes 0..3.
# NOTE(review): no `my $i` is visible in this excerpt -- confirm $i is
# declared earlier if the script is meant to run under `use strict`.
20 for($i = 0; $i < (256*256); $i++) {
# Split $i into eight 2-bit fields, lowest bits first. The digits in each
# variable name are the bit positions it holds (e.g. $b89 is bits 8-9).
22 my $b01 = ($i >> 0) & 3;
23 my $b23 = ($i >> 2) & 3;
24 my $b45 = ($i >> 4) & 3;
25 my $b67 = ($i >> 6) & 3;
26 my $b89 = ($i >> 8) & 3;
27 my $b1011 = ($i >> 10) & 3;
28 my $b1213 = ($i >> 12) & 3;
29 my $b1415 = ($i >> 14) & 3;
# Each `==` contributes 1 when true and adds as 0 when false, so every sum
# below is the number of fields (0 through 8) equal to the given code:
# $a counts fields equal to 0, $c equal to 1, $g equal to 2, $t equal to 3.
# NOTE(review): `my $a` hides Perl's special sort variable $a in this scope;
# harmless unless sort() is used here, but consider renaming.
# NOTE(review): the code that stores these counts and closes the loop is not
# visible in this excerpt.
31 my $a = ($b01 == 0) + ($b23 == 0) + ($b45 == 0) + ($b67 == 0) +
32 ($b89 == 0) + ($b1011 == 0) + ($b1213 == 0) + ($b1415 == 0);
33 my $c = ($b01 == 1) + ($b23 == 1) + ($b45 == 1) + ($b67 == 1) +
34 ($b89 == 1) + ($b1011 == 1) + ($b1213 == 1) + ($b1415 == 1);
35 my $g = ($b01 == 2) + ($b23 == 2) + ($b45 == 2) + ($b67 == 2) +
36 ($b89 == 2) + ($b1011 == 2) + ($b1213 == 2) + ($b1415 == 2);
37 my $t = ($b01 == 3) + ($b23 == 3) + ($b45 == 3) + ($b67 == 3) +
38 ($b89 == 3) + ($b1011 == 3) + ($b1213 == 3) + ($b1415 == 3);
48 # Count occurrences in all 4 bit pairs
# NOTE(review): a 16-bit index holds eight 2-bit fields, not four -- confirm
# what "4" refers to in the comment above and in the table name.
# Emit the C declaration of the table: 4 rows (one per character), each
# with 256*256 entries.
50 print "uint8_t cCntLUT_16b_4[4][256*256] = {\n";
# Row of A counts. Entries are laid out $entsPerLine per output line: two
# tabs are printed before the first entry of a line and a newline after the
# last one.
# NOTE(review): $entsPerLine is not defined in this excerpt, and the
# statement that prints each entry plus each loop's closing brace are not
# visible here either.
53 print "\t/* As */ {\n";
54 for($i = 0; $i < (256*256); $i++) {
55 print "\t\t" if(($i % $entsPerLine) == 0);
57 print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
# Row of C counts, same layout as the A row.
62 print "\t/* Cs */ {\n";
63 for($i = 0; $i < (256*256); $i++) {
64 print "\t\t" if(($i % $entsPerLine) == 0);
66 print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
# Row of G counts, same layout as the A row.
71 print "\t/* Gs */ {\n";
72 for($i = 0; $i < (256*256); $i++) {
73 print "\t\t" if(($i % $entsPerLine) == 0);
75 print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
# Row of T counts, same layout as the A row.
80 print "\t/* Ts */ {\n";
81 for($i = 0; $i < (256*256); $i++) {
82 print "\t\t" if(($i % $entsPerLine) == 0);
84 print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));