4 # Generate lookup table that, given a packed DNA byte (four bases) and
5 # a character (A, C, G or T), returns how many times that character
6 # occurs in that packed byte. Useful for quickly counting character
7 # occurrences in long strings. The generated cCntLUT_4 is indexed first by which
8 # bit pairs are counted (all 4, low 1, low 2, low 3), then by character (0-3), then by byte (0-255).
10 # Larger lookup tables are also possible, though they seem
11 # counterproductive. E.g., looking up eight bases at a time yields a
12 # 256K LUT, which doesn't fit in L1. A four-base LUT is 1KB, easily
# Arrays named after the per-byte count scalars computed in the loop below
# ($a4, $c3, ...): the leading letter is the base (a, c, g, t) and the
# trailing digit is how many low-order bit pairs of the byte are counted.
# Presumably each array collects the matching count for every byte value;
# the statements that fill them are not visible in this part of the file.
#
# Declared with parenthesised `my` lists on purpose: in
# `my @x = (), @y = ();` the `my` binds only to @x, which leaves @y as an
# undeclared package variable (a compile error under `use strict`).
# Freshly declared arrays are already empty, so no `= ()` is needed.
my (@as4, @as3, @as2, @as1);
my (@cs4, @cs3, @cs2, @cs1);
my (@gs4, @gs3, @gs2, @gs1);
my (@ts4, @ts3, @ts2, @ts1);
23 # Compile character arrays
# For every possible packed byte value (0-255), split the byte into its
# four 2-bit fields and count how many fields equal 0, 1, 2 and 3. The
# a/c/g/t prefixes of the count variables pair those field values with
# A, C, G and T respectively; the numeric suffix is how many of the
# low-order fields are included in the count.
# NOTE(review): $i is not declared with `my` in the visible code, and the
# statements that store these counts and close the loop are not visible
# in this excerpt.
25 for($i = 0; $i < 256; $i++) {
# 2-bit fields of the byte, from least significant ($b01) to most
# significant ($b67); the `& 3` mask keeps each in the range 0-3.
26 my $b01 = ($i >> 0) & 3;
27 my $b23 = ($i >> 2) & 3;
28 my $b45 = ($i >> 4) & 3;
29 my $b67 = ($i >> 6) & 3;
# Counts over all four fields. Each `==` contributes 1 when true and
# nothing when false, so each sum is the number of matching fields (0-4).
31 my $a4 = ($b01 == 0) + ($b23 == 0) + ($b45 == 0) + ($b67 == 0);
32 my $c4 = ($b01 == 1) + ($b23 == 1) + ($b45 == 1) + ($b67 == 1);
33 my $g4 = ($b01 == 2) + ($b23 == 2) + ($b45 == 2) + ($b67 == 2);
34 my $t4 = ($b01 == 3) + ($b23 == 3) + ($b45 == 3) + ($b67 == 3);
# Counts over the three low-order fields only ($b67 is excluded).
41 my $a3 = ($b01 == 0) + ($b23 == 0) + ($b45 == 0);
42 my $c3 = ($b01 == 1) + ($b23 == 1) + ($b45 == 1);
43 my $g3 = ($b01 == 2) + ($b23 == 2) + ($b45 == 2);
44 my $t3 = ($b01 == 3) + ($b23 == 3) + ($b45 == 3);
# Counts over the two low-order fields only ($b45 and $b67 are excluded).
51 my $a2 = ($b01 == 0) + ($b23 == 0);
52 my $c2 = ($b01 == 1) + ($b23 == 1);
53 my $g2 = ($b01 == 2) + ($b23 == 2);
54 my $t2 = ($b01 == 3) + ($b23 == 3);
# Counts for the lowest field alone. The `+ 0` turns Perl's empty-string
# false value into a numeric 0, so the result is always 0 or 1.
61 my $a1 = ($b01 == 0) + 0;
62 my $c1 = ($b01 == 1) + 0;
63 my $g1 = ($b01 == 2) + 0;
64 my $t1 = ($b01 == 3) + 0;
# Preamble of the generated C source: the <stdint.h> include (the table
# emitted afterwards is declared as uint8_t) followed by a banner comment
# naming this generator. Each chunk is written with its own print call.
for my $preamble_chunk (
    "#include <stdint.h>\n\n",
    "/* Generated by gen_lookup_tables.pl */\n\n",
) {
    print $preamble_chunk;
}
77 # Count occurrences in all 4 bit pairs
# Opens the C array cCntLUT_4 (declared [4][4][256]) and its first outer
# element, labelled "All 4 bit pairs", then opens the As, Cs, Gs and Ts
# sub-tables in that order. Each sub-table header is followed by a
# 256-iteration loop; the lines visible here only handle row layout.
# NOTE(review): the statement that prints each entry and the lines that
# close each loop and the As/Cs/Gs sub-tables are not visible in this
# excerpt, and $entsPerLine is not assigned in the visible code.
79 print "uint8_t cCntLUT_4[4][4][256] = {\n";
80 print "\t/* All 4 bit pairs */ {\n";
83 print "\t\t/* As */ {\n";
84 for($i = 0; $i < 256; $i++) {
# Start of a row: indent with three tabs before the first entry.
85 print "\t\t\t" if(($i % $entsPerLine) == 0);
# End of a row: break the line after every $entsPerLine-th entry.
87 print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
# The Cs, Gs and Ts loops below use the same row layout as the As loop.
92 print "\t\t/* Cs */ {\n";
93 for($i = 0; $i < 256; $i++) {
94 print "\t\t\t" if(($i % $entsPerLine) == 0);
96 print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
101 print "\t\t/* Gs */ {\n";
102 for($i = 0; $i < 256; $i++) {
103 print "\t\t\t" if(($i % $entsPerLine) == 0);
105 print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
110 print "\t\t/* Ts */ {\n";
111 for($i = 0; $i < 256; $i++) {
112 print "\t\t\t" if(($i % $entsPerLine) == 0);
114 print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
# Closes the Ts sub-table and this outer element; the trailing comma
# separates it from the next outer element of the C initializer.
116 print "\t\t}\n\t},\n";
118 # Count occurrences in low 1 bit pair
# Opens the second outer element of the table, labelled "Least
# significant 1 bit pair", then opens the As, Cs, Gs and Ts sub-tables in
# that order. Each sub-table header is followed by a 256-iteration loop;
# the lines visible here only handle row layout.
# NOTE(review): the statement that prints each entry and the lines that
# close each loop and the As/Cs/Gs sub-tables are not visible in this
# excerpt, and $entsPerLine is not assigned in the visible code.
120 print "\t/* Least significant 1 bit pair */ {\n";
123 print "\t\t/* As */ {\n";
124 for($i = 0; $i < 256; $i++) {
# Start of a row: indent with three tabs before the first entry.
125 print "\t\t\t" if(($i % $entsPerLine) == 0);
# End of a row: break the line after every $entsPerLine-th entry.
127 print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
# The Cs, Gs and Ts loops below use the same row layout as the As loop.
132 print "\t\t/* Cs */ {\n";
133 for($i = 0; $i < 256; $i++) {
134 print "\t\t\t" if(($i % $entsPerLine) == 0);
136 print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
141 print "\t\t/* Gs */ {\n";
142 for($i = 0; $i < 256; $i++) {
143 print "\t\t\t" if(($i % $entsPerLine) == 0);
145 print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
150 print "\t\t/* Ts */ {\n";
151 for($i = 0; $i < 256; $i++) {
152 print "\t\t\t" if(($i % $entsPerLine) == 0);
154 print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
# Closes the Ts sub-table and this outer element; the trailing comma
# separates it from the next outer element of the C initializer.
156 print "\t\t}\n\t},\n";
158 # Count occurrences in low 2 bit pairs
# Opens the third outer element of the table, labelled "Least
# significant 2 bit pairs", then opens the As, Cs, Gs and Ts sub-tables
# in that order. Each sub-table header is followed by a 256-iteration
# loop; the lines visible here only handle row layout.
# NOTE(review): the statement that prints each entry and the lines that
# close each loop and the As/Cs/Gs sub-tables are not visible in this
# excerpt, and $entsPerLine is not assigned in the visible code.
160 print "\t/* Least significant 2 bit pairs */ {\n";
163 print "\t\t/* As */ {\n";
164 for($i = 0; $i < 256; $i++) {
# Start of a row: indent with three tabs before the first entry.
165 print "\t\t\t" if(($i % $entsPerLine) == 0);
# End of a row: break the line after every $entsPerLine-th entry.
167 print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
# The Cs, Gs and Ts loops below use the same row layout as the As loop.
172 print "\t\t/* Cs */ {\n";
173 for($i = 0; $i < 256; $i++) {
174 print "\t\t\t" if(($i % $entsPerLine) == 0);
176 print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
181 print "\t\t/* Gs */ {\n";
182 for($i = 0; $i < 256; $i++) {
183 print "\t\t\t" if(($i % $entsPerLine) == 0);
185 print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
190 print "\t\t/* Ts */ {\n";
191 for($i = 0; $i < 256; $i++) {
192 print "\t\t\t" if(($i % $entsPerLine) == 0);
194 print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
# Closes the Ts sub-table and this outer element; the trailing comma
# separates it from the next outer element of the C initializer.
196 print "\t\t}\n\t},\n";
198 # Count occurrences in low 3 bit pairs
# Opens the fourth outer element of the table, labelled "Least
# significant 3 bit pairs", then opens the As, Cs, Gs and Ts sub-tables
# in that order. Each sub-table header is followed by a 256-iteration
# loop; the lines visible here only handle row layout.
# NOTE(review): the statement that prints each entry and the lines that
# close each loop and the As/Cs/Gs sub-tables are not visible in this
# excerpt, and $entsPerLine is not assigned in the visible code.
200 print "\t/* Least significant 3 bit pairs */ {\n";
203 print "\t\t/* As */ {\n";
204 for($i = 0; $i < 256; $i++) {
# Start of a row: indent with three tabs before the first entry.
205 print "\t\t\t" if(($i % $entsPerLine) == 0);
# End of a row: break the line after every $entsPerLine-th entry.
207 print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
# The Cs, Gs and Ts loops below use the same row layout as the As loop.
212 print "\t\t/* Cs */ {\n";
213 for($i = 0; $i < 256; $i++) {
214 print "\t\t\t" if(($i % $entsPerLine) == 0);
216 print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
221 print "\t\t/* Gs */ {\n";
222 for($i = 0; $i < 256; $i++) {
223 print "\t\t\t" if(($i % $entsPerLine) == 0);
225 print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
230 print "\t\t/* Ts */ {\n";
231 for($i = 0; $i < 256; $i++) {
232 print "\t\t\t" if(($i % $entsPerLine) == 0);
234 print "\n" if(($i % $entsPerLine) == ($entsPerLine-1));
# Closes the Ts sub-table and this outer element. There is no trailing
# comma here, unlike the closers of the three preceding outer elements.
236 print "\t\t}\n\t}\n";