Imported Debian patch 0.12.7-3
[bowtie.git] / zbox.h
1 #ifndef ZBOX_H_
2 #define ZBOX_H_
3
4 /**
5  * Fill z with Z-box information for s.  String z will not be resized
6  * and will only be filled up to its size cap.  This is the linear-time
7  * algorithm from Gusfield.  An optional sanity-check uses a naive
8  * algorithm to double-check results.
9  */
10 template<typename T>
11 void calcZ(const T& s,
12            uint32_t off,
13            String<uint32_t>& z,
14            bool verbose = false,
15            bool sanityCheck = false)
16 {
17         size_t lCur = 0, rCur = 0;
18         size_t zlen = length(z);
19         size_t slen = length(s);
20         assert_gt(zlen, 0);
21         assert_eq(z[0], 0);
22         //assert_leq(zlen, slen);
23         for (size_t k = 1; k < zlen && k+off < slen; k++) {
24                 assert_lt(lCur, k);
25                 assert(z[lCur] == 0 || z[lCur] == rCur - lCur + 1);
26                 if(k > rCur) {
27                         // compare starting at k with prefix starting at 0
28                         size_t ki = k;
29                         while(off+ki < length(s) && s[off+ki] == s[off+ki-k]) ki++;
30                         z[k] = ki - k;
31                         assert_lt(off+z[k], slen);
32                         if(z[k] > 0) {
33                                 lCur = k;
34                                 rCur = k + z[k] - 1;
35                         }
36                 } else {
37                         // position k is contained in a Z-box
38                         size_t betaLen = rCur - k + 1;
39                         size_t kPrime = k - lCur;
40                         assert_eq(s[off+k], s[off+kPrime]);
41                         if(z[kPrime] < betaLen) {
42                                 z[k] = z[kPrime];
43                                 assert_lt(off+z[k], slen);
44                                 // lCur, rCur unchanged
45                         } else if (z[kPrime] > 0) {
46                                 int q = 0;
47                                 while (off+q+rCur+1 < length(s) && s[off+q+rCur+1] == s[off+betaLen+q]) q++;
48                                 z[k] = betaLen + q;
49                                 assert_lt(off+z[k], slen);
50                                 rCur = rCur + q;
51                                 assert_geq(k, lCur);
52                                 lCur = k;
53                         } else {
54                                 z[k] = 0;
55                                 assert_lt(off+z[k], slen);
56                                 // lCur, rCur unchanged
57                         }
58                 }
59         }
60 #ifndef NDEBUG
61         if(sanityCheck) {
62                 // Recalculate Z-boxes using naive quadratic-time algorithm and
63                 // compare to linear-time result
64                 assert_eq(0, z[0]);
65                 for(size_t i = 1; i < length(z); i++) {
66                         size_t j;
67                         for(j = i; off+j < length(s); j++) {
68                                 if(s[off+j] != s[off+j-i]) break;
69                         }
70                         assert_eq(j-i, z[i]);
71                 }
72         }
73 #endif
74 }
75
76 #endif /*ZBOX_H_*/