1 #ifndef BINARY_SA_SEARCH_H_
2 #define BINARY_SA_SEARCH_H_
6 #include <seqan/sequence.h>
8 #include "assert_helpers.h"
11 * Do a binary search using the suffix of 'host' beginning at offset
12 * 'qry' as the query and 'sa' as an already-lexicographically-sorted
13 * list of suffixes of host. 'sa' may be all suffixes of host or just
14 * a subset. Returns the index in sa of the smallest suffix of host
15 * that is larger than qry, or length(sa) if all suffixes of host are
18 * We use the Manber and Myers optimization of maintaining a pair of
19 * counters for the longest lcp observed so far on the left- and right-
20 * hand sides and using the min of the two as a way of skipping over
21 * characters at the beginning of a new round.
23 * Returns 0xffffffff if the query suffix matches an element of sa.
25 template<typename TStr, typename TSufElt> inline
26 uint32_t binarySASearch(const TStr& host,
28 const String<TSufElt>& sa)
30 uint32_t lLcp = 0, rLcp = 0; // greatest observed LCPs on left and right
31 uint32_t l = 0, r = length(sa)+1; // binary-search window
32 uint32_t hostLen = length(host);
35 uint32_t m = (l+r) >> 1;
37 // Binary-search window has closed: we have an answer
38 if(m > 0 && sa[m-1] == qry) return 0xffffffff; // qry matches
39 assert_leq(m, length(sa));
40 return m; // Return index of right-hand suffix
43 uint32_t suf = sa[m-1];
44 if(suf == qry) return 0xffffffff; // query matches an elt of sa
45 uint32_t lcp = min(lLcp, rLcp);
47 if(prefix(suffix(host, qry), lcp) != prefix(suffix(host, suf), lcp)) {
51 // Keep advancing lcp, but stop when query mismatches host or
52 // when the counter falls off either the query or the suffix
53 while(suf+lcp < hostLen && qry+lcp < hostLen && host[suf+lcp] == host[qry+lcp]) {
56 // Fell off the end of either the query or the sa elt?
57 bool fell = (suf+lcp == hostLen || qry+lcp == hostLen);
58 if((fell && qry+lcp == hostLen) || (!fell && host[suf+lcp] < host[qry+lcp])) {
59 // Query is greater than sa elt
60 l = m; // update left bound
61 lLcp = max(lLcp, lcp); // update left lcp
63 else if((fell && suf+lcp == hostLen) || (!fell && host[suf+lcp] > host[qry+lcp])) {
64 // Query is less than sa elt
65 r = m; // update right bound
66 rLcp = max(rLcp, lcp); // update right lcp
68 assert(false); // Must be one or the other!
76 #endif /*BINARY_SA_SEARCH_H_*/