multikey_qsort.h

   1 #ifndef MULTIKEY_QSORT_H_
   2 #define MULTIKEY_QSORT_H_
   3
   4 #include <iostream>
   5 #include <seqan/basic.h>
   6 #include <seqan/file.h>
   7 #include <seqan/sequence.h>
   8 #include "sequence_io.h"
   9 #include "alphabet.h"
  10 #include "assert_helpers.h"
  11 #include "diff_sample.h"
  12
  13 using namespace std;
  14 using namespace seqan;
  15
  16 /**
  17  * Swap elements a and b in seqan::String s
  18  */
  19 template <typename TStr, typename TPos>
  20 static inline void swap(TStr& s, size_t slen, TPos a, TPos b) {
  21         typedef typename Value<TStr>::Type TAlphabet;
  22         assert_lt(a, slen);
  23         assert_lt(b, slen);
  24         TAlphabet tmp = s[a];
  25         s[a] = s[b];
  26         s[b] = tmp;
  27 }
  28
  29 /**
  30  * Swap elements a and b in array s
  31  */
  32 template <typename TVal, typename TPos>
  33 static inline void swap(TVal* s, size_t slen, TPos a, TPos b) {
  34         assert_lt(a, slen);
  35         assert_lt(b, slen);
  36         TVal tmp = s[a];
  37         s[a] = s[b];
  38         s[b] = tmp;
  39 }
  40
  41 /**
  42  * Helper macro for swapping elements a and b in seqan::String s.  Does
  43  * some additional sainty checking w/r/t begin and end (which are
  44  * parameters to the sorting routines below).
  45  */
  46 #define SWAP(s, a, b) { \
  47         assert_geq(a, begin); \
  48         assert_geq(b, begin); \
  49         assert_lt(a, end); \
  50         assert_lt(b, end); \
  51         swap(s, slen, a, b); \
  52 }
  53
  54 /**
  55  * Helper macro for swapping the same pair of elements a and b in
  56  * two different seqan::Strings s and s2.  This is a helpful variant
  57  * if, for example, the caller would like to see how their input was
  58  * permuted by the sort routine (in that case, the caller would let s2
  59  * be an array s2[] where s2 is the same length as s and s2[i] = i).
  60  */
  61 #define SWAP2(s, s2, a, b) { \
  62         SWAP(s, a, b); \
  63         swap(s2, slen, a, b); \
  64 }
  65
  66 #define SWAP1(s, s2, a, b) { \
  67         SWAP(s, a, b); \
  68 }
  69
  70 /**
  71  * Helper macro that swaps a range of elements [i, i+n) with another
  72  * range [j, j+n) in seqan::String s.
  73  */
  74 #define VECSWAP(s, i, j, n) { \
  75         if(n > 0) { vecswap(s, slen, i, j, n, begin, end); } \
  76 }
  77
  78 /**
  79  * Helper macro that swaps a range of elements [i, i+n) with another
  80  * range [j, j+n) both in seqan::String s and seqan::String s2.
  81  */
  82 #define VECSWAP2(s, s2, i, j, n) { \
  83         if(n > 0) { vecswap2(s, slen, s2, i, j, n, begin, end); } \
  84 }
  85
  86 /**
  87  * Helper function that swaps a range of elements [i, i+n) with another
  88  * range [j, j+n) in seqan::String s.  begin and end represent the
  89  * current range under consideration by the caller (one of the
  90  * recursive multikey_quicksort routines below).
  91  */
  92 template <typename TStr, typename TPos>
  93 static inline void vecswap(TStr& s, size_t slen, TPos i, TPos j, TPos n, TPos begin, TPos end) {
  94         assert_geq(i, begin);
  95         assert_geq(j, begin);
  96         assert_lt(i, end);
  97         assert_lt(j, end);
  98         while(n-- > 0) {
  99                 assert_geq(n, 0);
 100                 TPos a = i+n;
 101                 TPos b = j+n;
 102                 assert_geq(a, begin);
 103                 assert_geq(b, begin);
 104                 assert_lt(a, end);
 105                 assert_lt(b, end);
 106                 swap(s, slen, a, b);
 107         }
 108 }
 109
 110 template <typename TVal, typename TPos>
 111 static inline void vecswap(TVal *s, size_t slen, TPos i, TPos j, TPos n, TPos begin, TPos end) {
 112         assert_geq(i, begin);
 113         assert_geq(j, begin);
 114         assert_lt(i, end);
 115         assert_lt(j, end);
 116         while(n-- > 0) {
 117                 assert_geq(n, 0);
 118                 TPos a = i+n;
 119                 TPos b = j+n;
 120                 assert_geq(a, begin);
 121                 assert_geq(b, begin);
 122                 assert_lt(a, end);
 123                 assert_lt(b, end);
 124                 swap(s, slen, a, b);
 125         }
 126 }
 127
 128 /**
 129  * Helper function that swaps a range of elements [i, i+n) with another
 130  * range [j, j+n) both in seqan::String s and seqan::String s2.  begin
 131  * and end represent the current range under consideration by the
 132  * caller (one of the recursive multikey_quicksort routines below).
 133  */
 134 template <typename TStr, typename TPos>
 135 static inline void vecswap2(TStr& s, size_t slen, TStr& s2, TPos i, TPos j, TPos n, TPos begin, TPos end) {
 136         assert_geq(i, begin);
 137         assert_geq(j, begin);
 138         assert_lt(i, end);
 139         assert_lt(j, end);
 140         while(n-- > 0) {
 141                 assert_geq(n, 0);
 142                 TPos a = i+n;
 143                 TPos b = j+n;
 144                 assert_geq(a, begin);
 145                 assert_geq(b, begin);
 146                 assert_lt(a, end);
 147                 assert_lt(b, end);
 148                 swap(s, slen, a, b);
 149                 swap(s2, slen, a, b);
 150         }
 151 }
 152
 153 template <typename TVal, typename TPos>
 154 static inline void vecswap2(TVal* s, size_t slen, TVal* s2, TPos i, TPos j, TPos n, TPos begin, TPos end) {
 155         assert_geq(i, begin);
 156         assert_geq(j, begin);
 157         assert_lt(i, end);
 158         assert_lt(j, end);
 159         while(n-- > 0) {
 160                 assert_geq(n, 0);
 161                 TPos a = i+n;
 162                 TPos b = j+n;
 163                 assert_geq(a, begin);
 164                 assert_geq(b, begin);
 165                 assert_lt(a, end);
 166                 assert_lt(b, end);
 167                 swap(s, slen, a, b);
 168                 swap(s2, slen, a, b);
 169         }
 170 }
 171
 172 /// Retrieve an int-ized version of the ath character of string s, or,
 173 /// if a goes off the end of s, return a (user-specified) int greater
 174 /// than any TAlphabet character - 'hi'.
 175 #define CHAR_AT(ss, aa) ((length(s[ss]) > aa) ? (int)(Dna)(s[ss][aa]) : hi)
 176
 177 /// Retrieve an int-ized version of the ath character of string s, or,
 178 /// if a goes off the end of s, return a (user-specified) int greater
 179 /// than any TAlphabet character - 'hi'.
 180 #define CHAR_AT_SUF(si, off) (((off+s[si]) < hlen) ? ((int)(Dna)(host[off+s[si]])) : (hi))
 181
 182 /// Retrieve an int-ized version of the ath character of string s, or,
 183 /// if a goes off the end of s, return a (user-specified) int greater
 184 /// than any TAlphabet character - 'hi'.
 185
 186 #define CHAR_AT_SUF_U8(si, off) char_at_suf_u8(host, hlen, s, si, off, hi)
 187
 188 // Note that CHOOSE_AND_SWAP_RANDOM_PIVOT is unused
 189 #define CHOOSE_AND_SWAP_RANDOM_PIVOT(sw, ch) {                            \
 190         /* Note: rand() didn't really cut it here; it seemed to run out of */ \
 191         /* randomness and, after a time, returned the same thing over and */  \
 192         /* over again */                                                      \
 193         a = (rand() % n) + begin; /* choose pivot between begin and end */  \
 194         assert_lt(a, end); assert_geq(a, begin);                              \
 195         sw(s, s2, begin, a); /* move pivot to beginning */                    \
 196 }
 197
 198 /**
 199  * Ad-hoc DNA-centric way of choose a pretty good pivot without using
 200  * the pseudo-random number generator.  We try to get a 1 or 2 if
 201  * possible, since they'll split things more evenly than a 0 or 4.  We
 202  * also avoid swapping in the event that we choose the first element.
 203  */
 204 #define CHOOSE_AND_SWAP_SMART_PIVOT(sw, ch) {                                    \
 205         a = begin; /* choose first elt */                                            \
 206         /* now try to find a better elt */                                           \
 207         if(n >= 5) { /* n is the difference between begin and end */                 \
 208                 if     (ch(begin+1, depth) == 1 || ch(begin+1, depth) == 2) a = begin+1; \
 209                 else if(ch(begin+2, depth) == 1 || ch(begin+2, depth) == 2) a = begin+2; \
 210                 else if(ch(begin+3, depth) == 1 || ch(begin+3, depth) == 2) a = begin+3; \
 211                 else if(ch(begin+4, depth) == 1 || ch(begin+4, depth) == 2) a = begin+4; \
 212                 if(a != begin) sw(s, s2, begin, a); /* move pivot to beginning */        \
 213         }                                                                            \
 214         /* the element at [begin] now holds the pivot value */                       \
 215 }
 216
 217 #define CHOOSE_AND_SWAP_PIVOT CHOOSE_AND_SWAP_SMART_PIVOT
 218
 219 #ifndef NDEBUG
 220
 221 /**
 222  * Assert that the range of chars at depth 'depth' in strings 'begin'
 223  * to 'end' in string-of-suffix-offsets s is parititioned properly
 224  * according to the ternary paritioning strategy of Bentley and McIlroy
 225  * (*prior to* swapping the = regions to the center)
 226  */
 227 template<typename THost>
 228 bool assertPartitionedSuf(const THost& host,
 229                           uint32_t *s,
 230                           size_t slen,
 231                           int hi,
 232                           int pivot,
 233                           size_t begin,
 234                           size_t end,
 235                           size_t depth)
 236 {
 237         size_t hlen = length(host);
 238         int state = 0; // 0 -> 1st = section, 1 -> < section, 2 -> > section, 3 -> 2nd = section
 239         for(size_t i = begin; i < end; i++) {
 240                 switch(state) {
 241                 case 0:
 242                         if       (CHAR_AT_SUF(i, depth) < pivot)  { state = 1; break; }
 243                         else if  (CHAR_AT_SUF(i, depth) > pivot)  { state = 2; break; }
 244                         assert_eq(CHAR_AT_SUF(i, depth), pivot);  break;
 245                 case 1:
 246                         if       (CHAR_AT_SUF(i, depth) > pivot)  { state = 2; break; }
 247                         else if  (CHAR_AT_SUF(i, depth) == pivot) { state = 3; break; }
 248                         assert_lt(CHAR_AT_SUF(i, depth), pivot);  break;
 249                 case 2:
 250                         if       (CHAR_AT_SUF(i, depth) == pivot) { state = 3; break; }
 251                         assert_gt(CHAR_AT_SUF(i, depth), pivot);         break;
 252                 case 3:
 253                         assert_eq(CHAR_AT_SUF(i, depth), pivot);         break;
 254                 }
 255         }
 256         return true;
 257 }
 258
 259 /**
 260  * Assert that the range of chars at depth 'depth' in strings 'begin'
 261  * to 'end' in string-of-suffix-offsets s is parititioned properly
 262  * according to the ternary paritioning strategy of Bentley and McIlroy
 263  * (*after* swapping the = regions to the center)
 264  */
 265 template<typename THost>
 266 bool assertPartitionedSuf2(const THost& host,
 267                            uint32_t *s,
 268                            size_t slen,
 269                            int hi,
 270                            int pivot,
 271                            size_t begin,
 272                            size_t end,
 273                            size_t depth)
 274 {
 275         size_t hlen = length(host);
 276         int state = 0; // 0 -> < section, 1 -> = section, 2 -> > section
 277         for(size_t i = begin; i < end; i++) {
 278                 switch(state) {
 279                 case 0:
 280                         if       (CHAR_AT_SUF(i, depth) == pivot) { state = 1; break; }
 281                         else if  (CHAR_AT_SUF(i, depth) > pivot)  { state = 2; break; }
 282                         assert_lt(CHAR_AT_SUF(i, depth), pivot);  break;
 283                 case 1:
 284                         if       (CHAR_AT_SUF(i, depth) > pivot)  { state = 2; break; }
 285                         assert_eq(CHAR_AT_SUF(i, depth), pivot);  break;
 286                 case 2:
 287                         assert_gt(CHAR_AT_SUF(i, depth), pivot);  break;
 288                 }
 289         }
 290         return true;
 291 }
 292 #endif
 293
 294 /**
 295  * Assert that the seqan::String s of suffix offsets into seqan::String
 296  * 'host' is a seemingly legitimate suffix-offset list (at this time,
 297  * we just check that it doesn't list any suffix twice).
 298  */
 299 static void sanityCheckInputSufs(uint32_t *s, size_t slen) {
 300         assert_gt(slen, 0);
 301         for(size_t i = 0; i < slen; i++) {
 302                 // Actually, it's convenient to allow the caller to provide
 303                 // suffix offsets thare are off the end of the host string.
 304                 // See, e.g., build() in diff_sample.cpp.
 305                 //assert_lt(s[i], length(host));
 306                 for(size_t j = i+1; j < slen; j++) {
 307                         assert_neq(s[i], s[j]);
 308                 }
 309         }
 310 }
 311
 312 /**
 313  * Assert that the seqan::String s of suffix offsets into seqan::String
 314  * 'host' really are in lexicographical order up to depth 'upto'.
 315  */
 316 template <typename T>
 317 void sanityCheckOrderedSufs(const T& host,
 318                             size_t hlen,
 319                             uint32_t *s,
 320                             size_t slen,
 321                             size_t upto,
 322                             uint32_t lower = 0,
 323                             uint32_t upper = 0xffffffff)
 324 {
 325         assert_lt(s[0], hlen);
 326         upper = min<uint32_t>(upper, slen-1);
 327         for(size_t i = lower; i < upper; i++) {
 328                 // Allow s[i+t] to point off the end of the string; this is
 329                 // convenient for some callers
 330                 if(s[i+1] >= hlen) continue;
 331                 if(upto == 0xffffffff) {
 332                         assert(dollarLt(suffix(host, s[i]), suffix(host, s[i+1])));
 333                 } else {
 334 #ifndef NDEBUG
 335                         if(prefix(suffix(host, s[i]), upto) > prefix(suffix(host, s[i+1]), upto)) {
 336                                 // operator > treats shorter strings as
 337                                 // lexicographically smaller, but we want to opposite
 338                                 assert(isPrefix(suffix(host, s[i+1]), suffix(host, s[i])));
 339                         }
 340 #endif
 341                 }
 342         }
 343 }
 344
 345 /**
 346  * Main multikey quicksort function for suffixes.  Based on Bentley &
 347  * Sedgewick's algorithm on p.5 of their paper "Fast Algorithms for
 348  * Sorting and Searching Strings".  That algorithm has been extended in
 349  * three ways:
 350  *
 351  *  1. Deal with keys of different lengths by checking bounds and
 352  *     considering off-the-end values to be 'hi' (b/c our goal is the
 353  *     BWT transform, we're biased toward considring prefixes as
 354  *     lexicographically *greater* than their extensions).
 355  *  2. The multikey_qsort_suffixes version takes a single host string
 356  *     and a list of suffix offsets as input.  This reduces memory
 357  *     footprint compared to an approach that treats its input
 358  *     generically as a set of strings (not necessarily suffixes), thus
 359  *     requiring that we store at least two integers worth of
 360  *     information for each string.
 361  *  3. Sorting functions take an extra "upto" parameter that upper-
 362  *     bounds the depth to which the function sorts.
 363  *
 364  * TODO: Consult a tie-breaker (like a difference cover sample) if two
 365  * keys share a long prefix.
 366  */
 367 template<typename T>
 368 void mkeyQSortSuf(const T& host,
 369                   size_t hlen,
 370                   uint32_t *s,
 371                   size_t slen,
 372                   int hi,
 373                   size_t begin,
 374                   size_t end,
 375                   size_t depth,
 376                   size_t upto = 0xffffffff)
 377 {
 378         // Helper for making the recursive call; sanity-checks arguments to
 379         // make sure that the problem actually got smaller.
 380         #define MQS_RECURSE_SUF(nbegin, nend, ndepth) { \
 381                 assert(nbegin > begin || nend < end || ndepth > depth); \
 382                 if(ndepth < upto) { /* don't exceed depth of 'upto' */ \
 383                         mkeyQSortSuf(host, hlen, s, slen, hi, nbegin, nend, ndepth, upto); \
 384                 } \
 385         }
 386         assert_leq(begin, slen);
 387         assert_leq(end, slen);
 388         size_t a, b, c, d, /*e,*/ r;
 389         size_t n = end - begin;
 390         if(n <= 1) return;                 // 1-element list already sorted
 391         CHOOSE_AND_SWAP_PIVOT(SWAP1, CHAR_AT_SUF); // pick pivot, swap it into [begin]
 392         int v = CHAR_AT_SUF(begin, depth); // v <- randomly-selected pivot value
 393         #ifndef NDEBUG
 394         {
 395                 bool stillInBounds = false;
 396                 for(size_t i = begin; i < end; i++) {
 397                         if(depth < (hlen-s[i])) {
 398                                 stillInBounds = true;
 399                                 break;
 400                         } else { /* already fell off this suffix */ }
 401                 }
 402                 assert(stillInBounds); // >=1 suffix must still be in bounds
 403         }
 404         #endif
 405         a = b = begin;
 406         c = d = end-1;
 407         while(true) {
 408                 // Invariant: everything before a is = pivot, everything
 409                 // between a and b is <
 410                 int bc = 0; // shouldn't have to init but gcc on Mac complains
 411                 while(b <= c && v >= (bc = CHAR_AT_SUF(b, depth))) {
 412                         if(v == bc) {
 413                                 SWAP(s, a, b); a++;
 414                         }
 415                         b++;
 416                 }
 417                 // Invariant: everything after d is = pivot, everything
 418                 // between c and d is >
 419                 int cc = 0; // shouldn't have to init but gcc on Mac complains
 420                 while(b <= c && v <= (cc = CHAR_AT_SUF(c, depth))) {
 421                         if(v == cc) {
 422                                 SWAP(s, c, d); d--;
 423                         }
 424                         c--;
 425                 }
 426                 if(b > c) break;
 427                 SWAP(s, b, c);
 428                 b++;
 429                 c--;
 430         }
 431         assert(a > begin || c < end-1);                      // there was at least one =s
 432         assert_lt(d-c, n); // they can't all have been > pivot
 433         assert_lt(b-a, n); // they can't all have been < pivot
 434         assert(assertPartitionedSuf(host, s, slen, hi, v, begin, end, depth));  // check pre-=-swap invariant
 435         r = min(a-begin, b-a); VECSWAP(s, begin, b-r,   r);  // swap left = to center
 436         r = min(d-c, end-d-1); VECSWAP(s, b,     end-r, r);  // swap right = to center
 437         assert(assertPartitionedSuf2(host, s, slen, hi, v, begin, end, depth)); // check post-=-swap invariant
 438         r = b-a; // r <- # of <'s
 439         if(r > 0) {
 440                 MQS_RECURSE_SUF(begin, begin + r, depth); // recurse on <'s
 441         }
 442         // Do not recurse on ='s if the pivot was the off-the-end value;
 443         // they're already fully sorted
 444         if(v != hi) {
 445                 MQS_RECURSE_SUF(begin + r, begin + r + (a-begin) + (end-d-1), depth+1); // recurse on ='s
 446         }
 447         r = d-c; // r <- # of >'s excluding those exhausted
 448         if(r > 0 && v < hi-1) {
 449                 MQS_RECURSE_SUF(end-r, end, depth); // recurse on >'s
 450         }
 451 }
 452
 453 /**
 454  * Toplevel function for multikey quicksort over suffixes.
 455  */
 456 template<typename T>
 457 void mkeyQSortSuf(const T& host,
 458                   uint32_t *s,
 459                   size_t slen,
 460                   int hi,
 461                   bool verbose = false,
 462                   bool sanityCheck = false,
 463                   size_t upto = 0xffffffff)
 464 {
 465         size_t hlen = length(host);
 466         assert(!empty(s));
 467         if(sanityCheck) sanityCheckInputSufs(s, slen);
 468         mkeyQSortSuf(host, hlen, s, slen, hi, (size_t)0, slen, (size_t)0, upto);
 469         if(sanityCheck) sanityCheckOrderedSufs(host, hlen, s, slen, upto);
 470 }
 471
 472 /**
 473  * Just like mkeyQSortSuf but all swaps are applied to s2 as well as s.
 474  * This is a helpful variant if, for example, the caller would like to
 475  * see how their input was permuted by the sort routine (in that case,
 476  * the caller would let s2 be an array s2[] where s2 is the same length
 477  * as s and s2[i] = i).
 478  */
 479 template<typename T>
 480 void mkeyQSortSuf2(const T& host,
 481                    size_t hlen,
 482                    uint32_t *s,
 483                    size_t slen,
 484                    uint32_t *s2,
 485                    int hi,
 486                    size_t begin,
 487                    size_t end,
 488                    size_t depth,
 489                    size_t upto = 0xffffffff)
 490 {
 491         // Helper for making the recursive call; sanity-checks arguments to
 492         // make sure that the problem actually got smaller.
 493         #define MQS_RECURSE_SUF_DS(nbegin, nend, ndepth) { \
 494                 assert(nbegin > begin || nend < end || ndepth > depth); \
 495                 if(ndepth < upto) { /* don't exceed depth of 'upto' */ \
 496                         mkeyQSortSuf2(host, hlen, s, slen, s2, hi, nbegin, nend, ndepth, upto); \
 497                 } \
 498         }
 499         assert_leq(begin, slen);
 500         assert_leq(end, slen);
 501         size_t a, b, c, d, /*e,*/ r;
 502         size_t n = end - begin;
 503         if(n <= 1) return;                 // 1-element list already sorted
 504         CHOOSE_AND_SWAP_PIVOT(SWAP2, CHAR_AT_SUF); // pick pivot, swap it into [begin]
 505         int v = CHAR_AT_SUF(begin, depth); // v <- randomly-selected pivot value
 506         #ifndef NDEBUG
 507         {
 508                 bool stillInBounds = false;
 509                 for(size_t i = begin; i < end; i++) {
 510                         if(depth < (hlen-s[i])) {
 511                                 stillInBounds = true;
 512                                 break;
 513                         } else { /* already fell off this suffix */ }
 514                 }
 515                 assert(stillInBounds); // >=1 suffix must still be in bounds
 516         }
 517         #endif
 518         a = b = begin;
 519         c = d = /*e =*/ end-1;
 520         while(true) {
 521                 // Invariant: everything before a is = pivot, everything
 522                 // between a and b is <
 523                 int bc = 0; // shouldn't have to init but gcc on Mac complains
 524                 while(b <= c && v >= (bc = CHAR_AT_SUF(b, depth))) {
 525                         if(v == bc) {
 526                                 SWAP2(s, s2, a, b); a++;
 527                         }
 528                         b++;
 529                 }
 530                 // Invariant: everything after d is = pivot, everything
 531                 // between c and d is >
 532                 int cc = 0; // shouldn't have to init but gcc on Mac complains
 533                 while(b <= c && v <= (cc = CHAR_AT_SUF(c, depth))) {
 534                         if(v == cc) {
 535                                 SWAP2(s, s2, c, d); d--; /*e--;*/
 536                         }
 537                         //else if(c == e && v == hi) e--;
 538                         c--;
 539                 }
 540                 if(b > c) break;
 541                 SWAP2(s, s2, b, c);
 542                 b++;
 543                 c--;
 544         }
 545         assert(a > begin || c < end-1);                      // there was at least one =s
 546         assert_lt(/*e*/d-c, n); // they can't all have been > pivot
 547         assert_lt(b-a, n); // they can't all have been < pivot
 548         assert(assertPartitionedSuf(host, s, slen, hi, v, begin, end, depth));  // check pre-=-swap invariant
 549         r = min(a-begin, b-a); VECSWAP2(s, s2, begin, b-r,   r);  // swap left = to center
 550         r = min(d-c, end-d-1); VECSWAP2(s, s2, b,     end-r, r);  // swap right = to center
 551         assert(assertPartitionedSuf2(host, s, slen, hi, v, begin, end, depth)); // check post-=-swap invariant
 552         r = b-a; // r <- # of <'s
 553         if(r > 0) {
 554                 MQS_RECURSE_SUF_DS(begin, begin + r, depth); // recurse on <'s
 555         }
 556         // Do not recurse on ='s if the pivot was the off-the-end value;
 557         // they're already fully sorted
 558         if(v != hi) {
 559                 MQS_RECURSE_SUF_DS(begin + r, begin + r + (a-begin) + (end-d-1), depth+1); // recurse on ='s
 560         }
 561         r = d-c;   // r <- # of >'s excluding those exhausted
 562         if(r > 0 && v < hi-1) {
 563                 MQS_RECURSE_SUF_DS(end-r, end, depth); // recurse on >'s
 564         }
 565 }
 566
 567 /**
 568  * Toplevel function for multikey quicksort over suffixes with double
 569  * swapping.
 570  */
 571 template<typename T>
 572 void mkeyQSortSuf2(const T& host,
 573                    uint32_t *s,
 574                    size_t slen,
 575                    uint32_t *s2,
 576                    int hi,
 577                    bool verbose = false,
 578                    bool sanityCheck = false,
 579                    size_t upto = 0xffffffff)
 580 {
 581         size_t hlen = length(host);
 582         if(sanityCheck) sanityCheckInputSufs(s, slen);
 583         uint32_t *sOrig = NULL;
 584         if(sanityCheck) {
 585                 sOrig = new uint32_t[slen];
 586                 memcpy(sOrig, s, 4 * slen);
 587         }
 588         mkeyQSortSuf2(host, hlen, s, slen, s2, hi, (size_t)0, slen, (size_t)0, upto);
 589         if(sanityCheck) {
 590                 sanityCheckOrderedSufs(host, hlen, s, slen, upto);
 591                 for(size_t i = 0; i < slen; i++) {
 592                         assert_eq(s[i], sOrig[s2[i]]);
 593                 }
 594         }
 595 }
 596
 597 // Ugly but necessary; otherwise the compiler chokes dramatically on
 598 // the DifferenceCoverSample<> template args to the next few functions
 599 template <typename T>
 600 class DifferenceCoverSample;
 601
 602 /**
 603  * Constant time
 604  */
 605 template<typename T1, typename T2> inline
 606 bool sufDcLt(const T1& host,
 607              const T2& s1,
 608              const T2& s2,
 609              const DifferenceCoverSample<T1>& dc,
 610              bool sanityCheck = false)
 611 {
 612         uint32_t diff = dc.tieBreakOff(s1, s2);
 613         assert_lt(diff, dc.v());
 614         assert_lt(diff, length(host)-s1);
 615         assert_lt(diff, length(host)-s2);
 616         if(sanityCheck) {
 617                 for(uint32_t i = 0; i < diff; i++) {
 618                         assert_eq(host[s1+i], host[s2+i]);
 619                 }
 620         }
 621         bool ret = dc.breakTie(s1+diff, s2+diff) < 0;
 622 #ifndef NDEBUG
 623         if(sanityCheck && ret != dollarLt(suffix(host, s1), suffix(host, s2))) {
 624                 assert(false);
 625         }
 626 #endif
 627         return ret;
 628 }
 629
 630 /**
 631  * k log(k)
 632  */
 633 template<typename T> inline
 634 void qsortSufDc(const T& host,
 635                 size_t hlen,
 636                 uint32_t* s,
 637                 size_t slen,
 638                 const DifferenceCoverSample<T>& dc,
 639                 size_t begin,
 640                 size_t end,
 641                 bool sanityCheck = false)
 642 {
 643         assert_leq(end, slen);
 644         assert_lt(begin, slen);
 645         assert_gt(end, begin);
 646         size_t n = end - begin;
 647         if(n <= 1) return;                 // 1-element list already sorted
 648         // Note: rand() didn't really cut it here; it seemed to run out of
 649         // randomness and, after a time, returned the same thing over and
 650         // over again
 651         size_t a = (rand() % n) + begin; // choose pivot between begin and end
 652         assert_lt(a, end);
 653         assert_geq(a, begin);
 654         SWAP(s, end-1, a); // move pivot to end
 655         size_t cur = 0;
 656         for(size_t i = begin; i < end-1; i++) {
 657                 if(sufDcLt(host, s[i], s[end-1], dc, sanityCheck)) {
 658                         if(sanityCheck)
 659                                 assert(dollarLt(suffix(host, s[i]), suffix(host, s[end-1])));
 660                         assert_lt(begin + cur, end-1);
 661                         SWAP(s, i, begin + cur);
 662                         cur++;
 663                 }
 664         }
 665         // Put pivot into place
 666         assert_lt(cur, end-begin);
 667         SWAP(s, end-1, begin+cur);
 668         if(begin+cur > begin) qsortSufDc(host, hlen, s, slen, dc, begin, begin+cur);
 669         if(end > begin+cur+1) qsortSufDc(host, hlen, s, slen, dc, begin+cur+1, end);
 670 }
 671
 672 /**
 673  * Toplevel function for multikey quicksort over suffixes.
 674  */
 675 template<typename T1, typename T2>
 676 void mkeyQSortSufDcU8(const T1& seqanHost,
 677                       const T2& host,
 678                       size_t hlen,
 679                       uint32_t* s,
 680                       size_t slen,
 681                       const DifferenceCoverSample<T1>& dc,
 682                       int hi,
 683                       bool verbose = false,
 684                       bool sanityCheck = false)
 685 {
 686         if(sanityCheck) sanityCheckInputSufs(s, slen);
 687         mkeyQSortSufDcU8(seqanHost, host, hlen, s, slen, dc, hi, 0, slen, 0, sanityCheck);
 688         if(sanityCheck) sanityCheckOrderedSufs(seqanHost, hlen, s, slen, 0xffffffff);
 689 }
 690
 691 /**
 692  * Return a boolean indicating whether s1 < s2 using the difference
 693  * cover to break the tie.
 694  */
 695 template<typename T1, typename T2> inline
 696 bool sufDcLtU8(const T1& seqanHost,
 697                const T2& host,
 698                size_t hlen,
 699                uint32_t s1,
 700                uint32_t s2,
 701                const DifferenceCoverSample<T1>& dc,
 702                bool sanityCheck = false)
 703 {
 704         hlen += 0;
 705         uint32_t diff = dc.tieBreakOff(s1, s2);
 706         assert_lt(diff, dc.v());
 707         assert_lt(diff, hlen-s1);
 708         assert_lt(diff, hlen-s2);
 709         if(sanityCheck) {
 710                 for(uint32_t i = 0; i < diff; i++) {
 711                         assert_eq(host[s1+i], host[s2+i]);
 712                 }
 713         }
 714         bool ret = dc.breakTie(s1+diff, s2+diff) < 0;
 715         // Sanity-check return value using dollarLt
 716 #ifndef NDEBUG
 717         if(sanityCheck &&
 718            ret != dollarLt(suffix(seqanHost, s1), suffix(seqanHost, s2)))
 719         {
 720                 assert(false);
 721         }
 722 #endif
 723         return ret;
 724 }
 725
 726 /**
 727  * k log(k)
 728  */
 729 template<typename T1, typename T2> inline
 730 void qsortSufDcU8(const T1& seqanHost,
 731                   const T2& host,
 732                   size_t hlen,
 733                   uint32_t* s,
 734                   size_t slen,
 735                   const DifferenceCoverSample<T1>& dc,
 736                   size_t begin,
 737                   size_t end,
 738                   bool sanityCheck = false)
 739 {
 740         assert_leq(end, slen);
 741         assert_lt(begin, slen);
 742         assert_gt(end, begin);
 743         size_t n = end - begin;
 744         if(n <= 1) return;                 // 1-element list already sorted
 745         // Note: rand() didn't really cut it here; it seemed to run out of
 746         // randomness and, after a time, returned the same thing over and
 747         // over again
 748         size_t a = (rand() % n) + begin; // choose pivot between begin and end
 749         assert_lt(a, end);
 750         assert_geq(a, begin);
 751         SWAP(s, end-1, a); // move pivot to end
 752         size_t cur = 0;
 753         for(size_t i = begin; i < end-1; i++) {
 754                 if(sufDcLtU8(seqanHost, host, hlen, s[i], s[end-1], dc, sanityCheck)) {
 755 #ifndef NDEBUG
 756                         if(sanityCheck)
 757                                 assert(dollarLt(suffix(seqanHost, s[i]), suffix(seqanHost, s[end-1])));
 758                         assert_lt(begin + cur, end-1);
 759 #endif
 760                         SWAP(s, i, begin + cur);
 761                         cur++;
 762                 }
 763         }
 764         // Put pivot into place
 765         assert_lt(cur, end-begin);
 766         SWAP(s, end-1, begin+cur);
 767         if(begin+cur > begin) qsortSufDcU8(seqanHost, host, hlen, s, slen, dc, begin, begin+cur);
 768         if(end > begin+cur+1) qsortSufDcU8(seqanHost, host, hlen, s, slen, dc, begin+cur+1, end);
 769 }
 770
 771 #define BUCKET_SORT_CUTOFF (4 * 1024 * 1024)
 772 #define SELECTION_SORT_CUTOFF 6
 773
 774 // 5 64-element buckets for bucket-sorting A, C, G, T, $
 775 static uint32_t bkts[4][4 * 1024 * 1024];
 776
 777 /**
 778  * Straightforwardly obtain a uint8_t-ized version of t[off].  This
 779  * works fine as long as TStr is not packed.
 780  */
 781 template<typename TStr>
 782 inline uint8_t get_uint8(const TStr& t, uint32_t off) {
 783         return t[off];
 784 }
 785
 786 /**
 787  * For incomprehensible generic-programming reasons, getting a uint8_t
 788  * version of a character in a packed String<> requires casting first
 789  * to Dna then to uint8_t.
 790  */
 791 template<>
 792 inline uint8_t get_uint8(const String<Dna, Packed<> >& t, uint32_t off) {
 793         return (uint8_t)(Dna)t[off];
 794 }
 795
 796 /**
 797  * Return character at offset 'off' from the 'si'th suffix in the array
 798  * 's' of suffixes.  If the character is out-of-bounds, return hi.
 799  */
 800 template<typename TStr>
 801 static inline int char_at_suf_u8(const TStr& host,
 802                                  uint32_t hlen,
 803                                  uint32_t* s,
 804                                  uint32_t si,
 805                                  uint32_t off,
 806                                  uint8_t hi)
 807 {
 808         return ((off+s[si]) < hlen) ? get_uint8(host, off+s[si]) : (hi);
 809 }
 810
 811 template<typename T1, typename T2>
 812 static void selectionSortSufDcU8(
 813                 const T1& seqanHost,
 814                 const T2& host,
 815         size_t hlen,
 816         uint32_t* s,
 817         size_t slen,
 818         const DifferenceCoverSample<T1>& dc,
 819         uint8_t hi,
 820         size_t begin,
 821         size_t end,
 822         size_t depth,
 823         bool sanityCheck = false)
 824 {
 825 #define ASSERT_SUF_LT(l, r) \
 826         if(sanityCheck && \
 827            !dollarLt(suffix(seqanHost, s[l]), \
 828                      suffix(seqanHost, s[r]))) { \
 829                 cout << "l: " << suffixStr(seqanHost, s[l]) << endl; \
 830                 cout << "r: " << suffixStr(seqanHost, s[r]) << endl; \
 831                 assert(false); \
 832         }
 833
 834         assert_gt(end, begin+1);
 835         assert_leq(end-begin, SELECTION_SORT_CUTOFF);
 836         assert_eq(hi, 4);
 837         uint32_t v = dc.v();
 838         if(end == begin+2) {
 839                 uint32_t off = dc.tieBreakOff(s[begin], s[begin+1]);
 840                 if(off + s[begin] >= hlen ||
 841                    off + s[begin+1] >= hlen)
 842                 {
 843                         off = 0xffffffff;
 844                 }
 845                 if(off != 0xffffffff) {
 846                         if(off < depth) {
 847                                 qsortSufDcU8<T1,T2>(seqanHost, host, hlen, s, slen, dc,
 848                                                     begin, end, sanityCheck);
 849                                 // It's helpful for debugging if we call this here
 850                                 if(sanityCheck) {
 851                                         sanityCheckOrderedSufs(seqanHost, hlen, s, slen,
 852                                                                0xffffffff, begin, end);
 853                                 }
 854                                 return;
 855                         }
 856                         v = off - depth + 1;
 857                 }
 858         }
 859         assert_leq(v, dc.v());
 860         uint32_t lim = v;
 861         assert_geq(lim, 0);
 862         for(size_t i = begin; i < end-1; i++) {
 863                 uint32_t targ = i;
 864                 uint32_t targoff = depth + s[i];
 865                 for(size_t j = i+1; j < end; j++) {
 866                         assert_neq(j, targ);
 867                         uint32_t joff = depth + s[j];
 868                         uint32_t k;
 869                         for(k = 0; k <= lim; k++) {
 870                                 assert_neq(j, targ);
 871                                 uint8_t jc = (k + joff < hlen)    ? get_uint8(host, k + joff)    : hi;
 872                                 uint8_t tc = (k + targoff < hlen) ? get_uint8(host, k + targoff) : hi;
 873                                 assert(jc != hi || tc != hi);
 874                                 if(jc > tc) {
 875                                         // the jth suffix is greater than the current
 876                                         // smallest suffix
 877                                         ASSERT_SUF_LT(targ, j);
 878                                         break;
 879                                 } else if(jc < tc) {
 880                                         // the jth suffix is less than the current smallest
 881                                         // suffix, so update smallest to be j
 882                                         ASSERT_SUF_LT(j, targ);
 883                                         targ = j;
 884                                         targoff = joff;
 885                                         break;
 886                                 } else if(k == lim) {
 887                                         // Check whether either string ends immediately
 888                                         // after this character
 889                                         assert_leq(k + joff + 1, hlen);
 890                                         assert_leq(k + targoff + 1, hlen);
 891                                         if(k + joff + 1 == hlen) {
 892                                                 // targ < j
 893                                                 assert_neq(k + targoff + 1, hlen);
 894                                                 ASSERT_SUF_LT(targ, j);
 895                                                 break;
 896                                         } else if(k + targoff + 1 == hlen) {
 897                                                 // j < targ
 898                                                 ASSERT_SUF_LT(j, targ);
 899                                                 targ = j;
 900                                                 targoff = joff;
 901                                                 break;
 902                                         }
 903                                 } else {
 904                                         // They're equal so far, keep going
 905                                 }
 906                         }
 907                         // The jth suffix was equal to the current smallest suffix
 908                         // up to the difference-cover period, so disambiguate with
 909                         // difference cover
 910                         if(k == lim+1) {
 911                                 assert_neq(j, targ);
 912                                 if(sufDcLtU8(seqanHost, host, hlen, s[j], s[targ], dc, sanityCheck)) {
 913                                         // j < targ
 914                                         assert(!sufDcLtU8(seqanHost, host, hlen, s[targ], s[j], dc, sanityCheck));
 915                                         ASSERT_SUF_LT(j, targ);
 916                                         targ = j;
 917                                         targoff = joff;
 918                                 } else {
 919                                         assert(sufDcLtU8(seqanHost, host, hlen, s[targ], s[j], dc, sanityCheck));
 920                                         ASSERT_SUF_LT(targ, j); // !
 921                                 }
 922                         }
 923                 }
 924                 if(i != targ) {
 925                         ASSERT_SUF_LT(targ, i);
 926                         // swap i and targ
 927                         uint32_t tmp = s[i];
 928                         s[i] = s[targ];
 929                         s[targ] = tmp;
 930                 }
 931                 for(size_t j = i+1; j < end; j++) {
 932                         ASSERT_SUF_LT(i, j);
 933                 }
 934         }
 935         if(sanityCheck) {
 936                 sanityCheckOrderedSufs(seqanHost, hlen, s, slen,
 937                                        0xffffffff, begin, end);
 938         }
 939 }
 940
 941 template<typename T1, typename T2>
 942 static void bucketSortSufDcU8(
 943                 const T1& seqanHost,
 944                 const T2& host,
 945         size_t hlen,
 946         uint32_t* s,
 947         size_t slen,
 948         const DifferenceCoverSample<T1>& dc,
 949         uint8_t hi,
 950         size_t begin,
 951         size_t end,
 952         size_t depth,
 953         bool sanityCheck = false)
 954 {
 955         uint32_t cnts[] = { 0, 0, 0, 0, 0 };
 956         #define BKT_RECURSE_SUF_DC_U8(nbegin, nend) { \
 957                 bucketSortSufDcU8<T1,T2>(seqanHost, host, hlen, s, slen, dc, hi, \
 958                                          (nbegin), (nend), depth+1, sanityCheck); \
 959         }
 960         assert_gt(end, begin);
 961         assert_leq(end-begin, BUCKET_SORT_CUTOFF);
 962         assert_eq(hi, 4);
 963         if(end == begin+1) return; // 1-element list already sorted
 964         if(depth > dc.v()) {
 965                 // Quicksort the remaining suffixes using difference cover
 966                 // for constant-time comparisons; this is O(k*log(k)) where
 967                 // k=(end-begin)
 968                 qsortSufDcU8<T1,T2>(seqanHost, host, hlen, s, slen, dc, begin, end, sanityCheck);
 969                 return;
 970         }
 971         if(end-begin <= SELECTION_SORT_CUTOFF) {
 972                 // Bucket sort remaining items
 973                 selectionSortSufDcU8(seqanHost, host, hlen, s, slen, dc, hi,
 974                                      begin, end, depth, sanityCheck);
 975                 if(sanityCheck) {
 976                         sanityCheckOrderedSufs(seqanHost, hlen, s, slen,
 977                                                0xffffffff, begin, end);
 978                 }
 979                 return;
 980         }
 981         for(size_t i = begin; i < end; i++) {
 982                 uint32_t off = depth+s[i];
 983                 uint8_t c = (off < hlen) ? get_uint8(host, off) : hi;
 984                 assert_leq(c, 4);
 985                 if(c == 0) {
 986                         s[begin + cnts[0]++] = s[i];
 987                 } else {
 988                         bkts[c-1][cnts[c]++] = s[i];
 989                 }
 990         }
 991         assert_eq(cnts[0] + cnts[1] + cnts[2] + cnts[3] + cnts[4], end - begin);
 992         uint32_t cur = begin + cnts[0];
 993         if(cnts[1] > 0) { memcpy(&s[cur], bkts[0], cnts[1] << 2); cur += cnts[1]; }
 994         if(cnts[2] > 0) { memcpy(&s[cur], bkts[1], cnts[2] << 2); cur += cnts[2]; }
 995         if(cnts[3] > 0) { memcpy(&s[cur], bkts[2], cnts[3] << 2); cur += cnts[3]; }
 996         if(cnts[4] > 0) { memcpy(&s[cur], bkts[3], cnts[4] << 2); }
 997         // This frame is now totally finished with bkts[][], so recursive
 998         // callees can safely clobber it; we're not done with cnts[], but
 999         // that's local to the stack frame.
1000         cur = begin;
1001         if(cnts[0] > 0) {
1002                 BKT_RECURSE_SUF_DC_U8(cur, cur + cnts[0]); cur += cnts[0];
1003         }
1004         if(cnts[1] > 0) {
1005                 BKT_RECURSE_SUF_DC_U8(cur, cur + cnts[1]); cur += cnts[1];
1006         }
1007         if(cnts[2] > 0) {
1008                 BKT_RECURSE_SUF_DC_U8(cur, cur + cnts[2]); cur += cnts[2];
1009         }
1010         if(cnts[3] > 0) {
1011                 BKT_RECURSE_SUF_DC_U8(cur, cur + cnts[3]);
1012         }
1013         // Done
1014 }
1015
1016 /**
1017  * Main multikey quicksort function for suffixes.  Based on Bentley &
1018  * Sedgewick's algorithm on p.5 of their paper "Fast Algorithms for
1019  * Sorting and Searching Strings".  That algorithm has been extended in
1020  * three ways:
1021  *
1022  *  1. Deal with keys of different lengths by checking bounds and
1023  *     considering off-the-end values to be 'hi' (b/c our goal is the
1024  *     BWT transform, we're biased toward considring prefixes as
1025  *     lexicographically *greater* than their extensions).
1026  *  2. The multikey_qsort_suffixes version takes a single host string
1027  *     and a list of suffix offsets as input.  This reduces memory
1028  *     footprint compared to an approach that treats its input
1029  *     generically as a set of strings (not necessarily suffixes), thus
1030  *     requiring that we store at least two integers worth of
1031  *     information for each string.
1032  *  3. Sorting functions take an extra "upto" parameter that upper-
1033  *     bounds the depth to which the function sorts.
1034  */
1035 template<typename T1, typename T2>
1036 void mkeyQSortSufDcU8(const T1& seqanHost,
1037                       const T2& host,
1038                       size_t hlen,
1039                       uint32_t* s,
1040                       size_t slen,
1041                       const DifferenceCoverSample<T1>& dc,
1042                       int hi,
1043                       size_t begin,
1044                       size_t end,
1045                       size_t depth,
1046                       bool sanityCheck = false)
1047 {
1048         // Helper for making the recursive call; sanity-checks arguments to
1049         // make sure that the problem actually got smaller.
1050         #define MQS_RECURSE_SUF_DC_U8(nbegin, nend, ndepth) { \
1051                 assert(nbegin > begin || nend < end || ndepth > depth); \
1052                 mkeyQSortSufDcU8(seqanHost, host, hlen, s, slen, dc, hi, nbegin, nend, ndepth, sanityCheck); \
1053         }
1054         assert_leq(begin, slen);
1055         assert_leq(end, slen);
1056         size_t n = end - begin;
1057         if(n <= 1) return; // 1-element list already sorted
1058         if(depth > dc.v()) {
1059                 // Quicksort the remaining suffixes using difference cover
1060                 // for constant-time comparisons; this is O(k*log(k)) where
1061                 // k=(end-begin)
1062                 qsortSufDcU8<T1,T2>(seqanHost, host, hlen, s, slen, dc, begin, end, sanityCheck);
1063                 if(sanityCheck) {
1064                         sanityCheckOrderedSufs(seqanHost, hlen, s, slen, 0xffffffff, begin, end);
1065                 }
1066                 return;
1067         }
1068         if(n <= BUCKET_SORT_CUTOFF) {
1069                 // Bucket sort remaining items
1070                 bucketSortSufDcU8(seqanHost, host, hlen, s, slen, dc,
1071                                   (uint8_t)hi, begin, end, depth, sanityCheck);
1072                 if(sanityCheck) {
1073                         sanityCheckOrderedSufs(seqanHost, hlen, s, slen, 0xffffffff, begin, end);
1074                 }
1075                 return;
1076         }
1077         size_t a, b, c, d, r;
1078         CHOOSE_AND_SWAP_PIVOT(SWAP1, CHAR_AT_SUF_U8); // choose pivot, swap to begin
1079         int v = CHAR_AT_SUF_U8(begin, depth); // v <- pivot value
1080         #ifndef NDEBUG
1081         {
1082                 bool stillInBounds = false;
1083                 for(size_t i = begin; i < end; i++) {
1084                         if(depth < (hlen-s[i])) {
1085                                 stillInBounds = true;
1086                                 break;
1087                         } else { /* already fell off this suffix */ }
1088                 }
1089                 assert(stillInBounds); // >=1 suffix must still be in bounds
1090         }
1091         #endif
1092         a = b = begin;
1093         c = d = end-1;
1094         while(true) {
1095                 // Invariant: everything before a is = pivot, everything
1096                 // between a and b is <
1097                 int bc = 0; // shouldn't have to init but gcc on Mac complains
1098                 while(b <= c && v >= (bc = CHAR_AT_SUF_U8(b, depth))) {
1099                         if(v == bc) {
1100                                 SWAP(s, a, b); a++;
1101                         }
1102                         b++;
1103                 }
1104                 // Invariant: everything after d is = pivot, everything
1105                 // between c and d is >
1106                 int cc = 0; // shouldn't have to init but gcc on Mac complains
1107                 bool hiLatch = true;
1108                 while(b <= c && v <= (cc = CHAR_AT_SUF_U8(c, depth))) {
1109                         if(v == cc) {
1110                                 SWAP(s, c, d); d--;
1111                         }
1112                         else if(hiLatch && cc == hi) {
1113
1114                         }
1115                         c--;
1116                 }
1117                 if(b > c) break;
1118                 SWAP(s, b, c);
1119                 b++;
1120                 c--;
1121         }
1122         assert(a > begin || c < end-1);                      // there was at least one =s
1123         assert_lt(d-c, n); // they can't all have been > pivot
1124         assert_lt(b-a, n); // they can't all have been < pivot
1125         //assert(assertPartitionedSuf(host, s, slen, hi, v, begin, end, depth));  // check pre-=-swap invariant
1126         r = min(a-begin, b-a); VECSWAP(s, begin, b-r,   r);  // swap left = to center
1127         r = min(d-c, end-d-1); VECSWAP(s, b,     end-r, r);  // swap right = to center
1128         //assert(assertPartitionedSuf2(host, s, slen, hi, v, begin, end, depth)); // check post-=-swap invariant
1129         r = b-a; // r <- # of <'s
1130         if(r > 0) {
1131                 MQS_RECURSE_SUF_DC_U8(begin, begin + r, depth); // recurse on <'s
1132         }
1133         // Do not recurse on ='s if the pivot was the off-the-end value;
1134         // they're already fully sorted
1135         if(v != hi) {
1136                 MQS_RECURSE_SUF_DC_U8(begin + r, begin + r + (a-begin) + (end-d-1), depth+1); // recurse on ='s
1137         }
1138         r = d-c; // r <- # of >'s excluding those exhausted
1139         if(r > 0 && v < hi-1) {
1140                 MQS_RECURSE_SUF_DC_U8(end-r, end, depth); // recurse on >'s
1141         }
1142 }
1143
1144
1145 #endif /*MULTIKEY_QSORT_H_*/