5 #ifndef RANGE_SOURCE_H_
6 #define RANGE_SOURCE_H_
10 #include "seqan/sequence.h"
// NOTE(review): this chunk is a sparsely sampled extraction -- interior
// lines (including the enclosing class header and many braces) are missing,
// and each surviving line carries a residual listing number. Only comments
// are added/fixed below; visible code text is byte-identical.
//
// EditList: a small-vector-style container of Edit records with three
// storage tiers: an inline array (edits_), then a pool-allocated overflow
// array (moreEdits_, numMoreEdits entries), then a final pool-allocated
// array sized from the read length (yetMoreEdits_).
23 * List of Edits that automatically expands as edits are added.
// Start empty; overflow tiers are allocated lazily from the pool.
27 EditList() : sz_(0), moreEdits_(NULL), yetMoreEdits_(NULL) { }
30 * Add an edit to the edit list.
// Returns false on pool exhaustion (see the NULL checks after each
// pool.alloc below); callers must handle that failure.
32 bool add(const Edit& e, AllocOnlyPool<Edit>& pool, size_t qlen) {
// Capacity invariant: never more than qlen + 10 edits.
33 assert_lt(sz_, qlen + 10);
35 assert(moreEdits_ == NULL);
36 assert(yetMoreEdits_ == NULL);
// Inline tier just filled; allocate the first overflow tier.
38 } else if(sz_ == numEdits) {
39 assert(moreEdits_ == NULL);
40 assert(yetMoreEdits_ == NULL);
41 moreEdits_ = pool.alloc(numMoreEdits);
42 if(moreEdits_ == NULL) {
45 assert(moreEdits_ != NULL);
48 } else if(sz_ < (numEdits + numMoreEdits)) {
49 assert(moreEdits_ != NULL);
50 assert(yetMoreEdits_ == NULL);
51 moreEdits_[sz_ - numEdits] = e;
// First overflow tier full; allocate the final, read-length-sized tier.
53 } else if(sz_ == (numEdits + numMoreEdits)) {
54 assert(moreEdits_ != NULL);
55 assert(yetMoreEdits_ == NULL);
56 yetMoreEdits_ = pool.alloc(qlen + 10 - numMoreEdits - numEdits);
57 if(yetMoreEdits_ == NULL) {
60 assert(yetMoreEdits_ != NULL);
64 assert(moreEdits_ != NULL);
65 assert(yetMoreEdits_ != NULL);
66 yetMoreEdits_[sz_ - numEdits - numMoreEdits] = e;
73 * Return a const reference to the ith Edit in the list.
// Same three-tier dispatch as add(): inline, then moreEdits_, then
// yetMoreEdits_, with the index rebased at each tier boundary.
75 const Edit& get(size_t i) const {
79 } else if(i < (numEdits + numMoreEdits)) {
80 assert(moreEdits_ != NULL);
81 return moreEdits_[i-numEdits];
83 assert(moreEdits_ != NULL);
84 assert(yetMoreEdits_ != NULL);
85 return yetMoreEdits_[i-numEdits-numMoreEdits];
90 * Get most recently added Edit.
92 const Edit& top() const {
98 * Return true iff no Edits have been added.
100 bool empty() const { return size() == 0; }
103 * Set a particular element of the EditList.
// Writing counterpart of get(); same tier dispatch.
105 void set(size_t i, const Edit& e) {
109 } else if(i < (numEdits + numMoreEdits)) {
110 assert(moreEdits_ != NULL);
111 moreEdits_[i-numEdits] = e;
113 assert(moreEdits_ != NULL);
114 assert(yetMoreEdits_ != NULL);
115 yetMoreEdits_[i-numEdits-numMoreEdits] = e;
120 * Remove all Edits from the list.
125 yetMoreEdits_ = NULL;
129 * Return number of Edits in the List.
131 size_t size() const { return sz_; }
134 * Free all the heap-allocated edit lists
// Sizes passed to epool.free must match those requested in add() so the
// pool's bookkeeping balances.
136 void free(AllocOnlyPool<Edit>& epool, size_t qlen) {
137 if(yetMoreEdits_ != NULL)
138 epool.free(yetMoreEdits_, qlen + 10 - numMoreEdits - numEdits);
139 if(moreEdits_ != NULL)
140 epool.free(moreEdits_, numMoreEdits);
143 const static size_t numEdits = 6; // part of object allocation
144 const static size_t numMoreEdits = 16; // first extra allocation
145 size_t sz_; // number of Edits stored in the EditList
146 Edit edits_[numEdits]; // first numEdits (6) edits; typically, no more are needed
147 Edit *moreEdits_; // if used, size is dictated by numMoreEdits
148 Edit *yetMoreEdits_; // if used, size is dictated by length of read
// NOTE(review): sampled extraction -- struct/union headers and many braces
// are missing; only comments are added here.
//
// ElimsAndQual packs per-position state into one 64-bit word via three
// overlapping bitfield layouts (each sums to 64 bits: 13 elim flags +
// 6 x 7-bit quality fields (42) + 9 reserved).
152 * Holds per-position information about what outgoing paths have been
153 * eliminated and what the quality value(s) is (are) at the position.
158 * Assuming qual A/C/G/T are already set, set quallo and quallo2
159 * to the additional cost incurred by the least and second-least
// NOTE(review): unlike the C/G/T cases below, the A case never updates
// quallo2 (those updates are commented out) -- presumably safe because A
// is processed first while quallo/quallo2 still hold sentinels; confirm
// against the missing initialization lines.
166 // A mismatch to an A in the genome has not been ruled out
167 if(flags.qualA < flags.quallo) {
168 //flags.quallo2 = flags.quallo;
169 flags.quallo = flags.qualA;
171 //else if(flags.qualA == flags.quallo) {
172 // flags.quallo2 = flags.quallo;
173 //} else if(flags.qualA < flags.quallo2) {
174 // flags.quallo2 = flags.qualA;
178 // A mismatch to a C in the genome has not been ruled out
// Standard "track two smallest" update: demote quallo into quallo2 when
// beaten, or tighten quallo2 alone on a tie/second-best.
179 if(flags.qualC < flags.quallo) {
180 flags.quallo2 = flags.quallo;
181 flags.quallo = flags.qualC;
182 } else if(flags.qualC == flags.quallo) {
183 flags.quallo2 = flags.quallo;
184 } else if(flags.qualC < flags.quallo2) {
185 flags.quallo2 = flags.qualC;
189 // A mismatch to a G in the genome has not been ruled out
190 if(flags.qualG < flags.quallo) {
191 flags.quallo2 = flags.quallo;
192 flags.quallo = flags.qualG;
193 } else if(flags.qualG == flags.quallo) {
194 flags.quallo2 = flags.quallo;
195 } else if(flags.qualG < flags.quallo2) {
196 flags.quallo2 = flags.qualG;
200 // A mismatch to a T in the genome has not been ruled out
201 if(flags.qualT < flags.quallo) {
202 flags.quallo2 = flags.quallo;
203 flags.quallo = flags.qualT;
204 } else if(flags.qualT == flags.quallo) {
205 flags.quallo2 = flags.quallo;
206 } else if(flags.qualT < flags.quallo2) {
207 flags.quallo2 = flags.qualT;
214 * Set all 13 elimination bits of the flags field to 1, indicating
215 * that all outgoing paths are eliminated.
217 inline void eliminate() {
// (1 << 13) - 1 == 0x1fff: ones in all 13 elim bits, zeros elsewhere.
218 join.elims = ((1 << 13) - 1);
222 * Internal consistency check. Basically just checks that lo and
223 * lo2 are set correctly.
// Quality fields are 7-bit; 127 acts as the "unset"/sentinel maximum.
228 assert_lt(flags.qualA, 127);
229 assert_lt(flags.qualC, 127);
230 assert_lt(flags.qualG, 127);
231 assert_lt(flags.qualT, 127);
// Recompute lo/lo2 from scratch (A case again skips the lo2 update,
// mirroring updateLo above).
233 if(flags.qualA < lo) {
236 //else if(flags.qualA == lo || flags.qualA < lo2) {
237 // lo2 = flags.qualA;
241 if(flags.qualC < lo) {
244 } else if(flags.qualC == lo || flags.qualC < lo2) {
249 if(flags.qualG < lo) {
252 } else if(flags.qualG == lo || flags.qualG < lo2) {
257 if(flags.qualT < lo) {
260 } else if(flags.qualT == lo || flags.qualT < lo2) {
// Recomputed values must agree with the stored quallo/quallo2.
264 assert_eq((int)lo, (int)flags.quallo);
265 assert_eq((int)lo2, (int)flags.quallo2);
// Layout 1: every flag and quality field individually addressable.
270 uint64_t mmA : 1; // A in ref aligns to non-A char in read
271 uint64_t mmC : 1; // C in ref aligns to non-C char in read
272 uint64_t mmG : 1; // G in ref aligns to non-G char in read
273 uint64_t mmT : 1; // T in ref aligns to non-T char in read
274 uint64_t snpA : 1; // Same as mmA, but we think it's a SNP and not a miscall
275 uint64_t snpC : 1; // Same as mmC, but we think it's a SNP and not a miscall
276 uint64_t snpG : 1; // Same as mmG, but we think it's a SNP and not a miscall
277 uint64_t snpT : 1; // Same as mmT, but we think it's a SNP and not a miscall
278 uint64_t insA : 1; // A insertion in reference w/r/t read
279 uint64_t insC : 1; // C insertion in reference w/r/t read
280 uint64_t insG : 1; // G insertion in reference w/r/t read
281 uint64_t insT : 1; // T insertion in reference w/r/t read
282 uint64_t del : 1; // deletion of read character
283 uint64_t qualA : 7; // quality penalty for picking A at this position
284 uint64_t qualC : 7; // quality penalty for picking C at this position
285 uint64_t qualG : 7; // quality penalty for picking G at this position
286 uint64_t qualT : 7; // quality penalty for picking T at this position
287 uint64_t quallo : 7; // lowest quality penalty at this position
288 uint64_t quallo2 : 7; // 2nd-lowest quality penalty at this position
289 uint64_t reserved : 9;
// Layout 2: all 13 elim bits addressable at once (used by eliminate()).
292 uint64_t elims : 13; // all of the edit-elim flags bundled together
293 uint64_t quals : 42; // quality of positions
294 uint64_t reserved : 9;
// Layout 3: elim bits grouped by edit type (mm / snp / ins / del).
297 uint64_t mmElims : 4; // substitution flags bundled together
298 uint64_t snpElims : 4; // SNP-substitution flags bundled together
299 uint64_t insElims : 4; // inserts-in-reference flags bundled together
300 uint64_t delElims : 1; // deletion of read character
301 uint64_t quals : 42; // quality of positions
302 uint64_t reserved : 9;
// NOTE(review): sampled extraction -- many interior lines of pickEdit
// (branch bodies, returns, braces) are missing; comments only.
//
// RangeState: per-position search state -- the four outgoing
// Burrows-Wheeler ranges (tops/bots), the packed elim/quality word (eq),
// and whether every outgoing path is already eliminated.
307 * All per-position state, including the ranges calculated for each
308 * character, the quality value at the position, and a set of flags
309 * recording whether we've tried each way of proceeding from this
315 * Using randomness when picking from among multiple choices, pick
316 * an edit to make. TODO: Only knows how to pick mismatches for
319 Edit pickEdit(int pos, RandomSource& rand, bool fuzzy,
320 uint32_t& top, uint32_t& bot, bool indels,
// Only mismatch edits are produced here (see TODO above).
325 ret.type = EDIT_TYPE_MM;
330 assert(!eliminated_);
331 assert(!fuzzy || eq.repOk());
// At least one substitution must remain uneliminated.
332 assert(!eq.flags.mmA || !eq.flags.mmC || !eq.flags.mmG || !eq.flags.mmT);
// num = count of still-available substitution choices (0/1 flags negated).
333 int num = !eq.flags.mmA + !eq.flags.mmC + !eq.flags.mmG + !eq.flags.mmT;
336 uint8_t lo2 = eq.flags.quallo2;
// With two choices left, the 2nd-lowest qual reverts to sentinel 127.
337 if(num == 2) eq.flags.quallo2 = 127;
338 // Only need to pick randomly if there's a quality tie
339 if(num > 1 && (!fuzzy || eq.flags.quallo == lo2)) {
340 last = false; // not the last at this pos
341 // Sum up range sizes and do a random weighted pick
343 bool candA = !eq.flags.mmA; bool candC = !eq.flags.mmC;
344 bool candG = !eq.flags.mmG; bool candT = !eq.flags.mmT;
345 bool candInsA = false, candInsC = false;
346 bool candInsG = false, candInsT = false;
347 bool candDel = false;
349 // Insertions and deletions can only be candidates
350 // if the user asked for indels
351 candInsA = !eq.flags.insA;
352 candInsC = !eq.flags.insC;
353 candInsG = !eq.flags.insG;
354 candInsT = !eq.flags.insT;
355 candDel = !eq.flags.del;
358 // To be a candidate in fuzzy mode, you have to both
359 // (a) not have been eliminated, and (b) be tied for
361 candA = (candA && eq.flags.qualA == eq.flags.quallo);
362 candC = (candC && eq.flags.qualC == eq.flags.quallo);
363 candG = (candG && eq.flags.qualG == eq.flags.quallo);
364 candT = (candT && eq.flags.qualT == eq.flags.quallo);
366 ASSERT_ONLY(int origNum = num);
// Accumulate BW range sizes (bots[i]-tops[i]) of the candidates into
// 'tot' so the dart throw below is weighted by range size.
368 assert_gt(bots[0], tops[0]);
369 tot += (bots[0] - tops[0]);
374 assert_gt(bots[1], tops[1]);
375 tot += (bots[1] - tops[1]);
380 assert_gt(bots[2], tops[2]);
381 tot += (bots[2] - tops[2]);
386 assert_gt(bots[3], tops[3]);
387 tot += (bots[3] - tops[3]);
393 assert_gt(bots[0], tops[0]);
394 tot += (bots[0] - tops[0]);
399 assert_gt(bots[1], tops[1]);
400 tot += (bots[1] - tops[1]);
405 assert_gt(bots[2], tops[2]);
406 tot += (bots[2] - tops[2]);
411 assert_gt(bots[3], tops[3]);
412 tot += (bots[3] - tops[3]);
417 // Always a candidate?
418 // Always a candidate just within the window?
419 // We can eliminate some indels based on the content downstream, but not many
424 assert_lt(num, origNum);
425 // Throw a dart randomly that hits one of the possible
426 // substitutions, with likelihoods weighted by range size
427 uint32_t dart = rand.nextU32() % tot;
429 if(dart < (bots[0] - tops[0])) {
430 // Eliminate A mismatch
434 assert_lt(eq.join2.mmElims, 15);
// 'color' selects color-space ('0'..'3') vs nucleotide output -- presumably
// SOLiD colorspace support; confirm against missing context.
435 ret.chr = color ? '0' : 'A';
436 if(fuzzy) eq.updateLo();
439 dart -= (bots[0] - tops[0]);
442 if(dart < (bots[1] - tops[1])) {
443 // Eliminate C mismatch
447 assert_lt(eq.join2.mmElims, 15);
448 ret.chr = color ? '1' : 'C';
449 if(fuzzy) eq.updateLo();
452 dart -= (bots[1] - tops[1]);
455 if(dart < (bots[2] - tops[2])) {
456 // Eliminate G mismatch
460 assert_lt(eq.join2.mmElims, 15);
461 ret.chr = color ? '2' : 'G';
462 if(fuzzy) eq.updateLo();
465 dart -= (bots[2] - tops[2]);
// Dart exhausted A/C/G ranges; T is the only remaining possibility.
468 assert_lt(dart, (bots[3] - tops[3]));
469 // Eliminate T mismatch
473 assert_lt(eq.join2.mmElims, 15);
474 ret.chr = color ? '3' : 'T';
475 if(fuzzy) eq.updateLo();
// Fuzzy, no tie: deterministically take the uneliminated char whose qual
// equals quallo (first match in A,C,G,T order).
481 if(eq.flags.qualA == eq.flags.quallo && eq.flags.mmA == 0) {
484 } else if(eq.flags.qualC == eq.flags.quallo && eq.flags.mmC == 0) {
487 } else if(eq.flags.qualG == eq.flags.quallo && eq.flags.mmG == 0) {
491 assert_eq(eq.flags.qualT, eq.flags.quallo);
492 assert_eq(0, eq.flags.mmT);
496 ret.chr = color ? "0123"[chr] : "ACGT"[chr];
500 if(!last) eq.updateLo();
502 last = true; // last at this pos
503 // There's only one; pick it!
507 } else if(!eq.flags.mmC) {
509 } else if(!eq.flags.mmG) {
512 assert(!eq.flags.mmT);
515 ret.chr = color ? "0123"[chr] : "ACGT"[chr];
518 //assert_eq(15, eq.join2.mmElims);
519 // Mark entire position as eliminated
527 * Return true (without assertion) iff this RangeState is
528 * internally consistent.
531 if(eliminated_) return true;
532 // Uneliminated chars must have non-empty ranges
533 if(!eq.flags.mmA || !eq.flags.insA) assert_gt(bots[0], tops[0]);
534 if(!eq.flags.mmC || !eq.flags.insC) assert_gt(bots[1], tops[1]);
535 if(!eq.flags.mmG || !eq.flags.insG) assert_gt(bots[2], tops[2]);
536 if(!eq.flags.mmT || !eq.flags.insT) assert_gt(bots[3], tops[3]);
540 // Outgoing ranges; if the position being described is not a
541 // legitimate jumping-off point for a branch, tops[] and bots[]
542 // will be filled with 0s and all possibilities in eq will be
544 uint32_t tops[4]; // A, C, G, T top offsets
545 uint32_t bots[4]; // A, C, G, T bot offsets
546 ElimsAndQual eq; // Which outgoing paths have been tried already
547 bool eliminated_; // Whether all outgoing paths have been eliminated
// NOTE(review): sampled extraction -- the Branch class header, many method
// bodies, braces and returns are missing; comments only, code untouched.
//
// Branch: one node of the best-first backtracking search. Carries the BW
// range at its root (top_/bot_), per-position RangeStates for revisitable
// positions (ranges_), the EditList leading to its root, and a 16-bit
// cost_ whose top 2 bits are the stratum and bottom 14 the quality-ham.
551 * Encapsulates a "branch" of the search space; i.e. all of the
552 * information deduced by walking along a path with only matches, along
553 * with information about the decisions that lead to the root of that
557 typedef std::pair<uint32_t, uint32_t> U32Pair;
560 delayedCost_(0), curtailed_(false), exhausted_(false),
561 prepped_(false), delayedIncrease_(false) { }
564 * Initialize a new branch object with an empty path.
// Returns false on RangeStatePool exhaustion (see allocC below); the
// caller must treat the branch as unusable in that case.
566 bool init(AllocOnlyPool<RangeState>& rsPool,
567 AllocOnlyPool<Edit>& epool,
580 const EbwtParams& ep,
582 const EditList* edits = NULL)
590 assert_gt(depth3_, 0);
599 // Care about both top and bot
600 SideLocus::initFromTopBot(itop, ibot, ep, ebwt, ltop_, lbot_);
601 } else if(ibot > itop) {
602 // Only care about top
603 ltop_.initFromRow(itop, ep, ebwt);
// One RangeState per remaining read position beyond the root depth.
606 if(qlen - rdepth_ > 0) {
607 ranges_ = rsPool.allocC(qlen - rdepth_); // allocated from the RangeStatePool
608 if(ranges_ == NULL) {
609 return false; // RangeStatePool exhausted
611 rangesSz_ = qlen - rdepth_;
// allocC is expected to zero-initialize; verify below (debug-only).
617 for(size_t i = 0; i < (qlen - rdepth_); i++) {
618 for(int j = 0; j < 4; j++) {
619 assert_eq(0, ranges_[i].tops[j]);
620 assert_eq(0, ranges_[i].bots[j]);
627 delayedIncrease_ = false;
// Copy the parent's edits (if any) into this branch's own EditList.
630 const size_t numEdits = edits->size();
631 for(size_t i = 0; i < numEdits; i++) {
632 edits_.add(edits->get(i), epool, qlen);
635 // If we're starting with a non-zero length, that means we're
636 // jumping over a bunch of unrevisitable positions.
637 for(size_t i = 0; i < len_; i++) {
638 ranges_[i].eliminated_ = true;
639 assert(eliminated(i));
646 * Depth of the deepest tip of the branch.
648 uint16_t tipDepth() const {
649 return rdepth_ + len_;
653 * Return true iff all outgoing edges from position i have been
656 inline bool eliminated(int i) const {
658 if(i <= len_ && i < rangesSz_) {
659 assert(ranges_ != NULL);
661 if(!ranges_[i].eliminated_) {
662 // Someone must be as-yet-uneliminated
663 assert(!ranges_[i].eq.flags.mmA ||
664 !ranges_[i].eq.flags.mmC ||
665 !ranges_[i].eq.flags.mmG ||
666 !ranges_[i].eq.flags.mmT);
667 assert_lt(ranges_[i].eq.flags.quallo, 127);
670 return ranges_[i].eliminated_;
676 * Split off a new branch by selecting a good outgoing path and
677 * creating a new Branch object for it and inserting that branch
678 * into the priority queue. Mark that outgoing path from the
679 * parent branch as eliminated. If the second-best outgoing path
680 * cost more, add the difference to the cost of this branch (since
681 * that's the best we can do starting from here from now on).
683 Branch* splitBranch(AllocOnlyPool<RangeState>& rpool,
684 AllocOnlyPool<Edit>& epool,
685 AllocOnlyPool<Branch>& bpool,
687 RandomSource& rand, uint32_t qlen,
688 uint32_t qualLim, int seedLen,
689 bool qualOrder, const EbwtParams& ep,
690 const uint8_t* ebwt, bool ebwtFw,
696 assert(ranges_ != NULL);
699 Branch *newBranch = bpool.alloc();
700 if(newBranch == NULL) {
703 assert(newBranch != NULL);
704 uint32_t id = bpool.lastId();
// Track up to 3 positions tied for lowest marginal cost; ties beyond
// 3 displace the oldest (see shift at listing lines 750-752).
705 int tiedPositions[3];
706 int numTiedPositions = 0;
707 // Lowest marginal cost incurred by any of the positions with
708 // non-eliminated outgoing edges
709 uint16_t bestCost = 0xffff;
711 uint16_t nextCost = 0xffff;
712 int numNotEliminated = 0;
713 int i = (int)depth0_;
714 i = max(0, i - rdepth_);
715 // Iterate over revisitable positions in the path
716 for(; i <= len_; i++) {
717 // If there are still valid options for leaving out of this
722 assert_lt(ranges_[i].eq.flags.quallo, 127);
723 if(ranges_[i].eq.flags.quallo2 < 127) {
// Positions inside the seed belong to a higher stratum (bit 14 set),
// so seed edits always cost more than any quality difference.
727 uint16_t stratum = (rdepth_ + i < seedLen) ? (1 << 14) : 0;
728 uint16_t cost = stratum;
729 cost |= (qualOrder ? ranges_[i].eq.flags.quallo : 0);
730 if(cost < bestCost) {
731 // Demote the old best to the next-best
734 if(ranges_[i].eq.flags.quallo2 < 127) {
735 nextCost = min<uint16_t>(
736 nextCost, ranges_[i].eq.flags.quallo2 | stratum);
739 // Update the new best
741 numTiedPositions = 1;
742 tiedPositions[0] = i;
743 } else if(cost == bestCost) {
744 // As good as the best so far
745 if(fuzzy) nextCost = cost;
746 assert_gt(numTiedPositions, 0);
747 if(numTiedPositions < 3) {
748 tiedPositions[numTiedPositions++] = i;
750 tiedPositions[0] = tiedPositions[1];
751 tiedPositions[1] = tiedPositions[2];
752 tiedPositions[2] = i;
754 } else if(cost < nextCost) {
755 // 'cost' isn't better than the best, but it is
756 // better than the next-best
761 assert_gt(numNotEliminated, 0);
762 assert_gt(numTiedPositions, 0);
763 //if(nextCost != 0xffff) assert_gt(nextCost, bestCost);
// Break ties among equally-cheap positions at random.
765 if(numTiedPositions > 1) {
766 r = rand.nextU32() % numTiedPositions;
768 int pos = tiedPositions[r];
770 // Pick an edit from among the edits tied for lowest cost
771 // (using randomness to break ties). If the selected edit is
772 // the last remaining one at this position, 'last' is set to
774 uint32_t top = 0, bot = 0;
775 Edit e = ranges_[pos].pickEdit(pos + rdepth_, rand, fuzzy, top,
778 // Create and initialize a new Branch
779 uint16_t newRdepth = rdepth_ + pos + 1;
780 assert_lt((bestCost >> 14), 4);
// hamadd: quality portion of bestCost with the 2 stratum bits masked off.
781 uint32_t hamadd = (bestCost & ~0xc000);
782 uint16_t depth = pos + rdepth_;
783 assert_geq(depth, depth0_);
// Editing at this depth consumes one allowance; shift the depth
// thresholds so the child permits one fewer edit below 'depth'.
784 uint16_t newDepth0 = depth0_;
785 uint16_t newDepth1 = depth1_;
786 uint16_t newDepth2 = depth2_;
787 uint16_t newDepth3 = depth3_;
788 if(depth < depth1_) newDepth0 = depth1_;
789 if(depth < depth2_) newDepth1 = depth2_;
790 if(depth < depth3_) newDepth2 = depth3_;
791 assert_eq((uint32_t)(cost_ & ~0xc000), (uint32_t)(ham_ + hamadd));
793 rpool, epool, id, qlen,
794 newDepth0, newDepth1, newDepth2, newDepth3,
795 newRdepth, 0, cost_, ham_ + hamadd,
796 top, bot, ep, ebwt, &edits_))
// The child inherits this branch's edits plus the newly picked one.
801 newBranch->edits_.add(e, epool, qlen);
802 if(numNotEliminated == 1 && last) {
803 // This branch is totally exhausted; there are no more
804 // valid outgoing paths from any positions within it.
805 // Remove it from the PathManager and mark it as exhausted.
806 // The caller should delete it.
808 if(ranges_ != NULL) {
809 assert_gt(rangesSz_, 0);
810 if(rpool.free(ranges_, rangesSz_)) {
816 else if(fuzzy && bestCost != nextCost) {
817 // We exhausted the last outgoing edge at the current best
818 // cost; update the best cost to be the next-best
819 assert_gt(nextCost, bestCost);
820 delayedCost_ = (cost_ - bestCost + nextCost);
821 assert_gt(delayedCost_, cost_);
822 delayedIncrease_ = true;
824 else if(!fuzzy && numTiedPositions == 1 && last) {
825 // We exhausted the last outgoing edge at the current best
826 // cost; update the best cost to be the next-best
827 assert_neq(0xffff, nextCost);
828 if(bestCost != nextCost) {
829 assert_gt(nextCost, bestCost);
830 delayedCost_ = (cost_ - bestCost + nextCost);
831 delayedIncrease_ = true;
838 * Free a branch and all its contents.
840 void free(uint32_t qlen,
841 AllocOnlyPool<RangeState>& rpool,
842 AllocOnlyPool<Edit>& epool,
843 AllocOnlyPool<Branch>& bpool)
845 edits_.free(epool, qlen);
846 if(ranges_ != NULL) {
847 assert_gt(rangesSz_, 0);
848 rpool.free(ranges_, rangesSz_);
856 * Pretty-print the state of this branch.
858 void print(const String<Dna5>& qry,
859 const String<char>& quals,
869 const size_t qlen = seqan::length(qry);
// One-letter status prefix: E = exhausted, C = curtailed.
870 if(exhausted_) out << "E ";
871 else if(curtailed_) out << "C ";
873 if(ebwtFw) out << "<";
877 std::stringstream ss;
// Pad the formatted number to width 6.
881 for(size_t i = 0; i < 6 - s.length(); i++) {
886 std::stringstream ss2;
890 for(size_t i = 0; i < 6 - s.length(); i++) {
895 if(halfAndHalf) out << " h ";
896 else if(seeded) out << " s ";
898 std::stringstream ss3;
899 const size_t numEdits = edits_.size();
// Print the already-committed (root) portion of the read; edited
// positions are shown lowercased.
901 for(size_t i = 0; i < rdepth_; i++) {
902 if(editidx < numEdits && edits_.get(editidx).pos == i) {
903 ss3 << " " << (char)tolower(edits_.get(editidx).chr);
906 ss3 << " " << (char)qry[qlen - i - 1];
914 for(size_t i = 0; i < len_; i++) {
915 if(editidx < numEdits && edits_.get(editidx).pos == printed) {
916 ss3 << (char)tolower(edits_.get(editidx).chr) << " ";
919 ss3 << (char)qry[qlen - printed - 1] << " ";
923 assert_eq(editidx, edits_.size());
924 for(size_t i = printed; i < qlen; i++) {
929 std::reverse(s.begin(), s.end());
935 * Called when the most recent branch extension resulted in an
936 * empty range or some other constraint violation (e.g., a
937 * half-and-half constraint).
939 void curtail(AllocOnlyPool<RangeState>& rpool, int seedLen, bool qualOrder) {
942 if(ranges_ == NULL) {
947 uint16_t lowestCost = 0xffff;
948 // Iterate over positions in the path looking for the cost of
949 // the lowest-cost non-eliminated position
950 uint32_t eliminatedStretch = 0;
951 int i = (int)depth0_;
952 i = max(0, i - rdepth_);
953 // TODO: It matters whether an insertion/deletion at given
954 // position would be a gap open or a gap extension
955 for(; i <= len_; i++) {
957 eliminatedStretch = 0;
958 uint16_t stratum = (rdepth_ + i < seedLen) ? (1 << 14) : 0;
959 uint16_t cost = (qualOrder ? /*TODO*/ ranges_[i].eq.flags.quallo : 0) | stratum;
960 if(cost < lowestCost) lowestCost = cost;
961 } else if(i < rangesSz_) {
965 if(lowestCost > 0 && lowestCost != 0xffff) {
966 // This branch's cost will change when curtailed; the
967 // caller should re-insert it into the priority queue so
968 // that the new cost takes effect.
970 } else if(lowestCost == 0xffff) {
971 // This branch is totally exhausted; there are no more
972 // valid outgoing paths from any positions within it.
973 // Remove it from the PathManager and mark it as exhausted.
974 // The caller should delete it.
976 if(ranges_ != NULL) {
977 assert_gt(rangesSz_, 0);
978 if(rpool.free(ranges_, rangesSz_)) {
984 // Just mark it as curtailed and keep the same cost
986 if(ranges_ != NULL) {
987 // Try to trim off no-longer-relevant elements of the
// Return the tail of ranges_ (unreachable slots plus the trailing run
// of fully-eliminated positions) to the pool.
990 assert_gt(rangesSz_, 0);
991 uint32_t trim = (rangesSz_ - len_ - 1) + eliminatedStretch;
992 assert_leq(trim, rangesSz_);
993 if(rpool.free(ranges_ + rangesSz_ - trim, trim)) {
1004 * Prep this branch for the next extension by calculating the
1005 * SideLocus information and prefetching cache lines from the
1008 void prep(const EbwtParams& ep, const uint8_t* ebwt) {
1010 SideLocus::initFromTopBot(top_, bot_, ep, ebwt, ltop_, lbot_);
1011 } else if(bot_ > top_) {
1012 ltop_.initFromRow(top_, ep, ebwt);
1019 * Get the furthest-out RangeState.
1021 RangeState* rangeState() {
1022 assert(!exhausted_);
1023 assert(ranges_ != NULL);
1024 assert_lt(len_, rangesSz_);
1025 return &ranges_[len_];
1029 * Set the elims to match the ranges in ranges_[len_], already
1030 * calculated by the caller. Only does mismatches for now.
1032 int installRanges(int c, int nextc, bool fuzzy, uint32_t qAllow,
1035 assert(!exhausted_);
1036 assert(ranges_ != NULL);
1037 RangeState& r = ranges_[len_];
1039 r.eliminated_ = true; // start with everything eliminated
1040 r.eq.eliminate(); // set all elim flags to 1
1041 assert_lt(qs[0], 127);
1042 assert_lt(qs[1], 127);
1043 assert_lt(qs[2], 127);
1044 assert_lt(qs[3], 127);
1046 assert_eq(qs[0], qs[1]);
1047 assert_eq(qs[0], qs[2]);
1048 assert_eq(qs[0], qs[3]);
1049 r.eq.flags.quallo = qs[0];
// Quality penalty exceeds the remaining allowance: nothing to install.
1050 if(qs[0] > qAllow) return 0;
1052 // Set one/both of these to true to do the accounting for
1053 // insertions and deletions as well as mismatches
1054 bool doInserts = false;
1055 bool doDeletes = false;
// For each base != the read's base c: un-eliminate its mismatch if its
// BW range is non-empty and its quality penalty fits the allowance.
1056 // We can proceed on an A
1057 if(c != 0 && r.bots[0] > r.tops[0] && qs[0] <= qAllow) {
1058 r.eliminated_ = false;
1059 r.eq.flags.mmA = 0; // A substitution is an option
1060 if(doInserts) r.eq.flags.insA = 0;
1061 if(doDeletes && nextc == 0) r.eq.flags.del = 0;
1064 // We can proceed on a C
1065 if(c != 1 && r.bots[1] > r.tops[1] && qs[1] <= qAllow) {
1066 r.eliminated_ = false;
1067 r.eq.flags.mmC = 0; // C substitution is an option
1068 if(doInserts) r.eq.flags.insC = 0;
1069 if(doDeletes && nextc == 1) r.eq.flags.del = 0;
1072 // We can proceed on a G
1073 if(c != 2 && r.bots[2] > r.tops[2] && qs[2] <= qAllow) {
1074 r.eliminated_ = false;
1075 r.eq.flags.mmG = 0; // G substitution is an option
1076 if(doInserts) r.eq.flags.insG = 0;
1077 if(doDeletes && nextc == 2) r.eq.flags.del = 0;
1080 // We can proceed on a T
1081 if(c != 3 && r.bots[3] > r.tops[3] && qs[3] <= qAllow) {
1082 r.eliminated_ = false;
1083 r.eq.flags.mmT = 0; // T substitution is an option
1084 if(doInserts) r.eq.flags.insT = 0;
1085 if(doDeletes && nextc == 3) r.eq.flags.del = 0;
1088 if(!r.eliminated_ && fuzzy) {
1090 r.eq.flags.qualA = qs[0];
1091 r.eq.flags.qualC = qs[1];
1092 r.eq.flags.qualG = qs[2];
1093 r.eq.flags.qualT = qs[3];
1095 // Now that the quals are set and the elim flags are set
1096 // according to which Burrows-Wheeler ranges are empty,
1097 // determine best and second-best quals
1100 assert_lt(r.eq.flags.quallo, 127);
1106 * Extend this branch by one position.
1109 assert(!exhausted_);
1110 assert(!curtailed_);
1111 assert(ranges_ != NULL);
1118 * Do an internal consistency check
1120 bool repOk(uint32_t qlen = 0) const{
1121 assert_leq(depth0_, depth1_);
1122 assert_leq(depth1_, depth2_);
1123 assert_leq(depth2_, depth3_);
1124 assert_gt(depth3_, 0);
1126 assert_leq(edits_.size(), qlen); // might have to relax this with inserts
1127 assert_leq(rdepth_, qlen);
1129 for(int i = 0; i < len_; i++) {
1130 if(!eliminated(i)) {
1131 assert_lt(i, (int)(len_));
1132 assert(ranges_[i].repOk());
1135 const size_t numEdits = edits_.size();
// O(numEdits^2) pairwise check; numEdits is small in practice (bounded
// by qlen per the assert above).
1136 for(size_t i = 0; i < numEdits; i++) {
1137 for(size_t j = i+1; j < numEdits; j++) {
1138 // No two edits should be at the same position (might
1139 // have to relax this with inserts)
1140 assert_neq(edits_.get(i).pos, edits_.get(j).pos);
1143 assert_lt((cost_ >> 14), 4);
1147 uint32_t id_; // branch id; needed to make the ordering of
1148 // branches that are tied in the priority queue
1149 // totally unambiguous. Otherwise, things start
1150 // getting non-deterministic.
1151 uint16_t depth0_; // no edits at depths < depth0
1152 uint16_t depth1_; // at most 1 edit at depths < depth1
1153 uint16_t depth2_; // at most 2 edits at depths < depth2
1154 uint16_t depth3_; // at most 3 edits at depths < depth3
1155 uint16_t rdepth_; // offset in read space from root of search space
1156 uint16_t len_; // length of the branch
1157 uint16_t cost_; // top 2 bits = stratum, bottom 14 = qual ham
1158 // it's up to Branch to keep this updated with the
1159 // cumulative cost of the best path leaving the
1160 // branch; if the branch hasn't been fully
1161 // extended yet, then that path will always be the
1162 // one that extends it by one more
1163 uint16_t ham_; // quality-weighted hamming distance so far
1164 RangeState *ranges_; // Allocated from the RangeStatePool
1166 uint32_t top_; // top offset leading to the root of this subtree
1167 uint32_t bot_; // bot offset leading to the root of this subtree
1170 EditList edits_; // edits leading to the root of the branch
1172 uint16_t delayedCost_; // new cost to adopt once delayedIncrease_ is handled
1174 bool curtailed_; // can't be extended anymore without using edits
1175 bool exhausted_; // all outgoing edges exhausted, including all edits
1176 bool prepped_; // whether SideLocus's are inited
1177 bool delayedIncrease_; // caller should re-queue this branch at delayedCost_
// NOTE(review): sampled extraction -- struct header, returns and braces
// missing; comments only.
//
// Comparator for the Branch priority queue (see TBranchQueue below).
// std::priority_queue pops the *greatest* element under its comparator,
// so returning true ("b before a") makes lower-cost branches pop first.
1181 * Order two Branches based on cost.
1186 * true -> b before a
1187 * false -> a before b
1189 bool operator()(const Branch* a, const Branch* b) const {
1190 bool aUnextendable = a->curtailed_ || a->exhausted_;
1191 bool bUnextendable = b->curtailed_ || b->exhausted_;
1192 // Branch with the best cost
// Tie-break chain when costs are equal: extendability, then tip depth,
// then branch id (for fully deterministic ordering).
1193 if(a->cost_ == b->cost_) {
1194 // If one or the other is curtailed, take the one that's
1195 // still getting extended
1196 if(bUnextendable && !aUnextendable) {
1197 // a still being extended, return false
1200 if(aUnextendable && !bUnextendable) {
1201 // b still being extended, return true
1204 // Either both are curtailed or both are still being
1205 // extended, pick based on which one is deeper
1206 if(a->tipDepth() != b->tipDepth()) {
1207 // Expression is true if b is deeper
1208 return a->tipDepth() < b->tipDepth();
1210 // Keep things deterministic by providing an unambiguous
1211 // order using the id_ field
1212 assert_neq(b->id_, a->id_);
1213 return b->id_ < a->id_;
// Unequal costs: true iff b is cheaper, i.e. b outranks a.
1215 return b->cost_ < a->cost_;
// Equivalence used by BranchQueue's push/top sanity assertions.
1219 static bool equal(const Branch* a, const Branch* b) {
1220 return a->cost_ == b->cost_ && a->curtailed_ == b->curtailed_ && a->tipDepth() == b->tipDepth();
// NOTE(review): sampled extraction -- class header, returns and braces
// missing; comments only.
//
// BranchQueue: thin wrapper around std::priority_queue<Branch*> keeping an
// explicit element count (sz_) and optional verbose trace output keyed by
// the current pattern id (patid_).
1225 * A priority queue for Branch objects; makes it easy to process
1226 * branches in a best-first manner by prioritizing branches with lower
1227 * cumulative costs over branches with higher cumulative costs.
1231 typedef std::pair<int, int> TIntPair;
1232 typedef std::priority_queue<Branch*, std::vector<Branch*>, CostCompare> TBranchQueue;
1236 BranchQueue(bool verbose, bool quiet) :
1237 sz_(0), branchQ_(), patid_(0), verbose_(verbose), quiet_(quiet)
1241 * Return the front (highest-priority) element of the queue.
1244 Branch *b = branchQ_.top();
// Verbose trace of the fronted branch and the size transition.
1247 ss << patid_ << ": Fronting " << b->id_ << ", " << b << ", " << b->cost_ << ", " << b->exhausted_ << ", " << b->curtailed_ << ", " << sz_ << "->" << (sz_-1);
1254 * Remove and return the front (highest-priority) element of the
1258 Branch *b = branchQ_.top(); // get it
1259 branchQ_.pop(); // remove it
1262 ss << patid_ << ": Popping " << b->id_ << ", " << b << ", " << b->cost_ << ", " << b->exhausted_ << ", " << b->curtailed_ << ", " << sz_ << "->" << (sz_-1);
1270 * Insert a new Branch into the sorted priority queue.
1272 void push(Branch *b) {
// b "is better" iff it would not be ordered below the current top.
1274 bool bIsBetter = empty() || !CostCompare()(b, branchQ_.top());
1278 ss << patid_ << ": Pushing " << b->id_ << ", " << b << ", " << b->cost_ << ", " << b->exhausted_ << ", " << b->curtailed_ << ", " << sz_ << "->" << (sz_+1);
// Sanity: after the push, the top must be consistent with bIsBetter
// (modulo branches that compare equal under CostCompare::equal).
1283 assert(bIsBetter || branchQ_.top() != b || CostCompare::equal(branchQ_.top(), b));
1284 assert(!bIsBetter || branchQ_.top() == b || CostCompare::equal(branchQ_.top(), b));
1290 * Empty the priority queue and reset the count.
1292 void reset(uint32_t patid) {
// std::priority_queue has no clear(); swap in a fresh instance.
1294 branchQ_ = TBranchQueue();
1299 * Return true iff the priority queue of branches is empty.
1301 bool empty() const {
// sz_ must agree with the underlying queue's emptiness.
1302 bool ret = branchQ_.empty();
1303 assert(ret || sz_ > 0);
1304 assert(!ret || sz_ == 0);
1309 * Return the number of Branches in the queue.
1311 uint32_t size() const {
1317 * Consistency check.
1319 bool repOk(std::set<Branch*>& bset) {
// The queue's top must carry the best (stratum, ham) over the whole set.
1320 TIntPair pair = bestStratumAndHam(bset);
1321 Branch *b = branchQ_.top();
1322 assert_eq(pair.first, (b->cost_ >> 14));
1323 assert_eq(pair.second, (b->cost_ & ~0xc000));
1324 std::set<Branch*>::iterator it;
1325 for(it = bset.begin(); it != bset.end(); it++) {
1326 assert_gt((*it)->depth3_, 0);
1336 * Return the stratum and quality-weight (sum of qualities of all
1337 * edited positions) of the lowest-cost branch.
1339 TIntPair bestStratumAndHam(std::set<Branch*>& bset) const {
// Sentinel pair; any real branch beats (0xffff, 0xffff).
1340 TIntPair ret = make_pair(0xffff, 0xffff);
1341 std::set<Branch*>::iterator it;
1342 for(it = bset.begin(); it != bset.end(); it++) {
// cost_ layout: top 2 bits stratum, bottom 14 quality-ham (mask 0xc000).
1344 int stratum = b->cost_ >> 14;
1345 assert_lt(stratum, 4);
1346 int qual = b->cost_ & ~0xc000;
1347 if(stratum < ret.first ||
1348 (stratum == ret.first && qual < ret.second))
1350 ret.first = stratum;
1359 TBranchQueue branchQ_; // priority queue of branches
1366 * A class that both contains Branches and determines how those
1367 * branches are extended to form longer paths. The overall goal is to
1368 * find the best full alignment(s) as quickly as possible so that a
1369 * successful search can finish quickly. A second goal is to ensure
1370 * that the most "promising" paths are tried first so that, if there is
1371 * a limit on the amount of effort spent searching before we give up,
1372 * we can be as sensitive as possible given that limit.
1374 * The quality (or cost) of a given alignment path will ultimately be
1375 * configurable. The default cost model is:
1377 * 1. Mismatches incur one "edit" penalty and a "quality" penalty with
1378 * magnitude equal to the Phred quality score of the read position
1379 * involved in the edit (note that insertions into the read are a
1381 * 2. Edit penalties are all more costly than any quality penalty; i.e.
1382 * the policy sorts alignments first by edit penalty, then by
1384 * 3. For the Maq-like alignment policy, edit penalties saturate (don't
1385 * get any greater) after leaving the seed region of the alignment.
// Construct a PathManager around a shared ChunkPool; the three
// AllocOnlyPools (Branches, RangeStates, Edits) draw from that pool.
// NOTE(review): listing elided between 1392 and 1394 -- presumably the
// member init for cpool; confirm against the full file.
1391 PathManager(ChunkPool* cpool_, int *btCnt, bool verbose, bool quiet) :
1392 branchQ_(verbose, quiet),
1394 bpool(cpool, "branch"),
1395 rpool(cpool, "range state"),
1396 epool(cpool, "edit"),
1397 minCost(0), btCnt_(btCnt),
1405 * Return the "front" (highest-priority) branch in the collection.
// Asserted invariant: the front branch has depth3_ > 0.
1409 assert_gt(branchQ_.front()->depth3_, 0);
1410 return branchQ_.front();
1414 * Pop the highest-priority (lowest cost) element from the
// Remove and return the lowest-cost Branch, keeping the assert-only
// mirror set (branchSet_) and the cached minCost consistent.
1418 Branch* b = branchQ_.pop();
1419 assert_gt(b->depth3_, 0);
1421 // Also remove it from the set
1422 assert(branchSet_.find(b) != branchSet_.end());
1423 ASSERT_ONLY(size_t setSz = branchSet_.size());
1424 branchSet_.erase(branchSet_.find(b));
1425 assert_eq(setSz-1, branchSet_.size());
1426 if(!branchQ_.empty()) {
1427 // Top shouldn't be b any more
1428 Branch *newtop = branchQ_.front();
1429 assert(b != newtop);
1432 // Update this PathManager's cost
// minCost tracks the cost of whatever branch is now at the front.
1433 minCost = branchQ_.front()->cost_;
1439 * Push a new element onto the priority queue.
1441 void push(Branch *b) {
// Only live (non-exhausted, depth3_ > 0) branches may be queued.
1442 assert(!b->exhausted_);
1443 assert_gt(b->depth3_, 0);
1446 // Also insert it into the set
1447 assert(branchSet_.find(b) == branchSet_.end());
1448 branchSet_.insert(b);
1450 // Update this PathManager's cost
// Re-read the front: the pushed branch may or may not be it.
1451 minCost = branchQ_.front()->cost_;
1455 * Return the number of active branches in the best-first
// Delegates directly to the underlying BranchQueue.
1459 return branchQ_.size();
1463 * Reset the PathManager, clearing out the priority queue and
1464 * resetting the RangeStatePool.
// Also resets the Branch and Edit pools and (assert-only) the mirror
// branch set; after this call everything must be empty.
1466 void reset(uint32_t patid) {
1467 branchQ_.reset(patid); // reset the priority queue
1468 assert(branchQ_.empty());
1469 bpool.reset(); // reset the Branch pool
1470 epool.reset(); // reset the Edit pool
1471 rpool.reset(); // reset the RangeState pool
1472 assert(bpool.empty());
1473 assert(epool.empty());
1474 assert(rpool.empty());
1475 ASSERT_ONLY(branchSet_.clear());
1476 assert_eq(0, branchSet_.size());
1477 assert_eq(0, branchQ_.size());
1483 * Return true iff Branch b is in the priority queue;
// Membership is checked against the assert-only mirror set.
1485 bool contains(Branch *b) const {
1486 bool ret = branchSet_.find(b) != branchSet_.end();
// A contained branch must still be live (not exhausted).
1487 assert(!ret || !b->exhausted_);
1492 * Do a consistency-check on the collection of branches contained
1493 * in this PathManager.
// Trivially OK when empty; otherwise delegate to the queue's own
// check, cross-validated against the mirror branch set.
1496 if(empty()) return true;
1497 assert(branchQ_.repOk(branchSet_));
1503 * Return true iff the priority queue of branches is empty.
1505 bool empty() const {
1506 bool ret = branchQ_.empty();
// Queue and mirror set must agree on emptiness.
1507 assert_eq(ret, branchSet_.empty());
1512 * Curtail the given branch, and possibly remove it from or
1513 * re-insert it into the priority queue.
// Curtailing may raise the branch's cost or exhaust it entirely:
//  - exhausted: pop it and free its resources back to the pools;
//  - cost changed: pop it (re-insert statements are elided in this
//    listing -- presumably a push follows; confirm in the full file).
1515 void curtail(Branch *br, uint32_t qlen, int seedLen, bool qualOrder) {
1516 assert(!br->exhausted_);
1517 assert(!br->curtailed_);
1518 uint16_t origCost = br->cost_;
1519 br->curtail(rpool, seedLen, qualOrder);
1520 assert(br->curtailed_);
// Curtailing can only keep or increase cost, never lower it.
1521 assert_geq(br->cost_, origCost);
1522 if(br->exhausted_) {
1523 assert(br == front());
1524 ASSERT_ONLY(Branch *popped =) pop();
1525 assert(popped == br);
1526 br->free(qlen, rpool, epool, bpool);
1527 } else if(br->cost_ != origCost) {
1528 // Re-insert the newly-curtailed branch
1529 assert(br == front());
1530 Branch *popped = pop();
1531 assert(popped == br);
1537 * If the frontmost branch is a curtailed branch, split off an
1538 * extendable branch and add it to the queue.
// Apparent contract: returns false when the backtrack budget
// (*btCnt_) is spent and the search must end abruptly; returns true
// otherwise (including the empty() case). Early-return statements
// are elided in this listing -- confirm against the full file.
1540 bool splitAndPrep(RandomSource& rand, uint32_t qlen,
1541 uint32_t qualLim, int seedLen,
1542 bool qualOrder, bool fuzzy,
1543 const EbwtParams& ep, const uint8_t* ebwt,
1546 if(empty()) return true;
1547 // This counts as a backtrack
1548 if(btCnt_ != NULL && (*btCnt_ == 0)) {
1549 // Abruptly end search
1552 Branch *f = front();
1553 assert(!f->exhausted_);
// Apply any delayed cost increase before splitting: pop the branch,
// install the delayed cost, then re-push so it sorts correctly.
1554 while(f->delayedIncrease_) {
1555 assert(!f->exhausted_);
1556 if(f->delayedIncrease_) {
1557 assert_neq(0, f->delayedCost_);
1558 ASSERT_ONLY(Branch *popped =) pop();
1559 assert(popped == f);
1560 f->cost_ = f->delayedCost_;
1561 f->delayedIncrease_ = false;
1562 f->delayedCost_ = 0;
1563 push(f); // re-insert it
1567 assert(!f->exhausted_);
1570 ASSERT_ONLY(uint16_t origCost = f->cost_);
1571 // This counts as a backtrack
1572 if(btCnt_ != NULL) {
1573 if(--(*btCnt_) == 0) {
1574 // Abruptly end search
// Split an extendable branch off of the curtailed front branch.
1578 Branch* newbr = splitBranch(
1579 f, rand, qlen, qualLim, seedLen, qualOrder, fuzzy, ep, ebwt,
1584 // If f is exhausted, get rid of it immediately
1586 assert(!f->delayedIncrease_);
1587 ASSERT_ONLY(Branch *popped =) pop();
1588 assert(popped == f);
1589 f->free(qlen, rpool, epool, bpool);
1591 assert_eq(origCost, f->cost_);
1592 assert(newbr != NULL);
// The freshly split branch should now be the cheapest in the queue.
1594 assert(newbr == front());
1601 * Return true iff the front element of the queue is prepped.
1604 return front()->prepped_;
1610 * Split off an extendable branch from a curtailed branch.
// Thin wrapper: delegates to Branch::splitBranch with this manager's
// pools and the current queue size, then sanity-checks the result.
1612 Branch* splitBranch(Branch* src, RandomSource& rand, uint32_t qlen,
1613 uint32_t qualLim, int seedLen, bool qualOrder, bool fuzzy,
1614 const EbwtParams& ep, const uint8_t* ebwt,
1617 Branch* dst = src->splitBranch(
1618 rpool, epool, bpool, size(), rand,
1619 qlen, qualLim, seedLen, qualOrder, ep, ebwt, ebwtFw, fuzzy,
1621 assert(dst->repOk());
1626 * Prep the next branch to be extended in advanceBranch().
// No-op when the queue is empty.
1628 void prep(const EbwtParams& ep, const uint8_t* ebwt) {
1629 if(!branchQ_.empty()) {
1630 branchQ_.front()->prep(ep, ebwt);
1634 BranchQueue branchQ_; // priority queue for selecting lowest-cost Branch
1635 // set of branches in priority queue, for sanity checks
1636 ASSERT_ONLY(std::set<Branch*> branchSet_);
1640 ChunkPool *cpool; // pool for generic chunks of memory
1641 AllocOnlyPool<Branch> bpool; // pool for allocating Branches
1642 AllocOnlyPool<RangeState> rpool; // pool for allocating RangeStates
1643 AllocOnlyPool<Edit> epool; // pool for allocating Edits
1644 /// The minimum possible cost for any alignments obtained by
1645 /// advancing further
// NOTE(review): the declarations documented by the two comments below
// (minCost, btCnt_) are elided from this listing.
1649 /// Pointer to the aligner's per-read backtrack counter. We
1650 /// increment it in splitBranch.
1657 * Encapsulates an algorithm that navigates the Bowtie index to produce
1658 * candidate ranges of alignments in the Burrows-Wheeler matrix. A
1659 * higher authority is responsible for reporting hits out of those
1660 * ranges, and stopping when the consumer is satisfied.
// Abstract interface for producers of BW-matrix ranges; concrete
// subclasses implement setQuery/initBranch/advanceBranch/range.
1663 typedef Ebwt<String<Dna> > TEbwt;
1666 done(false), foundRange(false), curEbwt_(NULL) { }
1668 virtual ~RangeSource() { }
1670 /// Set query to find ranges for
1671 virtual void setQuery(ReadBuf& r, Range *partial) = 0;
1672 /// Set up the range search.
1673 virtual void initBranch(PathManager& pm) = 0;
1674 /// Advance the range search by one memory op
1675 virtual void advanceBranch(int until, uint16_t minCost, PathManager& pm) = 0;
1677 /// Return the last valid range found
1678 virtual Range& range() = 0;
1679 /// Return ptr to index this RangeSource is currently getting ranges from
1680 const TEbwt *curEbwt() const { return curEbwt_; }
1682 /// All searching w/r/t the current query is finished
1684 /// Set to true iff the last call to advance yielded a range
1687 /// ptr to index this RangeSource is currently getting ranges from
1688 const TEbwt *curEbwt_;
1692 * Abstract parent of RangeSourceDrivers.
// Abstract driver: wraps a TRangeSource and exposes the
// setQuery/advance protocol used by the aligner. The public
// setQuery/advance methods add assert-only bookkeeping around the
// pure-virtual *Impl hooks that subclasses implement.
1694 template<typename TRangeSource>
1695 class RangeSourceDriver {
1696 typedef Ebwt<String<Dna> > TEbwt;
1698 RangeSourceDriver(bool _done, uint32_t minCostAdjustment = 0) :
1699 foundRange(false), done(_done), minCostAdjustment_(minCostAdjustment)
1701 minCost = minCostAdjustment_;
1704 virtual ~RangeSourceDriver() { }
1707 * Prepare this aligner for the next read.
1709 virtual void setQuery(PatternSourcePerThread* patsrc, Range *r) {
1710 // Clear our buffer of previously-dished-out top offsets
1711 ASSERT_ONLY(allTops_.clear());
1712 setQueryImpl(patsrc, r);
1716 * Prepare this aligner for the next read.
1718 virtual void setQueryImpl(PatternSourcePerThread* patsrc, Range *r) = 0;
1721 * Advance the aligner by one memory op. Return true iff we're
1722 * done with this read.
1724 virtual void advance(int until) {
1727 if(this->foundRange) {
1728 // Assert that we have not yet dished out a range with this
// Encode top as a signed key (negated for reverse-index ranges,
// +1 so 0 stays distinguishable) and check it is new.
1730 assert_gt(range().bot, range().top);
1731 assert(range().ebwt != NULL);
1732 int64_t top = (int64_t)range().top;
1733 top++; // ensure it's not 0
1734 if(!range().ebwt->fw()) top = -top;
1735 assert(allTops_.find(top) == allTops_.end());
1736 allTops_.insert(top);
1741 * Advance the aligner by one memory op. Return true iff we're
1742 * done with this read.
1744 virtual void advanceImpl(int until) = 0;
1746 * Return the range found.
1748 virtual Range& range() = 0;
1751 * Return whether we're generating ranges for the first or the
1754 virtual bool mate1() const = 0;
1757 * Return true iff current pattern is forward-oriented.
1759 virtual bool fw() const = 0;
1761 virtual void removeMate(int m) { }
1763 /// Set to true iff we just found a range.
1767 * Set to true if all searching w/r/t the current query is
1768 * finished or if there is no current query.
1773 * The minimum "combined" stratum/qual cost that could possibly
1774 * result from subsequent calls to advance() for this driver.
1779 * Adjustment to the minCost given by the caller that constructed
1780 * the object. This is useful if we know the lowest-cost branch is
1781 * likely to cost less than any of the alignments that could
1782 * possibly result from advancing (e.g. when we're going to force a
1783 * mismatch somewhere down the line).
1785 uint16_t minCostAdjustment_;
// Assert-only record of every range 'top' already dished out.
1790 std::set<int64_t> allTops_;
1795 * A concrete driver wrapper for a single RangeSource.
// Concrete driver around exactly one TRangeSource, owning its own
// PathManager (pm_). NOTE(review): this listing is elided throughout;
// several parameters, statements and closing braces are not shown.
1797 template<typename TRangeSource>
1798 class SingleRangeSourceDriver : public RangeSourceDriver<TRangeSource> {
1800 typedef Ebwt<String<Dna> > TEbwt;
1803 SingleRangeSourceDriver(
1804 EbwtSearchParams<String<Dna> >& params,
1808 HitSinkPerThread* sinkPt,
1809 vector<String<Dna5> >& os,
1813 uint32_t minCostAdjustment,
1816 RangeSourceDriver<TRangeSource>(true, minCostAdjustment),
1817 len_(0), mate1_(mate1),
1821 ebwtFw_(rs_->curEbwt()->fw()),
1822 pm_(pool, btCnt, verbose, quiet)
1824 assert(rs_ != NULL);
// Owns rs_; destroyed here.
1827 virtual ~SingleRangeSourceDriver() {
1828 delete rs_; rs_ = NULL;
1832 * Prepare this aligner for the next read.
1834 virtual void setQueryImpl(PatternSourcePerThread* patsrc, Range *r) {
1836 pm_.reset(patsrc->patid());
// Pick mate-1 or mate-2 buffer depending on which mate we drive.
1837 ReadBuf* buf = mate1_ ? &patsrc->bufa() : &patsrc->bufb();
1838 len_ = buf->length();
1839 rs_->setQuery(*buf, r);
// Use reversed qualities when read and index orientations differ.
1840 initRangeSource((fw_ == ebwtFw_) ? buf->qual : buf->qualRev,
1841 buf->fuzzy, buf->alts,
1842 (fw_ == ebwtFw_) ? buf->altQual : buf->altQualRev);
1844 if(this->done) return;
1845 ASSERT_ONLY(allTops_.clear());
1847 rs_->initBranch(pm_); // set up initial branch
// Seed minCost from the partial-alignment range (if any), floored
// by the caller-supplied adjustment.
1849 uint16_t icost = (r != NULL) ? r->cost : 0;
1850 this->minCost = max<uint16_t>(icost, this->minCostAdjustment_);
1851 this->done = rs_->done;
1852 this->foundRange = rs_->foundRange;
1854 assert(!pm_.front()->curtailed_);
1855 assert(!pm_.front()->exhausted_);
1860 * Advance the aligner by one memory op. Return true iff we're
1861 * done with this read.
1863 virtual void advanceImpl(int until) {
1864 if(this->done || pm_.empty()) {
1868 assert(!pm_.empty());
1869 assert(!pm_.front()->curtailed_);
1870 assert(!pm_.front()->exhausted_);
1872 // Advance the RangeSource for the forward-oriented read
1873 ASSERT_ONLY(uint16_t oldMinCost = this->minCost);
1874 ASSERT_ONLY(uint16_t oldPmMinCost = pm_.minCost);
1875 rs_->advanceBranch(until, this->minCost, pm_);
1876 this->done = pm_.empty();
// pm_.minCost == 0 is a sentinel (see comment below); only adopt
// it as our minCost when non-zero.
1877 if(pm_.minCost != 0) {
1878 this->minCost = max(pm_.minCost, this->minCostAdjustment_);
1880 // pm_.minCost is 0 because we reset it due to exceptional
// Debug-only diagnostics: costs must be monotonically
// non-decreasing as the search advances.
1886 if(pm_.minCost != 0 && pm_.minCost < oldPmMinCost) {
1887 cerr << "PathManager's cost went down" << endl;
1890 if(this->minCost < oldMinCost) {
1891 cerr << "this->minCost cost went down" << endl;
1895 cerr << "pm.minCost went from " << oldPmMinCost
1896 << " to " << pm_.minCost << endl;
1897 cerr << "this->minCost went from " << oldMinCost
1898 << " to " << this->minCost << endl;
1899 cerr << "this->minCostAdjustment_ == "
1900 << this->minCostAdjustment_ << endl;
1905 this->foundRange = rs_->foundRange;
1907 if(this->foundRange) {
1908 if(until >= ADV_COST_CHANGES) assert_eq(oldMinCost, range().cost);
1909 // Assert that we have not yet dished out a range with this
// Same signed-top dedup-key scheme as the base class.
1911 assert_gt(range().bot, range().top);
1912 assert(range().ebwt != NULL);
1913 int64_t top = (int64_t)range().top;
1914 top++; // ensure it's not 0
1915 if(!range().ebwt->fw()) top = -top;
1916 assert(allTops_.find(top) == allTops_.end());
1917 allTops_.insert(top);
1920 assert(!pm_.front()->curtailed_);
1921 assert(!pm_.front()->exhausted_);
1927 * Return the range found.
// Stamps the range with this driver's orientation and mate before
// handing it out.
1929 virtual Range& range() {
1930 rs_->range().fw = fw_;
1931 rs_->range().mate1 = mate1_;
1932 return rs_->range();
1936 * Return whether we're generating ranges for the first or the
1939 bool mate1() const {
1944 * Return true iff current pattern is forward-oriented.
// Hook for subclasses to configure the wrapped range source.
1950 virtual void initRangeSource(const String<char>& qual, bool fuzzy,
1951 int alts, const String<char>* altQuals) = 0;
1959 // Temporary HitSink; to be deleted
1960 HitSinkPerThread* sinkPt_;
1962 // State for alignment
1963 EbwtSearchParams<String<Dna> >& params_;
1965 TRangeSource* rs_; // delete this in destructor
1968 ASSERT_ONLY(std::set<int64_t> allTops_);
1972 * Encapsulates an algorithm that navigates the Bowtie index to produce
1973 * candidate ranges of alignments in the Burrows-Wheeler matrix. A
1974 * higher authority is responsible for reporting hits out of those
1975 * ranges, and stopping when the consumer is satisfied.
// Null-object driver: starts not-done, never finds a range, and
// range() must never be called (it throws).
1977 template<typename TRangeSource>
1978 class StubRangeSourceDriver : public RangeSourceDriver<TRangeSource> {
1980 typedef Ebwt<String<Dna> > TEbwt;
1981 typedef std::vector<RangeSourceDriver<TRangeSource>*> TRangeSrcDrPtrVec;
1985 StubRangeSourceDriver() :
1986 RangeSourceDriver<TRangeSource>(false)
1989 this->foundRange = false;
1992 virtual ~StubRangeSourceDriver() { }
1994 /// Set query to find ranges for
1995 virtual void setQueryImpl(PatternSourcePerThread* patsrc, Range *r) { }
1997 /// Advance the range search by one memory op
1998 virtual void advanceImpl(int until) { }
2000 /// Return the last valid range found
2001 virtual Range& range() { throw 1; }
2004 * Return whether we're generating ranges for the first or the
2007 virtual bool mate1() const {
2012 * Return true iff current pattern is forward-oriented.
2014 virtual bool fw() const {
2021 * Encapsulates an algorithm that navigates the Bowtie index to produce
2022 * candidate ranges of alignments in the Burrows-Wheeler matrix. A
2023 * higher authority is responsible for reporting hits out of those
2024 * ranges, and stopping when the consumer is satisfied.
// Sequential composite driver: exhausts each contained
// RangeSourceDriver in list order before moving to the next.
// NOTE(review): listing elided in places (line numbers jump).
2026 template<typename TRangeSource>
2027 class ListRangeSourceDriver : public RangeSourceDriver<TRangeSource> {
2029 typedef Ebwt<String<Dna> > TEbwt;
2030 typedef std::vector<RangeSourceDriver<TRangeSource>*> TRangeSrcDrPtrVec;
2034 ListRangeSourceDriver(const TRangeSrcDrPtrVec& rss) :
2035 RangeSourceDriver<TRangeSource>(false),
2036 cur_(0), ham_(0), rss_(rss) /* copy */,
2037 patsrc_(NULL), seedRange_(NULL)
2039 assert_gt(rss_.size(), 0);
2040 assert(!this->done);
// Owns the contained drivers; deleted here.
2043 virtual ~ListRangeSourceDriver() {
2044 for(size_t i = 0; i < rss_.size(); i++) {
2049 /// Set query to find ranges for
2050 virtual void setQueryImpl(PatternSourcePerThread* patsrc, Range *r) {
2051 cur_ = 0; // go back to first RangeSource in list
2053 rss_[cur_]->setQuery(patsrc, r);
2054 patsrc_ = patsrc; // so that we can call setQuery on the other elements later
// Done only when the last driver in the list is done.
2056 this->done = (cur_ == rss_.size()-1) && rss_[cur_]->done;
2057 this->minCost = max(rss_[cur_]->minCost, this->minCostAdjustment_);
2058 this->foundRange = rss_[cur_]->foundRange;
2061 /// Advance the range search by one memory op
2062 virtual void advanceImpl(int until) {
2063 assert(!this->done);
2064 assert_lt(cur_, rss_.size());
2065 if(rss_[cur_]->done) {
2066 // Move on to next RangeSourceDriver
2067 if(cur_ < rss_.size()-1) {
2068 rss_[++cur_]->setQuery(patsrc_, seedRange_);
2069 this->minCost = max(rss_[cur_]->minCost, this->minCostAdjustment_);
2070 this->foundRange = rss_[cur_]->foundRange;
2072 // No RangeSources in list; done
2077 // Advance current RangeSource
2078 rss_[cur_]->advance(until);
2079 this->done = (cur_ == rss_.size()-1 && rss_[cur_]->done);
2080 this->foundRange = rss_[cur_]->foundRange;
2081 this->minCost = max(rss_[cur_]->minCost, this->minCostAdjustment_);
2085 /// Return the last valid range found
2086 virtual Range& range() { return rss_[cur_]->range(); }
2089 * Return whether we're generating ranges for the first or the
// Delegated to the first driver; presumably all drivers in the
// list share the same mate/orientation -- confirm in full file.
2092 virtual bool mate1() const {
2093 return rss_[0]->mate1();
2097 * Return true iff current pattern is forward-oriented.
2099 virtual bool fw() const {
2100 return rss_[0]->fw();
2107 TRangeSrcDrPtrVec rss_;
2108 PatternSourcePerThread* patsrc_;
2113 * A RangeSourceDriver that wraps a set of other RangeSourceDrivers and
2114 * chooses which one to advance at any given moment by picking one with
2115 * minimal "cumulative cost" so far.
2117 * Note that costs have to be "adjusted" to account for the fact that
2118 * the alignment policy for the underlying RangeSource might force
// Cost-aware composite driver: keeps its children sorted by
// cumulative minCost and always advances the cheapest one.
// NOTE(review): listing elided in places (line numbers jump).
2121 template<typename TRangeSource>
2122 class CostAwareRangeSourceDriver : public RangeSourceDriver<TRangeSource> {
2124 typedef Ebwt<String<Dna> > TEbwt;
2125 typedef RangeSourceDriver<TRangeSource>* TRangeSrcDrPtr;
2126 typedef std::vector<TRangeSrcDrPtr> TRangeSrcDrPtrVec;
2130 CostAwareRangeSourceDriver(
2132 const TRangeSrcDrPtrVec* rss,
2136 RangeSourceDriver<TRangeSource>(false),
2137 rss_(), active_(), strandFix_(strandFix),
2138 lastRange_(NULL), delayedRange_(NULL), patsrc_(NULL),
2139 verbose_(verbose), quiet_(quiet), mixesReads_(mixesReads)
2145 this->foundRange = false;
2155 /// Destroy all underlying RangeSourceDrivers
2156 virtual ~CostAwareRangeSourceDriver() {
2157 const size_t rssSz = rss_.size();
2158 for(size_t i = 0; i < rssSz; i++) {
2165 /// Set query to find ranges for
2166 virtual void setQueryImpl(PatternSourcePerThread* patsrc, Range *r) {
2168 this->foundRange = false;
2170 delayedRange_ = NULL;
2171 ASSERT_ONLY(allTopsRc_.clear());
// Seed the tie-breaking RNG from the read's own seed so results
// are reproducible per read.
2173 rand_.init(patsrc->bufa().seed);
2174 const size_t rssSz = rss_.size();
2175 if(rssSz == 0) return;
2176 for(size_t i = 0; i < rssSz; i++) {
2177 // Assuming that all
2178 rss_[i]->setQuery(patsrc, r);
2186 * Add a new RangeSource to the list and re-sort it.
2188 void addSource(TRangeSrcDrPtr p, Range *r) {
2189 assert(!this->foundRange);
2190 this->lastRange_ = NULL;
2191 this->delayedRange_ = NULL;
// If a query is already set, propagate it to the newcomer.
2193 if(patsrc_ != NULL) {
2194 p->setQuery(patsrc_, r);
2197 active_.push_back(p);
2204 * Free and remove all contained RangeSources.
2206 void clearSources() {
2207 const size_t rssSz = rss_.size();
2208 for(size_t i = 0; i < rssSz; i++) {
2217 * Return the number of RangeSources contained within.
2219 size_t size() const {
2224 * Return true iff no RangeSources are contained within.
2226 bool empty() const {
2227 return rss_.empty();
2231 * Advance the aligner by one memory op. Return true iff we're
2232 * done with this read.
2234 virtual void advance(int until) {
2235 ASSERT_ONLY(uint16_t precost = this->minCost);
2236 assert(!this->done);
2237 assert(!this->foundRange);
// Never advance with a weaker stopping condition than
// ADV_COST_CHANGES; cost bookkeeping depends on it.
2238 until = max<int>(until, ADV_COST_CHANGES);
2240 assert(!this->foundRange || lastRange_ != NULL);
2241 if(this->foundRange) {
2242 assert_eq(range().cost, precost);
2246 /// Advance the range search
// If a delayed (opposite-strand) range is pending, emit it first;
// otherwise advance the cheapest active driver and harvest any
// range it found, re-sorting the active list when costs changed.
2247 virtual void advanceImpl(int until) {
2249 ASSERT_ONLY(uint16_t iminCost = this->minCost);
2250 const size_t actSz = active_.size();
2251 assert(sortedActives());
2252 if(delayedRange_ != NULL) {
2253 assert_eq(iminCost, delayedRange_->cost);
2254 lastRange_ = delayedRange_;
2255 delayedRange_ = NULL;
2256 this->foundRange = true;
2257 assert_eq(range().cost, iminCost);
2258 if(!active_.empty()) {
2259 assert_geq(active_[0]->minCost, this->minCost);
2260 this->minCost = max(active_[0]->minCost, this->minCost);
2264 return; // found a range
2266 assert(delayedRange_ == NULL);
2267 if(mateEliminated() || actSz == 0) {
2268 // No more alternatives; clear the active set and signal
2274 // Advance lowest-cost RangeSourceDriver
2275 TRangeSrcDrPtr p = active_[0];
2276 uint16_t precost = p->minCost;
2277 assert(!p->done || p->foundRange);
2278 if(!p->foundRange) {
2281 bool needsSort = false;
2283 Range *r = &p->range();
2284 assert_eq(r->cost, iminCost);
2285 needsSort = foundFirstRange(r); // may set delayedRange_; re-sorts active_
2286 assert_eq(lastRange_->cost, iminCost);
2287 if(delayedRange_ != NULL) assert_eq(delayedRange_->cost, iminCost);
2288 p->foundRange = false;
// Re-sort if the driver finished, its cost moved, or
// foundFirstRange perturbed costs.
2290 if(p->done || (precost != p->minCost) || needsSort) {
2292 if(mateEliminated() || active_.empty()) {
2294 this->done = (delayedRange_ == NULL);
2297 assert(sortedActives());
2298 assert(lastRange_ == NULL || lastRange_->cost == iminCost);
2299 assert(delayedRange_ == NULL || delayedRange_->cost == iminCost);
2302 /// Return the last valid range found
2303 virtual Range& range() {
2304 assert(lastRange_ != NULL);
2309 * Return whether we're generating ranges for the first or the
2312 virtual bool mate1() const {
2313 return rss_[0]->mate1();
2317 * Return true iff current pattern is forward-oriented.
2319 virtual bool fw() const {
2320 return rss_[0]->fw();
// Mark every active driver for the given mate (1 or 2) as done.
2323 virtual void removeMate(int m) {
2324 bool qmate1 = (m == 1);
2326 for(size_t i = 0; i < active_.size(); i++) {
2327 if(active_[i]->mate1() == qmate1) {
2328 active_[i]->done = true;
2332 assert(mateEliminated());
2338 * Set paired_ to true iff there are mate1 and mate2 range sources
2339 * in the rss_ vector.
2342 const size_t rssSz = rss_.size();
2345 for(size_t i = 0; i < rssSz; i++) {
2346 if(rss_[i]->mate1()) saw1 = true;
2349 assert(saw1 || saw2);
2350 paired_ = saw1 && saw2;
2354 * Return true iff one mate or the other has been eliminated.
// Only meaningful for paired input; scans the active (not-done)
// drivers for surviving mate-1 and mate-2 sources.
2356 bool mateEliminated() {
2357 if(!paired_) return false;
2358 bool mate1sLeft = false;
2359 bool mate2sLeft = false;
2360 // If this RangeSourceDriver is done, shift everyone else
2362 const size_t rssSz = active_.size();
2363 for(size_t i = 0; i < rssSz; i++) {
2364 if(!active_[i]->done) {
2365 if(active_[i]->mate1()) mate1sLeft = true;
2366 else mate2sLeft = true;
2369 return !mate1sLeft || !mate2sLeft;
2374 * Check that the given range has not yet been reported.
// Assert-only dedup using the signed-top key; ranges are only
// recorded when this driver doesn't mix reads.
2376 bool checkRange(Range* r) {
2377 // Assert that we have not yet dished out a range with this
2379 assert_gt(r->bot, r->top);
2380 assert(r->ebwt != NULL);
2381 int64_t top = (int64_t)r->top;
2382 top++; // ensure it's not 0
2383 if(!r->ebwt->fw()) top = -top;
2385 assert(this->allTops_.find(top) == this->allTops_.end());
2386 if(!mixesReads_) this->allTops_.insert(top);
2388 assert(this->allTopsRc_.find(top) == this->allTopsRc_.end());
2389 if(!mixesReads_) this->allTopsRc_.insert(top);
2396 * We found a range; check whether we should attempt to find a
2397 * range of equal quality from the opposite strand so that we can
2398 * resolve the strand bias. Return true iff we modified the cost
2399 * of one or more items after the first item.
2401 bool foundFirstRange(Range* r) {
2403 assert(checkRange(r));
2404 this->foundRange = true;
2407 // We found a range but there may be an equally good range
2408 // on the other strand; let's try to get it.
2409 const size_t sz = active_.size();
2410 for(size_t i = 1; i < sz; i++) {
2411 // Same mate, different orientation?
2412 if(rss_[i]->mate1() == r->mate1 && rss_[i]->fw() != r->fw) {
2413 // Yes; see if it has the same cost
2414 TRangeSrcDrPtr p = active_[i];
2415 uint16_t minCost = max(this->minCost, p->minCost);
// Opposite-strand driver is already more expensive; give up.
2416 if(minCost > r->cost) break;
2417 // Yes, it has the same cost
2418 assert_eq(minCost, r->cost); // can't be better
2419 // Advance it until it's done, we've found a range,
2420 // or its cost increases
2421 if(this->verbose_) cout << " Looking for opposite range to avoid strand bias:" << endl;
2422 while(!p->done && !p->foundRange) {
2423 p->advance(ADV_COST_CHANGES);
2424 assert_geq(p->minCost, minCost);
2425 if(p->minCost > minCost) break;
2428 // Found one! Now we have to choose which one
2429 // to give out first; we choose randomly using
2430 // the size of the ranges as weights.
2431 delayedRange_ = &p->range();
2432 assert(checkRange(delayedRange_));
// Weighted coin flip: probability proportional to each
// range's width (bot - top).
2433 size_t tot = (delayedRange_->bot - delayedRange_->top) +
2434 (lastRange_->bot - lastRange_->top);
2435 uint32_t rq = rand_.nextU32() % tot;
2436 // We picked this range, not the first one
2437 if(rq < (delayedRange_->bot - delayedRange_->top)) {
2438 Range *tmp = lastRange_;
2439 lastRange_ = delayedRange_;
2440 delayedRange_ = tmp;
2442 p->foundRange = false;
2444 // Return true iff we need to force a re-sort
2448 // OK, now we have a choice of two equally good ranges from
2455 * Sort all of the RangeSourceDriver ptrs in the rss_ array so that
2456 * the one with the lowest cumulative cost is at the top. Break
2457 * ties randomly. Just do selection sort for now; we don't expect
2458 * the list to be long.
2460 void sortActives() {
2461 TRangeSrcDrPtrVec& vec = active_;
2462 size_t sz = vec.size();
2463 // Selection sort / removal outer loop
2464 for(size_t i = 0; i < sz;) {
2465 // Remove elements that we're done with
2466 if(vec[i]->done && !vec[i]->foundRange) {
2467 vec.erase(vec.begin() + i);
2472 uint16_t minCost = vec[i]->minCost;
2474 // Selection sort inner loop
2475 for(size_t j = i+1; j < sz; j++) {
2476 if(vec[j]->done && !vec[j]->foundRange) {
2477 // We'll get rid of this guy later
2480 if(vec[j]->minCost < minCost) {
2481 minCost = vec[j]->minCost;
2483 } else if(vec[j]->minCost == minCost) {
2484 // Possibly randomly pick the other
// NOTE(review): tests a single fixed bit of the random
// word (& 0x1000), i.e. a 50/50 choice per comparison.
2485 if(rand_.nextU32() & 0x1000) {
2490 // Do the swap, if necessary
2492 assert_leq(minCost, vec[i]->minCost);
2493 TRangeSrcDrPtr tmp = vec[i];
2494 vec[i] = vec[minOff];
// After sorting, the composite's minCost follows the new head
// unless a delayed range is pending (its cost governs then).
2499 if(delayedRange_ == NULL) {
2500 assert_geq(this->minCost, this->minCostAdjustment_);
2501 assert_geq(vec[0]->minCost, this->minCost);
2502 this->minCost = vec[0]->minCost;
2504 assert(sortedActives());
2509 * Check that the rss_ array is sorted by minCost; assert if it's
// Debug-only O(n^2) verification of the active list's ordering.
2512 bool sortedActives() const {
2513 // Selection sort outer loop
2514 const TRangeSrcDrPtrVec& vec = active_;
2515 const size_t sz = vec.size();
2516 for(size_t i = 0; i < sz; i++) {
2517 assert(!vec[i]->done || vec[i]->foundRange);
2518 for(size_t j = i+1; j < sz; j++) {
2519 assert(!vec[j]->done || vec[j]->foundRange);
2520 assert_leq(vec[i]->minCost, vec[j]->minCost);
2523 if(delayedRange_ == NULL && sz > 0) {
2524 // Only assert this if there's no delayed range; if there's
2525 // a delayed range, the minCost is its cost, not the 0th
2527 assert_leq(vec[0]->minCost, this->minCost);
2533 /// List of all the drivers
2534 TRangeSrcDrPtrVec rss_;
2535 /// List of all the as-yet-uneliminated drivers
2536 TRangeSrcDrPtrVec active_;
2537 /// Whether the list of drivers contains drivers for both mates 1 and 2
2539 /// If true, this driver will make an attempt to dish out ranges in
2540 /// a way that approaches the right distribution based on the
2541 /// number of hits on both strands.
2544 /// The random seed from the Aligner, which we use to randomly break ties
// NOTE(review): several member declarations (paired_, strandFix_,
// rand_, lastRange_, ...) are elided from this listing.
2547 Range *delayedRange_;
2548 PatternSourcePerThread* patsrc_;
2552 ASSERT_ONLY(std::set<int64_t> allTopsRc_);
2555 #endif /* RANGE_SOURCE_H_ */