5 #ifndef RANGE_CHASER_H_
6 #define RANGE_CHASER_H_
10 #include "random_source.h"
11 #include "row_chaser.h"
12 #include "range_cache.h"
13 #include "aligner_metrics.h"
16 * A class that statefully processes a range by picking one row
17 * randomly and then linearly scanning forward through the range,
18 * reporting reference offsets as we go.
20 template<typename TStr>
23 typedef Ebwt<TStr> TEbwt;
24 typedef std::pair<uint32_t,uint32_t> U32Pair;
25 typedef std::vector<U32Pair> U32PairVec;
26 typedef RowChaser<TStr> TRowChaser;
29 RangeChaser(uint32_t cacheThresh,
30 RangeCache* cacheFw, RangeCache* cacheBw,
31 AlignerMetrics *metrics = NULL) :
35 cacheThresh_(cacheThresh),
40 off_(make_pair(0xffffffff, 0)),
44 cacheFw_(cacheFw), cacheBw_(cacheBw),
51 * Convert a range to a vector of reference loci, where a locus is
52 * a u32 pair of <ref-idx, ref-offset>.
54 static void toOffs(const TEbwt& ebwt,
61 RangeChaser rc(ebwt, rand);
62 rc.setTopBot(top, bot, qlen);
67 dest.push_back(rc.off());
69 assert(!rc.foundOff());
76 * Set the row to chase
78 void setRow(uint32_t row) {
79 // Must be within bounds of range
81 assert_geq(row, top_);
84 // First thing to try is the cache
86 assert(cacheEnt_.valid());
87 uint32_t cached = cacheEnt_.get(row_ - top_);
88 assert(cacheEnt_.valid());
89 if(cached != RANGE_NOT_SET) {
90 // Assert that it matches what we would have got...
91 ASSERT_ONLY(uint32_t sanity = TRowChaser::toFlatRefOff(ebwt_, 1, row_));
92 assert_eq(sanity, cached);
93 // We have a cached result. Cached result is in the
94 // form of an offset into the joined reference string,
95 // so now we have to convert it to a tidx/toff pair.
96 ebwt_->joinedToTextOff(qlen_, cached, off_.first, off_.second, tlen_);
97 // Note: tidx may be 0xffffffff, if alignment overlaps a
99 if(off_.first != 0xffffffff) {
100 // Bingo, we found a valid result using the cache
105 // Wasn't in the cache; use the RowChaser
108 // Second thing to try is the chaser
109 chaser_.setRow(row_, qlen_, ebwt_);
110 assert(chaser_.prepped_ || chaser_.done);
111 // It might be done immediately...
113 // We're done immediately
114 off_ = chaser_.off();
115 if(off_.first != 0xffffffff) {
116 // This is a valid result
118 // Install the result in the cache
119 assert(cacheEnt_.valid());
120 cacheEnt_.install(row_ - top_, chaser_.flatOff());
121 //if(ebwt_->fw()) assert(cacheFw_->repOk());
122 //else assert(cacheBw_->repOk());
124 tlen_ = chaser_.tlen();
126 return; // found result
132 // That row didn't have a valid result, move to the next
139 // Exhausted all possible rows
141 assert_eq(0xffffffff, off_.first);
145 assert(chaser_.prepped_);
149 * Set the next range for us to "chase" (i.e. convert row-by-row
150 * to reference loci).
152 void setTopBot(uint32_t top,
158 assert_neq(0xffffffff, top);
159 assert_neq(0xffffffff, bot);
162 assert(ebwt != NULL);
167 uint32_t spread = bot - top;
168 irow_ = top + (rand.nextU32() % spread); // initial row
172 if(cacheFw_ != NULL || cacheBw_ != NULL) {
173 if(spread > cacheThresh_) {
175 if(ebwt->fw() && cacheFw_ != NULL) {
176 ret = cacheFw_->lookup(top, bot, cacheEnt_);
177 if(ret) assert(cacheEnt_.ebwt()->fw());
178 } else if(!ebwt->fw() && cacheBw_ != NULL) {
179 ret = cacheBw_->lookup(top, bot, cacheEnt_);
180 if(ret) assert(!cacheEnt_.ebwt()->fw());
184 assert_eq(cacheEnt_.valid(), ret);
191 assert(chaser_.prepped_ || foundOff() || done);
195 * Advance the step-left process by one step. Check if we're done.
199 assert(chaser_.prepped_ || chaser_.done);
202 // chaser finished with this row
208 // Exhausted all possible rows
210 assert_eq(0xffffffff, off_.first);
214 assert(chaser_.prepped_ || foundOff() || done);
217 assert(chaser_.prepped_ || chaser_.done);
219 // We're done immediately
220 off_ = chaser_.off();
221 if(off_.first != 0xffffffff) {
223 // Install the result in the cache
224 assert(cacheEnt_.valid());
225 cacheEnt_.install(row_ - top_, chaser_.flatOff());
226 //if(ebwt_->fw()) assert(cacheFw_->repOk());
227 //else assert(cacheBw_->repOk());
229 // Found a reference position
230 tlen_ = chaser_.tlen();
238 * Prepare for the next call to advance() by prefetching relevant
239 * data. In this case, 'chaser_' is doing this for us.
246 * Return true iff off_ contains a valid reference location for
249 bool foundOff() const {
250 return off_.first != 0xffffffff;
254 * Reset the chaser so that 'off_' does not hold a valid offset and
255 * foundOff() returns false.
258 off_.first = 0xffffffff;
262 * Get the calculated offset.
264 U32Pair off() const {
269 * Get the length of the hit reference.
271 uint32_t tlen() const {
275 bool done; /// true = chase is done & answer is in off_
279 const TEbwt* ebwt_; /// index to resolve row in
280 uint32_t qlen_; /// length of read; needed to convert to ref. coordinates
281 uint32_t cacheThresh_; /// ranges wider than thresh use cacheing
282 uint32_t top_; /// range top
283 uint32_t bot_; /// range bottom
284 uint32_t irow_; /// initial randomly-chosen row within range
285 uint32_t row_; /// current row within range
286 U32Pair off_; /// calculated offset (0xffffffff if not done)
287 uint32_t tlen_; /// length of text hit
288 TRowChaser chaser_; /// stateful row chaser
289 RangeCacheEntry cacheEnt_; /// current cache entry
290 bool cached_; /// cacheEnt is active for current range?
291 RangeCache* cacheFw_; /// cache for the forward index
292 RangeCache* cacheBw_; /// cache for the backward index
293 AlignerMetrics *metrics_;
296 #endif /* RANGE_CHASER_H_ */