9 #include "seqan/sequence.h"
11 #include "aligner_metrics.h"
14 * A class that statefully converts a row index to a reference
15 * location. There is a large memory-latency penalty usually
16 * associated with calling the Ebwt object's mapLF method, which this
17 * object does repeatedly in order to resolve the reference offset.
18 * The "statefulness" in how the computation is organized here allows
19 * some or all of that penalty to be hidden using prefetching.
21 template<typename TStr>
24 typedef std::pair<uint32_t,uint32_t> U32Pair;
25 typedef Ebwt<TStr> TEbwt;
28 RowChaser(AlignerMetrics *metrics = NULL) :
43 * Convert a row to a joined reference offset. This still has to be
44 * converted to understand where it is w/r/t the reference hit and the offset within it.
47 static uint32_t toFlatRefOff(const TEbwt* ebwt, uint32_t qlen, uint32_t row) {
49 rc.setRow(row, qlen, ebwt);
57 * Convert a row to a reference offset.
59 static U32Pair toRefOff(const TEbwt* ebwt, uint32_t qlen, uint32_t row) {
61 rc.setRow(row, qlen, ebwt);
69 * Set the next row for us to "chase" (i.e. map to a reference
70 * location using the BWT step-left operation).
72 void setRow(uint32_t row, uint32_t qlen, const TEbwt* ebwt) {
73 assert_neq(0xffffffff, row);
80 ASSERT_ONLY(sideloc_.invalidate());
81 if(row_ == ebwt_->_zOff) {
82 // We arrived at the extreme left-hand end of the reference
86 } else if((row_ & eh_->_offMask) == row_) {
87 // We arrived at a marked row
88 off_ = ebwt_->_offs[row_ >> eh_->_offRate];
100 * Advance the step-left process by one step. Check if we're done.
108 if(metrics_ != NULL) metrics_->curBwtOps_++;
109 uint32_t newrow = ebwt_->mapLF(sideloc_);
110 ASSERT_ONLY(sideloc_.invalidate());
112 assert_neq(newrow, row_);
115 if(row_ == ebwt_->_zOff) {
116 // We arrived at the extreme left-hand end of the reference
119 } else if((row_ & eh_->_offMask) == row_) {
120 // We arrived at a marked row
121 off_ = ebwt_->_offs[row_ >> eh_->_offRate] + jumps_;
129 * Prepare for the next call to advance() by prefetching the
130 * appropriate portions of the index. The caller should make sure
136 assert(!sideloc_.valid());
137 assert_leq(row_, eh_->_len);
138 sideloc_.initFromRow(row_, *eh_, (const uint8_t*)ebwt_->_ebwt);
139 assert(sideloc_.valid());
145 * Get the calculated offset. This has to be converted with a call
146 * to Ebwt::joinedToTextOff() to understand where it is w/r/t the
147 * reference hit and offset within it.
149 uint32_t flatOff() const {
154 * Get the calculated offset.
157 uint32_t off = flatOff();
158 assert_neq(0xffffffff, off);
160 uint32_t textoff = 0xffffffff;
161 ebwt_->joinedToTextOff(qlen_, off, tidx, textoff, tlen_);
162 // Note: tidx may be 0xffffffff, if alignment overlaps a
163 // reference boundary
164 return make_pair(tidx, textoff);
167 uint32_t tlen() const {
171 bool done; /// true once the chase has finished and the answer is available in off_
172 bool prepped_; /// true once the prefetch has been issued, i.e. it is OK to call advance()
176 const TEbwt* ebwt_; /// index the row is resolved in (supplies mapLF(), _zOff and _offs)
177 uint32_t qlen_; /// length of read; passed to joinedToTextOff() to convert to ref. coordinates
178 const EbwtParams* eh_; /// eh field from index; _offMask, _offRate and _len are read from it
179 uint32_t row_; /// row currently being chased
180 uint32_t jumps_; /// # steps so far; added to the stored offset when a marked row is reached
181 SideLocus sideloc_; /// current side locus; set up by initFromRow() and consumed by mapLF()
182 uint32_t off_; /// calculated offset (0xffffffff if not done)
183 uint32_t tlen_; /// hit text length; handed to joinedToTextOff() (presumably filled in there - confirm)
184 AlignerMetrics *metrics_; /// optional metrics sink; may be NULL, in which case BWT ops are not counted
187 #endif /* ROW_CHASER_H_ */