Imported Upstream version 0.12.7
[bowtie.git] / row_chaser.h
1 /*
2  * row_chaser.h
3  */
4
5 #ifndef ROW_CHASER_H_
6 #define ROW_CHASER_H_
7
8 #include <iostream>
9 #include "seqan/sequence.h"
10 #include "ebwt.h"
11 #include "aligner_metrics.h"
12
13 /**
14  * A class that statefully converts a row index to a reference
15  * location.  There is a large memory-latency penalty usually
16  * associated with calling the Ebwt object's mapLF method, which this
17  * object does repeatedly in order to resolve the reference offset.
18  * The "statefulness" in how the computation is organized here allows
19  * some or all of that penalty to be hidden using prefetching.
20  */
21 template<typename TStr>
22 class RowChaser {
23
24         typedef std::pair<uint32_t,uint32_t> U32Pair;
25         typedef Ebwt<TStr> TEbwt;
26
27 public:
28         RowChaser(AlignerMetrics *metrics = NULL) :
29                 done(false),
30                 prepped_(false),
31                 ebwt_(NULL),
32                 qlen_(0),
33                 eh_(NULL),
34                 row_(0xffffffff),
35                 jumps_(0),
36                 sideloc_(),
37                 off_(0xffffffff),
38                 tlen_(0),
39                 metrics_(metrics)
40         { }
41
42         /**
43          * Convert a row to a joined reference offset.  This has to be
44          * converted to understand where it is w/r/t the reference hit and
45          * offset within it.
46          */
47         static uint32_t toFlatRefOff(const TEbwt* ebwt, uint32_t qlen, uint32_t row) {
48                 RowChaser rc;
49                 rc.setRow(row, qlen, ebwt);
50                 while(!rc.done) {
51                         rc.advance();
52                 }
53                 return rc.flatOff();
54         }
55
56         /**
57          * Convert a row to a reference offset.
58          */
59         static U32Pair toRefOff(const TEbwt* ebwt, uint32_t qlen, uint32_t row) {
60                 RowChaser rc;
61                 rc.setRow(row, qlen, ebwt);
62                 while(!rc.done) {
63                         rc.advance();
64                 }
65                 return rc.off();
66         }
67
68         /**
69          * Set the next row for us to "chase" (i.e. map to a reference
70          * location using the BWT step-left operation).
71          */
72         void setRow(uint32_t row, uint32_t qlen, const TEbwt* ebwt) {
73                 assert_neq(0xffffffff, row);
74                 assert_gt(qlen, 0);
75                 assert(ebwt != NULL);
76                 ebwt_ = ebwt;
77                 eh_ = &ebwt->_eh;
78                 row_ = row;
79                 qlen_ = qlen;
80                 ASSERT_ONLY(sideloc_.invalidate());
81                 if(row_ == ebwt_->_zOff) {
82                         // We arrived at the extreme left-hand end of the reference
83                         off_ = 0;
84                         done = true;
85                         return;
86                 } else if((row_ & eh_->_offMask) == row_) {
87                         // We arrived at a marked row
88                         off_ = ebwt_->_offs[row_ >> eh_->_offRate];
89                         done = true;
90                         return;
91                 }
92                 done = false;
93                 jumps_ = 0;
94                 off_ = 0xffffffff;
95                 prepped_ = false;
96                 prep();
97         }
98
99         /**
100          * Advance the step-left process by one step.  Check if we're done.
101          */
102         void advance() {
103                 // Advance by 1
104                 assert(!done);
105                 while(!done) {
106                         assert(prepped_);
107                         prepped_ = false;
108                         if(metrics_ != NULL) metrics_->curBwtOps_++;
109                         uint32_t newrow = ebwt_->mapLF(sideloc_);
110                         ASSERT_ONLY(sideloc_.invalidate());
111                         jumps_++;
112                         assert_neq(newrow, row_);
113                         // Update row_ field
114                         row_ = newrow;
115                         if(row_ == ebwt_->_zOff) {
116                                 // We arrived at the extreme left-hand end of the reference
117                                 off_ = jumps_;
118                                 done = true;
119                         } else if((row_ & eh_->_offMask) == row_) {
120                                 // We arrived at a marked row
121                                 off_ = ebwt_->_offs[row_ >> eh_->_offRate] + jumps_;
122                                 done = true;
123                         }
124                         prep();
125                 }
126         }
127
128         /**
129          * Prepare for the next call to advance() by prefetching the
130          * appropriate portions of the index.  The caller should make sure
131          * that the
132          */
133         void prep() {
134                 if(!done) {
135                         assert(!prepped_);
136                         assert(!sideloc_.valid());
137                         assert_leq(row_, eh_->_len);
138                         sideloc_.initFromRow(row_, *eh_, (const uint8_t*)ebwt_->_ebwt);
139                         assert(sideloc_.valid());
140                 }
141                 prepped_ = true;
142         }
143
144         /**
145          * Get the calculated offset.  This has to be converted with a call
146          * to Ebwt::joinedToTextOff() to understand where it is w/r/t the
147          * reference hit and offset within it.
148          */
149         uint32_t flatOff() const {
150                 return off_;
151         }
152
153         /**
154          * Get the calculated offset.
155          */
156         U32Pair off() {
157                 uint32_t off = flatOff();
158                 assert_neq(0xffffffff, off);
159                 uint32_t tidx;
160                 uint32_t textoff = 0xffffffff;
161                 ebwt_->joinedToTextOff(qlen_, off, tidx, textoff, tlen_);
162                 // Note: tidx may be 0xffffffff, if alignment overlaps a
163                 // reference boundary
164                 return make_pair(tidx, textoff);
165         }
166
167         uint32_t tlen() const {
168                 return tlen_;
169         }
170
171         bool done;               /// true = chase is done & answer is in off_
172         bool prepped_; /// true = prefetch is issued and it's OK to call advance()
173
174 protected:
175
176         const TEbwt* ebwt_;      /// index to resolve row in
177         uint32_t qlen_;          /// length of read; needed to convert to ref. coordinates
178         const EbwtParams* eh_;   /// eh field from index
179         uint32_t row_;           /// current row
180         uint32_t jumps_;         /// # steps so far
181         SideLocus sideloc_;      /// current side locus
182         uint32_t off_;           /// calculated offset (0xffffffff if not done)
183         uint32_t tlen_;          /// hit text length
184         AlignerMetrics *metrics_;
185 };
186
187 #endif /* ROW_CHASER_H_ */