Commit patch to not break on spaces.
[bowtie.git] / aligner_1mm.h
1 /*
2  * aligner_1mm.h
3  */
4
5 #ifndef ALIGNER_1MM_H_
6 #define ALIGNER_1MM_H_
7
8 #include <utility>
9 #include <vector>
10 #include "aligner.h"
11 #include "hit.h"
12 #include "range_source.h"
13 #include "row_chaser.h"
14 #include "range_chaser.h"
15 #include "ref_aligner.h"
16
17 /**
18  * Concrete factory class for constructing unpaired exact aligners.
19  */
20 class Unpaired1mmAlignerV1Factory : public AlignerFactory {
21         typedef RangeSourceDriver<EbwtRangeSource> TRangeSrcDr;
22         typedef CostAwareRangeSourceDriver<EbwtRangeSource> TCostAwareRangeSrcDr;
23         typedef std::vector<TRangeSrcDr*> TRangeSrcDrPtrVec;
24 public:
25         Unpaired1mmAlignerV1Factory(
26                         Ebwt<String<Dna> >& ebwtFw,
27                         Ebwt<String<Dna> >* ebwtBw,
28                         bool doFw,
29                         bool doRc,
30                         HitSink& sink,
31                         const HitSinkPerThreadFactory& sinkPtFactory,
32                         RangeCache *cacheFw,
33                         RangeCache *cacheBw,
34                         uint32_t cacheLimit,
35                         ChunkPool *pool,
36                         BitPairReference* refs,
37                         vector<String<Dna5> >& os,
38                         bool maqPenalty,
39                         bool qualOrder,
40                         bool strandFix,
41                         bool rangeMode,
42                         bool verbose,
43                         bool quiet,
44                         uint32_t seed) :
45                         ebwtFw_(ebwtFw),
46                         ebwtBw_(ebwtBw),
47                         doFw_(doFw),
48                         doRc_(doRc),
49                         sink_(sink),
50                         sinkPtFactory_(sinkPtFactory),
51                         cacheFw_(cacheFw),
52                         cacheBw_(cacheBw),
53                         cacheLimit_(cacheLimit),
54                         pool_(pool),
55                         refs_(refs),
56                         os_(os),
57                         maqPenalty_(maqPenalty),
58                         qualOrder_(qualOrder),
59                         strandFix_(strandFix),
60                         rangeMode_(rangeMode),
61                         verbose_(verbose),
62                         quiet_(quiet),
63                         seed_(seed)
64         {
65                 assert(ebwtFw.isInMemory());
66                 assert(ebwtBw != NULL);
67                 assert(ebwtBw->isInMemory());
68         }
69
70         /**
71          * Create a new UnpairedExactAlignerV1s.
72          */
73         virtual Aligner* create() const {
74
75                 HitSinkPerThread* sinkPt = sinkPtFactory_.create();
76                 EbwtSearchParams<String<Dna> >* params =
77                         new EbwtSearchParams<String<Dna> >(*sinkPt, os_);
78
79                 const int halfAndHalf = 0;
80                 const bool seeded = false;
81
82                 EbwtRangeSource *rFw_Bw = new EbwtRangeSource(
83                          ebwtBw_, true, 0xffffffff, true,  verbose_, quiet_, halfAndHalf, seeded, maqPenalty_, qualOrder_);
84                 EbwtRangeSource *rFw_Fw = new EbwtRangeSource(
85                         &ebwtFw_, true, 0xffffffff, false, verbose_, quiet_, halfAndHalf, seeded, maqPenalty_, qualOrder_);
86
87                 EbwtRangeSourceDriver * drFw_Bw = new EbwtRangeSourceDriver(
88                         *params, rFw_Bw, true, false, maqPenalty_, qualOrder_, sink_, sinkPt,
89                         0,          // seedLen (0 = whole read is seed)
90                         false,      // nudgeLeft (true for Fw index, false for Bw)
91                         PIN_TO_HI_HALF_EDGE, // right half is unrevisitable
92                         PIN_TO_LEN, // allow 1 mismatch in rest of read
93                         PIN_TO_LEN, // "
94                         PIN_TO_LEN, // "
95                         os_, verbose_, quiet_, true, pool_, NULL);
96                 //
97                 EbwtRangeSourceDriver * drFw_Fw = new EbwtRangeSourceDriver(
98                         *params, rFw_Fw, true, false, maqPenalty_, qualOrder_, sink_, sinkPt,
99                         0,          // seedLen (0 = whole read is seed)
100                         true,       // nudgeLeft (true for Fw index, false for Bw)
101                         PIN_TO_HI_HALF_EDGE, // right half is unrevisitable
102                         PIN_TO_LEN, // allow 1 mismatch in rest of read
103                         PIN_TO_LEN, // "
104                         PIN_TO_LEN, // "
105                         os_, verbose_, quiet_, true, pool_, NULL);
106                 TRangeSrcDrPtrVec *drVec = new TRangeSrcDrPtrVec();
107                 if(doFw_) {
108                         drVec->push_back(drFw_Bw);
109                         drVec->push_back(drFw_Fw);
110                 }
111
112                 EbwtRangeSource *rRc_Fw = new EbwtRangeSource(
113                         &ebwtFw_, false, 0xffffffff, true,  verbose_, quiet_, halfAndHalf, seeded, maqPenalty_, qualOrder_);
114                 EbwtRangeSource *rRc_Bw = new EbwtRangeSource(
115                          ebwtBw_, false, 0xffffffff, false, verbose_, quiet_, halfAndHalf, seeded, maqPenalty_, qualOrder_);
116
117                 EbwtRangeSourceDriver * drRc_Fw = new EbwtRangeSourceDriver(
118                         *params, rRc_Fw, false, false, maqPenalty_, qualOrder_, sink_, sinkPt,
119                         0,          // seedLen (0 = whole read is seed)
120                         true,       // nudgeLeft (true for Fw index, false for Bw)
121                         PIN_TO_HI_HALF_EDGE, // right half is unrevisitable
122                         PIN_TO_LEN, // allow 1 mismatch in rest of read
123                         PIN_TO_LEN, // "
124                         PIN_TO_LEN, // "
125                         os_, verbose_, quiet_, true, pool_, NULL);
126                 //
127                 EbwtRangeSourceDriver * drRc_Bw = new EbwtRangeSourceDriver(
128                         *params, rRc_Bw, false, false, maqPenalty_, qualOrder_, sink_, sinkPt,
129                         0,          // seedLen (0 = whole read is seed)
130                         false,      // nudgeLeft (true for Fw index, false for Bw)
131                         PIN_TO_HI_HALF_EDGE, // right half is unrevisitable
132                         PIN_TO_LEN, // allow 1 mismatch in rest of read
133                         PIN_TO_LEN, // "
134                         PIN_TO_LEN, // "
135                         os_, verbose_, quiet_, true, pool_, NULL);
136                 if(doRc_) {
137                         drVec->push_back(drRc_Fw);
138                         drVec->push_back(drRc_Bw);
139                 }
140                 TCostAwareRangeSrcDr* dr = new TCostAwareRangeSrcDr(strandFix_, drVec, verbose_, quiet_, false);
141                 delete drVec;
142
143                 // Set up a RangeChaser
144                 RangeChaser<String<Dna> > *rchase =
145                         new RangeChaser<String<Dna> >(cacheLimit_, cacheFw_, cacheBw_);
146
147                 // Set up the aligner
148                 return new UnpairedAlignerV2<EbwtRangeSource>(
149                         params, dr, rchase,
150                         sink_, sinkPtFactory_, sinkPt, os_, refs_,
151                         rangeMode_, verbose_, quiet_, INT_MAX, pool_, NULL, NULL);
152         }
153
154 private:
155         Ebwt<String<Dna> >& ebwtFw_;
156         Ebwt<String<Dna> >* ebwtBw_;
157         bool doFw_;
158         bool doRc_;
159         HitSink& sink_;
160         const HitSinkPerThreadFactory& sinkPtFactory_;
161         RangeCache *cacheFw_;
162         RangeCache *cacheBw_;
163         const uint32_t cacheLimit_;
164         ChunkPool *pool_;
165         BitPairReference* refs_;
166         vector<String<Dna5> >& os_;
167         const bool maqPenalty_;
168         const bool qualOrder_;
169         bool strandFix_;
170         bool rangeMode_;
171         bool verbose_;
172         bool quiet_;
173         uint32_t seed_;
174 };
175
176 /**
177  * Concrete factory class for constructing unpaired exact aligners.
178  */
179 class Paired1mmAlignerV1Factory : public AlignerFactory {
180         typedef RangeSourceDriver<EbwtRangeSource> TRangeSrcDr;
181         typedef CostAwareRangeSourceDriver<EbwtRangeSource> TCostAwareRangeSrcDr;
182         typedef std::vector<TRangeSrcDr*> TRangeSrcDrPtrVec;
183 public:
184         Paired1mmAlignerV1Factory(
185                         Ebwt<String<Dna> >& ebwtFw,
186                         Ebwt<String<Dna> >* ebwtBw,
187                         bool color,
188                         bool doFw,
189                         bool doRc,
190                         bool v1,
191                         HitSink& sink,
192                         const HitSinkPerThreadFactory& sinkPtFactory,
193                         bool mate1fw,
194                         bool mate2fw,
195                         uint32_t peInner,
196                         uint32_t peOuter,
197                         bool dontReconcile,
198                         uint32_t symCeil,
199                         uint32_t mixedThresh,
200                         uint32_t mixedAttemptLim,
201                         RangeCache *cacheFw,
202                         RangeCache *cacheBw,
203                         uint32_t cacheLimit,
204                         ChunkPool *pool,
205                         BitPairReference* refs,
206                         vector<String<Dna5> >& os,
207                         bool reportSe,
208                         bool maqPenalty,
209                         bool qualOrder,
210                         bool strandFix,
211                         bool rangeMode,
212                         bool verbose,
213                         bool quiet,
214                         uint32_t seed) :
215                         ebwtFw_(ebwtFw),
216                         ebwtBw_(ebwtBw),
217                         color_(color),
218                         doFw_(doFw),
219                         doRc_(doRc),
220                         v1_(v1),
221                         sink_(sink),
222                         sinkPtFactory_(sinkPtFactory),
223                         mate1fw_(mate1fw),
224                         mate2fw_(mate2fw),
225                         peInner_(peInner),
226                         peOuter_(peOuter),
227                         dontReconcile_(dontReconcile),
228                         symCeil_(symCeil),
229                         mixedThresh_(mixedThresh),
230                         mixedAttemptLim_(mixedAttemptLim),
231                         cacheFw_(cacheFw),
232                         cacheBw_(cacheBw),
233                         cacheLimit_(cacheLimit),
234                         pool_(pool),
235                         refs_(refs), os_(os),
236                         reportSe_(reportSe),
237                         maqPenalty_(maqPenalty),
238                         qualOrder_(qualOrder),
239                         strandFix_(strandFix),
240                         rangeMode_(rangeMode),
241                         verbose_(verbose),
242                         quiet_(quiet),
243                         seed_(seed)
244         {
245                 assert(ebwtBw != NULL);
246                 assert(ebwtFw.isInMemory());
247                 assert(ebwtBw->isInMemory());
248         }
249
250         /**
251          * Create a new UnpairedExactAlignerV1s.
252          */
253         virtual Aligner* create() const {
254                 HitSinkPerThread* sinkPt = sinkPtFactory_.createMult(2);
255                 HitSinkPerThread* sinkPtSe1 = NULL, * sinkPtSe2 = NULL;
256                 EbwtSearchParams<String<Dna> >* params =
257                         new EbwtSearchParams<String<Dna> >(*sinkPt, os_);
258                 EbwtSearchParams<String<Dna> >* paramsSe1 = NULL, * paramsSe2 = NULL;
259                 if(reportSe_) {
260                         sinkPtSe1 = sinkPtFactory_.create();
261                         sinkPtSe2 = sinkPtFactory_.create();
262                         paramsSe1 =
263                                 new EbwtSearchParams<String<Dna> >(*sinkPtSe1, os_);
264                         paramsSe2 =
265                                 new EbwtSearchParams<String<Dna> >(*sinkPtSe2, os_);
266                 }
267
268                 const int halfAndHalf = 0;
269                 const bool seeded = false;
270
271                 bool do1Fw = true;
272                 bool do1Rc = true;
273                 bool do2Fw = true;
274                 bool do2Rc = true;
275                 if(!doFw_) {
276                         if(mate1fw_) do1Fw = false;
277                         else         do1Rc = false;
278                         if(mate2fw_) do2Fw = false;
279                         else         do2Rc = false;
280                 }
281                 if(!doRc_) {
282                         if(mate1fw_) do1Rc = false;
283                         else         do1Fw = false;
284                         if(mate2fw_) do2Rc = false;
285                         else         do2Fw = false;
286                 }
287
288                 TRangeSrcDrPtrVec *dr1FwVec;
289                 dr1FwVec = new TRangeSrcDrPtrVec();
290                 if(do1Fw) {
291                         EbwtRangeSource *r1Fw_Bw = new EbwtRangeSource(
292                                  ebwtBw_, true, 0xffffffff, true,  verbose_, quiet_, halfAndHalf, seeded, maqPenalty_, qualOrder_);
293                         EbwtRangeSource *r1Fw_Fw = new EbwtRangeSource(
294                                 &ebwtFw_, true, 0xffffffff, false, verbose_, quiet_, halfAndHalf, seeded, maqPenalty_, qualOrder_);
295
296                         EbwtRangeSourceDriver * dr1Fw_Bw = new EbwtRangeSourceDriver(
297                                 *params, r1Fw_Bw, true, false, maqPenalty_, qualOrder_, sink_, sinkPt,
298                                 0,          // seedLen (0 = whole read is seed)
299                                 true,       // nudgeLeft (true for Fw index, false for Bw)
300                                 PIN_TO_HI_HALF_EDGE, // right half is unrevisitable
301                                 PIN_TO_LEN, // allow 1 mismatch in rest of read
302                                 PIN_TO_LEN, // "
303                                 PIN_TO_LEN, // "
304                                 os_, verbose_, quiet_, true, pool_, NULL);
305                         EbwtRangeSourceDriver * dr1Fw_Fw = new EbwtRangeSourceDriver(
306                                 *params, r1Fw_Fw, true, false, maqPenalty_, qualOrder_, sink_, sinkPt,
307                                 0,          // seedLen
308                                 false,      // nudgeLeft (true for Fw index, false for Bw)
309                                 PIN_TO_HI_HALF_EDGE, // right-hand half alignment is unrevisitable
310                                 PIN_TO_LEN, // "
311                                 PIN_TO_LEN, // "
312                                 PIN_TO_LEN, // "
313                                 os_, verbose_, quiet_, true, pool_, NULL);
314
315                         dr1FwVec->push_back(dr1Fw_Bw);
316                         dr1FwVec->push_back(dr1Fw_Fw);
317                 }
318
319                 TRangeSrcDrPtrVec *dr1RcVec;
320                 if(v1_) {
321                         dr1RcVec = new TRangeSrcDrPtrVec();
322                 } else {
323                         dr1RcVec = dr1FwVec;
324                 }
325                 if(do1Rc) {
326                         EbwtRangeSource *r1Rc_Fw = new EbwtRangeSource(
327                                 &ebwtFw_, false, 0xffffffff, true,  verbose_, quiet_, halfAndHalf, seeded, maqPenalty_, qualOrder_);
328                         EbwtRangeSource *r1Rc_Bw = new EbwtRangeSource(
329                                  ebwtBw_, false, 0xffffffff, false, verbose_, quiet_, halfAndHalf, seeded, maqPenalty_, qualOrder_);
330
331                         EbwtRangeSourceDriver * dr1Rc_Fw = new EbwtRangeSourceDriver(
332                                 *params, r1Rc_Fw, false, false, maqPenalty_, qualOrder_, sink_, sinkPt,
333                                 0,          // seedLen
334                                 true,       // nudgeLeft (true for Fw index, false for Bw)
335                                 PIN_TO_HI_HALF_EDGE, // right-hand half alignment is unrevisitable
336                                 PIN_TO_LEN, // "
337                                 PIN_TO_LEN, // "
338                                 PIN_TO_LEN, // "
339                                 os_, verbose_, quiet_, true, pool_, NULL);
340                         EbwtRangeSourceDriver * dr1Rc_Bw = new EbwtRangeSourceDriver(
341                                 *params, r1Rc_Bw, false, false, maqPenalty_, qualOrder_, sink_, sinkPt,
342                                 0,          // seedLen (0 = whole read is seed)
343                                 false,      // nudgeLeft (true for Fw index, false for Bw)
344                                 PIN_TO_HI_HALF_EDGE, // right half is unrevisitable
345                                 PIN_TO_LEN, // allow 1 mismatch in rest of read
346                                 PIN_TO_LEN, // "
347                                 PIN_TO_LEN, // "
348                                 os_, verbose_, quiet_, true, pool_, NULL);
349                         dr1RcVec->push_back(dr1Rc_Fw);
350                         dr1RcVec->push_back(dr1Rc_Bw);
351                 }
352
353                 TRangeSrcDrPtrVec *dr2FwVec;
354                 if(v1_) {
355                         dr2FwVec = new TRangeSrcDrPtrVec();
356                 } else {
357                         dr2FwVec = dr1FwVec;
358                 }
359                 if(do2Fw) {
360                         EbwtRangeSource *r2Fw_Bw = new EbwtRangeSource(
361                                  ebwtBw_, true, 0xffffffff, true,  verbose_, quiet_, halfAndHalf, seeded, maqPenalty_, qualOrder_);
362                         EbwtRangeSource *r2Fw_Fw = new EbwtRangeSource(
363                                 &ebwtFw_, true, 0xffffffff, false, verbose_, quiet_, halfAndHalf, seeded, maqPenalty_, qualOrder_);
364
365                         EbwtRangeSourceDriver * dr2Fw_Bw = new EbwtRangeSourceDriver(
366                                 *params, r2Fw_Bw, true, false, maqPenalty_, qualOrder_, sink_, sinkPt,
367                                 0,          // seedLen (0 = whole read is seed)
368                                 true,       // nudgeLeft (true for Fw index, false for Bw)
369                                 PIN_TO_HI_HALF_EDGE, // right half is unrevisitable
370                                 PIN_TO_LEN, // allow 1 mismatch in rest of read
371                                 PIN_TO_LEN, // "
372                                 PIN_TO_LEN, // "
373                                 os_, verbose_, quiet_, false, pool_, NULL);
374                         EbwtRangeSourceDriver * dr2Fw_Fw = new EbwtRangeSourceDriver(
375                                 *params, r2Fw_Fw, true, false, maqPenalty_, qualOrder_, sink_, sinkPt,
376                                 0,          // seedLen
377                                 false,      // nudgeLeft (true for Fw index, false for Bw)
378                                 PIN_TO_HI_HALF_EDGE, // right-hand half alignment is unrevisitable
379                                 PIN_TO_LEN, // "
380                                 PIN_TO_LEN, // "
381                                 PIN_TO_LEN, // "
382                                 os_, verbose_, quiet_, false, pool_, NULL);
383                         dr2FwVec->push_back(dr2Fw_Bw);
384                         dr2FwVec->push_back(dr2Fw_Fw);
385                 }
386
387                 TRangeSrcDrPtrVec *dr2RcVec;
388                 if(v1_) {
389                         dr2RcVec = new TRangeSrcDrPtrVec();
390                 } else {
391                         dr2RcVec = dr1FwVec;
392                 }
393                 if(do2Rc) {
394                         EbwtRangeSource *r2Rc_Fw = new EbwtRangeSource(
395                                 &ebwtFw_, false, 0xffffffff, true,  verbose_, quiet_, halfAndHalf, seeded, maqPenalty_, qualOrder_);
396                         EbwtRangeSource *r2Rc_Bw = new EbwtRangeSource(
397                                  ebwtBw_, false, 0xffffffff, false, verbose_, quiet_, halfAndHalf, seeded, maqPenalty_, qualOrder_);
398
399                         EbwtRangeSourceDriver * dr2Rc_Fw = new EbwtRangeSourceDriver(
400                                 *params, r2Rc_Fw, false, false, maqPenalty_, qualOrder_, sink_, sinkPt,
401                                 0,          // seedLen
402                                 true,       // nudgeLeft (true for Fw index, false for Bw)
403                                 PIN_TO_HI_HALF_EDGE, // right-hand half alignment is unrevisitable
404                                 PIN_TO_LEN, // "
405                                 PIN_TO_LEN, // "
406                                 PIN_TO_LEN, // "
407                                 os_, verbose_, quiet_, false, pool_, NULL);
408                         EbwtRangeSourceDriver * dr2Rc_Bw = new EbwtRangeSourceDriver(
409                                 *params, r2Rc_Bw, false, false, maqPenalty_, qualOrder_, sink_, sinkPt,
410                                 0,          // seedLen (0 = whole read is seed)
411                                 false,      // nudgeLeft (true for Fw index, false for Bw)
412                                 PIN_TO_HI_HALF_EDGE, // right half is unrevisitable
413                                 PIN_TO_LEN, // allow 1 mismatch in rest of read
414                                 PIN_TO_LEN, // "
415                                 PIN_TO_LEN, // "
416                                 os_, verbose_, quiet_, false, pool_, NULL);
417                         dr2RcVec->push_back(dr2Rc_Fw);
418                         dr2RcVec->push_back(dr2Rc_Bw);
419                 }
420
421                 RefAligner<String<Dna5> >* refAligner =
422                         new OneMMRefAligner<String<Dna5> >(color_, verbose_, quiet_);
423
424                 // Set up a RangeChaser
425                 RangeChaser<String<Dna> > *rchase =
426                         new RangeChaser<String<Dna> >(cacheLimit_, cacheFw_, cacheBw_);
427
428                 if(v1_) {
429                         PairedBWAlignerV1<EbwtRangeSource>* al = new PairedBWAlignerV1<EbwtRangeSource>(
430                                 params,
431                                 new TCostAwareRangeSrcDr(strandFix_, dr1FwVec, verbose_, quiet_, false),
432                                 new TCostAwareRangeSrcDr(strandFix_, dr1RcVec, verbose_, quiet_, false),
433                                 new TCostAwareRangeSrcDr(strandFix_, dr2FwVec, verbose_, quiet_, false),
434                                 new TCostAwareRangeSrcDr(strandFix_, dr2RcVec, verbose_, quiet_, false),
435                                 refAligner, rchase,
436                                 sink_, sinkPtFactory_, sinkPt, mate1fw_, mate2fw_,
437                                 peInner_, peOuter_, dontReconcile_, symCeil_, mixedThresh_,
438                                 mixedAttemptLim_, refs_, rangeMode_, verbose_,
439                                 quiet_, INT_MAX, pool_, NULL);
440                         delete dr1FwVec;
441                         delete dr1RcVec;
442                         delete dr2FwVec;
443                         delete dr2RcVec;
444                         return al;
445                 } else {
446                         PairedBWAlignerV2<EbwtRangeSource>* al = new PairedBWAlignerV2<EbwtRangeSource>(
447                                 params, paramsSe1, paramsSe2,
448                                 new TCostAwareRangeSrcDr(strandFix_, dr1FwVec, verbose_, quiet_, true),
449                                 refAligner, rchase,
450                                 sink_, sinkPtFactory_,
451                                 sinkPt, sinkPtSe1, sinkPtSe2,
452                                 mate1fw_, mate2fw_,
453                                 peInner_, peOuter_,
454                                 mixedAttemptLim_, refs_, rangeMode_,
455                                 verbose_, quiet_, INT_MAX, pool_, NULL);
456                         delete dr1FwVec;
457                         return al;
458                 }
459         }
460
461 private:
462         Ebwt<String<Dna> >& ebwtFw_;
463         Ebwt<String<Dna> >* ebwtBw_;
464         bool color_;
465         bool doFw_;
466         bool doRc_;
467         bool v1_;
468         HitSink& sink_;
469         const HitSinkPerThreadFactory& sinkPtFactory_;
470         const bool mate1fw_;
471         const bool mate2fw_;
472         const uint32_t peInner_;
473         const uint32_t peOuter_;
474         const bool dontReconcile_;
475         const uint32_t symCeil_;
476         const uint32_t mixedThresh_;
477         const uint32_t mixedAttemptLim_;
478         RangeCache *cacheFw_;
479         RangeCache *cacheBw_;
480         const uint32_t cacheLimit_;
481         ChunkPool *pool_;
482         BitPairReference* refs_;
483         vector<String<Dna5> >& os_;
484         const bool reportSe_;
485         const bool maqPenalty_;
486         const bool qualOrder_;
487         const bool strandFix_;
488         const bool rangeMode_;
489         const bool verbose_;
490         const bool quiet_;
491         const uint32_t seed_;
492 };
493
494 #endif /* ALIGNER_1MM_H_ */