#ifndef HIT_H_
#define HIT_H_

#include <vector>
#include <stdint.h>
#include <iostream>
#include <sstream>
#include <fstream>
#include <string>
#include <stdexcept>
#include <algorithm>
#include <iomanip>
#include <cstring>
#include <seqan/sequence.h>
#include "alphabet.h"
#include "assert_helpers.h"
#include "spinlock.h"
#include "threading.h"
#include "bitset.h"
#include "tokenize.h"
#include "pat.h"
#include "formats.h"
#include "filebuf.h"
#include "edit.h"
#include "refmap.h"
#include "annot.h"

/**
 * Classes for dealing with reporting alignments.
 */

using namespace std;
using namespace seqan;

/// Constants for the various output modes
enum output_types {
        OUTPUT_FULL = 1,
        OUTPUT_CONCISE,
        OUTPUT_BINARY,
        OUTPUT_CHAIN,
        OUTPUT_SAM,
        OUTPUT_NONE
};

/// Names of the various output modes (indexed by output_types value)
static const std::string output_type_names[] = {
        "Invalid!",
        "Full",
        "Concise",
        "Binary",
        "Chain",
        "SAM",
        "None"
};
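
/*
 * Usage sketch (assumes the table above stays in sync with output_types):
 * a mode value can be mapped to a printable label, e.g.
 *
 *   int mode = OUTPUT_CONCISE;
 *   std::cerr << "Output mode: " << output_type_names[mode] << std::endl; // "Concise"
 */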

typedef pair<uint32_t,uint32_t> U32Pair;

/**
 * Encapsulates a hit, including a text-id/text-offset pair, a pattern
 * id, and a boolean indicating whether it matched as its forward or
 * reverse-complement version.
 */
class Hit {
public:
        Hit() : stratum(-1) { }

        /**
         * Initialize the given HitSetEnt to represent this hit.
         */
        void toHitSetEnt(HitSetEnt& hse, AnnotationMap *amap) const {
                hse.h = h;
                hse.fw = fw ? 1 : 0;
                hse.oms = oms;
                hse.stratum = stratum;
                hse.cost = cost;
                if(mms.count() == 0) return;
                for(int i = 0; i < (int)length(); i++) {
                        if(mms.test(i)) {
                                hse.expand();
                                hse.back().type = EDIT_TYPE_MM;
                                hse.back().pos = i;
                                hse.back().chr = refcs[i];
                        }
                }
        }

        /**
         * Convert a list of Hit objects to a single HitSet object suitable
         * for chaining.
         */
        static void toHitSet(const std::vector<Hit>& hits, HitSet& hs,
                             AnnotationMap *amap)
        {
                if(hits.empty()) return;
                // Initialize HitSet
                hs.name = hits.front().patName;
                hs.seq  = hits.front().patSeq;
                hs.qual = hits.front().quals;
                hs.color = hits.front().color;
                if(!hits.front().fw) {
                        // Re-reverse
                        reverseComplementInPlace(hs.seq, hs.color);
                        reverseInPlace(hs.qual);
                }
                // Convert hits to entries
                hs.ents.resize(hits.size());
                for(size_t i = 0; i < hs.ents.size(); i++) {
                        hits[i].toHitSetEnt(hs.ents[i], amap);
                }
        }

        /**
         * Populate a vector of Hits with hits from the given HitSet.
         */
        static void fromHitSet(std::vector<Hit>& hits, const HitSet& hs) {
                assert(hs.sorted());
                hits.resize(hs.size());
                for(size_t i = 0; i < hs.size(); i++) {
                        hits[i].h = hs[i].h;
                        hits[i].oms = hs[i].oms;
                        hits[i].fw = hs[i].fw;
                        hits[i].stratum = hs[i].stratum;
                        hits[i].cost = hs[i].cost;
                        hits[i].mate = 0; //hs[i].mate;
                        hits[i].patName = hs.name;
                        hits[i].patSeq = hs.seq;
                        hits[i].quals = hs.qual;
                        hits[i].color = hs.color;
                        size_t len = seqan::length(hs.seq);
                        if(!hs[i].fw) {
                                reverseComplementInPlace(hits[i].patSeq, hs.color);
                                reverseInPlace(hits[i].quals);
                        }
                        hits[i].refcs.resize(len);
                        for(size_t j = 0; j < hs[i].size(); j++) {
                                if(hs[i][j].type != EDIT_TYPE_MM) continue;
                                hits[i].mms.set(hs[i][j].pos);
                                hits[i].refcs[hs[i][j].pos] = hs[i][j].chr;
                        }
                }
        }
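
        /*
         * Usage sketch for the two conversions above (assumes 'hits' holds
         * the alignments for one read and 'amap' is an AnnotationMap*; both
         * names are illustrative):
         *
         *   HitSet hs;
         *   Hit::toHitSet(hits, hs, amap);   // bundle Hits for chained output
         *   ...
         *   std::vector<Hit> restored;
         *   Hit::fromHitSet(restored, hs);   // rebuild Hit records from the HitSet
         */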

        U32Pair             h;       /// reference index & offset
        U32Pair             mh;      /// reference index & offset for mate
        uint32_t            patId;   /// read index
        String<char>        patName; /// read name
        String<Dna5>        patSeq;  /// read sequence
        String<Dna5>        colSeq;  /// original color sequence, not decoded
        String<char>        quals;   /// read qualities
        String<char>        colQuals;/// original color qualities, not decoded
        FixedBitset<1024>   mms;     /// nucleotide mismatch mask
        FixedBitset<1024>   cmms;    /// color mismatch mask (if relevant)
        vector<char>        refcs;   /// reference characters for mms
        vector<char>        crefcs;  /// reference characters for cmms
        uint32_t            oms;     /// # of other possible mappings; 0 -> this is unique
        bool                fw;      /// orientation of read in alignment
        bool                mfw;     /// orientation of mate in alignment
        uint16_t            mlen;    /// length of mate
        int8_t              stratum; /// stratum of hit (= mismatches in seed)
        uint32_t            cost;    /// total cost, factoring in stratum and quality penalty
        uint8_t             mate;    /// matedness; 0 = not a mate
                                     ///            1 = upstream mate
                                     ///            2 = downstream mate
        bool                color;   /// read is in colorspace?
        uint32_t            seed;    /// pseudo-random seed for aligned read

        /**
         * Return true if this Hit is internally consistent.  Otherwise,
         * throw an assertion.
         */
        bool repOk() const {
                assert_geq(cost, (uint32_t)(stratum << 14));
                return true;
        }

        size_t length() const { return seqan::length(patSeq); }

        Hit& operator = (const Hit &other) {
                this->h       = other.h;
                this->mh      = other.mh;
                this->patId   = other.patId;
                this->patName = other.patName;
                this->patSeq  = other.patSeq;
                this->colSeq  = other.colSeq;
                this->quals   = other.quals;
                this->colQuals= other.colQuals;
                this->mms     = other.mms;
                this->cmms    = other.cmms;
                this->refcs   = other.refcs;
                this->crefcs  = other.crefcs;
                this->oms     = other.oms;
                this->fw      = other.fw;
                this->mfw     = other.mfw;
                this->mlen    = other.mlen;
                this->stratum = other.stratum;
                this->cost    = other.cost;
                this->mate    = other.mate;
                this->color   = other.color;
                this->seed    = other.seed;
                return *this;
        }
};

/**
 * Compare hits a and b; a < b if a's cost is less than b's.  If
 * there's a tie, break on position and orientation.
 */
class HitCostCompare {
public:
        bool operator() (const Hit& a, const Hit& b) {
                if(a.cost < b.cost) return true;
                if(a.cost > b.cost) return false;
                if(a.h < b.h) return true;
                if(a.h > b.h) return false;
                if(a.fw < b.fw) return true;
                if(a.fw > b.fw) return false;
                return false;
        }
};
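
/*
 * Usage sketch (assumes <algorithm> is available and 'hits' is a populated
 * std::vector<Hit>): HitCostCompare can be handed to std::sort to order
 * alignments by cost, breaking ties on position and then orientation.
 *
 *   std::sort(hits.begin(), hits.end(), HitCostCompare());
 */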

/// Sort by text-id then by text-offset
bool operator< (const Hit& a, const Hit& b);

/**
 * Table for holding recalibration counts, along the lines of the table
 * presented in the SOAPsnp paper in Genome Res.  Each element maps a
 * read allele (o), quality score (q), cycle (c), and reference allele
 * (H), to a count of how many times that combination is observed in a
 * reported alignment.
 *
 * RecalTable is not synchronized, so it's assumed that threads are
 * incrementing the counters from within critical sections.
 */
class RecalTable {
public:
        RecalTable(int maxCycle,
                   int maxQual,
                   int qualShift) : maxCycle_(maxCycle),
                                    maxQual_(maxQual),
                                    qualShift_(qualShift),
                                    shift1_(6 - qualShift_),
                                    shift2_(shift1_ + 2),
                                    shift3_(shift2_ + 2),
                                    ents_(NULL), len_(0)
        {
                if(maxCycle == 0) {
                        cerr << "Warning: maximum cycle for recalibration table is 0" << endl;
                } else if(maxQual >> qualShift == 0) {
                        cerr << "Warning: maximum quality value " << maxQual << ", when shifted, is 0" << endl;
                } else if(qualShift > 5) {
                        cerr << "Warning: quality shift value " << qualShift << " exceeds ceiling of 5" << endl;
                } else {
                        try {
                                len_ = maxCycle_ * 4 /* subj alleles*/ * 4 /* ref alleles */ * 64 /* quals */;
                                ents_ = new uint32_t[len_];
                                if(ents_ == NULL) {
                                        throw std::bad_alloc();
                                }
                                memset(ents_, 0, len_ << 2);
                        } catch(std::bad_alloc& e) {
                                cerr << "Error allocating recalibration table with " << len_ << " entries" << endl;
                                throw 1;
                        }
                }
        }

        ~RecalTable() {
                if(ents_ != NULL) delete[] ents_;
        }

        /**
         * Factor a new alignment into the recalibration table.
         */
        void commitHit(const Hit& h) {
                // Iterate through the pattern from 5' to 3', calculate the
                // shifted quality value, obtain the reference character, and
                // increment the appropriate counter
                assert(h.repOk());
                for(int i = 0; i < (int)h.length(); i++) {
                        int ii = i;
                        if(!h.fw) {
                                ii = h.length() - ii - 1;
                        }
                        int qc = (int)h.patSeq[ii];
                        int rc = qc;
                        if(h.mms.test(i)) {
                                rc = charToDna5[(int)h.refcs[i]];
                                assert_neq(rc, qc);
                        }
                        int q = (int)h.quals[ii]-33;
                        assert_lt(q, 64);
                        q >>= qualShift_;
                        ents_[calcIdx(i, qc, rc, q)]++;
                }
        }

        /**
         * Print the contents of the recalibration table.
         */
        void print (std::ostream& out) const {
                if(ents_ == NULL) return;
                const int lim = maxCycle_;
                for(int i = 0; i < lim; i++) {
                        out << "t" << i << "\t";
                        // Iterate over subject alleles
                        for(int j = 0; j < 4; j++) {
                                // Iterate over reference alleles
                                for(int k = 0; k < 4; k++) {
                                        // Iterate over qualities
                                        int lim2 = maxQual_ >> qualShift_;
                                        for(int l = 0; l < lim2; l++) {
                                                out << ents_[calcIdx(i, j, k, l)] << '\t';
                                        }
                                }
                        }
                        out << endl;
                }
        }

protected:

        /**
         * Calculate index into the ents_ array given cycle, subject
         * allele, reference allele, and (shifted) quality.
         */
        int calcIdx(int cyc, int sa, int ra, int q) const {
                int ret = q | (ra << shift1_) | (sa << shift2_) | (cyc << shift3_);
                assert_lt(ret, len_);
                return ret;
        }
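
        /*
         * Worked example (sketch): with qualShift_ == 0, shift1_/shift2_/shift3_
         * are 6/8/10, so the packed index is
         *
         *   idx = q + (ra << 6) + (sa << 8) + (cyc << 10)
         *
         * i.e. cyc*1024 + sa*256 + ra*64 + q, which matches the layout implied
         * by len_ = maxCycle_ * 4 * 4 * 64.  For cyc=2, sa=3, ra=1, q=30 the
         * index is 2*1024 + 3*256 + 1*64 + 30 = 2910.
         */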

        const int maxCycle_;
        const int maxQual_;
        const int qualShift_;
        const int shift1_;
        const int shift2_;
        const int shift3_;
        uint32_t *ents_;
        int len_;
};

#define DECL_HIT_DUMPS \
        const std::string& dumpAl, \
        const std::string& dumpUnal, \
        const std::string& dumpMax

#define INIT_HIT_DUMPS \
        dumpAlBase_(dumpAl), \
        dumpUnalBase_(dumpUnal), \
        dumpMaxBase_(dumpMax)

#define DECL_HIT_DUMPS2 \
        DECL_HIT_DUMPS, \
        bool onePairFile, \
        bool sampleMax, \
        RecalTable *recalTable, \
        std::vector<std::string>* refnames

#define PASS_HIT_DUMPS \
        dumpAl, \
        dumpUnal, \
        dumpMax

#define PASS_HIT_DUMPS2 \
        PASS_HIT_DUMPS, \
        onePairFile, \
        sampleMax, \
        recalTable, \
        refnames
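
/*
 * Expansion sketch: DECL_HIT_DUMPS/DECL_HIT_DUMPS2 splice the dump-related
 * parameters into a constructor's parameter list, and PASS_HIT_DUMPS2
 * forwards the corresponding arguments.  For example,
 *
 *   SomeSink(OutFileBuf* out, DECL_HIT_DUMPS2)
 *
 * declares 'dumpAl', 'dumpUnal', 'dumpMax', 'onePairFile', 'sampleMax',
 * 'recalTable' and 'refnames' parameters ('SomeSink' is an illustrative
 * name, not a class defined here).
 */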

/**
 * Encapsulates an object that accepts hits, optionally retains them in
 * a vector, and does something else with them according to the
 * descendent's implementation of the pure virtual member reportHitImpl().
 */
class HitSink {
public:
        explicit HitSink(OutFileBuf* out,
                        DECL_HIT_DUMPS,
                        bool onePairFile,
                        bool sampleMax,
                        RecalTable *table,
                        vector<string>* refnames = NULL) :
                _outs(),
                _deleteOuts(false),
                recalTable_(table),
                _refnames(refnames),
                _numWrappers(0),
                _locks(),
                INIT_HIT_DUMPS,
                onePairFile_(onePairFile),
                sampleMax_(sampleMax),
                first_(true),
                numAligned_(0llu),
                numUnaligned_(0llu),
                numMaxed_(0llu),
                numReported_(0llu),
                numReportedPaired_(0llu),
                quiet_(false),
                ssmode_(ios_base::out)
        {
                _outs.push_back(out);
                _locks.resize(1);
                MUTEX_INIT(_locks[0]);
                MUTEX_INIT(_mainlock);
                initDumps();
        }

        /**
         * Open a number of output streams; usually one per reference
         * sequence.  For now, we give them names refXXXXX.map where XXXXX
         * is the 0-padded reference index.  Someday we may want to include
         * the name of the reference sequence in the filename somehow.
         */
        explicit HitSink(size_t numOuts,
                        DECL_HIT_DUMPS,
                        bool onePairFile,
                        bool sampleMax,
                        RecalTable *table,
                        vector<string>* refnames = NULL) :
                _outs(),
                _deleteOuts(true),
                recalTable_(table),
                _refnames(refnames),
                _numWrappers(0),
                _locks(),
                INIT_HIT_DUMPS,
                onePairFile_(onePairFile),
                sampleMax_(sampleMax),
                first_(true),
                numAligned_(0llu),
                numUnaligned_(0llu),
                numMaxed_(0llu),
                numReported_(0llu),
                numReportedPaired_(0llu),
                quiet_(false),
                ssmode_(ios_base::out)
        {
                // Open all files for writing and initialize all locks
                for(size_t i = 0; i < numOuts; i++) {
                        _outs.push_back(NULL); // we open output streams lazily
                        _locks.resize(i+1);
                        MUTEX_INIT(_locks[i]);
                }
                MUTEX_INIT(_mainlock);
                initDumps();
        }

        /**
         * Destroy the HitSink object.
         */
        virtual ~HitSink() {
                closeOuts();
                if(_deleteOuts) {
                        // Delete all non-NULL output streams
                        for(size_t i = 0; i < _outs.size(); i++) {
                                if(_outs[i] != NULL) {
                                        delete _outs[i];
                                        _outs[i] = NULL;
                                }
                        }
                }
                destroyDumps();
        }

        /**
         * Call this whenever this HitSink is wrapped by a new
         * HitSinkPerThread.  This helps us keep track of whether the main
         * lock or any of the per-stream locks will be contended.
         */
        void addWrapper() {
                _numWrappers++;
        }

        /**
         * Called by concrete subclasses to figure out which elements of
         * the _outs/_locks array to use when outputting the alignment.
         */
        size_t refIdxToStreamIdx(size_t refIdx) {
                if(refIdx >= _outs.size()) return 0;
                return refIdx;
        }
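
        /*
         * Behavior sketch: with a single output stream (_outs.size() == 1),
         * every reference index maps to stream 0; with one stream per
         * reference (e.g. under --refout), refIdxToStreamIdx(7) == 7 as long
         * as 7 is a valid index into _outs.
         */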

        /**
         * Append a single hit to the given output stream.
         */
        virtual void append(ostream& o, const Hit& h) = 0;

        /**
         * Report a batch of hits; all in the given vector.
         */
        virtual void reportHits(vector<Hit>& hs) {
                reportHits(hs, 0, hs.size());
        }

        /**
         * Report a batch of hits from a vector, perhaps subsetting it.
         */
        virtual void reportHits(vector<Hit>& hs, size_t start, size_t end) {
                assert_geq(end, start);
                if(end-start == 0) return;
                bool paired = hs[start].mate > 0;
                // Sort reads so that those against the same reference sequence
                // are consecutive.
                if(_outs.size() > 1 && end-start > 2) {
                        sort(hs.begin() + start, hs.begin() + end);
                }
                char buf[4096];
                for(size_t i = start; i < end; i++) {
                        const Hit& h = hs[i];
                        assert(h.repOk());
                        bool diff = false;
                        if(i > start) {
                                diff = (refIdxToStreamIdx(h.h.first) != refIdxToStreamIdx(hs[i-1].h.first));
                                if(diff) unlock(hs[i-1].h.first);
                        }
                        ostringstream ss(ssmode_);
                        ss.rdbuf()->pubsetbuf(buf, 4096);
                        append(ss, h);
                        if(i == start || diff) {
                                lock(h.h.first);
                        }
                        out(h.h.first).writeChars(buf, ss.tellp());
                }
                unlock(hs[end-1].h.first);
                mainlock();
                commitHits(hs);
                first_ = false;
                numAligned_++;
                if(paired) numReportedPaired_ += (end-start);
                else       numReported_ += (end-start);
                mainunlock();
        }

        void commitHit(const Hit& hit) {
                if(recalTable_ != NULL) {
                        recalTable_->commitHit(hit);
                }
        }

        void commitHits(const std::vector<Hit>& hits) {
                if(recalTable_ != NULL) {
                        const size_t sz = hits.size();
                        for(size_t i = 0; i < sz; i++) {
                                commitHit(hits[i]);
                        }
                }
        }

        /**
         * Called when all alignments are complete.  It is assumed that no
         * synchronization is necessary.
         */
        void finish(bool hadoopOut) {
                // Close output streams
                closeOuts();
                if(!quiet_) {
                        // Print information about how many unpaired and/or paired
                        // reads were aligned.
                        uint64_t tot = numAligned_ + numUnaligned_ + numMaxed_;
                        double alPct = 0.0, unalPct = 0.0, maxPct = 0.0;
                        if(tot > 0) {
                                alPct   = 100.0 * (double)numAligned_ / (double)tot;
                                unalPct = 100.0 * (double)numUnaligned_ / (double)tot;
                                maxPct  = 100.0 * (double)numMaxed_ / (double)tot;
                        }
                        cerr << "# reads processed: " << tot << endl;
                        cerr << "# reads with at least one reported alignment: "
                             << numAligned_ << " (" << fixed << setprecision(2)
                             << alPct << "%)" << endl;
                        cerr << "# reads that failed to align: "
                             << numUnaligned_ << " (" << fixed << setprecision(2)
                             << unalPct << "%)" << endl;
                        if(numMaxed_ > 0) {
                                if(sampleMax_) {
                                        cerr << "# reads with alignments sampled due to -M: "
                                                 << numMaxed_ << " (" << fixed << setprecision(2)
                                                 << maxPct << "%)" << endl;
                                } else {
                                        cerr << "# reads with alignments suppressed due to -m: "
                                                 << numMaxed_ << " (" << fixed << setprecision(2)
                                                 << maxPct << "%)" << endl;
                                }
                        }
                        if(first_) {
                                assert_eq(0llu, numReported_);
                                cerr << "No alignments" << endl;
                        }
                        else if(numReportedPaired_ > 0 && numReported_ == 0) {
                                cerr << "Reported " << (numReportedPaired_ >> 1)
                                         << " paired-end alignments to " << _outs.size()
                                         << " output stream(s)" << endl;
                        }
                        else if(numReported_ > 0 && numReportedPaired_ == 0) {
                                cerr << "Reported " << numReported_
                                         << " alignments to " << _outs.size()
                                         << " output stream(s)" << endl;
                        }
                        else {
                                assert_gt(numReported_, 0);
                                assert_gt(numReportedPaired_, 0);
                                cerr << "Reported " << (numReportedPaired_ >> 1)
                                         << " paired-end alignments and " << numReported_
                                         << " singleton alignments to " << _outs.size()
                                         << " output stream(s)" << endl;
                        }
                        if(hadoopOut) {
                                cerr << "reporter:counter:Bowtie,Reads with reported alignments," << numAligned_ << endl;
                                cerr << "reporter:counter:Bowtie,Reads with no alignments," << numUnaligned_ << endl;
                                cerr << "reporter:counter:Bowtie,Reads exceeding -m limit," << numMaxed_ << endl;
                                cerr << "reporter:counter:Bowtie,Unpaired alignments reported," << numReported_ << endl;
                                cerr << "reporter:counter:Bowtie,Paired alignments reported," << numReportedPaired_ << endl;
                        }
                }
                // Print the recalibration table.
                if(recalTable_ != NULL) {
                        recalTable_->print(cout);
                }
        }

        /// Returns the alignment output stream; if the stream needs to be
        /// created, create it
        OutFileBuf& out(size_t refIdx) {
                size_t strIdx = refIdxToStreamIdx(refIdx);
                if(_outs[strIdx] == NULL) {
                        assert(_deleteOuts);
                        ostringstream oss;
                        oss << "ref";
                        if     (strIdx < 10)    oss << "0000";
                        else if(strIdx < 100)   oss << "000";
                        else if(strIdx < 1000)  oss << "00";
                        else if(strIdx < 10000) oss << "0";
                        oss << strIdx << ".map";
                        _outs[strIdx] = new OutFileBuf(oss.str().c_str(), ssmode_ == ios_base::binary);
                }
                assert(_outs[strIdx] != NULL);
                return *(_outs[strIdx]);
        }
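
        /*
         * Naming sketch: the zero-padding above yields five-digit stream
         * names, e.g. stream 7 is written to "ref00007.map" and stream 123
         * to "ref00123.map".
         */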

        /**
         * Lock the monolithic lock for this HitSink.  This is useful when,
         * for example, outputting a read to an unaligned-read file.
         */
        void mainlock() {
                MUTEX_LOCK(_mainlock);
        }

        /**
         * Unlock the monolithic lock for this HitSink.  This is useful
         * when, for example, outputting a read to an unaligned-read file.
         */
        void mainunlock() {
                MUTEX_UNLOCK(_mainlock);
        }

        /**
         * Return true iff this HitSink dumps aligned reads to an output
         * stream (i.e., iff --alfa or --alfq are specified).
         */
        bool dumpsAlignedReads() {
                return dumpAlignFlag_;
        }

        /**
         * Return true iff this HitSink dumps unaligned reads to an output
         * stream (i.e., iff --unfa or --unfq are specified).
         */
        bool dumpsUnalignedReads() {
                return dumpUnalignFlag_;
        }

        /**
         * Return true iff this HitSink dumps maxed-out reads to an output
         * stream (i.e., iff --maxfa or --maxfq are specified).
         */
        bool dumpsMaxedReads() {
                return dumpMaxedFlag_ || dumpUnalignFlag_;
        }

        /**
         * Return true iff this HitSink dumps aligned, unaligned, or
         * maxed-out reads to an output stream (i.e., iff --alfa, --unfa,
         * --maxfa, --alfq, --unfq, or --maxfq are specified).
         */
        bool dumpsReads() {
                return dumpAlignFlag_ || dumpUnalignFlag_ || dumpMaxedFlag_;
        }

        /**
         * Dump an aligned read to all of the appropriate output streams.
         * Be careful to synchronize correctly - there may be multiple
         * simultaneous writers.
         */
        void dumpAlign(PatternSourcePerThread& p) {
                if(!dumpAlignFlag_) return;
                if(!p.paired() || onePairFile_) {
                        // Dump unpaired read to an aligned-read file of the same format
                        if(!dumpAlBase_.empty()) {
                                MUTEX_LOCK(dumpAlignLock_);
                                if(dumpAl_ == NULL) {
                                        assert(dumpAlQv_ == NULL);
                                        dumpAl_ = openOf(dumpAlBase_, 0, "");
                                        assert(dumpAl_ != NULL);
                                        if(p.bufa().qualOrigBufLen > 0) {
                                                dumpAlQv_ = openOf(dumpAlBase_ + ".qual", 0, "");
                                                assert(dumpAlQv_ != NULL);
                                        }
                                }
                                dumpAl_->write(p.bufa().readOrigBuf, p.bufa().readOrigBufLen);
                                if(dumpAlQv_ != NULL) {
                                        dumpAlQv_->write(p.bufa().qualOrigBuf, p.bufa().qualOrigBufLen);
                                }
                                MUTEX_UNLOCK(dumpAlignLock_);
                        }
                } else {
                        // Dump paired-end read to an aligned-read file (or pair of
                        // files) of the same format
                        if(!dumpAlBase_.empty()) {
                                MUTEX_LOCK(dumpAlignLockPE_);
                                if(dumpAl_1_ == NULL) {
                                        assert(dumpAlQv_1_ == NULL);
                                        assert(dumpAlQv_2_ == NULL);
                                        dumpAl_1_ = openOf(dumpAlBase_, 1, "");
                                        dumpAl_2_ = openOf(dumpAlBase_, 2, "");
                                        assert(dumpAl_1_ != NULL);
                                        assert(dumpAl_2_ != NULL);
                                        if(p.bufa().qualOrigBufLen > 0) {
                                                dumpAlQv_1_ = openOf(dumpAlBase_ + ".qual", 1, "");
                                                dumpAlQv_2_ = openOf(dumpAlBase_ + ".qual", 2, "");
                                                assert(dumpAlQv_1_ != NULL);
                                                assert(dumpAlQv_2_ != NULL);
                                        }
                                }
                                dumpAl_1_->write(p.bufa().readOrigBuf, p.bufa().readOrigBufLen);
                                dumpAl_2_->write(p.bufb().readOrigBuf, p.bufb().readOrigBufLen);
                                if(dumpAlQv_1_ != NULL) {
                                        dumpAlQv_1_->write(p.bufa().qualOrigBuf, p.bufa().qualOrigBufLen);
                                        dumpAlQv_2_->write(p.bufb().qualOrigBuf, p.bufb().qualOrigBufLen);
                                }
                                MUTEX_UNLOCK(dumpAlignLockPE_);
                        }
                }
        }

        /**
         * Dump an unaligned read to all of the appropriate output streams.
         * Be careful to synchronize correctly - there may be multiple
         * simultaneous writers.
         */
        void dumpUnal(PatternSourcePerThread& p) {
                if(!dumpUnalignFlag_) return;
                if(!p.paired() || onePairFile_) {
                        // Dump unpaired read to an unaligned-read file of the same format
                        if(!dumpUnalBase_.empty()) {
                                MUTEX_LOCK(dumpUnalLock_);
                                if(dumpUnal_ == NULL) {
                                        assert(dumpUnalQv_ == NULL);
                                        dumpUnal_ = openOf(dumpUnalBase_, 0, "");
                                        assert(dumpUnal_ != NULL);
                                        if(p.bufa().qualOrigBufLen > 0) {
                                                dumpUnalQv_ = openOf(dumpUnalBase_ + ".qual", 0, "");
                                                assert(dumpUnalQv_ != NULL);
                                        }
                                }
                                dumpUnal_->write(p.bufa().readOrigBuf, p.bufa().readOrigBufLen);
                                if(dumpUnalQv_ != NULL) {
                                        dumpUnalQv_->write(p.bufa().qualOrigBuf, p.bufa().qualOrigBufLen);
                                }
                                MUTEX_UNLOCK(dumpUnalLock_);
                        }
                } else {
                        // Dump paired-end read to an unaligned-read file (or pair
                        // of files) of the same format
                        if(!dumpUnalBase_.empty()) {
                                MUTEX_LOCK(dumpUnalLockPE_);
                                if(dumpUnal_1_ == NULL) {
                                        assert(dumpUnalQv_1_ == NULL);
                                        assert(dumpUnalQv_2_ == NULL);
                                        dumpUnal_1_ = openOf(dumpUnalBase_, 1, "");
                                        dumpUnal_2_ = openOf(dumpUnalBase_, 2, "");
                                        assert(dumpUnal_1_ != NULL);
                                        assert(dumpUnal_2_ != NULL);
                                        if(p.bufa().qualOrigBufLen > 0) {
                                                dumpUnalQv_1_ = openOf(dumpUnalBase_ + ".qual", 1, "");
                                                dumpUnalQv_2_ = openOf(dumpUnalBase_ + ".qual", 2, "");
                                        }
                                }
                                dumpUnal_1_->write(p.bufa().readOrigBuf, p.bufa().readOrigBufLen);
                                dumpUnal_2_->write(p.bufb().readOrigBuf, p.bufb().readOrigBufLen);
                                if(dumpUnalQv_1_ != NULL) {
                                        dumpUnalQv_1_->write(p.bufa().qualOrigBuf, p.bufa().qualOrigBufLen);
                                        dumpUnalQv_2_->write(p.bufb().qualOrigBuf, p.bufb().qualOrigBufLen);
                                }
                                MUTEX_UNLOCK(dumpUnalLockPE_);
                        }
                }
        }

        /**
         * Dump a maxed-out read to all of the appropriate output streams.
         * Be careful to synchronize correctly - there may be multiple
         * simultaneous writers.
         */
        void dumpMaxed(PatternSourcePerThread& p) {
                if(!dumpMaxedFlag_) {
                        if(dumpUnalignFlag_) dumpUnal(p);
                        return;
                }
                if(!p.paired() || onePairFile_) {
                        // Dump unpaired read to a maxed-out-read file of the same format
                        if(!dumpMaxBase_.empty()) {
                                MUTEX_LOCK(dumpMaxLock_);
                                if(dumpMax_ == NULL) {
                                        dumpMax_ = openOf(dumpMaxBase_, 0, "");
                                        assert(dumpMax_ != NULL);
                                        if(p.bufa().qualOrigBufLen > 0) {
                                                dumpMaxQv_ = openOf(dumpMaxBase_ + ".qual", 0, "");
                                        }
                                }
                                dumpMax_->write(p.bufa().readOrigBuf, p.bufa().readOrigBufLen);
                                if(dumpMaxQv_ != NULL) {
                                        dumpMaxQv_->write(p.bufa().qualOrigBuf, p.bufa().qualOrigBufLen);
                                }
                                MUTEX_UNLOCK(dumpMaxLock_);
                        }
                } else {
                        // Dump paired-end read to a maxed-out-read file (or pair
                        // of files) of the same format
                        if(!dumpMaxBase_.empty()) {
                                MUTEX_LOCK(dumpMaxLockPE_);
                                if(dumpMax_1_ == NULL) {
                                        assert(dumpMaxQv_1_ == NULL);
                                        assert(dumpMaxQv_2_ == NULL);
                                        dumpMax_1_ = openOf(dumpMaxBase_, 1, "");
                                        dumpMax_2_ = openOf(dumpMaxBase_, 2, "");
                                        assert(dumpMax_1_ != NULL);
                                        assert(dumpMax_2_ != NULL);
                                        if(p.bufa().qualOrigBufLen > 0) {
                                                dumpMaxQv_1_ = openOf(dumpMaxBase_ + ".qual", 1, "");
                                                dumpMaxQv_2_ = openOf(dumpMaxBase_ + ".qual", 2, "");
                                        }
                                }
                                dumpMax_1_->write(p.bufa().readOrigBuf, p.bufa().readOrigBufLen);
                                dumpMax_2_->write(p.bufb().readOrigBuf, p.bufb().readOrigBufLen);
                                if(dumpMaxQv_1_ != NULL) {
                                        dumpMaxQv_1_->write(p.bufa().qualOrigBuf, p.bufa().qualOrigBufLen);
                                        dumpMaxQv_2_->write(p.bufb().qualOrigBuf, p.bufb().qualOrigBufLen);
                                }
                                MUTEX_UNLOCK(dumpMaxLockPE_);
                        }
                }
        }

        /**
         * Report a maxed-out read.  Typically we do nothing, but we might
         * want to print a placeholder when output is chained.
         */
        virtual void reportMaxed(vector<Hit>& hs, PatternSourcePerThread& p) {
                mainlock();
                numMaxed_++;
                mainunlock();
        }

        /**
         * Report an unaligned read.  Typically we do nothing, but we might
         * want to print a placeholder when output is chained.
         */
        virtual void reportUnaligned(PatternSourcePerThread& p) {
                mainlock();
                numUnaligned_++;
                mainunlock();
        }

protected:

        /// Implementation of hit-report
        virtual void reportHit(const Hit& h) {
                assert(h.repOk());
                mainlock();
                commitHit(h);
                first_ = false;
                if(h.mate > 0) numReportedPaired_++;
                else           numReported_++;
                numAligned_++;
                mainunlock();
        }

        /**
         * Close (and flush) all OutFileBufs.
         */
        void closeOuts() {
                // Flush and close all non-NULL output streams
                for(size_t i = 0; i < _outs.size(); i++) {
                        if(_outs[i] != NULL && !_outs[i]->closed()) {
                                _outs[i]->close();
                        }
                }
        }

        /**
         * Lock the output buffer for the output stream for the reference
         * with index 'refIdx'.  By default, hits for all references are
         * directed to the same output stream, but if --refout is
         * specified, each reference has its own output stream.
         */
        void lock(size_t refIdx) {
                size_t strIdx = refIdxToStreamIdx(refIdx);
                MUTEX_LOCK(_locks[strIdx]);
        }

        /**
         * Unlock the output buffer for the output stream for the reference
         * with index 'refIdx'.  By default, hits for all references are
         * directed to the same output stream, but if --refout is
         * specified, each reference has its own output stream.
         */
        void unlock(size_t refIdx) {
                size_t strIdx = refIdxToStreamIdx(refIdx);
                MUTEX_UNLOCK(_locks[strIdx]);
        }

        vector<OutFileBuf*> _outs;        /// the alignment output stream(s)
        bool                _deleteOuts;  /// whether to delete elements of _outs upon exit
        RecalTable         *recalTable_;  /// recalibration table
        vector<string>*     _refnames;    /// map from reference indexes to names
        int                 _numWrappers; /// # threads owning a wrapper for this HitSink
        vector<MUTEX_T>     _locks;       /// pthreads mutexes for per-file critical sections
        MUTEX_T             _mainlock;    /// pthreads mutex protecting fields of this object

        // Output filenames for dumping
        std::string dumpAlBase_;
        std::string dumpUnalBase_;
        std::string dumpMaxBase_;

        bool onePairFile_;
        bool sampleMax_;

        // Output streams for dumping sequences
        std::ofstream *dumpAl_;       // for single-ended reads
        std::ofstream *dumpAl_1_;     // for first mates
        std::ofstream *dumpAl_2_;     // for second mates
        std::ofstream *dumpUnal_;     // for single-ended reads
        std::ofstream *dumpUnal_1_;   // for first mates
        std::ofstream *dumpUnal_2_;   // for second mates
        std::ofstream *dumpMax_;      // for single-ended reads
        std::ofstream *dumpMax_1_;    // for first mates
        std::ofstream *dumpMax_2_;    // for second mates

        // Output streams for dumping qualities
        std::ofstream *dumpAlQv_;     // for single-ended reads
        std::ofstream *dumpAlQv_1_;   // for first mates
        std::ofstream *dumpAlQv_2_;   // for second mates
        std::ofstream *dumpUnalQv_;   // for single-ended reads
        std::ofstream *dumpUnalQv_1_; // for first mates
        std::ofstream *dumpUnalQv_2_; // for second mates
        std::ofstream *dumpMaxQv_;    // for single-ended reads
        std::ofstream *dumpMaxQv_1_;  // for first mates
        std::ofstream *dumpMaxQv_2_;  // for second mates

        /**
         * Open an ofstream with the given name; output an error message
         * and quit if it fails.
         */
        std::ofstream* openOf(const std::string& name,
                              int mateType,
                              const std::string& suffix)
        {
                std::string s = name;
                size_t dotoff = name.find_last_of(".");
                if(mateType == 1) {
                        if(dotoff == string::npos) {
                                s += "_1"; s += suffix;
                        } else {
                                s = name.substr(0, dotoff) + "_1" + s.substr(dotoff);
                        }
                } else if(mateType == 2) {
                        if(dotoff == string::npos) {
                                s += "_2"; s += suffix;
                        } else {
                                s = name.substr(0, dotoff) + "_2" + s.substr(dotoff);
                        }
                } else if(mateType != 0) {
                        cerr << "Bad mate type " << mateType << endl; throw 1;
                }
                std::ofstream* tmp = new ofstream(s.c_str(), ios::out);
                if(tmp->fail()) {
                        if(mateType == 0) {
                                cerr << "Could not open single-ended aligned/unaligned-read file for writing: " << name << endl;
                        } else {
                                cerr << "Could not open paired-end aligned/unaligned-read file for writing: " << name << endl;
                        }
                        throw 1;
                }
                return tmp;
        }
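
        /*
         * Filename sketch: for mate 1, openOf() inserts "_1" before the last
         * dot when the name has one, so openOf("al.fq", 1, "") opens
         * "al_1.fq"; with no dot, the suffix argument is appended instead,
         * so openOf("al", 2, ".fq") opens "al_2.fq".
         */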

        /**
         * Initialize the dump output streams, flags, and locks.
         */
        void initDumps() {
                dumpAl_       = dumpAl_1_     = dumpAl_2_     = NULL;
                dumpUnal_     = dumpUnal_1_   = dumpUnal_2_   = NULL;
                dumpMax_      = dumpMax_1_    = dumpMax_2_    = NULL;
                dumpAlQv_     = dumpAlQv_1_   = dumpAlQv_2_   = NULL;
                dumpUnalQv_   = dumpUnalQv_1_ = dumpUnalQv_2_ = NULL;
                dumpMaxQv_    = dumpMaxQv_1_  = dumpMaxQv_2_  = NULL;
                dumpAlignFlag_   = !dumpAlBase_.empty();
                dumpUnalignFlag_ = !dumpUnalBase_.empty();
                dumpMaxedFlag_   = !dumpMaxBase_.empty();
                MUTEX_INIT(dumpAlignLock_);
                MUTEX_INIT(dumpAlignLockPE_);
                MUTEX_INIT(dumpUnalLock_);
                MUTEX_INIT(dumpUnalLockPE_);
                MUTEX_INIT(dumpMaxLock_);
                MUTEX_INIT(dumpMaxLockPE_);
        }

        void destroyDumps() {
                if(dumpAl_       != NULL) { dumpAl_->close();       delete dumpAl_; }
                if(dumpAl_1_     != NULL) { dumpAl_1_->close();     delete dumpAl_1_; }
                if(dumpAl_2_     != NULL) { dumpAl_2_->close();     delete dumpAl_2_; }
                if(dumpUnal_     != NULL) { dumpUnal_->close();     delete dumpUnal_; }
                if(dumpUnal_1_   != NULL) { dumpUnal_1_->close();   delete dumpUnal_1_; }
                if(dumpUnal_2_   != NULL) { dumpUnal_2_->close();   delete dumpUnal_2_; }
                if(dumpMax_      != NULL) { dumpMax_->close();      delete dumpMax_; }
                if(dumpMax_1_    != NULL) { dumpMax_1_->close();    delete dumpMax_1_; }
                if(dumpMax_2_    != NULL) { dumpMax_2_->close();    delete dumpMax_2_; }
                if(dumpAlQv_     != NULL) { dumpAlQv_->close();     delete dumpAlQv_; }
                if(dumpAlQv_1_   != NULL) { dumpAlQv_1_->close();   delete dumpAlQv_1_; }
                if(dumpAlQv_2_   != NULL) { dumpAlQv_2_->close();   delete dumpAlQv_2_; }
                if(dumpUnalQv_   != NULL) { dumpUnalQv_->close();   delete dumpUnalQv_; }
                if(dumpUnalQv_1_ != NULL) { dumpUnalQv_1_->close(); delete dumpUnalQv_1_; }
                if(dumpUnalQv_2_ != NULL) { dumpUnalQv_2_->close(); delete dumpUnalQv_2_; }
                if(dumpMaxQv_    != NULL) { dumpMaxQv_->close();    delete dumpMaxQv_; }
                if(dumpMaxQv_1_  != NULL) { dumpMaxQv_1_->close();  delete dumpMaxQv_1_; }
                if(dumpMaxQv_2_  != NULL) { dumpMaxQv_2_->close();  delete dumpMaxQv_2_; }
        }

        // Locks for dumping
        MUTEX_T dumpAlignLock_;
        MUTEX_T dumpAlignLockPE_; // _1 and _2
        MUTEX_T dumpUnalLock_;
        MUTEX_T dumpUnalLockPE_;  // _1 and _2
        MUTEX_T dumpMaxLock_;
        MUTEX_T dumpMaxLockPE_;   // _1 and _2

        // false -> no dumping
        bool dumpAlignFlag_;
        bool dumpUnalignFlag_;
        bool dumpMaxedFlag_;

        volatile bool     first_;       /// true -> first hit hasn't yet been reported
        volatile uint64_t numAligned_;  /// # reads with >= 1 alignment
        volatile uint64_t numUnaligned_;/// # reads with no alignments
        volatile uint64_t numMaxed_;    /// # reads with # alignments exceeding -m ceiling
        volatile uint64_t numReported_; /// # single-ended alignments reported
        volatile uint64_t numReportedPaired_; /// # paired-end alignments reported
        bool quiet_;  /// true -> don't print alignment stats at the end
        ios_base::openmode ssmode_;     /// output mode for stringstreams
};

/**
 * A per-thread wrapper for a HitSink.  Incorporates state that a
 * single search thread cares about.
 */
class HitSinkPerThread {
public:
        HitSinkPerThread(HitSink& sink, uint32_t max, uint32_t n) :
                _sink(sink),
                _bestRemainingStratum(0),
                _numValidHits(0llu),
                _hits(),
                _bufferedHits(),
                hitsForThisRead_(),
                _max(max),
                _n(n)
        {
                _sink.addWrapper();
                assert_gt(_n, 0);
        }

        virtual ~HitSinkPerThread() { }

        /// Return the vector of retained hits
        vector<Hit>& retainedHits()   { return _hits; }

        /// Finalize the current read
        virtual uint32_t finishRead(PatternSourcePerThread& p, bool report, bool dump) {
                uint32_t ret = finishReadImpl();
                _bestRemainingStratum = 0;
                if(!report) {
                        _bufferedHits.clear();
                        return 0;
                }
                bool maxed = (ret > _max);
                bool unal = (ret == 0);
                if(dump && (unal || maxed)) {
                        // Either no reportable hits were found or the number of
                        // reportable hits exceeded the -m limit specified by the
                        // user
                        assert(ret == 0 || ret > _max);
                        if(maxed) _sink.dumpMaxed(p);
                        else      _sink.dumpUnal(p);
                }
                ret = 0;
                if(maxed) {
                        // Report that the read maxed-out; useful for chaining output
                        if(dump) _sink.reportMaxed(_bufferedHits, p);
                        _bufferedHits.clear();
                } else if(unal) {
                        // Report that the read failed to align; useful for chaining output
                        if(dump) _sink.reportUnaligned(p);
                } else {
                        // Flush buffered hits
                        assert_gt(_bufferedHits.size(), 0);
                        if(_bufferedHits.size() > _n) {
                                _bufferedHits.resize(_n);
                        }
                        _sink.reportHits(_bufferedHits);
                        _sink.dumpAlign(p);
                        ret = _bufferedHits.size();
                        _bufferedHits.clear();
                }
                assert_eq(0, _bufferedHits.size());
                return ret;
        }
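
        /*
         * Outcome sketch for finishRead() (illustrative numbers): with
         * _max == 5 and _n == 2, a read whose finishReadImpl() returns 7 is
         * treated as maxed-out (dumpMaxed/reportMaxed), a return of 0 is
         * treated as unaligned (dumpUnal/reportUnaligned), and a return of 3
         * flushes the buffered hits, trimmed to the first _n == 2, to the
         * shared HitSink.
         */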
1116
1117         virtual uint32_t finishReadImpl() = 0;
1118
1119         /**
1120          * Implementation for hit reporting; update per-thread _hits and
1121          * _numReportableHits variables and call the master HitSink to do the actual
1122          * reporting
1123          */
1124         virtual void bufferHit(const Hit& h, int stratum) {
1125 #ifndef NDEBUG
1126                 // Ensure all buffered hits have the same patid
1127                 for(size_t i = 1; i < _bufferedHits.size(); i++) {
1128                         assert_eq(_bufferedHits[0].patId, _bufferedHits[i].patId);
1129                 }
1130 #endif
1131                 _bufferedHits.push_back(h);
1132         }
1133
1134         /**
1135          * Concrete subclasses override this to (possibly) report a hit and
1136          * return true iff the caller should continue to report more hits.
1137          */
1138         virtual bool reportHit(const Hit& h, int stratum) {
1139                 assert(h.repOk());
1140                 _numValidHits++;
1141                 return true;
1142         }
1143
1144         /// Return the number of valid hits so far (not necessarily
1145         /// reported).  It's up to the concrete subclasses
1146         uint64_t numValidHits()    { return _numValidHits; }
1147
1148         /**
1149          * Return true if there are no more reportable hits.
1150          */
1151         bool finishedWithStratum(int stratum) {
1152                 bool ret = finishedWithStratumImpl(stratum);
1153                 _bestRemainingStratum = stratum+1;
1154                 return ret;
1155         }
1156
1157         /**
1158          * Use the given set of hits as a starting point.  By default, we don't
1159          */
1160         virtual bool setHits(HitSet& hs) {
1161                 if(!hs.empty()) {
1162                         cerr << "Error: default setHits() called with non-empty HitSet" << endl;
1163                         throw 1;
1164                 }
1165                 return false;
1166         }
1167
1168         /**
1169          * Return true if there are no reportable hits with the given cost
1170          * (or worse).
1171          */
1172         virtual bool irrelevantCost(uint16_t cost) {
1173                 return false;
1174         }
1175
1176         /**
1177          * Concrete subclasses override this to determine whether the
1178          * search routine should keep searching after having finished
1179          * reporting all alignments at the given stratum.
1180          */
1181         virtual bool finishedWithStratumImpl(int stratum) = 0;
1182
1183         /// The mhits maximum
1184         uint32_t overThresh() { return _max; }
1185
1186         /// Whether this thread, for this read, knows that we have already
1187         /// exceeded the mhits maximum
1188         bool exceededOverThresh() { return hitsForThisRead_ > _max; }
1189
1190         /// Return whether we span strata
1191         virtual bool spanStrata() = 0;
1192
1193         /// Return whether we report only the best possible hits
1194         virtual bool best() = 0;
1195
1196         /**
1197          * Return true iff the underlying HitSink dumps unaligned or
1198          * maxed-out reads.
1199          */
1200         bool dumpsReads() {
1201                 return _sink.dumpsReads();
1202         }
1203
1204         /**
1205          * Return true iff there are currently no buffered hits.
1206          */
1207         bool empty() const {
1208                 return _bufferedHits.empty();
1209         }
1210
1211         /**
1212          * Return the number of currently buffered hits.
1213          */
1214         size_t size() const {
1215                 return _bufferedHits.size();
1216         }
1217
1218         /**
1219          * Return max # hits to report (*2 in paired-end mode because mates
1220          * count separately)
1221          */
1222         virtual uint32_t maxHits() {
1223                 return _n;
1224         }
1225
1226 protected:
1227         HitSink&    _sink; /// Ultimate destination of reported hits
1228         /// Least # mismatches in alignments that will be reported in the
1229         /// future.  Updated by the search routine.
1230         int         _bestRemainingStratum;
1231         /// # hits reported to this HitSink so far (not all of which were
1232         /// necessarily reported to _sink)
1233         uint64_t    _numValidHits;
1234         vector<Hit> _hits; /// Repository for retained hits
1235         /// Buffered hits, to be reported and flushed at end of read-phase
1236         vector<Hit> _bufferedHits;
1237
1238         // Following variables are declared in the parent but maintained in
1239         // the concrete subclasses
1240         uint32_t hitsForThisRead_; /// # hits for this read so far
1241         uint32_t _max; /// don't report any hits if there were > _max
1242         uint32_t _n;   /// report at most _n hits
1243 };
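     // A rough sketch of how a concrete HitSinkPerThread is driven by the
     // search routine (illustrative only; the real call sites live in the
     // aligner code, not in this header):
     //
     //   if(sinkpt.irrelevantCost(cost)) { /* skip candidates this costly */ }
     //   bool stop = sinkpt.reportHit(hit, stratum);      // true -> stop now
     //   ...
     //   bool done = sinkpt.finishedWithStratum(stratum); // after each stratum
     //
     // Once the read is exhausted, finishReadImpl() supplies the per-subclass
     // finalization step and the buffered hits are handed to the master HitSink.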
1244
1245 /**
1246  * Abstract parent factory for HitSinkPerThreads.
1247  */
1248 class HitSinkPerThreadFactory {
1249 public:
1250         virtual ~HitSinkPerThreadFactory() { }
1251         virtual HitSinkPerThread* create() const = 0;
1252         virtual HitSinkPerThread* createMult(uint32_t m) const = 0;
1253
1254         /// Free memory associated with a per-thread hit sink
1255         virtual void destroy(HitSinkPerThread* sink) const {
1256                 assert(sink != NULL);
1257                 // Free the HitSinkPerThread
1258                 delete sink;
1259         }
1260 };
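     // Minimal usage sketch for the factory interface above (hypothetical
     // variable names; 'sink' is some concrete HitSink):
     //
     //   NGoodHitSinkPerThreadFactory fact(sink, /*n=*/1, /*max=*/0xffffffff);
     //   HitSinkPerThread* sinkpt  = fact.create();       // one per thread
     //   HitSinkPerThread* psinkpt = fact.createMult(2);  // e.g. for mates
     //   ...
     //   fact.destroy(sinkpt);
     //   fact.destroy(psinkpt);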
1261
1262 /**
1263  * Report first N good alignments encountered; trust search routine
1264  * to try alignments in something approximating a best-first order.
1265  * Best used in combination with a stringent alignment policy.
1266  */
1267 class NGoodHitSinkPerThread : public HitSinkPerThread {
1268
1269 public:
1270         NGoodHitSinkPerThread(
1271                         HitSink& sink,
1272                         uint32_t n,
1273                         uint32_t max) :
1274                                 HitSinkPerThread(sink, max, n)
1275         { }
1276
1277         virtual bool spanStrata() {
1278                 return true; // we span strata
1279         }
1280
1281         virtual bool best() {
1282                 return false; // we settle for "good" hits
1283         }
1284
1285         /// Finalize current read
1286         virtual uint32_t finishReadImpl() {
1287                 uint32_t ret = hitsForThisRead_;
1288                 hitsForThisRead_ = 0;
1289                 return ret;
1290         }
1291
1292         /**
1293          * Report and then return true if we've already reported N good
1294          * hits.  Ignore the stratum - it's not relevant for finding "good"
1295          * hits.
1296          */
1297         virtual bool reportHit(const Hit& h, int stratum) {
1298                 HitSinkPerThread::reportHit(h, stratum);
1299                 hitsForThisRead_++;
1300                 if(hitsForThisRead_ > _max) {
1301                         return true; // done - report nothing
1302                 }
1303                 //if(hitsForThisRead_ <= _n) {
1304                         // Buffer the hit; the <= _n guard above is currently disabled
1305                         bufferHit(h, stratum);
1306                 //}
1307                 if(hitsForThisRead_ == _n &&
1308                    (_max == 0xffffffff || _max < _n))
1309                 {
1310                         return true; // already reported N good hits and max isn't set; stop!
1311                 }
1312                 return false; // not at N or max yet; keep going
1313         }
1314
1315         /**
1316          * Always return true; search routine should only stop if it's
1317          * already reported N hits.
1318          */
1319         virtual bool finishedWithStratumImpl(int stratum) { return false; }
1320 };
1321
1322 /**
1323  * Concrete factory for NGoodHitSinkPerThread objects.
1324  */
1325 class NGoodHitSinkPerThreadFactory : public HitSinkPerThreadFactory {
1326 public:
1327         NGoodHitSinkPerThreadFactory(
1328                         HitSink& sink,
1329                         uint32_t n,
1330                         uint32_t max) :
1331                         sink_(sink),
1332                         n_(n),
1333                         max_(max)
1334         { }
1335
1336         /**
1337          * Allocate a new NGoodHitSinkPerThread object on the heap,
1338          * using the parameters given in the constructor.
1339          */
1340         virtual HitSinkPerThread* create() const {
1341                 return new NGoodHitSinkPerThread(sink_, n_, max_);
1342         }
1343         virtual HitSinkPerThread* createMult(uint32_t m) const {
1344                 uint32_t max = max_ * (max_ == 0xffffffff ? 1 : m);
1345                 uint32_t n = n_ * (n_ == 0xffffffff ? 1 : m);
1346                 return new NGoodHitSinkPerThread(sink_, n, max);
1347         }
1348
1349 private:
1350         HitSink& sink_;
1351         uint32_t n_;
1352         uint32_t max_;
1353 };
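     // Note on createMult(): a limit is scaled by m only when it is finite.
     // For example (illustrative values), n_ = 1, max_ = 5 and m = 2 yield
     // n = 2 and max = 10, while a sentinel value of 0xffffffff (unlimited)
     // is passed through unchanged.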
1354
1355 /**
1356  * Report the first N best alignments encountered in a single
1357  * alignment stratum assuming that we're receiving the alignments in
1358  * best-first order.
1359  */
1360 class NBestFirstStratHitSinkPerThread : public HitSinkPerThread {
1361
1362 public:
1363         NBestFirstStratHitSinkPerThread(
1364                         HitSink& sink,
1365                         uint32_t n,
1366                         uint32_t max,
1367                         uint32_t mult) :
1368                                 HitSinkPerThread(sink, max, n),
1369                                 bestStratum_(999), mult_(mult)
1370         { }
1371
1372         /**
1373          * false -> we do not allow strata to be spanned
1374          */
1375         virtual bool spanStrata() {
1376                 return false; // we do not span strata
1377         }
1378
1379         /**
1380          * true -> we report best hits
1381          */
1382         virtual bool best() {
1383                 return true;
1384         }
1385
1386         /**
1387          * Report the hit; return true (stop) once we've reported N or exceeded the maximum.
1388          */
1389         virtual bool reportHit(const Hit& h, int stratum) {
1390                 HitSinkPerThread::reportHit(h, stratum);
1391                 // This hit is within the best possible remaining stratum,
1392                 // so it should definitely count
1393                 hitsForThisRead_++;
1394                 // Update the best stratum seen so far
1395                 if(stratum < bestStratum_) {
1396                         bestStratum_ = stratum;
1397                 }
1398                 if(hitsForThisRead_ > _max) {
1399                         return true; // done - report nothing
1400                 }
1401                 //if(hitsForThisRead_ <= _n) {
1402                         bufferHit(h, stratum);
1403                 //}
1404                 if(hitsForThisRead_ == _n &&
1405                    (_max == 0xffffffff || _max < _n))
1406                 {
1407                         return true; // already reported N good hits; stop!
1408                 }
1409                 return false; // not at N yet; keep going
1410         }
1411
1412         /**
1413          * Finalize the current read: reset per-read state and set the
1414          * 'oms' field of each buffered hit to the number of other
1415          * alignments at this stratum.
1416          */
1417         virtual uint32_t finishReadImpl() {
1418                 uint32_t ret = hitsForThisRead_;
1419                 hitsForThisRead_ = 0;
1420                 bestStratum_ = 999;
1421                 const size_t sz = _bufferedHits.size();
1422                 for(size_t i = 0; i < sz; i++) {
1423                         // Set 'oms' according to the number of other alignments
1424                         // at this stratum
1425                         _bufferedHits[i].oms = (sz / mult_) - 1;
1426                 }
1427                 return ret;
1428         }
1429
1430         /**
1431          * If we had any alignments at all and we're now moving on to a new
1432          * stratum, then we're done.
1433          */
1434         virtual bool finishedWithStratumImpl(int stratum) {
1435                 return hitsForThisRead_ > 0;
1436         }
1437
1438         /**
1439          * If there have been any hits reported so far, classify any
1440          * subsequent alignments with higher strata as irrelevant.
1441          */
1442         virtual bool irrelevantCost(uint16_t cost) {
1443                 if(hitsForThisRead_) {
1444                         // irrelevant iff at worse stratum
1445                         return ((int)cost >> 14) > bestStratum_;
1446                 }
1447                 return false;
1448         }
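             // The test above assumes the cost encoding used throughout this
             // file: the high two bits of the 16-bit cost hold the stratum and
             // the low 14 bits hold the quality-derived component.  For
             // example (illustrative numbers), with bestStratum_ == 1 a cost
             // of 0x8005 decodes to stratum 2 and is deemed irrelevant, while
             // 0x4fff decodes to stratum 1 and is not.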
1449
1450 private:
1451
1452         int bestStratum_; /// best stratum observed so far
1453         uint32_t mult_; /// number of batched-up alignments
1454 };
1455
1456 /**
1457  * Concrete factory for NBestFirstStratHitSinkPerThread objects.
1458  */
1459 class NBestFirstStratHitSinkPerThreadFactory : public HitSinkPerThreadFactory {
1460 public:
1461         NBestFirstStratHitSinkPerThreadFactory(
1462                         HitSink& sink,
1463                         uint32_t n,
1464                         uint32_t max) :
1465                         sink_(sink),
1466                         n_(n),
1467                         max_(max)
1468         { }
1469
1470         /**
1471          * Allocate a new NBestFirstStratHitSinkPerThread object on the heap,
1472          * using the parameters given in the constructor.
1473          */
1474         virtual HitSinkPerThread* create() const {
1475                 return new NBestFirstStratHitSinkPerThread(sink_, n_, max_, 1);
1476         }
1477         virtual HitSinkPerThread* createMult(uint32_t m) const {
1478                 uint32_t max = max_ * (max_ == 0xffffffff ? 1 : m);
1479                 uint32_t n = n_ * (n_ == 0xffffffff ? 1 : m);
1480                 return new NBestFirstStratHitSinkPerThread(sink_, n, max, m);
1481         }
1482
1483 private:
1484         HitSink& sink_;
1485         uint32_t n_;
1486         uint32_t max_;
1487 };
1488
1489 /**
1490  * Per-thread hit sink that keeps its hits in a HitSet so that results can be chained between alignment phases.
1491  */
1492 class ChainingHitSinkPerThread : public HitSinkPerThread {
1493 public:
1494
1495         ChainingHitSinkPerThread(HitSink& sink,
1496                                  uint32_t n,
1497                                  uint32_t max,
1498                                  bool strata,
1499                                  uint32_t mult) :
1500         HitSinkPerThread(sink, max, n),
1501         mult_(mult), strata_(strata), cutoff_(0xffff)
1502         {
1503                 hs_ = NULL;
1504                 hsISz_ = 0;
1505         }
1506
1507         /**
1508          * Return true iff we're allowed to span strata.
1509          */
1510         virtual bool spanStrata() { return !strata_; }
1511
1512         /**
1513          * true -> we report best hits
1514          */
1515         virtual bool best() { return true; }
1516
1517         /**
1518          * Report the hit; return true (stop) if we've already reported N or exceeded the maximum.
1519          */
1520         virtual bool reportHit(const Hit& h, int stratum) {
1521                 HitSinkPerThread::reportHit(h, stratum);
1522                 assert(hs_ != NULL);
1523                 assert(hs_->sorted());
1524                 assert_eq(h.stratum, stratum);
1525                 assert_eq(1, mult_);
1526                 assert(consistentStrata());
1527                 assert(!irrelevantCost(h.cost));
1528
1529                 if(!hs_->empty() && strata_ && stratum < hs_->front().stratum) {
1530                         hs_->clear();
1531                         _bufferedHits.clear();
1532                         hitsForThisRead_ = 0;
1533                 }
1534                 assert(consistentStrata());
1535
1536                 size_t replPos = 0;
1537                 if(!hs_->empty() && hs_->tryReplacing(h.h, h.fw, h.cost, replPos)) {
1538                         if(replPos != 0xffffffff) {
1539                                 // Replaced an existing hit
1540                                 assert_lt(replPos, _bufferedHits.size());
1541                                 _bufferedHits[replPos] = h;
1542                                 hs_->sort();
1543                         }
1544                         // Size didn't change, so no need to check against _max and _n
1545                         assert(hs_->sorted());
1546                         assert(consistentStrata());
1547                 } else {
1548                         // Added a new hit
1549                         hs_->expand();
1550                         hs_->back().h = h.h;
1551                         hs_->back().fw = h.fw;
1552                         hs_->back().stratum = h.stratum;
1553                         hs_->back().cost = h.cost;
1554                         hitsForThisRead_++;
1555                         if(hs_->size() > _max) {
1556                                 assert_eq(hs_->size(), _bufferedHits.size());
1557                                 return true; // done - report nothing
1558                         }
1559                         _bufferedHits.push_back(h);
1560                         if(hsISz_ == 0 &&
1561                            hs_->size() == _n &&
1562                            (_max == 0xffffffff || _max < _n))
1563                         {
1564                                 assert_eq(hs_->size(), _bufferedHits.size());
1565                                 return true; // already reported N good hits; stop!
1566                         }
1567                         hs_->sort();
1568                         assert(consistentStrata());
1569                 }
1570                 assert_eq(hs_->size(), _bufferedHits.size());
1571                 updateCutoff();
1572                 return false; // not at N yet; keep going
1573         }
1574
1575         /**
1576          * Finalize the current read: set 'oms' on the buffered hits, sort
1577          * them by cost, trim to at most N, and ensure that a maxed-out
1578          * read is treated as maxed-out rather than unaligned.
1579          */
1580         virtual uint32_t finishReadImpl() {
1581                 assert_eq(1, mult_);
1582                 assert(hs_ != NULL);
1583                 assert(consistentStrata());
1584                 uint32_t ret = hitsForThisRead_;
1585                 hitsForThisRead_ = 0;
1586                 if(!hs_->empty() && hs_->size() < _n) {
1587                         const size_t sz = _bufferedHits.size();
1588                         for(size_t i = 0; i < sz; i++) {
1589                                 // Set 'oms' according to the number of other alignments
1590                                 // at this stratum
1591                                 _bufferedHits[i].oms = (sz / mult_) - 1;
1592                         }
1593                 }
1594                 std::sort(_bufferedHits.begin(), _bufferedHits.end(), HitCostCompare());
1595                 if(hs_->size() > _n) {
1596                         _bufferedHits.resize(_n);
1597                 }
1598                 assert(consistentStrata());
1599                 // Make sure that a chained, maxed-out read gets treated as
1600                 // maxed-out and not as unaligned
1601                 if(hs_->empty() && hs_->maxedStratum != -1) {
1602                         assert(_bufferedHits.empty());
1603                         // TODO: switch to using HitSet internally all the
1604                         // time instead of padding _bufferedHits like this.
1605                         _bufferedHits.resize(_max+1);
1606                         for(size_t i = 0; i < _max+1; i++) {
1607                                 _bufferedHits[i].stratum = hs_->maxedStratum;
1608                         }
1609                         ret = _max+1;
1610                 } else if(!hs_->empty() && hs_->maxedStratum != -1) {
1611                         assert_lt(hs_->front().stratum, hs_->maxedStratum);
1612                 }
1613                 return ret;
1614         }
1615
1616         /**
1617          * Set the initial set of Hits.
1618          */
1619         virtual bool setHits(HitSet& hs) {
1620                 hs_ = &hs;
1621                 assert(hs_ != NULL);
1622                 hsISz_ = hs.size();
1623                 cutoff_ = 0xffff;
1624                 hitsForThisRead_ = hs.size();
1625                 assert(_bufferedHits.empty());
1626                 assert_geq(hs.maxedStratum, -1);
1627                 assert_lt(hs.maxedStratum, 4);
1628                 if(!hs.empty()) {
1629                         assert_eq(-1, hs.maxedStratum);
1630                         hs.sort();
1631                         Hit::fromHitSet(_bufferedHits, hs);
1632                         assert(!_bufferedHits.empty());
1633                         assert_leq(_bufferedHits.size(), _max);
1634                         assert(consistentStrata());
1635                 } else if(hs.maxedStratum != -1) {
1636                         if(hs.maxedStratum == 0) {
1637                                 cutoff_ = 0;
1638                                 // Already done
1639                                 return true;
1640                         }
1641                         cutoff_ = (hs.maxedStratum << 14);
1642                 }
1643                 assert_eq(hs_->size(), _bufferedHits.size());
1644                 updateCutoff();
1645                 return false;
1646         }
1647
1648         /**
1649          * Should never be called in chaining mode; the assert below
1650          * makes that explicit.
1651          */
1652         virtual bool finishedWithStratumImpl(int stratum) {
1653                 assert(false);
1654                 return false;
1655         }
1656
1657         /**
1658          * Return true iff alignments with the given cost are irrelevant,
1659          * i.e. the cost exceeds the current cutoff.
1660          */
1661         virtual bool irrelevantCost(uint16_t cost) {
1662                 if(cutoff_ == 0) return false;
1663                 return cost > cutoff_;
1664         }
1665
1666 protected:
1667
1668 #ifndef NDEBUG
1669         /**
1670          * Sanity check: when strata_ is set, all buffered hits should
1671          * share the same stratum.
1672          */
1673         bool consistentStrata() {
1674                 if(hs_->empty() || !strata_) return true;
1675                 int stratum = hs_->front().stratum;
1676                 for(size_t i = 1; i < hs_->size(); i++) {
1677                         assert_eq(stratum, (*hs_)[i].stratum);
1678                 }
1679                 return true;
1680         }
1681 #endif
1682
1683         /**
1684          * Update cutoff_, the smallest cost considered irrelevant.
1685          */
1686         void updateCutoff() {
1687                 ASSERT_ONLY(uint16_t origCutoff = cutoff_);
1688                 assert(hs_->sorted());
1689                 assert(hs_->empty() || hs_->back().cost >= hs_->front().cost);
1690                 bool atCapacity = (hs_->size() >= _n);
1691                 if(atCapacity && (_max == 0xffffffff || _max < _n)) {
1692                         cutoff_ = min(hs_->back().cost, cutoff_);
1693                 }
1694                 if(strata_ && !hs_->empty()) {
1695                         uint16_t sc = hs_->back().cost;
1696                         sc = ((sc >> 14) + 1) << 14;
1697                         cutoff_ = min(cutoff_, sc);
1698                         assert_leq(cutoff_, origCutoff);
1699                 }
1700         }
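             // Worked example for the stratified branch above (illustrative
             // values): if the worst retained hit has cost 0x4123 (stratum 1),
             // then ((0x4123 >> 14) + 1) << 14 == 0x8000, so cutoff_ drops to
             // 0x8000 and irrelevantCost() subsequently rejects any candidate
             // whose cost exceeds that boundary.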
1701
1702         HitSet *hs_;
1703         size_t hsISz_;
1704         uint32_t mult_;
1705         bool strata_; /// true -> reporting is stratified
1706         uint16_t cutoff_; /// the smallest irrelevant cost
1707 };
1708
1709 /**
1710  * Concrete factory for ChainingHitSinkPerThread.
1711  */
1712 class ChainingHitSinkPerThreadFactory : public HitSinkPerThreadFactory {
1713 public:
1714         ChainingHitSinkPerThreadFactory(
1715                         HitSink& sink,
1716                         uint32_t n,
1717                         uint32_t max,
1718                         bool strata) :
1719                         sink_(sink),
1720                         n_(n),
1721                         max_(max),
1722                         strata_(strata)
1723         { }
1724
1725         /**
1726          * Allocate a new ChainingHitSinkPerThread object on the heap,
1727          * using the parameters given in the constructor.
1728          */
1729         virtual HitSinkPerThread* create() const {
1730                 return new ChainingHitSinkPerThread(sink_, n_, max_, strata_, 1);
1731         }
1732         virtual HitSinkPerThread* createMult(uint32_t m) const {
1733                 uint32_t max = max_ * (max_ == 0xffffffff ? 1 : m);
1734                 uint32_t n = n_ * (n_ == 0xffffffff ? 1 : m);
1735                 return new ChainingHitSinkPerThread(sink_, n, max, strata_, m);
1736         }
1737
1738 private:
1739         HitSink& sink_;
1740         uint32_t n_;
1741         uint32_t max_;
1742         bool strata_;
1743 };
1744
1745 /**
1746  * Report all valid alignments.
1747  */
1748 class AllHitSinkPerThread : public HitSinkPerThread {
1749
1750 public:
1751         AllHitSinkPerThread(
1752                         HitSink& sink,
1753                         uint32_t max) :
1754                                 HitSinkPerThread(sink, max, 0xffffffff) { }
1755
1756         virtual bool spanStrata() {
1757                 return true; // we span strata
1758         }
1759
1760         virtual bool best() {
1761                 return true; // we report "best" hits
1762         }
1763
1764         /**
1765          * Report the hit and return false; we're finding all hits, so the
1766          * search routine should keep going unless the maximum is exceeded.
1767          */
1768         virtual bool reportHit(const Hit& h, int stratum) {
1769                 HitSinkPerThread::reportHit(h, stratum);
1770                 hitsForThisRead_++;
1771                 if(hitsForThisRead_ > _max) {
1772                         return true; // done - report nothing
1773                 }
1774                 bufferHit(h, stratum);
1775                 return false; // reporting all; always keep going
1776         }
1777
1778         /**
1779          * Finalize the current read: return the hit count and reset it.
1780          */
1781         virtual uint32_t finishReadImpl() {
1782                 uint32_t ret = hitsForThisRead_;
1783                 hitsForThisRead_ = 0;
1784                 return ret;
1785         }
1786
1787         /**
1788          * Always return false; search routine should not stop.
1789          */
1790         virtual bool finishedWithStratumImpl(int stratum) { return false; }
1791 };
1792
1793 /**
1794  * Concrete factory for AllHitSinkPerThread.
1795  */
1796 class AllHitSinkPerThreadFactory : public HitSinkPerThreadFactory {
1797 public:
1798         AllHitSinkPerThreadFactory(
1799                         HitSink& sink,
1800                         uint32_t max) :
1801                         sink_(sink),
1802                         max_(max)
1803         { }
1804
1805         /**
1806          * Allocate a new AllHitSinkPerThread object on the heap,
1807          * using the parameters given in the constructor.
1808          */
1809         virtual HitSinkPerThread* create() const {
1810                 return new AllHitSinkPerThread(sink_, max_);
1811         }
1812         virtual HitSinkPerThread* createMult(uint32_t m) const {
1813                 uint32_t max = max_ * (max_ == 0xffffffff ? 1 : m);
1814                 return new AllHitSinkPerThread(sink_, max);
1815         }
1816
1817 private:
1818         HitSink& sink_;
1819         uint32_t max_;
1820 };
1821
1822 /**
1823  * Sink that prints lines like this:
1824  * (pat-id)[-|+]:<hit1-text-id,hit1-text-offset>,<hit2-text-id...
1825  *
1826  * Activated with --concise
1827  */
1828 class ConciseHitSink : public HitSink {
1829 public:
1830         /**
1831          * Construct a single-stream ConciseHitSink (default)
1832          */
1833         ConciseHitSink(OutFileBuf* out,
1834                        int offBase,
1835                        DECL_HIT_DUMPS2,
1836                        bool reportOpps = false) :
1837                 HitSink(out, PASS_HIT_DUMPS2),
1838                 _reportOpps(reportOpps),
1839                 offBase_(offBase) { }
1840
1841         /**
1842          * Construct a multi-stream ConciseHitSink with one stream per
1843          * reference string (see --refout)
1844          */
1845         ConciseHitSink(size_t numOuts,
1846                        int offBase,
1847                        DECL_HIT_DUMPS2,
1848                        bool reportOpps = false) :
1849                 HitSink(numOuts, PASS_HIT_DUMPS2),
1850                 _reportOpps(reportOpps),
1851                 offBase_(offBase) { }
1852
1853         /**
1854          * Append a concise hit record to the given output stream.
1855          */
1856         static void append(ostream& ss, const Hit& h, int offBase, bool reportOpps) {
1857                 ss << h.patId;
1858                 if(h.mate > 0) {
1859                         assert(h.mate == 1 || h.mate == 2);
1860                         ss << '/' << (int)h.mate;
1861                 }
1862                 ss << (h.fw? "+" : "-") << ":";
1863                 // .first is text id, .second is offset
1864                 ss << "<" << h.h.first << "," << (h.h.second + offBase) << "," << h.mms.count();
1865                 if(reportOpps) ss << "," << h.oms;
1866                 ss << ">" << endl;
1867         }
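             // For illustration (hypothetical values): a hit for read 10,
             // mate 1, forward strand, on reference 3 at 0-based offset 1024
             // with 2 mismatches, offBase == 1 and reportOpps == false is
             // printed as:
             //
             //   10/1+:<3,1025,2>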
1868
1869         /**
1870          * Append a concise hit record to the given output stream, using this sink's settings.
1871          */
1872         void append(ostream& ss, const Hit& h) {
1873                 ConciseHitSink::append(ss, h, this->offBase_, this->_reportOpps);
1874         }
1875
1876 protected:
1877
1878         /**
1879          * Report a concise alignment to the appropriate output stream.
1880          */
1881         virtual void reportHit(const Hit& h) {
1882                 HitSink::reportHit(h);
1883                 ostringstream ss;
1884                 append(ss, h);
1885                 lock(h.h.first);
1886                 out(h.h.first).writeString(ss.str());
1887                 unlock(h.h.first);
1888         }
1889
1890 private:
1891         bool _reportOpps;
1892         int  offBase_;     /// Add this to reference offsets before outputting.
1893                            /// (An easy way to make things 1-based instead of
1894                            /// 0-based)
1895 };
1896
1897 /**
1898  * Print the given string.  If ws = true, print only up to and not
1899  * including the first space or tab.  Useful for printing reference
1900  * names.
1901  */
1902 inline void printUptoWs(std::ostream& os, const std::string& str, bool ws) {
1903         if(!ws) {
1904                 os << str;
1905         } else {
1906                 size_t pos = str.find_first_of(" \t");
1907                 if(pos != string::npos) {
1908                         os << str.substr(0, pos);
1909                 } else {
1910                         os << str;
1911                 }
1912         }
1913 }
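     // Usage example (hypothetical reference name): with ws == true,
     //
     //   printUptoWs(cout, "gi|1234 Homo sapiens chromosome 1", true);
     //
     // prints just "gi|1234"; with ws == false the full string is printed.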
1914
1915 /**
1916  * Sink that prints lines like this:
1917  * pat-name \t [-|+] \t ref-name \t ref-off \t pat \t qual \t #-alt-hits \t mm-list
1918  */
1919 class VerboseHitSink : public HitSink {
1920 public:
1921         /**
1922          * Construct a single-stream VerboseHitSink (default)
1923          */
1924         VerboseHitSink(OutFileBuf* out,
1925                        int offBase,
1926                        bool colorSeq,
1927                        bool colorQual,
1928                        bool printCost,
1929                        const Bitset& suppressOuts,
1930                        ReferenceMap *rmap,
1931                        AnnotationMap *amap,
1932                        bool fullRef,
1933                        DECL_HIT_DUMPS2,
1934                        int partition = 0) :
1935         HitSink(out, PASS_HIT_DUMPS2),
1936         partition_(partition),
1937         offBase_(offBase),
1938         colorSeq_(colorSeq),
1939         colorQual_(colorQual),
1940         cost_(printCost),
1941         suppress_(suppressOuts),
1942         fullRef_(fullRef),
1943         rmap_(rmap), amap_(amap)
1944         { }
1945
1946         /**
1947          * Construct a multi-stream VerboseHitSink with one stream per
1948          * reference string (see --refout)
1949          */
1950         VerboseHitSink(size_t numOuts,
1951                        int offBase,
1952                        bool colorSeq,
1953                        bool colorQual,
1954                        bool printCost,
1955                        const Bitset& suppressOuts,
1956                        ReferenceMap *rmap,
1957                        AnnotationMap *amap,
1958                        bool fullRef,
1959                        DECL_HIT_DUMPS2,
1960                        int partition = 0) :
1961         HitSink(numOuts, PASS_HIT_DUMPS2),
1962         partition_(partition),
1963         offBase_(offBase),
1964         colorSeq_(colorSeq),
1965         colorQual_(colorQual),
1966         cost_(printCost),
1967         suppress_(suppressOuts),
1968         fullRef_(fullRef),
1969         rmap_(rmap),
1970         amap_(amap)
1971         { }
1972
1973         // In hit.cpp
1974         static void append(ostream& ss,
1975                            const Hit& h,
1976                            const vector<string>* refnames,
1977                            ReferenceMap *rmap,
1978                            AnnotationMap *amap,
1979                            bool fullRef,
1980                            int partition,
1981                            int offBase,
1982                            bool colorSeq,
1983                            bool colorQual,
1984                            bool cost,
1985                            const Bitset& suppress);
1986
1987         /**
1988          * Append a verbose, readable hit to the output stream
1989          * corresponding to the hit.
1990          */
1991         virtual void append(ostream& ss, const Hit& h) {
1992                 VerboseHitSink::append(ss, h, _refnames, rmap_, amap_,
1993                                        fullRef_, partition_, offBase_,
1994                                        colorSeq_, colorQual_, cost_,
1995                                        suppress_);
1996         }
1997
1998         /**
1999          * See hit.cpp
2000          */
2001         virtual void reportMaxed(vector<Hit>& hs, PatternSourcePerThread& p);
2002
2003 protected:
2004
2005         /**
2006          * Report a verbose, human-readable alignment to the appropriate
2007          * output stream.
2008          */
2009         virtual void reportHit(const Hit& h) {
2010                 reportHit(h, true);
2011         }
2012
2013         /**
2014          * Report a verbose, human-readable alignment to the appropriate
2015          * output stream.
2016          */
2017         virtual void reportHit(const Hit& h, bool count) {
2018                 if(count) HitSink::reportHit(h);
2019                 ostringstream ss;
2020                 append(ss, h);
2021                 // Make sure to grab lock before writing to output stream
2022                 lock(h.h.first);
2023                 out(h.h.first).writeString(ss.str());
2024                 unlock(h.h.first);
2025         }
2026
2027 private:
2028         int      partition_;   /// partition size, or 0 if partitioning is disabled
2029         int      offBase_;     /// Add this to reference offsets before outputting.
2030                                /// (An easy way to make things 1-based instead of
2031                                /// 0-based)
2032         bool     colorSeq_;    /// true -> print colorspace alignment sequence in colors
2033         bool     colorQual_;   /// true -> print colorspace quals as originals, not decoded
2034         bool     cost_;        /// true -> print stratum and cost
2035         Bitset   suppress_;    /// output fields to suppress
2036         bool fullRef_;         /// print full reference name
2037         ReferenceMap *rmap_;   /// mapping to reference coordinate system.
2038         AnnotationMap *amap_;  /// map from reference positions to annotations
2039 };
2040
2041 /**
2042  * Sink for outputting alignments in the binary format used for chaining.
2043  */
2044 class ChainingHitSink : public HitSink {
2045 public:
2046
2047         /**
2048          * Construct a single-stream ChainingHitSink (default)
2049          */
2050         ChainingHitSink(OutFileBuf* out, bool strata, AnnotationMap *amap, DECL_HIT_DUMPS2) :
2051         HitSink(out, PASS_HIT_DUMPS2), amap_(amap), strata_(strata)
2052         {
2053                 ssmode_ |= ios_base::binary;
2054         }
2055
2056         /**
2057          * Report a batch of hits.
2058          */
2059         virtual void reportHits(vector<Hit>& hs);
2060
2061         /**
2062          * Append a binary alignment to the output stream corresponding to
2063          * the reference sequence involved.
2064          */
2065         virtual void append(ostream& o, const Hit& h) {
2066                 cerr << "Error: ChainingHitSink::append() not implemented" << endl;
2067                 throw 1;
2068         }
2069
2070         /**
2071          * See hit.cpp
2072          */
2073         virtual void reportMaxed(vector<Hit>& hs, PatternSourcePerThread& p);
2074
2075         /**
2076          * See hit.cpp
2077          */
2078         virtual void reportUnaligned(PatternSourcePerThread& p);
2079
2080 protected:
2081         AnnotationMap *amap_;
2082         bool strata_;
2083 };
2084
2085 /**
2086  * Sink that does nothing.
2087  */
2088 class StubHitSink : public HitSink {
2089 public:
2090         StubHitSink() : HitSink(new OutFileBuf(".tmp"), "", "", "", false, false, NULL) { }
2091         virtual void append(ostream& o, const Hit& h) { }
2092 };
2093
2094 #endif /*HIT_H_*/
2095