4 * Created on: Sep 23, 2009
18 * Write the SAM header lines.
20 void SAMHitSink::appendHeaders(OutFileBuf& os,
22 const vector<string>& refnames,
32 ss << "@HD\tVN:1.0\tSO:unsorted" << endl;
34 for(size_t i = 0; i < numRefs; i++) {
37 if(!refnames.empty() && rmap != NULL) {
38 printUptoWs(ss, rmap->getName(i), !fullRef);
39 } else if(i < refnames.size()) {
40 printUptoWs(ss, refnames[i], !fullRef);
44 ss << "\tLN:" << (plen[i] + (color ? 1 : 0)) << endl;
48 ss << "@RG\t" << rgline << endl;
50 ss << "@PG\tID:Bowtie\tVN:" << BOWTIE_VERSION << "\tCL:\"" << cmdline << "\"" << endl;
51 os.writeString(ss.str());
55 * Append a SAM output record for an unaligned read.
57 void SAMHitSink::appendAligned(ostream& ss,
60 int xms, // value for XM:I field
61 const vector<string>* refnames,
69 // truncate final 2 chars
70 for(int i = 0; i < (int)seqan::length(h.patName)-2; i++) {
71 if(isspace(h.patName[i])) break;
75 for(int i = 0; i < (int)seqan::length(h.patName); i++) {
76 if(isspace(h.patName[i])) break;
84 flags |= SAM_FLAG_PAIRED | SAM_FLAG_FIRST_IN_PAIR | SAM_FLAG_MAPPED_PAIRED;
85 } else if(h.mate == 2) {
86 flags |= SAM_FLAG_PAIRED | SAM_FLAG_SECOND_IN_PAIR | SAM_FLAG_MAPPED_PAIRED;
88 if(!h.fw) flags |= SAM_FLAG_QUERY_STRAND;
89 if(h.mate > 0 && !h.mfw) flags |= SAM_FLAG_MATE_STRAND;
92 if(refnames != NULL && rmap != NULL) {
93 printUptoWs(ss, rmap->getName(h.h.first), !fullRef);
94 } else if(refnames != NULL && h.h.first < refnames->size()) {
95 printUptoWs(ss, (*refnames)[h.h.first], !fullRef);
100 ss << '\t' << (h.h.second + 1);
104 ss << '\t' << h.length() << 'M';
113 ss << '\t' << (h.mh.second + 1);
120 assert_eq(h.h.first, h.mh.first);
122 if(h.h.second > h.mh.second) {
123 inslen = (int64_t)h.h.second - (int64_t)h.mh.second + (int64_t)h.length();
126 inslen = (int64_t)h.mh.second - (int64_t)h.h.second + (int64_t)h.mlen;
133 ss << '\t' << h.patSeq;
135 ss << '\t' << h.quals;
139 // Always output stratum
140 ss << "\tXA:i:" << (int)h.stratum;
141 // Always output cost
142 //ss << "\tXC:i:" << (int)h.cost;
143 // Look for SNP annotations falling within the alignment
145 size_t len = length(h.patSeq);
149 const FixedBitset<1024> *mms = &h.mms;
150 const String<Dna5>* pat = &h.patSeq;
151 const vector<char>* refcs = &h.refcs;
152 if(h.color && false) {
153 // Disabled: print MD:Z string w/r/t to colors, not letters
156 assert_eq(length(h.colSeq), len+1);
157 len = length(h.colSeq);
161 for (int i = 0; i < (int)len; ++ i) {
164 // There's a mismatch at this position
165 assert_gt((int)refcs->size(), i);
166 char refChar = toupper((*refcs)[i]);
167 ASSERT_ONLY(char qryChar = (h.fw ? (*pat)[i] : (*pat)[len-i-1]));
168 assert_neq(refChar, qryChar);
169 ss << run << refChar;
176 for (int i = len-1; i >= 0; -- i) {
179 // There's a mismatch at this position
180 assert_gt((int)refcs->size(), i);
181 char refChar = toupper((*refcs)[i]);
182 ASSERT_ONLY(char qryChar = (h.fw ? (*pat)[i] : (*pat)[len-i-1]));
183 assert_neq(refChar, qryChar);
184 ss << run << refChar;
192 // Add optional edit distance field
193 ss << "\tNM:i:" << nm;
194 if(h.color) ss << "\tCM:i:" << h.cmms.count();
195 if(xms > 0) ss << "\tXM:i:" << xms;
200 * Report a verbose, human-readable alignment to the appropriate
203 void SAMHitSink::reportHit(const Hit& h, int mapq, int xms) {
205 // Otherwise, this is actually a sampled read and belongs in
206 // the same category as maxed reads
207 HitSink::reportHit(h);
210 append(ss, h, mapq, xms);
211 // Make sure to grab lock before writing to output stream
213 out(h.h.first).writeString(ss.str());
218 * Report a batch of hits from a vector, perhaps subsetting it.
220 void SAMHitSink::reportHits(vector<Hit>& hs,
221 size_t start, size_t end,
224 assert_geq(end, start);
225 if(end-start == 0) return;
226 assert_gt(hs[start].mate, 0);
229 for(size_t i = start; i < end; i++) {
230 ostringstream ss(ssmode_);
231 ss.rdbuf()->pubsetbuf(buf, 4096);
232 append(ss, hs[i], mapq, xms);
233 out(0).writeChars(buf, ss.tellp());
240 numReportedPaired_ += (end-start);
245 * Report either an unaligned read or a read that exceeded the -m
246 * ceiling. We output placeholders for most of the fields in this
249 void SAMHitSink::reportUnOrMax(PatternSourcePerThread& p,
251 bool un) // lower bound on number of other hits
253 if(un) HitSink::reportUnaligned(p);
254 else HitSink::reportMaxed(*hs, p);
256 bool paired = !p.bufb().empty();
257 assert(paired || p.bufa().mate == 0);
258 assert(!paired || p.bufa().mate > 0);
259 assert(un || hs->size() > 0);
260 assert(!un || hs == NULL || hs->size() == 0);
262 if(hs != NULL) hssz = hs->size();
264 // truncate final 2 chars
265 for(int i = 0; i < (int)seqan::length(p.bufa().name)-2; i++) {
266 ss << p.bufa().name[i];
272 << (SAM_FLAG_UNMAPPED | (paired ? (SAM_FLAG_PAIRED | SAM_FLAG_FIRST_IN_PAIR | SAM_FLAG_MATE_UNMAPPED) : 0)) << "\t*"
273 << "\t0\t0\t*\t*\t0\t0\t"
274 << p.bufa().patFw << "\t" << p.bufa().qual << "\tXM:i:"
275 << (paired ? (hssz+1)/2 : hssz) << endl;
277 // truncate final 2 chars
278 for(int i = 0; i < (int)seqan::length(p.bufb().name)-2; i++) {
279 ss << p.bufb().name[i];
282 << (SAM_FLAG_UNMAPPED | (paired ? (SAM_FLAG_PAIRED | SAM_FLAG_SECOND_IN_PAIR | SAM_FLAG_MATE_UNMAPPED) : 0)) << "\t*"
283 << "\t0\t0\t*\t*\t0\t0\t"
284 << p.bufb().patFw << "\t" << p.bufb().qual << "\tXM:i:"
285 << (hssz+1)/2 << endl;
288 out(0).writeString(ss.str());
293 * Append a SAM alignment to the given output stream.
295 void SAMHitSink::append(ostream& ss,
299 const vector<string>* refnames,
305 appendAligned(ss, h, mapq, xms, refnames, rmap, amap, fullRef, offBase);
309 * Report maxed-out read; if sampleMax_ is set, then report 1 alignment
312 void SAMHitSink::reportMaxed(vector<Hit>& hs, PatternSourcePerThread& p) {
314 HitSink::reportMaxed(hs, p);
316 rand.init(p.bufa().seed);
317 assert_gt(hs.size(), 0);
318 bool paired = hs.front().mate > 0;
322 int bestStratum = 999;
323 for(size_t i = 0; i < hs.size()-1; i += 2) {
324 int strat = min(hs[i].stratum, hs[i+1].stratum);
325 if(strat < bestStratum) {
328 } else if(strat == bestStratum) {
332 assert_leq(num, hs.size());
333 uint32_t r = rand.nextU32() % num;
335 for(size_t i = 0; i < hs.size()-1; i += 2) {
336 int strat = min(hs[i].stratum, hs[i+1].stratum);
337 if(strat == bestStratum) {
339 reportHits(hs, i, i+2, 0, hs.size()/2+1);
347 for(size_t i = 1; i < hs.size(); i++) {
348 assert_geq(hs[i].stratum, hs[i-1].stratum);
349 if(hs[i].stratum == hs[i-1].stratum) num++;
352 assert_leq(num, hs.size());
353 uint32_t r = rand.nextU32() % num;
354 reportHit(hs[r], /*MAPQ*/0, /*XM:I*/hs.size()+1);
357 reportUnOrMax(p, &hs, false);