4 * Created on: Jul 31, 2009
11 #include <seqan/sequence.h>
14 #include "assert_helpers.h"
22 * Encapsulates a hit contained within a HitSet that can be
23 * (de)serialized to/from FileBufs. Used for chaining.
26 typedef std::pair<uint32_t,uint32_t> U32Pair;
31 * Write binary representation of HitSetEnt to an OutFileBuf.
33 void serialize(OutFileBuf& fb) const {
34 fb.writeChars((const char*)&h.first, 4);
35 fb.writeChars((const char*)&h.second, 4);
36 assert(fw == 0 || fw == 1);
38 assert_geq(stratum, 0);
39 assert_lt(stratum, 4);
41 assert_eq(stratum, (cost >> 14));
42 fb.writeChars((const char*)&cost, 2);
43 fb.writeChars((const char*)&oms, 4);
44 uint32_t sz = edits.size();
45 fb.writeChars((const char*)&sz, 4);
46 std::vector<Edit>::const_iterator it;
47 for(it = edits.begin(); it != edits.end(); it++) {
51 fb.writeChars((const char*)&sz, 4);
52 for(it = cedits.begin(); it != cedits.end(); it++) {
58 * Repopulate a HitSetEnt from its binary representation in FileBuf.
60 void deserialize(FileBuf& fb) {
61 fb.get((char*)&h.first, 4);
62 fb.get((char*)&h.second, 4);
64 assert(fw == 0 || fw == 1);
66 assert_geq(stratum, 0);
67 assert_lt(stratum, 4);
68 fb.get((char*)&cost, 2);
69 assert_eq(stratum, (cost >> 14));
70 fb.get((char*)&oms, 4);
72 fb.get((char*)&sz, 4);
75 for(uint32_t i = 0; i < sz; i++) {
76 edits[i].deserialize(fb);
78 fb.get((char*)&sz, 4);
81 for(uint32_t i = 0; i < sz; i++) {
82 cedits[i].deserialize(fb);
87 * Less than operator. Break HitSetEnt ties by:
93 int operator< (const HitSetEnt &rhs) const {
94 if(stratum < rhs.stratum) return 1;
95 if(stratum > rhs.stratum) return 0;
96 if(cost < rhs.cost) return 1;
97 if(cost > rhs.cost) return 0;
98 if(h < rhs.h) return 1;
99 if(h > rhs.h) return 0;
100 return (fw < rhs.fw)? 1 : 0;
104 * Greater than operator.
106 int operator> (const HitSetEnt &rhs) const {
107 if(stratum < rhs.stratum) return 0;
108 if(stratum > rhs.stratum) return 1;
109 if(cost < rhs.cost) return 0;
110 if(cost > rhs.cost) return 1;
111 if(h < rhs.h) return 0;
112 if(h > rhs.h) return 1;
113 return (fw <= rhs.fw)? 0 : 1;
117 * Equality comparison operator.
119 int operator== (const HitSetEnt &rhs) const {
120 return(stratum == rhs.stratum &&
127 * Indexing returns edits.
129 Edit& operator[](unsigned x) {
134 * Indexing returns edits.
136 const Edit& operator[](unsigned x) const {
141 * Another way to get at an edit.
143 Edit& editAt(unsigned i) {
148 * Another way to get at a const edit.
150 const Edit& editAt(unsigned i) const {
155 * Get the ith color edit.
157 Edit& colorEditAt(unsigned i) {
162 * Another way to get at an edit.
164 const Edit& colorEditAt(unsigned i) const {
169 * Return the front entry.
172 return edits.front();
176 * Return the back entry.
183 * Expand the entry list by one.
186 edits.resize(edits.size() + 1);
190 * Sort edits by position
193 if(edits.size() > 1) std::sort(edits.begin(), edits.end());
197 * Return number of edits.
199 size_t size() const {
204 return edits.empty();
208 * Write HitSetEnt to an output stream.
210 friend std::ostream& operator << (std::ostream& os, const HitSetEnt& hse);
212 U32Pair h; // reference coordinates
213 uint8_t fw; // orientation
214 int8_t stratum; // stratum
215 uint16_t cost; // cost, including stratum
216 uint32_t oms; // # others
217 std::vector<Edit> edits; // edits to get from reference to subject
218 std::vector<Edit> cedits; // color edits to get from reference to subject
222 * Encapsulates a set of hits that can be (de)serialized to/from
223 * FileBufs. Used for chaining.
227 typedef std::vector<HitSetEnt> EntVec;
228 typedef EntVec::const_iterator Iter;
229 typedef std::pair<uint32_t,uint32_t> U32Pair;
235 HitSet(FileBuf& fb) {
240 * Write binary representation of HitSet to an OutFileBuf.
242 void serialize(OutFileBuf& fb) const {
243 fb.write(color ? 1 : 0);
244 uint32_t i = seqan::length(name);
246 fb.writeChars((const char*)&i, 4);
247 fb.writeChars(seqan::begin(name), i);
248 i = seqan::length(seq);
251 fb.writeChars((const char*)&i, 4);
252 for(size_t j = 0; j < i; j++) {
253 fb.write("ACGTN"[(int)seq[j]]);
255 fb.writeChars(seqan::begin(qual), i);
257 fb.writeChars((const char*)&i, 4);
258 std::vector<HitSetEnt>::const_iterator it;
259 for(it = ents.begin(); it != ents.end(); it++) {
262 fb.write(maxedStratum);
266 * Repopulate a HitSet from its binary representation in FileBuf.
268 void deserialize(FileBuf& fb) {
269 color = (fb.get() != 0 ? true : false);
271 if(fb.get((char*)&sz, 4) != 4) {
278 seqan::resize(name, sz);
279 fb.get(seqan::begin(name), sz);
280 fb.get((char*)&sz, 4);
283 seqan::resize(seq, sz);
284 for(size_t j = 0; j < sz; j++) {
285 seq[j] = charToDna5[fb.get()];
287 seqan::resize(qual, sz);
288 fb.get(seqan::begin(qual), sz);
289 fb.get((char*)&sz, 4);
292 for(size_t i = 0; i < sz; i++) {
293 ents[i].deserialize(fb);
298 maxedStratum = fb.get();
302 * Return true iff this HitSet is initialized with a read (but not
303 * necessarily any alignments).
305 bool initialized() const {
306 return !seqan::empty(seq);
310 * Return true iff this HitSet has no hits.
317 * Return number of entries in this HitSet.
319 size_t size() const {
324 * Remove all entries from this HitSet.
340 * Return the front entry.
347 * Return the back entry.
354 * Expand the entry list by one.
357 ents.resize(ents.size() + 1);
358 assert(ents.back().empty());
364 void resize(size_t sz) {
369 * Sort hits by stratum/penalty.
372 if(ents.size() > 1) std::sort(ents.begin(), ents.end());
376 * Return true iff Ents are sorted
378 bool sorted() const {
379 if(ents.empty()) return true;
380 for(size_t i = 0; i < ents.size()-1; i++) {
381 if(!(ents[i] < ents[i+1])) return false;
387 * Remove the ith hit from the HitSet, shifting all subsequent hits
390 void remove(size_t i) {
391 ents.erase(ents.begin() + i);
395 * Return true if strata are uniform across hits; assert if they're
398 bool uniformStrata() const {
399 for(size_t i = 0; i < ents.size()-1; i++) {
400 assert(ents[i].stratum == ents[i+1].stratum);
406 * Indexing returns entries.
408 HitSetEnt& operator[](unsigned x) {
413 * Indexing returns entries.
415 const HitSetEnt& operator[](unsigned x) const {
420 * Apply a reference mappings to all the contained hits.
422 void applyReferenceMap(const ReferenceMap& map) {
423 std::vector<HitSetEnt>::iterator it;
424 for(it = ents.begin(); it != ents.end(); it++) map.map(it->h);
428 * Clear out all the strings and all the entries.
441 bool tryReplacing(U32Pair h,
446 for(size_t i = 0; i < ents.size(); i++) {
447 if(ents[i].h == h && ents[i].fw == fw) {
448 if(cost < ents[i].cost) {
449 // New hit at same position is better! Replace it.
452 ents[i].stratum = cost >> 14;
466 * Report up to 'khits' hits from this HitSet.
468 void reportUpTo(std::ostream& os, int khits = INT_MAX);
473 friend std::ostream& operator << (std::ostream& os, const HitSet& hs);
475 seqan::String<char> name;
476 seqan::String<seqan::Dna5> seq;
477 seqan::String<char> qual;
479 std::vector<HitSetEnt> ents;
480 bool color; // whether read was orginally in colorspace
483 #endif /* HIT_SET_H_ */