16 #include "assert_helpers.h"
19 * Simple wrapper for a FILE*, istream or ifstream that reads it in
20 * chunks (with fread) and keeps those chunks in a buffer. It also
21 * services calls to get(), peek() and gets() from the buffer, reading
22 * in additional chunks when necessary.
36 FileBuf(std::ifstream *inf) {
42 FileBuf(std::istream *ins) {
49 return _in != NULL || _inf != NULL || _ins != NULL;
53 * Close the input stream (if that's possible)
56 if(_in != NULL && _in != stdin) {
58 } else if(_inf != NULL) {
66 * Get the next character of input and advance.
69 assert(_in != NULL || _inf != NULL || _ins != NULL);
73 if(_lastn_cur < LASTN_BUF_SZ) _lastn_buf[_lastn_cur++] = c;
79 * Return true iff all input is exhausted.
82 return (_cur == _buf_sz) && _done;
86 * Initialize the buffer with a new C-style file.
88 void newFile(FILE *in) {
98 * Initialize the buffer with a new ifstream.
100 void newFile(std::ifstream *__inf) {
110 * Initialize the buffer with a new istream.
112 void newFile(std::istream *__ins) {
122 * Restore state as though we just started reading the input
128 _inf->seekg(0, std::ios::beg);
129 } else if(_ins != NULL) {
131 _ins->seekg(0, std::ios::beg);
141 * Peek at the next character of the input stream without
142 * advancing. Typically we can simply read it from the buffer.
143 * Occasionally we'll need to read in a new buffer's worth of data.
146 assert(_in != NULL || _inf != NULL || _ins != NULL);
147 assert_leq(_cur, _buf_sz);
148 if(_cur == _buf_sz) {
150 // We already exhausted the input stream
153 // Read a new buffer's worth of data
155 // Get the next chunk
157 _inf->read((char*)_buf, BUF_SZ);
158 _buf_sz = _inf->gcount();
159 } else if(_ins != NULL) {
160 _ins->read((char*)_buf, BUF_SZ);
161 _buf_sz = _ins->gcount();
164 _buf_sz = fread(_buf, 1, BUF_SZ, _in);
168 // Exhausted, and we have nothing to return to the
172 } else if(_buf_sz < BUF_SZ) {
178 return (int)_buf[_cur];
182 * Store a string of characters from the input file into 'buf',
183 * until we see a newline, EOF, or until 'len' characters have been
186 size_t gets(char *buf, size_t len) {
195 if(stored == len-1 || c == '\n' || c == '\r') {
198 // Skip over all end-of-line characters
200 while(pc == '\n' || pc == '\r') {
204 // Next get() will be after all newline characters
207 buf[stored++] = (char)c;
212 * Store a string of characters from the input file into 'buf',
213 * until we see EOF (-1) or until 'len' characters have been
216 size_t get(char *buf, size_t len) {
218 for(size_t i = 0; i < len; i++) {
220 if(c == -1) return i;
221 buf[stored++] = (char)c;
226 static const size_t LASTN_BUF_SZ = 8 * 1024;
229 * Keep get()ing characters until a non-whitespace character (or
230 * -1) is reached, and return it.
232 int getPastWhitespace() {
234 while(isspace(c = get()) && c != -1);
239 * Keep get()ing characters until we've passed over the next
240 * string of newline characters (\r's and \n's) or -1 is reached,
243 int getPastNewline() {
245 while(c != '\r' && c != '\n' && c != -1) c = get();
246 while(c == '\r' || c == '\n') c = get();
253 * Keep get()ing characters until we've passed over the next
254 * string of newline characters (\r's and \n's) or -1 is reached,
257 int peekPastNewline() {
259 while(c != '\r' && c != '\n' && c != -1) c = get();
260 while(c == '\r' || c == '\n') c = get();
267 * Keep peek()ing then get()ing characters until the next return
268 * from peek() is just after the last newline of the line.
270 int peekUptoNewline() {
272 while(c != '\r' && c != '\n' && c != -1) {
275 while(c == '\r' || c == '\n') {
284 size_t lastNCur() const { return _lastn_cur; } // current write position (count of chars stored) in the last-N-chars buffer
287 * Reset to the beginning of the last-N-chars buffer.
294 * Copy the last several characters in the last-N-chars buffer
295 * (since the last reset) into the provided buffer.
297 size_t copyLastN(char *buf) {
298 memcpy(buf, _lastn_buf, _lastn_cur);
303 * Get const pointer to the last-N-chars buffer.
305 const char *lastN() const {
310 * Get current size of the last-N-chars buffer.
312 const size_t lastNLen() const {
322 _cur = _buf_sz = BUF_SZ;
325 // no need to clear _buf[]
328 static const size_t BUF_SZ = 256 * 1024;
335 uint8_t _buf[BUF_SZ]; // (large) input buffer
337 char _lastn_buf[LASTN_BUF_SZ]; // buffer of the last N chars dispensed
341 * Wrapper for a buffered output stream that writes bitpairs.
343 class BitpairOutFileBuf {
346 * Open a new output stream to a file with given name.
348 BitpairOutFileBuf(const char *in) : bpPtr_(0), cur_(0) {
350 out_ = fopen(in, "wb");
352 std::cerr << "Error: Could not open bitpair-output file " << in << std::endl;
355 memset(buf_, 0, BUF_SZ);
359 * Write a single bitpair into the buf. Flush the buffer if it's
365 buf_[cur_] |= (bp << bpPtr_);
371 if(!fwrite((const void *)buf_, BUF_SZ, 1, out_)) {
372 std::cerr << "Error writing to the reference index file (.4.ebwt)" << std::endl;
375 // Reset to beginning of the buffer
378 // Initialize next octet to 0
386 * Write any remaining bitpairs and then close the output
389 if(cur_ > 0 || bpPtr_ > 0) {
390 if(bpPtr_ == 0) cur_--;
391 if(!fwrite((const void *)buf_, cur_ + 1, 1, out_)) {
392 std::cerr << "Error writing to the reference index file (.4.ebwt)" << std::endl;
399 static const size_t BUF_SZ = 128 * 1024;
403 char buf_[BUF_SZ]; // (large) output buffer; bitpairs are OR-ed into it before being written to out_
407 * Wrapper for a buffered output stream that writes characters and
408 * other data types. This class is *not* synchronized; the caller is
409 * responsible for synchronization.
416 * Open a new output stream to a file with given name.
418 OutFileBuf(const char *out, bool binary = false) :
419 name_(out), cur_(0), closed_(false)
422 out_ = fopen(out, binary ? "wb" : "w");
424 std::cerr << "Error: Could not open alignment output file " << out << std::endl;
430 * Open a new output stream to standard out.
432 OutFileBuf() : name_("cout"), cur_(0), closed_(false) {
437 * Open a new output stream to a file with given name.
439 void setFile(const char *out, bool binary = false) {
441 out_ = fopen(out, binary ? "wb" : "w");
443 std::cerr << "Error: Could not open alignment output file " << out << std::endl;
450 * Write a single character into the write buffer and, if
455 if(cur_ == BUF_SZ) flush();
460 * Write a c++ string to the write buffer and, if necessary, flush.
462 void writeString(const std::string& s) {
464 size_t slen = s.length();
465 if(cur_ + slen > BUF_SZ) {
466 if(cur_ > 0) flush();
468 size_t wlen = fwrite(s.c_str(), 1, slen, out_);
470 std::cerr << "Error while writing string output; " << slen
471 << " characters in string, " << wlen
472 << " written" << std::endl;
476 memcpy(&buf_[cur_], s.data(), slen);
481 memcpy(&buf_[cur_], s.data(), slen);
484 assert_leq(cur_, BUF_SZ);
488 * Write 'len' characters from 's' to the write buffer and, if necessary, flush.
490 void writeChars(const char * s, size_t len) {
492 if(cur_ + len > BUF_SZ) {
493 if(cur_ > 0) flush();
495 size_t wlen = fwrite(s, 1, len, out_);
497 std::cerr << "Error while writing string output; " << len
498 << " characters in string, " << wlen
499 << " written" << std::endl;
503 memcpy(&buf_[cur_], s, len);
508 memcpy(&buf_[cur_], s, len);
511 assert_leq(cur_, BUF_SZ);
515 * Flush any buffered characters and then close the output
519 if(cur_ > 0) flush();
527 * Reset so that the next write is as though it's the first.
535 if(!fwrite((const void *)buf_, cur_, 1, out_)) {
536 std::cerr << "Error while flushing and closing output" << std::endl;
543 * Return true iff this stream is closed.
545 bool closed() const {
550 * Return the filename.
558 static const size_t BUF_SZ = 16 * 1024;
563 char buf_[BUF_SZ]; // output (write) buffer; flushed to out_ when full
567 #endif /*ndef FILEBUF_H_*/