X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=samtools.git;a=blobdiff_plain;f=bgzf.c;h=a6923daa0cc1cfa8d0d0bc2269255326be8439e3;hp=fe4e31da109f6ca523e73088fb3fd27aa9672587;hb=049705238f6c2d4cdd79022e7bcf2cc7d83ba9c7;hpb=b27e00385f41769d03a8cca4dbd71275fc9fa906 diff --git a/bgzf.c b/bgzf.c index fe4e31d..a6923da 100644 --- a/bgzf.c +++ b/bgzf.c @@ -1,13 +1,25 @@ -/* - * The Broad Institute - * SOFTWARE COPYRIGHT NOTICE AGREEMENT - * This software and its documentation are copyright 2008 by the - * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. - * - * This software is supplied without any warranty or guaranteed support whatsoever. - * Neither the Broad Institute nor MIT can be responsible for its use, misuse, - * or functionality. - */ +/* The MIT License + + Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. +*/ /* 2009-06-29 by lh3: cache recent uncompressed blocks. @@ -31,10 +43,15 @@ typedef struct { } cache_t; KHASH_MAP_INIT_INT64(cache, cache_t) +#if defined(_WIN32) || defined(_MSC_VER) +#define ftello(fp) ftell(fp) +#define fseeko(fp, offset, whence) fseek(fp, offset, whence) +#else extern off_t ftello(FILE *stream); extern int fseeko(FILE *stream, off_t offset, int whence); +#endif -typedef int8_t byte; +typedef int8_t bgzf_byte_t; static const int DEFAULT_BLOCK_SIZE = 64 * 1024; static const int MAX_BLOCK_SIZE = 64 * 1024; @@ -81,9 +98,9 @@ packInt32(uint8_t* buffer, uint32_t value) buffer[3] = value >> 24; } -inline +static inline int -min(int x, int y) +bgzf_min(int x, int y) { return (x < y) ? x : y; } @@ -169,20 +186,24 @@ bgzf_open(const char* __restrict path, const char* __restrict mode) fp->open_mode = 'r'; fp->x.fpr = file; #else - int oflag = O_RDONLY; - int fd = open(path, oflag); + int fd, oflag = O_RDONLY; +#ifdef _WIN32 + oflag |= O_BINARY; +#endif + fd = open(path, oflag); if (fd == -1) return 0; fp = open_read(fd); #endif } else if (mode[0] == 'w' || mode[0] == 'W') { - int oflag = O_WRONLY | O_CREAT | O_TRUNC; - int fd = open(path, oflag, 0644); + int fd, oflag = O_WRONLY | O_CREAT | O_TRUNC; +#ifdef _WIN32 + oflag |= O_BINARY; +#endif + fd = open(path, oflag, 0666); if (fd == -1) return 0; fp = open_write(fd, strstr(mode, "u")? 1 : 0); } - if (fp != NULL) { - fp->owned_file = 1; - } + if (fp != NULL) fp->owned_file = 1; return fp; } @@ -206,7 +227,7 @@ deflate_block(BGZF* fp, int block_length) // Deflate the block in fp->uncompressed_block into fp->compressed_block. // Also adds an extra field that stores the compressed block length. - byte* buffer = fp->compressed_block; + bgzf_byte_t* buffer = fp->compressed_block; int buffer_size = fp->compressed_block_size; // Init gzip header @@ -337,10 +358,10 @@ inflate_block(BGZF* fp, int block_length) static int -check_header(const byte* header) +check_header(const bgzf_byte_t* header) { return (header[0] == GZIP_ID1 && - header[1] == (byte) GZIP_ID2 && + header[1] == (bgzf_byte_t) GZIP_ID2 && header[2] == Z_DEFLATED && (header[3] & FLG_FEXTRA) != 0 && unpackInt16((uint8_t*)&header[10]) == BGZF_XLEN && @@ -406,20 +427,19 @@ static void cache_block(BGZF *fp, int size) memcpy(kh_val(h, k).block, fp->uncompressed_block, MAX_BLOCK_SIZE); } -static int -read_block(BGZF* fp) +bgzf_read_block(BGZF* fp) { - byte header[BLOCK_HEADER_LENGTH]; - int size = 0; + bgzf_byte_t header[BLOCK_HEADER_LENGTH]; + int count, size = 0; #ifdef _USE_KNETFILE int64_t block_address = knet_tell(fp->x.fpr); if (load_block_from_cache(fp, block_address)) return 0; - int count = knet_read(fp->x.fpr, header, sizeof(header)); + count = knet_read(fp->x.fpr, header, sizeof(header)); #else int64_t block_address = ftello(fp->file); if (load_block_from_cache(fp, block_address)) return 0; - int count = fread(header, 1, sizeof(header), fp->file); + count = fread(header, 1, sizeof(header), fp->file); #endif if (count == 0) { fp->block_length = 0; @@ -435,7 +455,7 @@ read_block(BGZF* fp) return -1; } int block_length = unpackInt16((uint8_t*)&header[16]) + 1; - byte* compressed_block = (byte*) fp->compressed_block; + bgzf_byte_t* compressed_block = (bgzf_byte_t*) fp->compressed_block; memcpy(compressed_block, header, BLOCK_HEADER_LENGTH); int remaining = block_length - BLOCK_HEADER_LENGTH; #ifdef _USE_KNETFILE @@ -449,9 +469,7 @@ read_block(BGZF* fp) } size += count; count = inflate_block(fp, block_length); - if (count < 0) { - return -1; - } + if (count < 0) return -1; if (fp->block_length != 0) { // Do not reset offset if this read follows a seek. fp->block_offset = 0; @@ -474,11 +492,11 @@ bgzf_read(BGZF* fp, void* data, int length) } int bytes_read = 0; - byte* output = data; + bgzf_byte_t* output = data; while (bytes_read < length) { int available = fp->block_length - fp->block_offset; if (available <= 0) { - if (read_block(fp) != 0) { + if (bgzf_read_block(fp) != 0) { return -1; } available = fp->block_length - fp->block_offset; @@ -486,8 +504,8 @@ bgzf_read(BGZF* fp, void* data, int length) break; } } - int copy_length = min(length-bytes_read, available); - byte* buffer = fp->uncompressed_block; + int copy_length = bgzf_min(length-bytes_read, available); + bgzf_byte_t* buffer = fp->uncompressed_block; memcpy(output, buffer + fp->block_offset, copy_length); fp->block_offset += copy_length; output += copy_length; @@ -505,19 +523,16 @@ bgzf_read(BGZF* fp, void* data, int length) return bytes_read; } -static -int -flush_block(BGZF* fp) +int bgzf_flush(BGZF* fp) { while (fp->block_offset > 0) { - int block_length = deflate_block(fp, fp->block_offset); - if (block_length < 0) { - return -1; - } + int count, block_length; + block_length = deflate_block(fp, fp->block_offset); + if (block_length < 0) return -1; #ifdef _USE_KNETFILE - int count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw); + count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw); #else - int count = fwrite(fp->compressed_block, 1, block_length, fp->file); + count = fwrite(fp->compressed_block, 1, block_length, fp->file); #endif if (count != block_length) { report_error(fp, "write failed"); @@ -528,30 +543,35 @@ flush_block(BGZF* fp) return 0; } -int -bgzf_write(BGZF* fp, const void* data, int length) +int bgzf_flush_try(BGZF *fp, int size) +{ + if (fp->block_offset + size > fp->uncompressed_block_size) + return bgzf_flush(fp); + return -1; +} + +int bgzf_write(BGZF* fp, const void* data, int length) { if (fp->open_mode != 'w') { report_error(fp, "file not open for writing"); return -1; } - if (fp->uncompressed_block == NULL) { + if (fp->uncompressed_block == NULL) fp->uncompressed_block = malloc(fp->uncompressed_block_size); - } - const byte* input = data; + const bgzf_byte_t* input = data; int block_length = fp->uncompressed_block_size; int bytes_written = 0; while (bytes_written < length) { - int copy_length = min(block_length - fp->block_offset, length - bytes_written); - byte* buffer = fp->uncompressed_block; + int copy_length = bgzf_min(block_length - fp->block_offset, length - bytes_written); + bgzf_byte_t* buffer = fp->uncompressed_block; memcpy(buffer + fp->block_offset, input, copy_length); fp->block_offset += copy_length; input += copy_length; bytes_written += copy_length; if (fp->block_offset == block_length) { - if (flush_block(fp) != 0) { + if (bgzf_flush(fp) != 0) { break; } } @@ -559,13 +579,18 @@ bgzf_write(BGZF* fp, const void* data, int length) return bytes_written; } -int -bgzf_close(BGZF* fp) +int bgzf_close(BGZF* fp) { if (fp->open_mode == 'w') { - if (flush_block(fp) != 0) { - return -1; - } + if (bgzf_flush(fp) != 0) return -1; + { // add an empty block + int count, block_length = deflate_block(fp, 0); +#ifdef _USE_KNETFILE + count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw); +#else + count = fwrite(fp->compressed_block, 1, block_length, fp->file); +#endif + } #ifdef _USE_KNETFILE if (fflush(fp->x.fpw) != 0) { #else @@ -582,9 +607,7 @@ bgzf_close(BGZF* fp) else ret = knet_close(fp->x.fpr); if (ret != 0) return -1; #else - if (fclose(fp->file) != 0) { - return -1; - } + if (fclose(fp->file) != 0) return -1; #endif } free(fp->uncompressed_block); @@ -594,20 +617,35 @@ bgzf_close(BGZF* fp) return 0; } -int64_t -bgzf_tell(BGZF* fp) +void bgzf_set_cache_size(BGZF *fp, int cache_size) { - return ((fp->block_address << 16) | (fp->block_offset & 0xFFFF)); + if (fp) fp->cache_size = cache_size; } -void bgzf_set_cache_size(BGZF *fp, int cache_size) +int bgzf_check_EOF(BGZF *fp) { - if (fp) fp->cache_size = cache_size; + static uint8_t magic[28] = "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0"; + uint8_t buf[28]; + off_t offset; +#ifdef _USE_KNETFILE + offset = knet_tell(fp->x.fpr); + if (knet_seek(fp->x.fpr, -28, SEEK_END) != 0) return -1; + knet_read(fp->x.fpr, buf, 28); + knet_seek(fp->x.fpr, offset, SEEK_SET); +#else + offset = ftello(fp->file); + if (fseeko(fp->file, -28, SEEK_END) != 0) return -1; + fread(buf, 1, 28, fp->file); + fseeko(fp->file, offset, SEEK_SET); +#endif + return (memcmp(magic, buf, 28) == 0)? 1 : 0; } -int64_t -bgzf_seek(BGZF* fp, int64_t pos, int where) +int64_t bgzf_seek(BGZF* fp, int64_t pos, int where) { + int block_offset; + int64_t block_address; + if (fp->open_mode != 'r') { report_error(fp, "file not open for read"); return -1; @@ -616,8 +654,8 @@ bgzf_seek(BGZF* fp, int64_t pos, int where) report_error(fp, "unimplemented seek option"); return -1; } - int block_offset = pos & 0xFFFF; - int64_t block_address = (pos >> 16) & 0xFFFFFFFFFFFFLL; + block_offset = pos & 0xFFFF; + block_address = (pos >> 16) & 0xFFFFFFFFFFFFLL; #ifdef _USE_KNETFILE if (knet_seek(fp->x.fpr, block_address, SEEK_SET) != 0) { #else @@ -631,4 +669,3 @@ bgzf_seek(BGZF* fp, int64_t pos, int where) fp->block_offset = block_offset; return 0; } -