X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=tabix.git;a=blobdiff_plain;f=bgzf.h;h=1fdf6259d5d860973ac25465049162b1889b52c0;hp=70f497e819442adc843f66c4023012fade6005e8;hb=HEAD;hpb=3be5ff47495762af7f2ebec145bc8f9c7674593d diff --git a/bgzf.h b/bgzf.h index 70f497e..1fdf625 100644 --- a/bgzf.h +++ b/bgzf.h @@ -1,6 +1,7 @@ /* The MIT License Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology + 2011 Attractive Chaos Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -21,143 +22,172 @@ THE SOFTWARE. */ +/* The BGZF library was originally written by Bob Handsaker from the Broad + * Institute. It was later improved by the SAMtools developers. */ + #ifndef __BGZF_H #define __BGZF_H #include #include -#include #include -#ifdef _USE_KNETFILE -#include "knetfile.h" -#endif -//typedef int8_t bool; +#define BGZF_BLOCK_SIZE 0x10000 // 64k + +#define BGZF_ERR_ZLIB 1 +#define BGZF_ERR_HEADER 2 +#define BGZF_ERR_IO 4 +#define BGZF_ERR_MISUSE 8 typedef struct { - int file_descriptor; - char open_mode; // 'r' or 'w' - bool owned_file, is_uncompressed; -#ifdef _USE_KNETFILE - union { - knetFile *fpr; - FILE *fpw; - } x; -#else - FILE* file; -#endif - int uncompressed_block_size; - int compressed_block_size; - void* uncompressed_block; - void* compressed_block; - int64_t block_address; - int block_length; - int block_offset; + int open_mode:8, compress_level:8, errcode:16; int cache_size; - const char* error; + int block_length, block_offset; + int64_t block_address; + void *uncompressed_block, *compressed_block; void *cache; // a pointer to a hash table + void *fp; // actual file handler; FILE* on writing; FILE* or knetFile* on reading } BGZF; +#ifndef KSTRING_T +#define KSTRING_T kstring_t +typedef struct __kstring_t { + size_t l, m; + char *s; +} kstring_t; +#endif + #ifdef __cplusplus extern "C" { #endif -/* - * Checks the magic string of the file. 
Returns 1 - * for bgzipped files, -1 on errors and 0 for files - * without the bgzip magic string. - */ -int is_bgzipped(const char *path); - -/* - * Open an existing file descriptor for reading or writing. - * Mode must be either "r" or "w". - * A subsequent bgzf_close will not close the file descriptor. - * Returns null on error. - */ -BGZF* bgzf_fdopen(int fd, const char* __restrict mode); - -/* - * Open the specified file for reading or writing. - * Mode must be either "r" or "w". - * Returns null on error. - */ -BGZF* bgzf_open(const char* path, const char* __restrict mode); - -/* - * Close the BGZ file and free all associated resources. - * Does not close the underlying file descriptor if created with bgzf_fdopen. - * Returns zero on success, -1 on error. - */ -int bgzf_close(BGZF* fp); - -/* - * Read up to length bytes from the file storing into data. - * Returns the number of bytes actually read. - * Returns zero on end of file. - * Returns -1 on error. - */ -int bgzf_read(BGZF* fp, void* data, int length); - -/* - * Write length bytes from data to the file. - * Returns the number of bytes written. - * Returns -1 on error. - */ -int bgzf_write(BGZF* fp, const void* data, int length); - -/* - * Return a virtual file pointer to the current location in the file. - * No interpetation of the value should be made, other than a subsequent - * call to bgzf_seek can be used to position the file at the same point. - * Return value is non-negative on success. - * Returns -1 on error. - */ -#define bgzf_tell(fp) ((fp->block_address << 16) | (fp->block_offset & 0xFFFF)) - -/* - * Set the file to read from the location specified by pos, which must - * be a value previously returned by bgzf_tell for this file (but not - * necessarily one returned by this file handle). - * The where argument must be SEEK_SET. - * Seeking on a file opened for write is not supported. - * Returns zero on success, -1 on error. 
- */ -int64_t bgzf_seek(BGZF* fp, int64_t pos, int where); - -/* - * Set the cache size. Zero to disable. By default, caching is - * disabled. The recommended cache size for frequent random access is - * about 8M bytes. - */ -void bgzf_set_cache_size(BGZF *fp, int cache_size); - -int bgzf_check_EOF(BGZF *fp); - -int bgzf_read_block(BGZF* fp); + /****************** + * Basic routines * + ******************/ + + /** + * Open an existing file descriptor for reading or writing. + * + * @param fd file descriptor + * @param mode mode matching /[rwu0-9]+/: 'r' for reading, 'w' for writing and a digit specifies + * the zlib compression level; if both 'r' and 'w' are present, 'w' is ignored. + * @return BGZF file handler; 0 on error + */ + BGZF* bgzf_dopen(int fd, const char *mode); + + /** + * Open the specified file for reading or writing. + */ + BGZF* bgzf_open(const char* path, const char *mode); + + /** + * Close the BGZF and free all associated resources. + * + * @param fp BGZF file handler + * @return 0 on success and -1 on error + */ + int bgzf_close(BGZF *fp); + + /** + * Read up to _length_ bytes from the file storing into _data_. + * + * @param fp BGZF file handler + * @param data data array to read into + * @param length size of data to read + * @return number of bytes actually read; 0 on end-of-file and -1 on error + */ + ssize_t bgzf_read(BGZF *fp, void *data, ssize_t length); + + /** + * Write _length_ bytes from _data_ to the file. + * + * @param fp BGZF file handler + * @param data data array to write + * @param length size of data to write + * @return number of bytes actually written; -1 on error + */ + ssize_t bgzf_write(BGZF *fp, const void *data, ssize_t length); + + /** + * Write the data in the buffer to the file. + */ + int bgzf_flush(BGZF *fp); + + /** + * Return a virtual file pointer to the current location in the file. 
+ * No interpretation of the value should be made, other than a subsequent + * call to bgzf_seek can be used to position the file at the same point. + * Return value is non-negative on success. + */ + #define bgzf_tell(fp) ((fp->block_address << 16) | (fp->block_offset & 0xFFFF)) + + /** + * Set the file to read from the location specified by _pos_. + * + * @param fp BGZF file handler + * @param pos virtual file offset returned by bgzf_tell() + * @param whence must be SEEK_SET + * @return 0 on success and -1 on error + */ + int64_t bgzf_seek(BGZF *fp, int64_t pos, int whence); + + /** + * Check if the BGZF end-of-file (EOF) marker is present + * + * @param fp BGZF file handler opened for reading + * @return 1 if EOF is present; 0 if not or on I/O error + */ + int bgzf_check_EOF(BGZF *fp); + + /** + * Check if a file is in the BGZF format + * + * @param fn file name + * @return 1 if _fn_ is BGZF; 0 if not or on I/O error + */ + int bgzf_is_bgzf(const char *fn); + + /********************* + * Advanced routines * + *********************/ + + /** + * Set the cache size. Only effective when compiled with -DBGZF_CACHE. + * + * @param fp BGZF file handler + * @param size size of cache in bytes; 0 to disable caching (default) + */ + void bgzf_set_cache_size(BGZF *fp, int size); + + /** + * Flush the file if the remaining buffer size is smaller than _size_ + */ + int bgzf_flush_try(BGZF *fp, ssize_t size); + + /** + * Read one byte from a BGZF file. It is faster than bgzf_read() + * @param fp BGZF file handler + * @return byte read; -1 on end-of-file or error + */ + int bgzf_getc(BGZF *fp); + + /** + * Read one line from a BGZF file. It is faster than bgzf_getc() + * + * @param fp BGZF file handler + * @param delim delimiter + * @param str string to write to; must be initialized + * @return length of the string; 0 on end-of-file; negative on error + */ + int bgzf_getline(BGZF *fp, int delim, kstring_t *str); + + /** + * Read the next BGZF block. 
+ */ + int bgzf_read_block(BGZF *fp); #ifdef __cplusplus } #endif -static inline int bgzf_getc(BGZF *fp) -{ - int c; - if (fp->block_offset >= fp->block_length) { - if (bgzf_read_block(fp) != 0) return -2; /* error */ - if (fp->block_length == 0) return -1; /* end-of-file */ - } - c = ((unsigned char*)fp->uncompressed_block)[fp->block_offset++]; - if (fp->block_offset == fp->block_length) { -#ifdef _USE_KNETFILE - fp->block_address = knet_tell(fp->x.fpr); -#else - fp->block_address = ftello(fp->file); -#endif - fp->block_offset = 0; - fp->block_length = 0; - } - return c; -} - #endif