X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=samtools.git;a=blobdiff_plain;f=razf.c;h=e7499f9f61e6c2fae4ec71a5f4a500ed5b151fe8;hp=a5e8f5161a8d0780a820556e0e6640fd9762bb12;hb=aa08abe5f0b84ee0dd3491f00fe357d661c08e0c;hpb=4a17fa7e1f91b2fe04ad334a63fc2b0d5e859d8a diff --git a/razf.c b/razf.c index a5e8f51..e7499f9 100644 --- a/razf.c +++ b/razf.c @@ -38,6 +38,7 @@ #include #include "razf.h" + #if ZLIB_VERNUM < 0x1221 struct _gz_header_s { int text; @@ -107,20 +108,36 @@ static void save_zindex(RAZF *rz, int fd){ } #endif +#ifdef _USE_KNETFILE +static void load_zindex(RAZF *rz, knetFile *fp){ +#else static void load_zindex(RAZF *rz, int fd){ +#endif int32_t i, v32; int is_be; if(!rz->load_index) return; if(rz->index == NULL) rz->index = malloc(sizeof(ZBlockIndex)); is_be = is_big_endian(); +#ifdef _USE_KNETFILE + knet_read(fp, &rz->index->size, sizeof(int)); +#else read(fd, &rz->index->size, sizeof(int)); +#endif if(!is_be) rz->index->size = byte_swap_4((uint32_t)rz->index->size); rz->index->cap = rz->index->size; v32 = rz->index->size / RZ_BIN_SIZE + 1; rz->index->bin_offsets = malloc(sizeof(int64_t) * v32); +#ifdef _USE_KNETFILE + knet_read(fp, rz->index->bin_offsets, sizeof(int64_t) * v32); +#else read(fd, rz->index->bin_offsets, sizeof(int64_t) * v32); +#endif rz->index->cell_offsets = malloc(sizeof(int) * rz->index->size); +#ifdef _USE_KNETFILE + knet_read(fp, rz->index->cell_offsets, sizeof(int) * rz->index->size); +#else read(fd, rz->index->cell_offsets, sizeof(int) * rz->index->size); +#endif if(!is_be){ for(i=0;iindex->bin_offsets[i] = byte_swap_8((uint64_t)rz->index->bin_offsets[i]); for(i=0;iindex->size;i++) rz->index->cell_offsets[i] = byte_swap_4((uint32_t)rz->index->cell_offsets[i]); @@ -141,7 +158,11 @@ static RAZF* razf_open_w(int fd){ #endif rz = calloc(1, sizeof(RAZF)); rz->mode = 'w'; +#ifdef _USE_KNETFILE + rz->x.fpw = fd; +#else rz->filedes = fd; +#endif rz->stream = calloc(sizeof(z_stream), 1); rz->inbuf = malloc(RZ_BUFFER_SIZE); rz->outbuf = malloc(RZ_BUFFER_SIZE); @@ -176,7 +197,11 @@ static void _razf_write(RAZF* rz, const void *data, int size){ deflate(rz->stream, Z_NO_FLUSH); rz->out += tout - rz->stream->avail_out; if(rz->stream->avail_out) break; +#ifdef _USE_KNETFILE + write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); +#else write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); +#endif rz->stream->avail_out = RZ_BUFFER_SIZE; rz->stream->next_out = rz->outbuf; if(rz->stream->avail_in == 0) break; @@ -192,7 +217,11 @@ static void razf_flush(RAZF *rz){ rz->buf_off = rz->buf_len = 0; } if(rz->stream->avail_out){ +#ifdef _USE_KNETFILE + write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); +#else write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); +#endif rz->stream->avail_out = RZ_BUFFER_SIZE; rz->stream->next_out = rz->outbuf; } @@ -201,7 +230,11 @@ static void razf_flush(RAZF *rz){ deflate(rz->stream, Z_FULL_FLUSH); rz->out += tout - rz->stream->avail_out; if(rz->stream->avail_out == 0){ +#ifdef _USE_KNETFILE + write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); +#else write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); +#endif rz->stream->avail_out = RZ_BUFFER_SIZE; rz->stream->next_out = rz->outbuf; } else break; @@ -221,7 +254,11 @@ static void razf_end_flush(RAZF *rz){ deflate(rz->stream, Z_FINISH); rz->out += tout - rz->stream->avail_out; if(rz->stream->avail_out < RZ_BUFFER_SIZE){ +#ifdef _USE_KNETFILE + write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); +#else write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); +#endif rz->stream->avail_out = RZ_BUFFER_SIZE; rz->stream->next_out = rz->outbuf; } else break; @@ -308,23 +345,35 @@ static int _read_gz_header(unsigned char *data, int size, int *extra_off, int *e return n; } +#ifdef _USE_KNETFILE +static RAZF* razf_open_r(knetFile *fp, int _load_index){ +#else static RAZF* razf_open_r(int fd, int _load_index){ +#endif RAZF *rz; int ext_off, ext_len; int n, is_be, ret; int64_t end; unsigned char c[] = "RAZF"; + rz = calloc(1, sizeof(RAZF)); + rz->mode = 'r'; +#ifdef _USE_KNETFILE + rz->x.fpr = fp; +#else #ifdef _WIN32 setmode(fd, O_BINARY); #endif - rz = calloc(1, sizeof(RAZF)); - rz->mode = 'r'; rz->filedes = fd; +#endif rz->stream = calloc(sizeof(z_stream), 1); rz->inbuf = malloc(RZ_BUFFER_SIZE); rz->outbuf = malloc(RZ_BUFFER_SIZE); rz->end = rz->src_end = 0x7FFFFFFFFFFFFFFFLL; +#ifdef _USE_KNETFILE + n = knet_read(rz->x.fpr, rz->inbuf, RZ_BUFFER_SIZE); +#else n = read(rz->filedes, rz->inbuf, RZ_BUFFER_SIZE); +#endif ret = _read_gz_header(rz->inbuf, n, &ext_off, &ext_len); if(ret == 0){ PLAIN_FILE: @@ -355,7 +404,11 @@ static RAZF* razf_open_r(int fd, int _load_index){ } rz->load_index = _load_index; rz->file_type = FILE_TYPE_RZ; +#ifdef _USE_KNETFILE + if(knet_seek(fp, -16, SEEK_END) == -1){ +#else if(lseek(fd, -16, SEEK_END) == -1){ +#endif UNSEEKABLE: rz->seekable = 0; rz->index = NULL; @@ -363,10 +416,19 @@ static RAZF* razf_open_r(int fd, int _load_index){ } else { is_be = is_big_endian(); rz->seekable = 1; +#ifdef _USE_KNETFILE + knet_read(fp, &end, sizeof(int64_t)); +#else read(fd, &end, sizeof(int64_t)); +#endif if(!is_be) rz->src_end = (int64_t)byte_swap_8((uint64_t)end); else rz->src_end = end; + +#ifdef _USE_KNETFILE + knet_read(fp, &end, sizeof(int64_t)); +#else read(fd, &end, sizeof(int64_t)); +#endif if(!is_be) rz->end = (int64_t)byte_swap_8((uint64_t)end); else rz->end = end; if(n > rz->end){ @@ -374,19 +436,47 @@ static RAZF* razf_open_r(int fd, int _load_index){ n = rz->end; } if(rz->end > rz->src_end){ +#ifdef _USE_KNETFILE + knet_seek(fp, rz->in, SEEK_SET); +#else lseek(fd, rz->in, SEEK_SET); +#endif goto UNSEEKABLE; } +#ifdef _USE_KNETFILE + knet_seek(fp, rz->end, SEEK_SET); + if(knet_tell(fp) != rz->end){ + knet_seek(fp, rz->in, SEEK_SET); +#else if(lseek(fd, rz->end, SEEK_SET) != rz->end){ lseek(fd, rz->in, SEEK_SET); +#endif goto UNSEEKABLE; } +#ifdef _USE_KNETFILE + load_zindex(rz, fp); + knet_seek(fp, n, SEEK_SET); +#else load_zindex(rz, fd); lseek(fd, n, SEEK_SET); +#endif } return rz; } +#ifdef _USE_KNETFILE +RAZF* razf_dopen(int fd, const char *mode){ + if (strstr(mode, "r")) fprintf(stderr,"[razf_dopen] implement me\n"); + else if(strstr(mode, "w")) return razf_open_w(fd); + return NULL; +} + +RAZF* razf_dopen2(int fd, const char *mode) +{ + fprintf(stderr,"[razf_dopen2] implement me\n"); + return NULL; +} +#else RAZF* razf_dopen(int fd, const char *mode){ if(strstr(mode, "r")) return razf_open_r(fd, 1); else if(strstr(mode, "w")) return razf_open_w(fd); @@ -399,23 +489,34 @@ RAZF* razf_dopen2(int fd, const char *mode) else if(strstr(mode, "w")) return razf_open_w(fd); else return NULL; } +#endif static inline RAZF* _razf_open(const char *filename, const char *mode, int _load_index){ int fd; RAZF *rz; if(strstr(mode, "r")){ +#ifdef _USE_KNETFILE + knetFile *fd = knet_open(filename, "r"); + if (fd == 0) { + fprintf(stderr, "[_razf_open] fail to open %s\n", filename); + return NULL; + } +#else #ifdef _WIN32 fd = open(filename, O_RDONLY | O_BINARY); #else fd = open(filename, O_RDONLY); #endif +#endif + if(fd < 0) return NULL; rz = razf_open_r(fd, _load_index); } else if(strstr(mode, "w")){ #ifdef _WIN32 - fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644); + fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666); #else - fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0644); + fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0666); #endif + if(fd < 0) return NULL; rz = razf_open_w(fd); } else return NULL; return rz; @@ -435,9 +536,17 @@ int razf_get_data_size(RAZF *rz, int64_t *u_size, int64_t *c_size){ switch(rz->file_type){ case FILE_TYPE_PLAIN: if(rz->end == 0x7fffffffffffffffLL){ +#ifdef _USE_KNETFILE + if(knet_seek(rz->x.fpr, 0, SEEK_CUR) == -1) return 0; + n = knet_tell(rz->x.fpr); + knet_seek(rz->x.fpr, 0, SEEK_END); + rz->end = knet_tell(rz->x.fpr); + knet_seek(rz->x.fpr, n, SEEK_SET); +#else if((n = lseek(rz->filedes, 0, SEEK_CUR)) == -1) return 0; rz->end = lseek(rz->filedes, 0, SEEK_END); lseek(rz->filedes, n, SEEK_SET); +#endif } *u_size = *c_size = rz->end; return 1; @@ -457,7 +566,11 @@ static int _razf_read(RAZF* rz, void *data, int size){ int ret, tin; if(rz->z_eof || rz->z_err) return 0; if (rz->file_type == FILE_TYPE_PLAIN) { +#ifdef _USE_KNETFILE + ret = knet_read(rz->x.fpr, data, size); +#else ret = read(rz->filedes, data, size); +#endif if (ret == 0) rz->z_eof = 1; return ret; } @@ -467,9 +580,17 @@ static int _razf_read(RAZF* rz, void *data, int size){ if(rz->stream->avail_in == 0){ if(rz->in >= rz->end){ rz->z_eof = 1; break; } if(rz->end - rz->in < RZ_BUFFER_SIZE){ +#ifdef _USE_KNETFILE + rz->stream->avail_in = knet_read(rz->x.fpr, rz->inbuf, rz->end -rz->in); +#else rz->stream->avail_in = read(rz->filedes, rz->inbuf, rz->end -rz->in); +#endif } else { +#ifdef _USE_KNETFILE + rz->stream->avail_in = knet_read(rz->x.fpr, rz->inbuf, RZ_BUFFER_SIZE); +#else rz->stream->avail_in = read(rz->filedes, rz->inbuf, RZ_BUFFER_SIZE); +#endif } if(rz->stream->avail_in == 0){ rz->z_eof = 1; @@ -481,7 +602,7 @@ static int _razf_read(RAZF* rz, void *data, int size){ ret = inflate(rz->stream, Z_BLOCK); rz->in += tin - rz->stream->avail_in; if(ret == Z_NEED_DICT || ret == Z_MEM_ERROR || ret == Z_DATA_ERROR){ - fprintf(stderr, "[_razf_read] inflate error: %d (at %s:%d)\n", ret, __FILE__, __LINE__); + fprintf(stderr, "[_razf_read] inflate error: %d %s (at %s:%d)\n", ret, rz->stream->msg ? rz->stream->msg : "", __FILE__, __LINE__); rz->z_err = 1; break; } @@ -566,14 +687,18 @@ int razf_skip(RAZF* rz, int size){ } if(rz->buf_flush) continue; rz->buf_len = _razf_read(rz, rz->outbuf, RZ_BUFFER_SIZE); - if(rz->z_eof) break; + if(rz->z_eof || rz->z_err) break; } rz->out += ori_size - size; return ori_size - size; } static void _razf_reset_read(RAZF *rz, int64_t in, int64_t out){ +#ifdef _USE_KNETFILE + knet_seek(rz->x.fpr, in, SEEK_SET); +#else lseek(rz->filedes, in, SEEK_SET); +#endif rz->in = in; rz->out = out; rz->block_pos = in; @@ -592,7 +717,12 @@ int64_t razf_jump(RAZF *rz, int64_t block_start, int block_offset){ if(rz->file_type == FILE_TYPE_PLAIN){ rz->buf_off = rz->buf_len = 0; pos = block_start + block_offset; +#ifdef _USE_KNETFILE + knet_seek(rz->x.fpr, pos, SEEK_SET); + pos = knet_tell(rz->x.fpr); +#else pos = lseek(rz->filedes, pos, SEEK_SET); +#endif rz->out = rz->in = pos; return pos; } @@ -614,7 +744,12 @@ int64_t razf_seek(RAZF* rz, int64_t pos, int where){ if (where == SEEK_CUR) pos += rz->out; else if (where == SEEK_END) pos += rz->src_end; if(rz->file_type == FILE_TYPE_PLAIN){ +#ifdef _USE_KNETFILE + knet_seek(rz->x.fpr, pos, SEEK_SET); + seek_pos = knet_tell(rz->x.fpr); +#else seek_pos = lseek(rz->filedes, pos, SEEK_SET); +#endif rz->buf_off = rz->buf_len = 0; rz->out = rz->in = seek_pos; return seek_pos; @@ -663,6 +798,18 @@ void razf_close(RAZF *rz){ #ifndef _RZ_READONLY razf_end_flush(rz); deflateEnd(rz->stream); +#ifdef _USE_KNETFILE + save_zindex(rz, rz->x.fpw); + if(is_big_endian()){ + write(rz->x.fpw, &rz->in, sizeof(int64_t)); + write(rz->x.fpw, &rz->out, sizeof(int64_t)); + } else { + uint64_t v64 = byte_swap_8((uint64_t)rz->in); + write(rz->x.fpw, &v64, sizeof(int64_t)); + v64 = byte_swap_8((uint64_t)rz->out); + write(rz->x.fpw, &v64, sizeof(int64_t)); + } +#else save_zindex(rz, rz->filedes); if(is_big_endian()){ write(rz->filedes, &rz->in, sizeof(int64_t)); @@ -673,6 +820,7 @@ void razf_close(RAZF *rz){ v64 = byte_swap_8((uint64_t)rz->out); write(rz->filedes, &v64, sizeof(int64_t)); } +#endif #endif } else if(rz->mode == 'r'){ if(rz->stream) inflateEnd(rz->stream); @@ -691,7 +839,14 @@ void razf_close(RAZF *rz){ free(rz->index); } free(rz->stream); +#ifdef _USE_KNETFILE + if (rz->mode == 'r') + knet_close(rz->x.fpr); + if (rz->mode == 'w') + close(rz->x.fpw); +#else close(rz->filedes); +#endif free(rz); }