razf.c:
[samtools.git] / razf.c
1 /*
2  * RAZF : Random Access compressed(Z) File
3  * Version: 1.0
4  * Release Date: 2008-10-27
5  *
6  * Copyright 2008, Jue Ruan <ruanjue@gmail.com>, Heng Li <lh3@sanger.ac.uk>
7  *
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31
32 #ifndef _NO_RAZF
33
34 #include <fcntl.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <unistd.h>
39 #include "razf.h"
40
41
42 #if ZLIB_VERNUM < 0x1221
43 struct _gz_header_s {
44     int     text;
45     uLong   time;
46     int     xflags;
47     int     os;
48     Bytef   *extra;
49     uInt    extra_len;
50     uInt    extra_max;
51     Bytef   *name;
52     uInt    name_max;
53     Bytef   *comment;
54     uInt    comm_max;
55     int     hcrc;
56     int     done;
57 };
58 #warning "zlib < 1.2.2.1; RAZF writing is disabled."
59 #endif
60
61 #define DEF_MEM_LEVEL 8
62
/* Reverse the byte order of a 32-bit value (0xAABBCCDD -> 0xDDCCBBAA). */
static inline uint32_t byte_swap_4(uint32_t v){
	const uint32_t lowest  = (v & 0x000000FFU) << 24;
	const uint32_t low     = (v & 0x0000FF00U) << 8;
	const uint32_t high    = (v & 0x00FF0000U) >> 8;
	const uint32_t highest = (v & 0xFF000000U) >> 24;
	return lowest | low | high | highest;
}
67
/* Reverse the byte order of a 64-bit value. */
static inline uint64_t byte_swap_8(uint64_t v){
	uint64_t reversed = 0;
	int k;
	/* Peel bytes off the low end of v and push them onto the low end of the result. */
	for(k = 0; k < 8; k++){
		reversed = (reversed << 8) | (v & 0xFFU);
		v >>= 8;
	}
	return reversed;
}
73
/* Return non-zero when the first byte of an int holding 1 is not 1, i.e. on a big-endian host. */
static inline int is_big_endian(){
	union {
		int  word;
		char bytes[sizeof(int)];
	} probe;
	probe.word = 0x01;
	return (probe.bytes[0] != 0x01);
}
79
80 #ifndef _RZ_READONLY
81 static void add_zindex(RAZF *rz, int64_t in, int64_t out){
82         uint32_t *cores;
83         int64_t *bores;
84
85         if(rz->index->size == rz->index->cap){
86                 rz->index->cap = rz->index->cap * 1.5 + 2;
87                 cores = realloc(rz->index->cell_offsets, sizeof(int) * rz->index->cap);
88                 bores = realloc(rz->index->bin_offsets, sizeof(int64_t) * (rz->index->cap/RZ_BIN_SIZE + 1));
89                 if ((cores == NULL) || (bores == NULL)) {
90                         fprintf(stderr, "[%s] failure to allocate space for new zindex.\n", __func__);
91                         abort();
92                 }
93         }
94         if(rz->index->size % RZ_BIN_SIZE == 0) rz->index->bin_offsets[rz->index->size / RZ_BIN_SIZE] = out;
95         rz->index->cell_offsets[rz->index->size] = out - rz->index->bin_offsets[rz->index->size / RZ_BIN_SIZE];
96         rz->index->size ++;
97 }
98
99 static void save_zindex(RAZF *rz, int fd){
100         size_t count;
101         int32_t i, v32;
102         int is_be;
103         is_be = is_big_endian();
104         if(is_be) {
105                 if (write(fd, &rz->index->size, sizeof(int)) < 0) {
106                         fprintf(stderr, "[%s] failure to write zindex size.\n", __func__);
107                         abort();
108                 }
109         }
110         else {
111                 v32 = byte_swap_4((uint32_t)rz->index->size);
112                 if (write(fd, &v32, sizeof(uint32_t)) < 0) {
113                         fprintf(stderr, "[%s] failure to write zindex size.\n", __func__);
114                         abort();
115                 }
116         }
117         v32 = rz->index->size / RZ_BIN_SIZE + 1;
118         if(!is_be){
119                 for(i=0;i<v32;i++) rz->index->bin_offsets[i]  = byte_swap_8((uint64_t)rz->index->bin_offsets[i]);
120                 for(i=0;i<rz->index->size;i++) rz->index->cell_offsets[i] = byte_swap_4((uint32_t)rz->index->cell_offsets[i]);
121         }
122         count = sizeof(int64_t) * v32;
123         if (write(fd, rz->index->bin_offsets, count) < 0) {
124                 fprintf(stderr, "[%s] failure to write zindex bin_offsets.\n", __func__);
125                 abort();
126         }
127
128         count = sizeof(int32_t) * rz->index->size;
129         if (write(fd, rz->index->cell_offsets, count) < 0) {
130                 fprintf(stderr, "[%s] failure to write zindex cell_offsets.\n", __func__);
131                 abort();
132         }
133 }
134 #endif
135
136 #ifdef _USE_KNETFILE
137 static void load_zindex(RAZF *rz, knetFile *fp){
138 #else
139 static void load_zindex(RAZF *rz, int fd){
140 #endif
141         int32_t i, v32;
142         int is_be;
143         size_t count;
144         if(!rz->load_index) return;
145         if(rz->index == NULL) {
146                 if ((rz->index = malloc(sizeof(ZBlockIndex))) == NULL) {
147                         fprintf(stderr, "[%s] failure to allocate index.\n", __func__);
148                         abort();
149                 }
150         }
151         is_be = is_big_endian();
152 #ifdef _USE_KNETFILE
153         if (knet_read(fp, &rz->index->size, sizeof(int)) < 0) {
154 #else
155         if (read(fd, &rz->index->size, sizeof(int)) < 0) {
156 #endif
157                 fprintf(stderr, "[%s] failure to read zindex size.\n", __func__);
158                 abort();
159         }
160         if(!is_be) rz->index->size = byte_swap_4((uint32_t)rz->index->size);
161         rz->index->cap = rz->index->size;
162         v32 = rz->index->size / RZ_BIN_SIZE + 1;
163         count = sizeof(int64_t) * v32;
164         if ((rz->index->bin_offsets = malloc(count)) == NULL) {
165                 fprintf(stderr, "[%s] failure to allocate bin_offsets array.\n", __func__);
166                 abort();
167         }
168 #ifdef _USE_KNETFILE
169         if (knet_read(fp, rz->index->bin_offsets, count) < 0) {
170 #else
171         if (read(fd, rz->index->bin_offsets, count) < 0) {
172 #endif
173                 fprintf(stderr, "[%s] failure to read bin_offsets.\n", __func__);
174                 abort();
175         }
176         count = sizeof(int) * rz->index->size;
177         if ((rz->index->cell_offsets = malloc(count)) == NULL) {
178                 fprintf(stderr, "[%s] failure to allocate cell_offsets array.\n", __func__);
179                 abort();
180         }
181 #ifdef _USE_KNETFILE
182         if (knet_read(fp, rz->index->cell_offsets, count) < count) {
183 #else
184         if (read(fd, rz->index->cell_offsets, count) < count) {
185 #endif
186                 fprintf(stderr, "[%s] failure to read cell_offsets.\n", __func__);
187                 abort();
188         }
189         if(!is_be){
190                 for(i=0;i<v32;i++) rz->index->bin_offsets[i] = byte_swap_8((uint64_t)rz->index->bin_offsets[i]);
191                 for(i=0;i<rz->index->size;i++) rz->index->cell_offsets[i] = byte_swap_4((uint32_t)rz->index->cell_offsets[i]);
192         }
193 }
194
195 #ifdef _RZ_READONLY
196 static RAZF* razf_open_w(int fd)
197 {
198         fprintf(stderr, "[razf_open_w] Writing is not available with zlib ver < 1.2.2.1\n");
199         return 0;
200 }
201 #else
202 static RAZF* razf_open_w(int fd){
203         RAZF *rz;
204 #ifdef _WIN32
205         setmode(fd, O_BINARY);
206 #endif
207         if ((rz = calloc(1, sizeof(RAZF))) == NULL) {
208                 fprintf(stderr, "[%s] failure to allocate RAZF structure.\n", __func__);
209                 return NULL;
210         }
211         rz->mode = 'w';
212 #ifdef _USE_KNETFILE
213     rz->x.fpw = fd;
214 #else
215         rz->filedes = fd;
216 #endif
217         if ((rz->stream = calloc(sizeof(z_stream), 1)) == NULL) {
218                 fprintf(stderr, "[%s] failure to allocate stream buffer.\n", __func__);
219                 free(rz);
220                 return NULL;
221         }
222         if ((rz->inbuf  = malloc(RZ_BUFFER_SIZE)) == NULL) {
223                 fprintf(stderr, "[%s] failure to allocate input buffer.\n", __func__);
224                 free(rz->stream);
225                 free(rz);
226                 return NULL;
227         }
228         if ((rz->outbuf = malloc(RZ_BUFFER_SIZE)) == NULL) {
229                 fprintf(stderr, "[%s] failure to allocate output buffer.\n", __func__);
230                 free(rz->stream);
231                 free(rz->inbuf);
232                 free(rz);
233                 return NULL;
234         }
235         if ((rz->index = calloc(sizeof(ZBlockIndex), 1)) == NULL) {
236                 fprintf(stderr, "[%s] failure to allocate index.\n", __func__);
237                 free(rz->stream);
238                 free(rz->inbuf);
239                 free(rz->outbuf);
240                 free(rz);
241                 return NULL;
242         }
243
244         deflateInit2(rz->stream, RZ_COMPRESS_LEVEL, Z_DEFLATED, WINDOW_BITS + 16, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY);
245         rz->stream->avail_out = RZ_BUFFER_SIZE;
246         rz->stream->next_out  = rz->outbuf;
247         if ((rz->header = calloc(sizeof(gz_header), 1)) == NULL) {
248                 fprintf(stderr, "[%s] failure to allocate header buffer.\n", __func__);
249                 free(rz->stream);
250                 free(rz->inbuf);
251                 free(rz->outbuf);
252                 free(rz->index);
253                 free(rz);
254                 return NULL;
255         }
256         rz->header->os    = 0x03; //Unix
257         rz->header->text  = 0;
258         rz->header->time  = 0;
259         if ((rz->header->extra = malloc(7)) == NULL) {
260                 fprintf(stderr, "[%s] failure to allocate header buffer.\n", __func__);
261                 free(rz->stream);
262                 free(rz->inbuf);
263                 free(rz->outbuf);
264                 free(rz->index);
265                 free(rz->header);
266                 free(rz);
267                 return NULL;
268         }
269         strncpy((char*)rz->header->extra, "RAZF", 4);
270         rz->header->extra[4] = 1; // obsolete field
271         // block size = RZ_BLOCK_SIZE, Big-Endian
272         rz->header->extra[5] = RZ_BLOCK_SIZE >> 8;
273         rz->header->extra[6] = RZ_BLOCK_SIZE & 0xFF;
274         rz->header->extra_len = 7;
275         rz->header->name = rz->header->comment  = 0;
276         rz->header->hcrc = 0;
277         deflateSetHeader(rz->stream, rz->header);
278         rz->block_pos = rz->block_off = 0;
279         return rz;
280 }
281
282 static void _razf_write(RAZF* rz, const void *data, int size){
283         int tout;
284         size_t count;
285         rz->stream->avail_in = size;
286         rz->stream->next_in  = (void*)data;
287         while(1){
288                 tout = rz->stream->avail_out;
289                 deflate(rz->stream, Z_NO_FLUSH);
290                 rz->out += tout - rz->stream->avail_out;
291                 if(rz->stream->avail_out) break;
292                 count = RZ_BUFFER_SIZE - rz->stream->avail_out;
293 #ifdef _USE_KNETFILE
294                 if (write(rz->x.fpw, rz->outbuf, count) < 0) {
295 #else
296                 if (write(rz->filedes, rz->outbuf, count) < 0) {
297 #endif
298                         fprintf(stderr, "[%s] failed to write output buffer.\n", __func__);
299                         abort();
300                 }
301                 rz->stream->avail_out = RZ_BUFFER_SIZE;
302                 rz->stream->next_out  = rz->outbuf;
303                 if(rz->stream->avail_in == 0) break;
304         };
305         rz->in += size - rz->stream->avail_in;
306         rz->block_off += size - rz->stream->avail_in;
307 }
308
309 static void razf_flush(RAZF *rz){
310         size_t count;
311         uint32_t tout;
312         if(rz->buf_len){
313                 _razf_write(rz, rz->inbuf, rz->buf_len);
314                 rz->buf_off = rz->buf_len = 0;
315         }
316         if(rz->stream->avail_out){
317                 count = RZ_BUFFER_SIZE - rz->stream->avail_out;
318 #ifdef _USE_KNETFILE    
319                 if (write(rz->x.fpw, rz->outbuf, count) < 0) {
320 #else        
321                 if (write(rz->filedes, rz->outbuf, count) < 0) {
322 #endif
323                         fprintf(stderr, "[%s] failed to flush output buffer.\n", __func__);
324                         abort();
325                 }
326                 rz->stream->avail_out = RZ_BUFFER_SIZE;
327                 rz->stream->next_out  = rz->outbuf;
328         }
329         while(1){
330                 tout = rz->stream->avail_out;
331                 deflate(rz->stream, Z_FULL_FLUSH);
332                 rz->out += tout - rz->stream->avail_out;
333                 if(rz->stream->avail_out == 0){
334                         count = RZ_BUFFER_SIZE - rz->stream->avail_out;
335 #ifdef _USE_KNETFILE    
336                         if (write(rz->x.fpw, rz->outbuf, count) < 0) {
337 #else            
338                         if (write(rz->filedes, rz->outbuf, count) < 0) {
339 #endif
340                                 fprintf(stderr, "[%s] failed to flush output buffer.\n", __func__);
341                                 abort();
342                         }
343                         rz->stream->avail_out = RZ_BUFFER_SIZE;
344                         rz->stream->next_out  = rz->outbuf;
345                 } else break;
346         }
347         rz->block_pos = rz->out;
348         rz->block_off = 0;
349 }
350
351 static void razf_end_flush(RAZF *rz){
352         size_t count;
353         uint32_t tout;
354         if(rz->buf_len){
355                 _razf_write(rz, rz->inbuf, rz->buf_len);
356                 rz->buf_off = rz->buf_len = 0;
357         }
358         while(1){
359                 tout = rz->stream->avail_out;
360                 deflate(rz->stream, Z_FINISH);
361                 rz->out += tout - rz->stream->avail_out;
362                 if(rz->stream->avail_out < RZ_BUFFER_SIZE){
363                         count = RZ_BUFFER_SIZE - rz->stream->avail_out;
364 #ifdef _USE_KNETFILE        
365                         if (write(rz->x.fpw, rz->outbuf, count) < 0) {
366 #else            
367                         if (write(rz->filedes, rz->outbuf, count) < 0) {
368 #endif
369                                 fprintf(stderr, "[%s] failed to flush output buffer.\n", __func__);
370                                 abort();
371                         }
372                         rz->stream->avail_out = RZ_BUFFER_SIZE;
373                         rz->stream->next_out  = rz->outbuf;
374                 } else break;
375         }
376 }
377
378 static void _razf_buffered_write(RAZF *rz, const void *data, int size){
379         int i, n;
380         while(1){
381                 if(rz->buf_len == RZ_BUFFER_SIZE){
382                         _razf_write(rz, rz->inbuf, rz->buf_len);
383                         rz->buf_len = 0;
384                 }
385                 if(size + rz->buf_len < RZ_BUFFER_SIZE){
386                         for(i=0;i<size;i++) ((char*)rz->inbuf + rz->buf_len)[i] = ((char*)data)[i];
387                         rz->buf_len += size;
388                         return;
389                 } else {
390                         n = RZ_BUFFER_SIZE - rz->buf_len;
391                         for(i=0;i<n;i++) ((char*)rz->inbuf + rz->buf_len)[i] = ((char*)data)[i];
392                         size -= n;
393                         data += n;
394                         rz->buf_len += n;
395                 }
396         }
397 }
398
399 int razf_write(RAZF* rz, const void *data, int size){
400         int ori_size, n;
401         int64_t next_block;
402         ori_size = size;
403         next_block = ((rz->in / RZ_BLOCK_SIZE) + 1) * RZ_BLOCK_SIZE;
404         while(rz->in + rz->buf_len + size >= next_block){
405                 n = next_block - rz->in - rz->buf_len;
406                 _razf_buffered_write(rz, data, n);
407                 data += n;
408                 size -= n;
409                 razf_flush(rz);
410                 add_zindex(rz, rz->in, rz->out);
411                 next_block = ((rz->in / RZ_BLOCK_SIZE) + 1) * RZ_BLOCK_SIZE;
412         }
413         _razf_buffered_write(rz, data, size);
414         return ori_size;
415 }
416 #endif
417
/* gzip flag byte */
#define ASCII_FLAG   0x01 /* bit 0 set: file probably ascii text */
#define HEAD_CRC     0x02 /* bit 1 set: header CRC present */
#define EXTRA_FIELD  0x04 /* bit 2 set: extra field present */
#define ORIG_NAME    0x08 /* bit 3 set: original file name present */
#define COMMENT      0x10 /* bit 4 set: file comment present */
#define RESERVED     0xE0 /* bits 5..7: reserved */

/* gzip compression method "deflate" (CM = 8 in RFC 1952); normally supplied by zlib.h. */
#ifndef Z_DEFLATED
#define Z_DEFLATED 8
#endif

/*
 * Parse a gzip member header held in data[0..size).
 *
 * On success returns the header length in bytes (the offset of the first
 * byte after the header) and sets *extra_off / *extra_len to the position
 * and length of the extra field (*extra_len is 0 when there is none).
 *
 * Returns 0 when the buffer does not start with a complete, well-formed
 * header: wrong magic, a method other than deflate, reserved flag bits set,
 * or any part of the header (fixed 10 bytes, extra field, file name,
 * comment, header CRC) running past `size`.
 */
static int _read_gz_header(unsigned char *data, int size, int *extra_off, int *extra_len){
	int method, flags, n, len;
	if(size < 2) return 0;
	if(data[0] != 0x1f || data[1] != 0x8b) return 0;
	if(size < 4) return 0;
	method = data[2];
	flags  = data[3];
	if(method != Z_DEFLATED || (flags & RESERVED)) return 0;
	n = 4 + 6; // Skip 6 bytes
	/* The fixed part must be fully present, otherwise the returned length
	 * would exceed the number of bytes the caller actually holds. */
	if(size < n) return 0;
	*extra_off = n + 2;
	*extra_len = 0;
	if(flags & EXTRA_FIELD){
		if(size < n + 2) return 0;
		len = ((int)data[n + 1] << 8) | data[n];
		n += 2;
		*extra_off = n;
		if(len > size - n) return 0;
		n += len;
		*extra_len = len;
	}
	if(flags & ORIG_NAME){
		while(n < size && data[n]) n++;
		if(n >= size) return 0; /* terminating NUL not inside the buffer */
		n++;
	}
	if(flags & COMMENT){
		while(n < size && data[n]) n++;
		if(n >= size) return 0; /* terminating NUL not inside the buffer */
		n++;
	}
	if(flags & HEAD_CRC){
		if(n + 2 > size) return 0;
		n += 2;
	}
	return n;
}
457
458 #ifdef _USE_KNETFILE
459 static RAZF* razf_open_r(knetFile *fp, int _load_index){
460 #else
461 static RAZF* razf_open_r(int fd, int _load_index){
462 #endif
463         RAZF *rz;
464         int ext_off, ext_len;
465         int n, is_be, ret;
466         int64_t end;
467         unsigned char c[] = "RAZF";
468         if ((rz = calloc(1, sizeof(RAZF))) == NULL) {
469                 fprintf(stderr, "[%s] failure to allocate RAZF structure.\n", __func__);
470                 return NULL;
471         }
472         rz->mode = 'r';
473 #ifdef _USE_KNETFILE
474     rz->x.fpr = fp;
475 #else
476 #ifdef _WIN32
477         setmode(fd, O_BINARY);
478 #endif
479         rz->filedes = fd;
480 #endif
481         if ((rz->stream = calloc(sizeof(z_stream), 1)) == NULL) {
482                 fprintf(stderr, "[%s] failure to allocate z_stream.\n", __func__);
483                 free(rz);
484                 return NULL;
485         }
486         if ((rz->inbuf = malloc(RZ_BUFFER_SIZE)) == NULL) {
487                 fprintf(stderr, "[%s] failure to allocate input buffer.\n", __func__);
488                 free(rz->stream);
489                 free(rz);
490                 return NULL;
491         }
492         if ((rz->outbuf = malloc(RZ_BUFFER_SIZE)) == NULL) {
493                 fprintf(stderr, "[%s] failure to allocate output buffer.\n", __func__);
494                 free(rz->inbuf);
495                 free(rz->stream);
496                 free(rz);
497                 return NULL;
498         }
499         rz->end = rz->src_end = 0x7FFFFFFFFFFFFFFFLL;
500 #ifdef _USE_KNETFILE
501     n = knet_read(rz->x.fpr, rz->inbuf, RZ_BUFFER_SIZE);
502 #else
503         n = read(rz->filedes, rz->inbuf, RZ_BUFFER_SIZE);
504 #endif
505         ret = _read_gz_header(rz->inbuf, n, &ext_off, &ext_len);
506         if(ret == 0){
507                 PLAIN_FILE:
508                 rz->in = n;
509                 rz->file_type = FILE_TYPE_PLAIN;
510                 memcpy(rz->outbuf, rz->inbuf, n);
511                 rz->buf_len = n;
512                 free(rz->stream);
513                 rz->stream = NULL;
514                 return rz;
515         }
516         rz->header_size = ret;
517         ret = inflateInit2(rz->stream, -WINDOW_BITS);
518         if(ret != Z_OK){ inflateEnd(rz->stream); goto PLAIN_FILE;}
519         rz->stream->avail_in = n - rz->header_size;
520         rz->stream->next_in  = rz->inbuf + rz->header_size;
521         rz->stream->avail_out = RZ_BUFFER_SIZE;
522         rz->stream->next_out  = rz->outbuf;
523         rz->file_type = FILE_TYPE_GZ;
524         rz->in = rz->header_size;
525         rz->block_pos = rz->header_size;
526         rz->next_block_pos = rz->header_size;
527         rz->block_off = 0;
528         if(ext_len < 7 || memcmp(rz->inbuf + ext_off, c, 4) != 0) return rz;
529         if(((((unsigned char*)rz->inbuf)[ext_off + 5] << 8) | ((unsigned char*)rz->inbuf)[ext_off + 6]) != RZ_BLOCK_SIZE){
530                 fprintf(stderr, " -- WARNING: RZ_BLOCK_SIZE is not %d, treat source as gz file.  in %s -- %s:%d --\n", RZ_BLOCK_SIZE, __FUNCTION__, __FILE__, __LINE__);
531                 return rz;
532         }
533         rz->load_index = _load_index;
534         rz->file_type = FILE_TYPE_RZ;
535 #ifdef _USE_KNETFILE
536         if(knet_seek(fp, -16, SEEK_END) == -1){
537 #else
538         if(lseek(fd, -16, SEEK_END) == -1){
539 #endif
540                 UNSEEKABLE:
541                 rz->seekable = 0;
542                 rz->index = NULL;
543                 rz->src_end = rz->end = 0x7FFFFFFFFFFFFFFFLL;
544         } else {
545                 is_be = is_big_endian();
546                 rz->seekable = 1;
547 #ifdef _USE_KNETFILE
548         knet_read(fp, &end, sizeof(int64_t));
549 #else
550                 read(fd, &end, sizeof(int64_t));
551 #endif        
552                 if(!is_be) rz->src_end = (int64_t)byte_swap_8((uint64_t)end);
553                 else rz->src_end = end;
554
555 #ifdef _USE_KNETFILE
556                 knet_read(fp, &end, sizeof(int64_t));
557 #else
558                 read(fd, &end, sizeof(int64_t));
559 #endif        
560                 if(!is_be) rz->end = (int64_t)byte_swap_8((uint64_t)end);
561                 else rz->end = end;
562                 if(n > rz->end){
563                         rz->stream->avail_in -= n - rz->end;
564                         n = rz->end;
565                 }
566                 if(rz->end > rz->src_end){
567 #ifdef _USE_KNETFILE
568             knet_seek(fp, rz->in, SEEK_SET);
569 #else
570                         lseek(fd, rz->in, SEEK_SET);
571 #endif
572                         goto UNSEEKABLE;
573                 }
574 #ifdef _USE_KNETFILE
575         knet_seek(fp, rz->end, SEEK_SET);
576                 if(knet_tell(fp) != rz->end){
577                         knet_seek(fp, rz->in, SEEK_SET);
578 #else
579                 if(lseek(fd, rz->end, SEEK_SET) != rz->end){
580                         lseek(fd, rz->in, SEEK_SET);
581 #endif
582                         goto UNSEEKABLE;
583                 }
584 #ifdef _USE_KNETFILE
585                 load_zindex(rz, fp);
586                 knet_seek(fp, n, SEEK_SET);
587 #else
588                 load_zindex(rz, fd);
589                 lseek(fd, n, SEEK_SET);
590 #endif
591         }
592         return rz;
593 }
594
595 #ifdef _USE_KNETFILE
596 RAZF* razf_dopen(int fd, const char *mode){
597     if (strstr(mode, "r")) fprintf(stderr,"[razf_dopen] implement me\n");
598     else if(strstr(mode, "w")) return razf_open_w(fd);
599         return NULL;
600 }
601
602 RAZF* razf_dopen2(int fd, const char *mode)
603 {
604     fprintf(stderr,"[razf_dopen2] implement me\n");
605     return NULL;
606 }
607 #else
608 RAZF* razf_dopen(int fd, const char *mode){
609         if(strstr(mode, "r")) return razf_open_r(fd, 1);
610         else if(strstr(mode, "w")) return razf_open_w(fd);
611         else return NULL;
612 }
613
614 RAZF* razf_dopen2(int fd, const char *mode)
615 {
616         if(strstr(mode, "r")) return razf_open_r(fd, 0);
617         else if(strstr(mode, "w")) return razf_open_w(fd);
618         else return NULL;
619 }
620 #endif
621
622 static inline RAZF* _razf_open(const char *filename, const char *mode, int _load_index){
623         int fd;
624         RAZF *rz;
625         if(strstr(mode, "r")){
626 #ifdef _USE_KNETFILE
627         knetFile *fd = knet_open(filename, "r");
628         if (fd == 0) {
629             fprintf(stderr, "[_razf_open] fail to open %s\n", filename);
630             return NULL;
631         }
632 #else
633 #ifdef _WIN32
634                 fd = open(filename, O_RDONLY | O_BINARY);
635 #else
636                 fd = open(filename, O_RDONLY);
637 #endif
638 #endif
639                 if(fd < 0) return NULL;
640                 rz = razf_open_r(fd, _load_index);
641         } else if(strstr(mode, "w")){
642 #ifdef _WIN32
643                 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
644 #else
645                 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0666);
646 #endif
647                 if(fd < 0) return NULL;
648                 rz = razf_open_w(fd);
649         } else return NULL;
650         return rz;
651 }
652
653 RAZF* razf_open(const char *filename, const char *mode){
654         return _razf_open(filename, mode, 1);
655 }
656
657 RAZF* razf_open2(const char *filename, const char *mode){
658         return _razf_open(filename, mode, 0);
659 }
660
661 int razf_get_data_size(RAZF *rz, int64_t *u_size, int64_t *c_size){
662         int64_t n;
663         if(rz->mode != 'r' && rz->mode != 'R') return 0;
664         switch(rz->file_type){
665                 case FILE_TYPE_PLAIN:
666                         if(rz->end == 0x7fffffffffffffffLL){
667 #ifdef _USE_KNETFILE
668                                 if(knet_seek(rz->x.fpr, 0, SEEK_CUR) == -1) return 0;
669                 n = knet_tell(rz->x.fpr);
670                                 knet_seek(rz->x.fpr, 0, SEEK_END);
671                 rz->end = knet_tell(rz->x.fpr);
672                                 knet_seek(rz->x.fpr, n, SEEK_SET);
673 #else
674                                 if((n = lseek(rz->filedes, 0, SEEK_CUR)) == -1) return 0;
675                                 rz->end = lseek(rz->filedes, 0, SEEK_END);
676                                 lseek(rz->filedes, n, SEEK_SET);
677 #endif                
678                         }
679                         *u_size = *c_size = rz->end;
680                         return 1;
681                 case FILE_TYPE_GZ:
682                         return 0;
683                 case FILE_TYPE_RZ:
684                         if(rz->src_end == rz->end) return 0;
685                         *u_size = rz->src_end;
686                         *c_size = rz->end;
687                         return 1;
688                 default:
689                         return 0;
690         }
691 }
692
693 static int _razf_read(RAZF* rz, void *data, int size){
694         int ret, tin;
695         if(rz->z_eof || rz->z_err) return 0;
696         if (rz->file_type == FILE_TYPE_PLAIN) {
697 #ifdef _USE_KNETFILE
698                 ret = knet_read(rz->x.fpr, data, size);
699 #else
700                 ret = read(rz->filedes, data, size);
701 #endif        
702                 if (ret == 0) rz->z_eof = 1;
703                 return ret;
704         }
705         rz->stream->avail_out = size;
706         rz->stream->next_out  = data;
707         while(rz->stream->avail_out){
708                 if(rz->stream->avail_in == 0){
709                         if(rz->in >= rz->end){ rz->z_eof = 1; break; }
710                         if(rz->end - rz->in < RZ_BUFFER_SIZE){
711 #ifdef _USE_KNETFILE
712                                 rz->stream->avail_in = knet_read(rz->x.fpr, rz->inbuf, rz->end -rz->in);
713 #else
714                                 rz->stream->avail_in = read(rz->filedes, rz->inbuf, rz->end -rz->in);
715 #endif        
716                         } else {
717 #ifdef _USE_KNETFILE
718                                 rz->stream->avail_in = knet_read(rz->x.fpr, rz->inbuf, RZ_BUFFER_SIZE);
719 #else
720                                 rz->stream->avail_in = read(rz->filedes, rz->inbuf, RZ_BUFFER_SIZE);
721 #endif        
722                         }
723                         if(rz->stream->avail_in == 0){
724                                 rz->z_eof = 1;
725                                 break;
726                         }
727                         rz->stream->next_in = rz->inbuf;
728                 }
729                 tin = rz->stream->avail_in;
730                 ret = inflate(rz->stream, Z_BLOCK);
731                 rz->in += tin - rz->stream->avail_in;
732                 if(ret == Z_NEED_DICT || ret == Z_MEM_ERROR || ret == Z_DATA_ERROR){
733                         fprintf(stderr, "[_razf_read] inflate error: %d %s (at %s:%d)\n", ret, rz->stream->msg ? rz->stream->msg : "", __FILE__, __LINE__);
734                         rz->z_err = 1;
735                         break;
736                 }
737                 if(ret == Z_STREAM_END){
738                         rz->z_eof = 1;
739                         break;
740                 }
741                 if ((rz->stream->data_type&128) && !(rz->stream->data_type&64)){
742                         rz->buf_flush = 1;
743                         rz->next_block_pos = rz->in;
744                         break;
745                 }
746         }
747         return size - rz->stream->avail_out;
748 }
749
750 int razf_read(RAZF *rz, void *data, int size){
751         int ori_size, i;
752         ori_size = size;
753         while(size > 0){
754                 if(rz->buf_len){
755                         if(size < rz->buf_len){
756                                 for(i=0;i<size;i++) ((char*)data)[i] = ((char*)rz->outbuf + rz->buf_off)[i];
757                                 rz->buf_off += size;
758                                 rz->buf_len -= size;
759                                 data += size;
760                                 rz->block_off += size;
761                                 size = 0;
762                                 break;
763                         } else {
764                                 for(i=0;i<rz->buf_len;i++) ((char*)data)[i] = ((char*)rz->outbuf + rz->buf_off)[i];
765                                 data += rz->buf_len;
766                                 size -= rz->buf_len;
767                                 rz->block_off += rz->buf_len;
768                                 rz->buf_off = 0;
769                                 rz->buf_len = 0;
770                                 if(rz->buf_flush){
771                                         rz->block_pos = rz->next_block_pos;
772                                         rz->block_off = 0;
773                                         rz->buf_flush = 0;
774                                 }
775                         }
776                 } else if(rz->buf_flush){
777                         rz->block_pos = rz->next_block_pos;
778                         rz->block_off = 0;
779                         rz->buf_flush = 0;
780                 }
781                 if(rz->buf_flush) continue;
782                 rz->buf_len = _razf_read(rz, rz->outbuf, RZ_BUFFER_SIZE);
783                 if(rz->z_eof && rz->buf_len == 0) break;
784         }
785         rz->out += ori_size - size;
786         return ori_size - size;
787 }
788
789 int razf_skip(RAZF* rz, int size){
790         int ori_size;
791         ori_size = size;
792         while(size > 0){
793                 if(rz->buf_len){
794                         if(size < rz->buf_len){
795                                 rz->buf_off += size;
796                                 rz->buf_len -= size;
797                                 rz->block_off += size;
798                                 size = 0;
799                                 break;
800                         } else {
801                                 size -= rz->buf_len;
802                                 rz->buf_off = 0;
803                                 rz->buf_len = 0;
804                                 rz->block_off += rz->buf_len;
805                                 if(rz->buf_flush){
806                                         rz->block_pos = rz->next_block_pos;
807                                         rz->block_off = 0;
808                                         rz->buf_flush = 0;
809                                 }
810                         }
811                 } else if(rz->buf_flush){
812                         rz->block_pos = rz->next_block_pos;
813                         rz->block_off = 0;
814                         rz->buf_flush = 0;
815                 }
816                 if(rz->buf_flush) continue;
817                 rz->buf_len = _razf_read(rz, rz->outbuf, RZ_BUFFER_SIZE);
818                 if(rz->z_eof || rz->z_err) break;
819         }
820         rz->out += ori_size - size;
821         return ori_size - size;
822 }
823
824 static void _razf_reset_read(RAZF *rz, int64_t in, int64_t out){
825 #ifdef _USE_KNETFILE
826         knet_seek(rz->x.fpr, in, SEEK_SET);
827 #else
828         lseek(rz->filedes, in, SEEK_SET);
829 #endif
830         rz->in  = in;
831         rz->out = out;
832         rz->block_pos = in;
833         rz->next_block_pos = in;
834         rz->block_off = 0;
835         rz->buf_flush = 0;
836         rz->z_eof = rz->z_err = 0;
837         inflateReset(rz->stream);
838         rz->stream->avail_in = 0;
839         rz->buf_off = rz->buf_len = 0;
840 }
841
842 int64_t razf_jump(RAZF *rz, int64_t block_start, int block_offset){
843         int64_t pos;
844         rz->z_eof = 0;
845         if(rz->file_type == FILE_TYPE_PLAIN){
846                 rz->buf_off = rz->buf_len = 0;
847                 pos = block_start + block_offset;
848 #ifdef _USE_KNETFILE
849                 knet_seek(rz->x.fpr, pos, SEEK_SET);
850         pos = knet_tell(rz->x.fpr);
851 #else
852                 pos = lseek(rz->filedes, pos, SEEK_SET);
853 #endif
854                 rz->out = rz->in = pos;
855                 return pos;
856         }
857         if(block_start == rz->block_pos && block_offset >= rz->block_off) {
858                 block_offset -= rz->block_off;
859                 goto SKIP; // Needn't reset inflate
860         }
861         if(block_start  == 0) block_start = rz->header_size; // Automaticly revist wrong block_start
862         _razf_reset_read(rz, block_start, 0);
863         SKIP:
864         if(block_offset) razf_skip(rz, block_offset);
865         return rz->block_off;
866 }
867
868 int64_t razf_seek(RAZF* rz, int64_t pos, int where){
869         int64_t idx;
870         int64_t seek_pos, new_out;
871         rz->z_eof = 0;
872         if (where == SEEK_CUR) pos += rz->out;
873         else if (where == SEEK_END) pos += rz->src_end;
874         if(rz->file_type == FILE_TYPE_PLAIN){
875 #ifdef _USE_KNETFILE
876                 knet_seek(rz->x.fpr, pos, SEEK_SET);
877         seek_pos = knet_tell(rz->x.fpr);
878 #else
879                 seek_pos = lseek(rz->filedes, pos, SEEK_SET);
880 #endif
881                 rz->buf_off = rz->buf_len = 0;
882                 rz->out = rz->in = seek_pos;
883                 return seek_pos;
884         } else if(rz->file_type == FILE_TYPE_GZ){
885                 if(pos >= rz->out) goto SKIP;
886                 return rz->out;
887         }
888         if(pos == rz->out) return pos;
889         if(pos > rz->src_end) return rz->out;
890         if(!rz->seekable || !rz->load_index){
891                 if(pos >= rz->out) goto SKIP;
892         }
893         idx = pos / RZ_BLOCK_SIZE - 1;
894         seek_pos = (idx < 0)? rz->header_size:(rz->index->cell_offsets[idx] + rz->index->bin_offsets[idx / RZ_BIN_SIZE]);
895         new_out  = (idx + 1) * RZ_BLOCK_SIZE;
896         if(pos > rz->out && new_out <= rz->out) goto SKIP;
897         _razf_reset_read(rz, seek_pos, new_out);
898         SKIP:
899         razf_skip(rz, (int)(pos - rz->out));
900         return rz->out;
901 }
902
903 uint64_t razf_tell2(RAZF *rz)
904 {
905         /*
906         if (rz->load_index) {
907                 int64_t idx, seek_pos;
908                 idx = rz->out / RZ_BLOCK_SIZE - 1;
909                 seek_pos = (idx < 0)? rz->header_size:(rz->index->cell_offsets[idx] + rz->index->bin_offsets[idx / RZ_BIN_SIZE]);
910                 if (seek_pos != rz->block_pos || rz->out%RZ_BLOCK_SIZE != rz->block_off)
911                         fprintf(stderr, "[razf_tell2] inconsistent block offset: (%lld, %lld) != (%lld, %lld)\n",
912                                         (long long)seek_pos, (long long)rz->out%RZ_BLOCK_SIZE, (long long)rz->block_pos, (long long) rz->block_off);
913         }
914         */
915         return (uint64_t)rz->block_pos<<16 | (rz->block_off&0xffff);
916 }
917
918 int64_t razf_seek2(RAZF *rz, uint64_t voffset, int where)
919 {
920         if (where != SEEK_SET) return -1;
921         return razf_jump(rz, voffset>>16, voffset&0xffff);
922 }
923
924 void razf_close(RAZF *rz){
925         if(rz->mode == 'w'){
926 #ifndef _RZ_READONLY
927                 razf_end_flush(rz);
928                 deflateEnd(rz->stream);
929 #ifdef _USE_KNETFILE
930                 save_zindex(rz, rz->x.fpw);
931                 if(is_big_endian()){
932                         if (write(rz->x.fpw, &rz->in, sizeof(int64_t)) < 0) {
933                                 fprintf(stderr, "[%s] failed to write rz.in.\n", __func__);
934                                 abort();
935                         }
936                         if (write(rz->x.fpw, &rz->out, sizeof(int64_t)) < 0) {
937                                 fprintf(stderr, "[%s] failed to write rz.out.\n", __func__);
938                                 abort();
939                         }
940                 } else {
941                         uint64_t v64 = byte_swap_8((uint64_t)rz->in);
942                         if (write(rz->x.fpw, &v64, sizeof(int64_t)) < 0) {
943                                 fprintf(stderr, "[%s] failed to write rz.in.\n", __func__);
944                                 abort();
945                         }
946                         v64 = byte_swap_8((uint64_t)rz->out);
947                         if (write(rz->x.fpw, &v64, sizeof(int64_t)) < 0) {
948                                 fprintf(stderr, "[%s] failed to write rz.out.\n", __func__);
949                                 abort();
950                         }
951                 }
952 #else
953                 save_zindex(rz, rz->filedes);
954                 if(is_big_endian()){
955                         if (write(rz->filedes, &rz->in, sizeof(int64_t)) < 0) {
956                                 fprintf(stderr, "[%s] failed to write rz.in.\n", __func__);
957                                 abort();
958                         }
959                         if (write(rz->filedes, &rz->out, sizeof(int64_t)) < 0) {
960                                 fprintf(stderr, "[%s] failed to write rz.out.\n", __func__);
961                                 abort();
962                         }
963                 } else {
964                         uint64_t v64 = byte_swap_8((uint64_t)rz->in);
965                         if (write(rz->filedes, &v64, sizeof(int64_t)) < 0) {
966                                 fprintf(stderr, "[%s] failed to write rz.in.\n", __func__);
967                                 abort();
968                         }
969                         v64 = byte_swap_8((uint64_t)rz->out);
970                         if (write(rz->filedes, &v64, sizeof(int64_t)) < 0) {
971                                 fprintf(stderr, "[%s] failed to write rz.out.\n", __func__);
972                                 abort();
973                         }
974                 }
975 #endif
976 #endif
977         } else if(rz->mode == 'r'){
978                 if(rz->stream) inflateEnd(rz->stream);
979         }
980         if(rz->inbuf) free(rz->inbuf);
981         if(rz->outbuf) free(rz->outbuf);
982         if(rz->header){
983                 free(rz->header->extra);
984                 free(rz->header->name);
985                 free(rz->header->comment);
986                 free(rz->header);
987         }
988         if(rz->index){
989                 free(rz->index->bin_offsets);
990                 free(rz->index->cell_offsets);
991                 free(rz->index);
992         }
993         free(rz->stream);
994 #ifdef _USE_KNETFILE
995     if (rz->mode == 'r')
996         knet_close(rz->x.fpr);
997     if (rz->mode == 'w')
998         close(rz->x.fpw);
999 #else
1000         close(rz->filedes);
1001 #endif
1002         free(rz);
1003 }
1004
1005 #endif