022397169188e89a8ed49c4ef846fc18fa1a026b
[samtools.git] / razf.c
1 /*
2  * RAZF : Random Access compressed(Z) File
3  * Version: 1.0
4  * Release Date: 2008-10-27
5  *
6  * Copyright 2008, Jue Ruan <ruanjue@gmail.com>, Heng Li <lh3@sanger.ac.uk>
7  *
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31
32 #ifndef _NO_RAZF
33
34 #include <fcntl.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <unistd.h>
39 #include "razf.h"
40
41
42 #if ZLIB_VERNUM < 0x1221
43 struct _gz_header_s {
44     int     text;
45     uLong   time;
46     int     xflags;
47     int     os;
48     Bytef   *extra;
49     uInt    extra_len;
50     uInt    extra_max;
51     Bytef   *name;
52     uInt    name_max;
53     Bytef   *comment;
54     uInt    comm_max;
55     int     hcrc;
56     int     done;
57 };
58 #warning "zlib < 1.2.2.1; RAZF writing is disabled."
59 #endif
60
61 #define DEF_MEM_LEVEL 8
62
/* Reverse the byte order of a 32-bit value (ABCD -> DCBA). */
static inline uint32_t byte_swap_4(uint32_t v){
	const uint32_t lowest  = (v & 0x000000FFU) << 24;
	const uint32_t low_mid = (v & 0x0000FF00U) << 8;
	const uint32_t high_mid = (v & 0x00FF0000U) >> 8;
	const uint32_t highest = (v & 0xFF000000U) >> 24;
	return lowest | low_mid | high_mid | highest;
}
67
/* Reverse the byte order of a 64-bit value. */
static inline uint64_t byte_swap_8(uint64_t v){
	uint64_t reversed = 0;
	int k;
	/* Peel bytes off the low end of v and push them onto reversed. */
	for(k = 0; k < 8; k++){
		reversed = (reversed << 8) | (v & 0xFFU);
		v >>= 8;
	}
	return reversed;
}
73
/*
 * Report the host byte order: returns non-zero when the first byte in
 * memory of the integer 1 is not 0x01 (i.e. the host is not
 * little-endian), and 0 otherwise.
 */
static inline int is_big_endian(){
	const int probe = 1;
	const char *first_byte = (const char*)&probe;
	if(*first_byte == 0x01) return 0;
	return 1;
}
79
80 #ifndef _RZ_READONLY
81 static void add_zindex(RAZF *rz, int64_t in, int64_t out){
82         uint32_t *cores;
83         int64_t *bores;
84
85         if(rz->index->size == rz->index->cap){
86                 rz->index->cap = rz->index->cap * 1.5 + 2;
87                 cores = realloc(rz->index->cell_offsets, sizeof(int) * rz->index->cap);
88                 bores = realloc(rz->index->bin_offsets, sizeof(int64_t) * (rz->index->cap/RZ_BIN_SIZE + 1));
89                 if ((cores == NULL) || (bores == NULL)) {
90                         fprintf(stderr, "[%s] failure to allocate space for new zindex.\n", __func__);
91                         abort();
92                 }
93                 rz->index->cell_offsets = cores;
94                 rz->index->bin_offsets = bores;
95         }
96         if(rz->index->size % RZ_BIN_SIZE == 0) rz->index->bin_offsets[rz->index->size / RZ_BIN_SIZE] = out;
97         rz->index->cell_offsets[rz->index->size] = out - rz->index->bin_offsets[rz->index->size / RZ_BIN_SIZE];
98         rz->index->size ++;
99 }
100
101 static void save_zindex(RAZF *rz, int fd){
102         size_t count;
103         int32_t i, v32;
104         int is_be;
105         is_be = is_big_endian();
106         if(is_be) {
107                 if (write(fd, &rz->index->size, sizeof(int)) < 0) {
108                         fprintf(stderr, "[%s] failure to write zindex size.\n", __func__);
109                         abort();
110                 }
111         }
112         else {
113                 v32 = byte_swap_4((uint32_t)rz->index->size);
114                 if (write(fd, &v32, sizeof(uint32_t)) < 0) {
115                         fprintf(stderr, "[%s] failure to write zindex size.\n", __func__);
116                         abort();
117                 }
118         }
119         v32 = rz->index->size / RZ_BIN_SIZE + 1;
120         if(!is_be){
121                 for(i=0;i<v32;i++) rz->index->bin_offsets[i]  = byte_swap_8((uint64_t)rz->index->bin_offsets[i]);
122                 for(i=0;i<rz->index->size;i++) rz->index->cell_offsets[i] = byte_swap_4((uint32_t)rz->index->cell_offsets[i]);
123         }
124         count = sizeof(int64_t) * v32;
125         if (write(fd, rz->index->bin_offsets, count) < 0) {
126                 fprintf(stderr, "[%s] failure to write zindex bin_offsets.\n", __func__);
127                 abort();
128         }
129
130         count = sizeof(int32_t) * rz->index->size;
131         if (write(fd, rz->index->cell_offsets, count) < 0) {
132                 fprintf(stderr, "[%s] failure to write zindex cell_offsets.\n", __func__);
133                 abort();
134         }
135 }
136 #endif
137
138 #ifdef _USE_KNETFILE
139 static void load_zindex(RAZF *rz, knetFile *fp){
140 #else
141 static void load_zindex(RAZF *rz, int fd){
142 #endif
143         int32_t i, v32;
144         int is_be;
145         size_t count;
146         if(!rz->load_index) return;
147         if(rz->index == NULL) {
148                 if ((rz->index = malloc(sizeof(ZBlockIndex))) == NULL) {
149                         fprintf(stderr, "[%s] failure to allocate index.\n", __func__);
150                         abort();
151                 }
152         }
153         is_be = is_big_endian();
154 #ifdef _USE_KNETFILE
155         if (knet_read(fp, &rz->index->size, sizeof(int)) < 0) {
156 #else
157         if (read(fd, &rz->index->size, sizeof(int)) < 0) {
158 #endif
159                 fprintf(stderr, "[%s] failure to read zindex size.\n", __func__);
160                 abort();
161         }
162         if(!is_be) rz->index->size = byte_swap_4((uint32_t)rz->index->size);
163         rz->index->cap = rz->index->size;
164         v32 = rz->index->size / RZ_BIN_SIZE + 1;
165         count = sizeof(int64_t) * v32;
166         if ((rz->index->bin_offsets = malloc(count)) == NULL) {
167                 fprintf(stderr, "[%s] failure to allocate bin_offsets array.\n", __func__);
168                 abort();
169         }
170 #ifdef _USE_KNETFILE
171         if (knet_read(fp, rz->index->bin_offsets, count) < 0) {
172 #else
173         if (read(fd, rz->index->bin_offsets, count) < 0) {
174 #endif
175                 fprintf(stderr, "[%s] failure to read bin_offsets.\n", __func__);
176                 abort();
177         }
178         count = sizeof(int) * rz->index->size;
179         if ((rz->index->cell_offsets = malloc(count)) == NULL) {
180                 fprintf(stderr, "[%s] failure to allocate cell_offsets array.\n", __func__);
181                 abort();
182         }
183 #ifdef _USE_KNETFILE
184         if (knet_read(fp, rz->index->cell_offsets, count) < count) {
185 #else
186         if (read(fd, rz->index->cell_offsets, count) < count) {
187 #endif
188                 fprintf(stderr, "[%s] failure to read cell_offsets.\n", __func__);
189                 abort();
190         }
191         if(!is_be){
192                 for(i=0;i<v32;i++) rz->index->bin_offsets[i] = byte_swap_8((uint64_t)rz->index->bin_offsets[i]);
193                 for(i=0;i<rz->index->size;i++) rz->index->cell_offsets[i] = byte_swap_4((uint32_t)rz->index->cell_offsets[i]);
194         }
195 }
196
197 #ifdef _RZ_READONLY
198 static RAZF* razf_open_w(int fd)
199 {
200         fprintf(stderr, "[razf_open_w] Writing is not available with zlib ver < 1.2.2.1\n");
201         return 0;
202 }
203 #else
204 static RAZF* razf_open_w(int fd){
205         RAZF *rz;
206 #ifdef _WIN32
207         setmode(fd, O_BINARY);
208 #endif
209         if ((rz = calloc(1, sizeof(RAZF))) == NULL) {
210                 fprintf(stderr, "[%s] failure to allocate RAZF structure.\n", __func__);
211                 return NULL;
212         }
213         rz->mode = 'w';
214 #ifdef _USE_KNETFILE
215     rz->x.fpw = fd;
216 #else
217         rz->filedes = fd;
218 #endif
219         if ((rz->stream = calloc(sizeof(z_stream), 1)) == NULL) {
220                 fprintf(stderr, "[%s] failure to allocate stream buffer.\n", __func__);
221                 free(rz);
222                 return NULL;
223         }
224         if ((rz->inbuf  = malloc(RZ_BUFFER_SIZE)) == NULL) {
225                 fprintf(stderr, "[%s] failure to allocate input buffer.\n", __func__);
226                 free(rz->stream);
227                 free(rz);
228                 return NULL;
229         }
230         if ((rz->outbuf = malloc(RZ_BUFFER_SIZE)) == NULL) {
231                 fprintf(stderr, "[%s] failure to allocate output buffer.\n", __func__);
232                 free(rz->stream);
233                 free(rz->inbuf);
234                 free(rz);
235                 return NULL;
236         }
237         if ((rz->index = calloc(sizeof(ZBlockIndex), 1)) == NULL) {
238                 fprintf(stderr, "[%s] failure to allocate index.\n", __func__);
239                 free(rz->stream);
240                 free(rz->inbuf);
241                 free(rz->outbuf);
242                 free(rz);
243                 return NULL;
244         }
245
246         deflateInit2(rz->stream, RZ_COMPRESS_LEVEL, Z_DEFLATED, WINDOW_BITS + 16, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY);
247         rz->stream->avail_out = RZ_BUFFER_SIZE;
248         rz->stream->next_out  = rz->outbuf;
249         if ((rz->header = calloc(sizeof(gz_header), 1)) == NULL) {
250                 fprintf(stderr, "[%s] failure to allocate header buffer.\n", __func__);
251                 free(rz->stream);
252                 free(rz->inbuf);
253                 free(rz->outbuf);
254                 free(rz->index);
255                 free(rz);
256                 return NULL;
257         }
258         rz->header->os    = 0x03; //Unix
259         rz->header->text  = 0;
260         rz->header->time  = 0;
261         if ((rz->header->extra = malloc(7)) == NULL) {
262                 fprintf(stderr, "[%s] failure to allocate header buffer.\n", __func__);
263                 free(rz->stream);
264                 free(rz->inbuf);
265                 free(rz->outbuf);
266                 free(rz->index);
267                 free(rz->header);
268                 free(rz);
269                 return NULL;
270         }
271         strncpy((char*)rz->header->extra, "RAZF", 4);
272         rz->header->extra[4] = 1; // obsolete field
273         // block size = RZ_BLOCK_SIZE, Big-Endian
274         rz->header->extra[5] = RZ_BLOCK_SIZE >> 8;
275         rz->header->extra[6] = RZ_BLOCK_SIZE & 0xFF;
276         rz->header->extra_len = 7;
277         rz->header->name = rz->header->comment  = 0;
278         rz->header->hcrc = 0;
279         deflateSetHeader(rz->stream, rz->header);
280         rz->block_pos = rz->block_off = 0;
281         return rz;
282 }
283
284 static void _razf_write(RAZF* rz, const void *data, int size){
285         int tout;
286         size_t count;
287         rz->stream->avail_in = size;
288         rz->stream->next_in  = (void*)data;
289         while(1){
290                 tout = rz->stream->avail_out;
291                 deflate(rz->stream, Z_NO_FLUSH);
292                 rz->out += tout - rz->stream->avail_out;
293                 if(rz->stream->avail_out) break;
294                 count = RZ_BUFFER_SIZE - rz->stream->avail_out;
295 #ifdef _USE_KNETFILE
296                 if (write(rz->x.fpw, rz->outbuf, count) < 0) {
297 #else
298                 if (write(rz->filedes, rz->outbuf, count) < 0) {
299 #endif
300                         fprintf(stderr, "[%s] failed to write output buffer.\n", __func__);
301                         abort();
302                 }
303                 rz->stream->avail_out = RZ_BUFFER_SIZE;
304                 rz->stream->next_out  = rz->outbuf;
305                 if(rz->stream->avail_in == 0) break;
306         };
307         rz->in += size - rz->stream->avail_in;
308         rz->block_off += size - rz->stream->avail_in;
309 }
310
311 static void razf_flush(RAZF *rz){
312         size_t count;
313         uint32_t tout;
314         if(rz->buf_len){
315                 _razf_write(rz, rz->inbuf, rz->buf_len);
316                 rz->buf_off = rz->buf_len = 0;
317         }
318         if(rz->stream->avail_out){
319                 count = RZ_BUFFER_SIZE - rz->stream->avail_out;
320 #ifdef _USE_KNETFILE    
321                 if (write(rz->x.fpw, rz->outbuf, count) < 0) {
322 #else        
323                 if (write(rz->filedes, rz->outbuf, count) < 0) {
324 #endif
325                         fprintf(stderr, "[%s] failed to flush output buffer.\n", __func__);
326                         abort();
327                 }
328                 rz->stream->avail_out = RZ_BUFFER_SIZE;
329                 rz->stream->next_out  = rz->outbuf;
330         }
331         while(1){
332                 tout = rz->stream->avail_out;
333                 deflate(rz->stream, Z_FULL_FLUSH);
334                 rz->out += tout - rz->stream->avail_out;
335                 if(rz->stream->avail_out == 0){
336                         count = RZ_BUFFER_SIZE - rz->stream->avail_out;
337 #ifdef _USE_KNETFILE    
338                         if (write(rz->x.fpw, rz->outbuf, count) < 0) {
339 #else            
340                         if (write(rz->filedes, rz->outbuf, count) < 0) {
341 #endif
342                                 fprintf(stderr, "[%s] failed to flush output buffer.\n", __func__);
343                                 abort();
344                         }
345                         rz->stream->avail_out = RZ_BUFFER_SIZE;
346                         rz->stream->next_out  = rz->outbuf;
347                 } else break;
348         }
349         rz->block_pos = rz->out;
350         rz->block_off = 0;
351 }
352
353 static void razf_end_flush(RAZF *rz){
354         size_t count;
355         uint32_t tout;
356         if(rz->buf_len){
357                 _razf_write(rz, rz->inbuf, rz->buf_len);
358                 rz->buf_off = rz->buf_len = 0;
359         }
360         while(1){
361                 tout = rz->stream->avail_out;
362                 deflate(rz->stream, Z_FINISH);
363                 rz->out += tout - rz->stream->avail_out;
364                 if(rz->stream->avail_out < RZ_BUFFER_SIZE){
365                         count = RZ_BUFFER_SIZE - rz->stream->avail_out;
366 #ifdef _USE_KNETFILE        
367                         if (write(rz->x.fpw, rz->outbuf, count) < 0) {
368 #else            
369                         if (write(rz->filedes, rz->outbuf, count) < 0) {
370 #endif
371                                 fprintf(stderr, "[%s] failed to flush output buffer.\n", __func__);
372                                 abort();
373                         }
374                         rz->stream->avail_out = RZ_BUFFER_SIZE;
375                         rz->stream->next_out  = rz->outbuf;
376                 } else break;
377         }
378 }
379
380 static void _razf_buffered_write(RAZF *rz, const void *data, int size){
381         int i, n;
382         while(1){
383                 if(rz->buf_len == RZ_BUFFER_SIZE){
384                         _razf_write(rz, rz->inbuf, rz->buf_len);
385                         rz->buf_len = 0;
386                 }
387                 if(size + rz->buf_len < RZ_BUFFER_SIZE){
388                         for(i=0;i<size;i++) ((char*)rz->inbuf + rz->buf_len)[i] = ((char*)data)[i];
389                         rz->buf_len += size;
390                         return;
391                 } else {
392                         n = RZ_BUFFER_SIZE - rz->buf_len;
393                         for(i=0;i<n;i++) ((char*)rz->inbuf + rz->buf_len)[i] = ((char*)data)[i];
394                         size -= n;
395                         data += n;
396                         rz->buf_len += n;
397                 }
398         }
399 }
400
401 int razf_write(RAZF* rz, const void *data, int size){
402         int ori_size, n;
403         int64_t next_block;
404         ori_size = size;
405         next_block = ((rz->in / RZ_BLOCK_SIZE) + 1) * RZ_BLOCK_SIZE;
406         while(rz->in + rz->buf_len + size >= next_block){
407                 n = next_block - rz->in - rz->buf_len;
408                 _razf_buffered_write(rz, data, n);
409                 data += n;
410                 size -= n;
411                 razf_flush(rz);
412                 add_zindex(rz, rz->in, rz->out);
413                 next_block = ((rz->in / RZ_BLOCK_SIZE) + 1) * RZ_BLOCK_SIZE;
414         }
415         _razf_buffered_write(rz, data, size);
416         return ori_size;
417 }
418 #endif
419
420 /* gzip flag byte */
421 #define ASCII_FLAG   0x01 /* bit 0 set: file probably ascii text */
422 #define HEAD_CRC     0x02 /* bit 1 set: header CRC present */
423 #define EXTRA_FIELD  0x04 /* bit 2 set: extra field present */
424 #define ORIG_NAME    0x08 /* bit 3 set: original file name present */
425 #define COMMENT      0x10 /* bit 4 set: file comment present */
426 #define RESERVED     0xE0 /* bits 5..7: reserved */
427
428 static int _read_gz_header(unsigned char *data, int size, int *extra_off, int *extra_len){
429         int method, flags, n, len;
430         if(size < 2) return 0;
431         if(data[0] != 0x1f || data[1] != 0x8b) return 0;
432         if(size < 4) return 0;
433         method = data[2];
434         flags  = data[3];
435         if(method != Z_DEFLATED || (flags & RESERVED)) return 0;
436         n = 4 + 6; // Skip 6 bytes
437         *extra_off = n + 2;
438         *extra_len = 0;
439         if(flags & EXTRA_FIELD){
440                 if(size < n + 2) return 0;
441                 len = ((int)data[n + 1] << 8) | data[n];
442                 n += 2;
443                 *extra_off = n;
444                 while(len){
445                         if(n >= size) return 0;
446                         n ++;
447                         len --;
448                 }
449                 *extra_len = n - (*extra_off);
450         }
451         if(flags & ORIG_NAME) while(n < size && data[n++]);
452         if(flags & COMMENT) while(n < size && data[n++]);
453         if(flags & HEAD_CRC){
454                 if(n + 2 > size) return 0;
455                 n += 2;
456         }
457         return n;
458 }
459
460 #ifdef _USE_KNETFILE
461 static RAZF* razf_open_r(knetFile *fp, int _load_index){
462 #else
463 static RAZF* razf_open_r(int fd, int _load_index){
464 #endif
465         RAZF *rz;
466         int ext_off, ext_len;
467         int n, is_be, ret;
468         int64_t end;
469         unsigned char c[] = "RAZF";
470         if ((rz = calloc(1, sizeof(RAZF))) == NULL) {
471                 fprintf(stderr, "[%s] failure to allocate RAZF structure.\n", __func__);
472                 return NULL;
473         }
474         rz->mode = 'r';
475 #ifdef _USE_KNETFILE
476     rz->x.fpr = fp;
477 #else
478 #ifdef _WIN32
479         setmode(fd, O_BINARY);
480 #endif
481         rz->filedes = fd;
482 #endif
483         if ((rz->stream = calloc(sizeof(z_stream), 1)) == NULL) {
484                 fprintf(stderr, "[%s] failure to allocate z_stream.\n", __func__);
485                 free(rz);
486                 return NULL;
487         }
488         if ((rz->inbuf = malloc(RZ_BUFFER_SIZE)) == NULL) {
489                 fprintf(stderr, "[%s] failure to allocate input buffer.\n", __func__);
490                 free(rz->stream);
491                 free(rz);
492                 return NULL;
493         }
494         if ((rz->outbuf = malloc(RZ_BUFFER_SIZE)) == NULL) {
495                 fprintf(stderr, "[%s] failure to allocate output buffer.\n", __func__);
496                 free(rz->inbuf);
497                 free(rz->stream);
498                 free(rz);
499                 return NULL;
500         }
501         rz->end = rz->src_end = 0x7FFFFFFFFFFFFFFFLL;
502 #ifdef _USE_KNETFILE
503     n = knet_read(rz->x.fpr, rz->inbuf, RZ_BUFFER_SIZE);
504 #else
505         n = read(rz->filedes, rz->inbuf, RZ_BUFFER_SIZE);
506 #endif
507         ret = _read_gz_header(rz->inbuf, n, &ext_off, &ext_len);
508         if(ret == 0){
509                 PLAIN_FILE:
510                 rz->in = n;
511                 rz->file_type = FILE_TYPE_PLAIN;
512                 memcpy(rz->outbuf, rz->inbuf, n);
513                 rz->buf_len = n;
514                 free(rz->stream);
515                 rz->stream = NULL;
516                 return rz;
517         }
518         rz->header_size = ret;
519         ret = inflateInit2(rz->stream, -WINDOW_BITS);
520         if(ret != Z_OK){ inflateEnd(rz->stream); goto PLAIN_FILE;}
521         rz->stream->avail_in = n - rz->header_size;
522         rz->stream->next_in  = rz->inbuf + rz->header_size;
523         rz->stream->avail_out = RZ_BUFFER_SIZE;
524         rz->stream->next_out  = rz->outbuf;
525         rz->file_type = FILE_TYPE_GZ;
526         rz->in = rz->header_size;
527         rz->block_pos = rz->header_size;
528         rz->next_block_pos = rz->header_size;
529         rz->block_off = 0;
530         if(ext_len < 7 || memcmp(rz->inbuf + ext_off, c, 4) != 0) return rz;
531         if(((((unsigned char*)rz->inbuf)[ext_off + 5] << 8) | ((unsigned char*)rz->inbuf)[ext_off + 6]) != RZ_BLOCK_SIZE){
532                 fprintf(stderr, " -- WARNING: RZ_BLOCK_SIZE is not %d, treat source as gz file.  in %s -- %s:%d --\n", RZ_BLOCK_SIZE, __FUNCTION__, __FILE__, __LINE__);
533                 return rz;
534         }
535         rz->load_index = _load_index;
536         rz->file_type = FILE_TYPE_RZ;
537 #ifdef _USE_KNETFILE
538         if(knet_seek(fp, -16, SEEK_END) == -1){
539 #else
540         if(lseek(fd, -16, SEEK_END) == -1){
541 #endif
542                 UNSEEKABLE:
543                 rz->seekable = 0;
544                 rz->index = NULL;
545                 rz->src_end = rz->end = 0x7FFFFFFFFFFFFFFFLL;
546         } else {
547                 is_be = is_big_endian();
548                 rz->seekable = 1;
549 #ifdef _USE_KNETFILE
550         knet_read(fp, &end, sizeof(int64_t));
551 #else
552                 read(fd, &end, sizeof(int64_t));
553 #endif        
554                 if(!is_be) rz->src_end = (int64_t)byte_swap_8((uint64_t)end);
555                 else rz->src_end = end;
556
557 #ifdef _USE_KNETFILE
558                 knet_read(fp, &end, sizeof(int64_t));
559 #else
560                 read(fd, &end, sizeof(int64_t));
561 #endif        
562                 if(!is_be) rz->end = (int64_t)byte_swap_8((uint64_t)end);
563                 else rz->end = end;
564                 if(n > rz->end){
565                         rz->stream->avail_in -= n - rz->end;
566                         n = rz->end;
567                 }
568                 if(rz->end > rz->src_end){
569 #ifdef _USE_KNETFILE
570             knet_seek(fp, rz->in, SEEK_SET);
571 #else
572                         lseek(fd, rz->in, SEEK_SET);
573 #endif
574                         goto UNSEEKABLE;
575                 }
576 #ifdef _USE_KNETFILE
577         knet_seek(fp, rz->end, SEEK_SET);
578                 if(knet_tell(fp) != rz->end){
579                         knet_seek(fp, rz->in, SEEK_SET);
580 #else
581                 if(lseek(fd, rz->end, SEEK_SET) != rz->end){
582                         lseek(fd, rz->in, SEEK_SET);
583 #endif
584                         goto UNSEEKABLE;
585                 }
586 #ifdef _USE_KNETFILE
587                 load_zindex(rz, fp);
588                 knet_seek(fp, n, SEEK_SET);
589 #else
590                 load_zindex(rz, fd);
591                 lseek(fd, n, SEEK_SET);
592 #endif
593         }
594         return rz;
595 }
596
597 #ifdef _USE_KNETFILE
598 RAZF* razf_dopen(int fd, const char *mode){
599     if (strstr(mode, "r")) fprintf(stderr,"[razf_dopen] implement me\n");
600     else if(strstr(mode, "w")) return razf_open_w(fd);
601         return NULL;
602 }
603
604 RAZF* razf_dopen2(int fd, const char *mode)
605 {
606     fprintf(stderr,"[razf_dopen2] implement me\n");
607     return NULL;
608 }
609 #else
610 RAZF* razf_dopen(int fd, const char *mode){
611         if(strstr(mode, "r")) return razf_open_r(fd, 1);
612         else if(strstr(mode, "w")) return razf_open_w(fd);
613         else return NULL;
614 }
615
616 RAZF* razf_dopen2(int fd, const char *mode)
617 {
618         if(strstr(mode, "r")) return razf_open_r(fd, 0);
619         else if(strstr(mode, "w")) return razf_open_w(fd);
620         else return NULL;
621 }
622 #endif
623
624 static inline RAZF* _razf_open(const char *filename, const char *mode, int _load_index){
625         int fd;
626         RAZF *rz;
627         if(strstr(mode, "r")){
628 #ifdef _USE_KNETFILE
629         knetFile *fd = knet_open(filename, "r");
630         if (fd == 0) {
631             fprintf(stderr, "[_razf_open] fail to open %s\n", filename);
632             return NULL;
633         }
634 #else
635 #ifdef _WIN32
636                 fd = open(filename, O_RDONLY | O_BINARY);
637 #else
638                 fd = open(filename, O_RDONLY);
639 #endif
640 #endif
641                 if(fd < 0) return NULL;
642                 rz = razf_open_r(fd, _load_index);
643         } else if(strstr(mode, "w")){
644 #ifdef _WIN32
645                 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
646 #else
647                 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0666);
648 #endif
649                 if(fd < 0) return NULL;
650                 rz = razf_open_w(fd);
651         } else return NULL;
652         return rz;
653 }
654
655 RAZF* razf_open(const char *filename, const char *mode){
656         return _razf_open(filename, mode, 1);
657 }
658
659 RAZF* razf_open2(const char *filename, const char *mode){
660         return _razf_open(filename, mode, 0);
661 }
662
663 int razf_get_data_size(RAZF *rz, int64_t *u_size, int64_t *c_size){
664         int64_t n;
665         if(rz->mode != 'r' && rz->mode != 'R') return 0;
666         switch(rz->file_type){
667                 case FILE_TYPE_PLAIN:
668                         if(rz->end == 0x7fffffffffffffffLL){
669 #ifdef _USE_KNETFILE
670                                 if(knet_seek(rz->x.fpr, 0, SEEK_CUR) == -1) return 0;
671                 n = knet_tell(rz->x.fpr);
672                                 knet_seek(rz->x.fpr, 0, SEEK_END);
673                 rz->end = knet_tell(rz->x.fpr);
674                                 knet_seek(rz->x.fpr, n, SEEK_SET);
675 #else
676                                 if((n = lseek(rz->filedes, 0, SEEK_CUR)) == -1) return 0;
677                                 rz->end = lseek(rz->filedes, 0, SEEK_END);
678                                 lseek(rz->filedes, n, SEEK_SET);
679 #endif                
680                         }
681                         *u_size = *c_size = rz->end;
682                         return 1;
683                 case FILE_TYPE_GZ:
684                         return 0;
685                 case FILE_TYPE_RZ:
686                         if(rz->src_end == rz->end) return 0;
687                         *u_size = rz->src_end;
688                         *c_size = rz->end;
689                         return 1;
690                 default:
691                         return 0;
692         }
693 }
694
695 static int _razf_read(RAZF* rz, void *data, int size){
696         int ret, tin;
697         if(rz->z_eof || rz->z_err) return 0;
698         if (rz->file_type == FILE_TYPE_PLAIN) {
699 #ifdef _USE_KNETFILE
700                 ret = knet_read(rz->x.fpr, data, size);
701 #else
702                 ret = read(rz->filedes, data, size);
703 #endif        
704                 if (ret == 0) rz->z_eof = 1;
705                 return ret;
706         }
707         rz->stream->avail_out = size;
708         rz->stream->next_out  = data;
709         while(rz->stream->avail_out){
710                 if(rz->stream->avail_in == 0){
711                         if(rz->in >= rz->end){ rz->z_eof = 1; break; }
712                         if(rz->end - rz->in < RZ_BUFFER_SIZE){
713 #ifdef _USE_KNETFILE
714                                 rz->stream->avail_in = knet_read(rz->x.fpr, rz->inbuf, rz->end -rz->in);
715 #else
716                                 rz->stream->avail_in = read(rz->filedes, rz->inbuf, rz->end -rz->in);
717 #endif        
718                         } else {
719 #ifdef _USE_KNETFILE
720                                 rz->stream->avail_in = knet_read(rz->x.fpr, rz->inbuf, RZ_BUFFER_SIZE);
721 #else
722                                 rz->stream->avail_in = read(rz->filedes, rz->inbuf, RZ_BUFFER_SIZE);
723 #endif        
724                         }
725                         if(rz->stream->avail_in == 0){
726                                 rz->z_eof = 1;
727                                 break;
728                         }
729                         rz->stream->next_in = rz->inbuf;
730                 }
731                 tin = rz->stream->avail_in;
732                 ret = inflate(rz->stream, Z_BLOCK);
733                 rz->in += tin - rz->stream->avail_in;
734                 if(ret == Z_NEED_DICT || ret == Z_MEM_ERROR || ret == Z_DATA_ERROR){
735                         fprintf(stderr, "[_razf_read] inflate error: %d %s (at %s:%d)\n", ret, rz->stream->msg ? rz->stream->msg : "", __FILE__, __LINE__);
736                         rz->z_err = 1;
737                         break;
738                 }
739                 if(ret == Z_STREAM_END){
740                         rz->z_eof = 1;
741                         break;
742                 }
743                 if ((rz->stream->data_type&128) && !(rz->stream->data_type&64)){
744                         rz->buf_flush = 1;
745                         rz->next_block_pos = rz->in;
746                         break;
747                 }
748         }
749         return size - rz->stream->avail_out;
750 }
751
752 int razf_read(RAZF *rz, void *data, int size){
753         int ori_size, i;
754         ori_size = size;
755         while(size > 0){
756                 if(rz->buf_len){
757                         if(size < rz->buf_len){
758                                 for(i=0;i<size;i++) ((char*)data)[i] = ((char*)rz->outbuf + rz->buf_off)[i];
759                                 rz->buf_off += size;
760                                 rz->buf_len -= size;
761                                 data += size;
762                                 rz->block_off += size;
763                                 size = 0;
764                                 break;
765                         } else {
766                                 for(i=0;i<rz->buf_len;i++) ((char*)data)[i] = ((char*)rz->outbuf + rz->buf_off)[i];
767                                 data += rz->buf_len;
768                                 size -= rz->buf_len;
769                                 rz->block_off += rz->buf_len;
770                                 rz->buf_off = 0;
771                                 rz->buf_len = 0;
772                                 if(rz->buf_flush){
773                                         rz->block_pos = rz->next_block_pos;
774                                         rz->block_off = 0;
775                                         rz->buf_flush = 0;
776                                 }
777                         }
778                 } else if(rz->buf_flush){
779                         rz->block_pos = rz->next_block_pos;
780                         rz->block_off = 0;
781                         rz->buf_flush = 0;
782                 }
783                 if(rz->buf_flush) continue;
784                 rz->buf_len = _razf_read(rz, rz->outbuf, RZ_BUFFER_SIZE);
785                 if(rz->z_eof && rz->buf_len == 0) break;
786         }
787         rz->out += ori_size - size;
788         return ori_size - size;
789 }
790
791 int razf_skip(RAZF* rz, int size){
792         int ori_size;
793         ori_size = size;
794         while(size > 0){
795                 if(rz->buf_len){
796                         if(size < rz->buf_len){
797                                 rz->buf_off += size;
798                                 rz->buf_len -= size;
799                                 rz->block_off += size;
800                                 size = 0;
801                                 break;
802                         } else {
803                                 size -= rz->buf_len;
804                                 rz->buf_off = 0;
805                                 rz->buf_len = 0;
806                                 rz->block_off += rz->buf_len;
807                                 if(rz->buf_flush){
808                                         rz->block_pos = rz->next_block_pos;
809                                         rz->block_off = 0;
810                                         rz->buf_flush = 0;
811                                 }
812                         }
813                 } else if(rz->buf_flush){
814                         rz->block_pos = rz->next_block_pos;
815                         rz->block_off = 0;
816                         rz->buf_flush = 0;
817                 }
818                 if(rz->buf_flush) continue;
819                 rz->buf_len = _razf_read(rz, rz->outbuf, RZ_BUFFER_SIZE);
820                 if(rz->z_eof || rz->z_err) break;
821         }
822         rz->out += ori_size - size;
823         return ori_size - size;
824 }
825
826 static void _razf_reset_read(RAZF *rz, int64_t in, int64_t out){
827 #ifdef _USE_KNETFILE
828         knet_seek(rz->x.fpr, in, SEEK_SET);
829 #else
830         lseek(rz->filedes, in, SEEK_SET);
831 #endif
832         rz->in  = in;
833         rz->out = out;
834         rz->block_pos = in;
835         rz->next_block_pos = in;
836         rz->block_off = 0;
837         rz->buf_flush = 0;
838         rz->z_eof = rz->z_err = 0;
839         inflateReset(rz->stream);
840         rz->stream->avail_in = 0;
841         rz->buf_off = rz->buf_len = 0;
842 }
843
844 int64_t razf_jump(RAZF *rz, int64_t block_start, int block_offset){
845         int64_t pos;
846         rz->z_eof = 0;
847         if(rz->file_type == FILE_TYPE_PLAIN){
848                 rz->buf_off = rz->buf_len = 0;
849                 pos = block_start + block_offset;
850 #ifdef _USE_KNETFILE
851                 knet_seek(rz->x.fpr, pos, SEEK_SET);
852         pos = knet_tell(rz->x.fpr);
853 #else
854                 pos = lseek(rz->filedes, pos, SEEK_SET);
855 #endif
856                 rz->out = rz->in = pos;
857                 return pos;
858         }
859         if(block_start == rz->block_pos && block_offset >= rz->block_off) {
860                 block_offset -= rz->block_off;
861                 goto SKIP; // Needn't reset inflate
862         }
863         if(block_start  == 0) block_start = rz->header_size; // Automaticly revist wrong block_start
864         _razf_reset_read(rz, block_start, 0);
865         SKIP:
866         if(block_offset) razf_skip(rz, block_offset);
867         return rz->block_off;
868 }
869
870 int64_t razf_seek(RAZF* rz, int64_t pos, int where){
871         int64_t idx;
872         int64_t seek_pos, new_out;
873         rz->z_eof = 0;
874         if (where == SEEK_CUR) pos += rz->out;
875         else if (where == SEEK_END) pos += rz->src_end;
876         if(rz->file_type == FILE_TYPE_PLAIN){
877 #ifdef _USE_KNETFILE
878                 knet_seek(rz->x.fpr, pos, SEEK_SET);
879         seek_pos = knet_tell(rz->x.fpr);
880 #else
881                 seek_pos = lseek(rz->filedes, pos, SEEK_SET);
882 #endif
883                 rz->buf_off = rz->buf_len = 0;
884                 rz->out = rz->in = seek_pos;
885                 return seek_pos;
886         } else if(rz->file_type == FILE_TYPE_GZ){
887                 if(pos >= rz->out) goto SKIP;
888                 return rz->out;
889         }
890         if(pos == rz->out) return pos;
891         if(pos > rz->src_end) return rz->out;
892         if(!rz->seekable || !rz->load_index){
893                 if(pos >= rz->out) goto SKIP;
894         }
895         idx = pos / RZ_BLOCK_SIZE - 1;
896         seek_pos = (idx < 0)? rz->header_size:(rz->index->cell_offsets[idx] + rz->index->bin_offsets[idx / RZ_BIN_SIZE]);
897         new_out  = (idx + 1) * RZ_BLOCK_SIZE;
898         if(pos > rz->out && new_out <= rz->out) goto SKIP;
899         _razf_reset_read(rz, seek_pos, new_out);
900         SKIP:
901         razf_skip(rz, (int)(pos - rz->out));
902         return rz->out;
903 }
904
905 uint64_t razf_tell2(RAZF *rz)
906 {
907         /*
908         if (rz->load_index) {
909                 int64_t idx, seek_pos;
910                 idx = rz->out / RZ_BLOCK_SIZE - 1;
911                 seek_pos = (idx < 0)? rz->header_size:(rz->index->cell_offsets[idx] + rz->index->bin_offsets[idx / RZ_BIN_SIZE]);
912                 if (seek_pos != rz->block_pos || rz->out%RZ_BLOCK_SIZE != rz->block_off)
913                         fprintf(stderr, "[razf_tell2] inconsistent block offset: (%lld, %lld) != (%lld, %lld)\n",
914                                         (long long)seek_pos, (long long)rz->out%RZ_BLOCK_SIZE, (long long)rz->block_pos, (long long) rz->block_off);
915         }
916         */
917         return (uint64_t)rz->block_pos<<16 | (rz->block_off&0xffff);
918 }
919
920 int64_t razf_seek2(RAZF *rz, uint64_t voffset, int where)
921 {
922         if (where != SEEK_SET) return -1;
923         return razf_jump(rz, voffset>>16, voffset&0xffff);
924 }
925
926 void razf_close(RAZF *rz){
927         if(rz->mode == 'w'){
928 #ifndef _RZ_READONLY
929                 razf_end_flush(rz);
930                 deflateEnd(rz->stream);
931 #ifdef _USE_KNETFILE
932                 save_zindex(rz, rz->x.fpw);
933                 if(is_big_endian()){
934                         if (write(rz->x.fpw, &rz->in, sizeof(int64_t)) < 0) {
935                                 fprintf(stderr, "[%s] failed to write rz.in.\n", __func__);
936                                 abort();
937                         }
938                         if (write(rz->x.fpw, &rz->out, sizeof(int64_t)) < 0) {
939                                 fprintf(stderr, "[%s] failed to write rz.out.\n", __func__);
940                                 abort();
941                         }
942                 } else {
943                         uint64_t v64 = byte_swap_8((uint64_t)rz->in);
944                         if (write(rz->x.fpw, &v64, sizeof(int64_t)) < 0) {
945                                 fprintf(stderr, "[%s] failed to write rz.in.\n", __func__);
946                                 abort();
947                         }
948                         v64 = byte_swap_8((uint64_t)rz->out);
949                         if (write(rz->x.fpw, &v64, sizeof(int64_t)) < 0) {
950                                 fprintf(stderr, "[%s] failed to write rz.out.\n", __func__);
951                                 abort();
952                         }
953                 }
954 #else
955                 save_zindex(rz, rz->filedes);
956                 if(is_big_endian()){
957                         if (write(rz->filedes, &rz->in, sizeof(int64_t)) < 0) {
958                                 fprintf(stderr, "[%s] failed to write rz.in.\n", __func__);
959                                 abort();
960                         }
961                         if (write(rz->filedes, &rz->out, sizeof(int64_t)) < 0) {
962                                 fprintf(stderr, "[%s] failed to write rz.out.\n", __func__);
963                                 abort();
964                         }
965                 } else {
966                         uint64_t v64 = byte_swap_8((uint64_t)rz->in);
967                         if (write(rz->filedes, &v64, sizeof(int64_t)) < 0) {
968                                 fprintf(stderr, "[%s] failed to write rz.in.\n", __func__);
969                                 abort();
970                         }
971                         v64 = byte_swap_8((uint64_t)rz->out);
972                         if (write(rz->filedes, &v64, sizeof(int64_t)) < 0) {
973                                 fprintf(stderr, "[%s] failed to write rz.out.\n", __func__);
974                                 abort();
975                         }
976                 }
977 #endif
978 #endif
979         } else if(rz->mode == 'r'){
980                 if(rz->stream) inflateEnd(rz->stream);
981         }
982         if(rz->inbuf) free(rz->inbuf);
983         if(rz->outbuf) free(rz->outbuf);
984         if(rz->header){
985                 free(rz->header->extra);
986                 free(rz->header->name);
987                 free(rz->header->comment);
988                 free(rz->header);
989         }
990         if(rz->index){
991                 free(rz->index->bin_offsets);
992                 free(rz->index->cell_offsets);
993                 free(rz->index);
994         }
995         free(rz->stream);
996 #ifdef _USE_KNETFILE
997     if (rz->mode == 'r')
998         knet_close(rz->x.fpr);
999     if (rz->mode == 'w')
1000         close(rz->x.fpw);
1001 #else
1002         close(rz->filedes);
1003 #endif
1004         free(rz);
1005 }
1006
1007 #endif