94fa85492f343e0ba5af92b6dbcfb59895a5a022
[samtools.git] / razf.c
1 /*
2  * RAZF : Random Access compressed(Z) File
3  * Version: 1.0
4  * Release Date: 2008-10-27
5  *
6  * Copyright 2008, Jue Ruan <ruanjue@gmail.com>, Heng Li <lh3@sanger.ac.uk>
7  *
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31
32 #ifndef _NO_RAZF
33
34 #include <fcntl.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <unistd.h>
39 #include "razf.h"
40
41
/*
 * Compiled only when ZLIB_VERNUM < 0x1221: declares a local gzip-header
 * structure and emits a build warning that RAZF writing is disabled.
 * NOTE(review): the field list looks like a copy of zlib's gz_header layout,
 * presumably because such old zlib releases do not declare it -- confirm
 * against razf.h, which is expected to map gz_header onto this struct.
 */
#if ZLIB_VERNUM < 0x1221
struct _gz_header_s {
    int     text;
    uLong   time;
    int     xflags;
    int     os;
    Bytef   *extra;     /* extra-field bytes */
    uInt    extra_len;  /* presumably the number of valid bytes in extra -- confirm */
    uInt    extra_max;
    Bytef   *name;
    uInt    name_max;
    Bytef   *comment;
    uInt    comm_max;
    int     hcrc;
    int     done;
};
#warning "zlib < 1.2.2.1; RAZF writing is disabled."
#endif
60
61 #define DEF_MEM_LEVEL 8
62
/* Reverse the byte order of a 32-bit value (byte 0 <-> byte 3, byte 1 <-> byte 2). */
static inline uint32_t byte_swap_4(uint32_t v){
        const uint32_t b0 = v & 0xFFU;
        const uint32_t b1 = (v >> 8) & 0xFFU;
        const uint32_t b2 = (v >> 16) & 0xFFU;
        const uint32_t b3 = v >> 24;
        return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
67
/* Reverse the byte order of a 64-bit value. */
static inline uint64_t byte_swap_8(uint64_t v){
        uint64_t swapped = 0;
        int k;
        /* Peel bytes off the low end of v and push them onto the low end of the result. */
        for(k = 0; k < 8; k++){
                swapped = (swapped << 8) | (v & 0xFFU);
                v >>= 8;
        }
        return swapped;
}
73
/*
 * Report the host byte order: returns non-zero when the first byte in memory
 * of the integer 1 is not 0x01 (i.e. the low-order byte is not stored first).
 */
static inline int is_big_endian(){
        const union {
                int word;
                char bytes[sizeof(int)];
        } probe = { 0x01 };
        return probe.bytes[0] != 0x01;
}
79
80 #ifndef _RZ_READONLY
81 static void add_zindex(RAZF *rz, int64_t in, int64_t out){
82         if(rz->index->size == rz->index->cap){
83                 rz->index->cap = rz->index->cap * 1.5 + 2;
84                 rz->index->cell_offsets = realloc(rz->index->cell_offsets, sizeof(int) * rz->index->cap);
85                 rz->index->bin_offsets  = realloc(rz->index->bin_offsets, sizeof(int64_t) * (rz->index->cap/RZ_BIN_SIZE + 1));
86         }
87         if(rz->index->size % RZ_BIN_SIZE == 0) rz->index->bin_offsets[rz->index->size / RZ_BIN_SIZE] = out;
88         rz->index->cell_offsets[rz->index->size] = out - rz->index->bin_offsets[rz->index->size / RZ_BIN_SIZE];
89         rz->index->size ++;
90 }
91
92 static void save_zindex(RAZF *rz, int fd){
93         size_t count;
94         int32_t i, v32;
95         int is_be;
96         is_be = is_big_endian();
97         if(is_be) {
98                 if (write(fd, &rz->index->size, sizeof(int)) < 0) {
99                         fprintf(stderr, "[%s] failure to write zindex size.\n", __func__);
100                         abort();
101                 }
102         }
103         else {
104                 v32 = byte_swap_4((uint32_t)rz->index->size);
105                 if (write(fd, &v32, sizeof(uint32_t)) < 0) {
106                         fprintf(stderr, "[%s] failure to write zindex size.\n", __func__);
107                         abort();
108                 }
109         }
110         v32 = rz->index->size / RZ_BIN_SIZE + 1;
111         if(!is_be){
112                 for(i=0;i<v32;i++) rz->index->bin_offsets[i]  = byte_swap_8((uint64_t)rz->index->bin_offsets[i]);
113                 for(i=0;i<rz->index->size;i++) rz->index->cell_offsets[i] = byte_swap_4((uint32_t)rz->index->cell_offsets[i]);
114         }
115         count = sizeof(int64_t) * v32;
116         if (write(fd, rz->index->bin_offsets, count) < 0) {
117                 fprintf(stderr, "[%s] failure to write zindex bin_offsets.\n", __func__);
118                 abort();
119         }
120
121         count = sizeof(int32_t) * rz->index->size;
122         if (write(fd, rz->index->cell_offsets, count) < 0) {
123                 fprintf(stderr, "[%s] failure to write zindex cell_offsets.\n", __func__);
124                 abort();
125         }
126 }
127 #endif
128
129 #ifdef _USE_KNETFILE
130 static void load_zindex(RAZF *rz, knetFile *fp){
131 #else
132 static void load_zindex(RAZF *rz, int fd){
133 #endif
134         int32_t i, v32;
135         int is_be;
136         size_t count;
137         if(!rz->load_index) return;
138         if(rz->index == NULL) rz->index = malloc(sizeof(ZBlockIndex));
139         if(rz->index == NULL) {
140                 fprintf(stderr, "[%s] failure to allocate index.\n", __func__);
141                 abort();
142         }
143         is_be = is_big_endian();
144 #ifdef _USE_KNETFILE
145         if (knet_read(fp, &rz->index->size, sizeof(int)) < 0) {
146 #else
147         if (read(fd, &rz->index->size, sizeof(int)) < 0) {
148 #endif
149                 fprintf(stderr, "[%s] failure to read zindex size.\n", __func__);
150                 abort();
151         }
152         if(!is_be) rz->index->size = byte_swap_4((uint32_t)rz->index->size);
153         rz->index->cap = rz->index->size;
154         v32 = rz->index->size / RZ_BIN_SIZE + 1;
155         count = sizeof(int64_t) * v32;
156         if ((rz->index->bin_offsets = malloc(count)) == NULL) {
157                 fprintf(stderr, "[%s] failure to allocate bin_offsets array.\n", __func__);
158                 abort();
159         }
160 #ifdef _USE_KNETFILE
161         if (knet_read(fp, rz->index->bin_offsets, count) < 0) {
162 #else
163         if (read(fd, rz->index->bin_offsets, count) < 0) {
164 #endif
165                 fprintf(stderr, "[%s] failure to read bin_offsets.\n", __func__);
166                 abort();
167         }
168         count = sizeof(int) * rz->index->size;
169         if ((rz->index->cell_offsets = malloc(count)) == NULL) {
170                 fprintf(stderr, "[%s] failure to allocate cell_offsets array.\n", __func__);
171                 abort();
172         }
173 #ifdef _USE_KNETFILE
174         if (knet_read(fp, rz->index->cell_offsets, count) < count) {
175 #else
176         if (read(fd, rz->index->cell_offsets, count) < count) {
177 #endif
178                 fprintf(stderr, "[%s] failure to read cell_offsets.\n", __func__);
179                 abort();
180         }
181         if(!is_be){
182                 for(i=0;i<v32;i++) rz->index->bin_offsets[i] = byte_swap_8((uint64_t)rz->index->bin_offsets[i]);
183                 for(i=0;i<rz->index->size;i++) rz->index->cell_offsets[i] = byte_swap_4((uint32_t)rz->index->cell_offsets[i]);
184         }
185 }
186
187 #ifdef _RZ_READONLY
188 static RAZF* razf_open_w(int fd)
189 {
190         fprintf(stderr, "[razf_open_w] Writing is not available with zlib ver < 1.2.2.1\n");
191         return 0;
192 }
193 #else
194 static RAZF* razf_open_w(int fd){
195         RAZF *rz;
196 #ifdef _WIN32
197         setmode(fd, O_BINARY);
198 #endif
199         rz = calloc(1, sizeof(RAZF));
200         rz->mode = 'w';
201 #ifdef _USE_KNETFILE
202     rz->x.fpw = fd;
203 #else
204         rz->filedes = fd;
205 #endif
206         rz->stream = calloc(sizeof(z_stream), 1);
207         rz->inbuf  = malloc(RZ_BUFFER_SIZE);
208         rz->outbuf = malloc(RZ_BUFFER_SIZE);
209         rz->index = calloc(sizeof(ZBlockIndex), 1);
210         deflateInit2(rz->stream, RZ_COMPRESS_LEVEL, Z_DEFLATED, WINDOW_BITS + 16, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY);
211         rz->stream->avail_out = RZ_BUFFER_SIZE;
212         rz->stream->next_out  = rz->outbuf;
213         rz->header = calloc(sizeof(gz_header), 1);
214         rz->header->os    = 0x03; //Unix
215         rz->header->text  = 0;
216         rz->header->time  = 0;
217         rz->header->extra = malloc(7);
218         strncpy((char*)rz->header->extra, "RAZF", 4);
219         rz->header->extra[4] = 1; // obsolete field
220         // block size = RZ_BLOCK_SIZE, Big-Endian
221         rz->header->extra[5] = RZ_BLOCK_SIZE >> 8;
222         rz->header->extra[6] = RZ_BLOCK_SIZE & 0xFF;
223         rz->header->extra_len = 7;
224         rz->header->name = rz->header->comment  = 0;
225         rz->header->hcrc = 0;
226         deflateSetHeader(rz->stream, rz->header);
227         rz->block_pos = rz->block_off = 0;
228         return rz;
229 }
230
231 static void _razf_write(RAZF* rz, const void *data, int size){
232         int tout;
233         size_t count;
234         rz->stream->avail_in = size;
235         rz->stream->next_in  = (void*)data;
236         while(1){
237                 tout = rz->stream->avail_out;
238                 deflate(rz->stream, Z_NO_FLUSH);
239                 rz->out += tout - rz->stream->avail_out;
240                 if(rz->stream->avail_out) break;
241                 count = RZ_BUFFER_SIZE - rz->stream->avail_out;
242 #ifdef _USE_KNETFILE
243                 if (write(rz->x.fpw, rz->outbuf, count) < 0) {
244 #else
245                 if (write(rz->filedes, rz->outbuf, count) < 0) {
246 #endif
247                         fprintf(stderr, "[%s] failed to write output buffer.\n", __func__);
248                         abort();
249                 }
250                 rz->stream->avail_out = RZ_BUFFER_SIZE;
251                 rz->stream->next_out  = rz->outbuf;
252                 if(rz->stream->avail_in == 0) break;
253         };
254         rz->in += size - rz->stream->avail_in;
255         rz->block_off += size - rz->stream->avail_in;
256 }
257
258 static void razf_flush(RAZF *rz){
259         size_t count;
260         uint32_t tout;
261         if(rz->buf_len){
262                 _razf_write(rz, rz->inbuf, rz->buf_len);
263                 rz->buf_off = rz->buf_len = 0;
264         }
265         if(rz->stream->avail_out){
266                 count = RZ_BUFFER_SIZE - rz->stream->avail_out;
267 #ifdef _USE_KNETFILE    
268                 if (write(rz->x.fpw, rz->outbuf, count) < 0) {
269 #else        
270                 if (write(rz->filedes, rz->outbuf, count) < 0) {
271 #endif
272                         fprintf(stderr, "[%s] failed to flush output buffer.\n", __func__);
273                         abort();
274                 }
275                 rz->stream->avail_out = RZ_BUFFER_SIZE;
276                 rz->stream->next_out  = rz->outbuf;
277         }
278         while(1){
279                 tout = rz->stream->avail_out;
280                 deflate(rz->stream, Z_FULL_FLUSH);
281                 rz->out += tout - rz->stream->avail_out;
282                 if(rz->stream->avail_out == 0){
283                         count = RZ_BUFFER_SIZE - rz->stream->avail_out;
284 #ifdef _USE_KNETFILE    
285                         if (write(rz->x.fpw, rz->outbuf, count) < 0) {
286 #else            
287                         if (write(rz->filedes, rz->outbuf, count) < 0) {
288 #endif
289                                 fprintf(stderr, "[%s] failed to flush output buffer.\n", __func__);
290                                 abort();
291                         }
292                         rz->stream->avail_out = RZ_BUFFER_SIZE;
293                         rz->stream->next_out  = rz->outbuf;
294                 } else break;
295         }
296         rz->block_pos = rz->out;
297         rz->block_off = 0;
298 }
299
300 static void razf_end_flush(RAZF *rz){
301         size_t count;
302         uint32_t tout;
303         if(rz->buf_len){
304                 _razf_write(rz, rz->inbuf, rz->buf_len);
305                 rz->buf_off = rz->buf_len = 0;
306         }
307         while(1){
308                 tout = rz->stream->avail_out;
309                 deflate(rz->stream, Z_FINISH);
310                 rz->out += tout - rz->stream->avail_out;
311                 if(rz->stream->avail_out < RZ_BUFFER_SIZE){
312                         count = RZ_BUFFER_SIZE - rz->stream->avail_out;
313 #ifdef _USE_KNETFILE        
314                         if (write(rz->x.fpw, rz->outbuf, count) < 0) {
315 #else            
316                         if (write(rz->filedes, rz->outbuf, count) < 0) {
317 #endif
318                                 fprintf(stderr, "[%s] failed to flush output buffer.\n", __func__);
319                                 abort();
320                         }
321                         rz->stream->avail_out = RZ_BUFFER_SIZE;
322                         rz->stream->next_out  = rz->outbuf;
323                 } else break;
324         }
325 }
326
327 static void _razf_buffered_write(RAZF *rz, const void *data, int size){
328         int i, n;
329         while(1){
330                 if(rz->buf_len == RZ_BUFFER_SIZE){
331                         _razf_write(rz, rz->inbuf, rz->buf_len);
332                         rz->buf_len = 0;
333                 }
334                 if(size + rz->buf_len < RZ_BUFFER_SIZE){
335                         for(i=0;i<size;i++) ((char*)rz->inbuf + rz->buf_len)[i] = ((char*)data)[i];
336                         rz->buf_len += size;
337                         return;
338                 } else {
339                         n = RZ_BUFFER_SIZE - rz->buf_len;
340                         for(i=0;i<n;i++) ((char*)rz->inbuf + rz->buf_len)[i] = ((char*)data)[i];
341                         size -= n;
342                         data += n;
343                         rz->buf_len += n;
344                 }
345         }
346 }
347
348 int razf_write(RAZF* rz, const void *data, int size){
349         int ori_size, n;
350         int64_t next_block;
351         ori_size = size;
352         next_block = ((rz->in / RZ_BLOCK_SIZE) + 1) * RZ_BLOCK_SIZE;
353         while(rz->in + rz->buf_len + size >= next_block){
354                 n = next_block - rz->in - rz->buf_len;
355                 _razf_buffered_write(rz, data, n);
356                 data += n;
357                 size -= n;
358                 razf_flush(rz);
359                 add_zindex(rz, rz->in, rz->out);
360                 next_block = ((rz->in / RZ_BLOCK_SIZE) + 1) * RZ_BLOCK_SIZE;
361         }
362         _razf_buffered_write(rz, data, size);
363         return ori_size;
364 }
365 #endif
366
367 /* gzip flag byte */
368 #define ASCII_FLAG   0x01 /* bit 0 set: file probably ascii text */
369 #define HEAD_CRC     0x02 /* bit 1 set: header CRC present */
370 #define EXTRA_FIELD  0x04 /* bit 2 set: extra field present */
371 #define ORIG_NAME    0x08 /* bit 3 set: original file name present */
372 #define COMMENT      0x10 /* bit 4 set: file comment present */
373 #define RESERVED     0xE0 /* bits 5..7: reserved */
374
375 static int _read_gz_header(unsigned char *data, int size, int *extra_off, int *extra_len){
376         int method, flags, n, len;
377         if(size < 2) return 0;
378         if(data[0] != 0x1f || data[1] != 0x8b) return 0;
379         if(size < 4) return 0;
380         method = data[2];
381         flags  = data[3];
382         if(method != Z_DEFLATED || (flags & RESERVED)) return 0;
383         n = 4 + 6; // Skip 6 bytes
384         *extra_off = n + 2;
385         *extra_len = 0;
386         if(flags & EXTRA_FIELD){
387                 if(size < n + 2) return 0;
388                 len = ((int)data[n + 1] << 8) | data[n];
389                 n += 2;
390                 *extra_off = n;
391                 while(len){
392                         if(n >= size) return 0;
393                         n ++;
394                         len --;
395                 }
396                 *extra_len = n - (*extra_off);
397         }
398         if(flags & ORIG_NAME) while(n < size && data[n++]);
399         if(flags & COMMENT) while(n < size && data[n++]);
400         if(flags & HEAD_CRC){
401                 if(n + 2 > size) return 0;
402                 n += 2;
403         }
404         return n;
405 }
406
407 #ifdef _USE_KNETFILE
408 static RAZF* razf_open_r(knetFile *fp, int _load_index){
409 #else
410 static RAZF* razf_open_r(int fd, int _load_index){
411 #endif
412         RAZF *rz;
413         int ext_off, ext_len;
414         int n, is_be, ret;
415         int64_t end;
416         unsigned char c[] = "RAZF";
417         if ((rz = calloc(1, sizeof(RAZF))) == NULL) {
418                 fprintf(stderr, "[%s] failure to allocate RAZF structure.\n", __func__);
419                 return NULL;
420         }
421         rz->mode = 'r';
422 #ifdef _USE_KNETFILE
423     rz->x.fpr = fp;
424 #else
425 #ifdef _WIN32
426         setmode(fd, O_BINARY);
427 #endif
428         rz->filedes = fd;
429 #endif
430         if ((rz->stream = calloc(sizeof(z_stream), 1)) == NULL) {
431                 fprintf(stderr, "[%s] failure to allocate z_stream.\n", __func__);
432                 free(rz);
433                 return NULL;
434         }
435         if ((rz->inbuf = malloc(RZ_BUFFER_SIZE)) == NULL) {
436                 fprintf(stderr, "[%s] failure to allocate input buffer.\n", __func__);
437                 free(rz->stream);
438                 free(rz);
439                 return NULL;
440         }
441         if ((rz->outbuf = malloc(RZ_BUFFER_SIZE)) == NULL) {
442                 fprintf(stderr, "[%s] failure to allocate output buffer.\n", __func__);
443                 free(rz->inbuf);
444                 free(rz->stream);
445                 free(rz);
446                 return NULL;
447         }
448         rz->end = rz->src_end = 0x7FFFFFFFFFFFFFFFLL;
449 #ifdef _USE_KNETFILE
450     n = knet_read(rz->x.fpr, rz->inbuf, RZ_BUFFER_SIZE);
451 #else
452         n = read(rz->filedes, rz->inbuf, RZ_BUFFER_SIZE);
453 #endif
454         ret = _read_gz_header(rz->inbuf, n, &ext_off, &ext_len);
455         if(ret == 0){
456                 PLAIN_FILE:
457                 rz->in = n;
458                 rz->file_type = FILE_TYPE_PLAIN;
459                 memcpy(rz->outbuf, rz->inbuf, n);
460                 rz->buf_len = n;
461                 free(rz->stream);
462                 rz->stream = NULL;
463                 return rz;
464         }
465         rz->header_size = ret;
466         ret = inflateInit2(rz->stream, -WINDOW_BITS);
467         if(ret != Z_OK){ inflateEnd(rz->stream); goto PLAIN_FILE;}
468         rz->stream->avail_in = n - rz->header_size;
469         rz->stream->next_in  = rz->inbuf + rz->header_size;
470         rz->stream->avail_out = RZ_BUFFER_SIZE;
471         rz->stream->next_out  = rz->outbuf;
472         rz->file_type = FILE_TYPE_GZ;
473         rz->in = rz->header_size;
474         rz->block_pos = rz->header_size;
475         rz->next_block_pos = rz->header_size;
476         rz->block_off = 0;
477         if(ext_len < 7 || memcmp(rz->inbuf + ext_off, c, 4) != 0) return rz;
478         if(((((unsigned char*)rz->inbuf)[ext_off + 5] << 8) | ((unsigned char*)rz->inbuf)[ext_off + 6]) != RZ_BLOCK_SIZE){
479                 fprintf(stderr, " -- WARNING: RZ_BLOCK_SIZE is not %d, treat source as gz file.  in %s -- %s:%d --\n", RZ_BLOCK_SIZE, __FUNCTION__, __FILE__, __LINE__);
480                 return rz;
481         }
482         rz->load_index = _load_index;
483         rz->file_type = FILE_TYPE_RZ;
484 #ifdef _USE_KNETFILE
485         if(knet_seek(fp, -16, SEEK_END) == -1){
486 #else
487         if(lseek(fd, -16, SEEK_END) == -1){
488 #endif
489                 UNSEEKABLE:
490                 rz->seekable = 0;
491                 rz->index = NULL;
492                 rz->src_end = rz->end = 0x7FFFFFFFFFFFFFFFLL;
493         } else {
494                 is_be = is_big_endian();
495                 rz->seekable = 1;
496 #ifdef _USE_KNETFILE
497         knet_read(fp, &end, sizeof(int64_t));
498 #else
499                 read(fd, &end, sizeof(int64_t));
500 #endif        
501                 if(!is_be) rz->src_end = (int64_t)byte_swap_8((uint64_t)end);
502                 else rz->src_end = end;
503
504 #ifdef _USE_KNETFILE
505                 knet_read(fp, &end, sizeof(int64_t));
506 #else
507                 read(fd, &end, sizeof(int64_t));
508 #endif        
509                 if(!is_be) rz->end = (int64_t)byte_swap_8((uint64_t)end);
510                 else rz->end = end;
511                 if(n > rz->end){
512                         rz->stream->avail_in -= n - rz->end;
513                         n = rz->end;
514                 }
515                 if(rz->end > rz->src_end){
516 #ifdef _USE_KNETFILE
517             knet_seek(fp, rz->in, SEEK_SET);
518 #else
519                         lseek(fd, rz->in, SEEK_SET);
520 #endif
521                         goto UNSEEKABLE;
522                 }
523 #ifdef _USE_KNETFILE
524         knet_seek(fp, rz->end, SEEK_SET);
525                 if(knet_tell(fp) != rz->end){
526                         knet_seek(fp, rz->in, SEEK_SET);
527 #else
528                 if(lseek(fd, rz->end, SEEK_SET) != rz->end){
529                         lseek(fd, rz->in, SEEK_SET);
530 #endif
531                         goto UNSEEKABLE;
532                 }
533 #ifdef _USE_KNETFILE
534                 load_zindex(rz, fp);
535                 knet_seek(fp, n, SEEK_SET);
536 #else
537                 load_zindex(rz, fd);
538                 lseek(fd, n, SEEK_SET);
539 #endif
540         }
541         return rz;
542 }
543
544 #ifdef _USE_KNETFILE
545 RAZF* razf_dopen(int fd, const char *mode){
546     if (strstr(mode, "r")) fprintf(stderr,"[razf_dopen] implement me\n");
547     else if(strstr(mode, "w")) return razf_open_w(fd);
548         return NULL;
549 }
550
551 RAZF* razf_dopen2(int fd, const char *mode)
552 {
553     fprintf(stderr,"[razf_dopen2] implement me\n");
554     return NULL;
555 }
556 #else
557 RAZF* razf_dopen(int fd, const char *mode){
558         if(strstr(mode, "r")) return razf_open_r(fd, 1);
559         else if(strstr(mode, "w")) return razf_open_w(fd);
560         else return NULL;
561 }
562
563 RAZF* razf_dopen2(int fd, const char *mode)
564 {
565         if(strstr(mode, "r")) return razf_open_r(fd, 0);
566         else if(strstr(mode, "w")) return razf_open_w(fd);
567         else return NULL;
568 }
569 #endif
570
571 static inline RAZF* _razf_open(const char *filename, const char *mode, int _load_index){
572         int fd;
573         RAZF *rz;
574         if(strstr(mode, "r")){
575 #ifdef _USE_KNETFILE
576         knetFile *fd = knet_open(filename, "r");
577         if (fd == 0) {
578             fprintf(stderr, "[_razf_open] fail to open %s\n", filename);
579             return NULL;
580         }
581 #else
582 #ifdef _WIN32
583                 fd = open(filename, O_RDONLY | O_BINARY);
584 #else
585                 fd = open(filename, O_RDONLY);
586 #endif
587 #endif
588                 if(fd < 0) return NULL;
589                 rz = razf_open_r(fd, _load_index);
590         } else if(strstr(mode, "w")){
591 #ifdef _WIN32
592                 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
593 #else
594                 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0666);
595 #endif
596                 if(fd < 0) return NULL;
597                 rz = razf_open_w(fd);
598         } else return NULL;
599         return rz;
600 }
601
602 RAZF* razf_open(const char *filename, const char *mode){
603         return _razf_open(filename, mode, 1);
604 }
605
606 RAZF* razf_open2(const char *filename, const char *mode){
607         return _razf_open(filename, mode, 0);
608 }
609
610 int razf_get_data_size(RAZF *rz, int64_t *u_size, int64_t *c_size){
611         int64_t n;
612         if(rz->mode != 'r' && rz->mode != 'R') return 0;
613         switch(rz->file_type){
614                 case FILE_TYPE_PLAIN:
615                         if(rz->end == 0x7fffffffffffffffLL){
616 #ifdef _USE_KNETFILE
617                                 if(knet_seek(rz->x.fpr, 0, SEEK_CUR) == -1) return 0;
618                 n = knet_tell(rz->x.fpr);
619                                 knet_seek(rz->x.fpr, 0, SEEK_END);
620                 rz->end = knet_tell(rz->x.fpr);
621                                 knet_seek(rz->x.fpr, n, SEEK_SET);
622 #else
623                                 if((n = lseek(rz->filedes, 0, SEEK_CUR)) == -1) return 0;
624                                 rz->end = lseek(rz->filedes, 0, SEEK_END);
625                                 lseek(rz->filedes, n, SEEK_SET);
626 #endif                
627                         }
628                         *u_size = *c_size = rz->end;
629                         return 1;
630                 case FILE_TYPE_GZ:
631                         return 0;
632                 case FILE_TYPE_RZ:
633                         if(rz->src_end == rz->end) return 0;
634                         *u_size = rz->src_end;
635                         *c_size = rz->end;
636                         return 1;
637                 default:
638                         return 0;
639         }
640 }
641
642 static int _razf_read(RAZF* rz, void *data, int size){
643         int ret, tin;
644         if(rz->z_eof || rz->z_err) return 0;
645         if (rz->file_type == FILE_TYPE_PLAIN) {
646 #ifdef _USE_KNETFILE
647                 ret = knet_read(rz->x.fpr, data, size);
648 #else
649                 ret = read(rz->filedes, data, size);
650 #endif        
651                 if (ret == 0) rz->z_eof = 1;
652                 return ret;
653         }
654         rz->stream->avail_out = size;
655         rz->stream->next_out  = data;
656         while(rz->stream->avail_out){
657                 if(rz->stream->avail_in == 0){
658                         if(rz->in >= rz->end){ rz->z_eof = 1; break; }
659                         if(rz->end - rz->in < RZ_BUFFER_SIZE){
660 #ifdef _USE_KNETFILE
661                                 rz->stream->avail_in = knet_read(rz->x.fpr, rz->inbuf, rz->end -rz->in);
662 #else
663                                 rz->stream->avail_in = read(rz->filedes, rz->inbuf, rz->end -rz->in);
664 #endif        
665                         } else {
666 #ifdef _USE_KNETFILE
667                                 rz->stream->avail_in = knet_read(rz->x.fpr, rz->inbuf, RZ_BUFFER_SIZE);
668 #else
669                                 rz->stream->avail_in = read(rz->filedes, rz->inbuf, RZ_BUFFER_SIZE);
670 #endif        
671                         }
672                         if(rz->stream->avail_in == 0){
673                                 rz->z_eof = 1;
674                                 break;
675                         }
676                         rz->stream->next_in = rz->inbuf;
677                 }
678                 tin = rz->stream->avail_in;
679                 ret = inflate(rz->stream, Z_BLOCK);
680                 rz->in += tin - rz->stream->avail_in;
681                 if(ret == Z_NEED_DICT || ret == Z_MEM_ERROR || ret == Z_DATA_ERROR){
682                         fprintf(stderr, "[_razf_read] inflate error: %d %s (at %s:%d)\n", ret, rz->stream->msg ? rz->stream->msg : "", __FILE__, __LINE__);
683                         rz->z_err = 1;
684                         break;
685                 }
686                 if(ret == Z_STREAM_END){
687                         rz->z_eof = 1;
688                         break;
689                 }
690                 if ((rz->stream->data_type&128) && !(rz->stream->data_type&64)){
691                         rz->buf_flush = 1;
692                         rz->next_block_pos = rz->in;
693                         break;
694                 }
695         }
696         return size - rz->stream->avail_out;
697 }
698
699 int razf_read(RAZF *rz, void *data, int size){
700         int ori_size, i;
701         ori_size = size;
702         while(size > 0){
703                 if(rz->buf_len){
704                         if(size < rz->buf_len){
705                                 for(i=0;i<size;i++) ((char*)data)[i] = ((char*)rz->outbuf + rz->buf_off)[i];
706                                 rz->buf_off += size;
707                                 rz->buf_len -= size;
708                                 data += size;
709                                 rz->block_off += size;
710                                 size = 0;
711                                 break;
712                         } else {
713                                 for(i=0;i<rz->buf_len;i++) ((char*)data)[i] = ((char*)rz->outbuf + rz->buf_off)[i];
714                                 data += rz->buf_len;
715                                 size -= rz->buf_len;
716                                 rz->block_off += rz->buf_len;
717                                 rz->buf_off = 0;
718                                 rz->buf_len = 0;
719                                 if(rz->buf_flush){
720                                         rz->block_pos = rz->next_block_pos;
721                                         rz->block_off = 0;
722                                         rz->buf_flush = 0;
723                                 }
724                         }
725                 } else if(rz->buf_flush){
726                         rz->block_pos = rz->next_block_pos;
727                         rz->block_off = 0;
728                         rz->buf_flush = 0;
729                 }
730                 if(rz->buf_flush) continue;
731                 rz->buf_len = _razf_read(rz, rz->outbuf, RZ_BUFFER_SIZE);
732                 if(rz->z_eof && rz->buf_len == 0) break;
733         }
734         rz->out += ori_size - size;
735         return ori_size - size;
736 }
737
738 int razf_skip(RAZF* rz, int size){
739         int ori_size;
740         ori_size = size;
741         while(size > 0){
742                 if(rz->buf_len){
743                         if(size < rz->buf_len){
744                                 rz->buf_off += size;
745                                 rz->buf_len -= size;
746                                 rz->block_off += size;
747                                 size = 0;
748                                 break;
749                         } else {
750                                 size -= rz->buf_len;
751                                 rz->buf_off = 0;
752                                 rz->buf_len = 0;
753                                 rz->block_off += rz->buf_len;
754                                 if(rz->buf_flush){
755                                         rz->block_pos = rz->next_block_pos;
756                                         rz->block_off = 0;
757                                         rz->buf_flush = 0;
758                                 }
759                         }
760                 } else if(rz->buf_flush){
761                         rz->block_pos = rz->next_block_pos;
762                         rz->block_off = 0;
763                         rz->buf_flush = 0;
764                 }
765                 if(rz->buf_flush) continue;
766                 rz->buf_len = _razf_read(rz, rz->outbuf, RZ_BUFFER_SIZE);
767                 if(rz->z_eof || rz->z_err) break;
768         }
769         rz->out += ori_size - size;
770         return ori_size - size;
771 }
772
773 static void _razf_reset_read(RAZF *rz, int64_t in, int64_t out){
774 #ifdef _USE_KNETFILE
775         knet_seek(rz->x.fpr, in, SEEK_SET);
776 #else
777         lseek(rz->filedes, in, SEEK_SET);
778 #endif
779         rz->in  = in;
780         rz->out = out;
781         rz->block_pos = in;
782         rz->next_block_pos = in;
783         rz->block_off = 0;
784         rz->buf_flush = 0;
785         rz->z_eof = rz->z_err = 0;
786         inflateReset(rz->stream);
787         rz->stream->avail_in = 0;
788         rz->buf_off = rz->buf_len = 0;
789 }
790
791 int64_t razf_jump(RAZF *rz, int64_t block_start, int block_offset){
792         int64_t pos;
793         rz->z_eof = 0;
794         if(rz->file_type == FILE_TYPE_PLAIN){
795                 rz->buf_off = rz->buf_len = 0;
796                 pos = block_start + block_offset;
797 #ifdef _USE_KNETFILE
798                 knet_seek(rz->x.fpr, pos, SEEK_SET);
799         pos = knet_tell(rz->x.fpr);
800 #else
801                 pos = lseek(rz->filedes, pos, SEEK_SET);
802 #endif
803                 rz->out = rz->in = pos;
804                 return pos;
805         }
806         if(block_start == rz->block_pos && block_offset >= rz->block_off) {
807                 block_offset -= rz->block_off;
808                 goto SKIP; // Needn't reset inflate
809         }
810         if(block_start  == 0) block_start = rz->header_size; // Automaticly revist wrong block_start
811         _razf_reset_read(rz, block_start, 0);
812         SKIP:
813         if(block_offset) razf_skip(rz, block_offset);
814         return rz->block_off;
815 }
816
817 int64_t razf_seek(RAZF* rz, int64_t pos, int where){
818         int64_t idx;
819         int64_t seek_pos, new_out;
820         rz->z_eof = 0;
821         if (where == SEEK_CUR) pos += rz->out;
822         else if (where == SEEK_END) pos += rz->src_end;
823         if(rz->file_type == FILE_TYPE_PLAIN){
824 #ifdef _USE_KNETFILE
825                 knet_seek(rz->x.fpr, pos, SEEK_SET);
826         seek_pos = knet_tell(rz->x.fpr);
827 #else
828                 seek_pos = lseek(rz->filedes, pos, SEEK_SET);
829 #endif
830                 rz->buf_off = rz->buf_len = 0;
831                 rz->out = rz->in = seek_pos;
832                 return seek_pos;
833         } else if(rz->file_type == FILE_TYPE_GZ){
834                 if(pos >= rz->out) goto SKIP;
835                 return rz->out;
836         }
837         if(pos == rz->out) return pos;
838         if(pos > rz->src_end) return rz->out;
839         if(!rz->seekable || !rz->load_index){
840                 if(pos >= rz->out) goto SKIP;
841         }
842         idx = pos / RZ_BLOCK_SIZE - 1;
843         seek_pos = (idx < 0)? rz->header_size:(rz->index->cell_offsets[idx] + rz->index->bin_offsets[idx / RZ_BIN_SIZE]);
844         new_out  = (idx + 1) * RZ_BLOCK_SIZE;
845         if(pos > rz->out && new_out <= rz->out) goto SKIP;
846         _razf_reset_read(rz, seek_pos, new_out);
847         SKIP:
848         razf_skip(rz, (int)(pos - rz->out));
849         return rz->out;
850 }
851
852 uint64_t razf_tell2(RAZF *rz)
853 {
854         /*
855         if (rz->load_index) {
856                 int64_t idx, seek_pos;
857                 idx = rz->out / RZ_BLOCK_SIZE - 1;
858                 seek_pos = (idx < 0)? rz->header_size:(rz->index->cell_offsets[idx] + rz->index->bin_offsets[idx / RZ_BIN_SIZE]);
859                 if (seek_pos != rz->block_pos || rz->out%RZ_BLOCK_SIZE != rz->block_off)
860                         fprintf(stderr, "[razf_tell2] inconsistent block offset: (%lld, %lld) != (%lld, %lld)\n",
861                                         (long long)seek_pos, (long long)rz->out%RZ_BLOCK_SIZE, (long long)rz->block_pos, (long long) rz->block_off);
862         }
863         */
864         return (uint64_t)rz->block_pos<<16 | (rz->block_off&0xffff);
865 }
866
867 int64_t razf_seek2(RAZF *rz, uint64_t voffset, int where)
868 {
869         if (where != SEEK_SET) return -1;
870         return razf_jump(rz, voffset>>16, voffset&0xffff);
871 }
872
873 void razf_close(RAZF *rz){
874         if(rz->mode == 'w'){
875 #ifndef _RZ_READONLY
876                 razf_end_flush(rz);
877                 deflateEnd(rz->stream);
878 #ifdef _USE_KNETFILE
879                 save_zindex(rz, rz->x.fpw);
880                 if(is_big_endian()){
881                         if (write(rz->x.fpw, &rz->in, sizeof(int64_t)) < 0) {
882                                 fprintf(stderr, "[%s] failed to write rz.in.\n", __func__);
883                                 abort();
884                         }
885                         if (write(rz->x.fpw, &rz->out, sizeof(int64_t)) < 0) {
886                                 fprintf(stderr, "[%s] failed to write rz.out.\n", __func__);
887                                 abort();
888                         }
889                 } else {
890                         uint64_t v64 = byte_swap_8((uint64_t)rz->in);
891                         if (write(rz->x.fpw, &v64, sizeof(int64_t)) < 0) {
892                                 fprintf(stderr, "[%s] failed to write rz.in.\n", __func__);
893                                 abort();
894                         }
895                         v64 = byte_swap_8((uint64_t)rz->out);
896                         if (write(rz->x.fpw, &v64, sizeof(int64_t)) < 0) {
897                                 fprintf(stderr, "[%s] failed to write rz.out.\n", __func__);
898                                 abort();
899                         }
900                 }
901 #else
902                 save_zindex(rz, rz->filedes);
903                 if(is_big_endian()){
904                         if (write(rz->filedes, &rz->in, sizeof(int64_t)) < 0) {
905                                 fprintf(stderr, "[%s] failed to write rz.in.\n", __func__);
906                                 abort();
907                         }
908                         if (write(rz->filedes, &rz->out, sizeof(int64_t)) < 0) {
909                                 fprintf(stderr, "[%s] failed to write rz.out.\n", __func__);
910                                 abort();
911                         }
912                 } else {
913                         uint64_t v64 = byte_swap_8((uint64_t)rz->in);
914                         if (write(rz->filedes, &v64, sizeof(int64_t)) < 0) {
915                                 fprintf(stderr, "[%s] failed to write rz.in.\n", __func__);
916                                 abort();
917                         }
918                         v64 = byte_swap_8((uint64_t)rz->out);
919                         if (write(rz->filedes, &v64, sizeof(int64_t)) < 0) {
920                                 fprintf(stderr, "[%s] failed to write rz.out.\n", __func__);
921                                 abort();
922                         }
923                 }
924 #endif
925 #endif
926         } else if(rz->mode == 'r'){
927                 if(rz->stream) inflateEnd(rz->stream);
928         }
929         if(rz->inbuf) free(rz->inbuf);
930         if(rz->outbuf) free(rz->outbuf);
931         if(rz->header){
932                 free(rz->header->extra);
933                 free(rz->header->name);
934                 free(rz->header->comment);
935                 free(rz->header);
936         }
937         if(rz->index){
938                 free(rz->index->bin_offsets);
939                 free(rz->index->cell_offsets);
940                 free(rz->index);
941         }
942         free(rz->stream);
943 #ifdef _USE_KNETFILE
944     if (rz->mode == 'r')
945         knet_close(rz->x.fpr);
946     if (rz->mode == 'w')
947         close(rz->x.fpw);
948 #else
949         close(rz->filedes);
950 #endif
951         free(rz);
952 }
953
954 #endif