razf.c: replaced many calloc() calls with one large calloc() using RAZF_STRUCT_SIZE...
[samtools.git] / razf.c
1 /*
2  * RAZF : Random Access compressed(Z) File
3  * Version: 1.0
4  * Release Date: 2008-10-27
5  *
6  * Copyright 2008, Jue Ruan <ruanjue@gmail.com>, Heng Li <lh3@sanger.ac.uk>
7  *
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31
32 #ifndef _NO_RAZF
33
34 #include <fcntl.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <unistd.h>
39 #include "razf.h"
40
41
42 #if ZLIB_VERNUM < 0x1221
43 struct _gz_header_s {
44     int     text;
45     uLong   time;
46     int     xflags;
47     int     os;
48     Bytef   *extra;
49     uInt    extra_len;
50     uInt    extra_max;
51     Bytef   *name;
52     uInt    name_max;
53     Bytef   *comment;
54     uInt    comm_max;
55     int     hcrc;
56     int     done;
57 };
58 #warning "zlib < 1.2.2.1; RAZF writing is disabled."
59 #endif
60
61 #define DEF_MEM_LEVEL 8
62
/* Reverse the byte order of a 32-bit value. */
static inline uint32_t byte_swap_4(uint32_t v){
    /* Move each byte straight to its mirrored position. */
    return (v >> 24)
         | ((v >> 8) & 0x0000FF00U)
         | ((v << 8) & 0x00FF0000U)
         | (v << 24);
}
67
/* Reverse the byte order of a 64-bit value. */
static inline uint64_t byte_swap_8(uint64_t v){
    uint64_t reversed = 0;
    int k;
    /* Peel bytes off the low end of v and push them onto the low end of
     * the result, so the first byte taken ends up most significant. */
    for (k = 0; k < 8; k++) {
        reversed = (reversed << 8) | (v & 0xFFU);
        v >>= 8;
    }
    return reversed;
}
73
/* Return 1 when the host stores the most significant byte first, else 0. */
static inline int is_big_endian(){
    const int probe = 0x01;
    const char *lowest_addr = (const char *)&probe;
    /* On a little-endian host the low-order byte sits at the lowest address. */
    if (*lowest_addr == 0x01) return 0;
    return 1;
}
79
80 #ifndef _RZ_READONLY
81 static void add_zindex(RAZF *rz, int64_t in, int64_t out){
82         uint32_t *cores;
83         int64_t *bores;
84
85         if(rz->index->size == rz->index->cap){
86                 rz->index->cap = rz->index->cap * 1.5 + 2;
87                 cores = realloc(rz->index->cell_offsets, sizeof(int) * rz->index->cap);
88                 bores = realloc(rz->index->bin_offsets, sizeof(int64_t) * (rz->index->cap/RZ_BIN_SIZE + 1));
89                 if ((cores == NULL) || (bores == NULL)) {
90                         fprintf(stderr, "[%s] failure to allocate space for new zindex.\n", __func__);
91                         abort();
92                 }
93                 rz->index->cell_offsets = cores;
94                 rz->index->bin_offsets = bores;
95         }
96         if(rz->index->size % RZ_BIN_SIZE == 0) rz->index->bin_offsets[rz->index->size / RZ_BIN_SIZE] = out;
97         rz->index->cell_offsets[rz->index->size] = out - rz->index->bin_offsets[rz->index->size / RZ_BIN_SIZE];
98         rz->index->size ++;
99 }
100
101 static void save_zindex(RAZF *rz, int fd){
102         size_t count;
103         int32_t i, v32;
104         int is_be;
105         is_be = is_big_endian();
106         if(is_be) {
107                 if (write(fd, &rz->index->size, sizeof(int)) < 0) {
108                         fprintf(stderr, "[%s] failure to write zindex size.\n", __func__);
109                         abort();
110                 }
111         }
112         else {
113                 v32 = byte_swap_4((uint32_t)rz->index->size);
114                 if (write(fd, &v32, sizeof(uint32_t)) < 0) {
115                         fprintf(stderr, "[%s] failure to write zindex size.\n", __func__);
116                         abort();
117                 }
118         }
119         v32 = rz->index->size / RZ_BIN_SIZE + 1;
120         if(!is_be){
121                 for(i=0;i<v32;i++) rz->index->bin_offsets[i]  = byte_swap_8((uint64_t)rz->index->bin_offsets[i]);
122                 for(i=0;i<rz->index->size;i++) rz->index->cell_offsets[i] = byte_swap_4((uint32_t)rz->index->cell_offsets[i]);
123         }
124         count = sizeof(int64_t) * v32;
125         if (write(fd, rz->index->bin_offsets, count) < 0) {
126                 fprintf(stderr, "[%s] failure to write zindex bin_offsets.\n", __func__);
127                 abort();
128         }
129
130         count = sizeof(int32_t) * rz->index->size;
131         if (write(fd, rz->index->cell_offsets, count) < 0) {
132                 fprintf(stderr, "[%s] failure to write zindex cell_offsets.\n", __func__);
133                 abort();
134         }
135 }
136 #endif
137
138 #ifdef _USE_KNETFILE
139 static void load_zindex(RAZF *rz, knetFile *fp){
140 #else
141 static void load_zindex(RAZF *rz, int fd){
142 #endif
143         int32_t i, v32;
144         int is_be;
145         size_t count;
146         if(!rz->load_index) return;
147         if(rz->index == NULL) {
148                 if ((rz->index = malloc(sizeof(ZBlockIndex))) == NULL) {
149                         fprintf(stderr, "[%s] failure to allocate index.\n", __func__);
150                         abort();
151                 }
152         }
153         is_be = is_big_endian();
154 #ifdef _USE_KNETFILE
155         if (knet_read(fp, &rz->index->size, sizeof(int)) < 0) {
156 #else
157         if (read(fd, &rz->index->size, sizeof(int)) < 0) {
158 #endif
159                 fprintf(stderr, "[%s] failure to read zindex size.\n", __func__);
160                 abort();
161         }
162         if(!is_be) rz->index->size = byte_swap_4((uint32_t)rz->index->size);
163         rz->index->cap = rz->index->size;
164         v32 = rz->index->size / RZ_BIN_SIZE + 1;
165         count = sizeof(int64_t) * v32;
166         if ((rz->index->bin_offsets = malloc(count)) == NULL) {
167                 fprintf(stderr, "[%s] failure to allocate bin_offsets array.\n", __func__);
168                 abort();
169         }
170 #ifdef _USE_KNETFILE
171         if (knet_read(fp, rz->index->bin_offsets, count) < 0) {
172 #else
173         if (read(fd, rz->index->bin_offsets, count) < 0) {
174 #endif
175                 fprintf(stderr, "[%s] failure to read bin_offsets.\n", __func__);
176                 abort();
177         }
178         count = sizeof(int) * rz->index->size;
179         if ((rz->index->cell_offsets = malloc(count)) == NULL) {
180                 fprintf(stderr, "[%s] failure to allocate cell_offsets array.\n", __func__);
181                 abort();
182         }
183 #ifdef _USE_KNETFILE
184         if (knet_read(fp, rz->index->cell_offsets, count) < count) {
185 #else
186         if (read(fd, rz->index->cell_offsets, count) < count) {
187 #endif
188                 fprintf(stderr, "[%s] failure to read cell_offsets.\n", __func__);
189                 abort();
190         }
191         if(!is_be){
192                 for(i=0;i<v32;i++) rz->index->bin_offsets[i] = byte_swap_8((uint64_t)rz->index->bin_offsets[i]);
193                 for(i=0;i<rz->index->size;i++) rz->index->cell_offsets[i] = byte_swap_4((uint32_t)rz->index->cell_offsets[i]);
194         }
195 }
196
197 #ifdef _RZ_READONLY
198 static RAZF* razf_open_w(int fd)
199 {
200         fprintf(stderr, "[razf_open_w] Writing is not available with zlib ver < 1.2.2.1\n");
201         return 0;
202 }
203 #else
204 static RAZF* razf_open_w(int fd){
205         RAZF *rz;
206 #ifdef _WIN32
207         setmode(fd, O_BINARY);
208 #endif
209         if ((rz = calloc(1, RAZF_STRUCT_SIZE)) == NULL) {
210                 fprintf(stderr, "[%s] failure to allocate RAZF structure.\n", __func__);
211                 return NULL;
212         }
213         rz->stream = (z_stream *)rz + sizeof(RAZF);
214         rz->inbuf = rz->stream + sizeof(z_stream);
215         rz->outbuf = rz->inbuf + RZ_BUFFER_SIZE;
216         rz->index = rz->outbuf + RZ_BUFFER_SIZE;
217         rz->header = (gz_header *)rz->index + sizeof(ZBlockIndex);
218         rz->header->extra = (Bytef *)rz->header + sizeof(gz_header);
219         rz->mode = 'w';
220 #ifdef _USE_KNETFILE
221         rz->x.fpw = fd;
222 #else
223         rz->filedes = fd;
224 #endif
225         deflateInit2(rz->stream, RZ_COMPRESS_LEVEL, Z_DEFLATED, WINDOW_BITS + 16, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY);
226         rz->stream->avail_out = RZ_BUFFER_SIZE;
227         rz->stream->next_out  = rz->outbuf;
228         rz->header->os    = 0x03; //Unix
229         rz->header->text  = 0;
230         rz->header->time  = 0;
231         strncpy((char*)rz->header->extra, "RAZF", 4);
232         rz->header->extra[4] = 1; // obsolete field
233         // block size = RZ_BLOCK_SIZE, Big-Endian
234         rz->header->extra[5] = RZ_BLOCK_SIZE >> 8;
235         rz->header->extra[6] = RZ_BLOCK_SIZE & 0xFF;
236         rz->header->extra_len = 7;
237         rz->header->name = rz->header->comment  = 0;
238         rz->header->hcrc = 0;
239         deflateSetHeader(rz->stream, rz->header);
240         rz->block_pos = rz->block_off = 0;
241         return rz;
242 }
243
244 static void _razf_write(RAZF* rz, const void *data, int size){
245         int tout;
246         size_t count;
247         rz->stream->avail_in = size;
248         rz->stream->next_in  = (void*)data;
249         while(1){
250                 tout = rz->stream->avail_out;
251                 deflate(rz->stream, Z_NO_FLUSH);
252                 rz->out += tout - rz->stream->avail_out;
253                 if(rz->stream->avail_out) break;
254                 count = RZ_BUFFER_SIZE - rz->stream->avail_out;
255 #ifdef _USE_KNETFILE
256                 if (write(rz->x.fpw, rz->outbuf, count) < 0) {
257 #else
258                 if (write(rz->filedes, rz->outbuf, count) < 0) {
259 #endif
260                         fprintf(stderr, "[%s] failed to write output buffer.\n", __func__);
261                         abort();
262                 }
263                 rz->stream->avail_out = RZ_BUFFER_SIZE;
264                 rz->stream->next_out  = rz->outbuf;
265                 if(rz->stream->avail_in == 0) break;
266         };
267         rz->in += size - rz->stream->avail_in;
268         rz->block_off += size - rz->stream->avail_in;
269 }
270
271 static void razf_flush(RAZF *rz){
272         size_t count;
273         uint32_t tout;
274         if(rz->buf_len){
275                 _razf_write(rz, rz->inbuf, rz->buf_len);
276                 rz->buf_off = rz->buf_len = 0;
277         }
278         if(rz->stream->avail_out){
279                 count = RZ_BUFFER_SIZE - rz->stream->avail_out;
280 #ifdef _USE_KNETFILE    
281                 if (write(rz->x.fpw, rz->outbuf, count) < 0) {
282 #else        
283                 if (write(rz->filedes, rz->outbuf, count) < 0) {
284 #endif
285                         fprintf(stderr, "[%s] failed to flush output buffer.\n", __func__);
286                         abort();
287                 }
288                 rz->stream->avail_out = RZ_BUFFER_SIZE;
289                 rz->stream->next_out  = rz->outbuf;
290         }
291         while(1){
292                 tout = rz->stream->avail_out;
293                 deflate(rz->stream, Z_FULL_FLUSH);
294                 rz->out += tout - rz->stream->avail_out;
295                 if(rz->stream->avail_out == 0){
296                         count = RZ_BUFFER_SIZE - rz->stream->avail_out;
297 #ifdef _USE_KNETFILE    
298                         if (write(rz->x.fpw, rz->outbuf, count) < 0) {
299 #else            
300                         if (write(rz->filedes, rz->outbuf, count) < 0) {
301 #endif
302                                 fprintf(stderr, "[%s] failed to flush output buffer.\n", __func__);
303                                 abort();
304                         }
305                         rz->stream->avail_out = RZ_BUFFER_SIZE;
306                         rz->stream->next_out  = rz->outbuf;
307                 } else break;
308         }
309         rz->block_pos = rz->out;
310         rz->block_off = 0;
311 }
312
313 static void razf_end_flush(RAZF *rz){
314         size_t count;
315         uint32_t tout;
316         if(rz->buf_len){
317                 _razf_write(rz, rz->inbuf, rz->buf_len);
318                 rz->buf_off = rz->buf_len = 0;
319         }
320         while(1){
321                 tout = rz->stream->avail_out;
322                 deflate(rz->stream, Z_FINISH);
323                 rz->out += tout - rz->stream->avail_out;
324                 if(rz->stream->avail_out < RZ_BUFFER_SIZE){
325                         count = RZ_BUFFER_SIZE - rz->stream->avail_out;
326 #ifdef _USE_KNETFILE        
327                         if (write(rz->x.fpw, rz->outbuf, count) < 0) {
328 #else            
329                         if (write(rz->filedes, rz->outbuf, count) < 0) {
330 #endif
331                                 fprintf(stderr, "[%s] failed to flush output buffer.\n", __func__);
332                                 abort();
333                         }
334                         rz->stream->avail_out = RZ_BUFFER_SIZE;
335                         rz->stream->next_out  = rz->outbuf;
336                 } else break;
337         }
338 }
339
340 static void _razf_buffered_write(RAZF *rz, const void *data, int size){
341         int i, n;
342         while(1){
343                 if(rz->buf_len == RZ_BUFFER_SIZE){
344                         _razf_write(rz, rz->inbuf, rz->buf_len);
345                         rz->buf_len = 0;
346                 }
347                 if(size + rz->buf_len < RZ_BUFFER_SIZE){
348                         for(i=0;i<size;i++) ((char*)rz->inbuf + rz->buf_len)[i] = ((char*)data)[i];
349                         rz->buf_len += size;
350                         return;
351                 } else {
352                         n = RZ_BUFFER_SIZE - rz->buf_len;
353                         for(i=0;i<n;i++) ((char*)rz->inbuf + rz->buf_len)[i] = ((char*)data)[i];
354                         size -= n;
355                         data += n;
356                         rz->buf_len += n;
357                 }
358         }
359 }
360
361 int razf_write(RAZF* rz, const void *data, int size){
362         int ori_size, n;
363         int64_t next_block;
364         ori_size = size;
365         next_block = ((rz->in / RZ_BLOCK_SIZE) + 1) * RZ_BLOCK_SIZE;
366         while(rz->in + rz->buf_len + size >= next_block){
367                 n = next_block - rz->in - rz->buf_len;
368                 _razf_buffered_write(rz, data, n);
369                 data += n;
370                 size -= n;
371                 razf_flush(rz);
372                 add_zindex(rz, rz->in, rz->out);
373                 next_block = ((rz->in / RZ_BLOCK_SIZE) + 1) * RZ_BLOCK_SIZE;
374         }
375         _razf_buffered_write(rz, data, size);
376         return ori_size;
377 }
378 #endif
379
/* gzip flag byte */
#define ASCII_FLAG   0x01 /* bit 0 set: file probably ascii text */
#define HEAD_CRC     0x02 /* bit 1 set: header CRC present */
#define EXTRA_FIELD  0x04 /* bit 2 set: extra field present */
#define ORIG_NAME    0x08 /* bit 3 set: original file name present */
#define COMMENT      0x10 /* bit 4 set: file comment present */
#define RESERVED     0xE0 /* bits 5..7: reserved */

/* Compression-method value for deflate; normally supplied by zlib.h. */
#ifndef Z_DEFLATED
#define Z_DEFLATED 8
#endif

/*
 * Parse a gzip member header held in data[0..size).
 *
 * Returns the header length in bytes (never more than size), or 0 when
 * the buffer does not start with a complete, well-formed deflate gzip
 * header. On success *extra_off is the offset of the extra-field payload
 * and *extra_len its length; when the header has no extra field they are
 * 12 and 0. The outputs are not meaningful when 0 is returned.
 */
static int _read_gz_header(unsigned char *data, int size, int *extra_off, int *extra_len){
    int method, flags, n, len;
    if(size < 2) return 0;
    if(data[0] != 0x1f || data[1] != 0x8b) return 0;
    if(size < 4) return 0;
    method = data[2];
    flags  = data[3];
    if(method != Z_DEFLATED || (flags & RESERVED)) return 0;
    n = 4 + 6; // Skip 6 bytes
    /* The fixed part must be complete, otherwise the returned length
     * would exceed the number of bytes actually available. */
    if(size < n) return 0;
    *extra_off = n + 2;
    *extra_len = 0;
    if(flags & EXTRA_FIELD){
        if(size < n + 2) return 0;
        len = ((int)data[n + 1] << 8) | data[n]; /* little-endian length */
        n += 2;
        *extra_off = n;
        if(len > size - n) return 0; /* payload runs past the buffer */
        n += len;
        *extra_len = len;
    }
    if(flags & ORIG_NAME){
        /* NUL-terminated string; a missing terminator is a truncated header */
        while(n < size && data[n] != 0) n ++;
        if(n >= size) return 0;
        n ++;
    }
    if(flags & COMMENT){
        while(n < size && data[n] != 0) n ++;
        if(n >= size) return 0;
        n ++;
    }
    if(flags & HEAD_CRC){
        if(n + 2 > size) return 0;
        n += 2;
    }
    return n;
}
419
420 #ifdef _USE_KNETFILE
421 static RAZF* razf_open_r(knetFile *fp, int _load_index){
422 #else
423 static RAZF* razf_open_r(int fd, int _load_index){
424 #endif
425         RAZF *rz;
426         int ext_off, ext_len;
427         int n, is_be, ret;
428         int64_t end;
429         unsigned char c[] = "RAZF";
430         if ((rz = calloc(1, RAZF_STRUCT_SIZE)) == NULL) {
431                 fprintf(stderr, "[%s] failure to allocate RAZF structure.\n", __func__);
432                 return NULL;
433         }
434         rz->stream = (z_stream *)rz + sizeof(RAZF);
435         rz->inbuf = rz->stream + sizeof(z_stream);
436         rz->outbuf = rz->inbuf + RZ_BUFFER_SIZE;
437         rz->index = rz->outbuf + RZ_BUFFER_SIZE;
438         rz->header = (gz_header *)rz->index + sizeof(ZBlockIndex);
439         rz->header->extra = (Bytef *)rz->header + sizeof(gz_header);
440         rz->mode = 'r';
441 #ifdef _USE_KNETFILE
442     rz->x.fpr = fp;
443 #else
444 #ifdef _WIN32
445         setmode(fd, O_BINARY);
446 #endif
447         rz->filedes = fd;
448 #endif
449         rz->end = rz->src_end = 0x7FFFFFFFFFFFFFFFLL;
450 #ifdef _USE_KNETFILE
451     n = knet_read(rz->x.fpr, rz->inbuf, RZ_BUFFER_SIZE);
452 #else
453         n = read(rz->filedes, rz->inbuf, RZ_BUFFER_SIZE);
454 #endif
455         ret = _read_gz_header(rz->inbuf, n, &ext_off, &ext_len);
456         if(ret == 0){
457                 PLAIN_FILE:
458                 rz->in = n;
459                 rz->file_type = FILE_TYPE_PLAIN;
460                 memcpy(rz->outbuf, rz->inbuf, n);
461                 rz->buf_len = n;
462                 free(rz->stream);
463                 rz->stream = NULL;
464                 return rz;
465         }
466         rz->header_size = ret;
467         ret = inflateInit2(rz->stream, -WINDOW_BITS);
468         if(ret != Z_OK){ inflateEnd(rz->stream); goto PLAIN_FILE;}
469         rz->stream->avail_in = n - rz->header_size;
470         rz->stream->next_in  = rz->inbuf + rz->header_size;
471         rz->stream->avail_out = RZ_BUFFER_SIZE;
472         rz->stream->next_out  = rz->outbuf;
473         rz->file_type = FILE_TYPE_GZ;
474         rz->in = rz->header_size;
475         rz->block_pos = rz->header_size;
476         rz->next_block_pos = rz->header_size;
477         rz->block_off = 0;
478         if(ext_len < 7 || memcmp(rz->inbuf + ext_off, c, 4) != 0) return rz;
479         if(((((unsigned char*)rz->inbuf)[ext_off + 5] << 8) | ((unsigned char*)rz->inbuf)[ext_off + 6]) != RZ_BLOCK_SIZE){
480                 fprintf(stderr, " -- WARNING: RZ_BLOCK_SIZE is not %d, treat source as gz file.  in %s -- %s:%d --\n", RZ_BLOCK_SIZE, __FUNCTION__, __FILE__, __LINE__);
481                 return rz;
482         }
483         rz->load_index = _load_index;
484         rz->file_type = FILE_TYPE_RZ;
485 #ifdef _USE_KNETFILE
486         if(knet_seek(fp, -16, SEEK_END) == -1){
487 #else
488         if(lseek(fd, -16, SEEK_END) == -1){
489 #endif
490                 UNSEEKABLE:
491                 rz->seekable = 0;
492                 rz->index = NULL;
493                 rz->src_end = rz->end = 0x7FFFFFFFFFFFFFFFLL;
494         } else {
495                 is_be = is_big_endian();
496                 rz->seekable = 1;
497 #ifdef _USE_KNETFILE
498         knet_read(fp, &end, sizeof(int64_t));
499 #else
500                 read(fd, &end, sizeof(int64_t));
501 #endif        
502                 if(!is_be) rz->src_end = (int64_t)byte_swap_8((uint64_t)end);
503                 else rz->src_end = end;
504
505 #ifdef _USE_KNETFILE
506                 knet_read(fp, &end, sizeof(int64_t));
507 #else
508                 read(fd, &end, sizeof(int64_t));
509 #endif        
510                 if(!is_be) rz->end = (int64_t)byte_swap_8((uint64_t)end);
511                 else rz->end = end;
512                 if(n > rz->end){
513                         rz->stream->avail_in -= n - rz->end;
514                         n = rz->end;
515                 }
516                 if(rz->end > rz->src_end){
517 #ifdef _USE_KNETFILE
518             knet_seek(fp, rz->in, SEEK_SET);
519 #else
520                         lseek(fd, rz->in, SEEK_SET);
521 #endif
522                         goto UNSEEKABLE;
523                 }
524 #ifdef _USE_KNETFILE
525         knet_seek(fp, rz->end, SEEK_SET);
526                 if(knet_tell(fp) != rz->end){
527                         knet_seek(fp, rz->in, SEEK_SET);
528 #else
529                 if(lseek(fd, rz->end, SEEK_SET) != rz->end){
530                         lseek(fd, rz->in, SEEK_SET);
531 #endif
532                         goto UNSEEKABLE;
533                 }
534 #ifdef _USE_KNETFILE
535                 load_zindex(rz, fp);
536                 knet_seek(fp, n, SEEK_SET);
537 #else
538                 load_zindex(rz, fd);
539                 lseek(fd, n, SEEK_SET);
540 #endif
541         }
542         return rz;
543 }
544
545 #ifdef _USE_KNETFILE
546 RAZF* razf_dopen(int fd, const char *mode){
547     if (strstr(mode, "r")) fprintf(stderr,"[razf_dopen] implement me\n");
548     else if(strstr(mode, "w")) return razf_open_w(fd);
549         return NULL;
550 }
551
552 RAZF* razf_dopen2(int fd, const char *mode)
553 {
554     fprintf(stderr,"[razf_dopen2] implement me\n");
555     return NULL;
556 }
557 #else
558 RAZF* razf_dopen(int fd, const char *mode){
559         if(strstr(mode, "r")) return razf_open_r(fd, 1);
560         else if(strstr(mode, "w")) return razf_open_w(fd);
561         else return NULL;
562 }
563
564 RAZF* razf_dopen2(int fd, const char *mode)
565 {
566         if(strstr(mode, "r")) return razf_open_r(fd, 0);
567         else if(strstr(mode, "w")) return razf_open_w(fd);
568         else return NULL;
569 }
570 #endif
571
572 static inline RAZF* _razf_open(const char *filename, const char *mode, int _load_index){
573         int fd;
574         RAZF *rz;
575         if(strstr(mode, "r")){
576 #ifdef _USE_KNETFILE
577         knetFile *fd = knet_open(filename, "r");
578         if (fd == 0) {
579             fprintf(stderr, "[_razf_open] fail to open %s\n", filename);
580             return NULL;
581         }
582 #else
583 #ifdef _WIN32
584                 fd = open(filename, O_RDONLY | O_BINARY);
585 #else
586                 fd = open(filename, O_RDONLY);
587 #endif
588 #endif
589                 if(fd < 0) return NULL;
590                 rz = razf_open_r(fd, _load_index);
591         } else if(strstr(mode, "w")){
592 #ifdef _WIN32
593                 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
594 #else
595                 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0666);
596 #endif
597                 if(fd < 0) return NULL;
598                 rz = razf_open_w(fd);
599         } else return NULL;
600         return rz;
601 }
602
603 RAZF* razf_open(const char *filename, const char *mode){
604         return _razf_open(filename, mode, 1);
605 }
606
607 RAZF* razf_open2(const char *filename, const char *mode){
608         return _razf_open(filename, mode, 0);
609 }
610
611 int razf_get_data_size(RAZF *rz, int64_t *u_size, int64_t *c_size){
612         int64_t n;
613         if(rz->mode != 'r' && rz->mode != 'R') return 0;
614         switch(rz->file_type){
615                 case FILE_TYPE_PLAIN:
616                         if(rz->end == 0x7fffffffffffffffLL){
617 #ifdef _USE_KNETFILE
618                                 if(knet_seek(rz->x.fpr, 0, SEEK_CUR) == -1) return 0;
619                 n = knet_tell(rz->x.fpr);
620                                 knet_seek(rz->x.fpr, 0, SEEK_END);
621                 rz->end = knet_tell(rz->x.fpr);
622                                 knet_seek(rz->x.fpr, n, SEEK_SET);
623 #else
624                                 if((n = lseek(rz->filedes, 0, SEEK_CUR)) == -1) return 0;
625                                 rz->end = lseek(rz->filedes, 0, SEEK_END);
626                                 lseek(rz->filedes, n, SEEK_SET);
627 #endif                
628                         }
629                         *u_size = *c_size = rz->end;
630                         return 1;
631                 case FILE_TYPE_GZ:
632                         return 0;
633                 case FILE_TYPE_RZ:
634                         if(rz->src_end == rz->end) return 0;
635                         *u_size = rz->src_end;
636                         *c_size = rz->end;
637                         return 1;
638                 default:
639                         return 0;
640         }
641 }
642
643 static int _razf_read(RAZF* rz, void *data, int size){
644         int ret, tin;
645         if(rz->z_eof || rz->z_err) return 0;
646         if (rz->file_type == FILE_TYPE_PLAIN) {
647 #ifdef _USE_KNETFILE
648                 ret = knet_read(rz->x.fpr, data, size);
649 #else
650                 ret = read(rz->filedes, data, size);
651 #endif        
652                 if (ret == 0) rz->z_eof = 1;
653                 return ret;
654         }
655         rz->stream->avail_out = size;
656         rz->stream->next_out  = data;
657         while(rz->stream->avail_out){
658                 if(rz->stream->avail_in == 0){
659                         if(rz->in >= rz->end){ rz->z_eof = 1; break; }
660                         if(rz->end - rz->in < RZ_BUFFER_SIZE){
661 #ifdef _USE_KNETFILE
662                                 rz->stream->avail_in = knet_read(rz->x.fpr, rz->inbuf, rz->end -rz->in);
663 #else
664                                 rz->stream->avail_in = read(rz->filedes, rz->inbuf, rz->end -rz->in);
665 #endif        
666                         } else {
667 #ifdef _USE_KNETFILE
668                                 rz->stream->avail_in = knet_read(rz->x.fpr, rz->inbuf, RZ_BUFFER_SIZE);
669 #else
670                                 rz->stream->avail_in = read(rz->filedes, rz->inbuf, RZ_BUFFER_SIZE);
671 #endif        
672                         }
673                         if(rz->stream->avail_in == 0){
674                                 rz->z_eof = 1;
675                                 break;
676                         }
677                         rz->stream->next_in = rz->inbuf;
678                 }
679                 tin = rz->stream->avail_in;
680                 ret = inflate(rz->stream, Z_BLOCK);
681                 rz->in += tin - rz->stream->avail_in;
682                 if(ret == Z_NEED_DICT || ret == Z_MEM_ERROR || ret == Z_DATA_ERROR){
683                         fprintf(stderr, "[_razf_read] inflate error: %d %s (at %s:%d)\n", ret, rz->stream->msg ? rz->stream->msg : "", __FILE__, __LINE__);
684                         rz->z_err = 1;
685                         break;
686                 }
687                 if(ret == Z_STREAM_END){
688                         rz->z_eof = 1;
689                         break;
690                 }
691                 if ((rz->stream->data_type&128) && !(rz->stream->data_type&64)){
692                         rz->buf_flush = 1;
693                         rz->next_block_pos = rz->in;
694                         break;
695                 }
696         }
697         return size - rz->stream->avail_out;
698 }
699
700 int razf_read(RAZF *rz, void *data, int size){
701         int ori_size, i;
702         ori_size = size;
703         while(size > 0){
704                 if(rz->buf_len){
705                         if(size < rz->buf_len){
706                                 for(i=0;i<size;i++) ((char*)data)[i] = ((char*)rz->outbuf + rz->buf_off)[i];
707                                 rz->buf_off += size;
708                                 rz->buf_len -= size;
709                                 data += size;
710                                 rz->block_off += size;
711                                 size = 0;
712                                 break;
713                         } else {
714                                 for(i=0;i<rz->buf_len;i++) ((char*)data)[i] = ((char*)rz->outbuf + rz->buf_off)[i];
715                                 data += rz->buf_len;
716                                 size -= rz->buf_len;
717                                 rz->block_off += rz->buf_len;
718                                 rz->buf_off = 0;
719                                 rz->buf_len = 0;
720                                 if(rz->buf_flush){
721                                         rz->block_pos = rz->next_block_pos;
722                                         rz->block_off = 0;
723                                         rz->buf_flush = 0;
724                                 }
725                         }
726                 } else if(rz->buf_flush){
727                         rz->block_pos = rz->next_block_pos;
728                         rz->block_off = 0;
729                         rz->buf_flush = 0;
730                 }
731                 if(rz->buf_flush) continue;
732                 rz->buf_len = _razf_read(rz, rz->outbuf, RZ_BUFFER_SIZE);
733                 if(rz->z_eof && rz->buf_len == 0) break;
734         }
735         rz->out += ori_size - size;
736         return ori_size - size;
737 }
738
739 int razf_skip(RAZF* rz, int size){
740         int ori_size;
741         ori_size = size;
742         while(size > 0){
743                 if(rz->buf_len){
744                         if(size < rz->buf_len){
745                                 rz->buf_off += size;
746                                 rz->buf_len -= size;
747                                 rz->block_off += size;
748                                 size = 0;
749                                 break;
750                         } else {
751                                 size -= rz->buf_len;
752                                 rz->buf_off = 0;
753                                 rz->buf_len = 0;
754                                 rz->block_off += rz->buf_len;
755                                 if(rz->buf_flush){
756                                         rz->block_pos = rz->next_block_pos;
757                                         rz->block_off = 0;
758                                         rz->buf_flush = 0;
759                                 }
760                         }
761                 } else if(rz->buf_flush){
762                         rz->block_pos = rz->next_block_pos;
763                         rz->block_off = 0;
764                         rz->buf_flush = 0;
765                 }
766                 if(rz->buf_flush) continue;
767                 rz->buf_len = _razf_read(rz, rz->outbuf, RZ_BUFFER_SIZE);
768                 if(rz->z_eof || rz->z_err) break;
769         }
770         rz->out += ori_size - size;
771         return ori_size - size;
772 }
773
774 static void _razf_reset_read(RAZF *rz, int64_t in, int64_t out){
775 #ifdef _USE_KNETFILE
776         knet_seek(rz->x.fpr, in, SEEK_SET);
777 #else
778         lseek(rz->filedes, in, SEEK_SET);
779 #endif
780         rz->in  = in;
781         rz->out = out;
782         rz->block_pos = in;
783         rz->next_block_pos = in;
784         rz->block_off = 0;
785         rz->buf_flush = 0;
786         rz->z_eof = rz->z_err = 0;
787         inflateReset(rz->stream);
788         rz->stream->avail_in = 0;
789         rz->buf_off = rz->buf_len = 0;
790 }
791
792 int64_t razf_jump(RAZF *rz, int64_t block_start, int block_offset){
793         int64_t pos;
794         rz->z_eof = 0;
795         if(rz->file_type == FILE_TYPE_PLAIN){
796                 rz->buf_off = rz->buf_len = 0;
797                 pos = block_start + block_offset;
798 #ifdef _USE_KNETFILE
799                 knet_seek(rz->x.fpr, pos, SEEK_SET);
800         pos = knet_tell(rz->x.fpr);
801 #else
802                 pos = lseek(rz->filedes, pos, SEEK_SET);
803 #endif
804                 rz->out = rz->in = pos;
805                 return pos;
806         }
807         if(block_start == rz->block_pos && block_offset >= rz->block_off) {
808                 block_offset -= rz->block_off;
809                 goto SKIP; // Needn't reset inflate
810         }
811         if(block_start  == 0) block_start = rz->header_size; // Automaticly revist wrong block_start
812         _razf_reset_read(rz, block_start, 0);
813         SKIP:
814         if(block_offset) razf_skip(rz, block_offset);
815         return rz->block_off;
816 }
817
818 int64_t razf_seek(RAZF* rz, int64_t pos, int where){
819         int64_t idx;
820         int64_t seek_pos, new_out;
821         rz->z_eof = 0;
822         if (where == SEEK_CUR) pos += rz->out;
823         else if (where == SEEK_END) pos += rz->src_end;
824         if(rz->file_type == FILE_TYPE_PLAIN){
825 #ifdef _USE_KNETFILE
826                 knet_seek(rz->x.fpr, pos, SEEK_SET);
827         seek_pos = knet_tell(rz->x.fpr);
828 #else
829                 seek_pos = lseek(rz->filedes, pos, SEEK_SET);
830 #endif
831                 rz->buf_off = rz->buf_len = 0;
832                 rz->out = rz->in = seek_pos;
833                 return seek_pos;
834         } else if(rz->file_type == FILE_TYPE_GZ){
835                 if(pos >= rz->out) goto SKIP;
836                 return rz->out;
837         }
838         if(pos == rz->out) return pos;
839         if(pos > rz->src_end) return rz->out;
840         if(!rz->seekable || !rz->load_index){
841                 if(pos >= rz->out) goto SKIP;
842         }
843         idx = pos / RZ_BLOCK_SIZE - 1;
844         seek_pos = (idx < 0)? rz->header_size:(rz->index->cell_offsets[idx] + rz->index->bin_offsets[idx / RZ_BIN_SIZE]);
845         new_out  = (idx + 1) * RZ_BLOCK_SIZE;
846         if(pos > rz->out && new_out <= rz->out) goto SKIP;
847         _razf_reset_read(rz, seek_pos, new_out);
848         SKIP:
849         razf_skip(rz, (int)(pos - rz->out));
850         return rz->out;
851 }
852
853 uint64_t razf_tell2(RAZF *rz)
854 {
855         /*
856         if (rz->load_index) {
857                 int64_t idx, seek_pos;
858                 idx = rz->out / RZ_BLOCK_SIZE - 1;
859                 seek_pos = (idx < 0)? rz->header_size:(rz->index->cell_offsets[idx] + rz->index->bin_offsets[idx / RZ_BIN_SIZE]);
860                 if (seek_pos != rz->block_pos || rz->out%RZ_BLOCK_SIZE != rz->block_off)
861                         fprintf(stderr, "[razf_tell2] inconsistent block offset: (%lld, %lld) != (%lld, %lld)\n",
862                                         (long long)seek_pos, (long long)rz->out%RZ_BLOCK_SIZE, (long long)rz->block_pos, (long long) rz->block_off);
863         }
864         */
865         return (uint64_t)rz->block_pos<<16 | (rz->block_off&0xffff);
866 }
867
868 int64_t razf_seek2(RAZF *rz, uint64_t voffset, int where)
869 {
870         if (where != SEEK_SET) return -1;
871         return razf_jump(rz, voffset>>16, voffset&0xffff);
872 }
873
874 void razf_close(RAZF *rz){
875         if(rz->mode == 'w'){
876 #ifndef _RZ_READONLY
877                 razf_end_flush(rz);
878                 deflateEnd(rz->stream);
879 #ifdef _USE_KNETFILE
880                 save_zindex(rz, rz->x.fpw);
881                 if(is_big_endian()){
882                         if (write(rz->x.fpw, &rz->in, sizeof(int64_t)) < 0) {
883                                 fprintf(stderr, "[%s] failed to write rz.in.\n", __func__);
884                                 abort();
885                         }
886                         if (write(rz->x.fpw, &rz->out, sizeof(int64_t)) < 0) {
887                                 fprintf(stderr, "[%s] failed to write rz.out.\n", __func__);
888                                 abort();
889                         }
890                 } else {
891                         uint64_t v64 = byte_swap_8((uint64_t)rz->in);
892                         if (write(rz->x.fpw, &v64, sizeof(int64_t)) < 0) {
893                                 fprintf(stderr, "[%s] failed to write rz.in.\n", __func__);
894                                 abort();
895                         }
896                         v64 = byte_swap_8((uint64_t)rz->out);
897                         if (write(rz->x.fpw, &v64, sizeof(int64_t)) < 0) {
898                                 fprintf(stderr, "[%s] failed to write rz.out.\n", __func__);
899                                 abort();
900                         }
901                 }
902 #else
903                 save_zindex(rz, rz->filedes);
904                 if(is_big_endian()){
905                         if (write(rz->filedes, &rz->in, sizeof(int64_t)) < 0) {
906                                 fprintf(stderr, "[%s] failed to write rz.in.\n", __func__);
907                                 abort();
908                         }
909                         if (write(rz->filedes, &rz->out, sizeof(int64_t)) < 0) {
910                                 fprintf(stderr, "[%s] failed to write rz.out.\n", __func__);
911                                 abort();
912                         }
913                 } else {
914                         uint64_t v64 = byte_swap_8((uint64_t)rz->in);
915                         if (write(rz->filedes, &v64, sizeof(int64_t)) < 0) {
916                                 fprintf(stderr, "[%s] failed to write rz.in.\n", __func__);
917                                 abort();
918                         }
919                         v64 = byte_swap_8((uint64_t)rz->out);
920                         if (write(rz->filedes, &v64, sizeof(int64_t)) < 0) {
921                                 fprintf(stderr, "[%s] failed to write rz.out.\n", __func__);
922                                 abort();
923                         }
924                 }
925 #endif
926 #endif
927         } else if(rz->mode == 'r'){
928                 if(rz->stream) inflateEnd(rz->stream);
929         }
930         if(rz->inbuf) free(rz->inbuf);
931         if(rz->outbuf) free(rz->outbuf);
932         if(rz->header){
933                 free(rz->header->extra);
934                 free(rz->header->name);
935                 free(rz->header->comment);
936                 free(rz->header);
937         }
938         if(rz->index){
939                 free(rz->index->bin_offsets);
940                 free(rz->index->cell_offsets);
941                 free(rz->index);
942         }
943         free(rz->stream);
944 #ifdef _USE_KNETFILE
945     if (rz->mode == 'r')
946         knet_close(rz->x.fpr);
947     if (rz->mode == 'w')
948         close(rz->x.fpw);
949 #else
950         close(rz->filedes);
951 #endif
952         free(rz);
953 }
954
955 #endif