Update debian changelog
[pysam.git] / samtools / razf.c.pysam.c
1 #include "pysam.h"
2
3 /*
4  * RAZF : Random Access compressed(Z) File
5  * Version: 1.0
6  * Release Date: 2008-10-27
7  *
8  * Copyright 2008, Jue Ruan <ruanjue@gmail.com>, Heng Li <lh3@sanger.ac.uk>
9  *
10  * All rights reserved.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33
34 #ifndef _NO_RAZF
35
36 #include <fcntl.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <unistd.h>
41 #include "razf.h"
42
43
44 #if ZLIB_VERNUM < 0x1221
45 struct _gz_header_s {
46     int     text;
47     uLong   time;
48     int     xflags;
49     int     os;
50     Bytef   *extra;
51     uInt    extra_len;
52     uInt    extra_max;
53     Bytef   *name;
54     uInt    name_max;
55     Bytef   *comment;
56     uInt    comm_max;
57     int     hcrc;
58     int     done;
59 };
60 #warning "zlib < 1.2.2.1; RAZF writing is disabled."
61 #endif
62
63 #define DEF_MEM_LEVEL 8
64
/* Reverse the byte order of a 32-bit value. */
static inline uint32_t byte_swap_4(uint32_t v){
	const uint32_t b0 = v & 0xFFU;
	const uint32_t b1 = (v >> 8) & 0xFFU;
	const uint32_t b2 = (v >> 16) & 0xFFU;
	const uint32_t b3 = v >> 24;
	return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
69
/* Reverse the byte order of a 64-bit value. */
static inline uint64_t byte_swap_8(uint64_t v){
	uint64_t swapped = 0;
	int i;
	/* Peel bytes off the low end of v and push them onto swapped. */
	for(i = 0; i < 8; i++){
		swapped = (swapped << 8) | (v & 0xFFU);
		v >>= 8;
	}
	return swapped;
}
75
/* Return non-zero when the first byte of the int value 1 is not 0x01,
 * i.e. when the host does not store the least significant byte first. */
static inline int is_big_endian(){
	const int probe = 0x01;
	const char *first = (const char*)&probe;
	return first[0] != 0x01;
}
81
82 #ifndef _RZ_READONLY
83 static void add_zindex(RAZF *rz, int64_t in, int64_t out){
84         if(rz->index->size == rz->index->cap){
85                 rz->index->cap = rz->index->cap * 1.5 + 2;
86                 rz->index->cell_offsets = realloc(rz->index->cell_offsets, sizeof(int) * rz->index->cap);
87                 rz->index->bin_offsets  = realloc(rz->index->bin_offsets, sizeof(int64_t) * (rz->index->cap/RZ_BIN_SIZE + 1));
88         }
89         if(rz->index->size % RZ_BIN_SIZE == 0) rz->index->bin_offsets[rz->index->size / RZ_BIN_SIZE] = out;
90         rz->index->cell_offsets[rz->index->size] = out - rz->index->bin_offsets[rz->index->size / RZ_BIN_SIZE];
91         rz->index->size ++;
92 }
93
94 static void save_zindex(RAZF *rz, int fd){
95         int32_t i, v32;
96         int is_be;
97         is_be = is_big_endian();
98         if(is_be) write(fd, &rz->index->size, sizeof(int));
99         else {
100                 v32 = byte_swap_4((uint32_t)rz->index->size);
101                 write(fd, &v32, sizeof(uint32_t));
102         }
103         v32 = rz->index->size / RZ_BIN_SIZE + 1;
104         if(!is_be){
105                 for(i=0;i<v32;i++) rz->index->bin_offsets[i]  = byte_swap_8((uint64_t)rz->index->bin_offsets[i]);
106                 for(i=0;i<rz->index->size;i++) rz->index->cell_offsets[i] = byte_swap_4((uint32_t)rz->index->cell_offsets[i]);
107         }
108         write(fd, rz->index->bin_offsets, sizeof(int64_t) * v32);
109         write(fd, rz->index->cell_offsets, sizeof(int32_t) * rz->index->size);
110 }
111 #endif
112
113 #ifdef _USE_KNETFILE
114 static void load_zindex(RAZF *rz, knetFile *fp){
115 #else
116 static void load_zindex(RAZF *rz, int fd){
117 #endif
118         int32_t i, v32;
119         int is_be;
120         if(!rz->load_index) return;
121         if(rz->index == NULL) rz->index = malloc(sizeof(ZBlockIndex));
122         is_be = is_big_endian();
123 #ifdef _USE_KNETFILE
124         knet_read(fp, &rz->index->size, sizeof(int));
125 #else
126         read(fd, &rz->index->size, sizeof(int));
127 #endif
128         if(!is_be) rz->index->size = byte_swap_4((uint32_t)rz->index->size);
129         rz->index->cap = rz->index->size;
130         v32 = rz->index->size / RZ_BIN_SIZE + 1;
131         rz->index->bin_offsets  = malloc(sizeof(int64_t) * v32);
132 #ifdef _USE_KNETFILE
133         knet_read(fp, rz->index->bin_offsets, sizeof(int64_t) * v32);
134 #else
135         read(fd, rz->index->bin_offsets, sizeof(int64_t) * v32);
136 #endif
137         rz->index->cell_offsets = malloc(sizeof(int) * rz->index->size);
138 #ifdef _USE_KNETFILE
139         knet_read(fp, rz->index->cell_offsets, sizeof(int) * rz->index->size);
140 #else
141         read(fd, rz->index->cell_offsets, sizeof(int) * rz->index->size);
142 #endif
143         if(!is_be){
144                 for(i=0;i<v32;i++) rz->index->bin_offsets[i] = byte_swap_8((uint64_t)rz->index->bin_offsets[i]);
145                 for(i=0;i<rz->index->size;i++) rz->index->cell_offsets[i] = byte_swap_4((uint32_t)rz->index->cell_offsets[i]);
146         }
147 }
148
149 #ifdef _RZ_READONLY
150 static RAZF* razf_open_w(int fd)
151 {
152         fprintf(pysamerr, "[razf_open_w] Writing is not available with zlib ver < 1.2.2.1\n");
153         return 0;
154 }
155 #else
156 static RAZF* razf_open_w(int fd){
157         RAZF *rz;
158 #ifdef _WIN32
159         setmode(fd, O_BINARY);
160 #endif
161         rz = calloc(1, sizeof(RAZF));
162         rz->mode = 'w';
163 #ifdef _USE_KNETFILE
164     rz->x.fpw = fd;
165 #else
166         rz->filedes = fd;
167 #endif
168         rz->stream = calloc(sizeof(z_stream), 1);
169         rz->inbuf  = malloc(RZ_BUFFER_SIZE);
170         rz->outbuf = malloc(RZ_BUFFER_SIZE);
171         rz->index = calloc(sizeof(ZBlockIndex), 1);
172         deflateInit2(rz->stream, RZ_COMPRESS_LEVEL, Z_DEFLATED, WINDOW_BITS + 16, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY);
173         rz->stream->avail_out = RZ_BUFFER_SIZE;
174         rz->stream->next_out  = rz->outbuf;
175         rz->header = calloc(sizeof(gz_header), 1);
176         rz->header->os    = 0x03; //Unix
177         rz->header->text  = 0;
178         rz->header->time  = 0;
179         rz->header->extra = malloc(7);
180         strncpy((char*)rz->header->extra, "RAZF", 4);
181         rz->header->extra[4] = 1; // obsolete field
182         // block size = RZ_BLOCK_SIZE, Big-Endian
183         rz->header->extra[5] = RZ_BLOCK_SIZE >> 8;
184         rz->header->extra[6] = RZ_BLOCK_SIZE & 0xFF;
185         rz->header->extra_len = 7;
186         rz->header->name = rz->header->comment  = 0;
187         rz->header->hcrc = 0;
188         deflateSetHeader(rz->stream, rz->header);
189         rz->block_pos = rz->block_off = 0;
190         return rz;
191 }
192
193 static void _razf_write(RAZF* rz, const void *data, int size){
194         int tout;
195         rz->stream->avail_in = size;
196         rz->stream->next_in  = (void*)data;
197         while(1){
198                 tout = rz->stream->avail_out;
199                 deflate(rz->stream, Z_NO_FLUSH);
200                 rz->out += tout - rz->stream->avail_out;
201                 if(rz->stream->avail_out) break;
202 #ifdef _USE_KNETFILE
203                 write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
204 #else
205                 write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
206 #endif
207                 rz->stream->avail_out = RZ_BUFFER_SIZE;
208                 rz->stream->next_out  = rz->outbuf;
209                 if(rz->stream->avail_in == 0) break;
210         };
211         rz->in += size - rz->stream->avail_in;
212         rz->block_off += size - rz->stream->avail_in;
213 }
214
215 static void razf_flush(RAZF *rz){
216         uint32_t tout;
217         if(rz->buf_len){
218                 _razf_write(rz, rz->inbuf, rz->buf_len);
219                 rz->buf_off = rz->buf_len = 0;
220         }
221         if(rz->stream->avail_out){
222 #ifdef _USE_KNETFILE    
223                 write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
224 #else        
225                 write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
226 #endif
227                 rz->stream->avail_out = RZ_BUFFER_SIZE;
228                 rz->stream->next_out  = rz->outbuf;
229         }
230         while(1){
231                 tout = rz->stream->avail_out;
232                 deflate(rz->stream, Z_FULL_FLUSH);
233                 rz->out += tout - rz->stream->avail_out;
234                 if(rz->stream->avail_out == 0){
235 #ifdef _USE_KNETFILE    
236                         write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
237 #else            
238                         write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
239 #endif
240                         rz->stream->avail_out = RZ_BUFFER_SIZE;
241                         rz->stream->next_out  = rz->outbuf;
242                 } else break;
243         }
244         rz->block_pos = rz->out;
245         rz->block_off = 0;
246 }
247
248 static void razf_end_flush(RAZF *rz){
249         uint32_t tout;
250         if(rz->buf_len){
251                 _razf_write(rz, rz->inbuf, rz->buf_len);
252                 rz->buf_off = rz->buf_len = 0;
253         }
254         while(1){
255                 tout = rz->stream->avail_out;
256                 deflate(rz->stream, Z_FINISH);
257                 rz->out += tout - rz->stream->avail_out;
258                 if(rz->stream->avail_out < RZ_BUFFER_SIZE){
259 #ifdef _USE_KNETFILE        
260                         write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
261 #else            
262                         write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
263 #endif
264                         rz->stream->avail_out = RZ_BUFFER_SIZE;
265                         rz->stream->next_out  = rz->outbuf;
266                 } else break;
267         }
268 }
269
270 static void _razf_buffered_write(RAZF *rz, const void *data, int size){
271         int i, n;
272         while(1){
273                 if(rz->buf_len == RZ_BUFFER_SIZE){
274                         _razf_write(rz, rz->inbuf, rz->buf_len);
275                         rz->buf_len = 0;
276                 }
277                 if(size + rz->buf_len < RZ_BUFFER_SIZE){
278                         for(i=0;i<size;i++) ((char*)rz->inbuf + rz->buf_len)[i] = ((char*)data)[i];
279                         rz->buf_len += size;
280                         return;
281                 } else {
282                         n = RZ_BUFFER_SIZE - rz->buf_len;
283                         for(i=0;i<n;i++) ((char*)rz->inbuf + rz->buf_len)[i] = ((char*)data)[i];
284                         size -= n;
285                         data += n;
286                         rz->buf_len += n;
287                 }
288         }
289 }
290
291 int razf_write(RAZF* rz, const void *data, int size){
292         int ori_size, n;
293         int64_t next_block;
294         ori_size = size;
295         next_block = ((rz->in / RZ_BLOCK_SIZE) + 1) * RZ_BLOCK_SIZE;
296         while(rz->in + rz->buf_len + size >= next_block){
297                 n = next_block - rz->in - rz->buf_len;
298                 _razf_buffered_write(rz, data, n);
299                 data += n;
300                 size -= n;
301                 razf_flush(rz);
302                 add_zindex(rz, rz->in, rz->out);
303                 next_block = ((rz->in / RZ_BLOCK_SIZE) + 1) * RZ_BLOCK_SIZE;
304         }
305         _razf_buffered_write(rz, data, size);
306         return ori_size;
307 }
308 #endif
309
/* gzip flag byte */
#define ASCII_FLAG   0x01 /* bit 0 set: file probably ascii text */
#define HEAD_CRC     0x02 /* bit 1 set: header CRC present */
#define EXTRA_FIELD  0x04 /* bit 2 set: extra field present */
#define ORIG_NAME    0x08 /* bit 3 set: original file name present */
#define COMMENT      0x10 /* bit 4 set: file comment present */
#define RESERVED     0xE0 /* bits 5..7: reserved */

/*
 * Parse a gzip member header held in data[0..size).
 *
 * Returns the header length in bytes (the offset of the first byte after
 * the header), or 0 when the buffer does not start with a usable gzip
 * header: too short, wrong magic, compression method other than deflate,
 * reserved flag bits set, or an extra field / header CRC that does not fit.
 *
 * On success *extra_off is the offset of the extra-field payload and
 * *extra_len its length (0 when the header has no extra field). Both
 * outputs are unspecified when 0 is returned.
 *
 * The 10-byte fixed part must be present in full. Previously a 4..9 byte
 * buffer without optional fields was reported as a valid 10-byte header,
 * which is longer than the data actually available.
 */
static int _read_gz_header(unsigned char *data, int size, int *extra_off, int *extra_len){
	/* RFC 1952: 10 fixed header bytes; CM = 8 is deflate (zlib's Z_DEFLATED). */
	enum { GZ_FIXED_HEADER_LEN = 10, GZ_CM_DEFLATE = 8 };
	int flags, n, len;
	if(size < GZ_FIXED_HEADER_LEN) return 0;
	if(data[0] != 0x1f || data[1] != 0x8b) return 0;
	flags = data[3];
	if(data[2] != GZ_CM_DEFLATE || (flags & RESERVED)) return 0;
	n = GZ_FIXED_HEADER_LEN; /* magic, method, flags + 6 bytes that are skipped */
	*extra_off = n + 2;
	*extra_len = 0;
	if(flags & EXTRA_FIELD){
		if(size < n + 2) return 0;
		len = ((int)data[n + 1] << 8) | data[n]; /* little-endian XLEN */
		n += 2;
		*extra_off = n;
		if(len > size - n) return 0; /* payload runs past the buffer */
		n += len;
		*extra_len = len;
	}
	/* Zero-terminated name and comment; scanning stops at the buffer end. */
	if(flags & ORIG_NAME) while(n < size && data[n++]);
	if(flags & COMMENT) while(n < size && data[n++]);
	if(flags & HEAD_CRC){
		if(n + 2 > size) return 0;
		n += 2;
	}
	return n;
}
349
350 #ifdef _USE_KNETFILE
351 static RAZF* razf_open_r(knetFile *fp, int _load_index){
352 #else
353 static RAZF* razf_open_r(int fd, int _load_index){
354 #endif
355         RAZF *rz;
356         int ext_off, ext_len;
357         int n, is_be, ret;
358         int64_t end;
359         unsigned char c[] = "RAZF";
360         rz = calloc(1, sizeof(RAZF));
361         rz->mode = 'r';
362 #ifdef _USE_KNETFILE
363     rz->x.fpr = fp;
364 #else
365 #ifdef _WIN32
366         setmode(fd, O_BINARY);
367 #endif
368         rz->filedes = fd;
369 #endif
370         rz->stream = calloc(sizeof(z_stream), 1);
371         rz->inbuf  = malloc(RZ_BUFFER_SIZE);
372         rz->outbuf = malloc(RZ_BUFFER_SIZE);
373         rz->end = rz->src_end = 0x7FFFFFFFFFFFFFFFLL;
374 #ifdef _USE_KNETFILE
375     n = knet_read(rz->x.fpr, rz->inbuf, RZ_BUFFER_SIZE);
376 #else
377         n = read(rz->filedes, rz->inbuf, RZ_BUFFER_SIZE);
378 #endif
379         ret = _read_gz_header(rz->inbuf, n, &ext_off, &ext_len);
380         if(ret == 0){
381                 PLAIN_FILE:
382                 rz->in = n;
383                 rz->file_type = FILE_TYPE_PLAIN;
384                 memcpy(rz->outbuf, rz->inbuf, n);
385                 rz->buf_len = n;
386                 free(rz->stream);
387                 rz->stream = NULL;
388                 return rz;
389         }
390         rz->header_size = ret;
391         ret = inflateInit2(rz->stream, -WINDOW_BITS);
392         if(ret != Z_OK){ inflateEnd(rz->stream); goto PLAIN_FILE;}
393         rz->stream->avail_in = n - rz->header_size;
394         rz->stream->next_in  = rz->inbuf + rz->header_size;
395         rz->stream->avail_out = RZ_BUFFER_SIZE;
396         rz->stream->next_out  = rz->outbuf;
397         rz->file_type = FILE_TYPE_GZ;
398         rz->in = rz->header_size;
399         rz->block_pos = rz->header_size;
400         rz->next_block_pos = rz->header_size;
401         rz->block_off = 0;
402         if(ext_len < 7 || memcmp(rz->inbuf + ext_off, c, 4) != 0) return rz;
403         if(((((unsigned char*)rz->inbuf)[ext_off + 5] << 8) | ((unsigned char*)rz->inbuf)[ext_off + 6]) != RZ_BLOCK_SIZE){
404                 fprintf(pysamerr, " -- WARNING: RZ_BLOCK_SIZE is not %d, treat source as gz file.  in %s -- %s:%d --\n", RZ_BLOCK_SIZE, __FUNCTION__, __FILE__, __LINE__);
405                 return rz;
406         }
407         rz->load_index = _load_index;
408         rz->file_type = FILE_TYPE_RZ;
409 #ifdef _USE_KNETFILE
410         if(knet_seek(fp, -16, SEEK_END) == -1){
411 #else
412         if(lseek(fd, -16, SEEK_END) == -1){
413 #endif
414                 UNSEEKABLE:
415                 rz->seekable = 0;
416                 rz->index = NULL;
417                 rz->src_end = rz->end = 0x7FFFFFFFFFFFFFFFLL;
418         } else {
419                 is_be = is_big_endian();
420                 rz->seekable = 1;
421 #ifdef _USE_KNETFILE
422         knet_read(fp, &end, sizeof(int64_t));
423 #else
424                 read(fd, &end, sizeof(int64_t));
425 #endif        
426                 if(!is_be) rz->src_end = (int64_t)byte_swap_8((uint64_t)end);
427                 else rz->src_end = end;
428
429 #ifdef _USE_KNETFILE
430                 knet_read(fp, &end, sizeof(int64_t));
431 #else
432                 read(fd, &end, sizeof(int64_t));
433 #endif        
434                 if(!is_be) rz->end = (int64_t)byte_swap_8((uint64_t)end);
435                 else rz->end = end;
436                 if(n > rz->end){
437                         rz->stream->avail_in -= n - rz->end;
438                         n = rz->end;
439                 }
440                 if(rz->end > rz->src_end){
441 #ifdef _USE_KNETFILE
442             knet_seek(fp, rz->in, SEEK_SET);
443 #else
444                         lseek(fd, rz->in, SEEK_SET);
445 #endif
446                         goto UNSEEKABLE;
447                 }
448 #ifdef _USE_KNETFILE
449         knet_seek(fp, rz->end, SEEK_SET);
450                 if(knet_tell(fp) != rz->end){
451                         knet_seek(fp, rz->in, SEEK_SET);
452 #else
453                 if(lseek(fd, rz->end, SEEK_SET) != rz->end){
454                         lseek(fd, rz->in, SEEK_SET);
455 #endif
456                         goto UNSEEKABLE;
457                 }
458 #ifdef _USE_KNETFILE
459                 load_zindex(rz, fp);
460                 knet_seek(fp, n, SEEK_SET);
461 #else
462                 load_zindex(rz, fd);
463                 lseek(fd, n, SEEK_SET);
464 #endif
465         }
466         return rz;
467 }
468
469 #ifdef _USE_KNETFILE
470 RAZF* razf_dopen(int fd, const char *mode){
471     if (strstr(mode, "r")) fprintf(pysamerr,"[razf_dopen] implement me\n");
472     else if(strstr(mode, "w")) return razf_open_w(fd);
473         return NULL;
474 }
475
476 RAZF* razf_dopen2(int fd, const char *mode)
477 {
478     fprintf(pysamerr,"[razf_dopen2] implement me\n");
479     return NULL;
480 }
481 #else
482 RAZF* razf_dopen(int fd, const char *mode){
483         if(strstr(mode, "r")) return razf_open_r(fd, 1);
484         else if(strstr(mode, "w")) return razf_open_w(fd);
485         else return NULL;
486 }
487
488 RAZF* razf_dopen2(int fd, const char *mode)
489 {
490         if(strstr(mode, "r")) return razf_open_r(fd, 0);
491         else if(strstr(mode, "w")) return razf_open_w(fd);
492         else return NULL;
493 }
494 #endif
495
496 static inline RAZF* _razf_open(const char *filename, const char *mode, int _load_index){
497         int fd;
498         RAZF *rz;
499         if(strstr(mode, "r")){
500 #ifdef _USE_KNETFILE
501         knetFile *fd = knet_open(filename, "r");
502         if (fd == 0) {
503             fprintf(pysamerr, "[_razf_open] fail to open %s\n", filename);
504             return NULL;
505         }
506 #else
507 #ifdef _WIN32
508                 fd = open(filename, O_RDONLY | O_BINARY);
509 #else
510                 fd = open(filename, O_RDONLY);
511 #endif
512 #endif
513                 if(fd < 0) return NULL;
514                 rz = razf_open_r(fd, _load_index);
515         } else if(strstr(mode, "w")){
516 #ifdef _WIN32
517                 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
518 #else
519                 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0666);
520 #endif
521                 if(fd < 0) return NULL;
522                 rz = razf_open_w(fd);
523         } else return NULL;
524         return rz;
525 }
526
527 RAZF* razf_open(const char *filename, const char *mode){
528         return _razf_open(filename, mode, 1);
529 }
530
531 RAZF* razf_open2(const char *filename, const char *mode){
532         return _razf_open(filename, mode, 0);
533 }
534
535 int razf_get_data_size(RAZF *rz, int64_t *u_size, int64_t *c_size){
536         int64_t n;
537         if(rz->mode != 'r' && rz->mode != 'R') return 0;
538         switch(rz->file_type){
539                 case FILE_TYPE_PLAIN:
540                         if(rz->end == 0x7fffffffffffffffLL){
541 #ifdef _USE_KNETFILE
542                                 if(knet_seek(rz->x.fpr, 0, SEEK_CUR) == -1) return 0;
543                 n = knet_tell(rz->x.fpr);
544                                 knet_seek(rz->x.fpr, 0, SEEK_END);
545                 rz->end = knet_tell(rz->x.fpr);
546                                 knet_seek(rz->x.fpr, n, SEEK_SET);
547 #else
548                                 if((n = lseek(rz->filedes, 0, SEEK_CUR)) == -1) return 0;
549                                 rz->end = lseek(rz->filedes, 0, SEEK_END);
550                                 lseek(rz->filedes, n, SEEK_SET);
551 #endif                
552                         }
553                         *u_size = *c_size = rz->end;
554                         return 1;
555                 case FILE_TYPE_GZ:
556                         return 0;
557                 case FILE_TYPE_RZ:
558                         if(rz->src_end == rz->end) return 0;
559                         *u_size = rz->src_end;
560                         *c_size = rz->end;
561                         return 1;
562                 default:
563                         return 0;
564         }
565 }
566
567 static int _razf_read(RAZF* rz, void *data, int size){
568         int ret, tin;
569         if(rz->z_eof || rz->z_err) return 0;
570         if (rz->file_type == FILE_TYPE_PLAIN) {
571 #ifdef _USE_KNETFILE
572                 ret = knet_read(rz->x.fpr, data, size);
573 #else
574                 ret = read(rz->filedes, data, size);
575 #endif        
576                 if (ret == 0) rz->z_eof = 1;
577                 return ret;
578         }
579         rz->stream->avail_out = size;
580         rz->stream->next_out  = data;
581         while(rz->stream->avail_out){
582                 if(rz->stream->avail_in == 0){
583                         if(rz->in >= rz->end){ rz->z_eof = 1; break; }
584                         if(rz->end - rz->in < RZ_BUFFER_SIZE){
585 #ifdef _USE_KNETFILE
586                                 rz->stream->avail_in = knet_read(rz->x.fpr, rz->inbuf, rz->end -rz->in);
587 #else
588                                 rz->stream->avail_in = read(rz->filedes, rz->inbuf, rz->end -rz->in);
589 #endif        
590                         } else {
591 #ifdef _USE_KNETFILE
592                                 rz->stream->avail_in = knet_read(rz->x.fpr, rz->inbuf, RZ_BUFFER_SIZE);
593 #else
594                                 rz->stream->avail_in = read(rz->filedes, rz->inbuf, RZ_BUFFER_SIZE);
595 #endif        
596                         }
597                         if(rz->stream->avail_in == 0){
598                                 rz->z_eof = 1;
599                                 break;
600                         }
601                         rz->stream->next_in = rz->inbuf;
602                 }
603                 tin = rz->stream->avail_in;
604                 ret = inflate(rz->stream, Z_BLOCK);
605                 rz->in += tin - rz->stream->avail_in;
606                 if(ret == Z_NEED_DICT || ret == Z_MEM_ERROR || ret == Z_DATA_ERROR){
607                         fprintf(pysamerr, "[_razf_read] inflate error: %d %s (at %s:%d)\n", ret, rz->stream->msg ? rz->stream->msg : "", __FILE__, __LINE__);
608                         rz->z_err = 1;
609                         break;
610                 }
611                 if(ret == Z_STREAM_END){
612                         rz->z_eof = 1;
613                         break;
614                 }
615                 if ((rz->stream->data_type&128) && !(rz->stream->data_type&64)){
616                         rz->buf_flush = 1;
617                         rz->next_block_pos = rz->in;
618                         break;
619                 }
620         }
621         return size - rz->stream->avail_out;
622 }
623
624 int razf_read(RAZF *rz, void *data, int size){
625         int ori_size, i;
626         ori_size = size;
627         while(size > 0){
628                 if(rz->buf_len){
629                         if(size < rz->buf_len){
630                                 for(i=0;i<size;i++) ((char*)data)[i] = ((char*)rz->outbuf + rz->buf_off)[i];
631                                 rz->buf_off += size;
632                                 rz->buf_len -= size;
633                                 data += size;
634                                 rz->block_off += size;
635                                 size = 0;
636                                 break;
637                         } else {
638                                 for(i=0;i<rz->buf_len;i++) ((char*)data)[i] = ((char*)rz->outbuf + rz->buf_off)[i];
639                                 data += rz->buf_len;
640                                 size -= rz->buf_len;
641                                 rz->block_off += rz->buf_len;
642                                 rz->buf_off = 0;
643                                 rz->buf_len = 0;
644                                 if(rz->buf_flush){
645                                         rz->block_pos = rz->next_block_pos;
646                                         rz->block_off = 0;
647                                         rz->buf_flush = 0;
648                                 }
649                         }
650                 } else if(rz->buf_flush){
651                         rz->block_pos = rz->next_block_pos;
652                         rz->block_off = 0;
653                         rz->buf_flush = 0;
654                 }
655                 if(rz->buf_flush) continue;
656                 rz->buf_len = _razf_read(rz, rz->outbuf, RZ_BUFFER_SIZE);
657                 if(rz->z_eof && rz->buf_len == 0) break;
658         }
659         rz->out += ori_size - size;
660         return ori_size - size;
661 }
662
663 int razf_skip(RAZF* rz, int size){
664         int ori_size;
665         ori_size = size;
666         while(size > 0){
667                 if(rz->buf_len){
668                         if(size < rz->buf_len){
669                                 rz->buf_off += size;
670                                 rz->buf_len -= size;
671                                 rz->block_off += size;
672                                 size = 0;
673                                 break;
674                         } else {
675                                 size -= rz->buf_len;
676                                 rz->buf_off = 0;
677                                 rz->buf_len = 0;
678                                 rz->block_off += rz->buf_len;
679                                 if(rz->buf_flush){
680                                         rz->block_pos = rz->next_block_pos;
681                                         rz->block_off = 0;
682                                         rz->buf_flush = 0;
683                                 }
684                         }
685                 } else if(rz->buf_flush){
686                         rz->block_pos = rz->next_block_pos;
687                         rz->block_off = 0;
688                         rz->buf_flush = 0;
689                 }
690                 if(rz->buf_flush) continue;
691                 rz->buf_len = _razf_read(rz, rz->outbuf, RZ_BUFFER_SIZE);
692                 if(rz->z_eof || rz->z_err) break;
693         }
694         rz->out += ori_size - size;
695         return ori_size - size;
696 }
697
698 static void _razf_reset_read(RAZF *rz, int64_t in, int64_t out){
699 #ifdef _USE_KNETFILE
700         knet_seek(rz->x.fpr, in, SEEK_SET);
701 #else
702         lseek(rz->filedes, in, SEEK_SET);
703 #endif
704         rz->in  = in;
705         rz->out = out;
706         rz->block_pos = in;
707         rz->next_block_pos = in;
708         rz->block_off = 0;
709         rz->buf_flush = 0;
710         rz->z_eof = rz->z_err = 0;
711         inflateReset(rz->stream);
712         rz->stream->avail_in = 0;
713         rz->buf_off = rz->buf_len = 0;
714 }
715
716 int64_t razf_jump(RAZF *rz, int64_t block_start, int block_offset){
717         int64_t pos;
718         rz->z_eof = 0;
719         if(rz->file_type == FILE_TYPE_PLAIN){
720                 rz->buf_off = rz->buf_len = 0;
721                 pos = block_start + block_offset;
722 #ifdef _USE_KNETFILE
723                 knet_seek(rz->x.fpr, pos, SEEK_SET);
724         pos = knet_tell(rz->x.fpr);
725 #else
726                 pos = lseek(rz->filedes, pos, SEEK_SET);
727 #endif
728                 rz->out = rz->in = pos;
729                 return pos;
730         }
731         if(block_start == rz->block_pos && block_offset >= rz->block_off) {
732                 block_offset -= rz->block_off;
733                 goto SKIP; // Needn't reset inflate
734         }
735         if(block_start  == 0) block_start = rz->header_size; // Automaticly revist wrong block_start
736         _razf_reset_read(rz, block_start, 0);
737         SKIP:
738         if(block_offset) razf_skip(rz, block_offset);
739         return rz->block_off;
740 }
741
742 int64_t razf_seek(RAZF* rz, int64_t pos, int where){
743         int64_t idx;
744         int64_t seek_pos, new_out;
745         rz->z_eof = 0;
746         if (where == SEEK_CUR) pos += rz->out;
747         else if (where == SEEK_END) pos += rz->src_end;
748         if(rz->file_type == FILE_TYPE_PLAIN){
749 #ifdef _USE_KNETFILE
750                 knet_seek(rz->x.fpr, pos, SEEK_SET);
751         seek_pos = knet_tell(rz->x.fpr);
752 #else
753                 seek_pos = lseek(rz->filedes, pos, SEEK_SET);
754 #endif
755                 rz->buf_off = rz->buf_len = 0;
756                 rz->out = rz->in = seek_pos;
757                 return seek_pos;
758         } else if(rz->file_type == FILE_TYPE_GZ){
759                 if(pos >= rz->out) goto SKIP;
760                 return rz->out;
761         }
762         if(pos == rz->out) return pos;
763         if(pos > rz->src_end) return rz->out;
764         if(!rz->seekable || !rz->load_index){
765                 if(pos >= rz->out) goto SKIP;
766         }
767         idx = pos / RZ_BLOCK_SIZE - 1;
768         seek_pos = (idx < 0)? rz->header_size:(rz->index->cell_offsets[idx] + rz->index->bin_offsets[idx / RZ_BIN_SIZE]);
769         new_out  = (idx + 1) * RZ_BLOCK_SIZE;
770         if(pos > rz->out && new_out <= rz->out) goto SKIP;
771         _razf_reset_read(rz, seek_pos, new_out);
772         SKIP:
773         razf_skip(rz, (int)(pos - rz->out));
774         return rz->out;
775 }
776
777 uint64_t razf_tell2(RAZF *rz)
778 {
779         /*
780         if (rz->load_index) {
781                 int64_t idx, seek_pos;
782                 idx = rz->out / RZ_BLOCK_SIZE - 1;
783                 seek_pos = (idx < 0)? rz->header_size:(rz->index->cell_offsets[idx] + rz->index->bin_offsets[idx / RZ_BIN_SIZE]);
784                 if (seek_pos != rz->block_pos || rz->out%RZ_BLOCK_SIZE != rz->block_off)
785                         fprintf(pysamerr, "[razf_tell2] inconsistent block offset: (%lld, %lld) != (%lld, %lld)\n",
786                                         (long long)seek_pos, (long long)rz->out%RZ_BLOCK_SIZE, (long long)rz->block_pos, (long long) rz->block_off);
787         }
788         */
789         return (uint64_t)rz->block_pos<<16 | (rz->block_off&0xffff);
790 }
791
792 int64_t razf_seek2(RAZF *rz, uint64_t voffset, int where)
793 {
794         if (where != SEEK_SET) return -1;
795         return razf_jump(rz, voffset>>16, voffset&0xffff);
796 }
797
798 void razf_close(RAZF *rz){
799         if(rz->mode == 'w'){
800 #ifndef _RZ_READONLY
801                 razf_end_flush(rz);
802                 deflateEnd(rz->stream);
803 #ifdef _USE_KNETFILE
804                 save_zindex(rz, rz->x.fpw);
805                 if(is_big_endian()){
806                         write(rz->x.fpw, &rz->in, sizeof(int64_t));
807                         write(rz->x.fpw, &rz->out, sizeof(int64_t));
808                 } else {
809                         uint64_t v64 = byte_swap_8((uint64_t)rz->in);
810                         write(rz->x.fpw, &v64, sizeof(int64_t));
811                         v64 = byte_swap_8((uint64_t)rz->out);
812                         write(rz->x.fpw, &v64, sizeof(int64_t));
813                 }
814 #else
815                 save_zindex(rz, rz->filedes);
816                 if(is_big_endian()){
817                         write(rz->filedes, &rz->in, sizeof(int64_t));
818                         write(rz->filedes, &rz->out, sizeof(int64_t));
819                 } else {
820                         uint64_t v64 = byte_swap_8((uint64_t)rz->in);
821                         write(rz->filedes, &v64, sizeof(int64_t));
822                         v64 = byte_swap_8((uint64_t)rz->out);
823                         write(rz->filedes, &v64, sizeof(int64_t));
824                 }
825 #endif
826 #endif
827         } else if(rz->mode == 'r'){
828                 if(rz->stream) inflateEnd(rz->stream);
829         }
830         if(rz->inbuf) free(rz->inbuf);
831         if(rz->outbuf) free(rz->outbuf);
832         if(rz->header){
833                 free(rz->header->extra);
834                 free(rz->header->name);
835                 free(rz->header->comment);
836                 free(rz->header);
837         }
838         if(rz->index){
839                 free(rz->index->bin_offsets);
840                 free(rz->index->cell_offsets);
841                 free(rz->index);
842         }
843         free(rz->stream);
844 #ifdef _USE_KNETFILE
845     if (rz->mode == 'r')
846         knet_close(rz->x.fpr);
847     if (rz->mode == 'w')
848         close(rz->x.fpw);
849 #else
850         close(rz->filedes);
851 #endif
852         free(rz);
853 }
854
855 #endif