Imported Debian patch 0.1.5c-1
[samtools.git] / bgzip.c
1 /*
2  * The Broad Institute
3  * SOFTWARE COPYRIGHT NOTICE AGREEMENT
4  * This software and its documentation are copyright 2008 by the
5  * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
6  *
7  * This software is supplied without any warranty or guaranteed support whatsoever.
8  * Neither the Broad Institute nor MIT can be responsible for its use, misuse,
9  * or functionality.
10  */
11 #include <stdlib.h>
12 #include <string.h>
13 #include <stdio.h>
14 #include <fcntl.h>
15 #include <unistd.h>
16 #include <errno.h>
17 #include "bgzf.h"
18
/* Size in bytes (64 KiB) of the I/O buffer allocated for each read/write pass. */
static const int WINDOW_SIZE = 1 << 16;
20
/*
 * Print the command-line help text on standard output.
 *
 * Always returns 0, which lets callers write `return bgzip_main_usage();`.
 * The -l (list) option is deliberately not part of the printed text.
 */
static int bgzip_main_usage()
{
        static const char *const help_text[] = {
                "\n",
                "Usage:   bgzip [options] [file] ...\n\n",
                "Options: -c      write on standard output, keep original files unchanged\n",
                "         -d      decompress\n",
                "         -b INT  decompress at virtual file pointer INT\n",
                "         -s INT  decompress INT bytes in the uncompressed file\n",
                "         -h      give this help\n",
                "\n"
        };
        const size_t n_lines = sizeof help_text / sizeof help_text[0];
        size_t idx;

        for (idx = 0; idx < n_lines; ++idx) {
                fputs(help_text[idx], stdout);
        }
        return 0;
}
34
/*
 * Open `fn` for writing (mode 0644), creating or truncating it, and return
 * the file descriptor.
 *
 * fn        path of the file to create or overwrite
 * is_forced non-zero: overwrite an existing file without asking;
 *           zero: if `fn` already exists, ask on stdout/stdin first
 *
 * The function only returns on success. It terminates the process with
 * status 1 when the user declines to overwrite (any answer other than
 * 'y'/'Y', including end-of-file or a read error on stdin) or when the file
 * cannot be opened for writing.
 */
static int write_open(const char *fn, int is_forced)
{
        int fd = -1;
        char c = 'n';   /* default answer if nothing can be read from stdin */
        if (!is_forced) {
                /* O_EXCL makes the open fail with EEXIST instead of clobbering an existing file. */
                if ((fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, 0644)) < 0 && errno == EEXIST) {
                        printf("bgzip: %s already exists; do you wish to overwrite (y or n)? ", fn);
                        /* The prompt has no trailing newline; push it out before blocking on input. */
                        fflush(stdout);
                        /* Treat EOF / read failure as "no" rather than reading an uninitialized char. */
                        if (scanf("%c", &c) != 1) c = 'n';
                        if (c != 'Y' && c != 'y') {
                                printf("bgzip: not overwritten\n");
                                exit(1);
                        }
                }
        }
        /* Forced mode, confirmed overwrite, or first open failed for a reason other than EEXIST. */
        if (fd < 0) {
                if ((fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0644)) < 0) {
                        fprintf(stderr, "bgzip: %s: Fail to write\n", fn);
                        exit(1);
                }
        }
        return fd;
}
57
58 static
59 void
60 fail(BGZF* fp)
61 {
62     printf("Error: %s\n", fp->error);
63     exit(1);
64 }
65
66 int main(int argc, char **argv)
67 {
68         int c, compress, pstdout, is_forced;
69         BGZF *rz;
70         void *buffer;
71         long start, end, size;
72
73         compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0;
74         while((c  = getopt(argc, argv, "cdlhfb:s:")) >= 0){
75                 switch(c){
76                 case 'h': return bgzip_main_usage();
77                 case 'd': compress = 0; break;
78                 case 'c': pstdout = 1; break;
79                 // case 'l': compress = 2; break;
80                 case 'b': start = atol(optarg); break;
81                 case 's': size = atol(optarg); break;
82                 case 'f': is_forced = 1; break;
83                 }
84         }
85         if (size >= 0) end = start + size;
86         if(end >= 0 && end < start){
87                 fprintf(stderr, " -- Illegal region: [%ld, %ld] --\n", start, end);
88                 return 1;
89         }
90         if(compress == 1){
91                 int f_src, f_dst = -1;
92                 if(argc > optind){
93                         if((f_src = open(argv[optind], O_RDONLY)) < 0){
94                                 fprintf(stderr, " -- Cannot open file: %s --\n", argv[optind]);
95                                 return 1;
96                         }
97                         if(pstdout){
98                                 f_dst = fileno(stdout);
99                         } else {
100                                 char *name = malloc(sizeof(strlen(argv[optind]) + 5));
101                                 strcpy(name, argv[optind]);
102                                 strcat(name, ".gz");
103                                 f_dst = write_open(name, is_forced);
104                                 if (f_dst < 0) return 1;
105                                 free(name);
106                         }
107                 } else if(pstdout){ 
108                         f_src = fileno(stdin);
109                         f_dst = fileno(stdout);
110                 } else return bgzip_main_usage();
111                 rz = bgzf_fdopen(f_dst, "w");
112                 buffer = malloc(WINDOW_SIZE);
113                 while((c = read(f_src, buffer, WINDOW_SIZE)) > 0) {
114                   if (bgzf_write(rz, buffer, c) < 0) {
115                     fail(rz);
116                   }
117                 }
118                 // f_dst will be closed here
119                 if (bgzf_close(rz) < 0) {
120                   fail(rz);
121                 }
122                 if (argc > optind) unlink(argv[optind]);
123                 free(buffer);
124                 close(f_src);
125                 return 0;
126         } else {
127                 if(argc <= optind) return bgzip_main_usage();
128                 int f_dst;
129                 if (argc > optind && !pstdout) {
130                   char *name;
131                   if (strstr(argv[optind], ".gz") - argv[optind] != strlen(argv[optind]) - 3) {
132                     printf("bgzip: %s: unknown suffix -- ignored\n", argv[optind]);
133                     return 1;
134                   }
135                   name = strdup(argv[optind]);
136                   name[strlen(name) - 3] = '\0';
137                   f_dst = write_open(name, is_forced);
138                   free(name);
139                 } else f_dst = fileno(stdout);
140                 rz = bgzf_open(argv[optind], "r");
141                 if (rz == NULL) {
142                   printf("Could not open file: %s\n", argv[optind]);
143                   return 1;
144                 }
145                 buffer = malloc(WINDOW_SIZE);
146                 if (bgzf_seek(rz, start, SEEK_SET) < 0) {
147                   fail(rz);
148                 }
149                 while(1){
150                   if(end < 0) c = bgzf_read(rz, buffer, WINDOW_SIZE);
151                   else c = bgzf_read(rz, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start));
152                   if(c == 0) break;
153                   if (c < 0) fail(rz);
154                   start += c;
155                   write(f_dst, buffer, c);
156                   if(end >= 0 && start >= end) break;
157                 }
158                 free(buffer);
159                 if (bgzf_close(rz) < 0) {
160                   fail(rz);
161                 }
162                 if (!pstdout) unlink(argv[optind]);
163                 return 0;
164         }
165 }
166