55e7029ed9f99d932c50e3938ac0e262b84d0517
[pysam.git] / tabix / bgzip.c.pysam.c
1 #include "pysam.h"
2
3 /* The MIT License
4
5    Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
6
7    Permission is hereby granted, free of charge, to any person obtaining a copy
8    of this software and associated documentation files (the "Software"), to deal
9    in the Software without restriction, including without limitation the rights
10    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11    copies of the Software, and to permit persons to whom the Software is
12    furnished to do so, subject to the following conditions:
13
14    The above copyright notice and this permission notice shall be included in
15    all copies or substantial portions of the Software.
16
17    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23    THE SOFTWARE.
24 */
25
26 #include <stdlib.h>
27 #include <string.h>
28 #include <stdio.h>
29 #include <fcntl.h>
30 #include <unistd.h>
31 #include <errno.h>
32 #include <sys/select.h>
33 #include <sys/stat.h>
34 #include "bgzf.h"
35
/* Size in bytes (64 KiB) of the I/O buffer used for each read/write chunk. */
static const int WINDOW_SIZE = 1 << 16;
37
38 static int bgzip_main_usage()
39 {
40         fprintf(pysamerr, "\n");
41         fprintf(pysamerr, "Usage:   bgzip [options] [file] ...\n\n");
42         fprintf(pysamerr, "Options: -c      write on standard output, keep original files unchanged\n");
43         fprintf(pysamerr, "         -d      decompress\n");
44         fprintf(pysamerr, "         -f      overwrite files without asking\n");
45         fprintf(pysamerr, "         -b INT  decompress at virtual file pointer INT\n");
46         fprintf(pysamerr, "         -s INT  decompress INT bytes in the uncompressed file\n");
47         fprintf(pysamerr, "         -h      give this help\n");
48         fprintf(pysamerr, "\n");
49         return 1;
50 }
51
52 static int write_open(const char *fn, int is_forced)
53 {
54         int fd = -1;
55         char c;
56         if (!is_forced) {
57                 if ((fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, 0666)) < 0 && errno == EEXIST) {
58                         fprintf(pysamerr, "[bgzip] %s already exists; do you wish to overwrite (y or n)? ", fn);
59                         scanf("%c", &c);
60                         if (c != 'Y' && c != 'y') {
61                                 fprintf(pysamerr, "[bgzip] not overwritten\n");
62                                 exit(1);
63                         }
64                 }
65         }
66         if (fd < 0) {
67                 if ((fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0666)) < 0) {
68                         fprintf(pysamerr, "[bgzip] %s: Fail to write\n", fn);
69                         exit(1);
70                 }
71         }
72         return fd;
73 }
74
75 static void fail(BGZF* fp)
76 {
77     fprintf(pysamerr, "Error: %s\n", fp->error);
78     exit(1);
79 }
80
81 int main(int argc, char **argv)
82 {
83         int c, compress, pstdout, is_forced;
84         BGZF *fp;
85         void *buffer;
86         long start, end, size;
87
88         compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0;
89         while((c  = getopt(argc, argv, "cdhfb:s:")) >= 0){
90                 switch(c){
91                 case 'h': return bgzip_main_usage();
92                 case 'd': compress = 0; break;
93                 case 'c': pstdout = 1; break;
94                 case 'b': start = atol(optarg); break;
95                 case 's': size = atol(optarg); break;
96                 case 'f': is_forced = 1; break;
97                 }
98         }
99         if (size >= 0) end = start + size;
100         if (end >= 0 && end < start) {
101                 fprintf(pysamerr, "[bgzip] Illegal region: [%ld, %ld]\n", start, end);
102                 return 1;
103         }
104         if (compress == 1) {
105                 struct stat sbuf;
106                 int f_src = fileno(stdin);
107                 int f_dst = fileno(stdout);
108
109                 if ( argc>optind )
110                 {
111                         if ( stat(argv[optind],&sbuf)<0 ) 
112                         { 
113                                 fprintf(pysamerr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
114                                 return 1; 
115                         }
116
117                         if ((f_src = open(argv[optind], O_RDONLY)) < 0) {
118                                 fprintf(pysamerr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
119                                 return 1;
120                         }
121
122                         if (pstdout)
123                                 f_dst = fileno(stdout);
124                         else
125                         {
126                                 char *name = malloc(strlen(argv[optind]) + 5);
127                                 strcpy(name, argv[optind]);
128                                 strcat(name, ".gz");
129                                 f_dst = write_open(name, is_forced);
130                                 if (f_dst < 0) return 1;
131                                 free(name);
132                         }
133                 }
134                 else if (!pstdout && isatty(fileno((FILE *)stdout)) )
135                         return bgzip_main_usage();
136
137                 fp = bgzf_fdopen(f_dst, "w");
138                 buffer = malloc(WINDOW_SIZE);
139                 while ((c = read(f_src, buffer, WINDOW_SIZE)) > 0)
140                         if (bgzf_write(fp, buffer, c) < 0) fail(fp);
141                 // f_dst will be closed here
142                 if (bgzf_close(fp) < 0) fail(fp);
143                 if (argc > optind && !pstdout) unlink(argv[optind]);
144                 free(buffer);
145                 close(f_src);
146                 return 0;
147         } else {
148                 struct stat sbuf;
149                 int f_dst;
150
151                 if ( argc>optind )
152                 {
153                         if ( stat(argv[optind],&sbuf)<0 )
154                         {
155                                 fprintf(pysamerr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
156                                 return 1;
157                         }
158                         char *name;
159                         int len = strlen(argv[optind]);
160                         if ( strcmp(argv[optind]+len-3,".gz") )
161                         {
162                                 fprintf(pysamerr, "[bgzip] %s: unknown suffix -- ignored\n", argv[optind]);
163                                 return 1;
164                         }
165                         fp = bgzf_open(argv[optind], "r");
166                         if (fp == NULL) {
167                                 fprintf(pysamerr, "[bgzip] Could not open file: %s\n", argv[optind]);
168                                 return 1;
169                         }
170
171                         if (pstdout) {
172                                 f_dst = fileno(stdout);
173                         }
174                         else {
175                                 name = strdup(argv[optind]);
176                                 name[strlen(name) - 3] = '\0';
177                                 f_dst = write_open(name, is_forced);
178                                 free(name);
179                         }
180                 }
181                 else if (!pstdout && isatty(fileno((FILE *)stdin)) )
182                         return bgzip_main_usage();
183                 else
184                 {
185                         f_dst = fileno(stdout);
186                         fp = bgzf_fdopen(fileno(stdin), "r");
187                         if (fp == NULL) {
188                                 fprintf(pysamerr, "[bgzip] Could not read from stdin: %s\n", strerror(errno));
189                                 return 1;
190                         }
191                 }
192                 buffer = malloc(WINDOW_SIZE);
193                 if (bgzf_seek(fp, start, SEEK_SET) < 0) fail(fp);
194                 while (1) {
195                         if (end < 0) c = bgzf_read(fp, buffer, WINDOW_SIZE);
196                         else c = bgzf_read(fp, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start));
197                         if (c == 0) break;
198                         if (c < 0) fail(fp);
199                         start += c;
200                         write(f_dst, buffer, c);
201                         if (end >= 0 && start >= end) break;
202                 }
203                 free(buffer);
204                 if (bgzf_close(fp) < 0) fail(fp);
205                 if (!pstdout) unlink(argv[optind]);
206                 return 0;
207         }
208 }