Merge commit 'upstream/0.2.3'
[tabix.git] / main.c
1 #include <string.h>
2 #include <unistd.h>
3 #include <stdlib.h>
4 #include <stdio.h>
5 #include <sys/stat.h>
6 #include "bgzf.h"
7 #include "tabix.h"
8
9 #define PACKAGE_VERSION "0.2.3 (r876)"
10
11 int main(int argc, char *argv[])
12 {
13         int c, skip = -1, meta = -1, list_chrms = 0, force = 0, print_header = 0;
14         ti_conf_t conf = ti_conf_gff;
15         while ((c = getopt(argc, argv, "p:s:b:e:0S:c:lhf")) >= 0) {
16                 switch (c) {
17                 case '0': conf.preset |= TI_FLAG_UCSC; break;
18                 case 'S': skip = atoi(optarg); break;
19                 case 'c': meta = optarg[0]; break;
20                 case 'p':
21                         if (strcmp(optarg, "gff") == 0) conf = ti_conf_gff;
22                         else if (strcmp(optarg, "bed") == 0) conf = ti_conf_bed;
23                         else if (strcmp(optarg, "sam") == 0) conf = ti_conf_sam;
24                         else if (strcmp(optarg, "vcf") == 0 || strcmp(optarg, "vcf4") == 0) conf = ti_conf_vcf;
25                         else if (strcmp(optarg, "psltbl") == 0) conf = ti_conf_psltbl;
26                         else {
27                                 fprintf(stderr, "[main] unrecognized preset '%s'\n", optarg);
28                                 return 1;
29                         }
30                         break;
31                 case 's': conf.sc = atoi(optarg); break;
32                 case 'b': conf.bc = atoi(optarg); break;
33                 case 'e': conf.ec = atoi(optarg); break;
34         case 'l': list_chrms = 1; break;
35         case 'h': print_header = 1; break;
36                 case 'f': force = 1; break;
37                 }
38         }
39         if (skip >= 0) conf.line_skip = skip;
40         if (meta >= 0) conf.meta_char = meta;
41         if (optind == argc) {
42                 fprintf(stderr, "\n");
43                 fprintf(stderr, "Program: tabix (TAB-delimited file InderXer)\n");
44                 fprintf(stderr, "Version: %s\n\n", PACKAGE_VERSION);
45                 fprintf(stderr, "Usage:   tabix <in.tab.bgz> [region1 [region2 [...]]]\n\n");
46                 fprintf(stderr, "Options: -p STR     preset: gff, bed, sam, vcf, psltbl [gff]\n");
47                 fprintf(stderr, "         -s INT     sequence name column [1]\n");
48                 fprintf(stderr, "         -b INT     start column [4]\n");
49                 fprintf(stderr, "         -e INT     end column [5]\n");
50                 fprintf(stderr, "         -S INT     skip first INT lines [0]\n");
51                 fprintf(stderr, "         -c CHAR    symbol for comment/meta lines [#]\n");
52                 fprintf(stderr, "         -0         zero-based coordinate\n");
53                 fprintf(stderr, "         -h         print the VCF header\n");
54                 fprintf(stderr, "         -l         list chromosome names\n");
55                 fprintf(stderr, "         -f         force to overwrite the index\n");
56                 fprintf(stderr, "\n");
57                 return 1;
58         }
59     if (list_chrms) {
60                 ti_index_t *idx;
61                 int i, n;
62                 const char **names;
63                 idx = ti_index_load(argv[optind]);
64                 if (idx == 0) {
65                         fprintf(stderr, "[main] fail to load the index file.\n");
66                         return 1;
67                 }
68                 names = ti_seqname(idx, &n);
69                 for (i = 0; i < n; ++i) printf("%s\n", names[i]);
70                 free(names);
71                 ti_index_destroy(idx);
72                 return 0;
73         }
74         if (optind + 1 == argc) {
75                 if (force == 0) {
76                         struct stat buf;
77                         char *fnidx = calloc(strlen(argv[optind]) + 5, 1);
78                         strcat(strcpy(fnidx, argv[optind]), ".tbi");
79                         if (stat(fnidx, &buf) == 0) {
80                                 fprintf(stderr, "[tabix] the index file exists. Please use '-f' to overwrite.\n");
81                                 free(fnidx);
82                                 return 1;
83                         }
84                         free(fnidx);
85                 }
86         if ( is_bgzipped(argv[optind])!=1 )
87         {
88             fprintf(stderr,"[tabix] was bgzip used to compress this file? %s\n", argv[optind]);
89             return 1;
90         }
91                 return ti_index_build(argv[optind], &conf);
92         }
93         { // retrieve
94                 tabix_t *t;
95                 if ((t = ti_open(argv[optind], 0)) == 0) {
96                         fprintf(stderr, "[main] fail to open the data file.\n");
97                         return 1;
98                 }
99                 if (strcmp(argv[optind+1], ".") == 0) { // retrieve all
100                         ti_iter_t iter;
101                         const char *s;
102                         int len;
103                         iter = ti_query(t, 0, 0, 0);
104                         while ((s = ti_read(t, iter, &len)) != 0) {
105                                 fputs(s, stdout); fputc('\n', stdout);
106                         }
107                         ti_iter_destroy(iter);
108                 } else { // retrieve from specified regions
109                         int i;
110                         if ( ti_lazy_index_load(t) ) 
111             {
112                 fprintf(stderr,"[tabix] failed to load the index file.\n");
113                 return 1;
114             }
115
116             ti_iter_t iter;
117             const char *s;
118             int len;
119             if ( print_header )
120             {
121                 // If requested, print the header lines here
122                 iter = ti_query(t, 0, 0, 0);
123                 while ((s = ti_read(t, iter, &len)) != 0) {
124                     if ( *s != '#' ) break;
125                     fputs(s, stdout); fputc('\n', stdout);
126                 }
127                 ti_iter_destroy(iter);
128             }
129                         for (i = optind + 1; i < argc; ++i) {
130                                 int tid, beg, end;
131                                 if (ti_parse_region(t->idx, argv[i], &tid, &beg, &end) == 0) {
132                                         iter = ti_queryi(t, tid, beg, end);
133                                         while ((s = ti_read(t, iter, &len)) != 0) {
134                                                 fputs(s, stdout); fputc('\n', stdout);
135                                         }
136                                         ti_iter_destroy(iter);
137                                 } 
138                 // else fprintf(stderr, "[main] invalid region: unknown target name or minus interval.\n");
139                         }
140                 }
141                 ti_close(t);
142         }
143         return 0;
144 }