Imported Debian patch 0.1.5c-1
[samtools.git] / kseq.h
1 /* The MIT License
2
3    Copyright (c) 2008 Genome Research Ltd (GRL).
4
5    Permission is hereby granted, free of charge, to any person obtaining
6    a copy of this software and associated documentation files (the
7    "Software"), to deal in the Software without restriction, including
8    without limitation the rights to use, copy, modify, merge, publish,
9    distribute, sublicense, and/or sell copies of the Software, and to
10    permit persons to whom the Software is furnished to do so, subject to
11    the following conditions:
12
13    The above copyright notice and this permission notice shall be
14    included in all copies or substantial portions of the Software.
15
16    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20    BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23    SOFTWARE.
24 */
25
26 /* Contact: Heng Li <lh3@sanger.ac.uk> */
27
28 /* Last Modified: 12APR2009 */
29
30 #ifndef AC_KSEQ_H
31 #define AC_KSEQ_H
32
33 #include <ctype.h>
34 #include <string.h>
35 #include <stdlib.h>
36
/* Special delimiter codes accepted by ks_getuntil(). */
#define KS_SEP_SPACE 0 /* split on any character for which isspace() is true */
#define KS_SEP_TAB   1 /* split on isspace() characters other than ' ' */
#define KS_SEP_MAX   1 /* largest special code; greater values are literal delimiter characters */
40
/*
 * Declare kstream_t, a buffered reader wrapped around a handle of type type_t.
 *
 *   buf     read buffer
 *   begin   index in buf of the next unread byte
 *   end     number of valid bytes in buf (value returned by the last read)
 *   is_eof  non-zero once a read returned fewer bytes than the buffer size
 *   f       underlying handle passed to the reader function
 */
#define __KS_TYPE(type_t)               \
        typedef struct __kstream_t {    \
                char *buf;              \
                int begin;              \
                int end;                \
                int is_eof;             \
                type_t f;               \
        } kstream_t;
47
/* Non-zero when the EOF flag is set and no unread bytes remain in the buffer. */
#define ks_eof(ks) ((ks)->is_eof && !((ks)->begin < (ks)->end))
/* Clear the buffer indices and the EOF flag of a stream; the expression evaluates to 0. */
#define ks_rewind(ks) ((ks)->is_eof = ((ks)->begin = ((ks)->end = 0)))
50
/*
 * Generate the constructor and destructor for kstream_t.
 *
 * ks_init(f)     allocates a zeroed stream around handle f together with a
 *                read buffer of __bufsize bytes. Returns NULL if either
 *                allocation fails (nothing is leaked in that case).
 * ks_destroy(ks) frees the buffer and the stream; a NULL argument is a no-op.
 *                The handle f itself is not closed here.
 */
#define __KS_BASIC(type_t, __bufsize)                                           \
        static inline kstream_t *ks_init(type_t f)                              \
        {                                                                       \
                kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t));       \
                if (ks == NULL) return NULL;                                    \
                ks->buf = (char*)malloc(__bufsize);                             \
                if (ks->buf == NULL) { /* do not leak the half-built stream */  \
                        free(ks);                                               \
                        return NULL;                                            \
                }                                                               \
                ks->f = f;                                                      \
                return ks;                                                      \
        }                                                                       \
        static inline void ks_destroy(kstream_t *ks)                            \
        {                                                                       \
                if (ks == NULL) return;                                         \
                free(ks->buf);                                                  \
                free(ks);                                                       \
        }
66
/*
 * Generate ks_getc(): read one byte from the stream, refilling the buffer
 * through __read(f, buf, __bufsize) when it is exhausted.
 *
 * Returns the byte as a value in 0..255, or -1 at end of input or when the
 * reader reports an error (a non-positive return). The byte is converted via
 * unsigned char so that bytes >= 0x80 are never negative and 0xFF cannot be
 * mistaken for the -1 sentinel.
 */
#define __KS_GETC(__read, __bufsize)                                            \
        static inline int ks_getc(kstream_t *ks)                                \
        {                                                                       \
                if (ks->begin >= ks->end) {                                     \
                        if (ks->is_eof) return -1;                              \
                        ks->begin = 0;                                          \
                        ks->end = __read(ks->f, ks->buf, __bufsize);            \
                        if (ks->end < __bufsize) ks->is_eof = 1;                \
                        if (ks->end <= 0) { /* nothing read, or read error */   \
                                ks->end = 0;                                    \
                                return -1;                                      \
                        }                                                       \
                }                                                               \
                return (int)(unsigned char)ks->buf[ks->begin++];                \
        }
79
#ifndef KSTRING_T
#define KSTRING_T kstring_t
/* Growable character buffer shared by the readers in this header. */
typedef struct __kstring_t {
        size_t l;       /* number of bytes currently stored, excluding the terminating NUL */
        size_t m;       /* number of bytes allocated for s */
        char *s;        /* heap buffer, or NULL before the first allocation */
} kstring_t;
#endif
87
#ifndef kroundup32
/*
 * Round x up, in place, to the next power of two (a value that already is a
 * power of two is left unchanged). The bit smearing covers 32 bits only.
 * x must be a modifiable lvalue; it is evaluated several times.
 */
#define kroundup32(x)                   \
        (--(x),                         \
         (x) |= (x) >> 1,               \
         (x) |= (x) >> 2,               \
         (x) |= (x) >> 4,               \
         (x) |= (x) >> 8,               \
         (x) |= (x) >> 16,              \
         ++(x))
#endif
91
/*
 * Generate ks_getuntil(): read bytes from the stream into str until a
 * delimiter is met or the input ends. The delimiter is consumed but not stored.
 *
 *   ks         stream to read from
 *   delimiter  KS_SEP_SPACE, KS_SEP_TAB, or a literal character > KS_SEP_MAX
 *   str        output buffer; its previous contents are discarded, its storage
 *              is reused and grown as needed, and it is NUL-terminated
 *   dret       if non-NULL, receives the delimiter that ended the token, or 0
 *              when the token was ended by the end of input
 *
 * Returns the token length (>= 0), -1 if the stream was already exhausted on
 * entry, or -3 if memory could not be allocated (str stays safe to free but
 * its contents are unspecified).
 */
#define __KS_GETUNTIL(__read, __bufsize)                                        \
        static int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
        {                                                                       \
                if (dret) *dret = 0;                                            \
                str->l = 0;                                                     \
                if (ks->begin >= ks->end && ks->is_eof) return -1;              \
                for (;;) {                                                      \
                        int i;                                                  \
                        size_t n;                                               \
                        if (ks->begin >= ks->end) {                             \
                                if (ks->is_eof) break;                          \
                                ks->begin = 0;                                  \
                                ks->end = __read(ks->f, ks->buf, __bufsize);    \
                                if (ks->end < __bufsize) ks->is_eof = 1;        \
                                if (ks->end <= 0) { /* nothing read, or read error */ \
                                        ks->end = 0;                            \
                                        break;                                  \
                                }                                               \
                        }                                                       \
                        /* <ctype.h> functions need an unsigned char value */   \
                        if (delimiter > KS_SEP_MAX) {                           \
                                for (i = ks->begin; i < ks->end; ++i)           \
                                        if (ks->buf[i] == delimiter) break;     \
                        } else if (delimiter == KS_SEP_SPACE) {                 \
                                for (i = ks->begin; i < ks->end; ++i)           \
                                        if (isspace((unsigned char)ks->buf[i])) break; \
                        } else if (delimiter == KS_SEP_TAB) {                   \
                                for (i = ks->begin; i < ks->end; ++i)           \
                                        if (isspace((unsigned char)ks->buf[i]) && ks->buf[i] != ' ') break; \
                        } else i = ks->end; /* invalid delimiter: nothing matches */ \
                        n = (size_t)(i - ks->begin);                            \
                        if (str->m - str->l < n + 1) { /* keep room for the NUL */ \
                                size_t cap = str->l + n + 1;                    \
                                char *tmp;                                      \
                                kroundup32(cap);                                \
                                tmp = (char*)realloc(str->s, cap);              \
                                if (tmp == NULL) return -3;                     \
                                str->s = tmp;                                   \
                                str->m = cap;                                   \
                        }                                                       \
                        memcpy(str->s + str->l, ks->buf + ks->begin, n);        \
                        str->l += n;                                            \
                        ks->begin = i + 1; /* step over the delimiter */        \
                        if (i < ks->end) {                                      \
                                if (dret) *dret = (unsigned char)ks->buf[i];    \
                                break;                                          \
                        }                                                       \
                }                                                               \
                /* allocate only if there is no buffer yet; replacing an */     \
                /* existing buffer here would leak it on every empty token */   \
                if (str->s == NULL) {                                           \
                        str->s = (char*)calloc(1, 1);                           \
                        if (str->s == NULL) {                                   \
                                str->m = 0;                                     \
                                return -3;                                      \
                        }                                                       \
                        str->m = 1;                                             \
                }                                                               \
                str->s[str->l] = '\0';                                          \
                return (int)str->l;                                             \
        }
138
/*
 * Instantiate the buffered-stream layer for one handle type.
 *
 *   type_t     type of the underlying handle stored in kstream_t
 *   __read     reader invoked as __read(handle, buffer, size)
 *   __bufsize  size in bytes of the internal read buffer
 *
 * Expands to the kstream_t type plus ks_init(), ks_destroy(), ks_getc() and
 * ks_getuntil().
 */
#define KSTREAM_INIT(type_t, __read, __bufsize)         \
        __KS_TYPE(type_t)                               \
        __KS_BASIC(type_t, __bufsize)                   \
        __KS_GETC(__read, __bufsize)                    \
        __KS_GETUNTIL(__read, __bufsize)
144
/*
 * Generate the constructor, rewind and destructor for kseq_t.
 *
 * kseq_init(fd)    allocates a zeroed record reader on top of a new stream for
 *                  handle fd. Returns NULL if any allocation fails.
 * kseq_rewind(ks)  forgets the pending header character and resets the stream
 *                  buffer state; the underlying handle is not touched here.
 * kseq_destroy(ks) frees the four string buffers, the stream and the reader;
 *                  a NULL argument is a no-op.
 */
#define __KSEQ_BASIC(type_t)                                                    \
        static inline kseq_t *kseq_init(type_t fd)                              \
        {                                                                       \
                kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t));                 \
                if (s == NULL) return NULL;                                     \
                s->f = ks_init(fd);                                             \
                if (s->f == NULL) { /* do not hand out a reader without a stream */ \
                        free(s);                                                \
                        return NULL;                                            \
                }                                                               \
                return s;                                                       \
        }                                                                       \
        static inline void kseq_rewind(kseq_t *ks)                              \
        {                                                                       \
                ks->last_char = 0;                                              \
                ks_rewind(ks->f);                                               \
        }                                                                       \
        static inline void kseq_destroy(kseq_t *ks)                             \
        {                                                                       \
                if (ks == NULL) return;                                         \
                free(ks->name.s);                                               \
                free(ks->comment.s);                                            \
                free(ks->seq.s);                                                \
                free(ks->qual.s);                                               \
                ks_destroy(ks->f);                                              \
                free(ks);                                                       \
        }
164
/*
 * Generate kseq_read(): read the next FASTA ('>') or FASTQ ('@') record from
 * seq->f into seq->name, seq->comment, seq->seq and seq->qual.
 *
 * Return value:
 *   >=0  length of the sequence (normal)
 *   -1   end-of-file
 *   -2   truncated quality string
 *   -3   memory allocation failure
 *
 * seq->seq.s is always a valid NUL-terminated string after a non-negative
 * return, even when the record contains no sequence characters.
 */
#define __KSEQ_READ                                                             \
        static int kseq_read(kseq_t *seq)                                       \
        {                                                                       \
                int c;                                                          \
                char *tmp;                                                      \
                kstream_t *ks = seq->f;                                         \
                if (seq->last_char == 0) { /* then jump to the next header line */ \
                        while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \
                        if (c == -1) return -1; /* end of file */               \
                        seq->last_char = c;                                     \
                } /* the first header char has been read */                     \
                seq->comment.l = seq->seq.l = seq->qual.l = 0;                  \
                if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1;          \
                if (c != '\n') ks_getuntil(ks, '\n', &seq->comment, 0);         \
                while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
                        /* isgraph() must not be given a negative value */      \
                        if (c < 0 || !isgraph(c)) continue;                     \
                        if (seq->seq.l + 1 >= seq->seq.m) { /* grow, keeping room for the NUL */ \
                                size_t cap = seq->seq.l + 2;                    \
                                kroundup32(cap); /* rounded to next closest 2^k */ \
                                tmp = (char*)realloc(seq->seq.s, cap);          \
                                if (tmp == NULL) return -3;                     \
                                seq->seq.s = tmp;                               \
                                seq->seq.m = cap;                               \
                        }                                                       \
                        seq->seq.s[seq->seq.l++] = (char)c;                     \
                }                                                               \
                if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
                if (seq->seq.s == NULL) { /* no sequence byte seen yet: nothing was allocated */ \
                        tmp = (char*)malloc(2);                                 \
                        if (tmp == NULL) return -3;                             \
                        seq->seq.s = tmp;                                       \
                        seq->seq.m = 2;                                         \
                }                                                               \
                seq->seq.s[seq->seq.l] = 0; /* null terminated string */        \
                if (c != '+') return (int)seq->seq.l; /* FASTA */               \
                if (seq->qual.m < seq->seq.m) { /* allocate enough memory */    \
                        tmp = (char*)realloc(seq->qual.s, seq->seq.m);          \
                        if (tmp == NULL) return -3;                             \
                        seq->qual.s = tmp;                                      \
                        seq->qual.m = seq->seq.m;                               \
                }                                                               \
                while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
                if (c == -1) return -2; /* we should not stop here */           \
                while ((c = ks_getc(ks)) != -1 && seq->qual.l < seq->seq.l)     \
                        if (c >= 33 && c <= 127) seq->qual.s[seq->qual.l++] = (unsigned char)c; \
                seq->qual.s[seq->qual.l] = 0; /* null terminated string */      \
                seq->last_char = 0; /* we have not come to the next header line */ \
                if (seq->seq.l != seq->qual.l) return -2; /* qual string is shorter than seq string */ \
                return (int)seq->seq.l;                                         \
        }
209
/*
 * Declare kseq_t, the state of a FASTA/FASTQ record reader.
 *
 *   name, comment, seq, qual  buffers holding the fields of the current record
 *   last_char                 header character ('>' or '@') already consumed
 *                             for the next record, or 0 if none is pending
 *   f                         buffered stream the records are read from
 *
 * The type_t argument is accepted for symmetry with the other generators and
 * is not used in the expansion.
 */
#define __KSEQ_TYPE(type_t)                     \
        typedef struct {                        \
                kstring_t name;                 \
                kstring_t comment;              \
                kstring_t seq;                  \
                kstring_t qual;                 \
                int last_char;                  \
                kstream_t *f;                   \
        } kseq_t;
216
217 #define KSEQ_INIT(type_t, __read)                               \
218         KSTREAM_INIT(type_t, __read, 4096)                      \
219         __KSEQ_TYPE(type_t)                                                     \
220         __KSEQ_BASIC(type_t)                                            \
221         __KSEQ_READ
222
223 #endif