1
+ /* The MIT License
2
+
3
+ Copyright (c) 2008, 2009, 2011 Attractive Chaos <attractor@live.co.uk>
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
+ SOFTWARE.
24
+ */
25
+
26
+ /* Last Modified: 2017-02-11 */
27
+
28
+ #ifndef AC_KSEQ_H
29
+ #define AC_KSEQ_H
30
+
31
+ #include <ctype.h>
32
+ #include <stdint.h>
33
+ #include <string.h>
34
+ #include <stdlib.h>
35
+
36
+ #define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r
37
+ #define KS_SEP_TAB 1 // isspace() && !' '
38
+ #define KS_SEP_LINE 2 // line separator: "\n" (Unix) or "\r\n" (Windows)
39
+ #define KS_SEP_MAX 2
40
+
41
+ #define __KS_TYPE (type_t ) \
42
+ typedef struct __kstream_t { \
43
+ unsigned char *buf; \
44
+ int begin, end, is_eof; \
45
+ type_t f; \
46
+ } kstream_t;
47
+
48
+ #define ks_err (ks ) ((ks)->end < 0)
49
+ #define ks_eof (ks ) ((ks)->is_eof && (ks)->begin >= (ks)->end)
50
+ #define ks_rewind (ks ) ((ks)->is_eof = (ks)->begin = (ks)->end = 0)
51
+
52
+ #define __KS_BASIC (type_t , __bufsize ) \
53
+ static inline kstream_t *ks_init(type_t f) \
54
+ { \
55
+ kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \
56
+ ks->f = f; \
57
+ ks->buf = (unsigned char*)malloc(__bufsize); \
58
+ return ks; \
59
+ } \
60
+ static inline void ks_destroy(kstream_t *ks) \
61
+ { \
62
+ if (ks) { \
63
+ free(ks->buf); \
64
+ free(ks); \
65
+ } \
66
+ }
67
+
68
+ #define __KS_GETC (__read , __bufsize ) \
69
+ static inline int ks_getc(kstream_t *ks) \
70
+ { \
71
+ if (ks_err(ks)) return -3; \
72
+ if (ks_eof(ks)) return -1; \
73
+ if (ks->begin >= ks->end) { \
74
+ ks->begin = 0; \
75
+ ks->end = __read(ks->f, ks->buf, __bufsize); \
76
+ if (ks->end == 0) { ks->is_eof = 1; return -1; } \
77
+ else if (ks->end < 0) { ks->is_eof = 1; return -3; } \
78
+ } \
79
+ return (int)ks->buf[ks->begin++]; \
80
+ }
81
+
82
+ #ifndef KSTRING_T
83
+ #define KSTRING_T kstring_t
84
+ typedef struct __kstring_t {
85
+ size_t l , m ;
86
+ char * s ;
87
+ } kstring_t ;
88
+ #endif
89
+
90
+ #ifndef kroundup32
91
+ #define kroundup32 (x ) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
92
+ #endif
93
+
94
+ #ifndef kroundup64
95
+ #define kroundup64 (x ) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, (x)|=(x)>>32, ++(x))
96
+ #endif
97
+
98
+ #define __KS_GETUNTIL (__read , __bufsize ) \
99
+ static int64_t ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append) \
100
+ { \
101
+ int gotany = 0; \
102
+ if (dret) *dret = 0; \
103
+ str->l = append? str->l : 0; \
104
+ for (;;) { \
105
+ int i; \
106
+ if (ks_err(ks)) return -3; \
107
+ if (ks->begin >= ks->end) { \
108
+ if (!ks->is_eof) { \
109
+ ks->begin = 0; \
110
+ ks->end = __read(ks->f, ks->buf, __bufsize); \
111
+ if (ks->end == 0) { ks->is_eof = 1; break; } \
112
+ if (ks->end == -1) { ks->is_eof = 1; return -3; } \
113
+ } else break; \
114
+ } \
115
+ if (delimiter == KS_SEP_LINE) { \
116
+ unsigned char *sep = (unsigned char*)memchr(ks->buf + ks->begin, '\n', ks->end - ks->begin); \
117
+ i = sep != NULL ? sep - ks->buf : ks->end; \
118
+ } else if (delimiter > KS_SEP_MAX) { \
119
+ for (i = ks->begin; i < ks->end; ++i) \
120
+ if (ks->buf[i] == delimiter) break; \
121
+ } else if (delimiter == KS_SEP_SPACE) { \
122
+ for (i = ks->begin; i < ks->end; ++i) \
123
+ if (isspace(ks->buf[i])) break; \
124
+ } else if (delimiter == KS_SEP_TAB) { \
125
+ for (i = ks->begin; i < ks->end; ++i) \
126
+ if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \
127
+ } else i = 0; /* never come to here! */ \
128
+ if (str -> m - str -> l < (size_t )(i - ks -> begin + 1 )) { \
129
+ str -> m = str -> l + (i - ks -> begin ) + 1 ; \
130
+ kroundup64 (str -> m ); \
131
+ str -> s = (char * )realloc (str -> s , str -> m ); \
132
+ } \
133
+ gotany = 1 ; \
134
+ memcpy (str -> s + str -> l , ks -> buf + ks -> begin , i - ks -> begin ); \
135
+ str -> l = str -> l + (i - ks -> begin ); \
136
+ ks -> begin = i + 1 ; \
137
+ if (i < ks -> end ) { \
138
+ if (dret ) * dret = ks -> buf [i ]; \
139
+ break ; \
140
+ } \
141
+ } \
142
+ if (!gotany && ks_eof (ks )) return -1 ; \
143
+ if (str -> s == 0 ) { \
144
+ str -> m = 1 ; \
145
+ str -> s = (char * )calloc (1 , 1 ); \
146
+ } else if (delimiter == KS_SEP_LINE && str -> l > 1 && str -> s [str -> l - 1 ] == '\r' ) -- str -> l ; \
147
+ str -> s [str -> l ] = '\0' ; \
148
+ return str -> l ; \
149
+ } \
150
+ static inline int ks_getuntil (kstream_t * ks , int delimiter , kstring_t * str , int * dret ) \
151
+ { return ks_getuntil2 (ks , delimiter , str , dret , 0 ); }
152
+
153
+ #define KSTREAM_INIT (type_t , __read , __bufsize ) \
154
+ __KS_TYPE(type_t) \
155
+ __KS_BASIC(type_t, __bufsize) \
156
+ __KS_GETC(__read, __bufsize) \
157
+ __KS_GETUNTIL(__read, __bufsize)
158
+
159
+ #define kseq_rewind (ks ) ((ks)->last_char = (ks)->f->is_eof = (ks)->f->begin = (ks)->f->end = 0)
160
+
161
+ #define __KSEQ_BASIC (SCOPE , type_t ) \
162
+ SCOPE kseq_t *kseq_init(type_t fd) \
163
+ { \
164
+ kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \
165
+ s->f = ks_init(fd); \
166
+ return s; \
167
+ } \
168
+ SCOPE void kseq_destroy(kseq_t *ks) \
169
+ { \
170
+ if (!ks) return; \
171
+ free(ks->name.s); free(ks->comment.s); free(ks->seq.s); free(ks->qual.s); \
172
+ ks_destroy(ks->f); \
173
+ free(ks); \
174
+ }
175
+
176
+ /* Return value:
177
+ >=0 length of the sequence (normal)
178
+ -1 end-of-file
179
+ -2 truncated quality string
180
+ -3 error reading stream
181
+ */
182
+ #define __KSEQ_READ (SCOPE ) \
183
+ SCOPE int64_t kseq_read(kseq_t *seq) \
184
+ { \
185
+ int c,r; \
186
+ kstream_t *ks = seq->f; \
187
+ if (seq->last_char == 0) { /* then jump to the next header line */ \
188
+ while ((c = ks_getc (ks )) >= 0 && c != '>' && c != '@' ); \
189
+ if (c < 0 ) return c ; /* end of file or error*/ \
190
+ seq -> last_char = c ; \
191
+ } /* else: the first header char has been read in the previous call */ \
192
+ seq -> comment .l = seq -> seq .l = seq -> qual .l = 0 ; /* reset all members */ \
193
+ if ((r = ks_getuntil (ks , 0 , & seq -> name , & c )) < 0 ) return r ; /* normal exit: EOF or error */ \
194
+ if (c != '\n' ) ks_getuntil (ks , KS_SEP_LINE , & seq -> comment , 0 ); /* read FASTA/Q comment */ \
195
+ if (seq -> seq .s == 0 ) { /* we can do this in the loop below, but that is slower */ \
196
+ seq -> seq .m = 256 ; \
197
+ seq -> seq .s = (char * )malloc (seq -> seq .m ); \
198
+ } \
199
+ while ((c = ks_getc (ks )) >= 0 && c != '>' && c != '+' && c != '@' ) { \
200
+ if (c == '\n' ) continue ; /* skip empty lines */ \
201
+ seq -> seq .s [seq -> seq .l ++ ] = c ; /* this is safe: we always have enough space for 1 char */ \
202
+ ks_getuntil2 (ks , KS_SEP_LINE , & seq -> seq , 0 , 1 ); /* read the rest of the line */ \
203
+ } \
204
+ if (c == '>' || c == '@' ) seq -> last_char = c ; /* the first header char has been read */ \
205
+ if (seq -> seq .l + 1 >= seq -> seq .m ) { /* seq->seq.s[seq->seq.l] below may be out of boundary */ \
206
+ seq -> seq .m = seq -> seq .l + 2 ; \
207
+ kroundup64 (seq -> seq .m ); /* rounded to the next closest 2^k */ \
208
+ seq -> seq .s = (char * )realloc (seq -> seq .s , seq -> seq .m ); \
209
+ } \
210
+ seq -> seq .s [seq -> seq .l ] = 0 ; /* null terminated string */ \
211
+ seq -> is_fastq = (c == '+' ); \
212
+ if (!seq -> is_fastq ) return seq -> seq .l ; /* FASTA */ \
213
+ if (seq -> qual .m < seq -> seq .m ) { /* allocate memory for qual in case insufficient */ \
214
+ seq -> qual .m = seq -> seq .m ; \
215
+ seq -> qual .s = (char * )realloc (seq -> qual .s , seq -> qual .m ); \
216
+ } \
217
+ while ((c = ks_getc (ks )) >= 0 && c != '\n' ); /* skip the rest of '+' line */ \
218
+ if (c == -1 ) return -2 ; /* error: no quality string */ \
219
+ while ((c = ks_getuntil2 (ks , KS_SEP_LINE , & seq -> qual , 0 , 1 ) >= 0 && seq -> qual .l < seq -> seq .l )); \
220
+ if (c == -3 ) return -3 ; /* stream error */ \
221
+ seq -> last_char = 0 ; /* we have not come to the next header line */ \
222
+ if (seq -> seq .l != seq -> qual .l ) return -2 ; /* error: qual string is of a different length */ \
223
+ return seq -> seq .l ; \
224
+ }
225
+
226
+ #define __KSEQ_TYPE (type_t ) \
227
+ typedef struct { \
228
+ kstring_t name, comment, seq, qual; \
229
+ int last_char, is_fastq; \
230
+ kstream_t *f; \
231
+ } kseq_t;
232
+
233
+ #define KSEQ_INIT2 (SCOPE , type_t , __read ) \
234
+ KSTREAM_INIT(type_t, __read, 16384) \
235
+ __KSEQ_TYPE(type_t) \
236
+ __KSEQ_BASIC(SCOPE, type_t) \
237
+ __KSEQ_READ(SCOPE)
238
+
239
+ #define KSEQ_INIT (type_t , __read ) KSEQ_INIT2(static, type_t, __read)
240
+
241
+ #define KSEQ_DECLARE (type_t ) \
242
+ __KS_TYPE(type_t) \
243
+ __KSEQ_TYPE(type_t) \
244
+ extern kseq_t *kseq_init(type_t fd); \
245
+ void kseq_destroy(kseq_t *ks); \
246
+ int64_t kseq_read(kseq_t *seq);
247
+
248
+ #endif
0 commit comments