Skip to content

Commit 564394d

Browse files
committed
refactor: 将代码更新为C99,将一些宏改为inline
1 parent 54b262f commit 564394d

File tree

6 files changed

+84
-83
lines changed

6 files changed

+84
-83
lines changed

makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
CC=gcc
22
#SANITIZE=-fsanitize=address
3-
CFLAGS=-Wall -Wextra -Werror -Wshadow $(SANITIZE) -g -O2
3+
CFLAGS=-std=c99 -Wall -Wextra -Werror -Wshadow $(SANITIZE) -g -O2
44
SOURCEDIR=src
55
BUILDDIR=build
66
OBJECTS=$(BUILDDIR)/re.o $(BUILDDIR)/NFA.o $(BUILDDIR)/irregex.o

src/NFA.c

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,13 @@
33
#define NFANODE_INITIAL_SIZE 64
44

55
static NFANode *NFANode_nnew(int size) {
6-
size = size > NFANODE_EDGE_INITIAL_SIZE ? \
6+
size = size > NFANODE_EDGE_INITIAL_SIZE ?
77
size : NFANODE_EDGE_INITIAL_SIZE;
88
NFANode *n = (NFANode *)alloc(sizeof(NFANode));
99
n->is_end = false;
1010
n->size = size;
1111
n->num = 0;
1212
n->edges = (NFAEdge *)RE_calloc(n->size, sizeof(NFAEdge));
13-
14-
#ifdef DRAW_NFA
15-
n->visited_on_draw = false;
16-
#endif
17-
1813
return n;
1914
}
2015

@@ -27,7 +22,9 @@ static void NFANode_resize(NFANode *n) {
2722
n->edges = (NFAEdge *)RE_realloc(n->edges, n->size, sizeof(NFAEdge));
2823
}
2924

30-
#define NFAEdge_drop(e) free(e)
25+
// Release the storage behind `e` with free().
static inline void NFAEdge_drop(NFAEdge *e)
{
    free(e);
}
3128

3229
void NFANode_drop(NFANode *n) {
3330
if (n->edges != NULL && n->size != 0) {
@@ -38,14 +35,13 @@ void NFANode_drop(NFANode *n) {
3835

3936
#define NFAGRAPH_INITIAL_SIZE 256
4037
NFAGraph NFAGraph_nnew(int size) {
41-
NFAGraph g = {
38+
return (NFAGraph) {
4239
.begin = NULL,
4340
.end = NULL,
4441
.size = size,
4542
.nodes = (NFANode **)RE_calloc(size, sizeof(NFANode *)),
4643
.num = 0
4744
};
48-
return g;
4945
}
5046

5147
NFAGraph NFAGraph_new(void) {
@@ -156,7 +152,7 @@ static void NFAGraph_merge(NFAGraph *dst, NFAGraph *src) {
156152
int ns = src->num;
157153
if (nd + ns > dst->size) {
158154
dst->size = (dst->num + src->num) * 2 + 4;
159-
dst->nodes = (NFANode **)RE_realloc(dst->nodes, dst->size, \
155+
dst->nodes = (NFANode **)RE_realloc(dst->nodes, dst->size,
160156
sizeof(NFANode *));
161157
}
162158
for (int i = nd, j = 0; j < ns; ++i, ++j) {
@@ -167,14 +163,16 @@ static void NFAGraph_merge(NFAGraph *dst, NFAGraph *src) {
167163
NFAGraph_drop(src);
168164
}
169165

170-
#define not_end_any_longer(n) \
166+
// Clear the is_end flag of node `n`.
static inline void unset_end(NFANode *n)
{
    n->is_end = false;
}
172169

173-
#define set_end(n) \
174-
n->is_end = true
170+
// Raise the is_end flag of node `n`.
static inline void set_end(NFANode *n)
{
    n->is_end = true;
}
175173

176174
static NFAGraph NFA_cat(NFAGraph *front, NFAGraph *back) {
177-
not_end_any_longer(front->end);
175+
unset_end(front->end);
178176
NFANode_add_epsilon_edge(front->end, back->begin);
179177
front->end = back->end;
180178
NFAGraph_merge(front, back);
@@ -195,7 +193,7 @@ static NFAGraph NFA_or(NFAGraph *subgraphs, int ngraphs) {
195193
set_end(end);
196194
for (int i = 0; i < ngraphs; ++i) {
197195
NFANode_add_epsilon_edge(begin, subgraphs[i].begin);
198-
not_end_any_longer(subgraphs[i].end);
196+
unset_end(subgraphs[i].end);
199197
NFANode_add_epsilon_edge(subgraphs[i].end, end);
200198
NFAGraph_merge(&g, &subgraphs[i]);
201199
}
@@ -257,7 +255,7 @@ static NFAGraph NFA_piece2NFA(RE_Piece *p) {
257255
} else if (p->max == 1 && p->min == 0) { // {0, 1} ?
258256
NFANode_add_epsilon_edge(g.begin, g.end);
259257
} else { // {n, m}
260-
NFANode **begins = (NFANode **)RE_calloc(p->max - p->min, \
258+
NFANode **begins = (NFANode **)RE_calloc(p->max - p->min,
261259
sizeof(NFANode *));
262260
NFAGraph gcpy = NFAGraph_clone(&g);
263261

src/NFA.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,6 @@ typedef struct NFANode {
1818
int num;
1919
int size;
2020
NFAEdge *edges;
21-
#ifdef DRAW_NFA
22-
bool visited_on_draw; // DFS
23-
#endif
2421
} NFANode;
2522

2623
typedef struct {

src/irregex.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ NFAGraph regex_compile(const char *regex) {
99
return g;
1010
}
1111

12-
static int regex_execute_rec(NFANode *n, const char *pos, bool *matched, \
12+
static int regex_execute_rec(NFANode *n, const char *pos, bool *matched,
1313
const char **last_visited_pos) {
1414
if (n->is_end) {
1515
*matched = true;
@@ -28,7 +28,7 @@ static int regex_execute_rec(NFANode *n, const char *pos, bool *matched, \
2828
res = regex_execute_rec(e->next, pos, matched, last_visited_pos);
2929
} else {
3030
if (e->ch[(int)*pos]) {
31-
res = 1 + regex_execute_rec(e->next, pos + 1, matched, \
31+
res = 1 + regex_execute_rec(e->next, pos + 1, matched,
3232
last_visited_pos);
3333
}
3434
}
@@ -40,7 +40,7 @@ static int regex_execute_rec(NFANode *n, const char *pos, bool *matched, \
4040

4141
int regex_execute(NFAGraph *g, const char *str) {
4242
bool matched = false;
43-
const char **last_visited_pos = (const char **)RE_calloc(g->num, \
43+
const char **last_visited_pos = (const char **)RE_calloc(g->num,
4444
sizeof(const char *));
4545
int len = regex_execute_rec(g->begin, str, &matched, last_visited_pos);
4646
free(last_visited_pos);

src/re.c

Lines changed: 40 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,9 @@ RE_State *RES_new(const char *str) {
3030
}
3131

3232
static RE_Token Token_new(void) {
33-
RE_Token token;
34-
token.t = UNKNOWN;
33+
RE_Token token = {
34+
.t = UNKNOWN
35+
};
3536
memset(&token.u, 0, 256);
3637
return token;
3738
}
@@ -76,7 +77,8 @@ void Re_drop(RE_Node *n) {
7677

7778
// Grow the capacity recorded in `branch` to (old size * 2 + 4), hand the new
// capacity to RE_realloc for the `p` array, and return the same branch.
static RE_Branch *Branch_resize(RE_Branch *branch) {
    branch->size *= 2;
    branch->size += 4;

    RE_Piece **grown = (RE_Piece **)RE_realloc(branch->p, branch->size,
                                               sizeof(RE_Piece *));
    branch->p = grown;
    return branch;
}
8284

@@ -90,26 +92,27 @@ static RE_Node *Node_new(void) {
9092

9193
// Grow the capacity recorded in `node` to (old size * 2 + 4), hand the new
// capacity to RE_realloc for the `b` array, and return the same node.
static RE_Node *Node_resize(RE_Node *node) {
    node->size *= 2;
    node->size += 4;

    RE_Branch **grown = (RE_Branch **)RE_realloc(node->b, node->size,
                                                 sizeof(RE_Branch *));
    node->b = grown;
    return node;
}
9699

97-
// Set ch[begin..end] (both ends inclusive) to `fill`.
//
// Kept as an ordinary external function instead of a bare C99 `inline`:
// an `inline` definition with neither `static` nor `extern` provides no
// out-of-line symbol, so any call the compiler does not inline fails to link.
// Indices below zero (e.g. range ends read from a signed `char`) are skipped
// rather than written in front of the table. An empty range (begin > end)
// changes nothing.
void fill_by_range(int begin, int end, bool *ch, bool fill) {
    int first = begin < 0 ? 0 : begin;
    for (int i = first; i <= end; ++i) {
        ch[i] = fill;
    }
}
102-
#
103105

104-
// Set ch[c] to `fill` for every byte c of the NUL-terminated string `s`.
// Bytes are read as unsigned char so that values >= 0x80 cannot turn into
// negative subscripts on platforms where plain `char` is signed.
// `s` is only read, hence const (callers pass string literals).
static inline void fill_by_string(const char *s, bool *ch, bool fill) {
    for (const unsigned char *p = (const unsigned char *)s; *p != '\0'; ++p) {
        ch[*p] = fill;
    }
}
110112

111-
#define fill_by_char(c, ch, fill) \
112-
ch[(int)c] = fill
113+
// Set the table entry for character code `c` to `fill`.
// `c` is reduced to its unsigned-char value first, so a sign-extended `char`
// (a negative int) selects the matching byte instead of a negative index.
static inline void fill_by_char(int c, bool *ch, bool fill) {
    ch[(unsigned char)c] = fill;
}
113116

114117
static RE_Token get_token_escaped(RE_State *st) {
115118
RE_Token res = Token_new();
@@ -185,9 +188,12 @@ static RE_Token get_token_escaped(RE_State *st) {
185188
return res;
186189
}
187190

188-
#define cmp_class(s, class_name, shift) \
189-
(!strncmp(s, class_name, strlen(class_name)) && \
190-
(shift = strlen(class_name)) > 0)
191+
// Test whether `s` starts with `class_name`.
// On a prefix match *shift receives strlen(class_name); on a mismatch *shift
// is left untouched. Returns true only when the prefix matches and
// `class_name` is non-empty.
static inline bool cmp_class(const char *s, const char *class_name,
                             int *shift) {
    // Keep the length as size_t for strncmp; narrow once, explicitly, on store.
    size_t len = strlen(class_name);
    if (strncmp(s, class_name, len) != 0) {
        return false;
    }
    *shift = (int)len;
    return len > 0;
}
191197

192198
// charset is complex and I am lazy
193199
static RE_Token get_token_charset(RE_State *st) {
@@ -220,7 +226,7 @@ static RE_Token get_token_charset(RE_State *st) {
220226
break;
221227
case '-':
222228
if (st->str_read_pos[1] != ']') {
223-
fill_by_range(st->str_read_pos[-1], st->str_read_pos[1], \
229+
fill_by_range(st->str_read_pos[-1], st->str_read_pos[1],
224230
res.u.ch, fill);
225231
st->str_read_pos++;
226232
} else { // ']' can be the last character in bracket
@@ -237,42 +243,42 @@ static RE_Token get_token_charset(RE_State *st) {
237243
// print, punct, space, upper, word, xdigit
238244
st->str_read_pos += 2;
239245
int shift = 0;
240-
if (cmp_class(st->str_read_pos, "ascii", shift)) {
246+
if (cmp_class(st->str_read_pos, "ascii", &shift)) {
241247
fill_by_range(0, 255, res.u.ch, fill);
242-
} else if (cmp_class(st->str_read_pos, "alnum", shift)) {
248+
} else if (cmp_class(st->str_read_pos, "alnum", &shift)) {
243249
fill_by_range((int)'a', (int)'z', res.u.ch, fill);
244250
fill_by_range((int)'A', (int)'Z', res.u.ch, fill);
245251
fill_by_range((int)'0', (int)'9', res.u.ch, fill);
246-
} else if (cmp_class(st->str_read_pos, "alpha", shift)) {
252+
} else if (cmp_class(st->str_read_pos, "alpha", &shift)) {
247253
fill_by_range((int)'a', (int)'z', res.u.ch, fill);
248254
fill_by_range((int)'A', (int)'Z', res.u.ch, fill);
249-
} else if (cmp_class(st->str_read_pos, "blank", shift)) {
255+
} else if (cmp_class(st->str_read_pos, "blank", &shift)) {
250256
res.u.ch[(int)' '] = fill;
251257
res.u.ch[(int)'\t'] = fill;
252-
} else if (cmp_class(st->str_read_pos, "cntrl", shift)) {
258+
} else if (cmp_class(st->str_read_pos, "cntrl", &shift)) {
253259
fill_by_range((int)'\x00', (int)'\x1F', res.u.ch, fill);
254260
res.u.ch[(int)'\x7F'] = fill;
255-
} else if (cmp_class(st->str_read_pos, "digit", shift)) {
261+
} else if (cmp_class(st->str_read_pos, "digit", &shift)) {
256262
fill_by_range((int)'0', (int)'9', res.u.ch, fill);
257-
} else if (cmp_class(st->str_read_pos, "graph", shift)) {
263+
} else if (cmp_class(st->str_read_pos, "graph", &shift)) {
258264
fill_by_range((int)'\x21', (int)'\x7E', res.u.ch, fill);
259-
} else if (cmp_class(st->str_read_pos, "lower", shift)) {
265+
} else if (cmp_class(st->str_read_pos, "lower", &shift)) {
260266
fill_by_range((int)'a', (int)'z', res.u.ch, fill);
261-
} else if (cmp_class(st->str_read_pos, "print", shift)) {
267+
} else if (cmp_class(st->str_read_pos, "print", &shift)) {
262268
fill_by_range((int)'\x20', (int)'\x7E', res.u.ch, fill);
263-
} else if (cmp_class(st->str_read_pos, "punct", shift)) {
264-
fill_by_string("][!\"#$%&'()*+,./:;<=>?@\\^_`{|}~-", res.u.ch, \
269+
} else if (cmp_class(st->str_read_pos, "punct", &shift)) {
270+
fill_by_string("][!\"#$%&'()*+,./:;<=>?@\\^_`{|}~-", res.u.ch,
265271
fill);
266-
} else if (cmp_class(st->str_read_pos, "space", shift)) {
272+
} else if (cmp_class(st->str_read_pos, "space", &shift)) {
267273
fill_by_string(" \t\r\n\v\f", res.u.ch, fill);
268-
} else if (cmp_class(st->str_read_pos, "upper", shift)) {
274+
} else if (cmp_class(st->str_read_pos, "upper", &shift)) {
269275
fill_by_range((int)'A', (int)'Z', res.u.ch, fill);
270-
} else if (cmp_class(st->str_read_pos, "word", shift)) {
276+
} else if (cmp_class(st->str_read_pos, "word", &shift)) {
271277
fill_by_range((int)'a', (int)'z', res.u.ch, fill);
272278
fill_by_range((int)'A', (int)'Z', res.u.ch, fill);
273279
fill_by_range((int)'0', (int)'9', res.u.ch, fill);
274280
res.u.ch[(int)'-'] = fill;
275-
} else if (cmp_class(st->str_read_pos, "xdigit", shift)) {
281+
} else if (cmp_class(st->str_read_pos, "xdigit", &shift)) {
276282
fill_by_range((int)'a', (int)'f', res.u.ch, fill);
277283
fill_by_range((int)'A', (int)'F', res.u.ch, fill);
278284
fill_by_range((int)'0', (int)'9', res.u.ch, fill);
@@ -410,9 +416,10 @@ static RE_Token get_token(RE_State *st) {
410416
return res;
411417
}
412418

413-
#define unget_token(st, t) \
414-
st->unget = t; \
419+
// Store token `t` in the state's unget slot and flag the slot as occupied.
static inline void unget_token(RE_State *st, RE_Token t)
{
    st->has_unget = true;
    st->unget = t;
}
416423

417424
static RE_Atom *parse_atom(RE_State *st) {
418425
RE_Atom *res = alloc(sizeof(RE_Atom));
@@ -474,8 +481,8 @@ static RE_Branch *parse_branch(RE_State *st) {
474481
RE_Token peek = get_token(st);
475482

476483
// jyi's dialect: the outer ')' can be omitted
477-
if (peek.t == END \
478-
|| (peek.t == META && peek.u.metachar == ')') \
484+
if (peek.t == END
485+
|| (peek.t == META && peek.u.metachar == ')')
479486
|| (peek.t == META && peek.u.metachar == '|')) {
480487
unget_token(st, peek);
481488
break;

src/re2graph.c

Lines changed: 26 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,29 @@
11
#include "NFA.h"
22

3-
#ifdef DRAW_NFA
3+
// The limit must be visible before its first use below; a later identical
// #define of the same value is a permitted redefinition.
#ifndef CHARSET_SHOW_MAX
#define CHARSET_SHOW_MAX 16
#endif

// Write the members of the 256-entry character set `ch` to `f` as the tail of
// a DOT edge label, then close the label with `"];` and a newline.
//
// - Only members for which isgraph() holds are printed, at most
//   CHARSET_SHOW_MAX of them.
// - `"` and `\` are written with a leading backslash so that they can neither
//   end the quoted label early nor swallow its closing quote.
// - "..." is appended whenever at least one member was left out, either
//   because it is not printable or because the limit was reached.
void print_charset(FILE *f, bool *ch) {
    bool elided = false;
    int shown = 0;

    for (int i = 0; i < 256; ++i) {
        if (!ch[i]) {
            continue;
        }
        if (!isgraph(i)) {
            elided = true;
            continue;
        }
        if (shown >= CHARSET_SHOW_MAX) {
            // One more printable member exists but does not fit.
            elided = true;
            break;
        }
        if (i == '"' || i == '\\') {
            fputc('\\', f);
        }
        fputc(i, f);
        shown++;
    }

    if (elided) {
        fputs("...", f);
    }
    fputs("\"];\n", f);
}
427

528
#define CHARSET_SHOW_MAX 16
629

@@ -19,36 +42,14 @@ void draw_NFA(FILE *f, NFAGraph *g, char *regexp) {
1942
NFAEdge *e = &n->edges[j];
2043
fprintf(f, " s%d->s%d [label=\"", n->id, e->next->id);
2144
if (e->is_epsilon) {
22-
fprintf(f, "e\"];\n");
45+
fprintf(f, "ε\"];\n");
2346
} else {
24-
bool has_more = true;
25-
int cnt = 0;
26-
for (int i = 0; i < 256; ++i) {
27-
if (e->ch[i]) {
28-
if (isgraph(i)) {
29-
if (strchr("\"", i)) {
30-
fprintf(f, "%c", '\\');
31-
}
32-
fprintf(f, "%c", i);
33-
cnt++;
34-
if (cnt > CHARSET_SHOW_MAX) {
35-
break;
36-
}
37-
} else {
38-
has_more = false;
39-
}
40-
}
41-
}
42-
if (!has_more) {
43-
fprintf(f, "...");
44-
}
45-
fprintf(f, "\"];\n");
47+
print_charset(f, e->ch);
4648
}
4749
}
4850
}
4951
fprintf(f, "}\n");
5052
}
51-
#endif
5253

5354
int main(int argc, char *argv[]) {
5455
assert(argc == 2);
@@ -59,15 +60,13 @@ int main(int argc, char *argv[]) {
5960
NFAGraph g = regex2NFA(re);
6061
Re_drop(re);
6162

62-
#ifdef DRAW_NFA
6363
puts("\033[1;032mregex:\033[0m generate .dot file for NFA GRAPH...");
6464
FILE *f = fopen("./NFA-graph.dot", "w");
6565
draw_NFA(f, &g, argv[1]);
6666
fclose(f);
6767
puts("OK");
6868
puts(".dot file is saved at ./NFA-graph.dot");
6969
puts("");
70-
#endif
7170

7271
NFAGraph_clear(&g);
7372
NFAGraph_drop(&g);

0 commit comments

Comments
 (0)