refactor: 将代码更新为C99，将一些宏改为inline

vaaandark · vaaandark · commit 564394d8ea35 · 2022-09-01T10:53:08.000+08:00
diff --git a/makefile b/makefile
@@ -1,6 +1,6 @@
 CC=gcc
 #SANITIZE=-fsanitize=address
-CFLAGS=-Wall -Wextra -Werror -Wshadow $(SANITIZE) -g -O2
+CFLAGS=-std=c99 -Wall -Wextra -Werror -Wshadow $(SANITIZE) -g -O2
 SOURCEDIR=src
 BUILDDIR=build
 OBJECTS=$(BUILDDIR)/re.o $(BUILDDIR)/NFA.o $(BUILDDIR)/irregex.o
diff --git a/src/NFA.c b/src/NFA.c
@@ -3,18 +3,13 @@
 #define NFANODE_INITIAL_SIZE 64
 
 static NFANode *NFANode_nnew(int size) {
-    size = size > NFANODE_EDGE_INITIAL_SIZE ? \
+    size = size > NFANODE_EDGE_INITIAL_SIZE ?
                 size : NFANODE_EDGE_INITIAL_SIZE;
     NFANode *n = (NFANode *)alloc(sizeof(NFANode));
     n->is_end = false;
     n->size = size;
     n->num = 0;
     n->edges = (NFAEdge *)RE_calloc(n->size, sizeof(NFAEdge));
-
-#ifdef DRAW_NFA
-    n->visited_on_draw = false;
-#endif
-
     return n;
 }
 
@@ -27,7 +22,9 @@ static void NFANode_resize(NFANode *n) {
     n->edges = (NFAEdge *)RE_realloc(n->edges, n->size, sizeof(NFAEdge));
 }
 
-#define NFAEdge_drop(e) free(e)
+static inline void NFAEdge_drop(NFAEdge *e) { // thin wrapper around free()
+    free(e); // releases only the pointer passed in
+}
 
 void NFANode_drop(NFANode *n) {
     if (n->edges != NULL && n->size != 0) {
@@ -38,14 +35,13 @@ void NFANode_drop(NFANode *n) {
 
 #define NFAGRAPH_INITIAL_SIZE 256
 NFAGraph NFAGraph_nnew(int size) {
-    NFAGraph g = {
+    return (NFAGraph) {
         .begin = NULL,
         .end = NULL,
         .size = size,
         .nodes = (NFANode **)RE_calloc(size, sizeof(NFANode *)),
         .num = 0
     };
-    return g;
 }
 
 NFAGraph NFAGraph_new(void) {
@@ -156,7 +152,7 @@ static void NFAGraph_merge(NFAGraph *dst, NFAGraph *src) {
     int ns = src->num;
     if (nd + ns > dst->size) {
         dst->size = (dst->num + src->num) * 2 + 4;
-        dst->nodes = (NFANode **)RE_realloc(dst->nodes, dst->size, \
+        dst->nodes = (NFANode **)RE_realloc(dst->nodes, dst->size,
                 sizeof(NFANode *));
     }
     for (int i = nd, j = 0; j < ns; ++i, ++j) {
@@ -167,14 +163,16 @@ static void NFAGraph_merge(NFAGraph *dst, NFAGraph *src) {
     NFAGraph_drop(src);
 }
 
-#define not_end_any_longer(n) \
+static inline void unset_end(NFANode *n) { // n stops being an end node
     n->is_end = false;
+}
 
-#define set_end(n) \
-    n->is_end = true
+static inline void set_end(NFANode *n) { // mark n as an end node
+    n->is_end = true;
+}
 
 static NFAGraph NFA_cat(NFAGraph *front, NFAGraph *back) {
-    not_end_any_longer(front->end);
+    unset_end(front->end);
     NFANode_add_epsilon_edge(front->end, back->begin);
     front->end = back->end;
     NFAGraph_merge(front, back);
@@ -195,7 +193,7 @@ static NFAGraph NFA_or(NFAGraph *subgraphs, int ngraphs) {
     set_end(end);
     for (int i = 0; i < ngraphs; ++i) {
         NFANode_add_epsilon_edge(begin, subgraphs[i].begin);
-        not_end_any_longer(subgraphs[i].end);
+        unset_end(subgraphs[i].end);
         NFANode_add_epsilon_edge(subgraphs[i].end, end);
         NFAGraph_merge(&g, &subgraphs[i]);
     }
@@ -257,7 +255,7 @@ static NFAGraph NFA_piece2NFA(RE_Piece *p) {
     } else if (p->max == 1 && p->min == 0) { // {0, 1} ?
         NFANode_add_epsilon_edge(g.begin, g.end);
     } else { // {n, m}
-        NFANode **begins = (NFANode **)RE_calloc(p->max - p->min, \
+        NFANode **begins = (NFANode **)RE_calloc(p->max - p->min,
                 sizeof(NFANode *));
         NFAGraph gcpy = NFAGraph_clone(&g);
 
diff --git a/src/NFA.h b/src/NFA.h
@@ -18,9 +18,6 @@ typedef struct NFANode {
     int num;
     int size;
     NFAEdge *edges;
-#ifdef DRAW_NFA
-    bool visited_on_draw; // DFS
-#endif
 } NFANode;
 
 typedef struct {
diff --git a/src/irregex.c b/src/irregex.c
@@ -9,7 +9,7 @@ NFAGraph regex_compile(const char *regex) {
     return g;
 }
 
-static int regex_execute_rec(NFANode *n, const char *pos, bool *matched, \
+static int regex_execute_rec(NFANode *n, const char *pos, bool *matched,
         const char **last_visited_pos) {
     if (n->is_end) {
         *matched = true;
@@ -28,7 +28,7 @@ static int regex_execute_rec(NFANode *n, const char *pos, bool *matched, \
             res = regex_execute_rec(e->next, pos, matched, last_visited_pos);
         } else {
             if (e->ch[(int)*pos]) {
-                res = 1 + regex_execute_rec(e->next, pos + 1, matched, \
+                res = 1 + regex_execute_rec(e->next, pos + 1, matched,
                         last_visited_pos);
             }
         }
@@ -40,7 +40,7 @@ static int regex_execute_rec(NFANode *n, const char *pos, bool *matched, \
 
 int regex_execute(NFAGraph *g, const char *str) {
     bool matched = false;
-    const char **last_visited_pos = (const char **)RE_calloc(g->num, \
+    const char **last_visited_pos = (const char **)RE_calloc(g->num,
             sizeof(const char *));
     int len = regex_execute_rec(g->begin, str, &matched, last_visited_pos);
     free(last_visited_pos);
diff --git a/src/re.c b/src/re.c
@@ -30,8 +30,9 @@ RE_State *RES_new(const char *str) {
 }
 
 static RE_Token Token_new(void) {
-    RE_Token token;
-    token.t = UNKNOWN;
+    RE_Token token = {
+        .t = UNKNOWN
+    };
     memset(&token.u, 0, 256);
     return token;
 }
@@ -76,7 +77,8 @@ void Re_drop(RE_Node *n) {
 
 static RE_Branch *Branch_resize(RE_Branch *branch) {
     branch->size = branch->size * 2 + 4;
-    branch->p = (RE_Piece **)RE_realloc(branch->p, branch->size, sizeof(RE_Piece *));
+    branch->p = (RE_Piece **)RE_realloc(branch->p, branch->size,
+            sizeof(RE_Piece *));
     return branch;
 }
 
@@ -90,26 +92,34 @@ static RE_Node *Node_new(void) {
 
 static RE_Node *Node_resize(RE_Node *node) {
     node->size = node->size * 2 + 4;
-    node->b = (RE_Branch **)RE_realloc(node->b, node->size, sizeof(RE_Branch *));
+    node->b = (RE_Branch **)RE_realloc(node->b, node->size,
+            sizeof(RE_Branch *));
     return node;
 }
 
+// Sets ch[i] = fill for every i in the inclusive range [begin, end].
+// Deliberately not marked `inline`: under -std=c99 a non-static `inline`
+// definition emits no out-of-line symbol, so a call the compiler declines to
+// inline would fail to link. NOTE(review): if nothing outside this file calls
+// it, `static inline` (like the helpers below) would also be correct.
 void fill_by_range(int begin, int end, bool *ch, bool fill) {
     for (int i = begin; i <= end; ++i) {
         ch[i] = fill;
     }
 }
-#
 
-static void fill_by_string(char *s, bool *ch, bool fill) {
-    while (*s) {
-        ch[(int)*s] = fill;
-        s++;
-    }
-}
+// Sets ch[c] = fill for every character c of the NUL-terminated string s.
+static inline void fill_by_string(const char *s, bool *ch, bool fill) {
+    for (; *s != '\0'; ++s) {
+        // Index through unsigned char so bytes >= 0x80 cannot go negative.
+        ch[(unsigned char)*s] = fill;
+    }
+}
 
-#define fill_by_char(c, ch, fill) \
-        ch[(int)c] = fill
+// Sets the single entry ch[c] to fill; c is used as the index unchanged.
+static inline void fill_by_char(int c, bool *ch, bool fill) {
+    ch[c] = fill;
+}
 
 static RE_Token get_token_escaped(RE_State *st) {
     RE_Token res = Token_new();
@@ -185,9 +188,12 @@ static RE_Token get_token_escaped(RE_State *st) {
     return res;
 }
 
-#define cmp_class(s, class_name, shift) \
-    (!strncmp(s, class_name, strlen(class_name)) && \
-     (shift = strlen(class_name)) > 0)
+// On a prefix match of class_name in s, stores strlen(class_name) in *shift;
+// returns true only when that match is non-empty.
+static inline bool cmp_class(const char *s, const char *class_name,
+        int *shift) {
+    const size_t n = strlen(class_name);
+    return strncmp(s, class_name, n) == 0 && (*shift = (int)n) > 0; }
 
 // charset is complex and I am lazy
 static RE_Token get_token_charset(RE_State *st) {
@@ -220,7 +226,7 @@ static RE_Token get_token_charset(RE_State *st) {
             break;
         case '-':
             if (st->str_read_pos[1] != ']') {
-                fill_by_range(st->str_read_pos[-1], st->str_read_pos[1], \
+                fill_by_range(st->str_read_pos[-1], st->str_read_pos[1],
                         res.u.ch, fill);
                 st->str_read_pos++;
             } else { // ']' can be the last character in bracket
@@ -237,42 +243,42 @@ static RE_Token get_token_charset(RE_State *st) {
             // print, punct, space, upper, word, xdigit
             st->str_read_pos += 2;
             int shift = 0;
-            if (cmp_class(st->str_read_pos, "ascii", shift)) {
+            if (cmp_class(st->str_read_pos, "ascii", &shift)) {
                 fill_by_range(0, 255, res.u.ch, fill);
-            } else if (cmp_class(st->str_read_pos, "alnum", shift)) {
+            } else if (cmp_class(st->str_read_pos, "alnum", &shift)) {
                 fill_by_range((int)'a', (int)'z', res.u.ch, fill);
                 fill_by_range((int)'A', (int)'Z', res.u.ch, fill);
                 fill_by_range((int)'0', (int)'9', res.u.ch, fill);
-            } else if (cmp_class(st->str_read_pos, "alpha", shift)) {
+            } else if (cmp_class(st->str_read_pos, "alpha", &shift)) {
                 fill_by_range((int)'a', (int)'z', res.u.ch, fill);
                 fill_by_range((int)'A', (int)'Z', res.u.ch, fill);
-            } else if (cmp_class(st->str_read_pos, "blank", shift)) {
+            } else if (cmp_class(st->str_read_pos, "blank", &shift)) {
                 res.u.ch[(int)' '] = fill;
                 res.u.ch[(int)'\t'] = fill;
-            } else if (cmp_class(st->str_read_pos, "cntrl", shift)) {
+            } else if (cmp_class(st->str_read_pos, "cntrl", &shift)) {
                 fill_by_range((int)'\x00', (int)'\x1F', res.u.ch, fill);
                 res.u.ch[(int)'\x7F'] = fill;
-            } else if (cmp_class(st->str_read_pos, "digit", shift)) {
+            } else if (cmp_class(st->str_read_pos, "digit", &shift)) {
                 fill_by_range((int)'0', (int)'9', res.u.ch, fill);
-            } else if (cmp_class(st->str_read_pos, "graph", shift)) {
+            } else if (cmp_class(st->str_read_pos, "graph", &shift)) {
                 fill_by_range((int)'\x21', (int)'\x7E', res.u.ch, fill);
-            } else if (cmp_class(st->str_read_pos, "lower", shift)) {
+            } else if (cmp_class(st->str_read_pos, "lower", &shift)) {
                 fill_by_range((int)'a', (int)'z', res.u.ch, fill);
-            } else if (cmp_class(st->str_read_pos, "print", shift)) {
+            } else if (cmp_class(st->str_read_pos, "print", &shift)) {
                 fill_by_range((int)'\x20', (int)'\x7E', res.u.ch, fill);
-            } else if (cmp_class(st->str_read_pos, "punct", shift)) {
-                fill_by_string("][!\"#$%&'()*+,./:;<=>?@\\^_`{|}~-", res.u.ch, \
+            } else if (cmp_class(st->str_read_pos, "punct", &shift)) {
+                fill_by_string("][!\"#$%&'()*+,./:;<=>?@\\^_`{|}~-", res.u.ch,
                         fill);
-            } else if (cmp_class(st->str_read_pos, "space", shift)) {
+            } else if (cmp_class(st->str_read_pos, "space", &shift)) {
                 fill_by_string(" \t\r\n\v\f", res.u.ch, fill);
-            } else if (cmp_class(st->str_read_pos, "upper", shift)) {
+            } else if (cmp_class(st->str_read_pos, "upper", &shift)) {
                 fill_by_range((int)'A', (int)'Z', res.u.ch, fill);
-            } else if (cmp_class(st->str_read_pos, "word", shift)) {
+            } else if (cmp_class(st->str_read_pos, "word", &shift)) {
                 fill_by_range((int)'a', (int)'z', res.u.ch, fill);
                 fill_by_range((int)'A', (int)'Z', res.u.ch, fill);
                 fill_by_range((int)'0', (int)'9', res.u.ch, fill);
                 res.u.ch[(int)'-'] = fill;
-            } else if (cmp_class(st->str_read_pos, "xdigit", shift)) {
+            } else if (cmp_class(st->str_read_pos, "xdigit", &shift)) {
                 fill_by_range((int)'a', (int)'f', res.u.ch, fill);
                 fill_by_range((int)'A', (int)'F', res.u.ch, fill);
                 fill_by_range((int)'0', (int)'9', res.u.ch, fill);
@@ -410,9 +416,10 @@ static RE_Token get_token(RE_State *st) {
     return res;
 }
 
-#define unget_token(st, t) \
-    st->unget = t; \
+static inline void unget_token(RE_State *st, RE_Token t) {
+    st->unget = t; // store the token in the state's push-back field
     st->has_unget = true;
+}
 
 static RE_Atom *parse_atom(RE_State *st) {
     RE_Atom *res = alloc(sizeof(RE_Atom));
@@ -474,8 +481,8 @@ static RE_Branch *parse_branch(RE_State *st) {
         RE_Token peek = get_token(st);
 
-        // jyi's dialect: the outer ')' can be omited
+        // jyi's dialect: the outer ')' can be omitted
-        if (peek.t == END \
-                || (peek.t == META && peek.u.metachar == ')') \
+        if (peek.t == END
+                || (peek.t == META && peek.u.metachar == ')')
                 || (peek.t == META && peek.u.metachar == '|')) {
             unget_token(st, peek);
             break;
diff --git a/src/re2graph.c b/src/re2graph.c
@@ -1,6 +1,29 @@
 #include "NFA.h"
 
-#ifdef DRAW_NFA
+#define CHARSET_SHOW_MAX 16 // defined before its first use in print_charset
+
+// Emit ch's isgraph() members ('"' escaped; stops after CHARSET_SHOW_MAX+1),
+// then "..." if a non-graphic member was seen, then the label's closing "];
+void print_charset(FILE *f, bool *ch) {
+    bool saw_hidden = false;
+    int cnt = 0;
+    for (int i = 0; i < 256 && cnt <= CHARSET_SHOW_MAX; ++i) {
+        if (!ch[i]) {
+            continue;
+        }
+        if (!isgraph(i)) {
+            saw_hidden = true;
+            continue;
+        }
+        if (i == '"') {
+            fputc('\\', f);
+        }
+        fputc(i, f);
+        cnt++;
+    }
+    if (saw_hidden) {
+        fputs("...", f);
+    }
+    fputs("\"];\n", f);
+}
 
-#define CHARSET_SHOW_MAX 16
-
@@ -19,36 +42,14 @@ void draw_NFA(FILE *f, NFAGraph *g, char *regexp) {
             NFAEdge *e = &n->edges[j];
             fprintf(f, "    s%d->s%d [label=\"", n->id, e->next->id);
             if (e->is_epsilon) {
-                fprintf(f, "e\"];\n");
+                fprintf(f, "ε\"];\n");
             } else {
-                bool has_more = true;
-                int cnt = 0;
-                for (int i = 0; i < 256; ++i) {
-                    if (e->ch[i]) {
-                        if (isgraph(i)) {
-                            if (strchr("\"", i)) {
-                                fprintf(f, "%c", '\\');
-                            }
-                            fprintf(f, "%c", i);
-                            cnt++;
-                            if (cnt > CHARSET_SHOW_MAX) {
-                                break;
-                            }
-                        } else {
-                            has_more = false;
-                        }
-                    }
-                }
-                if (!has_more) {
-                    fprintf(f, "...");
-                }
-                fprintf(f, "\"];\n");
+                print_charset(f, e->ch);
             }
         }
     }
     fprintf(f, "}\n");
 }
-#endif
 
 int main(int argc, char *argv[]) {
     assert(argc == 2);
@@ -59,15 +60,13 @@ int main(int argc, char *argv[]) {
     NFAGraph g = regex2NFA(re);
     Re_drop(re);
 
-#ifdef DRAW_NFA
     puts("\033[1;032mregex:\033[0m generate .dot file for NFA GRAPH...");
     FILE *f = fopen("./NFA-graph.dot", "w");
     draw_NFA(f, &g, argv[1]);
     fclose(f);
     puts("OK");
     puts(".dot file is saved at ./NFA-graph.dot");
     puts("");
-#endif
 
     NFAGraph_clear(&g);
     NFAGraph_drop(&g);