Skip to content

Commit

Permalink
Merge pull request #2624 from xrmx/pcre2
Browse files (browse the repository at this point in the history)
core/regexp: add pcre2 support
  • Loading branch information
xrmx authored Apr 6, 2024
2 parents 39f3ade + cb2e4ca commit c164fb9
Show file tree
Hide file tree
Showing 15 changed files with 227 additions and 134 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/compile-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,18 @@ jobs:
build:
strategy:
matrix:
libpcre: ["libpcre3-dev", "libpcre2-dev"]
os: ["ubuntu-20.04", "ubuntu-22.04"]
cc: [gcc, clang]
include:
- os: ubuntu-20.04
php: "php7.4"
php-config: "php-config7.4"
cc: "gcc"
- os: ubuntu-22.04
php: "php8.1"
php-config: "php-config8.1"
cc: "gcc"
- os: ubuntu-22.04
php: "php8.1"
php-config: "php-config8.1"
exclude:
- os: ubuntu-20.04
cc: "clang"

runs-on: ${{ matrix.os }}
Expand All @@ -35,7 +35,7 @@ jobs:
run: |
sudo apt update -qq
sudo apt install --no-install-recommends -qqyf python3-dev \
libxml2-dev libpcre3-dev libcap2-dev \
libxml2-dev ${{ matrix.libpcre }} libcap2-dev \
libargon2-0-dev libsodium-dev \
${{ matrix.php }}-dev lib${{ matrix.php }}-embed \
liblua5.1-0-dev ruby-dev \
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
run: |
sudo apt update -qq
sudo apt install --no-install-recommends -qqyf python${{ matrix.python-version }}-dev \
libpcre3-dev libjansson-dev libcap2-dev \
libpcre2-dev libjansson-dev libcap2-dev \
curl check
- name: Install distutils
if: contains(fromJson('["3.6","3.7","3.8","3.9","3.10","3.11","3.12"]'), matrix.python-version)
Expand Down
2 changes: 1 addition & 1 deletion check/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ LDFLAGS = $(shell pkg-config --libs check)
LDFLAGS += -ldl -lz
LDFLAGS += $(shell xml2-config --libs)
LDFLAGS += $(shell pkg-config --libs openssl)
LDFLAGS += $(shell pcre-config --libs)
LDFLAGS += $(shell pcre2-config --libs8)
LDFLAGS += $(shell pkg-config --libs jansson)

UNAME_S := $(shell uname -s)
Expand Down
10 changes: 5 additions & 5 deletions core/alarm.c
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ static struct uwsgi_alarm_instance *uwsgi_alarm_get_instance(char *name) {
}


#ifdef UWSGI_PCRE
#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2)
static int uwsgi_alarm_log_add(char *alarms, char *regexp, int negate) {

struct uwsgi_alarm_log *old_ual = NULL, *ual = uwsgi.alarm_logs;
Expand All @@ -170,7 +170,7 @@ static int uwsgi_alarm_log_add(char *alarms, char *regexp, int negate) {
}

ual = uwsgi_calloc(sizeof(struct uwsgi_alarm_log));
if (uwsgi_regexp_build(regexp, &ual->pattern, &ual->pattern_extra)) {
if (uwsgi_regexp_build(regexp, &ual->pattern)) {
free(ual);
return -1;
}
Expand Down Expand Up @@ -331,7 +331,7 @@ void uwsgi_alarms_init() {
usl = usl->next;
}

#ifdef UWSGI_PCRE
#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2)
// then map log-alarm
usl = uwsgi.alarm_logs_list;
while (usl) {
Expand Down Expand Up @@ -377,14 +377,14 @@ void uwsgi_alarm_trigger_uai(struct uwsgi_alarm_instance *uai, char *msg, size_t
}
}

#ifdef UWSGI_PCRE
#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2)
// check if a log should raise an alarm
void uwsgi_alarm_log_check(char *msg, size_t len) {
if (!uwsgi_strncmp(msg, len, "[uwsgi-alarm", 12))
return;
struct uwsgi_alarm_log *ual = uwsgi.alarm_logs;
while (ual) {
if (uwsgi_regexp_match(ual->pattern, ual->pattern_extra, msg, len) >= 0) {
if (uwsgi_regexp_match(ual->pattern, msg, len) >= 0) {
if (!ual->negate) {
struct uwsgi_alarm_ll *uall = ual->alarms;
while (uall) {
Expand Down
2 changes: 1 addition & 1 deletion core/config.c
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ int uwsgi_logic_opt_if_not_hostname(char *key, char *value) {
return 0;
}

#ifdef UWSGI_PCRE
#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2)
int uwsgi_logic_opt_if_hostname_match(char *key, char *value) {
uwsgi.logic_opt_if_failed = 0;
if (uwsgi_regexp_match_pattern(uwsgi.logic_opt_data, uwsgi.hostname)) {
Expand Down
14 changes: 7 additions & 7 deletions core/logging.c
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,7 @@ void uwsgi_setup_log_master(void) {
usl = usl->next;
}

#ifdef UWSGI_PCRE
#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2)
// set logger by its id
struct uwsgi_regexp_list *url = uwsgi.log_route;
while (url) {
Expand Down Expand Up @@ -1443,11 +1443,11 @@ int uwsgi_master_log(void) {

ssize_t rlen = read(uwsgi.shared->worker_log_pipe[0], uwsgi.log_master_buf, uwsgi.log_master_bufsize);
if (rlen > 0) {
#ifdef UWSGI_PCRE
#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2)
uwsgi_alarm_log_check(uwsgi.log_master_buf, rlen);
struct uwsgi_regexp_list *url = uwsgi.log_drain_rules;
while (url) {
if (uwsgi_regexp_match(url->pattern, url->pattern_extra, uwsgi.log_master_buf, rlen) >= 0) {
if (uwsgi_regexp_match(url->pattern, uwsgi.log_master_buf, rlen) >= 0) {
return 0;
}
url = url->next;
Expand All @@ -1456,7 +1456,7 @@ int uwsgi_master_log(void) {
int show = 0;
url = uwsgi.log_filter_rules;
while (url) {
if (uwsgi_regexp_match(url->pattern, url->pattern_extra, uwsgi.log_master_buf, rlen) >= 0) {
if (uwsgi_regexp_match(url->pattern, uwsgi.log_master_buf, rlen) >= 0) {
show = 1;
break;
}
Expand All @@ -1469,7 +1469,7 @@ int uwsgi_master_log(void) {
url = uwsgi.log_route;
int finish = 0;
while (url) {
if (uwsgi_regexp_match(url->pattern, url->pattern_extra, uwsgi.log_master_buf, rlen) >= 0) {
if (uwsgi_regexp_match(url->pattern, uwsgi.log_master_buf, rlen) >= 0) {
struct uwsgi_logger *ul_route = (struct uwsgi_logger *) url->custom_ptr;
if (ul_route) {
uwsgi_log_func_do(uwsgi.requested_log_encoders, ul_route, uwsgi.log_master_buf, rlen);
Expand Down Expand Up @@ -1509,11 +1509,11 @@ int uwsgi_master_req_log(void) {

ssize_t rlen = read(uwsgi.shared->worker_req_log_pipe[0], uwsgi.log_master_buf, uwsgi.log_master_bufsize);
if (rlen > 0) {
#ifdef UWSGI_PCRE
#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2)
struct uwsgi_regexp_list *url = uwsgi.log_req_route;
int finish = 0;
while (url) {
if (uwsgi_regexp_match(url->pattern, url->pattern_extra, uwsgi.log_master_buf, rlen) >= 0) {
if (uwsgi_regexp_match(url->pattern, uwsgi.log_master_buf, rlen) >= 0) {
struct uwsgi_logger *ul_route = (struct uwsgi_logger *) url->custom_ptr;
if (ul_route) {
uwsgi_log_func_do(uwsgi.requested_log_req_encoders, ul_route, uwsgi.log_master_buf, rlen);
Expand Down
102 changes: 85 additions & 17 deletions core/regexp.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#ifdef UWSGI_PCRE
#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2)
#include "uwsgi.h"

extern struct uwsgi_server uwsgi;
Expand All @@ -10,51 +10,119 @@ void uwsgi_opt_pcre_jit(char *opt, char *value, void *foobar) {
if (ret != 0 || has_jit != 1)
return;
uwsgi.pcre_jit = PCRE_STUDY_JIT_COMPILE;
#elif defined(PCRE2_CONFIG_JIT)
int has_jit = 0, ret;
ret = pcre2_config(PCRE2_CONFIG_JIT, &has_jit);
if (ret != 0)
return;
uwsgi.pcre_jit = has_jit;
#endif
}

int uwsgi_regexp_build(char *re, pcre ** pattern, pcre_extra ** pattern_extra) {
int uwsgi_regexp_build(char *re, uwsgi_pcre ** pattern) {

#ifdef UWSGI_PCRE2
int errnbr;
long unsigned int erroff;

*pattern = pcre2_compile((const unsigned char *) re, PCRE2_ZERO_TERMINATED, 0, &errnbr, &erroff, NULL);
#else
const char *errstr;
int erroff;

*pattern = pcre_compile((const char *) re, 0, &errstr, &erroff, NULL);
if (!*pattern) {
*pattern = uwsgi_malloc(sizeof(uwsgi_pcre));
(*pattern)->p = pcre_compile((const char *) re, 0, &errstr, &erroff, NULL);
#endif
#ifdef UWSGI_PCRE2
if (!(*pattern)) {
uwsgi_log("pcre error: code %d at offset %d\n", errnbr, erroff);
#else
if (!((*pattern)->p)) {
uwsgi_log("pcre error: %s at offset %d\n", errstr, erroff);
#endif
return -1;
}

#ifdef UWSGI_PCRE2
if (uwsgi.pcre_jit) {
errnbr = pcre2_jit_compile(*pattern, PCRE2_JIT_COMPLETE);
if (errnbr) {
pcre2_code_free(*pattern);
uwsgi_log("pcre JIT compile error code %d\n", errnbr);
return -1;
}
#else
int opt = uwsgi.pcre_jit;

*pattern_extra = (pcre_extra *) pcre_study((const pcre *) *pattern, opt, &errstr);
if (*pattern_extra == NULL && errstr != NULL) {
pcre_free(*pattern);
(*pattern)->extra = (pcre_extra *) pcre_study((const pcre *) (*pattern)->p, opt, &errstr);
if ((*pattern)->extra == NULL && errstr != NULL) {
pcre_free((*pattern)->p);
free(*pattern);
uwsgi_log("pcre (study) error: %s\n", errstr);
return -1;
#endif
}

return 0;

}

int uwsgi_regexp_match(pcre * pattern, pcre_extra * pattern_extra, char *subject, int length) {

return pcre_exec((const pcre *) pattern, (const pcre_extra *) pattern_extra, subject, length, 0, 0, NULL, 0);
int uwsgi_regexp_match(uwsgi_pcre *pattern, const char *subject, int length) {
#ifdef UWSGI_PCRE2
return pcre2_match(pattern, (const unsigned char *)subject, length, 0, 0, NULL, NULL);
#else
return pcre_exec((const pcre *) pattern->p, (const pcre_extra *) pattern->extra, subject, length, 0, 0, NULL, 0);
#endif
}

int uwsgi_regexp_match_ovec(pcre * pattern, pcre_extra * pattern_extra, char *subject, int length, int *ovec, int n) {
int uwsgi_regexp_match_ovec(uwsgi_pcre *pattern, const char *subject, int length, int *ovec, int n) {

#ifdef UWSGI_PCRE2
int rc;
int i;
pcre2_match_data *match_data;
size_t *pcre2_ovec;

match_data = pcre2_match_data_create_from_pattern(pattern, NULL);
rc = pcre2_match(pattern, (const unsigned char *)subject, length, 0, 0, match_data, NULL);

/*
* Quoting PCRE{,2} spec, "The first pair of integers, ovector[0]
* and ovector[1], identify the portion of the subject string matched
* by the entire pattern. The next pair is used for the first capturing
* subpattern, and so on." Therefore, the ovector size is the number of
* capturing subpatterns (INFO_CAPTURECOUNT), from uwsgi_regexp_ovector(),
* as matching pairs, plus room for the first pair.
*/
if (n > 0) {
return pcre_exec((const pcre *) pattern, (const pcre_extra *) pattern_extra, subject, length, 0, 0, ovec, (n + 1) * 3);
// copy pcre2 output vector to uwsgi output vector
pcre2_ovec = pcre2_get_ovector_pointer(match_data);
for (i=0;i<(n+1)*2;i++) {
ovec[i] = pcre2_ovec[i];
}
#else
if (n > 0) {
return pcre_exec((const pcre *) pattern->p, (const pcre_extra *) pattern->extra, subject, length, 0, 0, ovec, PCRE_OVECTOR_BYTESIZE(n));
#endif
}
return pcre_exec((const pcre *) pattern, (const pcre_extra *) pattern_extra, subject, length, 0, 0, NULL, 0);

#ifdef UWSGI_PCRE2
pcre2_match_data_free(match_data);

return rc;
#else
return pcre_exec((const pcre *) pattern->p, (const pcre_extra *) pattern->extra, subject, length, 0, 0, NULL, 0);
#endif
}

int uwsgi_regexp_ovector(pcre * pattern, pcre_extra * pattern_extra) {
int uwsgi_regexp_ovector(const uwsgi_pcre *pattern) {

int n;

if (pcre_fullinfo((const pcre *) pattern, (const pcre_extra *) pattern_extra, PCRE_INFO_CAPTURECOUNT, &n))
#ifdef UWSGI_PCRE2
if (pcre2_pattern_info(pattern, PCRE2_INFO_CAPTURECOUNT, &n))
#else
if (pcre_fullinfo((const pcre *) pattern->p, (const pcre_extra *) pattern->extra, PCRE_INFO_CAPTURECOUNT, &n))
#endif
return 0;

return n;
Expand All @@ -66,7 +134,7 @@ char *uwsgi_regexp_apply_ovec(char *src, int src_n, char *dst, int dst_n, int *o
int dollar = 0;

size_t dollars = n;

for(i=0;i<dst_n;i++) {
if (dst[i] == '$') {
dollars++;
Expand Down
41 changes: 25 additions & 16 deletions core/routing.c
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ int uwsgi_apply_routes_do(struct uwsgi_route *routes, struct wsgi_request *wsgi_
subject = *subject2 ;
subject_len = *subject_len2;
}
n = uwsgi_regexp_match_ovec(routes->pattern, routes->pattern_extra, subject, subject_len, routes->ovector[wsgi_req->async_id], routes->ovn[wsgi_req->async_id]);
n = uwsgi_regexp_match_ovec(routes->pattern, subject, subject_len, routes->ovector[wsgi_req->async_id], routes->ovn[wsgi_req->async_id]);
}
else {
int ret = routes->if_func(wsgi_req, routes);
Expand Down Expand Up @@ -506,15 +506,15 @@ void uwsgi_fixup_routes(struct uwsgi_route *ur) {

// fill them if needed... (this is an optimization for route with a static subject)
if (ur->subject && ur->subject_len) {
if (uwsgi_regexp_build(ur->orig_route, &ur->pattern, &ur->pattern_extra)) {
if (uwsgi_regexp_build(ur->orig_route, &ur->pattern)) {
exit(1);
}

int i;
for(i=0;i<uwsgi.cores;i++) {
ur->ovn[i] = uwsgi_regexp_ovector(ur->pattern, ur->pattern_extra);
ur->ovn[i] = uwsgi_regexp_ovector(ur->pattern);
if (ur->ovn[i] > 0) {
ur->ovector[i] = uwsgi_calloc(sizeof(int) * (3 * (ur->ovn[i] + 1)));
ur->ovector[i] = uwsgi_calloc(sizeof(int) * PCRE_OVECTOR_BYTESIZE(ur->ovn[i]));
}
}
}
Expand Down Expand Up @@ -1484,38 +1484,47 @@ static int uwsgi_route_condition_regexp(struct wsgi_request *wsgi_req, struct uw
ur->condition_ub[wsgi_req->async_id] = uwsgi_routing_translate(wsgi_req, ur, NULL, 0, ur->subject_str, semicolon - ur->subject_str);
if (!ur->condition_ub[wsgi_req->async_id]) return -1;

pcre *pattern;
pcre_extra *pattern_extra;
uwsgi_pcre *pattern;
char *re = uwsgi_concat2n(semicolon+1, ur->subject_str_len - ((semicolon+1) - ur->subject_str), "", 0);
if (uwsgi_regexp_build(re, &pattern, &pattern_extra)) {
if (uwsgi_regexp_build(re, &pattern)) {
free(re);
return -1;
}
free(re);

// a condition has no initialized vectors, let's create them
ur->ovn[wsgi_req->async_id] = uwsgi_regexp_ovector(pattern, pattern_extra);
ur->ovn[wsgi_req->async_id] = uwsgi_regexp_ovector(pattern);
if (ur->ovn[wsgi_req->async_id] > 0) {
ur->ovector[wsgi_req->async_id] = uwsgi_calloc(sizeof(int) * (3 * (ur->ovn[wsgi_req->async_id] + 1)));
}

if (uwsgi_regexp_match_ovec(pattern, pattern_extra, ur->condition_ub[wsgi_req->async_id]->buf, ur->condition_ub[wsgi_req->async_id]->pos, ur->ovector[wsgi_req->async_id], ur->ovn[wsgi_req->async_id] ) >= 0) {
pcre_free(pattern);
if (uwsgi_regexp_match_ovec(pattern, ur->condition_ub[wsgi_req->async_id]->buf, ur->condition_ub[wsgi_req->async_id]->pos, ur->ovector[wsgi_req->async_id], ur->ovn[wsgi_req->async_id] ) >= 0) {
#ifdef UWSGI_PCRE2
pcre2_code_free(pattern);
#else
pcre_free(pattern->p);
#ifdef PCRE_STUDY_JIT_COMPILE
pcre_free_study(pattern_extra);
pcre_free_study(pattern->extra);
#else
pcre_free(pattern_extra);
pcre_free(pattern->extra);
#endif
free(pattern);
#endif
return 1;
}

pcre_free(pattern);
#ifdef UWSGI_PCRE2
pcre2_code_free(pattern);
#else
pcre_free(pattern->p);
#ifdef PCRE_STUDY_JIT_COMPILE
pcre_free_study(pattern_extra);
pcre_free_study(pattern->extra);
#else
pcre_free(pattern_extra);
pcre_free(pattern->extra);
#endif
return 0;
free(pattern);
#endif
return 0;
}

static int uwsgi_route_condition_empty(struct wsgi_request *wsgi_req, struct uwsgi_route *ur) {
Expand Down
Loading

0 comments on commit c164fb9

Please sign in to comment.