Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

core/regexp: add to pcre2 support #2624

Merged
merged 1 commit into from
Apr 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .github/workflows/compile-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,18 @@ jobs:
build:
strategy:
matrix:
libpcre: ["libpcre3-dev", "libpcre2-dev"]
os: ["ubuntu-20.04", "ubuntu-22.04"]
cc: [gcc, clang]
include:
- os: ubuntu-20.04
php: "php7.4"
php-config: "php-config7.4"
cc: "gcc"
- os: ubuntu-22.04
php: "php8.1"
php-config: "php-config8.1"
cc: "gcc"
- os: ubuntu-22.04
php: "php8.1"
php-config: "php-config8.1"
exclude:
- os: ubuntu-20.04
cc: "clang"

runs-on: ${{ matrix.os }}
Expand All @@ -35,7 +35,7 @@ jobs:
run: |
sudo apt update -qq
sudo apt install --no-install-recommends -qqyf python3-dev \
libxml2-dev libpcre3-dev libcap2-dev \
libxml2-dev ${{ matrix.libpcre }} libcap2-dev \
libargon2-0-dev libsodium-dev \
${{ matrix.php }}-dev lib${{ matrix.php }}-embed \
liblua5.1-0-dev ruby-dev \
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
run: |
sudo apt update -qq
sudo apt install --no-install-recommends -qqyf python${{ matrix.python-version }}-dev \
libpcre3-dev libjansson-dev libcap2-dev \
libpcre2-dev libjansson-dev libcap2-dev \
curl check
- name: Install distutils
if: contains(fromJson('["3.6","3.7","3.8","3.9","3.10","3.11","3.12"]'), matrix.python-version)
Expand Down
2 changes: 1 addition & 1 deletion check/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ LDFLAGS = $(shell pkg-config --libs check)
LDFLAGS += -ldl -lz
LDFLAGS += $(shell xml2-config --libs)
LDFLAGS += $(shell pkg-config --libs openssl)
LDFLAGS += $(shell pcre-config --libs)
LDFLAGS += $(shell pcre2-config --libs8)
LDFLAGS += $(shell pkg-config --libs jansson)

UNAME_S := $(shell uname -s)
Expand Down
10 changes: 5 additions & 5 deletions core/alarm.c
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ static struct uwsgi_alarm_instance *uwsgi_alarm_get_instance(char *name) {
}


#ifdef UWSGI_PCRE
#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2)
static int uwsgi_alarm_log_add(char *alarms, char *regexp, int negate) {

struct uwsgi_alarm_log *old_ual = NULL, *ual = uwsgi.alarm_logs;
Expand All @@ -170,7 +170,7 @@ static int uwsgi_alarm_log_add(char *alarms, char *regexp, int negate) {
}

ual = uwsgi_calloc(sizeof(struct uwsgi_alarm_log));
if (uwsgi_regexp_build(regexp, &ual->pattern, &ual->pattern_extra)) {
if (uwsgi_regexp_build(regexp, &ual->pattern)) {
free(ual);
return -1;
}
Expand Down Expand Up @@ -331,7 +331,7 @@ void uwsgi_alarms_init() {
usl = usl->next;
}

#ifdef UWSGI_PCRE
#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2)
// then map log-alarm
usl = uwsgi.alarm_logs_list;
while (usl) {
Expand Down Expand Up @@ -377,14 +377,14 @@ void uwsgi_alarm_trigger_uai(struct uwsgi_alarm_instance *uai, char *msg, size_t
}
}

#ifdef UWSGI_PCRE
#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2)
// check if a log should raise an alarm
void uwsgi_alarm_log_check(char *msg, size_t len) {
if (!uwsgi_strncmp(msg, len, "[uwsgi-alarm", 12))
return;
struct uwsgi_alarm_log *ual = uwsgi.alarm_logs;
while (ual) {
if (uwsgi_regexp_match(ual->pattern, ual->pattern_extra, msg, len) >= 0) {
if (uwsgi_regexp_match(ual->pattern, msg, len) >= 0) {
if (!ual->negate) {
struct uwsgi_alarm_ll *uall = ual->alarms;
while (uall) {
Expand Down
2 changes: 1 addition & 1 deletion core/config.c
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ int uwsgi_logic_opt_if_not_hostname(char *key, char *value) {
return 0;
}

#ifdef UWSGI_PCRE
#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2)
int uwsgi_logic_opt_if_hostname_match(char *key, char *value) {
uwsgi.logic_opt_if_failed = 0;
if (uwsgi_regexp_match_pattern(uwsgi.logic_opt_data, uwsgi.hostname)) {
Expand Down
14 changes: 7 additions & 7 deletions core/logging.c
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,7 @@ void uwsgi_setup_log_master(void) {
usl = usl->next;
}

#ifdef UWSGI_PCRE
#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2)
// set logger by its id
struct uwsgi_regexp_list *url = uwsgi.log_route;
while (url) {
Expand Down Expand Up @@ -1443,11 +1443,11 @@ int uwsgi_master_log(void) {

ssize_t rlen = read(uwsgi.shared->worker_log_pipe[0], uwsgi.log_master_buf, uwsgi.log_master_bufsize);
if (rlen > 0) {
#ifdef UWSGI_PCRE
#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2)
uwsgi_alarm_log_check(uwsgi.log_master_buf, rlen);
struct uwsgi_regexp_list *url = uwsgi.log_drain_rules;
while (url) {
if (uwsgi_regexp_match(url->pattern, url->pattern_extra, uwsgi.log_master_buf, rlen) >= 0) {
if (uwsgi_regexp_match(url->pattern, uwsgi.log_master_buf, rlen) >= 0) {
return 0;
}
url = url->next;
Expand All @@ -1456,7 +1456,7 @@ int uwsgi_master_log(void) {
int show = 0;
url = uwsgi.log_filter_rules;
while (url) {
if (uwsgi_regexp_match(url->pattern, url->pattern_extra, uwsgi.log_master_buf, rlen) >= 0) {
if (uwsgi_regexp_match(url->pattern, uwsgi.log_master_buf, rlen) >= 0) {
show = 1;
break;
}
Expand All @@ -1469,7 +1469,7 @@ int uwsgi_master_log(void) {
url = uwsgi.log_route;
int finish = 0;
while (url) {
if (uwsgi_regexp_match(url->pattern, url->pattern_extra, uwsgi.log_master_buf, rlen) >= 0) {
if (uwsgi_regexp_match(url->pattern, uwsgi.log_master_buf, rlen) >= 0) {
struct uwsgi_logger *ul_route = (struct uwsgi_logger *) url->custom_ptr;
if (ul_route) {
uwsgi_log_func_do(uwsgi.requested_log_encoders, ul_route, uwsgi.log_master_buf, rlen);
Expand Down Expand Up @@ -1509,11 +1509,11 @@ int uwsgi_master_req_log(void) {

ssize_t rlen = read(uwsgi.shared->worker_req_log_pipe[0], uwsgi.log_master_buf, uwsgi.log_master_bufsize);
if (rlen > 0) {
#ifdef UWSGI_PCRE
#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2)
struct uwsgi_regexp_list *url = uwsgi.log_req_route;
int finish = 0;
while (url) {
if (uwsgi_regexp_match(url->pattern, url->pattern_extra, uwsgi.log_master_buf, rlen) >= 0) {
if (uwsgi_regexp_match(url->pattern, uwsgi.log_master_buf, rlen) >= 0) {
struct uwsgi_logger *ul_route = (struct uwsgi_logger *) url->custom_ptr;
if (ul_route) {
uwsgi_log_func_do(uwsgi.requested_log_req_encoders, ul_route, uwsgi.log_master_buf, rlen);
Expand Down
102 changes: 85 additions & 17 deletions core/regexp.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#ifdef UWSGI_PCRE
#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2)
#include "uwsgi.h"

extern struct uwsgi_server uwsgi;
Expand All @@ -10,51 +10,119 @@ void uwsgi_opt_pcre_jit(char *opt, char *value, void *foobar) {
if (ret != 0 || has_jit != 1)
return;
uwsgi.pcre_jit = PCRE_STUDY_JIT_COMPILE;
#elif defined(PCRE2_CONFIG_JIT)
int has_jit = 0, ret;
ret = pcre2_config(PCRE2_CONFIG_JIT, &has_jit);
if (ret != 0)
return;
uwsgi.pcre_jit = has_jit;
#endif
}

int uwsgi_regexp_build(char *re, pcre ** pattern, pcre_extra ** pattern_extra) {
int uwsgi_regexp_build(char *re, uwsgi_pcre ** pattern) {

#ifdef UWSGI_PCRE2
int errnbr;
long unsigned int erroff;

*pattern = pcre2_compile((const unsigned char *) re, PCRE2_ZERO_TERMINATED, 0, &errnbr, &erroff, NULL);
#else
const char *errstr;
int erroff;

*pattern = pcre_compile((const char *) re, 0, &errstr, &erroff, NULL);
if (!*pattern) {
*pattern = uwsgi_malloc(sizeof(uwsgi_pcre));
(*pattern)->p = pcre_compile((const char *) re, 0, &errstr, &erroff, NULL);
#endif
#ifdef UWSGI_PCRE2
if (!(*pattern)) {
uwsgi_log("pcre error: code %d at offset %d\n", errnbr, erroff);
#else
if (!((*pattern)->p)) {
uwsgi_log("pcre error: %s at offset %d\n", errstr, erroff);
#endif
return -1;
}

#ifdef UWSGI_PCRE2
if (uwsgi.pcre_jit) {
errnbr = pcre2_jit_compile(*pattern, PCRE2_JIT_COMPLETE);
if (errnbr) {
pcre2_code_free(*pattern);
uwsgi_log("pcre JIT compile error code %d\n", errnbr);
return -1;
}
#else
int opt = uwsgi.pcre_jit;

*pattern_extra = (pcre_extra *) pcre_study((const pcre *) *pattern, opt, &errstr);
if (*pattern_extra == NULL && errstr != NULL) {
pcre_free(*pattern);
(*pattern)->extra = (pcre_extra *) pcre_study((const pcre *) (*pattern)->p, opt, &errstr);
if ((*pattern)->extra == NULL && errstr != NULL) {
pcre_free((*pattern)->p);
free(*pattern);
uwsgi_log("pcre (study) error: %s\n", errstr);
return -1;
#endif
}

return 0;

}

int uwsgi_regexp_match(pcre * pattern, pcre_extra * pattern_extra, char *subject, int length) {

return pcre_exec((const pcre *) pattern, (const pcre_extra *) pattern_extra, subject, length, 0, 0, NULL, 0);
/*
 * Match `subject` (`length` bytes) against a compiled pattern without
 * reporting any captured substrings.
 *
 * Returns the raw result of the underlying library call: pcre2_match() when
 * built with UWSGI_PCRE2, pcre_exec() otherwise. Callers treat a value >= 0
 * as "matched" and a negative value as "no match / error".
 */
int uwsgi_regexp_match(uwsgi_pcre *pattern, const char *subject, int length) {
#ifdef UWSGI_PCRE2
	int rc;
	pcre2_match_data *match_data;

	/*
	 * Unlike pcre_exec(), pcre2_match() requires a match data block even
	 * when the caller does not care about captures: with a NULL block it
	 * fails with PCRE2_ERROR_NULL, so nothing would ever match.
	 */
	match_data = pcre2_match_data_create_from_pattern(pattern, NULL);
	if (!match_data) {
		return PCRE2_ERROR_NOMEMORY;
	}

	rc = pcre2_match(pattern, (const unsigned char *)subject, length, 0, 0, match_data, NULL);
	pcre2_match_data_free(match_data);

	return rc;
#else
	return pcre_exec((const pcre *) pattern->p, (const pcre_extra *) pattern->extra, subject, length, 0, 0, NULL, 0);
#endif
}

int uwsgi_regexp_match_ovec(pcre * pattern, pcre_extra * pattern_extra, char *subject, int length, int *ovec, int n) {
int uwsgi_regexp_match_ovec(uwsgi_pcre *pattern, const char *subject, int length, int *ovec, int n) {

#ifdef UWSGI_PCRE2
int rc;
int i;
pcre2_match_data *match_data;
size_t *pcre2_ovec;

match_data = pcre2_match_data_create_from_pattern(pattern, NULL);
rc = pcre2_match(pattern, (const unsigned char *)subject, length, 0, 0, match_data, NULL);

/*
* Quoting PCRE{,2} spec, "The first pair of integers, ovector[0]
* and ovector[1], identify the portion of the subject string matched
* by the entire pattern. The next pair is used for the first capturing
* subpattern, and so on." Therefore, the ovector size is the number of
* capturing subpatterns (INFO_CAPTURECOUNT), from uwsgi_regexp_ovector(),
* as matching pairs, plus room for the first pair.
*/
if (n > 0) {
return pcre_exec((const pcre *) pattern, (const pcre_extra *) pattern_extra, subject, length, 0, 0, ovec, (n + 1) * 3);
// copy pcre2 output vector to uwsgi output vector
pcre2_ovec = pcre2_get_ovector_pointer(match_data);
for (i=0;i<(n+1)*2;i++) {
ovec[i] = pcre2_ovec[i];
}
#else
if (n > 0) {
return pcre_exec((const pcre *) pattern->p, (const pcre_extra *) pattern->extra, subject, length, 0, 0, ovec, PCRE_OVECTOR_BYTESIZE(n));
#endif
}
return pcre_exec((const pcre *) pattern, (const pcre_extra *) pattern_extra, subject, length, 0, 0, NULL, 0);

#ifdef UWSGI_PCRE2
pcre2_match_data_free(match_data);

return rc;
#else
return pcre_exec((const pcre *) pattern->p, (const pcre_extra *) pattern->extra, subject, length, 0, 0, NULL, 0);
#endif
}

int uwsgi_regexp_ovector(pcre * pattern, pcre_extra * pattern_extra) {
int uwsgi_regexp_ovector(const uwsgi_pcre *pattern) {

int n;

if (pcre_fullinfo((const pcre *) pattern, (const pcre_extra *) pattern_extra, PCRE_INFO_CAPTURECOUNT, &n))
#ifdef UWSGI_PCRE2
if (pcre2_pattern_info(pattern, PCRE2_INFO_CAPTURECOUNT, &n))
#else
if (pcre_fullinfo((const pcre *) pattern->p, (const pcre_extra *) pattern->extra, PCRE_INFO_CAPTURECOUNT, &n))
#endif
return 0;

return n;
Expand All @@ -66,7 +134,7 @@ char *uwsgi_regexp_apply_ovec(char *src, int src_n, char *dst, int dst_n, int *o
int dollar = 0;

size_t dollars = n;

for(i=0;i<dst_n;i++) {
if (dst[i] == '$') {
dollars++;
Expand Down
41 changes: 25 additions & 16 deletions core/routing.c
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ int uwsgi_apply_routes_do(struct uwsgi_route *routes, struct wsgi_request *wsgi_
subject = *subject2 ;
subject_len = *subject_len2;
}
n = uwsgi_regexp_match_ovec(routes->pattern, routes->pattern_extra, subject, subject_len, routes->ovector[wsgi_req->async_id], routes->ovn[wsgi_req->async_id]);
n = uwsgi_regexp_match_ovec(routes->pattern, subject, subject_len, routes->ovector[wsgi_req->async_id], routes->ovn[wsgi_req->async_id]);
}
else {
int ret = routes->if_func(wsgi_req, routes);
Expand Down Expand Up @@ -506,15 +506,15 @@ void uwsgi_fixup_routes(struct uwsgi_route *ur) {

// fill them if needed... (this is an optimization for route with a static subject)
if (ur->subject && ur->subject_len) {
if (uwsgi_regexp_build(ur->orig_route, &ur->pattern, &ur->pattern_extra)) {
if (uwsgi_regexp_build(ur->orig_route, &ur->pattern)) {
exit(1);
}

int i;
for(i=0;i<uwsgi.cores;i++) {
ur->ovn[i] = uwsgi_regexp_ovector(ur->pattern, ur->pattern_extra);
ur->ovn[i] = uwsgi_regexp_ovector(ur->pattern);
if (ur->ovn[i] > 0) {
ur->ovector[i] = uwsgi_calloc(sizeof(int) * (3 * (ur->ovn[i] + 1)));
ur->ovector[i] = uwsgi_calloc(sizeof(int) * PCRE_OVECTOR_BYTESIZE(ur->ovn[i]));
}
}
}
Expand Down Expand Up @@ -1484,38 +1484,47 @@ static int uwsgi_route_condition_regexp(struct wsgi_request *wsgi_req, struct uw
ur->condition_ub[wsgi_req->async_id] = uwsgi_routing_translate(wsgi_req, ur, NULL, 0, ur->subject_str, semicolon - ur->subject_str);
if (!ur->condition_ub[wsgi_req->async_id]) return -1;

pcre *pattern;
pcre_extra *pattern_extra;
uwsgi_pcre *pattern;
char *re = uwsgi_concat2n(semicolon+1, ur->subject_str_len - ((semicolon+1) - ur->subject_str), "", 0);
if (uwsgi_regexp_build(re, &pattern, &pattern_extra)) {
if (uwsgi_regexp_build(re, &pattern)) {
free(re);
return -1;
}
free(re);

// a condition has no initialized vectors, let's create them
ur->ovn[wsgi_req->async_id] = uwsgi_regexp_ovector(pattern, pattern_extra);
ur->ovn[wsgi_req->async_id] = uwsgi_regexp_ovector(pattern);
if (ur->ovn[wsgi_req->async_id] > 0) {
ur->ovector[wsgi_req->async_id] = uwsgi_calloc(sizeof(int) * (3 * (ur->ovn[wsgi_req->async_id] + 1)));
}

if (uwsgi_regexp_match_ovec(pattern, pattern_extra, ur->condition_ub[wsgi_req->async_id]->buf, ur->condition_ub[wsgi_req->async_id]->pos, ur->ovector[wsgi_req->async_id], ur->ovn[wsgi_req->async_id] ) >= 0) {
pcre_free(pattern);
if (uwsgi_regexp_match_ovec(pattern, ur->condition_ub[wsgi_req->async_id]->buf, ur->condition_ub[wsgi_req->async_id]->pos, ur->ovector[wsgi_req->async_id], ur->ovn[wsgi_req->async_id] ) >= 0) {
#ifdef UWSGI_PCRE2
pcre2_code_free(pattern);
#else
pcre_free(pattern->p);
#ifdef PCRE_STUDY_JIT_COMPILE
pcre_free_study(pattern_extra);
pcre_free_study(pattern->extra);
#else
pcre_free(pattern_extra);
pcre_free(pattern->extra);
#endif
free(pattern);
#endif
return 1;
}

pcre_free(pattern);
#ifdef UWSGI_PCRE2
pcre2_code_free(pattern);
#else
pcre_free(pattern->p);
#ifdef PCRE_STUDY_JIT_COMPILE
pcre_free_study(pattern_extra);
pcre_free_study(pattern->extra);
#else
pcre_free(pattern_extra);
pcre_free(pattern->extra);
#endif
return 0;
free(pattern);
#endif
return 0;
}

static int uwsgi_route_condition_empty(struct wsgi_request *wsgi_req, struct uwsgi_route *ur) {
Expand Down
Loading