From cb2e4ca9fbf0a8a73123887135a5687490875219 Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Tue, 25 Jul 2023 16:17:52 +0200 Subject: [PATCH] core/regexp: add to pcre2 support Co-authored-by: loqs --- .github/workflows/compile-test.yml | 12 ++-- .github/workflows/test.yml | 2 +- check/Makefile | 2 +- core/alarm.c | 10 +-- core/config.c | 2 +- core/logging.c | 14 ++-- core/regexp.c | 102 ++++++++++++++++++++++++----- core/routing.c | 41 +++++++----- core/ssl.c | 8 +-- core/static.c | 20 +++--- core/utils.c | 13 ++-- core/uwsgi.c | 20 +++--- plugins/php/php_plugin.c | 12 ++-- uwsgi.h | 58 +++++++++------- uwsgiconfig.py | 45 +++++++------ 15 files changed, 227 insertions(+), 134 deletions(-) diff --git a/.github/workflows/compile-test.yml b/.github/workflows/compile-test.yml index d1ec99799e..6b56aea31d 100644 --- a/.github/workflows/compile-test.yml +++ b/.github/workflows/compile-test.yml @@ -10,18 +10,18 @@ jobs: build: strategy: matrix: + libpcre: ["libpcre3-dev", "libpcre2-dev"] + os: ["ubuntu-20.04", "ubuntu-22.04"] + cc: [gcc, clang] include: - os: ubuntu-20.04 php: "php7.4" php-config: "php-config7.4" - cc: "gcc" - - os: ubuntu-22.04 - php: "php8.1" - php-config: "php-config8.1" - cc: "gcc" - os: ubuntu-22.04 php: "php8.1" php-config: "php-config8.1" + exclude: + - os: ubuntu-20.04 cc: "clang" runs-on: ${{ matrix.os }} @@ -35,7 +35,7 @@ jobs: run: | sudo apt update -qq sudo apt install --no-install-recommends -qqyf python3-dev \ - libxml2-dev libpcre3-dev libcap2-dev \ + libxml2-dev ${{ matrix.libpcre }} libcap2-dev \ libargon2-0-dev libsodium-dev \ ${{ matrix.php }}-dev lib${{ matrix.php }}-embed \ liblua5.1-0-dev ruby-dev \ diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ba0cbbb49f..6c396344d8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -21,7 +21,7 @@ jobs: run: | sudo apt update -qq sudo apt install --no-install-recommends -qqyf python${{ matrix.python-version }}-dev \ - libpcre3-dev libjansson-dev libcap2-dev \ + libpcre2-dev libjansson-dev libcap2-dev \ curl check - name: Install distutils if: contains(fromJson('["3.6","3.7","3.8","3.9","3.10","3.11","3.12"]'), matrix.python-version) diff --git a/check/Makefile b/check/Makefile index ac69b37891..a9a6555769 100644 --- a/check/Makefile +++ b/check/Makefile @@ -4,7 +4,7 @@ LDFLAGS = $(shell pkg-config --libs check) LDFLAGS += -ldl -lz LDFLAGS += $(shell xml2-config --libs) LDFLAGS += $(shell pkg-config --libs openssl) -LDFLAGS += $(shell pcre-config --libs) +LDFLAGS += $(shell pcre2-config --libs8) LDFLAGS += $(shell pkg-config --libs jansson) UNAME_S := $(shell uname -s) diff --git a/core/alarm.c b/core/alarm.c index 7691a7d0a4..bdd7b00ce4 100644 --- a/core/alarm.c +++ b/core/alarm.c @@ -160,7 +160,7 @@ static struct uwsgi_alarm_instance *uwsgi_alarm_get_instance(char *name) { } -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) static int uwsgi_alarm_log_add(char *alarms, char *regexp, int negate) { struct uwsgi_alarm_log *old_ual = NULL, *ual = uwsgi.alarm_logs; @@ -170,7 +170,7 @@ static int uwsgi_alarm_log_add(char *alarms, char *regexp, int negate) { } ual = uwsgi_calloc(sizeof(struct uwsgi_alarm_log)); - if (uwsgi_regexp_build(regexp, &ual->pattern, &ual->pattern_extra)) { + if (uwsgi_regexp_build(regexp, &ual->pattern)) { free(ual); return -1; } @@ -331,7 +331,7 @@ void uwsgi_alarms_init() { usl = usl->next; } -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) // then map log-alarm usl = uwsgi.alarm_logs_list; while (usl) { @@ -377,14 +377,14 @@ void uwsgi_alarm_trigger_uai(struct uwsgi_alarm_instance *uai, char *msg, size_t } } -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) // check if a log should raise an alarm void uwsgi_alarm_log_check(char *msg, size_t len) { if (!uwsgi_strncmp(msg, len, "[uwsgi-alarm", 12)) return; struct uwsgi_alarm_log *ual = uwsgi.alarm_logs; while (ual) { - if (uwsgi_regexp_match(ual->pattern, ual->pattern_extra, msg, len) >= 0) { + if (uwsgi_regexp_match(ual->pattern, msg, len) >= 0) { if (!ual->negate) { struct uwsgi_alarm_ll *uall = ual->alarms; while (uall) { diff --git a/core/config.c b/core/config.c index 7153dfa164..164bd84250 100644 --- a/core/config.c +++ b/core/config.c @@ -336,7 +336,7 @@ int uwsgi_logic_opt_if_not_hostname(char *key, char *value) { return 0; } -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) int uwsgi_logic_opt_if_hostname_match(char *key, char *value) { uwsgi.logic_opt_if_failed = 0; if (uwsgi_regexp_match_pattern(uwsgi.logic_opt_data, uwsgi.hostname)) { diff --git a/core/logging.c b/core/logging.c index e7ab9f9307..b174614c6b 100644 --- a/core/logging.c +++ b/core/logging.c @@ -414,7 +414,7 @@ void uwsgi_setup_log_master(void) { usl = usl->next; } -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) // set logger by its id struct uwsgi_regexp_list *url = uwsgi.log_route; while (url) { @@ -1443,11 +1443,11 @@ int uwsgi_master_log(void) { ssize_t rlen = read(uwsgi.shared->worker_log_pipe[0], uwsgi.log_master_buf, uwsgi.log_master_bufsize); if (rlen > 0) { -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) uwsgi_alarm_log_check(uwsgi.log_master_buf, rlen); struct uwsgi_regexp_list *url = uwsgi.log_drain_rules; while (url) { - if (uwsgi_regexp_match(url->pattern, url->pattern_extra, uwsgi.log_master_buf, rlen) >= 0) { + if (uwsgi_regexp_match(url->pattern, uwsgi.log_master_buf, rlen) >= 0) { return 0; } url = url->next; @@ -1456,7 +1456,7 @@ int uwsgi_master_log(void) { int show = 0; url = uwsgi.log_filter_rules; while (url) { - if (uwsgi_regexp_match(url->pattern, url->pattern_extra, uwsgi.log_master_buf, rlen) >= 0) { + if (uwsgi_regexp_match(url->pattern, uwsgi.log_master_buf, rlen) >= 0) { show = 1; break; } @@ -1469,7 +1469,7 @@ int uwsgi_master_log(void) { url = uwsgi.log_route; int finish = 0; while (url) { - if (uwsgi_regexp_match(url->pattern, url->pattern_extra, uwsgi.log_master_buf, rlen) >= 0) { + if (uwsgi_regexp_match(url->pattern, uwsgi.log_master_buf, rlen) >= 0) { struct uwsgi_logger *ul_route = (struct uwsgi_logger *) url->custom_ptr; if (ul_route) { uwsgi_log_func_do(uwsgi.requested_log_encoders, ul_route, uwsgi.log_master_buf, rlen); @@ -1509,11 +1509,11 @@ int uwsgi_master_req_log(void) { ssize_t rlen = read(uwsgi.shared->worker_req_log_pipe[0], uwsgi.log_master_buf, uwsgi.log_master_bufsize); if (rlen > 0) { -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) struct uwsgi_regexp_list *url = uwsgi.log_req_route; int finish = 0; while (url) { - if (uwsgi_regexp_match(url->pattern, url->pattern_extra, uwsgi.log_master_buf, rlen) >= 0) { + if (uwsgi_regexp_match(url->pattern, uwsgi.log_master_buf, rlen) >= 0) { struct uwsgi_logger *ul_route = (struct uwsgi_logger *) url->custom_ptr; if (ul_route) { uwsgi_log_func_do(uwsgi.requested_log_req_encoders, ul_route, uwsgi.log_master_buf, rlen); diff --git a/core/regexp.c b/core/regexp.c index a0569db7bd..74bb77751e 100644 --- a/core/regexp.c +++ b/core/regexp.c @@ -1,4 +1,4 @@ -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) #include "uwsgi.h" extern struct uwsgi_server uwsgi; @@ -10,51 +10,119 @@ void uwsgi_opt_pcre_jit(char *opt, char *value, void *foobar) { if (ret != 0 || has_jit != 1) return; uwsgi.pcre_jit = PCRE_STUDY_JIT_COMPILE; +#elif defined(PCRE2_CONFIG_JIT) + int has_jit = 0, ret; + ret = pcre2_config(PCRE2_CONFIG_JIT, &has_jit); + if (ret != 0) + return; + uwsgi.pcre_jit = has_jit; #endif } -int uwsgi_regexp_build(char *re, pcre ** pattern, pcre_extra ** pattern_extra) { +int uwsgi_regexp_build(char *re, uwsgi_pcre ** pattern) { + +#ifdef UWSGI_PCRE2 + int errnbr; + long unsigned int erroff; + *pattern = pcre2_compile((const unsigned char *) re, PCRE2_ZERO_TERMINATED, 0, &errnbr, &erroff, NULL); +#else const char *errstr; int erroff; - *pattern = pcre_compile((const char *) re, 0, &errstr, &erroff, NULL); - if (!*pattern) { + *pattern = uwsgi_malloc(sizeof(uwsgi_pcre)); + (*pattern)->p = pcre_compile((const char *) re, 0, &errstr, &erroff, NULL); +#endif +#ifdef UWSGI_PCRE2 + if (!(*pattern)) { + uwsgi_log("pcre error: code %d at offset %d\n", errnbr, erroff); +#else + if (!((*pattern)->p)) { uwsgi_log("pcre error: %s at offset %d\n", errstr, erroff); +#endif return -1; } +#ifdef UWSGI_PCRE2 + if (uwsgi.pcre_jit) { + errnbr = pcre2_jit_compile(*pattern, PCRE2_JIT_COMPLETE); + if (errnbr) { + pcre2_code_free(*pattern); + uwsgi_log("pcre JIT compile error code %d\n", errnbr); + return -1; + } +#else int opt = uwsgi.pcre_jit; - *pattern_extra = (pcre_extra *) pcre_study((const pcre *) *pattern, opt, &errstr); - if (*pattern_extra == NULL && errstr != NULL) { - pcre_free(*pattern); + (*pattern)->extra = (pcre_extra *) pcre_study((const pcre *) (*pattern)->p, opt, &errstr); + if ((*pattern)->extra == NULL && errstr != NULL) { + pcre_free((*pattern)->p); + free(*pattern); uwsgi_log("pcre (study) error: %s\n", errstr); return -1; +#endif } return 0; } -int uwsgi_regexp_match(pcre * pattern, pcre_extra * pattern_extra, char *subject, int length) { - - return pcre_exec((const pcre *) pattern, (const pcre_extra *) pattern_extra, subject, length, 0, 0, NULL, 0); +int uwsgi_regexp_match(uwsgi_pcre *pattern, const char *subject, int length) { +#ifdef UWSGI_PCRE2 + return pcre2_match(pattern, (const unsigned char *)subject, length, 0, 0, NULL, NULL); +#else + return pcre_exec((const pcre *) pattern->p, (const pcre_extra *) pattern->extra, subject, length, 0, 0, NULL, 0); +#endif } -int uwsgi_regexp_match_ovec(pcre * pattern, pcre_extra * pattern_extra, char *subject, int length, int *ovec, int n) { +int uwsgi_regexp_match_ovec(uwsgi_pcre *pattern, const char *subject, int length, int *ovec, int n) { +#ifdef UWSGI_PCRE2 + int rc; + int i; + pcre2_match_data *match_data; + size_t *pcre2_ovec; + + match_data = pcre2_match_data_create_from_pattern(pattern, NULL); + rc = pcre2_match(pattern, (const unsigned char *)subject, length, 0, 0, match_data, NULL); + + /* + * Quoting PCRE{,2} spec, "The first pair of integers, ovector[0] + * and ovector[1], identify the portion of the subject string matched + * by the entire pattern. The next pair is used for the first capturing + * subpattern, and so on." Therefore, the ovector size is the number of + * capturing subpatterns (INFO_CAPTURECOUNT), from uwsgi_regexp_ovector(), + * as matching pairs, plus room for the first pair. + */ if (n > 0) { - return pcre_exec((const pcre *) pattern, (const pcre_extra *) pattern_extra, subject, length, 0, 0, ovec, (n + 1) * 3); + // copy pcre2 output vector to uwsgi output vector + pcre2_ovec = pcre2_get_ovector_pointer(match_data); + for (i=0;i<(n+1)*2;i++) { + ovec[i] = pcre2_ovec[i]; + } +#else + if (n > 0) { + return pcre_exec((const pcre *) pattern->p, (const pcre_extra *) pattern->extra, subject, length, 0, 0, ovec, PCRE_OVECTOR_BYTESIZE(n)); +#endif } - return pcre_exec((const pcre *) pattern, (const pcre_extra *) pattern_extra, subject, length, 0, 0, NULL, 0); + +#ifdef UWSGI_PCRE2 + pcre2_match_data_free(match_data); + + return rc; +#else + return pcre_exec((const pcre *) pattern->p, (const pcre_extra *) pattern->extra, subject, length, 0, 0, NULL, 0); +#endif } -int uwsgi_regexp_ovector(pcre * pattern, pcre_extra * pattern_extra) { +int uwsgi_regexp_ovector(const uwsgi_pcre *pattern) { int n; - - if (pcre_fullinfo((const pcre *) pattern, (const pcre_extra *) pattern_extra, PCRE_INFO_CAPTURECOUNT, &n)) +#ifdef UWSGI_PCRE2 + if (pcre2_pattern_info(pattern, PCRE2_INFO_CAPTURECOUNT, &n)) +#else + if (pcre_fullinfo((const pcre *) pattern->p, (const pcre_extra *) pattern->extra, PCRE_INFO_CAPTURECOUNT, &n)) +#endif return 0; return n; @@ -66,7 +134,7 @@ char *uwsgi_regexp_apply_ovec(char *src, int src_n, char *dst, int dst_n, int *o int dollar = 0; size_t dollars = n; - + for(i=0;ipattern, routes->pattern_extra, subject, subject_len, routes->ovector[wsgi_req->async_id], routes->ovn[wsgi_req->async_id]); + n = uwsgi_regexp_match_ovec(routes->pattern, subject, subject_len, routes->ovector[wsgi_req->async_id], routes->ovn[wsgi_req->async_id]); } else { int ret = routes->if_func(wsgi_req, routes); @@ -506,15 +506,15 @@ void uwsgi_fixup_routes(struct uwsgi_route *ur) { // fill them if needed... (this is an optimization for route with a static subject) if (ur->subject && ur->subject_len) { - if (uwsgi_regexp_build(ur->orig_route, &ur->pattern, &ur->pattern_extra)) { + if (uwsgi_regexp_build(ur->orig_route, &ur->pattern)) { exit(1); } int i; for(i=0;iovn[i] = uwsgi_regexp_ovector(ur->pattern, ur->pattern_extra); + ur->ovn[i] = uwsgi_regexp_ovector(ur->pattern); if (ur->ovn[i] > 0) { - ur->ovector[i] = uwsgi_calloc(sizeof(int) * (3 * (ur->ovn[i] + 1))); + ur->ovector[i] = uwsgi_calloc(sizeof(int) * PCRE_OVECTOR_BYTESIZE(ur->ovn[i])); } } } @@ -1484,38 +1484,47 @@ static int uwsgi_route_condition_regexp(struct wsgi_request *wsgi_req, struct uw ur->condition_ub[wsgi_req->async_id] = uwsgi_routing_translate(wsgi_req, ur, NULL, 0, ur->subject_str, semicolon - ur->subject_str); if (!ur->condition_ub[wsgi_req->async_id]) return -1; - pcre *pattern; - pcre_extra *pattern_extra; + uwsgi_pcre *pattern; char *re = uwsgi_concat2n(semicolon+1, ur->subject_str_len - ((semicolon+1) - ur->subject_str), "", 0); - if (uwsgi_regexp_build(re, &pattern, &pattern_extra)) { + if (uwsgi_regexp_build(re, &pattern)) { free(re); return -1; } free(re); // a condition has no initialized vectors, let's create them - ur->ovn[wsgi_req->async_id] = uwsgi_regexp_ovector(pattern, pattern_extra); + ur->ovn[wsgi_req->async_id] = uwsgi_regexp_ovector(pattern); if (ur->ovn[wsgi_req->async_id] > 0) { ur->ovector[wsgi_req->async_id] = uwsgi_calloc(sizeof(int) * (3 * (ur->ovn[wsgi_req->async_id] + 1))); } - if (uwsgi_regexp_match_ovec(pattern, pattern_extra, ur->condition_ub[wsgi_req->async_id]->buf, ur->condition_ub[wsgi_req->async_id]->pos, ur->ovector[wsgi_req->async_id], ur->ovn[wsgi_req->async_id] ) >= 0) { - pcre_free(pattern); + if (uwsgi_regexp_match_ovec(pattern, ur->condition_ub[wsgi_req->async_id]->buf, ur->condition_ub[wsgi_req->async_id]->pos, ur->ovector[wsgi_req->async_id], ur->ovn[wsgi_req->async_id] ) >= 0) { +#ifdef UWSGI_PCRE2 + pcre2_code_free(pattern); +#else + pcre_free(pattern->p); #ifdef PCRE_STUDY_JIT_COMPILE - pcre_free_study(pattern_extra); + pcre_free_study(pattern->extra); #else - pcre_free(pattern_extra); + pcre_free(pattern->extra); +#endif + free(pattern); #endif return 1; } - pcre_free(pattern); +#ifdef UWSGI_PCRE2 + pcre2_code_free(pattern); +#else + pcre_free(pattern->p); #ifdef PCRE_STUDY_JIT_COMPILE - pcre_free_study(pattern_extra); + pcre_free_study(pattern->extra); #else - pcre_free(pattern_extra); + pcre_free(pattern->extra); #endif - return 0; + free(pattern); +#endif + return 0; } static int uwsgi_route_condition_empty(struct wsgi_request *wsgi_req, struct uwsgi_route *ur) { diff --git a/core/ssl.c b/core/ssl.c index 1fafd659e9..18b1103642 100644 --- a/core/ssl.c +++ b/core/ssl.c @@ -145,10 +145,10 @@ static int uwsgi_sni_cb(SSL *ssl, int *ad, void *arg) { if (uwsgi.subscription_dotsplit) goto end; -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) struct uwsgi_regexp_list *url = uwsgi.sni_regexp; while(url) { - if (uwsgi_regexp_match(url->pattern, url->pattern_extra, (char *)servername, servername_len) >= 0) { + if (uwsgi_regexp_match(url->pattern, (char *)servername, servername_len) >= 0) { SSL_set_SSL_CTX(ssl, url->custom_ptr); return SSL_TLSEXT_ERR_OK; } @@ -628,7 +628,7 @@ void uwsgi_opt_sni(char *opt, char *value, void *foobar) { return; } -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) if (!strcmp(opt, "sni-regexp")) { struct uwsgi_regexp_list *url = uwsgi_regexp_new_list(&uwsgi.sni_regexp, v); url->custom_ptr = ctx; @@ -637,7 +637,7 @@ void uwsgi_opt_sni(char *opt, char *value, void *foobar) { #endif struct uwsgi_string_list *usl = uwsgi_string_new_list(&uwsgi.sni, v); usl->custom_ptr = ctx; -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) } #endif diff --git a/core/static.c b/core/static.c index 27f225e4bf..9764db291c 100644 --- a/core/static.c +++ b/core/static.c @@ -35,11 +35,11 @@ int uwsgi_static_want_gzip(struct wsgi_request *wsgi_req, char *filename, size_t usl = usl->next; } -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) // check for regexp struct uwsgi_regexp_list *url = uwsgi.static_gzip; while(url) { - if (uwsgi_regexp_match(url->pattern, url->pattern_extra, filename, *filename_len) >= 0) { + if (uwsgi_regexp_match(url->pattern, filename, *filename_len) >= 0) { goto gzip; } url = url->next; @@ -220,7 +220,7 @@ int uwsgi_add_expires_type(struct wsgi_request *wsgi_req, char *mime_type, int m return 0; } -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) int uwsgi_add_expires(struct wsgi_request *wsgi_req, char *filename, int filename_len, struct stat *st) { struct uwsgi_dyn_dict *udd = uwsgi.static_expires; @@ -229,7 +229,7 @@ int uwsgi_add_expires(struct wsgi_request *wsgi_req, char *filename, int filenam char expires[31]; while (udd) { - if (uwsgi_regexp_match(udd->pattern, udd->pattern_extra, filename, filename_len) >= 0) { + if (uwsgi_regexp_match(udd->pattern, filename, filename_len) >= 0) { int delta = uwsgi_str_num(udd->value, udd->vallen); int size = uwsgi_http_date(now + delta, expires); if (size > 0) { @@ -242,7 +242,7 @@ int uwsgi_add_expires(struct wsgi_request *wsgi_req, char *filename, int filenam udd = uwsgi.static_expires_mtime; while (udd) { - if (uwsgi_regexp_match(udd->pattern, udd->pattern_extra, filename, filename_len) >= 0) { + if (uwsgi_regexp_match(udd->pattern, filename, filename_len) >= 0) { int delta = uwsgi_str_num(udd->value, udd->vallen); int size = uwsgi_http_date(st->st_mtime + delta, expires); if (size > 0) { @@ -264,7 +264,7 @@ int uwsgi_add_expires_path_info(struct wsgi_request *wsgi_req, struct stat *st) char expires[31]; while (udd) { - if (uwsgi_regexp_match(udd->pattern, udd->pattern_extra, wsgi_req->path_info, wsgi_req->path_info_len) >= 0) { + if (uwsgi_regexp_match(udd->pattern, wsgi_req->path_info, wsgi_req->path_info_len) >= 0) { int delta = uwsgi_str_num(udd->value, udd->vallen); int size = uwsgi_http_date(now + delta, expires); if (size > 0) { @@ -277,7 +277,7 @@ int uwsgi_add_expires_path_info(struct wsgi_request *wsgi_req, struct stat *st) udd = uwsgi.static_expires_path_info_mtime; while (udd) { - if (uwsgi_regexp_match(udd->pattern, udd->pattern_extra, wsgi_req->path_info, wsgi_req->path_info_len) >= 0) { + if (uwsgi_regexp_match(udd->pattern, wsgi_req->path_info, wsgi_req->path_info_len) >= 0) { int delta = uwsgi_str_num(udd->value, udd->vallen); int size = uwsgi_http_date(st->st_mtime + delta, expires); if (size > 0) { @@ -299,7 +299,7 @@ int uwsgi_add_expires_uri(struct wsgi_request *wsgi_req, struct stat *st) { char expires[31]; while (udd) { - if (uwsgi_regexp_match(udd->pattern, udd->pattern_extra, wsgi_req->uri, wsgi_req->uri_len) >= 0) { + if (uwsgi_regexp_match(udd->pattern, wsgi_req->uri, wsgi_req->uri_len) >= 0) { int delta = uwsgi_str_num(udd->value, udd->vallen); int size = uwsgi_http_date(now + delta, expires); if (size > 0) { @@ -312,7 +312,7 @@ int uwsgi_add_expires_uri(struct wsgi_request *wsgi_req, struct stat *st) { udd = uwsgi.static_expires_uri_mtime; while (udd) { - if (uwsgi_regexp_match(udd->pattern, udd->pattern_extra, wsgi_req->uri, wsgi_req->uri_len) >= 0) { + if (uwsgi_regexp_match(udd->pattern, wsgi_req->uri, wsgi_req->uri_len) >= 0) { int delta = uwsgi_str_num(udd->value, udd->vallen); int size = uwsgi_http_date(st->st_mtime + delta, expires); if (size > 0) { @@ -511,7 +511,7 @@ int uwsgi_real_file_serve(struct wsgi_request *wsgi_req, char *real_filename, si if (uwsgi_response_prepare_headers(wsgi_req, "200 OK", 6)) return -1; } -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) uwsgi_add_expires(wsgi_req, real_filename, real_filename_len, st); uwsgi_add_expires_path_info(wsgi_req, st); uwsgi_add_expires_uri(wsgi_req, st); diff --git a/core/utils.c b/core/utils.c index 420a343975..44c7a52f37 100644 --- a/core/utils.c +++ b/core/utils.c @@ -2340,7 +2340,7 @@ struct uwsgi_string_list *uwsgi_string_new_list(struct uwsgi_string_list **list, return uwsgi_string; } -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) struct uwsgi_regexp_list *uwsgi_regexp_custom_new_list(struct uwsgi_regexp_list **list, char *value, char *custom) { struct uwsgi_regexp_list *url = *list, *old_url; @@ -2359,7 +2359,7 @@ struct uwsgi_regexp_list *uwsgi_regexp_custom_new_list(struct uwsgi_regexp_list old_url->next = url; } - if (uwsgi_regexp_build(value, &url->pattern, &url->pattern_extra)) { + if (uwsgi_regexp_build(value, &url->pattern)) { exit(1); } url->next = NULL; @@ -2372,14 +2372,13 @@ struct uwsgi_regexp_list *uwsgi_regexp_custom_new_list(struct uwsgi_regexp_list int uwsgi_regexp_match_pattern(char *pattern, char *str) { - pcre *regexp; - pcre_extra *regexp_extra; + uwsgi_pcre *regexp; - if (uwsgi_regexp_build(pattern, ®exp, ®exp_extra)) + if (uwsgi_regexp_build(pattern, ®exp)) return 1; - return !uwsgi_regexp_match(regexp, regexp_extra, str, strlen(str)); -} + return !uwsgi_regexp_match(regexp, str, strlen(str)); +} #endif diff --git a/core/uwsgi.c b/core/uwsgi.c index 915528f7c7..abcf467ec4 100755 --- a/core/uwsgi.c +++ b/core/uwsgi.c @@ -109,7 +109,7 @@ static struct uwsgi_option uwsgi_base_options[] = { {"if-hostname", required_argument, 0, "(opt logic) check for hostname", uwsgi_opt_logic, (void *) uwsgi_logic_opt_if_hostname, UWSGI_OPT_IMMEDIATE}, {"if-not-hostname", required_argument, 0, "(opt logic) check for hostname", uwsgi_opt_logic, (void *) uwsgi_logic_opt_if_not_hostname, UWSGI_OPT_IMMEDIATE}, -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) {"if-hostname-match", required_argument, 0, "(opt logic) try to match hostname against a regular expression", uwsgi_opt_logic, (void *) uwsgi_logic_opt_if_hostname_match, UWSGI_OPT_IMMEDIATE}, {"if-not-hostname-match", required_argument, 0, "(opt logic) try to match hostname against a regular expression", uwsgi_opt_logic, (void *) uwsgi_logic_opt_if_not_hostname_match, UWSGI_OPT_IMMEDIATE}, #endif @@ -565,7 +565,7 @@ static struct uwsgi_option uwsgi_base_options[] = { {"ksm", optional_argument, 0, "enable Linux KSM", uwsgi_opt_set_int, &uwsgi.linux_ksm, 0}, #endif #endif -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) {"pcre-jit", no_argument, 0, "enable pcre jit (if available)", uwsgi_opt_pcre_jit, NULL, UWSGI_OPT_IMMEDIATE}, #endif {"never-swap", no_argument, 0, "lock all memory pages avoiding swapping", uwsgi_opt_true, &uwsgi.never_swap, 0}, @@ -701,7 +701,7 @@ static struct uwsgi_option uwsgi_base_options[] = { {"ssl-enable-sslv3", no_argument, 0, "enable SSLv3 (insecure)", uwsgi_opt_true, &uwsgi.sslv3, 0}, {"ssl-enable-tlsv1", no_argument, 0, "enable TLSv1 (insecure)", uwsgi_opt_true, &uwsgi.tlsv1, 0}, {"ssl-option", required_argument, 0, "set a raw ssl option (numeric value)", uwsgi_opt_add_string_list, &uwsgi.ssl_options, 0}, -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) {"sni-regexp", required_argument, 0, "add an SNI-governed SSL context (the key is a regexp)", uwsgi_opt_sni, NULL, 0}, #endif {"ssl-tmp-dir", required_argument, 0, "store ssl-related temp files in the specified directory", uwsgi_opt_set_str, &uwsgi.ssl_tmp_dir, 0}, @@ -743,7 +743,7 @@ static struct uwsgi_option uwsgi_base_options[] = { {"worker-log-req-encoder", required_argument, 0, "add an item in the log req encoder chain", uwsgi_opt_add_string_list, &uwsgi.requested_log_req_encoders, 0}, -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) {"log-drain", required_argument, 0, "drain (do not show) log lines matching the specified regexp", uwsgi_opt_add_regexp_list, &uwsgi.log_drain_rules, UWSGI_OPT_MASTER | UWSGI_OPT_LOG_MASTER}, {"log-filter", required_argument, 0, "show only log lines matching the specified regexp", uwsgi_opt_add_regexp_list, &uwsgi.log_filter_rules, UWSGI_OPT_MASTER | UWSGI_OPT_LOG_MASTER}, {"log-route", required_argument, 0, "log to the specified named logger if regexp applied on logline matches", uwsgi_opt_add_regexp_custom_list, &uwsgi.log_route, UWSGI_OPT_MASTER | UWSGI_OPT_LOG_MASTER}, @@ -764,7 +764,7 @@ static struct uwsgi_option uwsgi_base_options[] = { {"alarm-lq", required_argument, 0, "raise the specified alarm when the socket backlog queue is full", uwsgi_opt_add_string_list, &uwsgi.alarm_backlog, UWSGI_OPT_MASTER}, {"alarm-listen-queue", required_argument, 0, "raise the specified alarm when the socket backlog queue is full", uwsgi_opt_add_string_list, &uwsgi.alarm_backlog, UWSGI_OPT_MASTER}, {"listen-queue-alarm", required_argument, 0, "raise the specified alarm when the socket backlog queue is full", uwsgi_opt_add_string_list, &uwsgi.alarm_backlog, UWSGI_OPT_MASTER}, -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) {"log-alarm", required_argument, 0, "raise the specified alarm when a log line matches the specified regexp, syntax: [,alarm...] ", uwsgi_opt_add_string_list, &uwsgi.alarm_logs_list, UWSGI_OPT_MASTER | UWSGI_OPT_LOG_MASTER}, {"alarm-log", required_argument, 0, "raise the specified alarm when a log line matches the specified regexp, syntax: [,alarm...] ", uwsgi_opt_add_string_list, &uwsgi.alarm_logs_list, UWSGI_OPT_MASTER | UWSGI_OPT_LOG_MASTER}, {"not-log-alarm", required_argument, 0, "skip the specified alarm when a log line matches the specified regexp, syntax: [,alarm...] ", uwsgi_opt_add_string_list_custom, &uwsgi.alarm_logs_list, UWSGI_OPT_MASTER | UWSGI_OPT_LOG_MASTER}, @@ -945,7 +945,7 @@ static struct uwsgi_option uwsgi_base_options[] = { {"static-expires-type", required_argument, 0, "set the Expires header based on content type", uwsgi_opt_add_dyn_dict, &uwsgi.static_expires_type, UWSGI_OPT_MIME}, {"static-expires-type-mtime", required_argument, 0, "set the Expires header based on content type and file mtime", uwsgi_opt_add_dyn_dict, &uwsgi.static_expires_type_mtime, UWSGI_OPT_MIME}, -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) {"static-expires", required_argument, 0, "set the Expires header based on filename regexp", uwsgi_opt_add_regexp_dyn_dict, &uwsgi.static_expires, UWSGI_OPT_MIME}, {"static-expires-mtime", required_argument, 0, "set the Expires header based on filename regexp and file mtime", uwsgi_opt_add_regexp_dyn_dict, &uwsgi.static_expires_mtime, UWSGI_OPT_MIME}, @@ -2500,7 +2500,7 @@ void uwsgi_setup(int argc, char *argv[], char *envp[]) { #endif uwsgi_log_initial("clock source: %s\n", uwsgi.clock->name); -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) if (uwsgi.pcre_jit) { uwsgi_log_initial("pcre jit enabled\n"); } @@ -4302,7 +4302,7 @@ void uwsgi_opt_add_string_list_custom(char *opt, char *value, void *list) { usl->custom = 1; } -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) void uwsgi_opt_add_regexp_list(char *opt, char *value, void *list) { struct uwsgi_regexp_list **ptr = (struct uwsgi_regexp_list **) list; uwsgi_regexp_new_list(ptr, value); @@ -4568,7 +4568,7 @@ void uwsgi_opt_add_dyn_dict(char *opt, char *value, void *dict) { } -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) void uwsgi_opt_add_regexp_dyn_dict(char *opt, char *value, void *dict) { char *space = strchr(value, ' '); @@ -4583,7 +4583,7 @@ void uwsgi_opt_add_regexp_dyn_dict(char *opt, char *value, void *dict) { char *regexp = uwsgi_concat2n(value, space - value, "", 0); - if (uwsgi_regexp_build(regexp, &new_udd->pattern, &new_udd->pattern_extra)) { + if (uwsgi_regexp_build(regexp, &new_udd->pattern)) { exit(1); } diff --git a/plugins/php/php_plugin.c b/plugins/php/php_plugin.c index d9b615bac9..375fa6940a 100644 --- a/plugins/php/php_plugin.c +++ b/plugins/php/php_plugin.c @@ -16,7 +16,7 @@ struct uwsgi_php { struct uwsgi_string_list *index; struct uwsgi_string_list *set; struct uwsgi_string_list *append_config; -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) struct uwsgi_regexp_list *app_bypass; #endif struct uwsgi_string_list *vars; @@ -70,7 +70,7 @@ struct uwsgi_option uwsgi_php_options[] = { {"php-fallback", required_argument, 0, "run the specified php script when the requested one does not exist", uwsgi_opt_set_str, &uphp.fallback, 0}, {"php-fallback2", required_argument, 0, "run the specified php script relative to the document root when the requested one does not exist", uwsgi_opt_set_str, &uphp.fallback2, 0}, {"php-fallback-qs", required_argument, 0, "php-fallback with QUERY_STRING set", uwsgi_opt_set_str, &uphp.fallback_qs, 0}, -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) {"php-app-bypass", required_argument, 0, "if the regexp matches the uri the --php-app is bypassed", uwsgi_opt_add_regexp_list, &uphp.app_bypass, 0}, #endif {"php-var", required_argument, 0, "add/overwrite a CGI variable at each request", uwsgi_opt_add_string_list, &uphp.vars, 0}, @@ -874,10 +874,14 @@ int uwsgi_php_request(struct wsgi_request *wsgi_req) { wsgi_req->document_root_len = strlen(wsgi_req->document_root); if (uphp.app) { -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) struct uwsgi_regexp_list *bypass = uphp.app_bypass; while (bypass) { +#ifdef UWSGI_PCRE2 + if (uwsgi_regexp_match(bypass->pattern, wsgi_req->uri, wsgi_req->uri_len) >= 0) { +#else if (uwsgi_regexp_match(bypass->pattern, bypass->pattern_extra, wsgi_req->uri, wsgi_req->uri_len) >= 0) { +#endif goto oldstyle; } bypass = bypass->next; @@ -914,7 +918,7 @@ int uwsgi_php_request(struct wsgi_request *wsgi_req) { goto secure2; } -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) oldstyle: #endif diff --git a/uwsgi.h b/uwsgi.h index 69f9fbf8ab..c512cc74ee 100755 --- a/uwsgi.h +++ b/uwsgi.h @@ -449,8 +449,26 @@ struct uwsgi_lock_ops { #define uwsgi_wait_read_req(x) uwsgi.wait_read_hook(x->fd, uwsgi.socket_timeout) ; x->switches++ #define uwsgi_wait_write_req(x) uwsgi.wait_write_hook(x->fd, uwsgi.socket_timeout) ; x->switches++ -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) +#ifdef UWSGI_PCRE2 + +#define PCRE2_CODE_UNIT_WIDTH 8 +#include +#define PCRE_OVECTOR_BYTESIZE(n) (n+1)*2 + +typedef pcre2_code uwsgi_pcre; + +#else + #include +#define PCRE_OVECTOR_BYTESIZE(n) (n+1)*3 + +typedef struct { + pcre *p; + pcre_extra *extra; +} uwsgi_pcre; + +#endif #endif struct uwsgi_dyn_dict { @@ -466,9 +484,8 @@ struct uwsgi_dyn_dict { struct uwsgi_dyn_dict *prev; struct uwsgi_dyn_dict *next; -#ifdef UWSGI_PCRE - pcre *pattern; - pcre_extra *pattern_extra; +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) + uwsgi_pcre *pattern; #endif }; @@ -479,11 +496,10 @@ struct uwsgi_hook { struct uwsgi_hook *next; }; -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) struct uwsgi_regexp_list { - pcre *pattern; - pcre_extra *pattern_extra; + uwsgi_pcre *pattern; uint64_t custom; char *custom_str; @@ -1100,11 +1116,11 @@ struct uwsgi_plugin { void (*post_uwsgi_fork) (int); }; -#ifdef UWSGI_PCRE -int uwsgi_regexp_build(char *, pcre **, pcre_extra **); -int uwsgi_regexp_match(pcre *, pcre_extra *, char *, int); -int uwsgi_regexp_match_ovec(pcre *, pcre_extra *, char *, int, int *, int); -int uwsgi_regexp_ovector(pcre *, pcre_extra *); +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) +int uwsgi_regexp_build(char *, uwsgi_pcre **); +int uwsgi_regexp_match(uwsgi_pcre *, const char *, int); +int uwsgi_regexp_match_ovec(uwsgi_pcre *, const char *, int, int *, int); +int uwsgi_regexp_ovector(const uwsgi_pcre *); char *uwsgi_regexp_apply_ovec(char *, int, char *, int, int *, int); int uwsgi_regexp_match_pattern(char *pattern, char *str); @@ -1195,8 +1211,7 @@ struct uwsgi_spooler { struct uwsgi_route { - pcre *pattern; - pcre_extra *pattern_extra; + uwsgi_pcre *pattern; char *orig_route; @@ -1305,15 +1320,14 @@ struct uwsgi_alarm_fd { struct uwsgi_alarm_fd *uwsgi_add_alarm_fd(int, char *, size_t, char *, size_t); -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) struct uwsgi_alarm_ll { struct uwsgi_alarm_instance *alarm; struct uwsgi_alarm_ll *next; }; struct uwsgi_alarm_log { - pcre *pattern; - pcre_extra *pattern_extra; + uwsgi_pcre *pattern; int negate; struct uwsgi_alarm_ll *alarms; struct uwsgi_alarm_log *next; @@ -2258,7 +2272,7 @@ struct uwsgi_server { struct uwsgi_string_list *requested_log_encoders; struct uwsgi_string_list *requested_log_req_encoders; -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) int pcre_jit; struct uwsgi_regexp_list *log_drain_rules; struct uwsgi_regexp_list *log_filter_rules; @@ -2340,7 +2354,7 @@ struct uwsgi_server { int static_gzip_all; struct uwsgi_string_list *static_gzip_dir; struct uwsgi_string_list *static_gzip_ext; -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) struct uwsgi_regexp_list *static_gzip; #endif @@ -2745,7 +2759,7 @@ struct uwsgi_server { int ssl_sessions_timeout; struct uwsgi_cache *ssl_sessions_cache; char *ssl_tmp_dir; -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) struct uwsgi_regexp_list *sni_regexp; #endif struct uwsgi_string_list *sni; @@ -3667,7 +3681,7 @@ void uwsgi_close_all_unshared_sockets(void); void uwsgi_shutdown_all_sockets(void); struct uwsgi_string_list *uwsgi_string_new_list(struct uwsgi_string_list **, char *); -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) struct uwsgi_regexp_list *uwsgi_regexp_custom_new_list(struct uwsgi_regexp_list **, char *, char *); #define uwsgi_regexp_new_list(x, y) uwsgi_regexp_custom_new_list(x, y, NULL); #endif @@ -3941,7 +3955,7 @@ void uwsgi_opt_add_addr_list(char *, char *, void *); void uwsgi_opt_add_string_list_custom(char *, char *, void *); void uwsgi_opt_add_dyn_dict(char *, char *, void *); void uwsgi_opt_binary_append_data(char *, char *, void *); -#ifdef UWSGI_PCRE +#if defined(UWSGI_PCRE) || defined(UWSGI_PCRE2) void uwsgi_opt_pcre_jit(char *, char *, void *); void uwsgi_opt_add_regexp_dyn_dict(char *, char *, void *); void uwsgi_opt_add_regexp_list(char *, char *, void *); diff --git a/uwsgiconfig.py b/uwsgiconfig.py index e68ca4ac79..f9bc612b79 100644 --- a/uwsgiconfig.py +++ b/uwsgiconfig.py @@ -1099,30 +1099,29 @@ def get_gcll(self): has_pcre = False - # re-enable after pcre fix - if self.get('pcre'): - if self.get('pcre') == 'auto': - pcreconf = spcall('pcre-config --libs') - if pcreconf: - self.libs.append(pcreconf) - pcreconf = spcall("pcre-config --cflags") - self.cflags.append(pcreconf) - self.gcc_list.append('core/regexp') - self.cflags.append("-DUWSGI_PCRE") - has_pcre = True - + required_pcre = self.get('pcre') + if required_pcre: + pcre_libs = spcall('pcre2-config --libs8') + if pcre_libs: + pcre_cflags = spcall("pcre2-config --cflags") + pcre_define = "-DUWSGI_PCRE2" else: - pcreconf = spcall('pcre-config --libs') - if pcreconf is None: - print("*** libpcre headers unavailable. uWSGI build is interrupted. You have to install pcre development package or disable pcre") - sys.exit(1) - else: - self.libs.append(pcreconf) - pcreconf = spcall("pcre-config --cflags") - self.cflags.append(pcreconf) - self.gcc_list.append('core/regexp') - self.cflags.append("-DUWSGI_PCRE") - has_pcre = True + pcre_libs = spcall('pcre-config --libs') + pcre_cflags = spcall("pcre-config --cflags") + pcre_define = "-DUWSGI_PCRE" + else: + pcre_libs = None + + if required_pcre: + if required_pcre != 'auto' and pcre_libs is None: + print("*** libpcre headers unavailable. uWSGI build is interrupted. You have to install pcre development package or disable pcre") + sys.exit(1) + + self.libs.append(pcre_libs) + self.cflags.append(pcre_cflags) + self.gcc_list.append('core/regexp') + self.cflags.append(pcre_define) + has_pcre = True if has_pcre: report['pcre'] = True