Skip to content

Commit

Permalink
More fine granular regex config options
Browse files Browse the repository at this point in the history
  • Loading branch information
mmd-osm committed Aug 14, 2023
1 parent b6e5c2d commit 95ab88b
Show file tree
Hide file tree
Showing 7 changed files with 94 additions and 36 deletions.
6 changes: 4 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")

option(HAVE_OVERPASS_XML "Build with Overpass XML dialect" ON)
option(HAVE_FASTCGI "Build with FastCGI library" ON)
option(HAVE_POSIX "Use C POSIX regular expressions" ON)
option(HAVE_PCRE "Build with PCRE library" ON)
option(HAVE_PCREJIT "Build with PCRE library (JIT)" ON)
option(HAVE_ICU "Build with ICU library" OFF)
option(HAVE_TCMALLOC "Build with tcmalloc library" ON)
option(HAVE_LZ4 "Build with lz4 library" ON)
Expand Down Expand Up @@ -107,7 +109,7 @@ if (HAVE_FASTCGI)
find_package(FCGI++ REQUIRED)
endif()

if (HAVE_PCRE)
if (HAVE_PCRE OR HAVE_PCREJIT)
find_package(PCRE2 REQUIRED)
endif()

Expand Down Expand Up @@ -160,7 +162,7 @@ if (HAVE_TCMALLOC)
list(APPEND LIBS ${Tcmalloc_LIBRARY})
endif()

if (HAVE_PCRE)
if (HAVE_PCRE OR HAVE_PCREJIT)
list(APPEND LIBS ${PCRE2_LIBRARIES})
endif()

Expand Down
2 changes: 2 additions & 0 deletions cmake/config.h.in
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#cmakedefine HAVE_FASTCGI 1
#cmakedefine HAVE_POSIX 1
#cmakedefine HAVE_PCRE 1
#cmakedefine HAVE_PCREJIT 1
#cmakedefine HAVE_TCMALLOC 1
#cmakedefine HAVE_LZ4 1
#cmakedefine HAVE_OPENMP 1
Expand Down
2 changes: 2 additions & 0 deletions src/configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,10 @@ AC_LANG_POP([C++])


AC_DEFINE(HAVE_LIBOSMIUM, 1, [Define if you have libosmium library])
AC_DEFINE(HAVE_POSIX, 1, [Define if you have C POSIX regex library])
AC_DEFINE(HAVE_ICU, 1, [Define if you have ICU library])
AC_DEFINE(HAVE_PCRE, 1, [Define if you have PCRE library])
AC_DEFINE(HAVE_PCREJIT, 1, [Define if you have PCRE library with JIT])
AC_DEFINE(HAVE_OPENMP, 1, [Define if you have OpenMP])

# Some unused output formats are no longer compiled in by default
Expand Down
74 changes: 48 additions & 26 deletions src/overpass_api/data/regular_expression.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@

#include "sys/types.h"
#include "locale.h"
#ifdef HAVE_POSIX
#include "regex.h"
#endif

#include <iostream>
#include <string>
Expand All @@ -40,7 +42,7 @@ using icu::UnicodeString;
using icu::RegexMatcher;
#endif

#ifdef HAVE_PCRE
#if defined(HAVE_PCRE) || defined(HAVE_PCREJIT)
#define PCRE2_CODE_UNIT_WIDTH 8
#include <pcre2.h>
#endif
Expand Down Expand Up @@ -95,8 +97,8 @@ class Regular_Expression
Strategy strategy;
};


class Regular_Expression_POSIX : public Regular_Expression
#ifdef HAVE_POSIX
class Regular_Expression_POSIX final : public Regular_Expression
{
public:

Expand Down Expand Up @@ -153,13 +155,13 @@ class Regular_Expression_POSIX : public Regular_Expression
private:
regex_t preg;
};

#endif



#ifdef HAVE_ICU

class Regular_Expression_ICU : public Regular_Expression
class Regular_Expression_ICU final : public Regular_Expression
{
public:

Expand Down Expand Up @@ -265,9 +267,9 @@ class Regular_Expression_ICU : public Regular_Expression

#endif

#ifdef HAVE_PCRE
#if defined(HAVE_PCRE) || defined(HAVE_PCREJIT)

class Regular_Expression_PCRE : public Regular_Expression
class Regular_Expression_PCRE final : public Regular_Expression
{
public:

Expand Down Expand Up @@ -301,6 +303,7 @@ class Regular_Expression_PCRE : public Regular_Expression
throw Regular_Expression_Error(std::string(reinterpret_cast<const char*>(buffer), size));
}

#ifdef HAVE_PCREJIT
if (enable_jit) {
pcre2_config(PCRE2_CONFIG_JIT, &pcre2_jit_on);
}
Expand All @@ -323,6 +326,7 @@ class Regular_Expression_PCRE : public Regular_Expression

pcre2_jit_stack_assign(mcontext, NULL, jit_stack);
}
#endif

match_data = pcre2_match_data_create_from_pattern(re, NULL);
if (match_data == nullptr) {
Expand Down Expand Up @@ -367,9 +371,17 @@ class Regular_Expression_PCRE : public Regular_Expression

uint32_t options = 0;

int rc;
int rc = 0;

#if defined(HAVE_PCRE) && defined(HAVE_PCREJIT)
if (pcre2_jit_on) {
#elif defined(HAVE_PCREJIT)
if (true) {
#else
if (false) {
#endif

#ifdef HAVE_PCREJIT
rc = pcre2_jit_match(
re, /* the compiled pattern */
reinterpret_cast<PCRE2_SPTR>(line.data()), /* the subject string */
Expand All @@ -378,8 +390,10 @@ class Regular_Expression_PCRE : public Regular_Expression
options, /* options */
match_data, /* block for storing the result */
NULL); /* use default match context */
#endif
}
else {
#ifdef HAVE_PCRE
rc = pcre2_match(
re, /* the compiled pattern */
reinterpret_cast<PCRE2_SPTR>(line.data()), /* the subject string */
Expand All @@ -388,6 +402,7 @@ class Regular_Expression_PCRE : public Regular_Expression
options, /* options */
match_data, /* block for storing the result */
NULL); /* use default match context */
#endif
}

if (rc < 0) {
Expand Down Expand Up @@ -426,9 +441,17 @@ class Regular_Expression_PCRE : public Regular_Expression

uint32_t options = 0;

int rc;
int rc = 0;

#if defined(HAVE_PCRE) && defined(HAVE_PCREJIT)
if (pcre2_jit_on) {
#elif defined(HAVE_PCREJIT)
if (true) {
#else
if (false) {
#endif

#ifdef HAVE_PCREJIT
rc = pcre2_jit_match(
re, /* the compiled pattern */
reinterpret_cast<PCRE2_SPTR>(line.data()), /* the subject string */
Expand All @@ -437,8 +460,10 @@ class Regular_Expression_PCRE : public Regular_Expression
options, /* options */
match_data, /* block for storing the result */
NULL); /* use default match context */
#endif
}
else {
#ifdef HAVE_PCRE
rc = pcre2_match(
re, /* the compiled pattern */
reinterpret_cast<PCRE2_SPTR>(line.data()), /* the subject string */
Expand All @@ -447,6 +472,7 @@ class Regular_Expression_PCRE : public Regular_Expression
options, /* options */
match_data, /* block for storing the result */
NULL); /* use default match context */
#endif
}

if (rc < 0) {
Expand Down Expand Up @@ -488,30 +514,26 @@ class Regular_Expression_Factory

public:

static Regular_Expression* get_regexp_engine(const std::string& engine, const std::string& regex, bool case_sensitive )
static Regular_Expression* get_regexp(const std::string& engine, const std::string& regex, bool case_sensitive )
{
if (engine == "ICU") {

#ifdef HAVE_ICU
return new Regular_Expression_ICU(regex, case_sensitive);
#else
throw std::runtime_error("ICU support not available");
if (engine == "ICU") return new Regular_Expression_ICU(regex, case_sensitive);
#endif
} else if (engine == "PCRE") {

#ifdef HAVE_PCRE
return new Regular_Expression_PCRE(regex, case_sensitive, false);
#else
throw std::runtime_error("PCRE support not available");
if (engine == "PCRE") return new Regular_Expression_PCRE(regex, case_sensitive, false);
#endif
} else if (engine == "PCREJIT") {
#ifdef HAVE_PCRE
return new Regular_Expression_PCRE(regex, case_sensitive, true);
#else
throw std::runtime_error("PCRE support not available");

#ifdef HAVE_PCREJIT
if (engine == "PCREJIT") return new Regular_Expression_PCRE(regex, case_sensitive, true);
#endif

#ifdef HAVE_POSIX
if (engine == "POSIX") return new Regular_Expression_POSIX(regex, case_sensitive);
#endif
}

// always fall back to POSIX
return new Regular_Expression_POSIX(regex, case_sensitive);
throw std::runtime_error("No suitable regular expression engine found.");
}
};

Expand Down
17 changes: 12 additions & 5 deletions src/overpass_api/statements/osm_script.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include "../core/settings.h"
#include "../frontend/output_handler_parser.h"
Expand All @@ -27,6 +29,7 @@
#include "print.h"



Generic_Statement_Maker< Osm_Script_Statement > Osm_Script_Statement::statement_maker("osm-script");


Expand Down Expand Up @@ -63,6 +66,7 @@ Osm_Script_Statement::Osm_Script_Statement
desired_timestamp(NOW), comparison_timestamp(0), add_deletion_information(false),
max_allowed_time(0), max_allowed_space(0)
{

std::map< std::string, std::string > attributes;

attributes["bbox"] = "";
Expand Down Expand Up @@ -121,14 +125,17 @@ Osm_Script_Statement::Osm_Script_Statement
}
}

if (attributes["regexp"] == "POSIX" ||
attributes["regexp"] == "ICU" ||
attributes["regexp"] == "PCRE" ||
attributes["regexp"] == "PCREJIT")
if (std::find(supported_engines.begin(), supported_engines.end(), attributes["regexp"]) != supported_engines.end())
global_settings.set_regexp_engine(attributes["regexp"]);
else
{
add_static_error("For the attribute \"regexp\" of the element \"osm-script\" the only allowed values are \"POSIX\", \"PCRE\", \"PCREJIT\" and \"ICU\".");
if (!supported_engines.empty()) {
std::ostringstream oss;
std::copy(supported_engines.begin(), supported_engines.end(), std::ostream_iterator<std::string>(oss, ", "));
std::string tmp = oss.str();
std::string res(tmp.data(), tmp.size() - 2);
add_static_error("For the attribute \"regexp\" of the element \"osm-script\" the only allowed values are: " + res + ".");
}
}

if (!attributes["bbox"].empty())
Expand Down
23 changes: 23 additions & 0 deletions src/overpass_api/statements/osm_script.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@
#include "bbox_query.h"
#include "statement.h"

#ifdef HAVE_CONFIG_H
#include <config.h>
#undef VERSION
#endif


class Output_Handle;

Expand Down Expand Up @@ -54,6 +59,24 @@ class Osm_Script_Statement : public Statement
uint32 max_allowed_time;
uint64 max_allowed_space;
Statement::Factory* factory = nullptr;

// Names of the regular expression engines enabled at compile time.
// Each entry is included only when its HAVE_* macro is defined, so the
// list may be empty if none of these macros is set.
inline static const std::vector<std::string> supported_engines
{
#ifdef HAVE_POSIX
"POSIX",
#endif
#ifdef HAVE_ICU
"ICU",
#endif
#ifdef HAVE_PCRE
"PCRE",
#endif
#ifdef HAVE_PCREJIT
"PCREJIT",
#endif
};

};


#endif
6 changes: 3 additions & 3 deletions src/overpass_api/statements/query.cc
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ void Query_Statement::add_statement(Statement* statement, std::string text)



struct Trivial_Regex
struct Trivial_Regex final
{
public:
bool matches(const std::string&, bool use_buffer = true) const { return true; }
Expand Down Expand Up @@ -2382,7 +2382,7 @@ Has_Kv_Statement::Has_Kv_Statement

try
{
key_regex = Regular_Expression_Factory::get_regexp_engine(global_settings.get_regexp_engine(), attributes["regk"], case_sensitive);
key_regex = Regular_Expression_Factory::get_regexp(global_settings.get_regexp_engine(), attributes["regk"], case_sensitive);
key = attributes["regk"];
}
catch (Regular_Expression_Error& e)
Expand All @@ -2400,7 +2400,7 @@ Has_Kv_Statement::Has_Kv_Statement

try
{
regex = Regular_Expression_Factory::get_regexp_engine(global_settings.get_regexp_engine(), attributes["regv"], case_sensitive);
regex = Regular_Expression_Factory::get_regexp(global_settings.get_regexp_engine(), attributes["regv"], case_sensitive);
value = attributes["regv"];
}
catch (Regular_Expression_Error& e)
Expand Down

0 comments on commit 95ab88b

Please sign in to comment.