From da580d789bc1901caaeccca277a40dd9a347ba73 Mon Sep 17 00:00:00 2001 From: Manikanta Date: Tue, 6 Jan 2026 02:03:42 -0500 Subject: [PATCH 1/2] Replacing pcrecpp with regex --- Makefile | 3 ++- ODBCLoader.cpp | 43 ++++++++++++++++++++++++++++--------------- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/Makefile b/Makefile index 7fa3064..1e51872 100644 --- a/Makefile +++ b/Makefile @@ -3,6 +3,7 @@ TMPDIR ?= /tmp SDK_HOME ?= /opt/vertica/sdk SHELL = /bin/bash VSQL ?= /opt/vertica/bin/vsql +VSQL_OPTS ?= LOADER_DEBUG = 0 TARGET ?= ./lib @@ -56,4 +57,4 @@ test: ## Actual build target $(TARGET)/ODBCLoader.so: ODBCLoader.cpp $(SDK_HOME)/include/Vertica.cpp $(SDK_HOME)/include/BuildInfo.h mkdir -p $(TARGET) - $(CXX) $(ALL_CXXFLAGS) -o $@ $(SDK_HOME)/include/Vertica.cpp ODBCLoader.cpp -lodbc -lpcrecpp -lpcre + $(CXX) $(ALL_CXXFLAGS) -o $@ $(SDK_HOME)/include/Vertica.cpp ODBCLoader.cpp -lodbc diff --git a/ODBCLoader.cpp b/ODBCLoader.cpp index 2d441d5..95aaa76 100644 --- a/ODBCLoader.cpp +++ b/ODBCLoader.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include // To deal with TimeTz and TimestampTz. 
// No standard native-SQL representation for these, @@ -32,13 +32,13 @@ #define DEF_ROWSET 100 // Default rowset #define MAX_PRELEN 2048 // Max predicate length #define MAX_PRENUM 10 // Max predicate number -#define REG_CASTRM R"(::\w+(\(.*?\))*)" -#define REG_ANYMTC R"(\s*=\s*ANY\s*\(ARRAY\[(.*)\])" -#define REG_ANYREP R"( IN(\1)" +#define REG_CASTRM R"(::\w+(\([^()]*\))*)" +#define REG_ANYMTC R"(\s*=\s*ANY\s*\(ARRAY\[([^\]]*)\])" +#define REG_ANYREP " IN($1" // no ')' here: REG_ANYMTC leaves ANY's closing ')' in place #define REG_TILDEM R"(\s*~~\s*)" -#define REG_TILDER R"( LIKE )" +#define REG_TILDER " LIKE " #define REG_ENDSCO R"(\s*;\s*$)" -#define REG_QUERYP R"(^\s*\(*\s*override_query\s*<\s*'\s*(.*)\s*'.*$)" +#define REG_QUERYP R"(^\s*\(*\s*override_query\s*<\s*'\s*([\s\S]*)\s*'[\s\S]*$)" using namespace Vertica; @@ -539,20 +539,27 @@ class ODBCLoader : public UDParser { #if LOADER_DEBUG srvInterface.log("DEBUG predicate [%s] length=%zu, string=<%s>", pred, strlen(mpred.c_str()), mpred.c_str()); #endif - if ( pcrecpp::RE(REG_QUERYP, pcrecpp::RE_Options(PCRE_DOTALL)).FullMatch(mpred) ) { - pcrecpp::RE(REG_QUERYP, pcrecpp::RE_Options(PCRE_DOTALL)).GlobalReplace("\\1", &mpred) ; - query = mpred ; - oq_flag = true ; + { + static const std::regex re_query(REG_QUERYP, std::regex::ECMAScript | std::regex::icase); + static const std::regex re_any(REG_ANYMTC, std::regex::ECMAScript | std::regex::icase); + static const std::regex re_tilde(REG_TILDEM, std::regex::ECMAScript | std::regex::icase); + + std::smatch m; + if (std::regex_match(mpred, m, re_query)) { + mpred = m[1].str(); + query = mpred ; + oq_flag = true ; #if LOADER_DEBUG srvInterface.log("DEBUG new query length=%zu, new query string=<%s>",query.length(), query.c_str()); #endif - } else if ( src_rfilter ) { - pcrecpp::RE(REG_ANYMTC).GlobalReplace(REG_ANYREP, &mpred) ; // to replace ANY(ARRAY()) with IN() - pcrecpp::RE(REG_TILDEM).GlobalReplace(REG_TILDER, &mpred) ; // to replace ~~ with LIKE + } else if ( src_rfilter ) { + mpred = std::regex_replace(mpred, re_any,
REG_ANYREP); // to replace ANY(ARRAY()) with IN() + mpred = std::regex_replace(mpred, re_tilde, REG_TILDER); // to replace ~~ with LIKE if ( l++ ) predicates += " AND " + mpred ; else predicates += " WHERE " + mpred ; + } } } else { break ; @@ -560,7 +567,10 @@ class ODBCLoader : public UDParser { } // Remove ending semicolon from "query" (if any) - pcrecpp::RE(REG_ENDSCO).GlobalReplace("", &query) ; + { + static const std::regex re_end(REG_ENDSCO, std::regex::ECMAScript); + query = std::regex_replace(query, re_end, ""); + } // Check "hidden" parameters __query_col_name__ and __query_col_idx__ to filter out columns if ( src_cfilter ) { @@ -581,7 +591,10 @@ srvInterface.log("-----> External Table Columns, colInTable=<%d>", colInTable); } // MF to remove Vertica casts (::) - pcrecpp::RE(REG_CASTRM).GlobalReplace("", &slist) ; + { + static const std::regex re_cast(REG_CASTRM, std::regex::ECMAScript); + slist = std::regex_replace(slist, re_cast, ""); + } query = "SELECT " + slist + " FROM ( " + query + " ) sq" ; } else { query = "SELECT " + From 91c73798d91f8e1d03912748b462622d875e1b4c Mon Sep 17 00:00:00 2001 From: mkottakota1 <149763406+mkottakota1@users.noreply.github.com> Date: Wed, 7 Jan 2026 15:23:30 +0530 Subject: [PATCH 2/2] Updating readme file We have removed the dependency on PCRE, removing the documentation related to pcre --- README.md | 54 ------------------------------------------------------ 1 file changed, 54 deletions(-) diff --git a/README.md b/README.md index 90ac124..2761d24 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,6 @@ Data retrieved from external databases is neither converted into an intermediate In order to install the ODBCLoader package you need to install on **all nodes of your Vertica cluster**: - an ODBC Driver Manager. This library has been tested with unixODBC. 
You need to install the development libraries (for example: ``yum install unixODBC-devel``) - the ODBC Drivers to interface the remote databases -- Perl Compatible Regular Expression library (``yum install pcre-devel pcre-cpp``) In order to compile the ODBCLoader you also have to setup a development environment as defined in the standard documentation ([Setting Up a Development Environment](https://www.vertica.com/docs/10.1.x/HTML/Content/Authoring/ExtendingVertica/UDx/DevEnvironment.htm)). @@ -177,59 +176,6 @@ $ rm -rf build && make install LOADER_DEBUG=1 ``` this will print extra messages in the Vertica log files (either ``UDxLogs/UDxFencedProcesses.log`` or ``vertica.log`` depending if the library was "FENCED" or "UNFENCED"). **Caution:** don't do this in production because it will flood your logs with debug messages and slowdown everything. -### PCRE Missing symbols - -The following error has been reported, during the deloyment phase, on a few Linux Distributions: -``` -undefined symbol: _ZNK7pcrecpp2RE13GlobalReplaceERKNS_11StringPieceEPSs -``` - -#### To fix this issue you might want to... - -**STEP 1: get rid of the standard pcre packages**: -Remove ``pcre-devel`` and ``pcre-cpp`` packages (if installed) using the appropriate package management commands. For example: - -``` -# yum remove pcre-devel pcre-cpp -``` - -**STEP 2: install PCRE from sources**: -``` -# tar xzvf pcre-8.45.tar.gz -# cd pcre-8.45 -# ./configure CXXFLAGS='-std=c++11 -D_GLIBCXX_USE_CXX11_ABI=0' -# make && make install -``` - -**STEP 3: update you ld.so config and recreate its cache**: -``` -# echo "/usr/local/lib" > /etc/ld.so.conf.d/local.conf && rm /etc/ld.so.cache && ldconfig -``` - -#### But if existing version PCRE must be kept, you could... 
- -**STEP 1: install PCRE from sources to a dedicated location**: -``` -# tar xzvf pcre-8.45.tar.gz -# cd pcre-8.45 -# ./configure CXXFLAGS='-std=c++11 -D_GLIBCXX_USE_CXX11_ABI=0' --prefix=/opt/pcre -# make && make install -``` - -**STEP 2: set PATHs for PCRE header files and libraries**: -``` -echo 'export LD_LIBRARY_PATH=/opt/pcre/lib:${LD_LIBRARY_PATH}' >> /home/dbadmin/.bashrc - -export CPLUS_INCLUDE_PATH=/opt/pcre/include:${CPLUS_INCLUDE_PATH} -export LIBRARY_PATH=/opt/pcre/lib:${LIBRARY_PATH} -export LD_LIBRARY_PATH=/opt/pcre/lib:${LD_LIBRARY_PATH} - -# restart vertica database to effect settings -admintools -t stop_db -d testdb; admintools -t start_db -d testdb - -# Building and installing the library as mentioned before -``` - ## Sample ODBC Configurations The following two configuration files ```odbc.ini``` and ```odbcinst.ini``` have been used to define two data sources: **pmf** to connect to PostgreSQL and **mmf** to connect to MySQL: ```