From f89fc640f9ca1b2cccf0dc496b0b7ea434b53fad Mon Sep 17 00:00:00 2001 From: Mattias Ellert Date: Mon, 2 Oct 2023 09:25:13 +0200 Subject: [PATCH] Support PCRE2 --- .../pythonizations/test/import_load_libs.py | 1 + cmake/modules/FindPCRE2.cmake | 106 ++++++++++++++++++ cmake/modules/SearchInstalledSoftware.cmake | 17 +-- core/base/CMakeLists.txt | 9 +- core/base/src/TPRegexp.cxx | 99 +++++++++++++++- tutorials/legacy/regexp/regexp.C | 6 +- 6 files changed, 223 insertions(+), 15 deletions(-) create mode 100644 cmake/modules/FindPCRE2.cmake diff --git a/bindings/pyroot/pythonizations/test/import_load_libs.py b/bindings/pyroot/pythonizations/test/import_load_libs.py index 087db7da5c..179c76f6fd 100644 --- a/bindings/pyroot/pythonizations/test/import_load_libs.py +++ b/bindings/pyroot/pythonizations/test/import_load_libs.py @@ -23,6 +23,7 @@ class ImportLoadLibs(unittest.TestCase): 'libc', 'libdl', 'libpcre', + 'libpcre2-8', # libCling and dependencies 'libCling.*', 'librt', diff --git a/cmake/modules/FindPCRE2.cmake b/cmake/modules/FindPCRE2.cmake new file mode 100644 index 0000000000..2417453e8b --- /dev/null +++ b/cmake/modules/FindPCRE2.cmake @@ -0,0 +1,106 @@ +# Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. +# All rights reserved. +# +# For the licensing terms see $ROOTSYS/LICENSE. +# For the list of contributors see $ROOTSYS/README/CREDITS. + +#.rst: +# FindPCRE2 +# -------- +# +# Find PCRE2 library +# +# Imported Targets +# ^^^^^^^^^^^^^^^^ +# +# This module defines :prop_tgt:`IMPORTED` target: +# +# ``PCRE2::PCRE2`` +# The pcre2 library, if found. +# +# Result Variables +# ^^^^^^^^^^^^^^^^ +# This module will set the following variables in your project: +# +# ``PCRE2_FOUND`` +# True if PCRE2 has been found. +# ``PCRE2_INCLUDE_DIRS`` +# Where to find pcre2.h +# ``PCRE2_LIBRARIES`` +# The libraries to link against to use PCRE2. +# ``PCRE2_VERSION`` +# The version of the PCRE2 found (e.g. 10.42) +# +# Obsolete variables +# ^^^^^^^^^^^^^^^^^^ +# +# The following variables may also be set, for backwards compatibility: +# +# ``PCRE2_PCRE2_LIBRARY`` +# where to find the PCRE2_PCRE2 library. +# ``PCRE2_INCLUDE_DIR`` +# where to find the pcre2.h header (same as PCRE2_INCLUDE_DIRS) +# + +foreach(var PCRE2_FOUND PCRE2_INCLUDE_DIR PCRE2_PCRE2_LIBRARY PCRE2_LIBRARIES) + unset(${var} CACHE) +endforeach() + +find_path(PCRE2_INCLUDE_DIR NAMES pcre2.h PATH_SUFFIXES include) +mark_as_advanced(PCRE2_INCLUDE_DIR) + +if (PCRE2_INCLUDE_DIR AND EXISTS "${PCRE2_INCLUDE_DIR}/pcre2.h") + file(STRINGS "${PCRE2_INCLUDE_DIR}/pcre2.h" PCRE2_H REGEX "^#define PCRE2_(MAJOR|MINOR).*$") + string(REGEX REPLACE "^.*PCRE2_MAJOR[ ]+([0-9]+).*$" "\\1" PCRE2_VERSION_MAJOR "${PCRE2_H}") + string(REGEX REPLACE "^.*PCRE2_MINOR[ ]+([0-9]+).*$" "\\1" PCRE2_VERSION_MINOR "${PCRE2_H}") + set(PCRE2_VERSION "${PCRE2_VERSION_MAJOR}.${PCRE2_VERSION_MINOR}") +endif() + +if(NOT PCRE2_PCRE2_LIBRARY) + find_library(PCRE2_PCRE2_LIBRARY_RELEASE NAMES pcre2-8) + find_library(PCRE2_PCRE2_LIBRARY_DEBUG NAMES pcre2-8${CMAKE_DEBUG_POSTFIX} pcre2-8d) + include(SelectLibraryConfigurations) + select_library_configurations(PCRE2_PCRE2) +endif() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(PCRE2 + REQUIRED_VARS + PCRE2_INCLUDE_DIR + PCRE2_PCRE2_LIBRARY + VERSION_VAR + PCRE2_VERSION +) + +if(PCRE2_FOUND) + set(PCRE2_INCLUDE_DIRS "${PCRE2_INCLUDE_DIR}") + + if (NOT PCRE2_LIBRARIES) + set(PCRE2_LIBRARIES "${PCRE2_PCRE2_LIBRARY}") + endif() + + if(NOT TARGET PCRE2::PCRE2) + add_library(PCRE2::PCRE2 UNKNOWN IMPORTED) + set_target_properties(PCRE2::PCRE2 PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${PCRE2_INCLUDE_DIRS}") + + if(PCRE2_PCRE2_LIBRARY_DEBUG) + set_property(TARGET PCRE2::PCRE2 APPEND PROPERTY + IMPORTED_CONFIGURATIONS DEBUG) + set_target_properties(PCRE2::PCRE2 PROPERTIES + IMPORTED_LOCATION_DEBUG "${PCRE2_PCRE2_LIBRARY_DEBUG}") + endif() + + if(PCRE2_PCRE2_LIBRARY_RELEASE) + set_property(TARGET PCRE2::PCRE2 APPEND PROPERTY + IMPORTED_CONFIGURATIONS RELEASE) + set_target_properties(PCRE2::PCRE2 PROPERTIES + IMPORTED_LOCATION_RELEASE "${PCRE2_PCRE2_LIBRARY_RELEASE}") + endif() + + if(NOT PCRE2_PCRE2_LIBRARY_DEBUG AND NOT PCRE2_PCRE2_LIBRARY_RELEASE) + set_property(TARGET PCRE2::PCRE2 APPEND PROPERTY + IMPORTED_LOCATION "${PCRE2_PCRE2_LIBRARY}") + endif() + endif() +endif() diff --git a/cmake/modules/SearchInstalledSoftware.cmake b/cmake/modules/SearchInstalledSoftware.cmake index 464155fd50..252aa56737 100644 --- a/cmake/modules/SearchInstalledSoftware.cmake +++ b/cmake/modules/SearchInstalledSoftware.cmake @@ -204,13 +204,16 @@ if(NOT builtin_pcre) foreach(suffix FOUND INCLUDE_DIR PCRE_LIBRARY) unset(PCRE_${suffix} CACHE) endforeach() - if(fail-on-missing) - find_package(PCRE REQUIRED) - else() - find_package(PCRE) - if(NOT PCRE_FOUND) - message(STATUS "PCRE not found. Switching on builtin_pcre option") - set(builtin_pcre ON CACHE BOOL "Enabled because PCRE not found (${builtin_pcre_description})" FORCE) + find_package(PCRE2) + if(NOT PCRE2_FOUND) + if(fail-on-missing) + find_package(PCRE REQUIRED) + else() + find_package(PCRE) + if(NOT PCRE_FOUND) + message(STATUS "PCRE not found. Switching on builtin_pcre option") + set(builtin_pcre ON CACHE BOOL "Enabled because PCRE not found (${builtin_pcre_description})" FORCE) + endif() endif() endif() endif() diff --git a/core/base/CMakeLists.txt b/core/base/CMakeLists.txt index df701fedea..5798c93775 100644 --- a/core/base/CMakeLists.txt +++ b/core/base/CMakeLists.txt @@ -214,7 +214,14 @@ target_include_directories(Core PUBLIC $ ) -target_link_libraries(Core PRIVATE PCRE::PCRE) +if(PCRE2_FOUND) + target_link_libraries(Core PRIVATE PCRE2::PCRE2) + set_source_files_properties(src/TPRegexp.cxx + TARGET_DIRECTORY Core + PROPERTIES COMPILE_DEFINITIONS USE_PCRE2) +else() + target_link_libraries(Core PRIVATE PCRE::PCRE) +endif() ROOT_INSTALL_HEADERS(${BASE_HEADER_DIRS}) diff --git a/core/base/src/TPRegexp.cxx b/core/base/src/TPRegexp.cxx index 949b8cc8e9..d70f3e5b8b 100644 --- a/core/base/src/TPRegexp.cxx +++ b/core/base/src/TPRegexp.cxx @@ -25,19 +25,36 @@ found at : http://perldoc.perl.org/perlre.html #include "TObjString.h" #include "TError.h" +#ifdef USE_PCRE2 +#ifdef R__WIN32 +#define PCRE2_STATIC +#endif +#define PCRE2_CODE_UNIT_WIDTH 8 +#include +#define PCRE_CASELESS PCRE2_CASELESS +#define PCRE_MULTILINE PCRE2_MULTILINE +#define PCRE_DOTALL PCRE2_DOTALL +#define PCRE_EXTENDED PCRE2_EXTENDED +#define PCRE_ERROR_NOMATCH PCRE2_ERROR_NOMATCH +#else #ifdef R__WIN32 #define PCRE_STATIC #endif #include +#endif #include #include struct PCREPriv_t { +#ifdef USE_PCRE2 + pcre2_code *fPCRE; + PCREPriv_t() { fPCRE = nullptr; } +#else pcre *fPCRE; pcre_extra *fPCREExtra; - PCREPriv_t() { fPCRE = nullptr; fPCREExtra = nullptr; } +#endif }; @@ -79,10 +96,15 @@ TPRegexp::TPRegexp(const TPRegexp &p) TPRegexp::~TPRegexp() { +#ifdef USE_PCRE2 + if (fPriv->fPCRE) + pcre2_code_free(fPriv->fPCRE); +#else if (fPriv->fPCRE) pcre_free(fPriv->fPCRE); if (fPriv->fPCREExtra) pcre_free(fPriv->fPCREExtra); +#endif delete fPriv; } @@ -93,12 +115,18 @@ TPRegexp &TPRegexp::operator=(const TPRegexp &p) { if (this != &p) { fPattern = p.fPattern; +#ifdef USE_PCRE2 + if (fPriv->fPCRE) + pcre2_code_free(fPriv->fPCRE); + fPriv->fPCRE = nullptr; +#else if (fPriv->fPCRE) pcre_free(fPriv->fPCRE); fPriv->fPCRE = nullptr; if (fPriv->fPCREExtra) pcre_free(fPriv->fPCREExtra); fPriv->fPCREExtra = nullptr; +#endif fPCREOpts = p.fPCREOpts; } return *this; @@ -197,31 +225,50 @@ TString TPRegexp::GetModifiers() const void TPRegexp::Compile() { +#ifdef USE_PCRE2 + if (fPriv->fPCRE) + pcre2_code_free(fPriv->fPCRE); +#else if (fPriv->fPCRE) pcre_free(fPriv->fPCRE); +#endif if (fPCREOpts & kPCRE_DEBUG_MSGS) Info("Compile", "PREGEX compiling %s", fPattern.Data()); +#ifdef USE_PCRE2 + int errcode; + PCRE2_SIZE patIndex; + fPriv->fPCRE = pcre2_compile((PCRE2_SPTR)fPattern.Data(), fPattern.Length(), + fPCREOpts & kPCRE_INTMASK, + &errcode, &patIndex, nullptr); +#else const char *errstr; Int_t patIndex; fPriv->fPCRE = pcre_compile(fPattern.Data(), fPCREOpts & kPCRE_INTMASK, &errstr, &patIndex, nullptr); +#endif if (!fPriv->fPCRE) { +#ifdef USE_PCRE2 + PCRE2_UCHAR errstr[256]; + pcre2_get_error_message(errcode, errstr, 256); +#endif if (fgThrowAtCompileError) { throw std::runtime_error (TString::Format("TPRegexp::Compile() compilation of TPRegexp(%s) failed at: %d because %s", - fPattern.Data(), patIndex, errstr).Data()); + fPattern.Data(), (int)patIndex, errstr).Data()); } else { Error("Compile", "compilation of TPRegexp(%s) failed at: %d because %s", - fPattern.Data(), patIndex, errstr); + fPattern.Data(), (int)patIndex, errstr); return; } } +#ifndef USE_PCRE2 if (fPriv->fPCREExtra || (fPCREOpts & kPCRE_OPTIMIZE)) Optimize(); +#endif } //////////////////////////////////////////////////////////////////////////////// @@ -229,6 +276,7 @@ void TPRegexp::Compile() void TPRegexp::Optimize() { +#ifndef USE_PCRE2 if (fPriv->fPCREExtra) pcre_free(fPriv->fPCREExtra); @@ -243,6 +291,7 @@ void TPRegexp::Optimize() Error("Optimize", "Optimization of TPRegexp(%s) failed: %s", fPattern.Data(), errstr); } +#endif } //////////////////////////////////////////////////////////////////////////////// @@ -308,21 +357,43 @@ Int_t TPRegexp::MatchInternal(const TString &s, Int_t start, Int_t nMaxMatch, TArrayI *pos) const { Int_t *offVec = new Int_t[3*nMaxMatch]; + +#ifdef USE_PCRE2 + pcre2_match_data *match_data; + match_data = pcre2_match_data_create_from_pattern(fPriv->fPCRE, nullptr); + Int_t nrMatch = pcre2_match(fPriv->fPCRE, (PCRE2_SPTR8)s.Data(), + s.Length(), start, 0, + match_data, nullptr); +#else // pcre_exec allows less options - see pcre_internal.h PUBLIC_EXEC_OPTIONS. Int_t nrMatch = pcre_exec(fPriv->fPCRE, fPriv->fPCREExtra, s.Data(), s.Length(), start, 0, offVec, 3*nMaxMatch); +#endif if (nrMatch == PCRE_ERROR_NOMATCH) nrMatch = 0; else if (nrMatch <= 0) { Error("Match","pcre_exec error = %d", nrMatch); +#ifdef USE_PCRE2 + pcre2_match_data_free(match_data); +#endif delete [] offVec; return 0; } - if (pos) + if (pos) { +#ifdef USE_PCRE2 + PCRE2_SIZE *oVec = pcre2_get_ovector_pointer(match_data); + for (int i = 0; i < 2 * nrMatch; ++i) + offVec[i] = oVec[i]; +#endif pos->Set(2*nrMatch, offVec); + } + +#ifdef USE_PCRE2 + pcre2_match_data_free(match_data); +#endif delete [] offVec; return nrMatch; @@ -404,13 +475,24 @@ Int_t TPRegexp::SubstituteInternal(TString &s, const TString &replacePattern, Int_t offset = start; Int_t last = 0; +#ifdef USE_PCRE2 + pcre2_match_data *match_data; + match_data = pcre2_match_data_create_from_pattern(fPriv->fPCRE, nullptr); +#endif + while (kTRUE) { // find next matching subs // pcre_exec allows less options - see pcre_internal.h PUBLIC_EXEC_OPTIONS. +#ifdef USE_PCRE2 + Int_t nrMatch = pcre2_match(fPriv->fPCRE, (PCRE2_SPTR)s.Data(), + s.Length(), offset, 0, + match_data, nullptr); +#else Int_t nrMatch = pcre_exec(fPriv->fPCRE, fPriv->fPCREExtra, s.Data(), s.Length(), offset, 0, offVec, 3*nMaxMatch); +#endif if (nrMatch == PCRE_ERROR_NOMATCH) { break; @@ -419,6 +501,12 @@ Int_t TPRegexp::SubstituteInternal(TString &s, const TString &replacePattern, break; } +#ifdef USE_PCRE2 + PCRE2_SIZE *oVec = pcre2_get_ovector_pointer(match_data); + for (int i = 0; i < 2 * nrMatch; ++i) + offVec[i] = oVec[i]; +#endif + // append anything previously unmatched, but not substituted if (last <= offVec[0]) { fin += s(last,offVec[0]-last); @@ -446,6 +534,9 @@ Int_t TPRegexp::SubstituteInternal(TString &s, const TString &replacePattern, } } +#ifdef USE_PCRE2 + pcre2_match_data_free(match_data); +#endif delete [] offVec; fin += s(last,s.Length()-last); diff --git a/tutorials/legacy/regexp/regexp.C b/tutorials/legacy/regexp/regexp.C index 995b823bce..f38ed6799e 100644 --- a/tutorials/legacy/regexp/regexp.C +++ b/tutorials/legacy/regexp/regexp.C @@ -94,11 +94,11 @@ void regexp() // criteria: // 1) It should be of the form string1@string2 . The "^" and "$" ensure that we compare the complete // email string - // 2) ([\\w-\\.]+) : + // 2) ([\\w\\-\\.]+) : // string1 is only allowed to be composed out of the alphanumeric characters, "-" and "." . // The "+" ensures that string1 can not be empty . // 3) string2 is matched against three different parts : - // a. ((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([\\w-]+\\.)+)) : + // a. ((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([\\w\\-]+\\.)+)) : // This regular expression ensures that EITHER the string starts with "[" followed by three groups // of numbers, separated by "." , where each group has 1 to 3 numbers, OR alphanumeric strings, // possibly containing "-" characters, separated by "." . @@ -108,7 +108,7 @@ void regexp() // At most one "]" character . TString s5("fons.rademakers@cern.ch"); - TPRegexp r5("^([\\w-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([\\w-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$"); + TPRegexp r5("^([\\w\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([\\w\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$"); cout << "Check if the email address \"" << s5 << "\" is valid: " << (r5.MatchB(s5) ? "TRUE" : "FALSE") << endl; // Substitute Example with pattern modifier : -- 2.41.0