From f89fc640f9ca1b2cccf0dc496b0b7ea434b53fad Mon Sep 17 00:00:00 2001
From: Mattias Ellert <mattias.ellert@physics.uu.se>
Date: Mon, 2 Oct 2023 09:25:13 +0200
Subject: [PATCH] Support PCRE2
---
.../pythonizations/test/import_load_libs.py | 1 +
cmake/modules/FindPCRE2.cmake | 106 ++++++++++++++++++
cmake/modules/SearchInstalledSoftware.cmake | 17 +--
core/base/CMakeLists.txt | 9 +-
core/base/src/TPRegexp.cxx | 99 +++++++++++++++-
tutorials/legacy/regexp/regexp.C | 6 +-
6 files changed, 223 insertions(+), 15 deletions(-)
create mode 100644 cmake/modules/FindPCRE2.cmake
diff --git a/bindings/pyroot/pythonizations/test/import_load_libs.py b/bindings/pyroot/pythonizations/test/import_load_libs.py
index 087db7da5c..179c76f6fd 100644
--- a/bindings/pyroot/pythonizations/test/import_load_libs.py
+++ b/bindings/pyroot/pythonizations/test/import_load_libs.py
@@ -23,6 +23,7 @@ class ImportLoadLibs(unittest.TestCase):
'libc',
'libdl',
'libpcre',
+ 'libpcre2-8', # 8-bit PCRE2 library, linked into Core when ROOT is configured with PCRE2
# libCling and dependencies
'libCling.*',
'librt',
diff --git a/cmake/modules/FindPCRE2.cmake b/cmake/modules/FindPCRE2.cmake
new file mode 100644
index 0000000000..2417453e8b
--- /dev/null
+++ b/cmake/modules/FindPCRE2.cmake
@@ -0,0 +1,106 @@
+# Copyright (C) 1995-2019, Rene Brun and Fons Rademakers.
+# All rights reserved.
+#
+# For the licensing terms see $ROOTSYS/LICENSE.
+# For the list of contributors see $ROOTSYS/README/CREDITS.
+
+#.rst:
+# FindPCRE2
+# ---------
+#
+# Find PCRE2 library
+#
+# Imported Targets
+# ^^^^^^^^^^^^^^^^
+#
+# This module defines the following :prop_tgt:`IMPORTED` target:
+#
+# ``PCRE2::PCRE2``
+# The pcre2 library, if found.
+#
+# Result Variables
+# ^^^^^^^^^^^^^^^^
+# This module will set the following variables in your project:
+#
+# ``PCRE2_FOUND``
+# True if PCRE2 has been found.
+# ``PCRE2_INCLUDE_DIRS``
+# Where to find pcre2.h
+# ``PCRE2_LIBRARIES``
+# The libraries to link against to use PCRE2.
+# ``PCRE2_VERSION``
+# The version of PCRE2 that was found (e.g. 10.42)
+#
+# Obsolete variables
+# ^^^^^^^^^^^^^^^^^^
+#
+# The following variables may also be set, for backwards compatibility:
+#
+# ``PCRE2_PCRE2_LIBRARY``
+# where to find the 8-bit PCRE2 library (``pcre2-8``).
+# ``PCRE2_INCLUDE_DIR``
+# where to find the pcre2.h header (same as PCRE2_INCLUDE_DIRS)
+#
+
+foreach(_pcre2_cached PCRE2_FOUND PCRE2_INCLUDE_DIR PCRE2_PCRE2_LIBRARY PCRE2_LIBRARIES)
+  unset(${_pcre2_cached} CACHE) # remove the cache entry of that name, if there is one
+endforeach()
+# Locate pcre2.h and, when it exists, read PCRE2_VERSION from its PCRE2_MAJOR / PCRE2_MINOR defines.
+find_path(PCRE2_INCLUDE_DIR NAMES pcre2.h PATH_SUFFIXES include)
+mark_as_advanced(PCRE2_INCLUDE_DIR)
+
+if (PCRE2_INCLUDE_DIR AND EXISTS "${PCRE2_INCLUDE_DIR}/pcre2.h")
+ file(STRINGS "${PCRE2_INCLUDE_DIR}/pcre2.h" PCRE2_H REGEX "^#define PCRE2_(MAJOR|MINOR).*$") # keep only the version #define lines
+ string(REGEX REPLACE "^.*PCRE2_MAJOR[ ]+([0-9]+).*$" "\\1" PCRE2_VERSION_MAJOR "${PCRE2_H}") # digits that follow PCRE2_MAJOR
+ string(REGEX REPLACE "^.*PCRE2_MINOR[ ]+([0-9]+).*$" "\\1" PCRE2_VERSION_MINOR "${PCRE2_H}") # digits that follow PCRE2_MINOR
+ set(PCRE2_VERSION "${PCRE2_VERSION_MAJOR}.${PCRE2_VERSION_MINOR}") # "<major>.<minor>"
+endif()
+# Search for the 8-bit PCRE2 library (release and debug names) unless PCRE2_PCRE2_LIBRARY is already set.
+if(NOT PCRE2_PCRE2_LIBRARY)
+  include(SelectLibraryConfigurations)
+  find_library(PCRE2_PCRE2_LIBRARY_DEBUG NAMES pcre2-8${CMAKE_DEBUG_POSTFIX} pcre2-8d)
+  find_library(PCRE2_PCRE2_LIBRARY_RELEASE NAMES pcre2-8)
+  select_library_configurations(PCRE2_PCRE2) # derives PCRE2_PCRE2_LIBRARY from the two results above
+endif()
+# Set PCRE2_FOUND: the header directory and the library must both have been located.
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(PCRE2
+  VERSION_VAR
+    PCRE2_VERSION
+  REQUIRED_VARS
+    PCRE2_INCLUDE_DIR
+    PCRE2_PCRE2_LIBRARY
+)
+# Publish the result variables and define the PCRE2::PCRE2 imported target (only once).
+if(PCRE2_FOUND)
+  set(PCRE2_INCLUDE_DIRS "${PCRE2_INCLUDE_DIR}")
+
+  if(NOT PCRE2_LIBRARIES)
+    set(PCRE2_LIBRARIES "${PCRE2_PCRE2_LIBRARY}")
+  endif()
+
+  if(NOT TARGET PCRE2::PCRE2)
+    add_library(PCRE2::PCRE2 UNKNOWN IMPORTED)
+    set_target_properties(PCRE2::PCRE2 PROPERTIES
+      INTERFACE_INCLUDE_DIRECTORIES "${PCRE2_INCLUDE_DIRS}")
+    # Register one imported configuration for each library flavour that was found.
+    if(PCRE2_PCRE2_LIBRARY_DEBUG)
+      set_property(TARGET PCRE2::PCRE2 APPEND PROPERTY
+        IMPORTED_CONFIGURATIONS DEBUG)
+      set_target_properties(PCRE2::PCRE2 PROPERTIES
+        IMPORTED_LOCATION_DEBUG "${PCRE2_PCRE2_LIBRARY_DEBUG}")
+    endif()
+
+    if(PCRE2_PCRE2_LIBRARY_RELEASE)
+      set_property(TARGET PCRE2::PCRE2 APPEND PROPERTY
+        IMPORTED_CONFIGURATIONS RELEASE)
+      set_target_properties(PCRE2::PCRE2 PROPERTIES
+        IMPORTED_LOCATION_RELEASE "${PCRE2_PCRE2_LIBRARY_RELEASE}")
+    endif()
+    # Neither flavour is known: IMPORTED_LOCATION holds a single path, so it is set rather than appended to.
+    if(NOT PCRE2_PCRE2_LIBRARY_DEBUG AND NOT PCRE2_PCRE2_LIBRARY_RELEASE)
+      set_target_properties(PCRE2::PCRE2 PROPERTIES
+        IMPORTED_LOCATION "${PCRE2_PCRE2_LIBRARY}")
+    endif()
+  endif()
+endif()
diff --git a/cmake/modules/SearchInstalledSoftware.cmake b/cmake/modules/SearchInstalledSoftware.cmake
index 464155fd50..252aa56737 100644
--- a/cmake/modules/SearchInstalledSoftware.cmake
+++ b/cmake/modules/SearchInstalledSoftware.cmake
@@ -204,13 +204,16 @@ if(NOT builtin_pcre)
foreach(suffix FOUND INCLUDE_DIR PCRE_LIBRARY)
unset(PCRE_${suffix} CACHE)
endforeach()
- if(fail-on-missing)
- find_package(PCRE REQUIRED)
- else()
- find_package(PCRE)
- if(NOT PCRE_FOUND)
- message(STATUS "PCRE not found. Switching on builtin_pcre option")
- set(builtin_pcre ON CACHE BOOL "Enabled because PCRE not found (${builtin_pcre_description})" FORCE)
+ find_package(PCRE2) # PCRE2 is tried first; the original PCRE below is only searched when it is missing
+ if(NOT PCRE2_FOUND)
+ if(fail-on-missing)
+ find_package(PCRE REQUIRED) # REQUIRED: configuration fails here when PCRE is missing too
+ else()
+ find_package(PCRE)
+ if(NOT PCRE_FOUND)
+ message(STATUS "PCRE not found. Switching on builtin_pcre option")
+ set(builtin_pcre ON CACHE BOOL "Enabled because PCRE not found (${builtin_pcre_description})" FORCE) # neither library found: switch the builtin_pcre option on
+ endif()
endif()
endif()
endif()
diff --git a/core/base/CMakeLists.txt b/core/base/CMakeLists.txt
index df701fedea..5798c93775 100644
--- a/core/base/CMakeLists.txt
+++ b/core/base/CMakeLists.txt
@@ -214,7 +214,14 @@ target_include_directories(Core PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/v7/inc>
)
-target_link_libraries(Core PRIVATE PCRE::PCRE)
+if(PCRE2_FOUND) # PCRE2 available: link Core against it and switch TPRegexp.cxx to its PCRE2 code paths
+ target_link_libraries(Core PRIVATE PCRE2::PCRE2)
+ set_source_files_properties(src/TPRegexp.cxx # the USE_PCRE2 definition is applied to this one source file only
+ TARGET_DIRECTORY Core
+ PROPERTIES COMPILE_DEFINITIONS USE_PCRE2)
+else() # no PCRE2: keep linking the original PCRE target
+ target_link_libraries(Core PRIVATE PCRE::PCRE)
+endif()
ROOT_INSTALL_HEADERS(${BASE_HEADER_DIRS})
diff --git a/core/base/src/TPRegexp.cxx b/core/base/src/TPRegexp.cxx
index 949b8cc8e9..d70f3e5b8b 100644
--- a/core/base/src/TPRegexp.cxx
+++ b/core/base/src/TPRegexp.cxx
@@ -25,19 +25,36 @@ found at : http://perldoc.perl.org/perlre.html
#include "TObjString.h"
#include "TError.h"
+#ifdef USE_PCRE2 // build against PCRE2; the PCRE_* names used in this file are mapped to their PCRE2 counterparts
+#ifdef R__WIN32
+#define PCRE2_STATIC
+#endif
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h> // PCRE2_CODE_UNIT_WIDTH is defined before this include; 8 selects the 8-bit API
+#define PCRE_CASELESS PCRE2_CASELESS
+#define PCRE_MULTILINE PCRE2_MULTILINE
+#define PCRE_DOTALL PCRE2_DOTALL
+#define PCRE_EXTENDED PCRE2_EXTENDED
+#define PCRE_ERROR_NOMATCH PCRE2_ERROR_NOMATCH
+#else // original PCRE build
#ifdef R__WIN32
#define PCRE_STATIC
#endif
#include <pcre.h>
+#endif
#include <vector>
#include <stdexcept>
struct PCREPriv_t {
+#ifdef USE_PCRE2
+ pcre2_code *fPCRE; // compiled PCRE2 pattern; this build has no separate pcre_extra member
+ PCREPriv_t() { fPCRE = nullptr; } // starts without a compiled pattern
+#else
pcre *fPCRE;
pcre_extra *fPCREExtra;
-
PCREPriv_t() { fPCRE = nullptr; fPCREExtra = nullptr; }
+#endif
};
@@ -79,10 +96,15 @@ TPRegexp::TPRegexp(const TPRegexp &p)
TPRegexp::~TPRegexp()
{
+#ifdef USE_PCRE2
+ if (fPriv->fPCRE)
+ pcre2_code_free(fPriv->fPCRE); // release the compiled PCRE2 pattern, if any
+#else
if (fPriv->fPCRE)
pcre_free(fPriv->fPCRE);
if (fPriv->fPCREExtra)
pcre_free(fPriv->fPCREExtra);
+#endif
delete fPriv;
}
@@ -93,12 +115,18 @@ TPRegexp &TPRegexp::operator=(const TPRegexp &p)
{
if (this != &p) {
fPattern = p.fPattern;
+#ifdef USE_PCRE2
+ if (fPriv->fPCRE)
+ pcre2_code_free(fPriv->fPCRE); // discard the pattern compiled for the previous fPattern
+ fPriv->fPCRE = nullptr; // the compiled form is not copied from p; only fPattern and fPCREOpts are
+#else
if (fPriv->fPCRE)
pcre_free(fPriv->fPCRE);
fPriv->fPCRE = nullptr;
if (fPriv->fPCREExtra)
pcre_free(fPriv->fPCREExtra);
fPriv->fPCREExtra = nullptr;
+#endif
fPCREOpts = p.fPCREOpts;
}
return *this;
@@ -197,31 +225,50 @@ TString TPRegexp::GetModifiers() const
void TPRegexp::Compile()
{
+#ifdef USE_PCRE2
+ if (fPriv->fPCRE)
+ pcre2_code_free(fPriv->fPCRE); // free the previously compiled pattern before compiling again
+#else
if (fPriv->fPCRE)
pcre_free(fPriv->fPCRE);
+#endif
if (fPCREOpts & kPCRE_DEBUG_MSGS)
Info("Compile", "PREGEX compiling %s", fPattern.Data());
+#ifdef USE_PCRE2
+ int errcode; // PCRE2 error number; converted to text in the failure branch below
+ PCRE2_SIZE patIndex; // error offset reported by pcre2_compile
+ fPriv->fPCRE = pcre2_compile((PCRE2_SPTR)fPattern.Data(), fPattern.Length(),
+ fPCREOpts & kPCRE_INTMASK,
+ &errcode, &patIndex, nullptr); // nullptr: no compile context is supplied
+#else
const char *errstr;
Int_t patIndex;
fPriv->fPCRE = pcre_compile(fPattern.Data(), fPCREOpts & kPCRE_INTMASK,
&errstr, &patIndex, nullptr);
+#endif
if (!fPriv->fPCRE) {
+#ifdef USE_PCRE2
+ PCRE2_UCHAR errstr[256]; // receives the message text for errcode
+ pcre2_get_error_message(errcode, errstr, 256);
+#endif
if (fgThrowAtCompileError) {
throw std::runtime_error
(TString::Format("TPRegexp::Compile() compilation of TPRegexp(%s) failed at: %d because %s",
- fPattern.Data(), patIndex, errstr).Data());
+ fPattern.Data(), (int)patIndex, errstr).Data()); // cast: patIndex is PCRE2_SIZE in the PCRE2 build but is printed with %d
} else {
Error("Compile", "compilation of TPRegexp(%s) failed at: %d because %s",
- fPattern.Data(), patIndex, errstr);
+ fPattern.Data(), (int)patIndex, errstr); // same cast as in the throwing branch
return;
}
}
+#ifndef USE_PCRE2 // the Optimize() step is only run in the original PCRE build
if (fPriv->fPCREExtra || (fPCREOpts & kPCRE_OPTIMIZE))
Optimize();
+#endif
}
////////////////////////////////////////////////////////////////////////////////
@@ -229,6 +276,7 @@ void TPRegexp::Compile()
void TPRegexp::Optimize()
{
+#ifndef USE_PCRE2
if (fPriv->fPCREExtra)
pcre_free(fPriv->fPCREExtra);
@@ -243,6 +291,7 @@ void TPRegexp::Optimize()
Error("Optimize", "Optimization of TPRegexp(%s) failed: %s",
fPattern.Data(), errstr);
}
+#endif
}
////////////////////////////////////////////////////////////////////////////////
@@ -308,21 +357,43 @@ Int_t TPRegexp::MatchInternal(const TString &s, Int_t start,
Int_t nMaxMatch, TArrayI *pos) const
{
Int_t *offVec = new Int_t[3*nMaxMatch];
+   // PCRE2 reports offsets through a match-data block; they are copied into offVec further down.
+#ifdef USE_PCRE2
+   pcre2_match_data *match_data = pcre2_match_data_create(nMaxMatch, nullptr); // nMaxMatch offset pairs, the capacity the PCRE1 call uses
+   Int_t nrMatch = pcre2_match(fPriv->fPCRE, (PCRE2_SPTR8)s.Data(),
+                               s.Length(), start, 0,
+                               match_data, nullptr);
+   if (nrMatch > nMaxMatch) nrMatch = 0; // only for nMaxMatch == 0 (PCRE2 always keeps one pair): treat as "ovector too small"
+#else
// pcre_exec allows less options - see pcre_internal.h PUBLIC_EXEC_OPTIONS.
Int_t nrMatch = pcre_exec(fPriv->fPCRE, fPriv->fPCREExtra, s.Data(),
s.Length(), start, 0,
offVec, 3*nMaxMatch);
+#endif
if (nrMatch == PCRE_ERROR_NOMATCH)
nrMatch = 0;
else if (nrMatch <= 0) {
Error("Match","pcre_exec error = %d", nrMatch);
+#ifdef USE_PCRE2
+   pcre2_match_data_free(match_data);
+#endif
delete [] offVec;
return 0;
}
- if (pos)
+   if (pos) {
+#ifdef USE_PCRE2
+      PCRE2_SIZE *oVec = pcre2_get_ovector_pointer(match_data);
+      for (int i = 0; i < 2 * nrMatch; ++i) // nrMatch <= nMaxMatch here, so the copy stays inside offVec
+         offVec[i] = oVec[i];
+#endif
pos->Set(2*nrMatch, offVec);
+   }
+
+#ifdef USE_PCRE2
+   pcre2_match_data_free(match_data);
+#endif
delete [] offVec;
return nrMatch;
@@ -404,13 +475,24 @@ Int_t TPRegexp::SubstituteInternal(TString &s, const TString &replacePattern,
Int_t offset = start;
Int_t last = 0;
+#ifdef USE_PCRE2
+   pcre2_match_data *match_data = pcre2_match_data_create(nMaxMatch, nullptr); // nMaxMatch pairs, so copied offsets fit the 3*nMaxMatch offVec
+#endif
+
while (kTRUE) {
// find next matching subs
// pcre_exec allows less options - see pcre_internal.h PUBLIC_EXEC_OPTIONS.
+#ifdef USE_PCRE2
+      Int_t nrMatch = pcre2_match(fPriv->fPCRE, (PCRE2_SPTR)s.Data(),
+                                  s.Length(), offset, 0,
+                                  match_data, nullptr);
+      if (nrMatch > nMaxMatch) nrMatch = 0; // only for nMaxMatch == 0 (PCRE2 always keeps one pair): same value as "ovector too small"
+#else
Int_t nrMatch = pcre_exec(fPriv->fPCRE, fPriv->fPCREExtra, s.Data(),
s.Length(), offset, 0,
offVec, 3*nMaxMatch);
+#endif
if (nrMatch == PCRE_ERROR_NOMATCH) {
break;
@@ -419,6 +501,12 @@ Int_t TPRegexp::SubstituteInternal(TString &s, const TString &replacePattern,
break;
}
+#ifdef USE_PCRE2
+ PCRE2_SIZE *oVec = pcre2_get_ovector_pointer(match_data); // offsets of the match just found
+ for (int i = 0; i < 2 * nrMatch; ++i) // one start/end pair per reported match
+ offVec[i] = oVec[i]; // converted to Int_t so the offVec-based code below serves both builds
+#endif
+
// append anything previously unmatched, but not substituted
if (last <= offVec[0]) {
fin += s(last,offVec[0]-last);
@@ -446,6 +534,9 @@ Int_t TPRegexp::SubstituteInternal(TString &s, const TString &replacePattern,
}
}
+#ifdef USE_PCRE2
+ pcre2_match_data_free(match_data);
+#endif
delete [] offVec;
fin += s(last,s.Length()-last);
diff --git a/tutorials/legacy/regexp/regexp.C b/tutorials/legacy/regexp/regexp.C
index 995b823bce..f38ed6799e 100644
--- a/tutorials/legacy/regexp/regexp.C
+++ b/tutorials/legacy/regexp/regexp.C
@@ -94,11 +94,11 @@ void regexp()
// criteria:
// 1) It should be of the form string1@string2 . The "^" and "$" ensure that we compare the complete
// email string
- // 2) ([\\w-\\.]+) :
+ // 2) ([\\w\\-\\.]+) :
// string1 is only allowed to be composed out of the alphanumeric characters, "-" and "." .
// The "+" ensures that string1 can not be empty .
// 3) string2 is matched against three different parts :
- // a. ((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([\\w-]+\\.)+)) :
+ // a. ((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([\\w\\-]+\\.)+)) :
// This regular expression ensures that EITHER the string starts with "[" followed by three groups
// of numbers, separated by "." , where each group has 1 to 3 numbers, OR alphanumeric strings,
// possibly containing "-" characters, separated by "." .
@@ -108,7 +108,7 @@ void regexp()
// At most one "]" character .
TString s5("fons.rademakers@cern.ch");
- TPRegexp r5("^([\\w-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([\\w-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$");
+ TPRegexp r5("^([\\w\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([\\w\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$"); // '-' is now escaped inside [...]; NOTE(review): presumably because PCRE2 rejects an unescaped hyphen after a class escape - confirm against pcre2pattern
cout << "Check if the email address \"" << s5 << "\" is valid: " << (r5.MatchB(s5) ? "TRUE" : "FALSE") << endl;
// Substitute Example with pattern modifier :
--
2.41.0