Blob Blame History Raw
From f89fc640f9ca1b2cccf0dc496b0b7ea434b53fad Mon Sep 17 00:00:00 2001
From: Mattias Ellert <mattias.ellert@physics.uu.se>
Date: Mon, 2 Oct 2023 09:25:13 +0200
Subject: [PATCH] Support PCRE2

---
 .../pythonizations/test/import_load_libs.py   |   1 +
 cmake/modules/FindPCRE2.cmake                 | 106 ++++++++++++++++++
 cmake/modules/SearchInstalledSoftware.cmake   |  17 +--
 core/base/CMakeLists.txt                      |   9 +-
 core/base/src/TPRegexp.cxx                    |  99 +++++++++++++++-
 tutorials/legacy/regexp/regexp.C              |   6 +-
 6 files changed, 223 insertions(+), 15 deletions(-)
 create mode 100644 cmake/modules/FindPCRE2.cmake

diff --git a/bindings/pyroot/pythonizations/test/import_load_libs.py b/bindings/pyroot/pythonizations/test/import_load_libs.py
index 087db7da5c..179c76f6fd 100644
--- a/bindings/pyroot/pythonizations/test/import_load_libs.py
+++ b/bindings/pyroot/pythonizations/test/import_load_libs.py
@@ -23,6 +23,7 @@ class ImportLoadLibs(unittest.TestCase):
             'libc',
             'libdl',
             'libpcre',
+            'libpcre2-8',
             # libCling and dependencies
             'libCling.*',
             'librt',
diff --git a/cmake/modules/FindPCRE2.cmake b/cmake/modules/FindPCRE2.cmake
new file mode 100644
index 0000000000..2417453e8b
--- /dev/null
+++ b/cmake/modules/FindPCRE2.cmake
@@ -0,0 +1,106 @@
+# Copyright (C) 1995-2019, Rene Brun and Fons Rademakers.
+# All rights reserved.
+#
+# For the licensing terms see $ROOTSYS/LICENSE.
+# For the list of contributors see $ROOTSYS/README/CREDITS.
+
+#.rst:
+# FindPCRE2
+# ---------
+#
+# Find PCRE2 library
+#
+# Imported Targets
+# ^^^^^^^^^^^^^^^^
+#
+# This module defines the following :prop_tgt:`IMPORTED` target:
+#
+# ``PCRE2::PCRE2``
+#   The pcre2 library, if found.
+#
+# Result Variables
+# ^^^^^^^^^^^^^^^^
+# This module will set the following variables in your project:
+#
+# ``PCRE2_FOUND``
+#   True if PCRE2 has been found.
+# ``PCRE2_INCLUDE_DIRS``
+#   Where to find pcre2.h
+# ``PCRE2_LIBRARIES``
+#   The libraries to link against to use PCRE2.
+# ``PCRE2_VERSION``
+#   The version of the PCRE2 found (e.g. 10.42)
+#
+# Other variables
+# ^^^^^^^^^^^^^^^
+#
+# The following variables are also set while searching:
+#
+# ``PCRE2_PCRE2_LIBRARY``
+#   The PCRE2 (pcre2-8) library that was found.
+# ``PCRE2_INCLUDE_DIR``
+#   Where to find the pcre2.h header (same as PCRE2_INCLUDE_DIRS).
+#
+
+# Remove these entries from the cache so the search below starts fresh.
+foreach(var IN ITEMS PCRE2_FOUND PCRE2_INCLUDE_DIR PCRE2_PCRE2_LIBRARY PCRE2_LIBRARIES)
+  unset(${var} CACHE)
+endforeach()
+
+# Locate the directory that contains pcre2.h.
+find_path(PCRE2_INCLUDE_DIR NAMES pcre2.h PATH_SUFFIXES include)
+mark_as_advanced(PCRE2_INCLUDE_DIR)
+
+# Build PCRE2_VERSION from the PCRE2_MAJOR and PCRE2_MINOR defines in pcre2.h.
+if(PCRE2_INCLUDE_DIR AND EXISTS "${PCRE2_INCLUDE_DIR}/pcre2.h")
+  file(STRINGS "${PCRE2_INCLUDE_DIR}/pcre2.h" PCRE2_H REGEX "^#define PCRE2_(MAJOR|MINOR).*$")
+  string(REGEX REPLACE "^.*PCRE2_MAJOR[ ]+([0-9]+).*$" "\\1" PCRE2_VERSION_MAJOR "${PCRE2_H}")
+  string(REGEX REPLACE "^.*PCRE2_MINOR[ ]+([0-9]+).*$" "\\1" PCRE2_VERSION_MINOR "${PCRE2_H}")
+  set(PCRE2_VERSION "${PCRE2_VERSION_MAJOR}.${PCRE2_VERSION_MINOR}")
+endif()
+
+# Look for the pcre2-8 library (release and debug names) unless one is already set.
+if(NOT PCRE2_PCRE2_LIBRARY)
+  find_library(PCRE2_PCRE2_LIBRARY_RELEASE NAMES pcre2-8)
+  find_library(PCRE2_PCRE2_LIBRARY_DEBUG NAMES pcre2-8${CMAKE_DEBUG_POSTFIX} pcre2-8d)
+  include(SelectLibraryConfigurations)
+  select_library_configurations(PCRE2_PCRE2)
+endif()
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(PCRE2
+  REQUIRED_VARS PCRE2_INCLUDE_DIR PCRE2_PCRE2_LIBRARY
+  VERSION_VAR PCRE2_VERSION)
+
+if(PCRE2_FOUND)
+  set(PCRE2_INCLUDE_DIRS "${PCRE2_INCLUDE_DIR}")
+
+  # Only fill PCRE2_LIBRARIES when it is not already set.
+  if(NOT PCRE2_LIBRARIES)
+    set(PCRE2_LIBRARIES "${PCRE2_PCRE2_LIBRARY}")
+  endif()
+
+  # Define the imported target only if it does not exist yet.
+  if(NOT TARGET PCRE2::PCRE2)
+    add_library(PCRE2::PCRE2 UNKNOWN IMPORTED)
+    set_property(TARGET PCRE2::PCRE2 PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${PCRE2_INCLUDE_DIRS}")
+
+    if(PCRE2_PCRE2_LIBRARY_DEBUG)
+      set_property(TARGET PCRE2::PCRE2 APPEND PROPERTY IMPORTED_CONFIGURATIONS DEBUG)
+      set_property(TARGET PCRE2::PCRE2 PROPERTY
+        IMPORTED_LOCATION_DEBUG "${PCRE2_PCRE2_LIBRARY_DEBUG}")
+    endif()
+
+    if(PCRE2_PCRE2_LIBRARY_RELEASE)
+      set_property(TARGET PCRE2::PCRE2 APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
+      set_property(TARGET PCRE2::PCRE2 PROPERTY
+        IMPORTED_LOCATION_RELEASE "${PCRE2_PCRE2_LIBRARY_RELEASE}")
+    endif()
+
+    # Neither a debug nor a release library is known: use the generic location.
+    if(NOT (PCRE2_PCRE2_LIBRARY_DEBUG OR PCRE2_PCRE2_LIBRARY_RELEASE))
+      set_property(TARGET PCRE2::PCRE2 APPEND PROPERTY
+        IMPORTED_LOCATION "${PCRE2_PCRE2_LIBRARY}")
+    endif()
+  endif()
+endif()
diff --git a/cmake/modules/SearchInstalledSoftware.cmake b/cmake/modules/SearchInstalledSoftware.cmake
index 464155fd50..252aa56737 100644
--- a/cmake/modules/SearchInstalledSoftware.cmake
+++ b/cmake/modules/SearchInstalledSoftware.cmake
@@ -204,13 +204,16 @@ if(NOT builtin_pcre)
   foreach(suffix FOUND INCLUDE_DIR PCRE_LIBRARY)
     unset(PCRE_${suffix} CACHE)
   endforeach()
-  if(fail-on-missing)
-    find_package(PCRE REQUIRED)
-  else()
-    find_package(PCRE)
-    if(NOT PCRE_FOUND)
-      message(STATUS "PCRE not found. Switching on builtin_pcre option")
-      set(builtin_pcre ON CACHE BOOL "Enabled because PCRE not found (${builtin_pcre_description})" FORCE)
+  find_package(PCRE2)
+  if(NOT PCRE2_FOUND)
+    if(fail-on-missing)
+      find_package(PCRE REQUIRED)
+    else()
+      find_package(PCRE)
+      if(NOT PCRE_FOUND)
+        message(STATUS "PCRE not found. Switching on builtin_pcre option")
+        set(builtin_pcre ON CACHE BOOL "Enabled because PCRE not found (${builtin_pcre_description})" FORCE)
+      endif()
     endif()
   endif()
 endif()
diff --git a/core/base/CMakeLists.txt b/core/base/CMakeLists.txt
index df701fedea..5798c93775 100644
--- a/core/base/CMakeLists.txt
+++ b/core/base/CMakeLists.txt
@@ -214,7 +214,14 @@ target_include_directories(Core PUBLIC
   $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/v7/inc>
 )
 
-target_link_libraries(Core PRIVATE PCRE::PCRE)
+# Link Core against PCRE2 when found (defining USE_PCRE2 for TPRegexp.cxx), else PCRE.
+if(NOT PCRE2_FOUND)
+  target_link_libraries(Core PRIVATE PCRE::PCRE)
+else()
+  target_link_libraries(Core PRIVATE PCRE2::PCRE2)
+  set_property(SOURCE src/TPRegexp.cxx TARGET_DIRECTORY Core
+    PROPERTY COMPILE_DEFINITIONS USE_PCRE2)
+endif()
 
 ROOT_INSTALL_HEADERS(${BASE_HEADER_DIRS})
 
diff --git a/core/base/src/TPRegexp.cxx b/core/base/src/TPRegexp.cxx
index 949b8cc8e9..d70f3e5b8b 100644
--- a/core/base/src/TPRegexp.cxx
+++ b/core/base/src/TPRegexp.cxx
@@ -25,19 +25,36 @@ found at : http://perldoc.perl.org/perlre.html
 #include "TObjString.h"
 #include "TError.h"
 
+#ifdef USE_PCRE2
+#ifdef R__WIN32
+#define PCRE2_STATIC
+#endif
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
+#define PCRE_CASELESS  PCRE2_CASELESS
+#define PCRE_MULTILINE PCRE2_MULTILINE
+#define PCRE_DOTALL    PCRE2_DOTALL
+#define PCRE_EXTENDED  PCRE2_EXTENDED
+#define PCRE_ERROR_NOMATCH PCRE2_ERROR_NOMATCH
+#else
 #ifdef R__WIN32
 #define PCRE_STATIC
 #endif
 #include <pcre.h>
+#endif
 
 #include <vector>
 #include <stdexcept>
 
 struct PCREPriv_t {
+#ifdef USE_PCRE2
+   pcre2_code *fPCRE;
+   PCREPriv_t() { fPCRE = nullptr; }
+#else
    pcre       *fPCRE;
    pcre_extra *fPCREExtra;
-
    PCREPriv_t() { fPCRE = nullptr; fPCREExtra = nullptr; }
+#endif
 };
 
 
@@ -79,10 +96,15 @@ TPRegexp::TPRegexp(const TPRegexp &p)
 
 TPRegexp::~TPRegexp()
 {
+#ifdef USE_PCRE2
+   if (fPriv->fPCRE)
+      pcre2_code_free(fPriv->fPCRE);
+#else
    if (fPriv->fPCRE)
       pcre_free(fPriv->fPCRE);
    if (fPriv->fPCREExtra)
       pcre_free(fPriv->fPCREExtra);
+#endif
    delete fPriv;
 }
 
@@ -93,12 +115,18 @@ TPRegexp &TPRegexp::operator=(const TPRegexp &p)
 {
    if (this != &p) {
       fPattern = p.fPattern;
+#ifdef USE_PCRE2
+      if (fPriv->fPCRE)
+         pcre2_code_free(fPriv->fPCRE);
+      fPriv->fPCRE = nullptr;
+#else
       if (fPriv->fPCRE)
          pcre_free(fPriv->fPCRE);
       fPriv->fPCRE = nullptr;
       if (fPriv->fPCREExtra)
          pcre_free(fPriv->fPCREExtra);
       fPriv->fPCREExtra = nullptr;
+#endif
       fPCREOpts  = p.fPCREOpts;
    }
    return *this;
@@ -197,31 +225,50 @@ TString TPRegexp::GetModifiers() const
 
 void TPRegexp::Compile()
 {
+#ifdef USE_PCRE2
+   if (fPriv->fPCRE)
+      pcre2_code_free(fPriv->fPCRE);
+#else
    if (fPriv->fPCRE)
       pcre_free(fPriv->fPCRE);
+#endif
 
    if (fPCREOpts & kPCRE_DEBUG_MSGS)
       Info("Compile", "PREGEX compiling %s", fPattern.Data());
 
+#ifdef USE_PCRE2
+   int errcode;
+   PCRE2_SIZE patIndex;
+   fPriv->fPCRE = pcre2_compile((PCRE2_SPTR)fPattern.Data(), fPattern.Length(),
+                                fPCREOpts & kPCRE_INTMASK,
+                                &errcode, &patIndex, nullptr);
+#else
    const char *errstr;
    Int_t patIndex;
    fPriv->fPCRE = pcre_compile(fPattern.Data(), fPCREOpts & kPCRE_INTMASK,
                                &errstr, &patIndex, nullptr);
+#endif
 
    if (!fPriv->fPCRE) {
+#ifdef USE_PCRE2
+      PCRE2_UCHAR errstr[256];
+      pcre2_get_error_message(errcode, errstr, 256);
+#endif
       if (fgThrowAtCompileError) {
          throw std::runtime_error
             (TString::Format("TPRegexp::Compile() compilation of TPRegexp(%s) failed at: %d because %s",
-                             fPattern.Data(), patIndex, errstr).Data());
+                             fPattern.Data(), (int)patIndex, errstr).Data());
       } else {
          Error("Compile", "compilation of TPRegexp(%s) failed at: %d because %s",
-               fPattern.Data(), patIndex, errstr);
+               fPattern.Data(), (int)patIndex, errstr);
          return;
       }
    }
 
+#ifndef USE_PCRE2
    if (fPriv->fPCREExtra || (fPCREOpts & kPCRE_OPTIMIZE))
       Optimize();
+#endif
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -229,6 +276,7 @@ void TPRegexp::Compile()
 
 void TPRegexp::Optimize()
 {
+#ifndef USE_PCRE2
    if (fPriv->fPCREExtra)
       pcre_free(fPriv->fPCREExtra);
 
@@ -243,6 +291,7 @@ void TPRegexp::Optimize()
       Error("Optimize", "Optimization of TPRegexp(%s) failed: %s",
             fPattern.Data(), errstr);
    }
+#endif
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -308,21 +357,43 @@ Int_t TPRegexp::MatchInternal(const TString &s, Int_t start,
                               Int_t nMaxMatch, TArrayI *pos) const
 {
    Int_t *offVec = new Int_t[3*nMaxMatch];
+
+#ifdef USE_PCRE2
+   // Ovector of nMaxMatch pairs: pcre2_match() returns 0 rather than more pairs than offVec (3*nMaxMatch ints) can hold.
+   pcre2_match_data *match_data = pcre2_match_data_create(nMaxMatch, nullptr);
+   Int_t nrMatch = pcre2_match(fPriv->fPCRE, (PCRE2_SPTR8)s.Data(),
+                               s.Length(), start, 0,
+                               match_data, nullptr);
+#else
    // pcre_exec allows less options - see pcre_internal.h PUBLIC_EXEC_OPTIONS.
    Int_t nrMatch = pcre_exec(fPriv->fPCRE, fPriv->fPCREExtra, s.Data(),
                              s.Length(), start, 0,
                              offVec, 3*nMaxMatch);
+#endif
 
    if (nrMatch == PCRE_ERROR_NOMATCH)
       nrMatch = 0;
    else if (nrMatch <= 0) {
       Error("Match","pcre_exec error = %d", nrMatch);
+#ifdef USE_PCRE2
+      pcre2_match_data_free(match_data);
+#endif
       delete [] offVec;
       return 0;
    }
 
-   if (pos)
+   if (pos) {
+#ifdef USE_PCRE2
+      PCRE2_SIZE *oVec = pcre2_get_ovector_pointer(match_data);
+      for (int i = 0; i < 2 * nrMatch; ++i)
+         offVec[i] = oVec[i];
+#endif
       pos->Set(2*nrMatch, offVec);
+   }
+
+#ifdef USE_PCRE2
+   pcre2_match_data_free(match_data);
+#endif
    delete [] offVec;
 
    return nrMatch;
@@ -404,13 +475,24 @@ Int_t TPRegexp::SubstituteInternal(TString &s, const TString &replacePattern,
    Int_t offset = start;
    Int_t last = 0;
 
+#ifdef USE_PCRE2
+   // Ovector of nMaxMatch pairs so the later copy into offVec (used as 3*nMaxMatch ints) cannot overflow.
+   pcre2_match_data *match_data = pcre2_match_data_create(nMaxMatch, nullptr);
+#endif
+
    while (kTRUE) {
 
       // find next matching subs
       // pcre_exec allows less options - see pcre_internal.h PUBLIC_EXEC_OPTIONS.
+#ifdef USE_PCRE2
+      Int_t nrMatch = pcre2_match(fPriv->fPCRE, (PCRE2_SPTR)s.Data(),
+                                  s.Length(), offset, 0,
+                                  match_data, nullptr);
+#else
       Int_t nrMatch = pcre_exec(fPriv->fPCRE, fPriv->fPCREExtra, s.Data(),
                                 s.Length(), offset, 0,
                                 offVec, 3*nMaxMatch);
+#endif
 
       if (nrMatch == PCRE_ERROR_NOMATCH) {
          break;
@@ -419,6 +501,12 @@ Int_t TPRegexp::SubstituteInternal(TString &s, const TString &replacePattern,
          break;
       }
 
+#ifdef USE_PCRE2
+      PCRE2_SIZE *oVec = pcre2_get_ovector_pointer(match_data);
+      for (int i = 0; i < 2 * nrMatch; ++i)
+         offVec[i] = oVec[i];
+#endif
+
       // append anything previously unmatched, but not substituted
       if (last <= offVec[0]) {
          fin += s(last,offVec[0]-last);
@@ -446,6 +534,9 @@ Int_t TPRegexp::SubstituteInternal(TString &s, const TString &replacePattern,
       }
    }
 
+#ifdef USE_PCRE2
+   pcre2_match_data_free(match_data);
+#endif
    delete [] offVec;
 
    fin += s(last,s.Length()-last);
diff --git a/tutorials/legacy/regexp/regexp.C b/tutorials/legacy/regexp/regexp.C
index 995b823bce..f38ed6799e 100644
--- a/tutorials/legacy/regexp/regexp.C
+++ b/tutorials/legacy/regexp/regexp.C
@@ -94,11 +94,11 @@ void regexp()
    // criteria:
    // 1) It should be of the form string1@string2 . The "^" and "$" ensure that we compare the complete
    //    email string
-   // 2) ([\\w-\\.]+)  :
+   // 2) ([\\w\\-\\.]+)  :
    //    string1 is only allowed to be composed out of the alphanumeric characters, "-" and "." .
    //    The "+" ensures that string1 can not be empty .
    // 3) string2 is matched against three different parts :
-   //    a. ((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([\\w-]+\\.)+))  :
+   //    a. ((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([\\w\\-]+\\.)+))  :
    //       This regular expression ensures that EITHER the string starts with "[" followed by three groups
    //       of numbers, separated by "." , where each group has 1 to 3 numbers, OR alphanumeric strings,
    //       possibly containing "-" characters, separated by "." .
@@ -108,7 +108,7 @@ void regexp()
    //       At most one "]" character .
 
    TString s5("fons.rademakers@cern.ch");
-   TPRegexp r5("^([\\w-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([\\w-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$");
+   TPRegexp r5("^([\\w\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([\\w\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$");
    cout << "Check if the email address \"" << s5 << "\" is valid: " << (r5.MatchB(s5) ? "TRUE" : "FALSE") << endl;
 
    // Substitute Example with pattern modifier :
-- 
2.41.0