Blob Blame History Raw
From 1767e8be04f300ad10f2b358c281a23f0759e3d3 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
Date: Thu, 4 Aug 2016 17:15:38 +0000
Subject: [PATCH] Fix character class bug when a Unicode property was present
 with \D etc in a non-ucp character class in a wide character mode.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1664 2f5784b3-3f2a-0410-8824-cb99058d5e15

Petr Písař: Ported to 8.39.

diff --git a/pcre_compile.c b/pcre_compile.c
index 7cd3950..fb80ed1 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -5579,6 +5579,34 @@ for (;; ptr++)
 #endif
 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
       {
+      /* For non-UCP wide characters, in a non-negated class containing \S or
+      similar (should_flip_negation is set), all characters greater than 255
+      must be in the class. */
+
+      if (
+#if defined COMPILE_PCRE8
+           utf &&
+#endif
+           should_flip_negation && !negate_class && (options & PCRE_UCP) == 0)
+        {
+        *class_uchardata++ = XCL_RANGE;
+        if (utf)   /* Will always be utf in the 8-bit library */
+          {
+          class_uchardata += PRIV(ord2utf)(0x100, class_uchardata);
+          class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);
+          }
+        else       /* Can only happen for the 16-bit & 32-bit libraries */
+          {
+#if defined COMPILE_PCRE16
+          *class_uchardata++ = 0x100;
+          *class_uchardata++ = 0xffffu;
+#elif defined COMPILE_PCRE32
+          *class_uchardata++ = 0x100;
+          *class_uchardata++ = 0xffffffffu;
+#endif
+          }
+        }
+
       *class_uchardata++ = XCL_END;    /* Marks the end of extra data */
       *code++ = OP_XCLASS;
       code += LINK_SIZE;
diff --git a/testdata/testinput16 b/testdata/testinput16
index 15419e6..7ccde0a 100644
--- a/testdata/testinput16
+++ b/testdata/testinput16
@@ -38,4 +38,30 @@
 /s+/i8SI
     SSss\x{17f}
 
+/[\W\p{Any}]/BZ
+    abc
+    123 
+
+/[\W\pL]/BZ
+    abc
+    ** Failers 
+    123
+    
+/[\D]/8
+    \x{1d7cf}
+
+/[\D\P{Nd}]/8
+    \x{1d7cf}
+
+/[^\D]/8
+    a9b
+    ** Failers
+    \x{1d7cf}
+
+/[^\D\P{Nd}]/8
+    a9b
+    \x{1d7cf}
+    ** Failers
+    \x{10000}
+
 /-- End of testinput16 --/
diff --git a/testdata/testinput19 b/testdata/testinput19
index ce45afc..dfe8c7b 100644
--- a/testdata/testinput19
+++ b/testdata/testinput19
@@ -25,4 +25,21 @@
 /s+/i8SI
     SSss\x{17f}
 
+/[\D]/8
+    \x{1d7cf}
+
+/[\D\P{Nd}]/8
+    \x{1d7cf}
+
+/[^\D]/8
+    a9b
+    ** Failers
+    \x{1d7cf}
+
+/[^\D\P{Nd}]/8
+    a9b
+    \x{1d7cf}
+    ** Failers
+    \x{10000}
+
 /-- End of testinput19 --/ 
diff --git a/testdata/testinput7 b/testdata/testinput7
index 00b9738..f44a810 100644
--- a/testdata/testinput7
+++ b/testdata/testinput7
@@ -838,15 +838,6 @@ of case for anything other than the ASCII letters. --/
 /^s?c/mi8I
     scat
 
-/[\W\p{Any}]/BZ
-    abc
-    123 
-
-/[\W\pL]/BZ
-    abc
-    ** Failers 
-    123     
-
 /a[[:punct:]b]/WBZ
 
 /a[[:punct:]b]/8WBZ
diff --git a/testdata/testoutput16 b/testdata/testoutput16
index fd184cd..e6ba26a 100644
--- a/testdata/testoutput16
+++ b/testdata/testoutput16
@@ -138,4 +138,56 @@ Starting chars: S s \xc5
     SSss\x{17f}
  0: SSss\x{17f}
 
+/[\W\p{Any}]/BZ
+------------------------------------------------------------------
+        Bra
+        [\x00-/:-@[-^`{-\xff\p{Any}]
+        Ket
+        End
+------------------------------------------------------------------
+    abc
+ 0: a
+    123 
+ 0: 1
+
+/[\W\pL]/BZ
+------------------------------------------------------------------
+        Bra
+        [\x00-/:-@[-^`{-\xff\p{L}]
+        Ket
+        End
+------------------------------------------------------------------
+    abc
+ 0: a
+    ** Failers 
+ 0: *
+    123
+No match
+    
+/[\D]/8
+    \x{1d7cf}
+ 0: \x{1d7cf}
+
+/[\D\P{Nd}]/8
+    \x{1d7cf}
+ 0: \x{1d7cf}
+
+/[^\D]/8
+    a9b
+ 0: 9
+    ** Failers
+No match
+    \x{1d7cf}
+No match
+
+/[^\D\P{Nd}]/8
+    a9b
+ 0: 9
+    \x{1d7cf}
+ 0: \x{1d7cf}
+    ** Failers
+No match
+    \x{10000}
+No match
+
 /-- End of testinput16 --/
diff --git a/testdata/testoutput19 b/testdata/testoutput19
index eb8a8f6..982bea4 100644
--- a/testdata/testoutput19
+++ b/testdata/testoutput19
@@ -105,4 +105,30 @@ Starting chars: S s \xff
     SSss\x{17f}
  0: SSss\x{17f}
 
+/[\D]/8
+    \x{1d7cf}
+ 0: \x{1d7cf}
+
+/[\D\P{Nd}]/8
+    \x{1d7cf}
+ 0: \x{1d7cf}
+
+/[^\D]/8
+    a9b
+ 0: 9
+    ** Failers
+No match
+    \x{1d7cf}
+No match
+
+/[^\D\P{Nd}]/8
+    a9b
+ 0: 9
+    \x{1d7cf}
+ 0: \x{1d7cf}
+    ** Failers
+No match
+    \x{10000}
+No match
+
 /-- End of testinput19 --/ 
diff --git a/testdata/testoutput7 b/testdata/testoutput7
index fdfff64..2b167b2 100644
--- a/testdata/testoutput7
+++ b/testdata/testoutput7
@@ -2295,32 +2295,6 @@ Need char = 'c' (caseless)
     scat
  0: sc
 
-/[\W\p{Any}]/BZ
-------------------------------------------------------------------
-        Bra
-        [\x00-/:-@[-^`{-\xff\p{Any}]
-        Ket
-        End
-------------------------------------------------------------------
-    abc
- 0: a
-    123 
- 0: 1
-
-/[\W\pL]/BZ
-------------------------------------------------------------------
-        Bra
-        [\x00-/:-@[-^`{-\xff\p{L}]
-        Ket
-        End
-------------------------------------------------------------------
-    abc
- 0: a
-    ** Failers 
- 0: *
-    123     
-No match
-
 /a[[:punct:]b]/WBZ
 ------------------------------------------------------------------
         Bra
-- 
2.5.5