From 1767e8be04f300ad10f2b358c281a23f0759e3d3 Mon Sep 17 00:00:00 2001 From: ph10 Date: Thu, 4 Aug 2016 17:15:38 +0000 Subject: [PATCH] Fix character class bug when a Unicode property was present with \D etc in a non-ucp character class in a wide character mode. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1664 2f5784b3-3f2a-0410-8824-cb99058d5e15 Petr Písař: Ported to 8.39. diff --git a/pcre_compile.c b/pcre_compile.c index 7cd3950..fb80ed1 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -5579,6 +5579,34 @@ for (;; ptr++) #endif #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 { + /* For non-UCP wide characters, in a non-negative class containing \S or + similar (should_flip_negation is set), all characters greater than 255 + must be in the class. */ + + if ( +#if defined COMPILE_PCRE8 + utf && +#endif + should_flip_negation && !negate_class && (options & PCRE_UCP) == 0) + { + *class_uchardata++ = XCL_RANGE; + if (utf) /* Will always be utf in the 8-bit library */ + { + class_uchardata += PRIV(ord2utf)(0x100, class_uchardata); + class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata); + } + else /* Can only happen for the 16-bit & 32-bit libraries */ + { +#if defined COMPILE_PCRE16 + *class_uchardata++ = 0x100; + *class_uchardata++ = 0xffffu; +#elif defined COMPILE_PCRE32 + *class_uchardata++ = 0x100; + *class_uchardata++ = 0xffffffffu; +#endif + } + } + *class_uchardata++ = XCL_END; /* Marks the end of extra data */ *code++ = OP_XCLASS; code += LINK_SIZE; diff --git a/testdata/testinput16 b/testdata/testinput16 index 15419e6..7ccde0a 100644 --- a/testdata/testinput16 +++ b/testdata/testinput16 @@ -38,4 +38,30 @@ /s+/i8SI SSss\x{17f} +/[\W\p{Any}]/BZ + abc + 123 + +/[\W\pL]/BZ + abc + ** Failers + 123 + +/[\D]/8 + \x{1d7cf} + +/[\D\P{Nd}]/8 + \x{1d7cf} + +/[^\D]/8 + a9b + ** Failers + \x{1d7cf} + +/[^\D\P{Nd}]/8 + a9b + \x{1d7cf} + ** Failers + \x{10000} + /-- End of testinput16 --/ diff --git a/testdata/testinput19 b/testdata/testinput19 index ce45afc..dfe8c7b 100644 --- a/testdata/testinput19 +++ b/testdata/testinput19 @@ -25,4 +25,21 @@ /s+/i8SI SSss\x{17f} +/[\D]/8 + \x{1d7cf} + +/[\D\P{Nd}]/8 + \x{1d7cf} + +/[^\D]/8 + a9b + ** Failers + \x{1d7cf} + +/[^\D\P{Nd}]/8 + a9b + \x{1d7cf} + ** Failers + \x{10000} + /-- End of testinput19 --/ diff --git a/testdata/testinput7 b/testdata/testinput7 index 00b9738..f44a810 100644 --- a/testdata/testinput7 +++ b/testdata/testinput7 @@ -838,15 +838,6 @@ of case for anything other than the ASCII letters. --/ /^s?c/mi8I scat -/[\W\p{Any}]/BZ - abc - 123 - -/[\W\pL]/BZ - abc - ** Failers - 123 - /a[[:punct:]b]/WBZ /a[[:punct:]b]/8WBZ diff --git a/testdata/testoutput16 b/testdata/testoutput16 index fd184cd..e6ba26a 100644 --- a/testdata/testoutput16 +++ b/testdata/testoutput16 @@ -138,4 +138,56 @@ Starting chars: S s \xc5 SSss\x{17f} 0: SSss\x{17f} +/[\W\p{Any}]/BZ +------------------------------------------------------------------ + Bra + [\x00-/:-@[-^`{-\xff\p{Any}] + Ket + End +------------------------------------------------------------------ + abc + 0: a + 123 + 0: 1 + +/[\W\pL]/BZ +------------------------------------------------------------------ + Bra + [\x00-/:-@[-^`{-\xff\p{L}] + Ket + End +------------------------------------------------------------------ + abc + 0: a + ** Failers + 0: * + 123 +No match + +/[\D]/8 + \x{1d7cf} + 0: \x{1d7cf} + +/[\D\P{Nd}]/8 + \x{1d7cf} + 0: \x{1d7cf} + +/[^\D]/8 + a9b + 0: 9 + ** Failers +No match + \x{1d7cf} +No match + +/[^\D\P{Nd}]/8 + a9b + 0: 9 + \x{1d7cf} + 0: \x{1d7cf} + ** Failers +No match + \x{10000} +No match + /-- End of testinput16 --/ diff --git a/testdata/testoutput19 b/testdata/testoutput19 index eb8a8f6..982bea4 100644 --- a/testdata/testoutput19 +++ b/testdata/testoutput19 @@ -105,4 +105,30 @@ Starting chars: S s \xff SSss\x{17f} 0: SSss\x{17f} +/[\D]/8 + \x{1d7cf} + 0: \x{1d7cf} + +/[\D\P{Nd}]/8 + \x{1d7cf} + 0: \x{1d7cf} + +/[^\D]/8 + a9b + 0: 9 + ** Failers +No match + \x{1d7cf} +No match + +/[^\D\P{Nd}]/8 + a9b + 0: 9 + \x{1d7cf} + 0: \x{1d7cf} + ** Failers +No match + \x{10000} +No match + /-- End of testinput19 --/ diff --git a/testdata/testoutput7 b/testdata/testoutput7 index fdfff64..2b167b2 100644 --- a/testdata/testoutput7 +++ b/testdata/testoutput7 @@ -2295,32 +2295,6 @@ Need char = 'c' (caseless) scat 0: sc -/[\W\p{Any}]/BZ ------------------------------------------------------------------- - Bra - [\x00-/:-@[-^`{-\xff\p{Any}] - Ket - End ------------------------------------------------------------------- - abc - 0: a - 123 - 0: 1 - -/[\W\pL]/BZ ------------------------------------------------------------------- - Bra - [\x00-/:-@[-^`{-\xff\p{L}] - Ket - End ------------------------------------------------------------------- - abc - 0: a - ** Failers - 0: * - 123 -No match - /a[[:punct:]b]/WBZ ------------------------------------------------------------------ Bra -- 2.5.5