From f34cc5af94622240abbf730ac82c4f91cc4ffb83 Mon Sep 17 00:00:00 2001
From: Hugo van der Sanden <hv@crypt.org>
Date: Tue, 4 Oct 2016 14:40:11 +0100
Subject: [PATCH] anchored/floating substrings must be utf8 if target is
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ported to 5.24.4:

commit 2814f4b3549f665a6f9203ac9e890ae1e415e0dc
Author: Hugo van der Sanden <hv@crypt.org>
Date:   Tue Oct 4 14:40:11 2016 +0100

    [perl #129350] anchored/floating substrings must be utf8 if target is

    If the target is utf8 and either the anchored or floating substrings
    are not, we need to create utf8 copies to check against. The state
    of the two substrings may not be the same, but we were only testing
    whichever we planned to check first.

Signed-off-by: Petr Písař <ppisar@redhat.com>
---
 regexec.c     | 3 ++-
 t/re/re_tests | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/regexec.c b/regexec.c
index ff8e89c..6904546 100644
--- a/regexec.c
+++ b/regexec.c
@@ -703,7 +703,8 @@ Perl_re_intuit_start(pTHX_
     reginfo->poscache_maxiter = 0;
 
     if (utf8_target) {
-	if (!prog->check_utf8 && prog->check_substr)
+        if ((!prog->anchored_utf8 && prog->anchored_substr)
+                || (!prog->float_utf8 && prog->float_substr))
 	    to_utf8_substr(prog);
 	check = prog->check_utf8;
     } else {
diff --git a/t/re/re_tests b/t/re/re_tests
index ab7ddbb..8b0feaa 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -1969,6 +1969,7 @@ ab(?#Comment){2}c	abbc	y	$&	abbc
 aa$|a(?R)a|a	aaa	y	$&	aaa		# [perl 128420] recursive matches
 (?:\1|a)([bcd])\1(?:(?R)|e)\1	abbaccaddedcb	y	$&	abbaccaddedcb		# [perl 128420] recursive match with backreferences
 (?il)\x{100}|\x{100}|\x{FF}	\xFF	y	$&	\xFF
+\b\z0*\x{100}	.\x{100}	n	-	-	# [perl #129350] crashed in intuit_start
 
 # Keep these lines at the end of the file
 # vim: softtabstop=0 noexpandtab
-- 
2.14.3