Blob Blame History Raw
From 5682a8a8aac39001f6868603bc81a462994c30bb Mon Sep 17 00:00:00 2001
From: Hugo van der Sanden <hv@crypt.org>
Date: Tue, 15 Sep 2020 14:02:54 +0100
Subject: [PATCH] assume worst-case for GOSUBs we don't analyse
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

During study_chunk, under various conditions we avoid recursing into
a GOSUB. But we must avoid giving the enclosing scope the idea that
this GOSUB would match only an empty string, since that could trigger
wrong optimizations (e.g. CURLYX => CURLYM, as seen in GH #18096).

So we mark the construct as infinite, as in the code branch where we
_do_ recurse into it.

Petr Písař: Ported from 9c256e3fab02bde17b249fc6a1057d34ca0a608d to
5.30.3.

Signed-off-by: Petr Písař <ppisar@redhat.com>
---
 regcomp.c     | 7 ++++++-
 t/re/re_tests | 2 ++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/regcomp.c b/regcomp.c
index 7426e83..df88311 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -5145,7 +5145,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                      * might result in a minlen of 1 and not of 4,
                      * but this doesn't make us mismatch, just try a bit
                      * harder than we should.
-                     * */
+                     *
+                     * However we must assume this GOSUB is infinite, to
+                     * avoid wrongly applying other optimizations in the
+                     * enclosing scope - see GH 18096, for example.
+                     */
+                    is_inf = is_inf_internal = 1;
                     scan= regnext(scan);
                     continue;
                 }
diff --git a/t/re/re_tests b/t/re/re_tests
index d07c943..10867e0 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -2017,6 +2017,8 @@ AB\s+\x{100}	AB \x{100}X	y	-	-
 /\x{30c3}?[\x{30a2}\x{30a4}\x{30a6}\x{30a8}\x{30aa}-\x{30e2}\x{30e4}\x{30e6}\x{30e8}-\x{30f4}](?:[\x{30e3}\x{30e5}\x{30e7}\x{30a1}\x{30a3}\x{30a5}\x{30a7}\x{30a9}])?\x{30fc}?\x{30f3}?/	\x{30de}\x{30fc}\x{30af}\x{30b5}\x{30fc}\x{30d3}\x{30b9}	y	$&	\x{30de}\x{30fc}	# part of [perl #133942
 /[\x{3041}-\x{3093}]+/	\x{6f22}\x{5b57}\x{3001}\x{30ab}\x{30bf}\x{30ab}\x{30ca}\x{3001}\x{3072}\x{3089}\x{304c}\x{306a}\x{306e}\x{5165}\x{3063}\x{305f}String	y	$&	\x{3072}\x{3089}\x{304c}\x{306a}\x{306e}	# [perl #133978]
 /(?:0)|(?:)(?:[1-9])/	q0	y	$&	0	# [https://github.com/Perl/perl5/issues/17372]
+^((\w|<(\s)*(?1)(?3)*>)(?:(?3)*\+(?3)*(?2))*)(?3)*\+	a + b + <c + d>	y	$1	a + b		# [GH #18096]
+^((\w|<(\s)*(?1)(?3)*>)(?:(?3)*\+(?3)*(?2))*)(?3)*\+	a + <b> + c	y	$1	a + <b>		# [GH #18096]
 # Keep these lines at the end of the file
 # pat	string	y/n/etc	expr	expected-expr	skip-reason	comment
 # vim: softtabstop=0 noexpandtab
-- 
2.25.4