From 00acf0e2f3f01a3057fc099c60e4f530d744619b Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Wed, 16 Oct 2019 17:12:13 +0000
Subject: [PATCH] Fix error offset bug introduced at 1176.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1179 6239d852-aaf2-0410-a92c-79f79f948069
Petr Písař: Ported to 10.33.

Signed-off-by: Petr Písař <ppisar@redhat.com>
---
 src/pcre2_match.c     |  4 ++++
 testdata/testinput10  |  6 ++++++
 testdata/testoutput10 | 10 ++++++++++
 3 files changed, 20 insertions(+)

diff --git a/src/pcre2_match.c b/src/pcre2_match.c
index 9f34e20..cca4d3a 100644
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -6186,6 +6186,10 @@ if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
   }
 match_data->subject = NULL;
 
+/* Zero the error offset in case the first code unit is invalid UTF. */
+
+match_data->startchar = 0;
+
 /* If the pattern was successfully studied with JIT support, run the JIT
 executable instead of the rest of this function. Most options must be set at
 compile time for the JIT code to be usable. Fallback to the normal code path if
diff --git a/testdata/testinput10 b/testdata/testinput10
index 4399f82..19d2f2f 100644
--- a/testdata/testinput10
+++ b/testdata/testinput10
@@ -493,4 +493,10 @@
 
 /(?(á/utf
 
+/x/utf
+    abxyz
+    \x80\=startchar
+    abc\x80\=startchar
+    abc\x80\=startchar,offset=3
+
 # End of testinput10
diff --git a/testdata/testoutput10 b/testdata/testoutput10
index dfecda1..dd91c45 100644
--- a/testdata/testoutput10
+++ b/testdata/testoutput10
@@ -1651,4 +1651,14 @@ Failed: error 142 at offset 4: syntax error in subpattern name (missing terminat
 /(?(á/utf
 Failed: error 142 at offset 5: syntax error in subpattern name (missing terminator?)
 
+/x/utf
+    abxyz
+ 0: x
+    \x80\=startchar
+Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0
+    abc\x80\=startchar
+Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 3
+    abc\x80\=startchar,offset=3
+Error -36 (bad UTF-8 offset)
+
 # End of testinput10
-- 
2.21.0