Blob Blame History Raw
From 1c9ad4b954e5888ff6acdcb8e7073b123641b536 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Sat, 6 Feb 2016 16:40:59 +0000
Subject: [PATCH] Fix pcre2test loop when a callout is in an initial
 lookbehind.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@488 6239d852-aaf2-0410-a92c-79f79f948069

Petr Písař: Ported to 10.21.

Signed-off-by: Petr Písař <ppisar@redhat.com>
---
 doc/pcre2test.1      |  4 +++-
 src/pcre2test.c      | 38 ++++++++++++++++++++++++++++----------
 testdata/testinput2  |  5 +++++
 testdata/testoutput2 | 15 +++++++++++++++
 4 files changed, 51 insertions(+), 11 deletions(-)

diff --git a/doc/pcre2test.1 b/doc/pcre2test.1
index b8eef93..4845cc7 100644
--- a/doc/pcre2test.1
+++ b/doc/pcre2test.1
@@ -1475,7 +1475,9 @@ item to be tested. For example:
 This output indicates that callout number 0 occurred for a match attempt
 starting at the fourth character of the subject string, when the pointer was at
 the seventh character, and when the next pattern item was \ed. Just
-one circumflex is output if the start and current positions are the same.
+one circumflex is output if the start and current positions are the same, or if 
+the current position precedes the start position, which can happen if the 
+callout is in a lookbehind assertion.
 .P
 Callouts numbered 255 are assumed to be automatic callouts, inserted as a
 result of the \fB/auto_callout\fP pattern modifier. In this case, instead of
diff --git a/src/pcre2test.c b/src/pcre2test.c
index 0a5879e..bc9099f 100644
--- a/src/pcre2test.c
+++ b/src/pcre2test.c
@@ -2546,12 +2546,13 @@ return (int)(pp - p);
 
 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
 For printing *MARK strings, a negative length is given. If handed a NULL file,
-just counts chars without printing. */
+just counts chars without printing (because pchar() does that). */
 
 static int pchars8(PCRE2_SPTR8 p, int length, BOOL utf, FILE *f)
 {
 uint32_t c = 0;
 int yield = 0;
+
 if (length < 0) length = p[-1];
 while (length-- > 0)
   {
@@ -2569,6 +2570,7 @@ while (length-- > 0)
   c = *p++;
   yield += pchar(c, utf, f);
   }
+
 return yield;
 }
 #endif
@@ -4975,6 +4977,7 @@ static int
 callout_function(pcre2_callout_block_8 *cb, void *callout_data_ptr)
 {
 uint32_t i, pre_start, post_start, subject_length;
+PCRE2_SIZE current_position;
 BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
 BOOL callout_capture = (dat_datctl.control & CTL_CALLOUT_CAPTURE) != 0;
 
@@ -5025,21 +5028,36 @@ if (callout_capture)
     }
   }
 
-/* Re-print the subject in canonical form, the first time or if giving full
-datails. On subsequent calls in the same match, we use pchars just to find the
-printed lengths of the substrings. */
+/* Re-print the subject in canonical form (with escapes for non-printing
+characters), the first time, or if giving full details. On subsequent calls in
+the same match, we use PCHARS() just to find the printed lengths of the
+substrings. */
 
 if (f != NULL) fprintf(f, "--->");
 
+/* The subject before the match start. */
+
 PCHARS(pre_start, cb->subject, 0, cb->start_match, utf, f);
 
+/* If a lookbehind is involved, the current position may be earlier than the 
+match start. If so, use the match start instead. */
+
+current_position = (cb->current_position >= cb->start_match)? 
+  cb->current_position : cb->start_match;
+
+/* The subject between the match start and the current position. */
+
 PCHARS(post_start, cb->subject, cb->start_match,
-  cb->current_position - cb->start_match, utf, f);
+  current_position - cb->start_match, utf, f);
 
-PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL);
+/* Print from the current position to the end. */
+
+PCHARSV(cb->subject, current_position, cb->subject_length - current_position, 
+  utf, f);
 
-PCHARSV(cb->subject, cb->current_position,
-  cb->subject_length - cb->current_position, utf, f);
+/* Calculate the total subject printed length (no print). */
+
+PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL);
 
 if (f != NULL) fprintf(f, "\n");
 
@@ -7021,7 +7039,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
     struct rlimit rlim;
     if (U32OVERFLOW(uli))
       {
-      fprintf(stderr, "+++ Argument for -S is too big\n");
+      fprintf(stderr, "** Argument for -S is too big\n");
       exit(1);
       }
     stack_size = (uint32_t)uli;
@@ -7073,7 +7091,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
       {
       if (U32OVERFLOW(uli))
         {
-        fprintf(stderr, "+++ Argument for %s is too big\n", arg);
+        fprintf(stderr, "** Argument for %s is too big\n", arg);
         exit(1);
         }
       timeitm = (int)uli;
diff --git a/testdata/testinput2 b/testdata/testinput2
index 86c149d..67c40ba 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4802,4 +4802,9 @@ a)"xI
 
 /(?J)(?'a'))(?'a')/
 
+/(?<=((?C)0))/
+    9010
+\= Expect no match
+    abc
+
 # End of testinput2 
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 87c7204..32aa066 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -15161,4 +15161,19 @@ MK: A\x00b
 /(?J)(?'a'))(?'a')/
 Failed: error 122 at offset 10: unmatched closing parenthesis
 
+/(?<=((?C)0))/
+    9010
+--->9010
+  0  ^       0
+  0   ^      0
+ 0: 
+ 1: 0
+\= Expect no match
+    abc
+--->abc
+  0  ^      0
+  0   ^     0
+  0    ^    0
+No match
+
 # End of testinput2 
-- 
2.5.0