Blob Blame History Raw
From 31152356367ef3cf3440c0431d2898f198e4dd18 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
Date: Tue, 27 May 2014 13:18:31 +0000
Subject: [PATCH] Fix empty-matching possessive zero-repeat groups bug.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1478 2f5784b3-3f2a-0410-8824-cb99058d5e15
Signed-off-by: Petr Písař <ppisar@redhat.com>

Petr Pisar: Ported to 8.35.

Signed-off-by: Petr Písař <ppisar@redhat.com>
---
 pcre_exec.c          | 43 +++++++++++++++++++++++++++----------------
 testdata/testinput1  |  9 +++++++++
 testdata/testinput8  |  6 ++++++
 testdata/testoutput1 | 12 ++++++++++++
 testdata/testoutput8 |  8 ++++++++
 5 files changed, 62 insertions(+), 16 deletions(-)

diff --git a/pcre_exec.c b/pcre_exec.c
index 5dec992..5a8dbad 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -1167,11 +1167,16 @@ for (;;)
         if (rrc == MATCH_KETRPOS)
           {
           offset_top = md->end_offset_top;
-          eptr = md->end_match_ptr;
           ecode = md->start_code + code_offset;
           save_capture_last = md->capture_last;
           matched_once = TRUE;
           mstart = md->start_match_ptr;    /* In case \K changed it */
+          if (eptr == md->end_match_ptr)   /* Matched an empty string */
+            {
+            do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
+            break;
+            }  
+          eptr = md->end_match_ptr;
           continue;
           }
 
@@ -1241,10 +1246,15 @@ for (;;)
       if (rrc == MATCH_KETRPOS)
         {
         offset_top = md->end_offset_top;
-        eptr = md->end_match_ptr;
         ecode = md->start_code + code_offset;
         matched_once = TRUE;
         mstart = md->start_match_ptr;   /* In case \K reset it */
+        if (eptr == md->end_match_ptr)  /* Matched an empty string */
+          {
+          do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
+          break;
+          }  
+        eptr = md->end_match_ptr;
         continue;
         }
 
@@ -1894,7 +1904,7 @@ for (;;)
     case OP_KETRMAX:
     case OP_KETRPOS:
     prev = ecode - GET(ecode, 1);
-
+    
     /* If this was a group that remembered the subject start, in order to break
     infinite repeats of empty string matches, retrieve the subject start from
     the chain. Otherwise, set it NULL. */
@@ -1919,7 +1929,7 @@ for (;;)
       md->start_match_ptr = mstart;
       RRETURN(MATCH_MATCH);         /* Sets md->mark */
       }
-
+      
     /* For capturing groups we have to check the group number back at the start
     and if necessary complete handling an extraction by setting the offsets and
     bumping the high water mark. Whole-pattern recursion is coded as a recurse
@@ -1979,6 +1989,19 @@ for (;;)
         }
       }
 
+    /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
+    and return the MATCH_KETRPOS. This makes it possible to do the repeats one
+    at a time from the outer level, thus saving stack. This must precede the 
+    empty string test - in this case that test is done at the outer level. */
+
+    if (*ecode == OP_KETRPOS)
+      {
+      md->start_match_ptr = mstart;    /* In case \K reset it */
+      md->end_match_ptr = eptr;
+      md->end_offset_top = offset_top;
+      RRETURN(MATCH_KETRPOS);
+      }
+
     /* For an ordinary non-repeating ket, just continue at this level. This
     also happens for a repeating ket if no characters were matched in the
     group. This is the forcible breaking of infinite loops as implemented in
@@ -2001,18 +2024,6 @@ for (;;)
       break;
       }
 
-    /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
-    and return the MATCH_KETRPOS. This makes it possible to do the repeats one
-    at a time from the outer level, thus saving stack. */
-
-    if (*ecode == OP_KETRPOS)
-      {
-      md->start_match_ptr = mstart;    /* In case \K reset it */
-      md->end_match_ptr = eptr;
-      md->end_offset_top = offset_top;
-      RRETURN(MATCH_KETRPOS);
-      }
-
     /* The normal repeating kets try the rest of the pattern or restart from
     the preceding bracket, in the appropriate order. In the second case, we can
     use tail recursion to avoid using another stack frame, unless we have an
diff --git a/testdata/testinput1 b/testdata/testinput1
index f933692..ffb9455 100644
--- a/testdata/testinput1
+++ b/testdata/testinput1
@@ -5675,4 +5675,13 @@ AbcdCBefgBhiBqz
 /[\Q]a\E]+/
     aa]]
 
+'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
+    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
+
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
+    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
+
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")++\")++'
+    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
+
 /-- End of testinput1 --/
diff --git a/testdata/testinput8 b/testdata/testinput8
index bb2747b..06334cd 100644
--- a/testdata/testinput8
+++ b/testdata/testinput8
@@ -4831,4 +4831,10 @@
 /[ab]{2,}?/
     aaaa    
 
+'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
+    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
+
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
+    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
+
 /-- End of testinput8 --/
diff --git a/testdata/testoutput1 b/testdata/testoutput1
index 3d9a328..b2ae430 100644
--- a/testdata/testoutput1
+++ b/testdata/testoutput1
@@ -9325,4 +9325,16 @@ No match
     aa]]
  0: aa]]
 
+'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
+    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
+ 0: NON QUOTED "QUOT""ED" AFTER 
+
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
+    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
+ 0: NON QUOTED "QUOT""ED" AFTER 
+
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")++\")++'
+    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
+ 0: NON QUOTED "QUOT""ED" AFTER 
+
 /-- End of testinput1 --/
diff --git a/testdata/testoutput8 b/testdata/testoutput8
index 3861ea4..95c4e4d 100644
--- a/testdata/testoutput8
+++ b/testdata/testoutput8
@@ -7777,4 +7777,12 @@ Matched, but offsets vector is too small to show all matches
  1: aaa
  2: aa
 
+'\A(?:[^\"]++|\"(?:[^\"]*+|\"\")*+\")++'
+    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
+ 0: NON QUOTED "QUOT""ED" AFTER 
+
+'\A(?:[^\"]++|\"(?:[^\"]++|\"\")*+\")++'
+    NON QUOTED \"QUOT\"\"ED\" AFTER \"NOT MATCHED
+ 0: NON QUOTED "QUOT""ED" AFTER 
+
 /-- End of testinput8 --/
-- 
1.9.3