Blob Blame History Raw
From c34ce5444f58690d78c99ac99365c7e070eec314 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Sat, 27 May 2017 15:49:29 +0000
Subject: [PATCH] Fix lookbehind with zero-length branch in DFA matching. Fixes
 oss-fuzz 1959.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Petr Písař: Ported to PCRE2 10.21 from the following upstream commit:

commit 25926aac413c2b67359bc03c2a11a86a7a0718da
Author: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date:   Sat May 27 15:49:29 2017 +0000

    Fix lookbehind with zero-length branch in DFA matching. Fixes oss-fuzz 1959.

    git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@807 6239d852-aaf2-0410-a92c-79f79f948069

Signed-off-by: Petr Písař <ppisar@redhat.com>
---
 src/pcre2_dfa_match.c | 28 ++++++++++------------------
 testdata/testinput6   |  6 ++++++
 testdata/testoutput6  | 10 ++++++++++
 3 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c
index 927159a..3cd62b4 100644
--- a/src/pcre2_dfa_match.c
+++ b/src/pcre2_dfa_match.c
@@ -375,14 +375,10 @@ internal_dfa_match(
 {
 stateblock *active_states, *new_states, *temp_states;
 stateblock *next_active_state, *next_new_state;
-
 const uint8_t *ctypes, *lcc, *fcc;
 PCRE2_SPTR ptr;
 PCRE2_SPTR end_code;
-PCRE2_SPTR first_op;
-
 dfa_recursion_info new_recursive;
-
 int active_count, new_count, match_count;
 
 /* Some fields in the mb block are frequently referenced, so we load them into
@@ -417,21 +413,15 @@ active_states = (stateblock *)(workspace + 2);
 next_new_state = new_states = active_states + wscount;
 new_count = 0;
 
-first_op = this_start_code + 1 + LINK_SIZE +
-  ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
-    *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
-    ? IMM2_SIZE:0);
-
 /* The first thing in any (sub) pattern is a bracket of some sort. Push all
 the alternative states onto the list, and find out where the end is. This
 makes is possible to use this function recursively, when we want to stop at a
 matching internal ket rather than at the end.
 
-If the first opcode in the first alternative is OP_REVERSE, we are dealing with
-a backward assertion. In that case, we have to find out the maximum amount to
-move back, and set up each alternative appropriately. */
+If we are dealing with a backward assertion we have to find out the maximum
+amount to move back, and set up each alternative appropriately. */
 
-if (*first_op == OP_REVERSE)
+if (*this_start_code == OP_ASSERTBACK || *this_start_code == OP_ASSERTBACK_NOT)
   {
   size_t max_back = 0;
   size_t gone_back;
@@ -476,15 +466,17 @@ if (*first_op == OP_REVERSE)
   if (current_subject < mb->start_used_ptr)
     mb->start_used_ptr = current_subject;
 
-  /* Now we can process the individual branches. */
+  /* Now we can process the individual branches. There will be an OP_REVERSE at
+  the start of each branch, except when the length of the branch is zero. */
 
   end_code = this_start_code;
   do
     {
-    size_t back = GET(end_code, 2+LINK_SIZE);
+    uint32_t revlen = (end_code[1+LINK_SIZE] == OP_REVERSE)? 1 + LINK_SIZE : 0;
+    size_t back = (revlen == 0)? 0 : GET(end_code, 2+LINK_SIZE);
     if (back <= gone_back)
       {
-      int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE);
+      int bstate = (int)(end_code - start_code + 1 + LINK_SIZE + revlen);
       ADD_NEW_DATA(-bstate, 0, gone_back - back);
       }
     end_code += GET(end_code, 1);
@@ -544,7 +536,7 @@ for (;;)
   BOOL partial_newline = FALSE;
   BOOL could_continue = reset_could_continue;
   reset_could_continue = FALSE;
-  
+
   if (ptr > mb->last_used_ptr) mb->last_used_ptr = ptr;
 
   /* Make the new state list into the active state list and empty the
@@ -632,7 +624,7 @@ for (;;)
 
     code = start_code + state_offset;
     codevalue = *code;
-    
+
     /* If this opcode inspects a character, but we are at the end of the
     subject, remember the fact for use when testing for a partial match. */
 
diff --git a/testdata/testinput6 b/testdata/testinput6
index dd328aa..ccd5941 100644
--- a/testdata/testinput6
+++ b/testdata/testinput6
@@ -4882,4 +4882,10 @@
 /(02-)?[0-9]{3}-[0-9]{3}/
     02-123-123
 
+/(?<=abc|)/
+    abcde\=aftertext
+    
+/(?<=|abc)/ 
+    abcde\=aftertext
+
 # End of testinput6
diff --git a/testdata/testoutput6 b/testdata/testoutput6
index 1cfec35..1e94601 100644
--- a/testdata/testoutput6
+++ b/testdata/testoutput6
@@ -7677,4 +7677,14 @@ No match
     02-123-123
  0: 02-123-123
 
+/(?<=abc|)/
+    abcde\=aftertext
+ 0: 
+ 0+ abcde
+    
+/(?<=|abc)/ 
+    abcde\=aftertext
+ 0: 
+ 0+ abcde
+
 # End of testinput6
-- 
2.9.4