Blob Blame History Raw
From 2b029aba91d42edb9dd958306a7909e2bb459b01 Mon Sep 17 00:00:00 2001
From: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date: Tue, 1 Nov 2016 15:58:28 +0000
Subject: [PATCH] Fix auto-anchor bug when .* is inside an assertion.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ported to 10.22:

commit 6fba816130cccd2158dc9a6d30b03bb2bb31ef8c
Author: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>
Date:   Tue Nov 1 15:58:28 2016 +0000

    Fix auto-anchor bug when .* is inside an assertion.

    git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@587 6239d852-aaf2-0410-a92c-79f79f948069

Signed-off-by: Petr Písař <ppisar@redhat.com>
---
 src/pcre2_compile.c  | 33 +++++++++++++++++++++++----------
 testdata/testinput1  |  3 +++
 testdata/testoutput1 |  4 ++++
 3 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index fe37310..b9b9361 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -7960,13 +7960,14 @@ Arguments:
                    the less precise approach
   cb             points to the compile data block
   atomcount      atomic group level
+  inassert       TRUE if in an assertion
 
 Returns:     TRUE or FALSE
 */
 
 static BOOL
 is_anchored(register PCRE2_SPTR code, unsigned int bracket_map,
-  compile_block *cb, int atomcount)
+  compile_block *cb, int atomcount, BOOL inassert)
 {
 do {
    PCRE2_SPTR scode = first_significant_code(
@@ -7978,7 +7979,8 @@ do {
    if (op == OP_BRA  || op == OP_BRAPOS ||
        op == OP_SBRA || op == OP_SBRAPOS)
      {
-     if (!is_anchored(scode, bracket_map, cb, atomcount)) return FALSE;
+     if (!is_anchored(scode, bracket_map, cb, atomcount, inassert)) 
+       return FALSE;
      }
 
    /* Capturing brackets */
@@ -7988,33 +7990,44 @@ do {
      {
      int n = GET2(scode, 1+LINK_SIZE);
      int new_map = bracket_map | ((n < 32)? (1u << n) : 1);
-     if (!is_anchored(scode, new_map, cb, atomcount)) return FALSE;
+     if (!is_anchored(scode, new_map, cb, atomcount, inassert)) return FALSE;
      }
 
-   /* Positive forward assertions and conditions */
+   /* Positive forward assertion */
 
-   else if (op == OP_ASSERT || op == OP_COND)
+   else if (op == OP_ASSERT)
      {
-     if (!is_anchored(scode, bracket_map, cb, atomcount)) return FALSE;
+     if (!is_anchored(scode, bracket_map, cb, atomcount, TRUE)) return FALSE;
+     }
+
+   /* Condition */
+
+   else if (op == OP_COND)
+     {
+     if (!is_anchored(scode, bracket_map, cb, atomcount, inassert)) 
+       return FALSE;
      }
 
    /* Atomic groups */
 
    else if (op == OP_ONCE || op == OP_ONCE_NC)
      {
-     if (!is_anchored(scode, bracket_map, cb, atomcount + 1))
+     if (!is_anchored(scode, bracket_map, cb, atomcount + 1, inassert))
        return FALSE;
      }
 
    /* .* is not anchored unless DOTALL is set (which generates OP_ALLANY) and
    it isn't in brackets that are or may be referenced or inside an atomic
-   group. There is also an option that disables auto-anchoring. */
+   group or an assertion. Also the pattern must not contain *PRUNE or *SKIP,
+   because these break the feature. Consider, for example, /(?s).*?(*PRUNE)b/
+   with the subject "aab", which matches "b", i.e. not at the start of a line.
+   There is also an option that disables auto-anchoring. */
 
    else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR ||
              op == OP_TYPEPOSSTAR))
      {
      if (scode[1] != OP_ALLANY || (bracket_map & cb->backref_map) != 0 ||
-         atomcount > 0 || cb->had_pruneorskip ||
+         atomcount > 0 || cb->had_pruneorskip || inassert ||
          (cb->external_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)
        return FALSE;
      }
@@ -8984,7 +8997,7 @@ there are no occurrences of *PRUNE or *SKIP (though there is an option to
 disable this case). */
 
 if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
-     is_anchored(codestart, 0, &cb, 0))
+     is_anchored(codestart, 0, &cb, 0, FALSE))
   re->overall_options |= PCRE2_ANCHORED;
 
 /* If the pattern is still not anchored and we do not have a first code unit,
diff --git a/testdata/testinput1 b/testdata/testinput1
index 0d680d3..2b4ec2c 100644
--- a/testdata/testinput1
+++ b/testdata/testinput1
@@ -5798,4 +5798,7 @@ name)/mark
 /(?=.*X)X$/ 
     \  X
 
+/(?s)(?=.*?)b/
+    aabc
+
 # End of testinput1 
diff --git a/testdata/testoutput1 b/testdata/testoutput1
index 02e07bf..774a5ec 100644
--- a/testdata/testoutput1
+++ b/testdata/testoutput1
@@ -9265,4 +9265,8 @@ No match
     \  X
  0: X
 
+/(?s)(?=.*?)b/
+    aabc
+ 0: b
+
 # End of testinput1 
-- 
2.7.4