4380350
From 46ed1a703b067e5b679eacf6500a54dae35f8130 Mon Sep 17 00:00:00 2001
4380350
From: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>
4380350
Date: Thu, 3 Dec 2015 17:05:40 +0000
4380350
Subject: [PATCH] Fix /x bug when pattern starts with white space and (?-x)
4380350
MIME-Version: 1.0
4380350
Content-Type: text/plain; charset=UTF-8
4380350
Content-Transfer-Encoding: 8bit
4380350
4380350
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1617 2f5784b3-3f2a-0410-8824-cb99058d5e15
4380350
4380350
Petr Písař: Ported to 8.38.
4380350
4380350
diff --git a/pcre_compile.c b/pcre_compile.c
4380350
index beed46b..57719b9 100644
4380350
--- a/pcre_compile.c
4380350
+++ b/pcre_compile.c
4380350
@@ -7607,39 +7607,15 @@ for (;; ptr++)
4380350
         newoptions = (options | set) & (~unset);
4380350
 
4380350
         /* If the options ended with ')' this is not the start of a nested
4380350
-        group with option changes, so the options change at this level. If this
4380350
-        item is right at the start of the pattern, the options can be
4380350
-        abstracted and made external in the pre-compile phase, and ignored in
4380350
-        the compile phase. This can be helpful when matching -- for instance in
4380350
-        caseless checking of required bytes.
4380350
-
4380350
-        If the code pointer is not (cd->start_code + 1 + LINK_SIZE), we are
4380350
-        definitely *not* at the start of the pattern because something has been
4380350
-        compiled. In the pre-compile phase, however, the code pointer can have
4380350
-        that value after the start, because it gets reset as code is discarded
4380350
-        during the pre-compile. However, this can happen only at top level - if
4380350
-        we are within parentheses, the starting BRA will still be present. At
4380350
-        any parenthesis level, the length value can be used to test if anything
4380350
-        has been compiled at that level. Thus, a test for both these conditions
4380350
-        is necessary to ensure we correctly detect the start of the pattern in
4380350
-        both phases.
4380350
-
4380350
+        group with option changes, so the options change at this level.
4380350
         If we are not at the pattern start, reset the greedy defaults and the
4380350
         case value for firstchar and reqchar. */
4380350
 
4380350
         if (*ptr == CHAR_RIGHT_PARENTHESIS)
4380350
           {
4380350
-          if (code == cd->start_code + 1 + LINK_SIZE &&
4380350
-               (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))
4380350
-            {
4380350
-            cd->external_options = newoptions;
4380350
-            }
4380350
-          else
4380350
-            {
4380350
-            greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
4380350
-            greedy_non_default = greedy_default ^ 1;
4380350
-            req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS:0;
4380350
-            }
4380350
+          greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
4380350
+          greedy_non_default = greedy_default ^ 1;
4380350
+          req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS:0;
4380350
 
4380350
           /* Change options at this level, and pass them back for use
4380350
           in subsequent branches. */
4380350
diff --git a/testdata/testoutput2 b/testdata/testoutput2
4380350
index ac33cc4..6c42897 100644
4380350
--- a/testdata/testoutput2
4380350
+++ b/testdata/testoutput2
4380350
@@ -419,7 +419,7 @@ Need char = '>'
4380350
 
4380350
 /(?U)<.*>/I
4380350
 Capturing subpattern count = 0
4380350
-Options: ungreedy
4380350
+No options
4380350
 First char = '<'
4380350
 Need char = '>'
4380350
     abc<def>ghi<klm>nop
4380350
@@ -443,7 +443,7 @@ Need char = '='
4380350
 
4380350
 /(?U)={3,}?/I
4380350
 Capturing subpattern count = 0
4380350
-Options: ungreedy
4380350
+No options
4380350
 First char = '='
4380350
 Need char = '='
4380350
     abc========def
4380350
@@ -477,7 +477,7 @@ Failed: lookbehind assertion is not fixed length at offset 12
4380350
 
4380350
 /(?i)abc/I
4380350
 Capturing subpattern count = 0
4380350
-Options: caseless
4380350
+No options
4380350
 First char = 'a' (caseless)
4380350
 Need char = 'c' (caseless)
4380350
 
4380350
@@ -489,7 +489,7 @@ No need char
4380350
 
4380350
 /(?i)^1234/I
4380350
 Capturing subpattern count = 0
4380350
-Options: anchored caseless
4380350
+Options: anchored
4380350
 No first char
4380350
 No need char
4380350
 
4380350
@@ -502,7 +502,7 @@ No need char
4380350
 /(?s).*/I
4380350
 Capturing subpattern count = 0
4380350
 May match empty string
4380350
-Options: anchored dotall
4380350
+Options: anchored
4380350
 No first char
4380350
 No need char
4380350
 
4380350
@@ -516,7 +516,7 @@ Starting chars: a b c d
4380350
 
4380350
 /(?i)[abcd]/IS
4380350
 Capturing subpattern count = 0
4380350
-Options: caseless
4380350
+No options
4380350
 No first char
4380350
 No need char
4380350
 Subject length lower bound = 1
4380350
@@ -524,7 +524,7 @@ Starting chars: A B C D a b c d
4380350
 
4380350
 /(?m)[xy]|(b|c)/IS
4380350
 Capturing subpattern count = 1
4380350
-Options: multiline
4380350
+No options
4380350
 No first char
4380350
 No need char
4380350
 Subject length lower bound = 1
4380350
@@ -538,7 +538,7 @@ No need char
4380350
 
4380350
 /(?i)(^a|^b)/Im
4380350
 Capturing subpattern count = 1
4380350
-Options: caseless multiline
4380350
+Options: multiline
4380350
 First char at start or follows newline
4380350
 No need char
4380350
 
4380350
@@ -1179,7 +1179,7 @@ No need char
4380350
         End
4380350
 ------------------------------------------------------------------
4380350
 Capturing subpattern count = 1
4380350
-Options: anchored dotall
4380350
+Options: anchored
4380350
 No first char
4380350
 No need char
4380350
 
4380350
@@ -2735,7 +2735,7 @@ No match
4380350
         End
4380350
 ------------------------------------------------------------------
4380350
 Capturing subpattern count = 0
4380350
-Options: caseless extended
4380350
+Options: extended
4380350
 First char = 'a' (caseless)
4380350
 Need char = 'c' (caseless)
4380350
 
4380350
@@ -2748,7 +2748,7 @@ Need char = 'c' (caseless)
4380350
         End
4380350
 ------------------------------------------------------------------
4380350
 Capturing subpattern count = 0
4380350
-Options: caseless extended
4380350
+Options: extended
4380350
 First char = 'a' (caseless)
4380350
 Need char = 'c' (caseless)
4380350
 
4380350
@@ -3095,7 +3095,7 @@ Need char = 'b'
4380350
         End
4380350
 ------------------------------------------------------------------
4380350
 Capturing subpattern count = 0
4380350
-Options: ungreedy
4380350
+No options
4380350
 First char = 'x'
4380350
 Need char = 'b'
4380350
     xaaaab
4380350
@@ -3497,7 +3497,7 @@ Need char = 'c'
4380350
 
4380350
 /(?i)[ab]/IS
4380350
 Capturing subpattern count = 0
4380350
-Options: caseless
4380350
+No options
4380350
 No first char
4380350
 No need char
4380350
 Subject length lower bound = 1
4380350
@@ -6299,7 +6299,7 @@ Capturing subpattern count = 3
4380350
 Named capturing subpatterns:
4380350
   A   2
4380350
   A   3
4380350
-Options: anchored dupnames
4380350
+Options: anchored
4380350
 Duplicate name status changes
4380350
 No first char
4380350
 No need char
4380350
-- 
4380350
2.4.3
4380350