Blob Blame History Raw
commit 99d3c7e1308beb1ce9a3c535ca4b6581ebd653ee
Author: Paolo Bonzini <bonzini@gnu.org>
Date:   Tue Sep 21 17:00:55 2010 +0200

    dfa: process range expressions consistently with system regex
    
    glibc does not interpret range expressions exactly according to strcoll
    ordering, which makes the behavior of grep hard to predict when it is
    compiled with the system regex.  Leave it to the system regex matcher to
    decide which single-byte characters a range expression matches.
    
    This partially reverts a change made in commit 0d38a8bb (which made
    sense at the time, but no longer does now that src/dfa.c has stopped
    doing multibyte character set matching).
    
    * src/dfa.c (in_coll_range): Remove.
    (parse_bracket_exp): Use the system regex to find which single-byte
    characters match a range expression.

diff --git a/src/dfa.c b/src/dfa.c
index a2f4174..f3e066f 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -697,13 +697,6 @@ static unsigned char const *buf_end;	/* reference to end in dfaexec().  */
 
 #endif /* MBS_SUPPORT */
 
-static int
-in_coll_range (char ch, char from, char to)
-{
-  char c[6] = { from, 0, ch, 0, to, 0 };
-  return strcoll (&c[0], &c[2]) <= 0 && strcoll (&c[2], &c[4]) <= 0;
-}
-
 typedef int predicate (int);
 
 /* The following list maps the names of the Posix named character classes
@@ -979,10 +972,22 @@ parse_bracket_exp (void)
                 for (c = c1; c <= c2; c++)
                   setbit_case_fold (c, ccl);
               else
-                for (c = 0; c < NOTCHAR; ++c)
-                  if (!(case_fold && isupper (c))
-                      && in_coll_range (c, c1, c2))
-                    setbit_case_fold (c, ccl);
+                {
+                  /* Defer to the system regex library about the meaning
+                     of range expressions.  */
+                  regex_t re;
+                  char pattern[6] = { '[', c1, '-', c2, ']', 0 };
+                  char subject[2] = { 0, 0 };
+                  regcomp (&re, pattern, REG_NOSUB);
+                  for (c = 0; c < NOTCHAR; ++c)
+                    {
+                      subject[0] = c;
+                      if (!(case_fold && isupper (c))
+                          && regexec (&re, subject, 0, NULL, 0) != REG_NOMATCH)
+                        setbit_case_fold (c, ccl);
+                    }
+                  regfree (&re);
+                }
             }
 
           colon_warning_state |= 8;