Blob Blame History Raw
commit f3a6cc0a560a17f32a3e90d2f20501a53cab6058
Author: Andreas Schwab <schwab@redhat.com>
Date:   Tue Nov 29 10:52:22 2011 +0100

    Fix access after end of search string in regex matcher

--- a/ChangeLog	2011-11-30 12:43:22.312632113 -0700
+++ b/ChangeLog	2011-11-30 12:43:50.569624022 -0700
@@ -1,3 +1,14 @@
+2011-11-29  Andreas Schwab  <schwab@redhat.com>
+
+	* locale/weight.h (findidx): Add parameter len.
+	* locale/weightwc.h (findidx): Likewise.
+	* posix/fnmatch_loop.c (FCT): Adjust caller.
+	* posix/regcomp.c (build_equiv_class): Likewise.
+	* posix/regex_internal.h (re_string_elem_size_at): Likewise.
+	* posix/regexec.c (check_node_accept_bytes): Likewise.
+	* string/strcoll_l.c (STRCOLL): Likewise.
+	* string/strxfrm_l.c (STRXFRM): Likewise.
+
 2011-11-14  Andreas Schwab  <schwab@redhat.com>
 
 	* malloc/arena.c (arena_get2): Don't call reused_arena when
diff --git a/locale/weight.h b/locale/weight.h
index dc70a00..967e176 100644
--- a/locale/weight.h
+++ b/locale/weight.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996,1997,1998,1999,2000,2003,2004 Free Software Foundation, Inc.
+/* Copyright (C) 1996,1997,1998,1999,2000,2003,2004,2011 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Written by Ulrich Drepper, <drepper@cygnus.com>.
 
@@ -20,7 +20,7 @@
 /* Find index of weight.  */
 auto inline int32_t
 __attribute ((always_inline))
-findidx (const unsigned char **cpp)
+findidx (const unsigned char **cpp, size_t len)
 {
   int_fast32_t i = table[*(*cpp)++];
   const unsigned char *cp;
@@ -34,6 +34,7 @@ findidx (const unsigned char **cpp)
      Search for the correct one.  */
   cp = &extra[-i];
   usrc = *cpp;
+  --len;
   while (1)
     {
       size_t nhere;
@@ -56,7 +57,7 @@ findidx (const unsigned char **cpp)
 	     already.  */
 	  size_t cnt;
 
-	  for (cnt = 0; cnt < nhere; ++cnt)
+	  for (cnt = 0; cnt < nhere && cnt < len; ++cnt)
 	    if (cp[cnt] != usrc[cnt])
 	      break;
 
@@ -79,13 +80,13 @@ findidx (const unsigned char **cpp)
 	  size_t cnt;
 	  size_t offset = 0;
 
-	  for (cnt = 0; cnt < nhere; ++cnt)
+	  for (cnt = 0; cnt < nhere && cnt < len; ++cnt)
 	    if (cp[cnt] != usrc[cnt])
 	      break;
 
 	  if (cnt != nhere)
 	    {
-	      if (cp[cnt] > usrc[cnt])
+	      if (cnt == len || cp[cnt] > usrc[cnt])
 		{
 		  /* Cannot be in this range.  */
 		  cp += 2 * nhere;
diff --git a/locale/weightwc.h b/locale/weightwc.h
index 9ea1126..7862091 100644
--- a/locale/weightwc.h
+++ b/locale/weightwc.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996-2001,2003,2004,2005,2007 Free Software Foundation, Inc.
+/* Copyright (C) 1996-2001,2003,2004,2005,2007,2011 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Written by Ulrich Drepper, <drepper@cygnus.com>.
 
@@ -20,7 +20,7 @@
 /* Find index of weight.  */
 auto inline int32_t
 __attribute ((always_inline))
-findidx (const wint_t **cpp)
+findidx (const wint_t **cpp, size_t len)
 {
   wint_t ch = *(*cpp)++;
   int32_t i = __collidx_table_lookup ((const char *) table, ch);
@@ -32,6 +32,7 @@ findidx (const wint_t **cpp)
   /* Oh well, more than one sequence starting with this byte.
      Search for the correct one.  */
   const int32_t *cp = (const int32_t *) &extra[-i];
+  --len;
   while (1)
     {
       size_t nhere;
@@ -54,7 +55,7 @@ findidx (const wint_t **cpp)
 	     already.  */
 	  size_t cnt;
 
-	  for (cnt = 0; cnt < nhere; ++cnt)
+	  for (cnt = 0; cnt < nhere && cnt < len; ++cnt)
 	    if (cp[cnt] != usrc[cnt])
 	      break;
 
@@ -75,7 +76,7 @@ findidx (const wint_t **cpp)
 	  size_t cnt;
 	  size_t offset;
 
-	  for (cnt = 0; cnt < nhere - 1; ++cnt)
+	  for (cnt = 0; cnt < nhere - 1 && cnt < len; ++cnt)
 	    if (cp[cnt] != usrc[cnt])
 	      break;
 
diff --git a/posix/fnmatch_loop.c b/posix/fnmatch_loop.c
index 18a6667..72bd3ee 100644
--- a/posix/fnmatch_loop.c
+++ b/posix/fnmatch_loop.c
@@ -412,7 +412,7 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
 			  _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
 # endif
 
-			idx = findidx (&cp);
+			idx = findidx (&cp, 1);
 			if (idx != 0)
 			  {
 			    /* We found a table entry.  Now see whether the
@@ -422,7 +422,7 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
 			    int32_t idx2;
 			    const UCHAR *np = (const UCHAR *) n;
 
-			    idx2 = findidx (&np);
+			    idx2 = findidx (&np, string_end - n);
 			    if (idx2 != 0
 				&& (idx >> 24) == (idx2 >> 24)
 				&& len == weights[idx2 & 0xffffff])
diff --git a/posix/regcomp.c b/posix/regcomp.c
index b238c08..34ee845 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -1,5 +1,5 @@
 /* Extended regular expression matching and search library.
-   Copyright (C) 2002-2007,2009,2010 Free Software Foundation, Inc.
+   Copyright (C) 2002-2007,2009,2010,2011 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
 
@@ -3409,19 +3409,18 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name)
 						   _NL_COLLATE_EXTRAMB);
       indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
 						_NL_COLLATE_INDIRECTMB);
-      idx1 = findidx (&cp);
-      if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0))
+      idx1 = findidx (&cp, -1);
+      if (BE (idx1 == 0 || *cp != '\0', 0))
 	/* This isn't a valid character.  */
 	return REG_ECOLLATE;
 
       /* Build single byte matcing table for this equivalence class.  */
-      char_buf[1] = (unsigned char) '\0';
       len = weights[idx1 & 0xffffff];
       for (ch = 0; ch < SBC_MAX; ++ch)
 	{
 	  char_buf[0] = ch;
 	  cp = char_buf;
-	  idx2 = findidx (&cp);
+	  idx2 = findidx (&cp, 1);
 /*
 	  idx2 = table[ch];
 */

--- a/posix/regex_internal.h	2011-11-30 12:47:02.706567482 -0700
+++ a/posix/regex_internal.h	2011-11-30 12:47:32.969558337 -0700
@@ -756,7 +756,7 @@
       indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
 						_NL_COLLATE_INDIRECTMB);
       p = pstr->mbs + idx;
-      tmp = findidx (&p);
+      tmp = findidx (&p, pstr->len - idx);
       return p - pstr->mbs - idx;
     }
   else
diff --git a/posix/regexec.c b/posix/regexec.c
index 9e0c565..3ea810b 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -3924,7 +3924,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
 	      indirect = (const int32_t *)
 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
-	      int32_t idx = findidx (&cp);
+	      int32_t idx = findidx (&cp, elem_len);
 	      if (idx > 0)
 		for (i = 0; i < cset->nequiv_classes; ++i)
 		  {
diff --git a/string/strcoll_l.c b/string/strcoll_l.c
index d8d1139..fb77d08 100644
--- a/string/strcoll_l.c
+++ b/string/strcoll_l.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1995-1997,2002,2004,2007,2010 Free Software Foundation, Inc.
+/* Copyright (C) 1995-1997,2002,2004,2007,2010,2011 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Written by Ulrich Drepper <drepper@gnu.org>, 1995.
 
@@ -205,7 +205,7 @@ STRCOLL (s1, s2, l)
 
 		while (*us1 != L('\0'))
 		  {
-		    int32_t tmp = findidx (&us1);
+		    int32_t tmp = findidx (&us1, -1);
 		    rule1arr[idx1max] = tmp >> 24;
 		    idx1arr[idx1max] = tmp & 0xffffff;
 		    idx1cnt = idx1max++;
@@ -267,7 +267,7 @@ STRCOLL (s1, s2, l)
 
 		while (*us2 != L('\0'))
 		  {
-		    int32_t tmp = findidx (&us2);
+		    int32_t tmp = findidx (&us2, -1);
 		    rule2arr[idx2max] = tmp >> 24;
 		    idx2arr[idx2max] = tmp & 0xffffff;
 		    idx2cnt = idx2max++;
diff --git a/string/strxfrm_l.c b/string/strxfrm_l.c
index 220253c..b06556d 100644
--- a/string/strxfrm_l.c
+++ b/string/strxfrm_l.c
@@ -176,7 +176,7 @@ STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l)
   idxmax = 0;
   do
     {
-      int32_t tmp = findidx (&usrc);
+      int32_t tmp = findidx (&usrc, -1);
       rulearr[idxmax] = tmp >> 24;
       idxarr[idxmax] = tmp & 0xffffff;