commit f3a6cc0a560a17f32a3e90d2f20501a53cab6058 Author: Andreas Schwab Date: Tue Nov 29 10:52:22 2011 +0100 Fix access after end of search string in regex matcher --- a/ChangeLog 2011-11-30 12:43:22.312632113 -0700 +++ b/ChangeLog 2011-11-30 12:43:50.569624022 -0700 @@ -1,3 +1,14 @@ +2011-11-29 Andreas Schwab + + * locale/weight.h (findidx): Add parameter len. + * locale/weightwc.h (findidx): Likewise. + * posix/fnmatch_loop.c (FCT): Adjust caller. + * posix/regcomp.c (build_equiv_class): Likewise. + * posix/regex_internal.h (re_string_elem_size_at): Likewise. + * posix/regexec.c (check_node_accept_bytes): Likewise. + * string/strcoll_l.c (STRCOLL): Likewise. + * string/strxfrm_l.c (STRXFRM): Likewise. + 2011-11-14 Andreas Schwab * malloc/arena.c (arena_get2): Don't call reused_arena when diff --git a/locale/weight.h b/locale/weight.h index dc70a00..967e176 100644 --- a/locale/weight.h +++ b/locale/weight.h @@ -1,4 +1,4 @@ -/* Copyright (C) 1996,1997,1998,1999,2000,2003,2004 Free Software Foundation, Inc. +/* Copyright (C) 1996,1997,1998,1999,2000,2003,2004,2011 Free Software Foundation, Inc. This file is part of the GNU C Library. Written by Ulrich Drepper, . @@ -20,7 +20,7 @@ /* Find index of weight. */ auto inline int32_t __attribute ((always_inline)) -findidx (const unsigned char **cpp) +findidx (const unsigned char **cpp, size_t len) { int_fast32_t i = table[*(*cpp)++]; const unsigned char *cp; @@ -34,6 +34,7 @@ findidx (const unsigned char **cpp) Search for the correct one. */ cp = &extra[-i]; usrc = *cpp; + --len; while (1) { size_t nhere; @@ -56,7 +57,7 @@ findidx (const unsigned char **cpp) already. */ size_t cnt; - for (cnt = 0; cnt < nhere; ++cnt) + for (cnt = 0; cnt < nhere && cnt < len; ++cnt) if (cp[cnt] != usrc[cnt]) break; @@ -79,13 +80,13 @@ findidx (const unsigned char **cpp) size_t cnt; size_t offset = 0; - for (cnt = 0; cnt < nhere; ++cnt) + for (cnt = 0; cnt < nhere && cnt < len; ++cnt) if (cp[cnt] != usrc[cnt]) break; if (cnt != nhere) { - if (cp[cnt] > usrc[cnt]) + if (cnt == len || cp[cnt] > usrc[cnt]) { /* Cannot be in this range. */ cp += 2 * nhere; diff --git a/locale/weightwc.h b/locale/weightwc.h index 9ea1126..7862091 100644 --- a/locale/weightwc.h +++ b/locale/weightwc.h @@ -1,4 +1,4 @@ -/* Copyright (C) 1996-2001,2003,2004,2005,2007 Free Software Foundation, Inc. +/* Copyright (C) 1996-2001,2003,2004,2005,2007,2011 Free Software Foundation, Inc. This file is part of the GNU C Library. Written by Ulrich Drepper, . @@ -20,7 +20,7 @@ /* Find index of weight. */ auto inline int32_t __attribute ((always_inline)) -findidx (const wint_t **cpp) +findidx (const wint_t **cpp, size_t len) { wint_t ch = *(*cpp)++; int32_t i = __collidx_table_lookup ((const char *) table, ch); @@ -32,6 +32,7 @@ findidx (const wint_t **cpp) /* Oh well, more than one sequence starting with this byte. Search for the correct one. */ const int32_t *cp = (const int32_t *) &extra[-i]; + --len; while (1) { size_t nhere; @@ -54,7 +55,7 @@ findidx (const wint_t **cpp) already. */ size_t cnt; - for (cnt = 0; cnt < nhere; ++cnt) + for (cnt = 0; cnt < nhere && cnt < len; ++cnt) if (cp[cnt] != usrc[cnt]) break; @@ -75,7 +76,7 @@ findidx (const wint_t **cpp) size_t cnt; size_t offset; - for (cnt = 0; cnt < nhere - 1; ++cnt) + for (cnt = 0; cnt < nhere - 1 && cnt < len; ++cnt) if (cp[cnt] != usrc[cnt]) break; diff --git a/posix/fnmatch_loop.c b/posix/fnmatch_loop.c index 18a6667..72bd3ee 100644 --- a/posix/fnmatch_loop.c +++ b/posix/fnmatch_loop.c @@ -412,7 +412,7 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); # endif - idx = findidx (&cp); + idx = findidx (&cp, 1); if (idx != 0) { /* We found a table entry. Now see whether the @@ -422,7 +422,7 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used) int32_t idx2; const UCHAR *np = (const UCHAR *) n; - idx2 = findidx (&np); + idx2 = findidx (&np, string_end - n); if (idx2 != 0 && (idx >> 24) == (idx2 >> 24) && len == weights[idx2 & 0xffffff]) diff --git a/posix/regcomp.c b/posix/regcomp.c index b238c08..34ee845 100644 --- a/posix/regcomp.c +++ b/posix/regcomp.c @@ -1,5 +1,5 @@ /* Extended regular expression matching and search library. - Copyright (C) 2002-2007,2009,2010 Free Software Foundation, Inc. + Copyright (C) 2002-2007,2009,2010,2011 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Isamu Hasegawa . @@ -3409,19 +3409,18 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name) _NL_COLLATE_EXTRAMB); indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); - idx1 = findidx (&cp); - if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0)) + idx1 = findidx (&cp, -1); + if (BE (idx1 == 0 || *cp != '\0', 0)) /* This isn't a valid character. */ return REG_ECOLLATE; /* Build single byte matcing table for this equivalence class. */ - char_buf[1] = (unsigned char) '\0'; len = weights[idx1 & 0xffffff]; for (ch = 0; ch < SBC_MAX; ++ch) { char_buf[0] = ch; cp = char_buf; - idx2 = findidx (&cp); + idx2 = findidx (&cp, 1); /* idx2 = table[ch]; */ --- a/posix/regex_internal.h 2011-11-30 12:47:02.706567482 -0700 +++ a/posix/regex_internal.h 2011-11-30 12:47:32.969558337 -0700 @@ -756,7 +756,7 @@ indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); p = pstr->mbs + idx; - tmp = findidx (&p); + tmp = findidx (&p, pstr->len - idx); return p - pstr->mbs - idx; } else diff --git a/posix/regexec.c b/posix/regexec.c index 9e0c565..3ea810b 100644 --- a/posix/regexec.c +++ b/posix/regexec.c @@ -3924,7 +3924,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); - int32_t idx = findidx (&cp); + int32_t idx = findidx (&cp, elem_len); if (idx > 0) for (i = 0; i < cset->nequiv_classes; ++i) { diff --git a/string/strcoll_l.c b/string/strcoll_l.c index d8d1139..fb77d08 100644 --- a/string/strcoll_l.c +++ b/string/strcoll_l.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1995-1997,2002,2004,2007,2010 Free Software Foundation, Inc. +/* Copyright (C) 1995-1997,2002,2004,2007,2010,2011 Free Software Foundation, Inc. This file is part of the GNU C Library. Written by Ulrich Drepper , 1995. @@ -205,7 +205,7 @@ STRCOLL (s1, s2, l) while (*us1 != L('\0')) { - int32_t tmp = findidx (&us1); + int32_t tmp = findidx (&us1, -1); rule1arr[idx1max] = tmp >> 24; idx1arr[idx1max] = tmp & 0xffffff; idx1cnt = idx1max++; @@ -267,7 +267,7 @@ STRCOLL (s1, s2, l) while (*us2 != L('\0')) { - int32_t tmp = findidx (&us2); + int32_t tmp = findidx (&us2, -1); rule2arr[idx2max] = tmp >> 24; idx2arr[idx2max] = tmp & 0xffffff; idx2cnt = idx2max++; diff --git a/string/strxfrm_l.c b/string/strxfrm_l.c index 220253c..b06556d 100644 --- a/string/strxfrm_l.c +++ b/string/strxfrm_l.c @@ -176,7 +176,7 @@ STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l) idxmax = 0; do { - int32_t tmp = findidx (&usrc); + int32_t tmp = findidx (&usrc, -1); rulearr[idxmax] = tmp >> 24; idxarr[idxmax] = tmp & 0xffffff;