From bbcbf3796f10b0c3b107207828794fbdb9d6b87a Mon Sep 17 00:00:00 2001 From: Florian Weimer Date: Aug 29 2018 09:40:05 +0000 Subject: regex: Fix memory overread when pattern contains NUL byte (#1622674) --- diff --git a/glibc-1622674-1.patch b/glibc-1622674-1.patch new file mode 100644 index 0000000..4d94590 --- /dev/null +++ b/glibc-1622674-1.patch @@ -0,0 +1,41 @@ +commit 58559f14437d2aa71753a29fed435efa06aa4576 +Author: Paul Eggert +Date: Tue Aug 28 21:54:28 2018 +0200 + + regex: fix uninitialized memory access + + I introduced this bug into gnulib in commit + 8335a4d6c7b4448cd0bcb6d0bebf1d456bcfdb17 dated 2006-04-10; + eventually it was merged into glibc. The bug was found by + project-repo and reported here: + https://lists.gnu.org/r/sed-devel/2018-08/msg00017.html + Diagnosis and draft fix reported by Assaf Gordon here: + https://lists.gnu.org/r/bug-gnulib/2018-08/msg00071.html + https://lists.gnu.org/r/bug-gnulib/2018-08/msg00142.html + * posix/regex_internal.c (build_wcs_upper_buffer): + Fix bug when mbrtowc returns 0. + + (cherry picked from commit bc680b336971305cb39896b30d72dc7101b62242) + +diff --git a/posix/regex_internal.c b/posix/regex_internal.c +index 7f0083b918de6530..b10588f1ccbb1992 100644 +--- a/posix/regex_internal.c ++++ b/posix/regex_internal.c +@@ -317,7 +317,7 @@ build_wcs_upper_buffer (re_string_t *pstr) + mbclen = __mbrtowc (&wc, + ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx + + byte_idx), remain_len, &pstr->cur_state); +- if (BE (mbclen < (size_t) -2, 1)) ++ if (BE (0 < mbclen && mbclen < (size_t) -2, 1)) + { + wchar_t wcu = __towupper (wc); + if (wcu != wc) +@@ -386,7 +386,7 @@ build_wcs_upper_buffer (re_string_t *pstr) + else + p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx; + mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state); +- if (BE (mbclen < (size_t) -2, 1)) ++ if (BE (0 < mbclen && mbclen < (size_t) -2, 1)) + { + wchar_t wcu = __towupper (wc); + if (wcu != wc) diff --git a/glibc-1622674-2.patch b/glibc-1622674-2.patch new file mode 100644 index 0000000..ecf37f7 --- /dev/null +++ b/glibc-1622674-2.patch @@ -0,0 +1,226 @@ +commit 0b79004569e5ce1669136b8c41564c3809730f15 +Author: Florian Weimer +Date: Tue Aug 28 12:57:46 2018 +0200 + + regex: Add test tst-regcomp-truncated [BZ #23578] + + (cherry picked from commit 761404b74d9853ce1608195e24f25b78a910591a) + +diff --git a/posix/Makefile b/posix/Makefile +index 00c62841a282f15a..83162123f9c927a0 100644 +--- a/posix/Makefile ++++ b/posix/Makefile +@@ -96,7 +96,7 @@ tests := test-errno tstgetopt testfnm runtests runptests \ + tst-posix_fadvise tst-posix_fadvise64 \ + tst-sysconf-empty-chroot tst-glob_symlinks tst-fexecve \ + tst-glob-tilde test-ssize-max tst-spawn4 bug-regex37 \ +- bug-regex38 ++ bug-regex38 tst-regcomp-truncated + tests-internal := bug-regex5 bug-regex20 bug-regex33 \ + tst-rfc3484 tst-rfc3484-2 tst-rfc3484-3 \ + tst-glob_lstat_compat tst-spawn4-compat +@@ -194,6 +194,7 @@ $(objpfx)tst-regex2.out: $(gen-locales) + $(objpfx)tst-regexloc.out: $(gen-locales) + $(objpfx)tst-rxspencer.out: $(gen-locales) + $(objpfx)tst-rxspencer-no-utf8.out: $(gen-locales) ++$(objpfx)tst-regcomp-truncated.out: $(gen-locales) + endif + + # If we will use the generic uname implementation, we must figure out what +diff --git a/posix/tst-regcomp-truncated.c b/posix/tst-regcomp-truncated.c +new file mode 100644 +index 0000000000000000..a4a1581bbc2b39eb +--- /dev/null ++++ b/posix/tst-regcomp-truncated.c +@@ -0,0 +1,191 @@ ++/* Test compilation of truncated regular expressions. ++ Copyright (C) 2018 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* This test constructs various patterns in an attempt to trigger ++ over-reading the regular expression compiler, such as bug ++ 23578. */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* Locales to test. */ ++static const char locales[][17] = ++ { ++ "C", ++ "en_US.UTF-8", ++ "de_DE.ISO-8859-1", ++ }; ++ ++/* Syntax options. Will be combined with other flags. */ ++static const reg_syntax_t syntaxes[] = ++ { ++ RE_SYNTAX_EMACS, ++ RE_SYNTAX_AWK, ++ RE_SYNTAX_GNU_AWK, ++ RE_SYNTAX_POSIX_AWK, ++ RE_SYNTAX_GREP, ++ RE_SYNTAX_EGREP, ++ RE_SYNTAX_POSIX_EGREP, ++ RE_SYNTAX_POSIX_BASIC, ++ RE_SYNTAX_POSIX_EXTENDED, ++ RE_SYNTAX_POSIX_MINIMAL_EXTENDED, ++ }; ++ ++/* Trailing characters placed after the initial character. */ ++static const char trailing_strings[][4] = ++ { ++ "", ++ "[", ++ "\\", ++ "[\\", ++ "(", ++ "(\\", ++ "\\(", ++ }; ++ ++static int ++do_test (void) ++{ ++ /* Staging buffer for the constructed regular expression. */ ++ char buffer[16]; ++ ++ /* Allocation used to detect over-reading by the regular expression ++ compiler. */ ++ struct support_next_to_fault ntf ++ = support_next_to_fault_allocate (sizeof (buffer)); ++ ++ /* Arbitrary Unicode codepoint at which we stop generating ++ characters. We do not probe the whole range because that would ++ take too long due to combinatorical exploision as the result of ++ combination with other flags. */ ++ static const wchar_t last_character = 0xfff; ++ ++ for (size_t locale_idx = 0; locale_idx < array_length (locales); ++ ++ locale_idx) ++ { ++ if (setlocale (LC_ALL, locales[locale_idx]) == NULL) ++ { ++ support_record_failure (); ++ printf ("error: setlocale (\"%s\"): %m", locales[locale_idx]); ++ continue; ++ } ++ if (test_verbose > 0) ++ printf ("info: testing locale \"%s\"\n", locales[locale_idx]); ++ ++ for (wchar_t wc = 0; wc <= last_character; ++wc) ++ { ++ char *after_wc; ++ if (wc == 0) ++ { ++ /* wcrtomb treats L'\0' in a special way. */ ++ *buffer = '\0'; ++ after_wc = &buffer[1]; ++ } ++ else ++ { ++ mbstate_t ps = { }; ++ size_t ret = wcrtomb (buffer, wc, &ps); ++ if (ret == (size_t) -1) ++ { ++ /* EILSEQ means that the target character set ++ cannot encode the character. */ ++ if (errno != EILSEQ) ++ { ++ support_record_failure (); ++ printf ("error: wcrtomb (0x%x) failed: %m\n", ++ (unsigned) wc); ++ } ++ continue; ++ } ++ TEST_VERIFY_EXIT (ret != 0); ++ after_wc = &buffer[ret]; ++ } ++ ++ for (size_t trailing_idx = 0; ++ trailing_idx < array_length (trailing_strings); ++ ++trailing_idx) ++ { ++ char *after_trailing ++ = stpcpy (after_wc, trailing_strings[trailing_idx]); ++ ++ for (int do_nul = 0; do_nul < 2; ++do_nul) ++ { ++ char *after_nul; ++ if (do_nul) ++ { ++ *after_trailing = '\0'; ++ after_nul = &after_trailing[1]; ++ } ++ else ++ after_nul = after_trailing; ++ ++ size_t length = after_nul - buffer; ++ ++ /* Make sure that the faulting region starts ++ after the used portion of the buffer. */ ++ char *ntf_start = ntf.buffer + sizeof (buffer) - length; ++ memcpy (ntf_start, buffer, length); ++ ++ for (const reg_syntax_t *psyntax = syntaxes; ++ psyntax < array_end (syntaxes); ++psyntax) ++ for (int do_icase = 0; do_icase < 2; ++do_icase) ++ { ++ re_syntax_options = *psyntax; ++ if (do_icase) ++ re_syntax_options |= RE_ICASE; ++ ++ regex_t reg; ++ memset (®, 0, sizeof (reg)); ++ const char *msg = re_compile_pattern ++ (ntf_start, length, ®); ++ if (msg != NULL) ++ { ++ if (test_verbose > 0) ++ { ++ char *quoted = support_quote_blob ++ (buffer, length); ++ printf ("info: compilation failed for pattern" ++ " \"%s\", syntax 0x%lx: %s\n", ++ quoted, re_syntax_options, msg); ++ free (quoted); ++ } ++ } ++ else ++ regfree (®); ++ } ++ } ++ } ++ } ++ } ++ ++ support_next_to_fault_free (&ntf); ++ ++ return 0; ++} ++ ++#include diff --git a/glibc.spec b/glibc.spec index 1b33560..466441e 100644 --- a/glibc.spec +++ b/glibc.spec @@ -1,6 +1,6 @@ %define glibcsrcdir glibc-2.28 %define glibcversion 2.28 -%define glibcrelease 7%{?dist} +%define glibcrelease 8%{?dist} # Pre-release tarballs are pulled in from git using a command that is # effectively: # @@ -166,6 +166,8 @@ Patch29: glibc-error-va_end.patch Patch30: glibc-nscd-leak.patch Patch31: glibc-nss_files-leak.patch Patch32: glibc-rh1622669.patch +Patch33: glibc-1622674-1.patch +Patch34: glibc-1622674-2.patch ############################################################################## # Continued list of core "glibc" package information: @@ -1881,6 +1883,9 @@ fi %endif %changelog +* Wed Aug 29 2018 Florian Weimer - 2.28-8 +- regex: Fix memory overread when pattern contains NUL byte (#1622674) + * Wed Aug 29 2018 Florian Weimer - 2.28-7 - nptl: Fix waiters-after-spinning case in pthread_cond_broadcast (#1622669)