From bbcbf3796f10b0c3b107207828794fbdb9d6b87a Mon Sep 17 00:00:00 2001
From: Florian Weimer <fweimer@redhat.com>
Date: Aug 29 2018 09:40:05 +0000
Subject: regex: Fix memory overread when pattern contains NUL byte (#1622674)


---

diff --git a/glibc-1622674-1.patch b/glibc-1622674-1.patch
new file mode 100644
index 0000000..4d94590
--- /dev/null
+++ b/glibc-1622674-1.patch
@@ -0,0 +1,41 @@
+commit 58559f14437d2aa71753a29fed435efa06aa4576
+Author: Paul Eggert <eggert@cs.ucla.edu>
+Date:   Tue Aug 28 21:54:28 2018 +0200
+
+    regex: fix uninitialized memory access
+    
+    I introduced this bug into gnulib in commit
+    8335a4d6c7b4448cd0bcb6d0bebf1d456bcfdb17 dated 2006-04-10;
+    eventually it was merged into glibc.  The bug was found by
+    project-repo <bugs@feusi.co> and reported here:
+    https://lists.gnu.org/r/sed-devel/2018-08/msg00017.html
+    Diagnosis and draft fix reported by Assaf Gordon here:
+    https://lists.gnu.org/r/bug-gnulib/2018-08/msg00071.html
+    https://lists.gnu.org/r/bug-gnulib/2018-08/msg00142.html
+    * posix/regex_internal.c (build_wcs_upper_buffer):
+    Fix bug when mbrtowc returns 0.
+    
+    (cherry picked from commit bc680b336971305cb39896b30d72dc7101b62242)
+
+diff --git a/posix/regex_internal.c b/posix/regex_internal.c
+index 7f0083b918de6530..b10588f1ccbb1992 100644
+--- a/posix/regex_internal.c
++++ b/posix/regex_internal.c
+@@ -317,7 +317,7 @@ build_wcs_upper_buffer (re_string_t *pstr)
+ 	  mbclen = __mbrtowc (&wc,
+ 			      ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+ 			       + byte_idx), remain_len, &pstr->cur_state);
+-	  if (BE (mbclen < (size_t) -2, 1))
++	  if (BE (0 < mbclen && mbclen < (size_t) -2, 1))
+ 	    {
+ 	      wchar_t wcu = __towupper (wc);
+ 	      if (wcu != wc)
+@@ -386,7 +386,7 @@ build_wcs_upper_buffer (re_string_t *pstr)
+ 	else
+ 	  p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
+ 	mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
+-	if (BE (mbclen < (size_t) -2, 1))
++	if (BE (0 < mbclen && mbclen < (size_t) -2, 1))
+ 	  {
+ 	    wchar_t wcu = __towupper (wc);
+ 	    if (wcu != wc)
diff --git a/glibc-1622674-2.patch b/glibc-1622674-2.patch
new file mode 100644
index 0000000..ecf37f7
--- /dev/null
+++ b/glibc-1622674-2.patch
@@ -0,0 +1,226 @@
+commit 0b79004569e5ce1669136b8c41564c3809730f15
+Author: Florian Weimer <fweimer@redhat.com>
+Date:   Tue Aug 28 12:57:46 2018 +0200
+
+    regex: Add test tst-regcomp-truncated [BZ #23578]
+    
+    (cherry picked from commit 761404b74d9853ce1608195e24f25b78a910591a)
+
+diff --git a/posix/Makefile b/posix/Makefile
+index 00c62841a282f15a..83162123f9c927a0 100644
+--- a/posix/Makefile
++++ b/posix/Makefile
+@@ -96,7 +96,7 @@ tests		:= test-errno tstgetopt testfnm runtests runptests \
+ 		   tst-posix_fadvise tst-posix_fadvise64 \
+ 		   tst-sysconf-empty-chroot tst-glob_symlinks tst-fexecve \
+ 		   tst-glob-tilde test-ssize-max tst-spawn4 bug-regex37 \
+-		   bug-regex38
++		   bug-regex38 tst-regcomp-truncated
+ tests-internal	:= bug-regex5 bug-regex20 bug-regex33 \
+ 		   tst-rfc3484 tst-rfc3484-2 tst-rfc3484-3 \
+ 		   tst-glob_lstat_compat tst-spawn4-compat
+@@ -194,6 +194,7 @@ $(objpfx)tst-regex2.out: $(gen-locales)
+ $(objpfx)tst-regexloc.out: $(gen-locales)
+ $(objpfx)tst-rxspencer.out: $(gen-locales)
+ $(objpfx)tst-rxspencer-no-utf8.out: $(gen-locales)
++$(objpfx)tst-regcomp-truncated.out: $(gen-locales)
+ endif
+ 
+ # If we will use the generic uname implementation, we must figure out what
+diff --git a/posix/tst-regcomp-truncated.c b/posix/tst-regcomp-truncated.c
+new file mode 100644
+index 0000000000000000..a4a1581bbc2b39eb
+--- /dev/null
++++ b/posix/tst-regcomp-truncated.c
+@@ -0,0 +1,191 @@
++/* Test compilation of truncated regular expressions.
++   Copyright (C) 2018 Free Software Foundation, Inc.
++   This file is part of the GNU C Library.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Lesser General Public
++   License as published by the Free Software Foundation; either
++   version 2.1 of the License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Lesser General Public License for more details.
++
++   You should have received a copy of the GNU Lesser General Public
++   License along with the GNU C Library; if not, see
++   <http://www.gnu.org/licenses/>.  */
++
++/* This test constructs various patterns in an attempt to trigger
++   over-reads in the regular expression compiler, such as bug
++   23578.  */
++
++#include <array_length.h>
++#include <errno.h>
++#include <locale.h>
++#include <regex.h>
++#include <stdio.h>
++#include <stdlib.h>
++#include <string.h>
++#include <support/check.h>
++#include <support/next_to_fault.h>
++#include <support/support.h>
++#include <support/test-driver.h>
++#include <wchar.h>
++
++/* Locales to test.  */
++static const char locales[][17] =
++  {
++    "C",
++    "en_US.UTF-8",
++    "de_DE.ISO-8859-1",
++  };
++
++/* Syntax options.  Will be combined with other flags.  */
++static const reg_syntax_t syntaxes[] =
++  {
++    RE_SYNTAX_EMACS,
++    RE_SYNTAX_AWK,
++    RE_SYNTAX_GNU_AWK,
++    RE_SYNTAX_POSIX_AWK,
++    RE_SYNTAX_GREP,
++    RE_SYNTAX_EGREP,
++    RE_SYNTAX_POSIX_EGREP,
++    RE_SYNTAX_POSIX_BASIC,
++    RE_SYNTAX_POSIX_EXTENDED,
++    RE_SYNTAX_POSIX_MINIMAL_EXTENDED,
++  };
++
++/* Trailing characters placed after the initial character.  */
++static const char trailing_strings[][4] =
++  {
++    "",
++    "[",
++    "\\",
++    "[\\",
++    "(",
++    "(\\",
++    "\\(",
++  };
++
++static int
++do_test (void)
++{
++  /* Staging buffer for the constructed regular expression.  */
++  char buffer[16];
++
++  /* Allocation used to detect over-reading by the regular expression
++     compiler.  */
++  struct support_next_to_fault ntf
++    = support_next_to_fault_allocate (sizeof (buffer));
++
++  /* Arbitrary Unicode codepoint at which we stop generating
++     characters.  We do not probe the whole range because that would
++     take too long due to combinatorial explosion as the result of
++     combination with other flags.  */
++  static const wchar_t last_character = 0xfff;
++
++  for (size_t locale_idx = 0; locale_idx < array_length (locales);
++       ++ locale_idx)
++    {
++      if (setlocale (LC_ALL, locales[locale_idx]) == NULL)
++        {
++          support_record_failure ();
++          printf ("error: setlocale (\"%s\"): %m", locales[locale_idx]);
++          continue;
++        }
++      if (test_verbose > 0)
++        printf ("info: testing locale \"%s\"\n", locales[locale_idx]);
++
++      for (wchar_t wc = 0; wc <= last_character; ++wc)
++        {
++          char *after_wc;
++          if (wc == 0)
++            {
++              /* wcrtomb treats L'\0' in a special way.  */
++              *buffer = '\0';
++              after_wc = &buffer[1];
++            }
++          else
++            {
++              mbstate_t ps = { };
++              size_t ret = wcrtomb (buffer, wc, &ps);
++              if (ret == (size_t) -1)
++                {
++                  /* EILSEQ means that the target character set
++                     cannot encode the character.  */
++                  if (errno != EILSEQ)
++                    {
++                      support_record_failure ();
++                      printf ("error: wcrtomb (0x%x) failed: %m\n",
++                              (unsigned) wc);
++                    }
++                  continue;
++                }
++              TEST_VERIFY_EXIT (ret != 0);
++              after_wc = &buffer[ret];
++            }
++
++          for (size_t trailing_idx = 0;
++               trailing_idx < array_length (trailing_strings);
++               ++trailing_idx)
++            {
++              char *after_trailing
++                = stpcpy (after_wc, trailing_strings[trailing_idx]);
++
++              for (int do_nul = 0; do_nul < 2; ++do_nul)
++                {
++                  char *after_nul;
++                  if (do_nul)
++                    {
++                      *after_trailing = '\0';
++                      after_nul = &after_trailing[1];
++                    }
++                  else
++                    after_nul = after_trailing;
++
++                  size_t length = after_nul - buffer;
++
++                  /* Make sure that the faulting region starts
++                     after the used portion of the buffer.  */
++                  char *ntf_start = ntf.buffer + sizeof (buffer) - length;
++                  memcpy (ntf_start, buffer, length);
++
++                  for (const reg_syntax_t *psyntax = syntaxes;
++                       psyntax < array_end (syntaxes); ++psyntax)
++                    for (int do_icase = 0; do_icase < 2; ++do_icase)
++                      {
++                        re_syntax_options = *psyntax;
++                        if (do_icase)
++                          re_syntax_options |= RE_ICASE;
++
++                        regex_t reg;
++                        memset (&reg, 0, sizeof (reg));
++                        const char *msg = re_compile_pattern
++                          (ntf_start, length, &reg);
++                        if (msg != NULL)
++                          {
++                            if (test_verbose > 0)
++                              {
++                                char *quoted = support_quote_blob
++                                  (buffer, length);
++                                printf ("info: compilation failed for pattern"
++                                        " \"%s\", syntax 0x%lx: %s\n",
++                                        quoted, re_syntax_options, msg);
++                                free (quoted);
++                              }
++                          }
++                        else
++                          regfree (&reg);
++                      }
++                }
++            }
++        }
++    }
++
++  support_next_to_fault_free (&ntf);
++
++  return 0;
++}
++
++#include <support/test-driver.c>
diff --git a/glibc.spec b/glibc.spec
index 1b33560..466441e 100644
--- a/glibc.spec
+++ b/glibc.spec
@@ -1,6 +1,6 @@
 %define glibcsrcdir glibc-2.28
 %define glibcversion 2.28
-%define glibcrelease 7%{?dist}
+%define glibcrelease 8%{?dist}
 # Pre-release tarballs are pulled in from git using a command that is
 # effectively:
 #
@@ -166,6 +166,8 @@ Patch29: glibc-error-va_end.patch
 Patch30: glibc-nscd-leak.patch
 Patch31: glibc-nss_files-leak.patch
 Patch32: glibc-rh1622669.patch
+Patch33: glibc-1622674-1.patch
+Patch34: glibc-1622674-2.patch
 
 ##############################################################################
 # Continued list of core "glibc" package information:
@@ -1881,6 +1883,9 @@ fi
 %endif
 
 %changelog
+* Wed Aug 29 2018 Florian Weimer <fweimer@redhat.com> - 2.28-8
+- regex: Fix memory overread when pattern contains NUL byte (#1622674)
+
 * Wed Aug 29 2018 Florian Weimer <fweimer@redhat.com> - 2.28-7
 - nptl: Fix waiters-after-spinning case in pthread_cond_broadcast (#1622669)