0129572
commit f411207a833d0c49578ebe7062aee3660813ed5f
0129572
Author: Nikita Popov <npv1310@gmail.com>
0129572
Date:   Tue Nov 2 13:21:42 2021 +0500
0129572
0129572
    gconv: Do not emit spurious NUL character in ISO-2022-JP-3 (bug 28524)
0129572
    
0129572
    Bugfix 27256 has introduced another issue:
0129572
    In conversion from ISO-2022-JP-3 encoding, it is possible
0129572
    to force iconv to emit an extra NUL character on internal state reset.
0129572
    To do this, it is sufficient to feed iconv with escape sequence
0129572
    which switches active character set.
0129572
    The simplified check 'data->__statep->__count != ASCII_set'
0129572
    introduced by the aforementioned bugfix picks that case and
0129572
    behaves as if a '\0' character had been queued, thus emitting it.
0129572
    
0129572
    To eliminate this issue, these steps are taken:
0129572
    * Restore original condition
0129572
    '(data->__statep->__count & ~7) != ASCII_set'.
0129572
    It is necessary since bits 0-2 may contain
0129572
    number of buffered input characters.
0129572
    * Check that queued character is not NUL.
0129572
    A similar step is taken for the main conversion loop.
0129572
    
0129572
    The bundled test case follows this logic:
0129572
    * Try to convert ISO-2022-JP-3 escape sequence
0129572
    switching active character set
0129572
    * Reset internal state by providing NULL as input buffer
0129572
    * Ensure that nothing has been converted.
0129572
    
0129572
    Signed-off-by: Nikita Popov <npv1310@gmail.com>
0129572
    (cherry picked from commit ff012870b2c02a62598c04daa1e54632e020fd7d)
0129572
0129572
diff --git a/iconvdata/Makefile b/iconvdata/Makefile
0129572
index c216f959df1413f8..d5507a048c6a6508 100644
0129572
--- a/iconvdata/Makefile
0129572
+++ b/iconvdata/Makefile
0129572
@@ -1,4 +1,5 @@
0129572
 # Copyright (C) 1997-2021 Free Software Foundation, Inc.
0129572
+# Copyright (C) The GNU Toolchain Authors.
0129572
 # This file is part of the GNU C Library.
0129572
 
0129572
 # The GNU C Library is free software; you can redistribute it and/or
0129572
@@ -74,7 +75,7 @@ ifeq (yes,$(build-shared))
0129572
 tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \
0129572
 	tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \
0129572
 	bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4 \
0129572
-	bug-iconv13 bug-iconv14
0129572
+	bug-iconv13 bug-iconv14 bug-iconv15
0129572
 ifeq ($(have-thread-library),yes)
0129572
 tests += bug-iconv3
0129572
 endif
0129572
@@ -327,6 +328,8 @@ $(objpfx)bug-iconv12.out: $(addprefix $(objpfx), $(gconv-modules)) \
0129572
 			  $(addprefix $(objpfx),$(modules.so))
0129572
 $(objpfx)bug-iconv14.out: $(addprefix $(objpfx), $(gconv-modules)) \
0129572
 			  $(addprefix $(objpfx),$(modules.so))
0129572
+$(objpfx)bug-iconv15.out: $(addprefix $(objpfx), $(gconv-modules)) \
0129572
+			  $(addprefix $(objpfx),$(modules.so))
0129572
 
0129572
 $(objpfx)iconv-test.out: run-iconv-test.sh \
0129572
 			 $(addprefix $(objpfx), $(gconv-modules)) \
0129572
diff --git a/iconvdata/bug-iconv15.c b/iconvdata/bug-iconv15.c
0129572
new file mode 100644
0129572
index 0000000000000000..cc04bd0313a68786
0129572
--- /dev/null
0129572
+++ b/iconvdata/bug-iconv15.c
0129572
@@ -0,0 +1,60 @@
0129572
+/* Bug 28524: Conversion from ISO-2022-JP-3 with iconv
0129572
+   may emit spurious NUL character on state reset.
0129572
+   Copyright (C) The GNU Toolchain Authors.
0129572
+   This file is part of the GNU C Library.
0129572
+
0129572
+   The GNU C Library is free software; you can redistribute it and/or
0129572
+   modify it under the terms of the GNU Lesser General Public
0129572
+   License as published by the Free Software Foundation; either
0129572
+   version 2.1 of the License, or (at your option) any later version.
0129572
+
0129572
+   The GNU C Library is distributed in the hope that it will be useful,
0129572
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
0129572
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0129572
+   Lesser General Public License for more details.
0129572
+
0129572
+   You should have received a copy of the GNU Lesser General Public
0129572
+   License along with the GNU C Library; if not, see
0129572
+   <https://www.gnu.org/licenses/>.  */
0129572
+
0129572
+#include <stddef.h>
0129572
+#include <iconv.h>
0129572
+#include <support/check.h>
0129572
+
0129572
+static int
0129572
+do_test (void)
0129572
+{
0129572
+  char in[] = "\x1b(I";
0129572
+  char *inbuf = in;
0129572
+  size_t inleft = sizeof (in) - 1;
0129572
+  char out[1];
0129572
+  char *outbuf = out;
0129572
+  size_t outleft = sizeof (out);
0129572
+  iconv_t cd;
0129572
+
0129572
+  cd = iconv_open ("UTF8", "ISO-2022-JP-3");
0129572
+  TEST_VERIFY_EXIT (cd != (iconv_t) -1);
0129572
+
0129572
+  /* The first call consumes the escape sequence and alters the
0129572
+     internal state: JISX0201_Kana_set is now selected, so the
0129572
+     state value != ASCII_set.  */
0129572
+  TEST_VERIFY (iconv (cd, &inbuf, &inleft, &outbuf, &outleft) != (size_t) -1);
0129572
+
0129572
+  /* The escape sequence alone must not add any bytes
0129572
+     to the output buffer at this point.  */
0129572
+  TEST_VERIFY (outbuf == out);
0129572
+  TEST_VERIFY (outleft == sizeof (out));
0129572
+
0129572
+  /* Reset the state; an unpatched glibc emits a spurious NUL here.  */
0129572
+  TEST_VERIFY (iconv (cd, NULL, NULL, &outbuf, &outleft) != (size_t) -1);
0129572
+
0129572
+  /* The reset must not produce any output characters.  */
0129572
+  TEST_VERIFY (outbuf == out);
0129572
+  TEST_VERIFY (outleft == sizeof (out));
0129572
+
0129572
+  TEST_VERIFY_EXIT (iconv_close (cd) != -1);
0129572
+
0129572
+  return 0;
0129572
+}
0129572
+
0129572
+#include <support/test-driver.c>
0129572
diff --git a/iconvdata/iso-2022-jp-3.c b/iconvdata/iso-2022-jp-3.c
0129572
index c8ba88cdc9fe9200..5fc0c0f7397935fe 100644
0129572
--- a/iconvdata/iso-2022-jp-3.c
0129572
+++ b/iconvdata/iso-2022-jp-3.c
0129572
@@ -1,5 +1,6 @@
0129572
 /* Conversion module for ISO-2022-JP-3.
0129572
    Copyright (C) 1998-2021 Free Software Foundation, Inc.
0129572
+   Copyright (C) The GNU Toolchain Authors.
0129572
    This file is part of the GNU C Library.
0129572
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998,
0129572
    and Bruno Haible <bruno@clisp.org>, 2002.
0129572
@@ -81,20 +82,31 @@ enum
0129572
    the output state to the initial state.  This has to be done during the
0129572
    flushing.  */
0129572
 #define EMIT_SHIFT_TO_INIT \
0129572
-  if (data->__statep->__count != ASCII_set)			      \
0129572
+  if ((data->__statep->__count & ~7) != ASCII_set)			      \
0129572
     {									      \
0129572
       if (FROM_DIRECTION)						      \
0129572
 	{								      \
0129572
-	  if (__glibc_likely (outbuf + 4 <= outend))			      \
0129572
+	  uint32_t ch = data->__statep->__count >> 6;			      \
0129572
+									      \
0129572
+	  if (__glibc_unlikely (ch != 0))				      \
0129572
 	    {								      \
0129572
-	      /* Write out the last character.  */			      \
0129572
-	      *((uint32_t *) outbuf) = data->__statep->__count >> 6;	      \
0129572
-	      outbuf += sizeof (uint32_t);				      \
0129572
-	      data->__statep->__count = ASCII_set;			\
0129572
+	      if (__glibc_likely (outbuf + 4 <= outend))		      \
0129572
+		{							      \
0129572
+		  /* Write out the last character.  */			      \
0129572
+		  put32u (outbuf, ch);					      \
0129572
+		  outbuf += 4;						      \
0129572
+		  data->__statep->__count &= 7;				      \
0129572
+		  data->__statep->__count |= ASCII_set;			      \
0129572
+		}							      \
0129572
+	      else							      \
0129572
+		/* We don't have enough room in the output buffer.  */	      \
0129572
+		status = __GCONV_FULL_OUTPUT;				      \
0129572
 	    }								      \
0129572
 	  else								      \
0129572
-	    /* We don't have enough room in the output buffer.  */	      \
0129572
-	    status = __GCONV_FULL_OUTPUT;				      \
0129572
+	    {								      \
0129572
+	      data->__statep->__count &= 7;				      \
0129572
+	      data->__statep->__count |= ASCII_set;			      \
0129572
+	    }								      \
0129572
 	}								      \
0129572
       else								      \
0129572
 	{								      \