9ddfda
From 37f30deca415d6c2606bf088e09f978134b9e2e1 Mon Sep 17 00:00:00 2001
9ddfda
From: Niko Tyni <ntyni@debian.org>
9ddfda
Date: Sun, 10 Mar 2019 19:40:42 +0200
9ddfda
Subject: [PATCH] Fix POSIX::mblen mbstate_t initialization on threaded perls
9ddfda
 with glibc
9ddfda
MIME-Version: 1.0
9ddfda
Content-Type: text/plain; charset=UTF-8
9ddfda
Content-Transfer-Encoding: 8bit
9ddfda
9ddfda
As reported in https://bugs.launchpad.net/bugs/1818953 POSIX::mblen()
9ddfda
is broken on threaded perls with glibc.
9ddfda
9ddfda
  % perl -MPOSIX=mblen -e 'mblen("a", 1)'
9ddfda
  perl: mbrtowc.c:105: __mbrtowc: Assertion `__mbsinit (data.__statep)' failed.
9ddfda
  zsh: abort (core dumped)  perl -MPOSIX=mblen -e 'mblen("a", 1)'
9ddfda
9ddfda
This broke in v5.27.8-134-g6c9ff7e96e which made the function
9ddfda
use mbrlen(3) under the hood on threaded perls.
9ddfda
9ddfda
The problem is initialization of the shift state with
9ddfda
9ddfda
  mbrlen(NULL, 0, &ps);
9ddfda
9ddfda
The glibc documentation for mbrlen(3) at
9ddfda
9ddfda
  https://www.gnu.org/software/libc/manual/html_node/Converting-a-Character.html#Converting-a-Character
9ddfda
9ddfda
does not mention initialization by passing in a null pointer for the
9ddfda
string, only a pointer to a NUL wide character.
9ddfda
9ddfda
   If the next multibyte character corresponds to the NUL wide character,
9ddfda
   the return value is 0. If the next n bytes form a valid multibyte
9ddfda
   character, the number of bytes belonging to this multibyte character
9ddfda
   byte sequence is returned.
9ddfda
9ddfda
Use memset(3) instead for mbstate_t initialization, as suggested in
9ddfda
9ddfda
  https://www.gnu.org/software/libc/manual/html_node/Keeping-the-state.html
9ddfda
9ddfda
with the hope that this is more portable.
9ddfda
9ddfda
While at it, add a few basic test cases. These are in a new file because
9ddfda
they need fresh_perl_is() from test.pl while the existing ones use
9ddfda
Test::More (and conversion of at least posix.t looks way too involved).
9ddfda
9ddfda
Bug-Ubuntu: https://bugs.launchpad.net/bugs/1818953
9ddfda
Petr Písař: Ported to 5.28.1 from
9ddfda
25d7b7aa379d33ce2e8fe3e2bef4206b35739bc5.
9ddfda
9ddfda
Signed-off-by: Petr Písař <ppisar@redhat.com>
9ddfda
---
9ddfda
 MANIFEST           |  1 +
9ddfda
 ext/POSIX/POSIX.xs |  2 +-
9ddfda
 ext/POSIX/t/mb.t   | 47 ++++++++++++++++++++++++++++++++++++++++++++++
9ddfda
 3 files changed, 49 insertions(+), 1 deletion(-)
9ddfda
 create mode 100644 ext/POSIX/t/mb.t
9ddfda
9ddfda
diff --git a/MANIFEST b/MANIFEST
9ddfda
index 9fbbe9a..e4e63c3 100644
9ddfda
--- a/MANIFEST
9ddfda
+++ b/MANIFEST
9ddfda
@@ -4204,6 +4204,7 @@ ext/POSIX/POSIX.xs		POSIX extension external subroutines
9ddfda
 ext/POSIX/t/export.t		Test @EXPORT and @EXPORT_OK
9ddfda
 ext/POSIX/t/iscrash		See if POSIX isxxx() crashes with threads on Win32
9ddfda
 ext/POSIX/t/math.t		Basic math tests for POSIX
9ddfda
+ext/POSIX/t/mb.t		Multibyte function tests for POSIX
9ddfda
 ext/POSIX/t/posix.t		See if POSIX works
9ddfda
 ext/POSIX/t/sigaction.t		See if POSIX::sigaction works
9ddfda
 ext/POSIX/t/sigset.t		See if POSIX::SigSet works
9ddfda
diff --git a/ext/POSIX/POSIX.xs b/ext/POSIX/POSIX.xs
9ddfda
index 7497305..395025a 100644
9ddfda
--- a/ext/POSIX/POSIX.xs
9ddfda
+++ b/ext/POSIX/POSIX.xs
9ddfda
@@ -3318,7 +3318,7 @@ mblen(s, n)
9ddfda
 #endif
9ddfda
     CODE:
9ddfda
 #if defined(USE_ITHREADS) && defined(HAS_MBRLEN)
9ddfda
-        PERL_UNUSED_RESULT(mbrlen(NULL, 0, &ps));   /* Initialize state */
9ddfda
+        memset(&ps, 0, sizeof(ps)); /* Initialize state */
9ddfda
         RETVAL = mbrlen(s, n, &ps); /* Prefer reentrant version */
9ddfda
 #else
9ddfda
         RETVAL = mblen(s, n);
9ddfda
diff --git a/ext/POSIX/t/mb.t b/ext/POSIX/t/mb.t
9ddfda
new file mode 100644
9ddfda
index 0000000..961edf6
9ddfda
--- /dev/null
9ddfda
+++ b/ext/POSIX/t/mb.t
9ddfda
@@ -0,0 +1,47 @@
9ddfda
+#!./perl
9ddfda
+
9ddfda
+# These tests are in a separate file, because they use fresh_perl_is()
9ddfda
+# from test.pl.
9ddfda
+
9ddfda
+# The mb* functions use the "underlying locale" that is not affected by
9ddfda
+# the Perl one.  So we run the tests in a separate "fresh_perl" process
9ddfda
+# with the correct LC_CTYPE set in the environment.
9ddfda
+
9ddfda
+BEGIN {
9ddfda
+    require Config; import Config;
9ddfda
+    if ($^O ne 'VMS' and $Config{'extensions'} !~ /\bPOSIX\b/) {
9ddfda
+	print "1..0\n";
9ddfda
+	exit 0;
9ddfda
+    }
9ddfda
+    unshift @INC, "../../t";
9ddfda
+    require 'loc_tools.pl';
9ddfda
+    require 'test.pl';
9ddfda
+}
9ddfda
+
9ddfda
+plan tests => 3;
9ddfda
+
9ddfda
+use POSIX qw();
9ddfda
+
9ddfda
+SKIP: {
9ddfda
+    skip("mblen() not present", 3) unless $Config{d_mblen};
9ddfda
+
9ddfda
+    is(&POSIX::mblen("a", &POSIX::MB_CUR_MAX), 1, 'mblen() basically works');
9ddfda
+
9ddfda
+    skip("LC_CTYPE locale support not available", 2)
9ddfda
+      unless locales_enabled('LC_CTYPE');
9ddfda
+
9ddfda
+    my $utf8_locale = find_utf8_ctype_locale();
9ddfda
+    skip("no utf8 locale available", 2) unless $utf8_locale;
9ddfda
+
9ddfda
+    local $ENV{LC_CTYPE} = $utf8_locale;
9ddfda
+    local $ENV{LC_ALL};
9ddfda
+    delete $ENV{LC_ALL};
9ddfda
+
9ddfda
+    fresh_perl_is(
9ddfda
+      'use POSIX; print &POSIX::mblen("\x{c3}\x{28}", &POSIX::MB_CUR_MAX)',
9ddfda
+      -1, {}, 'mblen() recognizes invalid multibyte characters');
9ddfda
+
9ddfda
+    fresh_perl_is(
9ddfda
+     'use POSIX; print &POSIX::mblen("\N{GREEK SMALL LETTER SIGMA}", &POSIX::MB_CUR_MAX)',
9ddfda
+     2, {}, 'mblen() works on UTF-8 characters');
9ddfda
+}
9ddfda
-- 
9ddfda
2.20.1
9ddfda