diff --git a/00262-pep538_coerce_legacy_c_locale.patch b/00262-pep538_coerce_legacy_c_locale.patch index 856ef77..d749bac 100644 --- a/00262-pep538_coerce_legacy_c_locale.patch +++ b/00262-pep538_coerce_legacy_c_locale.patch @@ -124,13 +124,14 @@ index ca5f9c2..7aa460b 100644 def assert_python_ok(*args, **env_vars): diff --git a/Lib/test/test_c_locale_coercion.py b/Lib/test/test_c_locale_coercion.py new file mode 100644 -index 0000000..a4b4626 +index 0000000..635c98f --- /dev/null +++ b/Lib/test/test_c_locale_coercion.py -@@ -0,0 +1,353 @@ +@@ -0,0 +1,371 @@ +# Tests the attempted automatic coercion of the C locale to a UTF-8 locale + +import unittest ++import locale +import os +import sys +import sysconfig @@ -146,7 +147,14 @@ index 0000000..a4b4626 + +# Set our expectation for the default encoding used in the C locale +# for the filesystem encoding and the standard streams -+C_LOCALE_STREAM_ENCODING = "ascii" ++ ++# AIX uses iso8859-1 in the C locale, other *nix platforms use ASCII ++if sys.platform.startswith("aix"): ++ C_LOCALE_STREAM_ENCODING = "iso8859-1" ++else: ++ C_LOCALE_STREAM_ENCODING = "ascii" ++ ++# FS encoding is UTF-8 on macOS, other *nix platforms use the locale encoding +if sys.platform == "darwin": + C_LOCALE_FS_ENCODING = "utf-8" +else: @@ -162,24 +170,34 @@ index 0000000..a4b4626 + +# In order to get the warning messages to match up as expected, the candidate +# order here must much the target locale order in Python/pylifecycle.c -+_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8") #, "UTF-8") -+ -+# XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to -+# problems encountered on *BSD systems with those test cases -+# For additional details see: -+# nl_langinfo CODESET error: https://bugs.python.org/issue30647 -+# locale handling differences: https://bugs.python.org/issue30672 ++_C_UTF8_LOCALES = ("C.UTF-8", "C.utf8", "UTF-8") + +# There's no reliable cross-platform way of checking locale alias +# lists, so the only way of knowing which of these locales will work +# is to try them with locale.setlocale(). We do that in a subprocess +# to avoid altering the locale of the test runner. ++# ++# If the relevant locale module attributes exist, and we're not on a platform ++# where we expect it to always succeed, we also check that ++# `locale.nl_langinfo(locale.CODESET)` works, as if it fails, the interpreter ++# will skip locale coercion for that particular target locale ++_check_nl_langinfo_CODESET = bool( ++ sys.platform not in ("darwin", "linux") and ++ hasattr(locale, "nl_langinfo") and ++ hasattr(locale, "CODESET") ++) ++ +def _set_locale_in_subprocess(locale_name): + cmd_fmt = "import locale; print(locale.setlocale(locale.LC_CTYPE, '{}'))" ++ if _check_nl_langinfo_CODESET: ++ # If there's no valid CODESET, we expect coercion to be skipped ++ cmd_fmt += "; import sys; sys.exit(not locale.nl_langinfo(locale.CODESET))" + cmd = cmd_fmt.format(locale_name) + result, py_cmd = run_python_until_end("-c", cmd, __isolated=True) + return result.rc == 0 + ++ ++ +_fields = "fsencoding stdin_info stdout_info stderr_info lang lc_ctype lc_all" +_EncodingDetails = namedtuple("EncodingDetails", _fields) + @@ -695,7 +713,7 @@ index a7afbc7..03f8295 100644 for (i = 0; i < argc; i++) { PyMem_RawFree(argv_copy2[i]); diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c -index a4f7f82..743d9b6 100644 +index a4f7f82..3843297 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -167,6 +167,7 @@ Py_SetStandardStreamEncoding(const char *encoding, const char *errors) @@ -706,7 +724,7 @@ index a4f7f82..743d9b6 100644 /* Global initializations. Can be undone by Py_FinalizeEx(). Don't call this twice without an intervening Py_FinalizeEx() call. When initializations fail, a fatal error is issued and the function does -@@ -301,6 +302,181 @@ import_init(PyInterpreterState *interp, PyObject *sysmod) +@@ -301,6 +302,183 @@ import_init(PyInterpreterState *interp, PyObject *sysmod) } @@ -779,18 +797,10 @@ index a4f7f82..743d9b6 100644 +static _LocaleCoercionTarget _TARGET_LOCALES[] = { + {"C.UTF-8"}, + {"C.utf8"}, -+ /* {"UTF-8"}, */ ++ {"UTF-8"}, + {NULL} +}; + -+/* XXX (ncoghlan): Using UTF-8 as a target locale is currently disabled due to -+ * problems encountered on *BSD systems with those test cases -+ * For additional details see: -+ * nl_langinfo CODESET error: https://bugs.python.org/issue30647 -+ * locale handling differences: https://bugs.python.org/issue30672 -+ */ -+ -+ +static char * +get_default_standard_stream_error_handler(void) +{ @@ -873,6 +883,16 @@ index a4f7f82..743d9b6 100644 + const char *new_locale = setlocale(LC_CTYPE, + target->locale_name); + if (new_locale != NULL) { ++#if !defined(__APPLE__) && defined(HAVE_LANGINFO_H) && defined(CODESET) ++ /* Also ensure that nl_langinfo works in this locale */ ++ char *codeset = nl_langinfo(CODESET); ++ if (!codeset || *codeset == '\0') { ++ /* CODESET is not set or empty, so skip coercion */ ++ new_locale = NULL; ++ setlocale(LC_CTYPE, ""); ++ continue; ++ } ++#endif + /* Successfully configured locale, so make it the default */ + _coerce_default_locale_settings(target); + return; @@ -888,7 +908,7 @@ index a4f7f82..743d9b6 100644 void _Py_InitializeEx_Private(int install_sigs, int install_importlib) { -@@ -315,11 +491,19 @@ _Py_InitializeEx_Private(int install_sigs, int install_importlib) +@@ -315,11 +493,19 @@ _Py_InitializeEx_Private(int install_sigs, int install_importlib) initialized = 1; _Py_Finalizing = NULL; @@ -909,7 +929,7 @@ index a4f7f82..743d9b6 100644 #endif if ((p = Py_GETENV("PYTHONDEBUG")) && *p != '\0') -@@ -1242,12 +1426,8 @@ initstdio(void) +@@ -1242,12 +1428,8 @@ initstdio(void) } } if (!errors && !(pythonioencoding && *pythonioencoding)) {