#14 rebase to latest upstream version
Merged 2 months ago by lzaoral. Opened 2 months ago by lzaoral.
rpms/ lzaoral/coreutils v9.5  into  rawhide

@@ -1,31 +0,0 @@ 

- From c4c5ed8f4e9cd55a12966d4f520e3a13101637d9 Mon Sep 17 00:00:00 2001

- From: Paul Eggert <eggert@cs.ucla.edu>

- Date: Tue, 16 Jan 2024 13:48:32 -0800

- Subject: [PATCH] split: do not shrink hold buffer

- MIME-Version: 1.0

- Content-Type: text/plain; charset=UTF-8

- Content-Transfer-Encoding: 8bit

- 

- * src/split.c (line_bytes_split): Do not shrink hold buffer.

- If it’s large for this batch it’s likely to be large for the next

- batch, and for ‘split’ it’s not worth the complexity/CPU hassle to

- shrink it.  Do not assume hold_size can be bufsize.

- ---

-  src/split.c | 3 ---

-  1 file changed, 3 deletions(-)

- 

- diff --git a/src/split.c b/src/split.c

- index 64020c859..037960a59 100644

- --- a/src/split.c

- +++ b/src/split.c

- @@ -809,10 +809,7 @@ line_bytes_split (intmax_t n_bytes, char *buf, idx_t bufsize)

-              {

-                cwrite (n_out == 0, hold, n_hold);

-                n_out += n_hold;

- -              if (n_hold > bufsize)

- -                hold = xirealloc (hold, bufsize);

-                n_hold = 0;

- -              hold_size = bufsize;

-              }

-  

-            /* Output to eol if present.  */

@@ -1,28 +0,0 @@ 

- From 2616c6be1c244424617997151c67bcab2dacbcfe Mon Sep 17 00:00:00 2001

- From: rpm-build <rpm-build>

- Date: Thu, 31 Aug 2023 14:34:05 +0200

- Subject: [PATCH] coreutils-9.4-systemd-coredump.patch

- 

- Cherry picked from gnulib upstream commits:

- * 1e6a26f9312bb47e070f94b17b14dc1a6ffbb74f ("readutmp: fix core dump if --enable-systemd")

- * 3af1d7b0ce3a8e3ae565e7cea10cee6fd7cb8109 ("readutmp: Fix memory leak introduced by last commit.")

- ---

-  lib/readutmp.c | 2 +-

-  1 file changed, 1 insertion(+), 1 deletion(-)

- 

- diff --git a/lib/readutmp.c b/lib/readutmp.c

- index 0173b7e..ec09feb 100644

- --- a/lib/readutmp.c

- +++ b/lib/readutmp.c

- @@ -795,7 +795,7 @@ read_utmp_from_systemd (idx_t *n_entries, STRUCT_UTMP **utmp_buf, int options)

-      {

-        char **sessions;

-        int num_sessions = sd_get_sessions (&sessions);

- -      if (num_sessions >= 0)

- +      if (num_sessions >= 0 && sessions != NULL)

-          {

-            char **session_ptr;

-            for (session_ptr = sessions; *session_ptr != NULL; session_ptr++)

- -- 

- 2.41.0

- 

@@ -1,205 +0,0 @@ 

- From 73d119f4f8052a9fb6cef13cd9e75d5a4e23311a Mon Sep 17 00:00:00 2001

- From: dann frazier <dann.frazier@canonical.com>

- Date: Wed, 29 Nov 2023 18:32:34 -0700

- Subject: [PATCH] tail: fix tailing sysfs files where PAGE_SIZE > BUFSIZ

- 

- * src/tail.c (file_lines): Ensure we use a buffer size >= PAGE_SIZE when

- searching backwards to avoid seeking within a file,

- which on sysfs files is accepted but also returns no data.

- * tests/tail/tail-sysfs.sh: Add a new test.

- * tests/local.mk: Reference the new test.

- * NEWS: Mention the bug fix.

- Fixes https://bugs.gnu.org/67490

- 

- Upstream-commit: 73d119f4f8052a9fb6cef13cd9e75d5a4e23311a

- Cherry-picked-by: Lukáš Zaoral <lzaoral@redhat.com>

- ---

-  src/tail.c               | 57 +++++++++++++++++++++++++++++-----------

-  tests/local.mk           |  1 +

-  tests/tail/tail-sysfs.sh | 34 ++++++++++++++++++++++++

-  3 files changed, 77 insertions(+), 15 deletions(-)

-  create mode 100755 tests/tail/tail-sysfs.sh

- 

- diff --git a/src/tail.c b/src/tail.c

- index c45f3b65a..6979e0ba3 100644

- --- a/src/tail.c

- +++ b/src/tail.c

- @@ -208,6 +208,9 @@ static int nbpids = 0;

-     that is writing to all followed files.  */

-  static pid_t pid;

-  

- +/* Used to determine the buffer size when scanning backwards in a file.  */

- +static idx_t page_size;

- +

-  /* True if we have ever read standard input.  */

-  static bool have_read_stdin;

-  

- @@ -515,22 +518,40 @@ xlseek (int fd, off_t offset, int whence, char const *filename)

-     Return true if successful.  */

-  

-  static bool

- -file_lines (char const *pretty_filename, int fd, uintmax_t n_lines,

- -            off_t start_pos, off_t end_pos, uintmax_t *read_pos)

- +file_lines (char const *pretty_filename, int fd, struct stat const *sb,

- +            uintmax_t n_lines, off_t start_pos, off_t end_pos,

- +            uintmax_t *read_pos)

-  {

- -  char buffer[BUFSIZ];

- +  char *buffer;

-    size_t bytes_read;

- +  blksize_t bufsize = BUFSIZ;

-    off_t pos = end_pos;

- +  bool ok = true;

-  

-    if (n_lines == 0)

-      return true;

-  

- +  /* Be careful with files with sizes that are a multiple of the page size,

- +     as on /proc or /sys file systems these files accept seeking to within

- +     the file, but then return no data when read.  So use a buffer that's

- +     at least PAGE_SIZE to avoid seeking within such files.

- +

- +     We could also indirectly use a large enough buffer through io_blksize()

- +     however this would be less efficient in the common case, as it would

- +     generally pick a larger buffer size, resulting in reading more data

- +     from the end of the file.  */

- +  affirm (S_ISREG (sb->st_mode));

- +  if (sb->st_size % page_size == 0)

- +    bufsize = MAX (BUFSIZ, page_size);

- +

- +  buffer = xmalloc (bufsize);

- +

-    /* Set 'bytes_read' to the size of the last, probably partial, buffer;

- -     0 < 'bytes_read' <= 'BUFSIZ'.  */

- -  bytes_read = (pos - start_pos) % BUFSIZ;

- +     0 < 'bytes_read' <= 'bufsize'.  */

- +  bytes_read = (pos - start_pos) % bufsize;

-    if (bytes_read == 0)

- -    bytes_read = BUFSIZ;

- -  /* Make 'pos' a multiple of 'BUFSIZ' (0 if the file is short), so that all

- +    bytes_read = bufsize;

- +  /* Make 'pos' a multiple of 'bufsize' (0 if the file is short), so that all

-       reads will be on block boundaries, which might increase efficiency.  */

-    pos -= bytes_read;

-    xlseek (fd, pos, SEEK_SET, pretty_filename);

- @@ -538,7 +559,8 @@ file_lines (char const *pretty_filename, int fd, uintmax_t n_lines,

-    if (bytes_read == SAFE_READ_ERROR)

-      {

-        error (0, errno, _("error reading %s"), quoteaf (pretty_filename));

- -      return false;

- +      ok = false;

- +      goto free_buffer;

-      }

-    *read_pos = pos + bytes_read;

-  

- @@ -565,7 +587,7 @@ file_lines (char const *pretty_filename, int fd, uintmax_t n_lines,

-                xwrite_stdout (nl + 1, bytes_read - (n + 1));

-                *read_pos += dump_remainder (false, pretty_filename, fd,

-                                             end_pos - (pos + bytes_read));

- -              return true;

- +              goto free_buffer;

-              }

-          }

-  

- @@ -577,23 +599,26 @@ file_lines (char const *pretty_filename, int fd, uintmax_t n_lines,

-            xlseek (fd, start_pos, SEEK_SET, pretty_filename);

-            *read_pos = start_pos + dump_remainder (false, pretty_filename, fd,

-                                                    end_pos);

- -          return true;

- +          goto free_buffer;

-          }

- -      pos -= BUFSIZ;

- +      pos -= bufsize;

-        xlseek (fd, pos, SEEK_SET, pretty_filename);

-  

- -      bytes_read = safe_read (fd, buffer, BUFSIZ);

- +      bytes_read = safe_read (fd, buffer, bufsize);

-        if (bytes_read == SAFE_READ_ERROR)

-          {

-            error (0, errno, _("error reading %s"), quoteaf (pretty_filename));

- -          return false;

- +          ok = false;

- +          goto free_buffer;

-          }

-  

-        *read_pos = pos + bytes_read;

-      }

-    while (bytes_read > 0);

-  

- -  return true;

- +free_buffer:

- +  free (buffer);

- +  return ok;

-  }

-  

-  /* Print the last N_LINES lines from the end of the standard input,

- @@ -1915,7 +1940,7 @@ tail_lines (char const *pretty_filename, int fd, uintmax_t n_lines,

-          {

-            *read_pos = end_pos;

-            if (end_pos != 0

- -              && ! file_lines (pretty_filename, fd, n_lines,

- +              && ! file_lines (pretty_filename, fd, &stats, n_lines,

-                                 start_pos, end_pos, read_pos))

-              return false;

-          }

- @@ -2337,6 +2362,8 @@ main (int argc, char **argv)

-  

-    atexit (close_stdout);

-  

- +  page_size = getpagesize ();

- +

-    have_read_stdin = false;

-  

-    count_lines = true;

- diff --git a/tests/local.mk b/tests/local.mk

- index db4ee7ed8..bf03238c3 100644

- --- a/tests/local.mk

- +++ b/tests/local.mk

- @@ -257,6 +257,7 @@ all_tests =					\

-    tests/seq/seq-precision.sh			\

-    tests/head/head.pl				\

-    tests/head/head-elide-tail.pl			\

- +  tests/tail/tail-sysfs.sh			\

-    tests/tail/tail-n0f.sh			\

-    tests/ls/ls-misc.pl				\

-    tests/date/date.pl				\

- diff --git a/tests/tail/tail-sysfs.sh b/tests/tail/tail-sysfs.sh

- new file mode 100755

- index 000000000..00874b3dc

- --- /dev/null

- +++ b/tests/tail/tail-sysfs.sh

- @@ -0,0 +1,34 @@

- +#!/bin/sh

- +# sysfs files have weird properties that can be influenced by page size

- +

- +# Copyright 2023 Free Software Foundation, Inc.

- +

- +# This program is free software: you can redistribute it and/or modify

- +# it under the terms of the GNU General Public License as published by

- +# the Free Software Foundation, either version 3 of the License, or

- +# (at your option) any later version.

- +

- +# This program is distributed in the hope that it will be useful,

- +# but WITHOUT ANY WARRANTY; without even the implied warranty of

- +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

- +# GNU General Public License for more details.

- +

- +# You should have received a copy of the GNU General Public License

- +# along with this program.  If not, see <https://www.gnu.org/licenses/>.

- +

- +. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src

- +print_ver_ tail

- +

- +file='/sys/kernel/profiling'

- +

- +test -r "$file" || skip_ "No $file file"

- +

- +cp -f "$file" exp || framework_failure_

- +

- +tail -n1 "$file" > out || fail=1

- +compare exp out || fail=1

- +

- +tail -c2 "$file" > out || fail=1

- +compare exp out || fail=1

- +

- +Exit $fail

file modified
+12 -12
@@ -1,4 +1,4 @@ 

- From 6e36198f10a2f63b89c89ebb5d5c185b20fb3a63 Mon Sep 17 00:00:00 2001

+ From f072852456c545bd89296bc88cf59ccd63287a68 Mon Sep 17 00:00:00 2001

  From: Kamil Dudka <kdudka@redhat.com>

  Date: Mon, 29 Mar 2010 17:20:34 +0000

  Subject: [PATCH] coreutils-df-direct.patch
@@ -11,10 +11,10 @@ 

   create mode 100755 tests/df/direct.sh

  

  diff --git a/doc/coreutils.texi b/doc/coreutils.texi

- index 5b9a597..6810c15 100644

+ index 8f7f43e..230f1f1 100644

  --- a/doc/coreutils.texi

  +++ b/doc/coreutils.texi

- @@ -12074,6 +12074,13 @@ some systems (notably Solaris), doing this yields more up to date results,

+ @@ -12427,6 +12427,13 @@ some systems (notably Solaris), doing this yields more up to date results,

   but in general this option makes @command{df} much slower, especially when

   there are many or very busy file systems.

   
@@ -29,10 +29,10 @@ 

   @opindex --total

   @cindex grand total of file system size, usage and available space

  diff --git a/src/df.c b/src/df.c

- index 48025b9..c8efa5b 100644

+ index 994f0e3..ceee209 100644

  --- a/src/df.c

  +++ b/src/df.c

- @@ -125,6 +125,9 @@ static bool print_type;

+ @@ -121,6 +121,9 @@ static bool print_type;

   /* If true, print a grand total at the end.  */

   static bool print_grand_total;

   
@@ -42,7 +42,7 @@ 

   /* Grand total data.  */

   static struct fs_usage grand_fsu;

   

- @@ -252,13 +255,15 @@ enum

+ @@ -247,13 +250,15 @@ enum

     NO_SYNC_OPTION = CHAR_MAX + 1,

     SYNC_OPTION,

     TOTAL_OPTION,
@@ -59,7 +59,7 @@ 

     {"inodes", no_argument, nullptr, 'i'},

     {"human-readable", no_argument, nullptr, 'h'},

     {"si", no_argument, nullptr, 'H'},

- @@ -583,7 +588,10 @@ get_header (void)

+ @@ -574,7 +579,10 @@ get_header (void)

     for (col = 0; col < ncolumns; col++)

       {

         char *cell = nullptr;
@@ -71,7 +71,7 @@ 

   

         if (columns[col]->field == SIZE_FIELD

             && (header_mode == DEFAULT_MODE

- @@ -1486,6 +1494,17 @@ get_point (char const *point, const struct stat *statp)

+ @@ -1471,6 +1479,17 @@ get_point (char const *point, const struct stat *statp)

   static void

   get_entry (char const *name, struct stat const *statp)

   {
@@ -89,7 +89,7 @@ 

     if ((S_ISBLK (statp->st_mode) || S_ISCHR (statp->st_mode))

         && get_device (name))

       return;

- @@ -1556,6 +1575,7 @@ or all file systems by default.\n\

+ @@ -1541,6 +1560,7 @@ or all file systems by default.\n\

     -B, --block-size=SIZE  scale sizes by SIZE before printing them; e.g.,\n\

                              '-BM' prints sizes in units of 1,048,576 bytes;\n\

                              see SIZE format below\n\
@@ -97,7 +97,7 @@ 

     -h, --human-readable  print sizes in powers of 1024 (e.g., 1023M)\n\

     -H, --si              print sizes in powers of 1000 (e.g., 1.1G)\n\

   "), stdout);

- @@ -1646,6 +1666,9 @@ main (int argc, char **argv)

+ @@ -1631,6 +1651,9 @@ main (int argc, char **argv)

                 xstrtol_fatal (e, oi, c, long_options, optarg);

             }

             break;
@@ -107,7 +107,7 @@ 

           case 'i':

             if (header_mode == OUTPUT_MODE)

               {

- @@ -1742,6 +1765,13 @@ main (int argc, char **argv)

+ @@ -1727,6 +1750,13 @@ main (int argc, char **argv)

           }

       }

   
@@ -183,5 +183,5 @@ 

  +

  +Exit $fail

  -- 

- 2.31.1

+ 2.44.0

  

file modified
+540 -913
@@ -1,66 +1,69 @@ 

- From 3a1b92e80708319bcc89852e3da1029c3d1ff6b3 Mon Sep 17 00:00:00 2001

+ From 94cf02dfcb1be23dedf8a39af295f28ee2de6013 Mon Sep 17 00:00:00 2001

  From: rpm-build <rpm-build>

  Date: Wed, 30 Aug 2023 17:19:58 +0200

  Subject: [PATCH] coreutils-i18n.patch

  

  ---

-  bootstrap.conf              |   1 +

+  bootstrap.conf              |   2 +

   configure.ac                |   6 +

   lib/linebuffer.h            |   8 +

+  lib/mbchar.c                |  23 ++

+  lib/mbchar.h                | 373 +++++++++++++++++

   lib/mbfile.c                |  20 +

   lib/mbfile.h                | 267 ++++++++++++

+  m4/mbchar.m4                |  13 +

   m4/mbfile.m4                |  14 +

   src/cut.c                   | 508 +++++++++++++++++++++--

   src/expand-common.c         | 114 ++++++

   src/expand-common.h         |  12 +

   src/expand.c                |  90 +++-

   src/fold.c                  | 312 ++++++++++++--

-  src/join.c                  | 359 ++++++++++++++--

   src/local.mk                |   4 +-

   src/pr.c                    | 443 ++++++++++++++++++--

   src/sort.c                  | 792 +++++++++++++++++++++++++++++++++---

   src/unexpand.c              | 102 ++++-

-  src/uniq.c                  | 119 +++++-

   tests/Coreutils.pm          |   3 +

   tests/expand/mb.sh          | 183 +++++++++

   tests/i18n/sort.sh          |  29 ++

   tests/local.mk              |   4 +

   tests/misc/expand.pl        |  42 ++

   tests/misc/fold.pl          |  50 ++-

-  tests/misc/join.pl          |  50 +++

   tests/misc/sort-mb-tests.sh |  45 ++

   tests/misc/unexpand.pl      |  39 ++

   tests/pr/pr-tests.pl        |  49 +++

   tests/sort/sort-merge.pl    |  42 ++

   tests/sort/sort.pl          |  40 +-

   tests/unexpand/mb.sh        | 172 ++++++++

-  tests/uniq/uniq.pl          |  55 +++

-  31 files changed, 3732 insertions(+), 242 deletions(-)

+  30 files changed, 3605 insertions(+), 196 deletions(-)

+  create mode 100644 lib/mbchar.c

+  create mode 100644 lib/mbchar.h

   create mode 100644 lib/mbfile.c

   create mode 100644 lib/mbfile.h

+  create mode 100644 m4/mbchar.m4

   create mode 100644 m4/mbfile.m4

-  create mode 100755 tests/expand/mb.sh

-  create mode 100755 tests/i18n/sort.sh

-  create mode 100755 tests/misc/sort-mb-tests.sh

-  create mode 100755 tests/unexpand/mb.sh

+  create mode 100644 tests/expand/mb.sh

+  create mode 100644 tests/i18n/sort.sh

+  create mode 100644 tests/misc/sort-mb-tests.sh

+  create mode 100644 tests/unexpand/mb.sh

  

  diff --git a/bootstrap.conf b/bootstrap.conf

- index bd73ff2..0e450cd 100644

+ index 126e1e8..b4ccebf 100644

  --- a/bootstrap.conf

  +++ b/bootstrap.conf

- @@ -167,6 +167,7 @@ gnulib_modules="

+ @@ -163,6 +163,8 @@ gnulib_modules="

     maintainer-makefile

     malloc-gnu

     manywarnings

+ +  mbchar

  +  mbfile

     mbrlen

+    mbrtoc32

     mbrtowc

-    mbsalign

  diff --git a/configure.ac b/configure.ac

- index 8ffc0b7..ca3305d 100644

+ index 9cb6ee1..1131ce3 100644

  --- a/configure.ac

  +++ b/configure.ac

- @@ -448,6 +448,12 @@ fi

+ @@ -504,6 +504,12 @@ fi

   # I'm leaving it here for now.  This whole thing needs to be modernized...

   gl_WINSIZE_IN_PTEM

   
@@ -74,7 +77,7 @@ 

   

   if test $gl_cv_sys_tiocgwinsz_needs_termios_h = no && \

  diff --git a/lib/linebuffer.h b/lib/linebuffer.h

- index b4cc8e4..f2bbb52 100644

+ index ae0d55d..5bf5350 100644

  --- a/lib/linebuffer.h

  +++ b/lib/linebuffer.h

  @@ -22,6 +22,11 @@
@@ -99,6 +102,414 @@ 

   };

   

   /* Initialize linebuffer LINEBUFFER for use. */

+ diff --git a/lib/mbchar.c b/lib/mbchar.c

+ new file mode 100644

+ index 0000000..d94b7c3

+ --- /dev/null

+ +++ b/lib/mbchar.c

+ @@ -0,0 +1,23 @@

+ +/* Copyright (C) 2001, 2006, 2009-2024 Free Software Foundation, Inc.

+ +

+ +   This file is free software: you can redistribute it and/or modify

+ +   it under the terms of the GNU Lesser General Public License as

+ +   published by the Free Software Foundation; either version 2.1 of the

+ +   License, or (at your option) any later version.

+ +

+ +   This file is distributed in the hope that it will be useful,

+ +   but WITHOUT ANY WARRANTY; without even the implied warranty of

+ +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

+ +   GNU Lesser General Public License for more details.

+ +

+ +   You should have received a copy of the GNU Lesser General Public License

+ +   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

+ +

+ +

+ +#include <config.h>

+ +

+ +#define MBCHAR_INLINE _GL_EXTERN_INLINE

+ +

+ +#include <limits.h>

+ +

+ +#include "mbchar.h"

+ diff --git a/lib/mbchar.h b/lib/mbchar.h

+ new file mode 100644

+ index 0000000..c06ef11

+ --- /dev/null

+ +++ b/lib/mbchar.h

+ @@ -0,0 +1,373 @@

+ +/* Multibyte character data type.

+ +   Copyright (C) 2001, 2005-2007, 2009-2024 Free Software Foundation, Inc.

+ +

+ +   This file is free software: you can redistribute it and/or modify

+ +   it under the terms of the GNU Lesser General Public License as

+ +   published by the Free Software Foundation; either version 2.1 of the

+ +   License, or (at your option) any later version.

+ +

+ +   This file is distributed in the hope that it will be useful,

+ +   but WITHOUT ANY WARRANTY; without even the implied warranty of

+ +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

+ +   GNU Lesser General Public License for more details.

+ +

+ +   You should have received a copy of the GNU Lesser General Public License

+ +   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

+ +

+ +/* Written by Bruno Haible <bruno@clisp.org>.  */

+ +

+ +/* A multibyte character is a short subsequence of a char* string,

+ +   representing a single 32-bit wide character.

+ +

+ +   We use multibyte characters instead of 32-bit wide characters because

+ +   of the following goals:

+ +   1) correct multibyte handling, i.e. operate according to the LC_CTYPE

+ +      locale,

+ +   2) ease of maintenance, i.e. the maintainer need not know all details

+ +      of the ISO C 99 standard,

+ +   3) don't fail grossly if the input is not in the encoding set by the

+ +      locale, because often different encodings are in use in the same

+ +      countries (ISO-8859-1/UTF-8, EUC-JP/Shift_JIS, ...),

+ +   4) fast in the case of ASCII characters.

+ +

+ +   Multibyte characters are only accessed through the mb* macros.

+ +

+ +   mb_ptr (mbc)

+ +     returns a pointer to the beginning of the multibyte sequence.

+ +

+ +   mb_len (mbc)

+ +     returns the number of bytes occupied by the multibyte sequence.

+ +     Always > 0.

+ +

+ +   mb_iseq (mbc, sc)

+ +     returns true if mbc is the standard ASCII character sc.

+ +

+ +   mb_isnul (mbc)

+ +     returns true if mbc is the nul character.

+ +

+ +   mb_cmp (mbc1, mbc2)

+ +     returns a positive, zero, or negative value depending on whether mbc1

+ +     sorts after, same or before mbc2.

+ +

+ +   mb_casecmp (mbc1, mbc2)

+ +     returns a positive, zero, or negative value depending on whether mbc1

+ +     sorts after, same or before mbc2, modulo upper/lowercase conversion.

+ +

+ +   mb_equal (mbc1, mbc2)

+ +     returns true if mbc1 and mbc2 are equal.

+ +

+ +   mb_caseequal (mbc1, mbc2)

+ +     returns true if mbc1 and mbc2 are equal modulo upper/lowercase conversion.

+ +

+ +   mb_isalnum (mbc)

+ +     returns true if mbc is alphanumeric.

+ +

+ +   mb_isalpha (mbc)

+ +     returns true if mbc is alphabetic.

+ +

+ +   mb_isascii(mbc)

+ +     returns true if mbc is plain ASCII.

+ +

+ +   mb_isblank (mbc)

+ +     returns true if mbc is a blank.

+ +

+ +   mb_iscntrl (mbc)

+ +     returns true if mbc is a control character.

+ +

+ +   mb_isdigit (mbc)

+ +     returns true if mbc is a decimal digit.

+ +

+ +   mb_isgraph (mbc)

+ +     returns true if mbc is a graphic character.

+ +

+ +   mb_islower (mbc)

+ +     returns true if mbc is lowercase.

+ +

+ +   mb_isprint (mbc)

+ +     returns true if mbc is a printable character.

+ +

+ +   mb_ispunct (mbc)

+ +     returns true if mbc is a punctuation character.

+ +

+ +   mb_isspace (mbc)

+ +     returns true if mbc is a space character.

+ +

+ +   mb_isupper (mbc)

+ +     returns true if mbc is uppercase.

+ +

+ +   mb_isxdigit (mbc)

+ +     returns true if mbc is a hexadecimal digit.

+ +

+ +   mb_width (mbc)

+ +     returns the number of columns on the output device occupied by mbc.

+ +     Always >= 0.

+ +

+ +   mb_putc (mbc, stream)

+ +     outputs mbc on stream, a byte oriented FILE stream opened for output.

+ +

+ +   mb_setascii (&mbc, sc)

+ +     assigns the standard ASCII character sc to mbc.

+ +     (Only available if the 'mbfile' module is in use.)

+ +

+ +   mb_copy (&destmbc, &srcmbc)

+ +     copies srcmbc to destmbc.

+ +

+ +   Here are the function prototypes of the macros.

+ +

+ +   extern const char *  mb_ptr (const mbchar_t mbc);

+ +   extern size_t        mb_len (const mbchar_t mbc);

+ +   extern bool          mb_iseq (const mbchar_t mbc, char sc);

+ +   extern bool          mb_isnul (const mbchar_t mbc);

+ +   extern int           mb_cmp (const mbchar_t mbc1, const mbchar_t mbc2);

+ +   extern int           mb_casecmp (const mbchar_t mbc1, const mbchar_t mbc2);

+ +   extern bool          mb_equal (const mbchar_t mbc1, const mbchar_t mbc2);

+ +   extern bool          mb_caseequal (const mbchar_t mbc1, const mbchar_t mbc2);

+ +   extern bool          mb_isalnum (const mbchar_t mbc);

+ +   extern bool          mb_isalpha (const mbchar_t mbc);

+ +   extern bool          mb_isascii (const mbchar_t mbc);

+ +   extern bool          mb_isblank (const mbchar_t mbc);

+ +   extern bool          mb_iscntrl (const mbchar_t mbc);

+ +   extern bool          mb_isdigit (const mbchar_t mbc);

+ +   extern bool          mb_isgraph (const mbchar_t mbc);

+ +   extern bool          mb_islower (const mbchar_t mbc);

+ +   extern bool          mb_isprint (const mbchar_t mbc);

+ +   extern bool          mb_ispunct (const mbchar_t mbc);

+ +   extern bool          mb_isspace (const mbchar_t mbc);

+ +   extern bool          mb_isupper (const mbchar_t mbc);

+ +   extern bool          mb_isxdigit (const mbchar_t mbc);

+ +   extern int           mb_width (const mbchar_t mbc);

+ +   extern void          mb_putc (const mbchar_t mbc, FILE *stream);

+ +   extern void          mb_setascii (mbchar_t *new, char sc);

+ +   extern void          mb_copy (mbchar_t *new, const mbchar_t *old);

+ + */

+ +

+ +#ifndef _MBCHAR_H

+ +#define _MBCHAR_H 1

+ +

+ +/* This file uses _GL_INLINE_HEADER_BEGIN, _GL_INLINE.  */

+ +#if !_GL_CONFIG_H_INCLUDED

+ + #error "Please include config.h first."

+ +#endif

+ +

+ +#include <string.h>

+ +#include <uchar.h>

+ +

+ +_GL_INLINE_HEADER_BEGIN

+ +#ifndef MBCHAR_INLINE

+ +# define MBCHAR_INLINE _GL_INLINE

+ +#endif

+ +

+ +/* The longest multibyte characters, nowadays, are 4 bytes long.

+ +   Regardless of the values of MB_CUR_MAX and MB_LEN_MAX.  */

+ +#define MBCHAR_BUF_SIZE 4

+ +

+ +struct mbchar

+ +{

+ +  const char *ptr;      /* pointer to current character */

+ +  size_t bytes;         /* number of bytes of current character, > 0 */

+ +  bool wc_valid;        /* true if wc is a valid 32-bit wide character */

+ +  char32_t wc;          /* if wc_valid: the current character */

+ +#if defined GNULIB_MBFILE

+ +  char buf[MBCHAR_BUF_SIZE]; /* room for the bytes, used for file input only */

+ +#endif

+ +};

+ +

+ +/* EOF (not a real character) is represented with bytes = 0 and

+ +   wc_valid = false.  */

+ +

+ +typedef struct mbchar mbchar_t;

+ +

+ +/* Access the current character.  */

+ +#define mb_ptr(mbc) ((mbc).ptr)

+ +#define mb_len(mbc) ((mbc).bytes)

+ +

+ +/* Comparison of characters.  */

+ +#define mb_iseq(mbc, sc) ((mbc).wc_valid && (mbc).wc == (sc))

+ +#define mb_isnul(mbc) ((mbc).wc_valid && (mbc).wc == 0)

+ +#define mb_cmp(mbc1, mbc2) \

+ +  ((mbc1).wc_valid                                                      \

+ +   ? ((mbc2).wc_valid                                                   \

+ +      ? _GL_CMP ((mbc1).wc, (mbc2).wc)                                  \

+ +      : -1)                                                             \

+ +   : ((mbc2).wc_valid                                                   \

+ +      ? 1                                                               \

+ +      : (mbc1).bytes == (mbc2).bytes                                    \

+ +        ? memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes)                 \

+ +        : (mbc1).bytes < (mbc2).bytes                                   \

+ +          ? (memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) > 0 ? 1 : -1) \

+ +          : (memcmp ((mbc1).ptr, (mbc2).ptr, (mbc2).bytes) >= 0 ? 1 : -1)))

+ +#define mb_casecmp(mbc1, mbc2) \

+ +  ((mbc1).wc_valid                                                      \

+ +   ? ((mbc2).wc_valid                                                   \

+ +      ? _GL_CMP (c32tolower ((mbc1).wc), c32tolower ((mbc2).wc))        \

+ +      : -1)                                                             \

+ +   : ((mbc2).wc_valid                                                   \

+ +      ? 1                                                               \

+ +      : (mbc1).bytes == (mbc2).bytes                                    \

+ +        ? memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes)                 \

+ +        : (mbc1).bytes < (mbc2).bytes                                   \

+ +          ? (memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) > 0 ? 1 : -1) \

+ +          : (memcmp ((mbc1).ptr, (mbc2).ptr, (mbc2).bytes) >= 0 ? 1 : -1)))

+ +#define mb_equal(mbc1, mbc2) \

+ +  ((mbc1).wc_valid && (mbc2).wc_valid                                   \

+ +   ? (mbc1).wc == (mbc2).wc                                             \

+ +   : (mbc1).bytes == (mbc2).bytes                                       \

+ +     && memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) == 0)

+ +#define mb_caseequal(mbc1, mbc2) \

+ +  ((mbc1).wc_valid && (mbc2).wc_valid                                   \

+ +   ? c32tolower ((mbc1).wc) == c32tolower ((mbc2).wc)                   \

+ +   : (mbc1).bytes == (mbc2).bytes                                       \

+ +     && memcmp ((mbc1).ptr, (mbc2).ptr, (mbc1).bytes) == 0)

+ +

+ +/* <ctype.h>, <wctype.h> classification.  */

+ +#define mb_isascii(mbc) \

+ +  ((mbc).wc_valid && (mbc).wc >= 0 && (mbc).wc <= 127)

+ +#define mb_isalnum(mbc) ((mbc).wc_valid && c32isalnum ((mbc).wc))

+ +#define mb_isalpha(mbc) ((mbc).wc_valid && c32isalpha ((mbc).wc))

+ +#define mb_isblank(mbc) ((mbc).wc_valid && c32isblank ((mbc).wc))

+ +#define mb_iscntrl(mbc) ((mbc).wc_valid && c32iscntrl ((mbc).wc))

+ +#define mb_isdigit(mbc) ((mbc).wc_valid && c32isdigit ((mbc).wc))

+ +#define mb_isgraph(mbc) ((mbc).wc_valid && c32isgraph ((mbc).wc))

+ +#define mb_islower(mbc) ((mbc).wc_valid && c32islower ((mbc).wc))

+ +#define mb_isprint(mbc) ((mbc).wc_valid && c32isprint ((mbc).wc))

+ +#define mb_ispunct(mbc) ((mbc).wc_valid && c32ispunct ((mbc).wc))

+ +#define mb_isspace(mbc) ((mbc).wc_valid && c32isspace ((mbc).wc))

+ +#define mb_isupper(mbc) ((mbc).wc_valid && c32isupper ((mbc).wc))

+ +#define mb_isxdigit(mbc) ((mbc).wc_valid && c32isxdigit ((mbc).wc))

+ +

+ +/* Extra <wchar.h> function.  */

+ +

+ +/* Unprintable characters appear as a small box of width 1.  */

+ +#define MB_UNPRINTABLE_WIDTH 1

+ +

+ +MBCHAR_INLINE int

+ +mb_width_aux (char32_t wc)

+ +{

+ +  int w = c32width (wc);

+ +  /* For unprintable characters, arbitrarily return 0 for control characters

+ +     and MB_UNPRINTABLE_WIDTH otherwise.  */

+ +  return (w >= 0 ? w : c32iscntrl (wc) ? 0 : MB_UNPRINTABLE_WIDTH);

+ +}

+ +

+ +#define mb_width(mbc) \

+ +  ((mbc).wc_valid ? mb_width_aux ((mbc).wc) : MB_UNPRINTABLE_WIDTH)

+ +

+ +/* Output.  */

+ +#define mb_putc(mbc, stream)  fwrite ((mbc).ptr, 1, (mbc).bytes, (stream))

+ +

+ +#if defined GNULIB_MBFILE

+ +/* Assignment.  */

+ +# define mb_setascii(mbc, sc) \

+ +   ((mbc)->ptr = (mbc)->buf, (mbc)->bytes = 1, (mbc)->wc_valid = 1, \

+ +    (mbc)->wc = (mbc)->buf[0] = (sc))

+ +#endif

+ +

+ +/* Copying a character.  */

+ +MBCHAR_INLINE void

+ +mb_copy (mbchar_t *new_mbc, const mbchar_t *old_mbc)

+ +{

+ +#if defined GNULIB_MBFILE

+ +  if (old_mbc->ptr == &old_mbc->buf[0])

+ +    {

+ +      memcpy (&new_mbc->buf[0], &old_mbc->buf[0], old_mbc->bytes);

+ +      new_mbc->ptr = &new_mbc->buf[0];

+ +    }

+ +  else

+ +#endif

+ +    new_mbc->ptr = old_mbc->ptr;

+ +  new_mbc->bytes = old_mbc->bytes;

+ +  if ((new_mbc->wc_valid = old_mbc->wc_valid))

+ +    new_mbc->wc = old_mbc->wc;

+ +}

+ +

+ +

+ +/* is_basic(c) tests whether the single-byte character c is

+ +   - in the ISO C "basic character set" or is one of '@', '$', and '`'

+ +     which ISO C 23 § 5.2.1.1.(1) guarantees to be single-byte and in

+ +     practice are safe to treat as basic in the execution character set,

+ +     or

+ +   - in the POSIX "portable character set", which

+ +     <https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap06.html>

+ +     equally guarantees to be single-byte.

+ +   This is a convenience function, and is in this file only to share code

+ +   between mbiter.h, mbuiter.h, and mbfile.h.  */

+ +#if (' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \

+ +    && ('$' == 36) && ('%' == 37) && ('&' == 38) && ('\'' == 39) \

+ +    && ('(' == 40) && (')' == 41) && ('*' == 42) && ('+' == 43) \

+ +    && (',' == 44) && ('-' == 45) && ('.' == 46) && ('/' == 47) \

+ +    && ('0' == 48) && ('1' == 49) && ('2' == 50) && ('3' == 51) \

+ +    && ('4' == 52) && ('5' == 53) && ('6' == 54) && ('7' == 55) \

+ +    && ('8' == 56) && ('9' == 57) && (':' == 58) && (';' == 59) \

+ +    && ('<' == 60) && ('=' == 61) && ('>' == 62) && ('?' == 63) \

+ +    && ('@' == 64) && ('A' == 65) && ('B' == 66) && ('C' == 67) \

+ +    && ('D' == 68) && ('E' == 69) && ('F' == 70) && ('G' == 71) \

+ +    && ('H' == 72) && ('I' == 73) && ('J' == 74) && ('K' == 75) \

+ +    && ('L' == 76) && ('M' == 77) && ('N' == 78) && ('O' == 79) \

+ +    && ('P' == 80) && ('Q' == 81) && ('R' == 82) && ('S' == 83) \

+ +    && ('T' == 84) && ('U' == 85) && ('V' == 86) && ('W' == 87) \

+ +    && ('X' == 88) && ('Y' == 89) && ('Z' == 90) && ('[' == 91) \

+ +    && ('\\' == 92) && (']' == 93) && ('^' == 94) && ('_' == 95) \

+ +    && ('`' == 96) && ('a' == 97) && ('b' == 98) && ('c' == 99) \

+ +    && ('d' == 100) && ('e' == 101) && ('f' == 102) && ('g' == 103) \

+ +    && ('h' == 104) && ('i' == 105) && ('j' == 106) && ('k' == 107) \

+ +    && ('l' == 108) && ('m' == 109) && ('n' == 110) && ('o' == 111) \

+ +    && ('p' == 112) && ('q' == 113) && ('r' == 114) && ('s' == 115) \

+ +    && ('t' == 116) && ('u' == 117) && ('v' == 118) && ('w' == 119) \

+ +    && ('x' == 120) && ('y' == 121) && ('z' == 122) && ('{' == 123) \

+ +    && ('|' == 124) && ('}' == 125) && ('~' == 126)

+ +/* The character set is ISO-646, not EBCDIC. */

+ +# define IS_BASIC_ASCII 1

+ +

+ +/* All locale encodings (see localcharset.h) map the characters 0x00..0x7F

+ +   to U+0000..U+007F, like ASCII, except for

+ +     CP864      different mapping of '%'

+ +     SHIFT_JIS  different mappings of 0x5C, 0x7E

+ +     JOHAB      different mapping of 0x5C

+ +   However, these characters in the range 0x20..0x7E are in the ISO C

+ +   "basic character set" and in the POSIX "portable character set", which

+ +   ISO C and POSIX guarantee to be single-byte.  Thus, locales with these

+ +   encodings are not POSIX compliant.  And they are most likely not in use

+ +   any more (as of 2023).  */

+ +# define is_basic(c) ((unsigned char) (c) < 0x80)

+ +

+ +#else

+ +

+ +MBCHAR_INLINE bool

+ +is_basic (char c)

+ +{

+ +  switch (c)

+ +    {

+ +    case '\0':

+ +    case '\007': case '\010':

+ +    case '\t': case '\n': case '\v': case '\f': case '\r':

+ +    case ' ': case '!': case '"': case '#': case '$': case '%':

+ +    case '&': case '\'': case '(': case ')': case '*':

+ +    case '+': case ',': case '-': case '.': case '/':

+ +    case '0': case '1': case '2': case '3': case '4':

+ +    case '5': case '6': case '7': case '8': case '9':

+ +    case ':': case ';': case '<': case '=': case '>':

+ +    case '?': case '@':

+ +    case 'A': case 'B': case 'C': case 'D': case 'E':

+ +    case 'F': case 'G': case 'H': case 'I': case 'J':

+ +    case 'K': case 'L': case 'M': case 'N': case 'O':

+ +    case 'P': case 'Q': case 'R': case 'S': case 'T':

+ +    case 'U': case 'V': case 'W': case 'X': case 'Y':

+ +    case 'Z':

+ +    case '[': case '\\': case ']': case '^': case '_': case '`':

+ +    case 'a': case 'b': case 'c': case 'd': case 'e':

+ +    case 'f': case 'g': case 'h': case 'i': case 'j':

+ +    case 'k': case 'l': case 'm': case 'n': case 'o':

+ +    case 'p': case 'q': case 'r': case 's': case 't':

+ +    case 'u': case 'v': case 'w': case 'x': case 'y':

+ +    case 'z': case '{': case '|': case '}': case '~':

+ +      return 1;

+ +    default:

+ +      return 0;

+ +    }

+ +}

+ +

+ +#endif

+ +

+ +_GL_INLINE_HEADER_END

+ +

+ +#endif /* _MBCHAR_H */

  diff --git a/lib/mbfile.c b/lib/mbfile.c

  new file mode 100644

  index 0000000..8d2957b
@@ -398,6 +809,25 @@ 

  +_GL_INLINE_HEADER_END

  +

  +#endif /* _MBFILE_H */

+ diff --git a/m4/mbchar.m4 b/m4/mbchar.m4

+ new file mode 100644

+ index 0000000..471e8c4

+ --- /dev/null

+ +++ b/m4/mbchar.m4

+ @@ -0,0 +1,13 @@

+ +# mbchar.m4 serial 9

+ +dnl Copyright (C) 2005-2007, 2009-2024 Free Software Foundation, Inc.

+ +dnl This file is free software; the Free Software Foundation

+ +dnl gives unlimited permission to copy and/or distribute it,

+ +dnl with or without modifications, as long as this notice is preserved.

+ +

+ +dnl autoconf tests required for use of mbchar.m4

+ +dnl From Bruno Haible.

+ +

+ +AC_DEFUN([gl_MBCHAR],

+ +[

+ +  AC_REQUIRE([AC_USE_SYSTEM_EXTENSIONS])

+ +])

  diff --git a/m4/mbfile.m4 b/m4/mbfile.m4

  new file mode 100644

  index 0000000..83068a9
@@ -419,7 +849,7 @@ 

  +  :

  +])

  diff --git a/src/cut.c b/src/cut.c

- index b4edbab..65e4658 100644

+ index 061e09c..6d10425 100644

  --- a/src/cut.c

  +++ b/src/cut.c

  @@ -27,6 +27,11 @@
@@ -1079,18 +1509,18 @@ 

   

     if (have_read_stdin && fclose (stdin) == EOF)

  diff --git a/src/expand-common.c b/src/expand-common.c

- index 89fa56a..c102e6e 100644

+ index c95998d..d4386fe 100644

  --- a/src/expand-common.c

  +++ b/src/expand-common.c

- @@ -18,6 +18,7 @@

-  

+ @@ -19,6 +19,7 @@

+  #include <ctype.h>

   #include <stdio.h>

   #include <sys/types.h>

  +#include <mbfile.h>

   #include "system.h"

   #include "fadvise.h"

   #include "quote.h"

- @@ -122,6 +123,119 @@ set_increment_size (uintmax_t tabval)

+ @@ -123,6 +124,119 @@ set_increment_size (uintmax_t tabval)

     return ok;

   }

   
@@ -1211,7 +1641,7 @@ 

      to the list of tab stops.  */

   extern void

  diff --git a/src/expand-common.h b/src/expand-common.h

- index daed31e..f6b2f68 100644

+ index 1a57108..6025652 100644

  --- a/src/expand-common.h

  +++ b/src/expand-common.h

  @@ -25,6 +25,18 @@ extern size_t max_column_width;
@@ -1234,10 +1664,10 @@ 

   extern void

   add_tab_stop (uintmax_t tabval);

  diff --git a/src/expand.c b/src/expand.c

- index 0e74d0c..7080c51 100644

+ index a6176a9..60b1b8e 100644

  --- a/src/expand.c

  +++ b/src/expand.c

- @@ -37,6 +37,9 @@

+ @@ -38,6 +38,9 @@

   #include <stdio.h>

   #include <getopt.h>

   #include <sys/types.h>
@@ -1247,7 +1677,7 @@ 

   #include "system.h"

   #include "expand-common.h"

   

- @@ -95,19 +98,41 @@ expand (void)

+ @@ -96,19 +99,41 @@ expand (void)

   {

     /* Input stream.  */

     FILE *fp = next_file (nullptr);
@@ -1293,7 +1723,7 @@ 

         /* The following variables have valid values only when CONVERT

            is true:  */

   

- @@ -117,17 +142,48 @@ expand (void)

+ @@ -118,17 +143,48 @@ expand (void)

         /* Index in TAB_LIST of next tab stop to examine.  */

         size_t tab_index = 0;

   
@@ -1346,7 +1776,7 @@ 

                   {

                     /* Column the next input tab stop is on.  */

                     uintmax_t next_tab_column;

- @@ -146,32 +202,34 @@ expand (void)

+ @@ -147,32 +203,34 @@ expand (void)

                       if (putchar (' ') < 0)

                         write_error ();

   
@@ -1390,10 +1820,10 @@ 

   }

   

  diff --git a/src/fold.c b/src/fold.c

- index 5c0428d..2372047 100644

+ index 941ad11..cf1e747 100644

  --- a/src/fold.c

  +++ b/src/fold.c

- @@ -22,10 +22,32 @@

+ @@ -23,10 +23,32 @@

   #include <getopt.h>

   #include <sys/types.h>

   
@@ -1426,7 +1856,7 @@ 

   #define TAB_WIDTH 8

   

   /* The official name of this program (e.g., no 'g' prefix).  */

- @@ -33,20 +55,41 @@

+ @@ -34,20 +56,41 @@

   

   #define AUTHORS proper_name ("David MacKenzie")

   
@@ -1472,7 +1902,7 @@ 

     {"spaces", no_argument, nullptr, 's'},

     {"width", required_argument, nullptr, 'w'},

     {GETOPT_HELP_OPTION_DECL},

- @@ -74,6 +117,7 @@ Wrap input lines in each FILE, writing to standard output.\n\

+ @@ -75,6 +118,7 @@ Wrap input lines in each FILE, writing to standard output.\n\

   

         fputs (_("\

     -b, --bytes         count bytes rather than columns\n\
@@ -1480,7 +1910,7 @@ 

     -s, --spaces        break at spaces\n\

     -w, --width=WIDTH   use WIDTH columns instead of 80\n\

   "), stdout);

- @@ -91,7 +135,7 @@ Wrap input lines in each FILE, writing to standard output.\n\

+ @@ -92,7 +136,7 @@ Wrap input lines in each FILE, writing to standard output.\n\

   static size_t

   adjust_column (size_t column, char c)

   {
@@ -1489,7 +1919,7 @@ 

       {

         if (c == '\b')

           {

- @@ -114,30 +158,14 @@ adjust_column (size_t column, char c)

+ @@ -115,30 +159,14 @@ adjust_column (size_t column, char c)

      to stdout, with maximum line length WIDTH.

      Return true if successful.  */

   
@@ -1522,7 +1952,7 @@ 

   

     fadvise (istream, FADVISE_SEQUENTIAL);

   

- @@ -167,6 +195,15 @@ fold_file (char const *filename, size_t width)

+ @@ -168,6 +196,15 @@ fold_file (char const *filename, size_t width)

                 bool found_blank = false;

                 size_t logical_end = offset_out;

   
@@ -1538,7 +1968,7 @@ 

                 /* Look for the last blank. */

                 while (logical_end)

                   {

- @@ -213,13 +250,225 @@ fold_file (char const *filename, size_t width)

+ @@ -214,13 +251,225 @@ fold_file (char const *filename, size_t width)

         line_out[offset_out++] = c;

       }

   
@@ -1766,7 +2196,7 @@ 

     if (STREQ (filename, "-"))

       clearerr (istream);

     else if (fclose (istream) != 0 && !saved_errno)

- @@ -250,7 +499,8 @@ main (int argc, char **argv)

+ @@ -251,7 +500,8 @@ main (int argc, char **argv)

   

     atexit (close_stdout);

   
@@ -1776,7 +2206,7 @@ 

   

     while ((optc = getopt_long (argc, argv, shortopts, longopts, nullptr)) != -1)

       {

- @@ -259,7 +509,15 @@ main (int argc, char **argv)

+ @@ -260,7 +510,15 @@ main (int argc, char **argv)

         switch (optc)

           {

           case 'b':		/* Count bytes rather than columns. */
@@ -1793,520 +2223,28 @@ 

             break;

   

           case 's':		/* Break at word boundaries. */

- diff --git a/src/join.c b/src/join.c

- index 0bcfa75..8a3bcf1 100644

- --- a/src/join.c

- +++ b/src/join.c

- @@ -21,18 +21,32 @@

-  #include <sys/types.h>

-  #include <getopt.h>

-  

- +/* Get mbstate_t, mbrtowc(), mbrtowc(), wcwidth().  */

- +#if HAVE_WCHAR_H

- +# include <wchar.h>

- +#endif

- +

- +/* Get iswblank(), towupper.  */

- +#if HAVE_WCTYPE_H

- +# include <wctype.h>

- +#endif

- +

-  #include "system.h"

-  #include "assure.h"

-  #include "fadvise.h"

-  #include "hard-locale.h"

-  #include "linebuffer.h"

- -#include "memcasecmp.h"

-  #include "quote.h"

-  #include "stdio--.h"

-  #include "xmemcoll.h"

-  #include "xstrtol.h"

-  #include "argmatch.h"

-  

- +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t.  */

- +#if HAVE_MBRTOWC && defined mbstate_t

- +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)

- +#endif

- +

-  /* The official name of this program (e.g., no 'g' prefix).  */

-  #define PROGRAM_NAME "join"

-  

- @@ -134,10 +148,12 @@ static struct outlist outlist_head;

-  /* Last element in 'outlist', where a new element can be added.  */

-  static struct outlist *outlist_end = &outlist_head;

-  

- -/* Tab character separating fields.  If negative, fields are separated

- -   by any nonempty string of blanks, otherwise by exactly one

- -   tab character whose value (when cast to unsigned char) equals TAB.  */

- -static int tab = -1;

- +/* Tab character separating fields.  If NULL, fields are separated

- +   by any nonempty string of blanks.  */

- +static char *tab = NULL;

- +

- +/* The number of bytes used for tab.  */

- +static size_t tablen = 0;

-  

-  /* If nonzero, check that the input is correctly ordered. */

-  static enum

- @@ -277,13 +293,14 @@ xfields (struct line *line)

-    if (ptr == lim)

-      return;

-  

- -  if (0 <= tab && tab != '\n')

- +  if (tab != NULL)

-      {

- +      unsigned char t = tab[0];

-        char *sep;

- -      for (; (sep = memchr (ptr, tab, lim - ptr)) != nullptr; ptr = sep + 1)

- +      for (; (sep = memchr (ptr, t, lim - ptr)) != nullptr; ptr = sep + 1)

-          extract_field (line, ptr, sep - ptr);

-      }

- -  else if (tab < 0)

- +   else

-      {

-        /* Skip leading blanks before the first field.  */

-        while (field_sep (*ptr))

- @@ -307,6 +324,147 @@ xfields (struct line *line)

-    extract_field (line, ptr, lim - ptr);

-  }

-  

- +#if HAVE_MBRTOWC

- +static void

- +xfields_multibyte (struct line *line)

- +{

- +  char *ptr = line->buf.buffer;

- +  char const *lim = ptr + line->buf.length - 1;

- +  wchar_t wc = 0;

- +  size_t mblength = 1;

- +  mbstate_t state, state_bak;

- +

- +  memset (&state, 0, sizeof (mbstate_t));

- +

- +  if (ptr >= lim)

- +    return;

- +

- +  if (tab != NULL)

- +    {

- +      char *sep = ptr;

- +      for (; ptr < lim; ptr = sep + mblength)

- +	{

- +	  sep = ptr;

- +	  while (sep < lim)

- +	    {

- +	      state_bak = state;

- +	      mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);

- +

- +	      if (mblength == (size_t)-1 || mblength == (size_t)-2)

- +		{

- +		  mblength = 1;

- +		  state = state_bak;

- +		}

- +	      mblength = (mblength < 1) ? 1 : mblength;

- +

- +	      if (mblength == tablen && !memcmp (sep, tab, mblength))

- +		break;

- +	      else

- +		{

- +		  sep += mblength;

- +		  continue;

- +		}

- +	    }

- +

- +	  if (sep >= lim)

- +	    break;

- +

- +	  extract_field (line, ptr, sep - ptr);

- +	}

- +    }

- +  else

- +    {

- +      /* Skip leading blanks before the first field.  */

- +      while(ptr < lim)

- +      {

- +        state_bak = state;

- +        mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);

- +

- +        if (mblength == (size_t)-1 || mblength == (size_t)-2)

- +          {

- +            mblength = 1;

- +            state = state_bak;

- +            break;

- +          }

- +        mblength = (mblength < 1) ? 1 : mblength;

- +

- +        if (!iswblank(wc) && wc != '\n')

- +          break;

- +        ptr += mblength;

- +      }

- +

- +      do

- +	{

- +	  char *sep;

- +	  state_bak = state;

- +	  mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);

- +	  if (mblength == (size_t)-1 || mblength == (size_t)-2)

- +	    {

- +	      mblength = 1;

- +	      state = state_bak;

- +	      break;

- +	    }

- +	  mblength = (mblength < 1) ? 1 : mblength;

- +

- +	  sep = ptr + mblength;

- +	  while (sep < lim)

- +	    {

- +	      state_bak = state;

- +	      mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);

- +	      if (mblength == (size_t)-1 || mblength == (size_t)-2)

- +		{

- +		  mblength = 1;

- +		  state = state_bak;

- +		  break;

- +		}

- +	      mblength = (mblength < 1) ? 1 : mblength;

- +

- +	      if (iswblank (wc) || wc == '\n')

- +		break;

- +

- +	      sep += mblength;

- +	    }

- +

- +	  extract_field (line, ptr, sep - ptr);

- +	  if (sep >= lim)

- +	    return;

- +

- +	  state_bak = state;

- +	  mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);

- +	  if (mblength == (size_t)-1 || mblength == (size_t)-2)

- +	    {

- +	      mblength = 1;

- +	      state = state_bak;

- +	      break;

- +	    }

- +	  mblength = (mblength < 1) ? 1 : mblength;

- +

- +	  ptr = sep + mblength;

- +	  while (ptr < lim)

- +	    {

- +	      state_bak = state;

- +	      mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);

- +	      if (mblength == (size_t)-1 || mblength == (size_t)-2)

- +		{

- +		  mblength = 1;

- +		  state = state_bak;

- +		  break;

- +		}

- +	      mblength = (mblength < 1) ? 1 : mblength;

- +

- +	      if (!iswblank (wc) && wc != '\n')

- +		break;

- +

- +	      ptr += mblength;

- +	    }

- +	}

- +      while (ptr < lim);

- +    }

- +

- +  extract_field (line, ptr, lim - ptr);

- +}

- +#endif

- +

-  static void

-  freeline (struct line *line)

-  {

- @@ -328,56 +486,133 @@ keycmp (struct line const *line1, struct line const *line2,

-          idx_t jf_1, idx_t jf_2)

-  {

-    /* Start of field to compare in each file.  */

- -  char *beg1;

- -  char *beg2;

- -

- -  idx_t len1;

- -  idx_t len2;		/* Length of fields to compare.  */

- +  char *beg[2];

- +  char *copy[2];

- +  idx_t len[2]; 	/* Length of fields to compare.  */

-    int diff;

- +  int i, j;

- +  int mallocd = 0;

-  

-    if (jf_1 < line1->nfields)

-      {

- -      beg1 = line1->fields[jf_1].beg;

- -      len1 = line1->fields[jf_1].len;

- +      beg[0] = line1->fields[jf_1].beg;

- +      len[0] = line1->fields[jf_1].len;

-      }

-    else

-      {

- -      beg1 = nullptr;

- -      len1 = 0;

- +      beg[0] = nullptr;

- +      len[0] = 0;

-      }

-  

-    if (jf_2 < line2->nfields)

-      {

- -      beg2 = line2->fields[jf_2].beg;

- -      len2 = line2->fields[jf_2].len;

- +      beg[1] = line2->fields[jf_2].beg;

- +      len[1] = line2->fields[jf_2].len;

-      }

-    else

-      {

- -      beg2 = nullptr;

- -      len2 = 0;

- +      beg[1] = nullptr;

- +      len[1] = 0;

-      }

-  

- -  if (len1 == 0)

- -    return len2 == 0 ? 0 : -1;

- -  if (len2 == 0)

- +  if (len[0] == 0)

- +    return len[1] == 0 ? 0 : -1;

- +  if (len[1] == 0)

-      return 1;

-  

-    if (ignore_case)

-      {

- -      /* FIXME: ignore_case does not work with NLS (in particular,

- -         with multibyte chars).  */

- -      diff = memcasecmp (beg1, beg2, MIN (len1, len2));

- +#ifdef HAVE_MBRTOWC

- +      if (MB_CUR_MAX > 1)

- +      {

- +        size_t mblength;

- +        wchar_t wc, uwc;

- +        mbstate_t state, state_bak;

- +

- +        memset (&state, '\0', sizeof (mbstate_t));

- +

- +        for (i = 0; i < 2; i++)

- +          {

- +            mallocd = 1;

- +            copy[i] = xmalloc (len[i] + 1);

- +            memset (copy[i], '\0',len[i] + 1);

- +

- +            for (j = 0; j < MIN (len[0], len[1]);)

- +              {

- +                state_bak = state;

- +                mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state);

- +

- +                switch (mblength)

- +                  {

- +                  case (size_t) -1:

- +                  case (size_t) -2:

- +                    state = state_bak;

- +                    /* Fall through */

- +                  case 0:

- +                    mblength = 1;

- +                    break;

- +

- +                  default:

- +                    uwc = towupper (wc);

- +

- +                    if (uwc != wc)

- +                      {

- +                        mbstate_t state_wc;

- +                        size_t mblen;

- +

- +                        memset (&state_wc, '\0', sizeof (mbstate_t));

- +                        mblen = wcrtomb (copy[i] + j, uwc, &state_wc);

- +                        assert (mblen != (size_t)-1);

- +                      }

- +                    else

- +                      memcpy (copy[i] + j, beg[i] + j, mblength);

- +                  }

- +                j += mblength;

- +              }

- +            copy[i][j] = '\0';

- +          }

- +      }

- +      else

- +#endif

- +      {

- +        for (i = 0; i < 2; i++)

- +          {

- +            mallocd = 1;

- +            copy[i] = xmalloc (len[i] + 1);

- +

- +            for (j = 0; j < MIN (len[0], len[1]); j++)

- +              copy[i][j] = toupper (beg[i][j]);

- +

- +            copy[i][j] = '\0';

- +          }

- +      }

-      }

-    else

-      {

- -      if (hard_LC_COLLATE)

- -        return xmemcoll (beg1, len1, beg2, len2);

- -      diff = memcmp (beg1, beg2, MIN (len1, len2));

- +      copy[0] = beg[0];

- +      copy[1] = beg[1];

-      }

-  

- +  if (hard_LC_COLLATE)

- +    {

- +      diff = xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]);

- +

- +      if (mallocd)

- +        for (i = 0; i < 2; i++)

- +          free (copy[i]);

- +

- +      return diff;

- +    }

- +  diff = memcmp (copy[0], copy[1], MIN (len[0], len[1]));

- +

- +  if (mallocd)

- +    for (i = 0; i < 2; i++)

- +      free (copy[i]);

- +

- +

-    if (diff)

-      return diff;

- -  return (len1 > len2) - (len1 < len2);

- +  return len[0] - len[1];

-  }

-  

-  /* Check that successive input lines PREV and CURRENT from input file

- @@ -469,6 +704,11 @@ get_line (FILE *fp, struct line **linep, int which)

-      }

-    ++line_no[which - 1];

-  

- +#if HAVE_MBRTOWC

- +  if (MB_CUR_MAX > 1)

- +    xfields_multibyte (line);

- +  else

- +#endif

-    xfields (line);

-  

-    if (prevline[which - 1])

- @@ -562,21 +802,28 @@ prfield (idx_t n, struct line const *line)

-  

-  /* Output all the fields in line, other than the join field.  */

-  

- +#define PUT_TAB_CHAR							\

- +  do									\

- +    {									\

- +      (tab != NULL) ?							\

- +	fwrite(tab, sizeof(char), tablen, stdout) : putchar (' ');	\

- +    }									\

- +  while (0)

- +

-  static void

-  prfields (struct line const *line, idx_t join_field, idx_t autocount)

-  {

-    idx_t i;

-    idx_t nfields = autoformat ? autocount : line->nfields;

- -  char output_separator = tab < 0 ? ' ' : tab;

-  

-    for (i = 0; i < join_field && i < nfields; ++i)

-      {

- -      putchar (output_separator);

- +      PUT_TAB_CHAR;

-        prfield (i, line);

-      }

-    for (i = join_field + 1; i < nfields; ++i)

-      {

- -      putchar (output_separator);

- +      PUT_TAB_CHAR;

-        prfield (i, line);

-      }

-  }

- @@ -587,7 +834,6 @@ static void

-  prjoin (struct line const *line1, struct line const *line2)

-  {

-    const struct outlist *outlist;

- -  char output_separator = tab < 0 ? ' ' : tab;

-    idx_t field;

-    struct line const *line;

-  

- @@ -621,7 +867,7 @@ prjoin (struct line const *line1, struct line const *line2)

-            o = o->next;

-            if (o == nullptr)

-              break;

- -          putchar (output_separator);

- +          PUT_TAB_CHAR;

-          }

-        putchar (eolchar);

-      }

- @@ -1086,20 +1332,43 @@ main (int argc, char **argv)

-  

-          case 't':

-            {

- -            unsigned char newtab = optarg[0];

- +            char *newtab = NULL;

- +            size_t newtablen;

- +            newtab = xstrdup (optarg);

- +#if HAVE_MBRTOWC

- +            if (MB_CUR_MAX > 1)

- +              {

- +                mbstate_t state;

- +

- +                memset (&state, 0, sizeof (mbstate_t));

- +                newtablen = mbrtowc (NULL, newtab,

- +                                     strnlen (newtab, MB_LEN_MAX),

- +                                     &state);

- +                if (newtablen == (size_t) 0

- +                    || newtablen == (size_t) -1

- +                    || newtablen == (size_t) -2)

- +                  newtablen = 1;

- +              }

- +            else

- +#endif

- +              newtablen = 1;

-              if (! newtab)

- -              newtab = '\n'; /* '' => process the whole line.  */

- +              newtab = (char*)"\n"; /* '' => process the whole line.  */

-              else if (optarg[1])

-                {

- -                if (STREQ (optarg, "\\0"))

- -                  newtab = '\0';

- -                else

- -                  error (EXIT_FAILURE, 0, _("multi-character tab %s"),

- -                         quote (optarg));

- +                if (newtablen == 1 && newtab[1])

- +                {

- +                  if (STREQ (newtab, "\\0"))

- +                     newtab[0] = '\0';

- +                }

- +              }

- +            if (tab != NULL && strcmp (tab, newtab))

- +              {

- +                free (newtab);

- +                error (EXIT_FAILURE, 0, _("incompatible tabs"));

-                }

- -            if (0 <= tab && tab != newtab)

- -              error (EXIT_FAILURE, 0, _("incompatible tabs"));

-              tab = newtab;

- +            tablen = newtablen;

-            }

-            break;

-  

  diff --git a/src/local.mk b/src/local.mk

- index f45b911..6f7036a 100644

+ index 96ee941..8fdb8fc 100644

  --- a/src/local.mk

  +++ b/src/local.mk

- @@ -447,8 +447,8 @@ src_base32_CPPFLAGS = -DBASE_TYPE=32 $(AM_CPPFLAGS)

+ @@ -450,8 +450,8 @@ src_base32_CPPFLAGS = -DBASE_TYPE=32 $(AM_CPPFLAGS)

   src_basenc_SOURCES = src/basenc.c

   src_basenc_CPPFLAGS = -DBASE_TYPE=42 $(AM_CPPFLAGS)

   

  -src_expand_SOURCES = src/expand.c src/expand-common.c

  -src_unexpand_SOURCES = src/unexpand.c src/expand-common.c

- +src_expand_SOURCES = src/expand.c src/expand-common.c lib/mbfile.c

- +src_unexpand_SOURCES = src/unexpand.c src/expand-common.c lib/mbfile.c

+ +src_expand_SOURCES = src/expand.c src/expand-common.c lib/mbfile.c lib/mbchar.c

+ +src_unexpand_SOURCES = src/unexpand.c src/expand-common.c lib/mbfile.c lib/mbchar.c

   

   src_wc_SOURCES = src/wc.c

   if USE_AVX2_WC_LINECOUNT

  diff --git a/src/pr.c b/src/pr.c

- index 419545c..702e025 100644

+ index 09c6fa8..7552b62 100644

  --- a/src/pr.c

  +++ b/src/pr.c

  @@ -312,6 +312,24 @@

+  #include <ctype.h>

   #include <getopt.h>

-  #include <stdckdint.h>

   #include <sys/types.h>

  +

  +/* Get MB_LEN_MAX.  */
@@ -2831,7 +2769,7 @@ 

   /* Skip to page PAGE before printing.

      PAGE may be larger than total number of pages. */

   

- @@ -2496,9 +2695,9 @@ read_line (COLUMN *p)

+ @@ -2495,9 +2694,9 @@ read_line (COLUMN *p)

             align_empty_cols = false;

           }

   
@@ -2843,7 +2781,7 @@ 

             padding_not_printed = ANYWHERE;

           }

   

- @@ -2567,7 +2766,7 @@ print_stored (COLUMN *p)

+ @@ -2566,7 +2765,7 @@ print_stored (COLUMN *p)

     COLUMN *q;

   

     int line = p->current_line++;
@@ -2852,7 +2790,7 @@ 

     /* FIXME

        UMR: Uninitialized memory read:

        * This is occurring while in:

- @@ -2579,7 +2778,7 @@ print_stored (COLUMN *p)

+ @@ -2578,7 +2777,7 @@ print_stored (COLUMN *p)

        xmalloc        [xmalloc.c:94]

        init_store_cols [pr.c:1648]

        */
@@ -2861,7 +2799,7 @@ 

   

     pad_vertically = true;

   

- @@ -2599,9 +2798,9 @@ print_stored (COLUMN *p)

+ @@ -2598,9 +2797,9 @@ print_stored (COLUMN *p)

           }

       }

   
@@ -2873,7 +2811,7 @@ 

         padding_not_printed = ANYWHERE;

       }

   

- @@ -2614,8 +2813,8 @@ print_stored (COLUMN *p)

+ @@ -2613,8 +2812,8 @@ print_stored (COLUMN *p)

     if (spaces_not_printed == 0)

       {

         output_position = p->start_position + end_vector[line];
@@ -2884,7 +2822,7 @@ 

       }

   

     return true;

- @@ -2634,7 +2833,7 @@ print_stored (COLUMN *p)

+ @@ -2633,7 +2832,7 @@ print_stored (COLUMN *p)

      number of characters is 1.) */

   

   static int
@@ -2893,7 +2831,7 @@ 

   {

     unsigned char uc = c;

     char *s = clump_buff;

- @@ -2644,10 +2843,10 @@ char_to_clump (char c)

+ @@ -2643,10 +2842,10 @@ char_to_clump (char c)

     int chars;

     int chars_per_c = 8;

   
@@ -2906,7 +2844,7 @@ 

       {

         width = TAB_WIDTH (chars_per_c, input_position);

   

- @@ -2728,6 +2927,164 @@ char_to_clump (char c)

+ @@ -2727,6 +2926,164 @@ char_to_clump (char c)

     return chars;

   }

   
@@ -3072,10 +3010,10 @@ 

      looking for more options and printing the next batch of files.

   

  diff --git a/src/sort.c b/src/sort.c

- index e779845..1f5c337 100644

+ index 2d8324c..46331b8 100644

  --- a/src/sort.c

  +++ b/src/sort.c

- @@ -28,6 +28,14 @@

+ @@ -29,6 +29,14 @@

   #include <sys/types.h>

   #include <sys/wait.h>

   #include <signal.h>
@@ -3148,7 +3086,7 @@ 

   

   /* Flag to remove consecutive duplicate lines from the output.

      Only the last of a sequence of equal lines will be output. */

- @@ -803,6 +834,46 @@ reap_all (void)

+ @@ -804,6 +835,46 @@ reap_all (void)

       reap (-1);

   }

   
@@ -3195,7 +3133,7 @@ 

   /* Clean up any remaining temporary files.  */

   

   static void

- @@ -1270,7 +1341,7 @@ zaptemp (char const *name)

+ @@ -1271,7 +1342,7 @@ zaptemp (char const *name)

     free (node);

   }

   
@@ -3204,7 +3142,7 @@ 

   

   static int

   struct_month_cmp (void const *m1, void const *m2)

- @@ -1285,7 +1356,7 @@ struct_month_cmp (void const *m1, void const *m2)

+ @@ -1286,7 +1357,7 @@ struct_month_cmp (void const *m1, void const *m2)

   /* Initialize the character class tables. */

   

   static void
@@ -3213,7 +3151,7 @@ 

   {

     size_t i;

   

- @@ -1297,7 +1368,7 @@ inittables (void)

+ @@ -1298,7 +1369,7 @@ inittables (void)

         fold_toupper[i] = toupper (i);

       }

   
@@ -3222,7 +3160,7 @@ 

     /* If we're not in the "C" locale, read different names for months.  */

     if (hard_LC_TIME)

       {

- @@ -1379,6 +1450,84 @@ specify_nmerge (int oi, char c, char const *s)

+ @@ -1380,6 +1451,84 @@ specify_nmerge (int oi, char c, char const *s)

       xstrtol_fatal (e, oi, c, long_options, s);

   }

   
@@ -3307,7 +3245,7 @@ 

   /* Specify the amount of main memory to use when sorting.  */

   static void

   specify_sort_size (int oi, char c, char const *s)

- @@ -1610,7 +1759,7 @@ buffer_linelim (struct buffer const *buf)

+ @@ -1611,7 +1760,7 @@ buffer_linelim (struct buffer const *buf)

      by KEY in LINE. */

   

   static char *
@@ -3316,7 +3254,7 @@ 

   {

     char *ptr = line->text, *lim = ptr + line->length - 1;

     size_t sword = key->sword;

- @@ -1619,10 +1768,10 @@ begfield (struct line const *line, struct keyfield const *key)

+ @@ -1620,10 +1769,10 @@ begfield (struct line const *line, struct keyfield const *key)

     /* The leading field separator itself is included in a field when -t

        is absent.  */

   
@@ -3329,7 +3267,7 @@ 

             ++ptr;

           if (ptr < lim)

             ++ptr;

- @@ -1648,12 +1797,71 @@ begfield (struct line const *line, struct keyfield const *key)

+ @@ -1649,12 +1798,71 @@ begfield (struct line const *line, struct keyfield const *key)

     return ptr;

   }

   
@@ -3402,7 +3340,7 @@ 

   {

     char *ptr = line->text, *lim = ptr + line->length - 1;

     size_t eword = key->eword, echar = key->echar;

- @@ -1668,10 +1876,10 @@ limfield (struct line const *line, struct keyfield const *key)

+ @@ -1669,10 +1877,10 @@ limfield (struct line const *line, struct keyfield const *key)

        'beginning' is the first character following the delimiting TAB.

        Otherwise, leave PTR pointing at the first 'blank' character after

        the preceding field.  */
@@ -3415,7 +3353,7 @@ 

             ++ptr;

           if (ptr < lim && (eword || echar))

             ++ptr;

- @@ -1717,10 +1925,10 @@ limfield (struct line const *line, struct keyfield const *key)

+ @@ -1718,10 +1926,10 @@ limfield (struct line const *line, struct keyfield const *key)

        */

   

     /* Make LIM point to the end of (one byte past) the current field.  */
@@ -3428,7 +3366,7 @@ 

         if (newlim)

           lim = newlim;

       }

- @@ -1751,6 +1959,130 @@ limfield (struct line const *line, struct keyfield const *key)

+ @@ -1752,6 +1960,130 @@ limfield (struct line const *line, struct keyfield const *key)

     return ptr;

   }

   
@@ -3559,7 +3497,7 @@ 

   /* Fill BUF reading from FP, moving buf->left bytes from the end

      of buf->buf to the beginning first.  If EOF is reached and the

      file wasn't terminated by a newline, supply one.  Set up BUF's line

- @@ -1837,8 +2169,22 @@ fillbuf (struct buffer *buf, FILE *fp, char const *file)

+ @@ -1838,8 +2170,22 @@ fillbuf (struct buffer *buf, FILE *fp, char const *file)

                     else

                       {

                         if (key->skipsblanks)
@@ -3584,7 +3522,7 @@ 

                         line->keybeg = line_start;

                       }

                   }

- @@ -1976,12 +2322,10 @@ find_unit_order (char const *number)

+ @@ -1977,12 +2323,10 @@ find_unit_order (char const *number)

   

   ATTRIBUTE_PURE

   static int
@@ -3600,7 +3538,7 @@ 

   

     int diff = find_unit_order (a) - find_unit_order (b);

     return (diff ? diff : strnumcmp (a, b, decimal_point, thousands_sep));

- @@ -1993,7 +2337,7 @@ human_numcompare (char const *a, char const *b)

+ @@ -1994,7 +2338,7 @@ human_numcompare (char const *a, char const *b)

   

   ATTRIBUTE_PURE

   static int
@@ -3609,7 +3547,7 @@ 

   {

     while (blanks[to_uchar (*a)])

       a++;

- @@ -2003,6 +2347,25 @@ numcompare (char const *a, char const *b)

+ @@ -2004,6 +2348,25 @@ numcompare (char const *a, char const *b)

     return strnumcmp (a, b, decimal_point, thousands_sep);

   }

   
@@ -3635,7 +3573,7 @@ 

   static int

   nan_compare (long double a, long double b)

   {

- @@ -2044,7 +2407,7 @@ general_numcompare (char const *sa, char const *sb)

+ @@ -2045,7 +2408,7 @@ general_numcompare (char const *sa, char const *sb)

      Return 0 if the name in S is not recognized.  */

   

   static int
@@ -3644,7 +3582,7 @@ 

   {

     size_t lo = 0;

     size_t hi = MONTHS_PER_YEAR;

- @@ -2320,15 +2683,14 @@ debug_key (struct line const *line, struct keyfield const *key)

+ @@ -2372,15 +2735,14 @@ debug_key (struct line const *line, struct keyfield const *key)

             char saved = *lim;

             *lim = '\0';

   
@@ -3662,7 +3600,7 @@ 

             else if (key->general_numeric)

               ignore_value (strtold (beg, &tighter_lim));

             else if (key->numeric || key->human_numeric)

- @@ -2474,7 +2836,7 @@ key_warnings (struct keyfield const *gkey, bool gkey_only)

+ @@ -2526,7 +2888,7 @@ key_warnings (struct keyfield const *gkey, bool gkey_only)

         /* Warn about significant leading blanks.  */

         bool implicit_skip = key_numeric (key) || key->month;

         bool line_offset = key->eword == 0 && key->echar != 0; /* -k1.x,1.y  */
@@ -3671,7 +3609,7 @@ 

             && ((!key->skipsblanks && !implicit_skip)

                 || (!key->skipsblanks && key->schar)

                 || (!key->skipeblanks && key->echar)))

- @@ -2522,9 +2884,9 @@ key_warnings (struct keyfield const *gkey, bool gkey_only)

+ @@ -2574,9 +2936,9 @@ key_warnings (struct keyfield const *gkey, bool gkey_only)

     bool number_locale_warned = false;

     if (basic_numeric_field_span)

       {
@@ -3684,7 +3622,7 @@ 

           {

             error (0, 0,

                    _("field separator %s is treated as a "

- @@ -2535,9 +2897,9 @@ key_warnings (struct keyfield const *gkey, bool gkey_only)

+ @@ -2587,9 +2949,9 @@ key_warnings (struct keyfield const *gkey, bool gkey_only)

       }

     if (basic_numeric_field_span || general_numeric_field_span)

       {
@@ -3697,7 +3635,7 @@ 

           {

             error (0, 0,

                    _("field separator %s is treated as a "

- @@ -2545,19 +2907,19 @@ key_warnings (struct keyfield const *gkey, bool gkey_only)

+ @@ -2597,19 +2959,19 @@ key_warnings (struct keyfield const *gkey, bool gkey_only)

                    quote (((char []) {decimal_point, 0})));

             number_locale_warned = true;

           }
@@ -3721,7 +3659,7 @@ 

           }

       }

   

- @@ -2568,7 +2930,7 @@ key_warnings (struct keyfield const *gkey, bool gkey_only)

+ @@ -2620,7 +2982,7 @@ key_warnings (struct keyfield const *gkey, bool gkey_only)

       {

         error (0, 0,

                _("%snumbers use %s as a decimal point in this locale"),
@@ -3730,7 +3668,7 @@ 

                quote (((char []) {decimal_point, 0})));

   

       }

- @@ -2610,11 +2972,87 @@ diff_reversed (int diff, bool reversed)

+ @@ -2662,11 +3024,87 @@ diff_reversed (int diff, bool reversed)

     return reversed ? (diff < 0) - (diff > 0) : diff;

   }

   
@@ -3819,7 +3757,7 @@ 

   {

     struct keyfield *key = keylist;

   

- @@ -2695,7 +3133,7 @@ keycompare (struct line const *a, struct line const *b)

+ @@ -2747,7 +3185,7 @@ keycompare (struct line const *a, struct line const *b)

             else if (key->human_numeric)

               diff = human_numcompare (ta, tb);

             else if (key->month)
@@ -3828,7 +3766,7 @@ 

             else if (key->random)

               diff = compare_random (ta, tlena, tb, tlenb);

             else if (key->version)

- @@ -2805,6 +3243,211 @@ keycompare (struct line const *a, struct line const *b)

+ @@ -2857,6 +3295,211 @@ keycompare (struct line const *a, struct line const *b)

     return diff_reversed (diff, key->reverse);

   }

   
@@ -4040,7 +3978,7 @@ 

   /* Compare two lines A and B, returning negative, zero, or positive

      depending on whether A compares less than, equal to, or greater than B. */

   

- @@ -2832,7 +3475,7 @@ compare (struct line const *a, struct line const *b)

+ @@ -2884,7 +3527,7 @@ compare (struct line const *a, struct line const *b)

       diff = - NONZERO (blen);

     else if (blen == 0)

       diff = 1;
@@ -4049,7 +3987,7 @@ 

       {

         /* xmemcoll0 is a performance enhancement as

            it will not unconditionally write '\0' after the

- @@ -4220,6 +4863,7 @@ set_ordering (char const *s, struct keyfield *key, enum blanktype blanktype)

+ @@ -4272,6 +4915,7 @@ set_ordering (char const *s, struct keyfield *key, enum blanktype blanktype)

             break;

           case 'f':

             key->translate = fold_toupper;
@@ -4057,7 +3995,7 @@ 

             break;

           case 'g':

             key->general_numeric = true;

- @@ -4299,7 +4943,7 @@ main (int argc, char **argv)

+ @@ -4351,7 +4995,7 @@ main (int argc, char **argv)

     initialize_exit_failure (SORT_FAILURE);

   

     hard_LC_COLLATE = hard_locale (LC_COLLATE);
@@ -4066,7 +4004,7 @@ 

     hard_LC_TIME = hard_locale (LC_TIME);

   #endif

   

- @@ -4322,6 +4966,29 @@ main (int argc, char **argv)

+ @@ -4374,6 +5018,29 @@ main (int argc, char **argv)

         thousands_sep = NON_CHAR;

     }

   
@@ -4096,7 +4034,7 @@ 

     have_read_stdin = false;

     inittables ();

   

- @@ -4592,13 +5259,34 @@ main (int argc, char **argv)

+ @@ -4644,13 +5311,34 @@ main (int argc, char **argv)

   

           case 't':

             {
@@ -4135,7 +4073,7 @@ 

                   else

                     {

                       /* Provoke with 'sort -txx'.  Complain about

- @@ -4609,9 +5297,11 @@ main (int argc, char **argv)

+ @@ -4661,9 +5349,11 @@ main (int argc, char **argv)

                              quote (optarg));

                     }

                 }
@@ -4150,10 +4088,10 @@ 

             break;

   

  diff --git a/src/unexpand.c b/src/unexpand.c

- index 5a2283f..f24ef76 100644

+ index aca67dd..f79c808 100644

  --- a/src/unexpand.c

  +++ b/src/unexpand.c

- @@ -38,6 +38,9 @@

+ @@ -39,6 +39,9 @@

   #include <stdio.h>

   #include <getopt.h>

   #include <sys/types.h>
@@ -4163,7 +4101,7 @@ 

   #include "system.h"

   #include "expand-common.h"

   

- @@ -104,24 +107,47 @@ unexpand (void)

+ @@ -105,24 +108,47 @@ unexpand (void)

   {

     /* Input stream.  */

     FILE *fp = next_file (nullptr);
@@ -4214,7 +4152,7 @@ 

   

         /* If true, perform translations.  */

         bool convert = true;

- @@ -155,12 +181,44 @@ unexpand (void)

+ @@ -156,12 +182,44 @@ unexpand (void)

   

         do

           {
@@ -4262,7 +4200,7 @@ 

   

                 if (blank)

                   {

- @@ -177,16 +235,16 @@ unexpand (void)

+ @@ -178,16 +236,16 @@ unexpand (void)

                         if (next_tab_column < column)

                           error (EXIT_FAILURE, 0, _("input line is too long"));

   
@@ -4282,7 +4220,7 @@ 

   

                             if (! (prev_blank && column == next_tab_column))

                               {

- @@ -194,13 +252,14 @@ unexpand (void)

+ @@ -195,13 +253,14 @@ unexpand (void)

                                    will be replaced by tabs.  */

                                 if (column == next_tab_column)

                                   one_blank_before_tab_stop = true;
@@ -4299,7 +4237,7 @@ 

                           }

   

                         /* Discard pending blanks, unless it was a single

- @@ -208,7 +267,7 @@ unexpand (void)

+ @@ -209,7 +268,7 @@ unexpand (void)

                         pending = one_blank_before_tab_stop;

                       }

                   }
@@ -4308,7 +4246,7 @@ 

                   {

                     /* Go back one column, and force recalculation of the

                        next tab stop.  */

- @@ -218,16 +277,20 @@ unexpand (void)

+ @@ -219,16 +278,20 @@ unexpand (void)

                   }

                 else

                   {
@@ -4333,7 +4271,7 @@ 

                       write_error ();

                     pending = 0;

                     one_blank_before_tab_stop = false;

- @@ -237,16 +300,17 @@ unexpand (void)

+ @@ -238,16 +301,17 @@ unexpand (void)

                 convert &= convert_entire_line || blank;

               }

   
@@ -4354,173 +4292,8 @@ 

       }

   }

   

- diff --git a/src/uniq.c b/src/uniq.c

- index fab04de..2e96dcb 100644

- --- a/src/uniq.c

- +++ b/src/uniq.c

- @@ -21,6 +21,17 @@

-  #include <getopt.h>

-  #include <sys/types.h>

-  

- +/* Get mbstate_t, mbrtowc(). */

- +#if HAVE_WCHAR_H

- +# include <wchar.h>

- +#endif

- +

- +/* Get isw* functions. */

- +#if HAVE_WCTYPE_H

- +# include <wctype.h>

- +#endif

- +#include <assert.h>

- +

-  #include "system.h"

-  #include "argmatch.h"

-  #include "linebuffer.h"

- @@ -31,6 +42,18 @@

-  #include "memcasecmp.h"

-  #include "quote.h"

-  

- +/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC

- +   installation; work around this configuration error.  */

- +#if !defined MB_LEN_MAX || MB_LEN_MAX < 2

- +# define MB_LEN_MAX 16

- +#endif

- +

- +/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t.  */

- +#if HAVE_MBRTOWC && defined mbstate_t

- +# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)

- +#endif

- +

- +

-  /* The official name of this program (e.g., no 'g' prefix).  */

-  #define PROGRAM_NAME "uniq"

-  

- @@ -137,6 +160,10 @@ enum

-    GROUP_OPTION = CHAR_MAX + 1

-  };

-  

- +/* Function pointers. */

- +static char *

- +(*find_field) (struct linebuffer *line);

- +

-  static struct option const longopts[] =

-  {

-    {"count", no_argument, nullptr, 'c'},

- @@ -252,7 +279,7 @@ size_opt (char const *opt, char const *msgid)

-  

-  ATTRIBUTE_PURE

-  static char *

- -find_field (struct linebuffer const *line)

- +find_field_uni (struct linebuffer *line)

-  {

-    size_t count;

-    char const *lp = line->buffer;

- @@ -272,6 +299,83 @@ find_field (struct linebuffer const *line)

-    return line->buffer + i;

-  }

-  

- +#if HAVE_MBRTOWC

- +

- +# define MBCHAR_TO_WCHAR(WC, MBLENGTH, LP, POS, SIZE, STATEP, CONVFAIL)  \

- +  do                                                                        \

- +    {                                                                        \

- +      mbstate_t state_bak;                                                \

- +                                                                        \

- +      CONVFAIL = 0;                                                        \

- +      state_bak = *STATEP;                                                \

- +                                                                        \

- +      MBLENGTH = mbrtowc (&WC, LP + POS, SIZE - POS, STATEP);                \

- +                                                                        \

- +      switch (MBLENGTH)                                                        \

- +        {                                                                \

- +        case (size_t)-2:                                                \

- +        case (size_t)-1:                                                \

- +          *STATEP = state_bak;                                                \

- +          CONVFAIL++;                                                        \

- +          /* Fall through */                                                \

- +        case 0:                                                                \

- +          MBLENGTH = 1;                                                        \

- +        }                                                                \

- +    }                                                                        \

- +  while (0)

- +

- +static char *

- +find_field_multi (struct linebuffer *line)

- +{

- +  size_t count;

- +  char *lp = line->buffer;

- +  size_t size = line->length - 1;

- +  size_t pos;

- +  size_t mblength;

- +  wchar_t wc;

- +  mbstate_t *statep;

- +  int convfail = 0;

- +

- +  pos = 0;

- +  statep = &(line->state);

- +

- +  /* skip fields. */

- +  for (count = 0; count < skip_fields && pos < size; count++)

- +    {

- +      while (pos < size)

- +        {

- +          MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);

- +

- +          if (convfail || !(iswblank (wc) || wc == '\n'))

- +            {

- +              pos += mblength;

- +              break;

- +            }

- +          pos += mblength;

- +        }

- +

- +      while (pos < size)

- +        {

- +          MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);

- +

- +          if (!convfail && (iswblank (wc) || wc == '\n'))

- +            break;

- +

- +          pos += mblength;

- +        }

- +    }

- +

- +  /* skip fields. */

- +  for (count = 0; count < skip_chars && pos < size; count++)

- +    {

- +      MBCHAR_TO_WCHAR (wc, mblength, lp, pos, size, statep, convfail);

- +      pos += mblength;

- +    }

- +

- +  return lp + pos;

- +}

- +#endif

- +

-  /* Return false if two strings OLD and NEW match, true if not.

-     OLD and NEW point not to the beginnings of the lines

-     but rather to the beginnings of the fields to compare.

- @@ -495,6 +599,19 @@ main (int argc, char **argv)

-  

-    atexit (close_stdout);

-  

- +#if HAVE_MBRTOWC

- +  if (MB_CUR_MAX > 1)

- +    {

- +      find_field = find_field_multi;

- +    }

- +  else

- +#endif

- +    {

- +      find_field = find_field_uni;

- +    }

- +

- +

- +

-    skip_chars = 0;

-    skip_fields = 0;

-    check_chars = SIZE_MAX;

  diff --git a/tests/Coreutils.pm b/tests/Coreutils.pm

- index f147401..3ce5da9 100644

+ index 18e7bea..24a141b 100644

  --- a/tests/Coreutils.pm

  +++ b/tests/Coreutils.pm

  @@ -269,6 +269,9 @@ sub run_tests ($$$$$)
@@ -4534,7 +4307,7 @@ 

           {

             warn "$program_name: $test_name: test name is too long (> $max)\n";

  diff --git a/tests/expand/mb.sh b/tests/expand/mb.sh

- new file mode 100755

+ new file mode 100644

  index 0000000..dd6007c

  --- /dev/null

  +++ b/tests/expand/mb.sh
@@ -4723,7 +4496,7 @@ 

  +

  +exit $fail

  diff --git a/tests/i18n/sort.sh b/tests/i18n/sort.sh

- new file mode 100755

+ new file mode 100644

  index 0000000..26c95de

  --- /dev/null

  +++ b/tests/i18n/sort.sh
@@ -4758,10 +4531,10 @@ 

  +

  +Exit $fail

  diff --git a/tests/local.mk b/tests/local.mk

- index b74a4a2..fe6e557 100644

+ index fdbf369..a6ce49c 100644

  --- a/tests/local.mk

  +++ b/tests/local.mk

- @@ -384,6 +384,8 @@ all_tests =					\

+ @@ -387,6 +387,8 @@ all_tests =					\

     tests/sort/sort-discrim.sh			\

     tests/sort/sort-files0-from.pl		\

     tests/sort/sort-float.sh			\
@@ -4770,7 +4543,7 @@ 

     tests/sort/sort-h-thousands-sep.sh		\

     tests/sort/sort-merge.pl			\

     tests/sort/sort-merge-fdlimit.sh		\

- @@ -585,6 +587,7 @@ all_tests =					\

+ @@ -590,6 +592,7 @@ all_tests =					\

     tests/du/threshold.sh				\

     tests/du/trailing-slash.sh			\

     tests/du/two-args.sh				\
@@ -4778,7 +4551,7 @@ 

     tests/id/gnu-zero-uids.sh			\

     tests/id/no-context.sh			\

     tests/id/context.sh				\

- @@ -738,6 +741,7 @@ all_tests =					\

+ @@ -746,6 +749,7 @@ all_tests =					\

     tests/touch/read-only.sh			\

     tests/touch/relative.sh			\

     tests/touch/trailing-slash.sh			\
@@ -4787,7 +4560,7 @@ 

   

   # See tests/factor/create-test.sh.

  diff --git a/tests/misc/expand.pl b/tests/misc/expand.pl

- index 06261ac..7dd813e 100755

+ index 11f3fc4..d609a2c 100755

  --- a/tests/misc/expand.pl

  +++ b/tests/misc/expand.pl

  @@ -27,6 +27,15 @@ my $prog = 'expand';
@@ -4854,7 +4627,7 @@ 

   my $verbose = $ENV{VERBOSE};

   

  diff --git a/tests/misc/fold.pl b/tests/misc/fold.pl

- index a94072f..136a82e 100755

+ index 00b4362..7d51bea 100755

  --- a/tests/misc/fold.pl

  +++ b/tests/misc/fold.pl

  @@ -20,9 +20,18 @@ use strict;
@@ -4926,78 +4699,8 @@ 

  -my $prog = 'fold';

   my $fail = run_tests ($program_name, $prog, \@Tests, $save_temps, $verbose);

   exit $fail;

- diff --git a/tests/misc/join.pl b/tests/misc/join.pl

- index 2ca8567..1d01a3d 100755

- --- a/tests/misc/join.pl

- +++ b/tests/misc/join.pl

- @@ -25,6 +25,15 @@ my $limits = getlimits ();

-  

-  my $prog = 'join';

-  

- +my $try = "Try \`$prog --help' for more information.\n";

- +my $inval = "$prog: invalid byte, character or field list\n$try";

- +

- +my $mb_locale;

- +#Comment out next line to disable multibyte tests

- +$mb_locale = $ENV{LOCALE_FR_UTF8};

- +! defined $mb_locale || $mb_locale eq 'none'

- +  and $mb_locale = 'C';

- +

-  my $delim = chr 0247;

-  sub t_subst ($)

-  {

- @@ -333,8 +342,49 @@ foreach my $t (@tv)

-      push @Tests, $new_ent;

-    }

-  

- +# Add _POSIX2_VERSION=199209 to the environment of each test

- +# that uses an old-style option like +1.

- +if ($mb_locale ne 'C')

- +  {

- +    # Duplicate each test vector, appending "-mb" to the test name and

- +    # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we

- +    # provide coverage for the distro-added multi-byte code paths.

- +    my @new;

- +    foreach my $t (@Tests)

- +      {

- +        my @new_t = @$t;

- +        my $test_name = shift @new_t;

- +

- +        # Depending on whether join is multi-byte-patched,

- +        # it emits different diagnostics:

- +        #   non-MB: invalid byte or field list

- +        #   MB:     invalid byte, character or field list

- +        # Adjust the expected error output accordingly.

- +        if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval}

- +            (@new_t))

- +          {

- +            my $sub = {ERR_SUBST => 's/, character//'};

- +            push @new_t, $sub;

- +            push @$t, $sub;

- +          }

- +        #Adjust the output some error messages including test_name for mb

- +        if (grep {ref $_ eq 'HASH' && exists $_->{ERR}}

- +             (@new_t))

- +          {

- +            my $sub2 = {ERR_SUBST => "s/$test_name-mb/$test_name/"};

- +            push @new_t, $sub2;

- +            push @$t, $sub2;

- +          }

- +        push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}];

- +      }

- +    push @Tests, @new;

- +  }

- +

-  @Tests = triple_test \@Tests;

-  

- +#skip invalid-j-mb test, it is failing because of the format

- +@Tests = grep {$_->[0] ne 'invalid-j-mb'} @Tests;

- +

-  my $save_temps = $ENV{DEBUG};

-  my $verbose = $ENV{VERBOSE};

-  

  diff --git a/tests/misc/sort-mb-tests.sh b/tests/misc/sort-mb-tests.sh

- new file mode 100755

+ new file mode 100644

  index 0000000..11836ba

  --- /dev/null

  +++ b/tests/misc/sort-mb-tests.sh
@@ -5048,7 +4751,7 @@ 

  +

  +Exit $fail

  diff --git a/tests/misc/unexpand.pl b/tests/misc/unexpand.pl

- index d78a1bc..2b9137d 100755

+ index 76bcbd4..59eb819 100755

  --- a/tests/misc/unexpand.pl

  +++ b/tests/misc/unexpand.pl

  @@ -27,6 +27,14 @@ my $limits = getlimits ();
@@ -5105,7 +4808,7 @@ 

   my $verbose = $ENV{VERBOSE};

   

  diff --git a/tests/pr/pr-tests.pl b/tests/pr/pr-tests.pl

- index eafc13d..c1eca2a 100755

+ index 6b34e0b..34b4aeb 100755

  --- a/tests/pr/pr-tests.pl

  +++ b/tests/pr/pr-tests.pl

  @@ -24,6 +24,15 @@ use strict;
@@ -5174,7 +4877,7 @@ 

   my $verbose = $ENV{VERBOSE};

   

  diff --git a/tests/sort/sort-merge.pl b/tests/sort/sort-merge.pl

- index bd439ef..2ccdf87 100755

+ index 89eed0c..b855d73 100755

  --- a/tests/sort/sort-merge.pl

  +++ b/tests/sort/sort-merge.pl

  @@ -26,6 +26,15 @@ my $prog = 'sort';
@@ -5234,7 +4937,7 @@ 

   my $verbose = $ENV{VERBOSE};

   

  diff --git a/tests/sort/sort.pl b/tests/sort/sort.pl

- index 46f1d7a..bb38f5b 100755

+ index d49f65f..ebba925 100755

  --- a/tests/sort/sort.pl

  +++ b/tests/sort/sort.pl

  @@ -24,10 +24,15 @@ my $prog = 'sort';
@@ -5302,7 +5005,7 @@ 

   my $save_temps = $ENV{DEBUG};

   my $verbose = $ENV{VERBOSE};

  diff --git a/tests/unexpand/mb.sh b/tests/unexpand/mb.sh

- new file mode 100755

+ new file mode 100644

  index 0000000..8a82d74

  --- /dev/null

  +++ b/tests/unexpand/mb.sh
@@ -5479,82 +5182,6 @@ 

  +

  +LC_ALL=C unexpand in in > out || fail=1

  +compare exp out > /dev/null 2>&1 || fail=1

- diff --git a/tests/uniq/uniq.pl b/tests/uniq/uniq.pl

- index a6354dc..e43cd6e 100755

- --- a/tests/uniq/uniq.pl

- +++ b/tests/uniq/uniq.pl

- @@ -23,9 +23,17 @@ my $limits = getlimits ();

-  my $prog = 'uniq';

-  my $try = "Try '$prog --help' for more information.\n";

-  

- +my $inval = "$prog: invalid byte, character or field list\n$try";

- +

-  # Turn off localization of executable's output.

-  @ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;

-  

- +my $mb_locale;

- +#Comment out next line to disable multibyte tests

- +$mb_locale = $ENV{LOCALE_FR_UTF8};

- +! defined $mb_locale || $mb_locale eq 'none'

- +  and $mb_locale = 'C';

- +

-  # When possible, create a "-z"-testing variant of each test.

-  sub add_z_variants($)

-  {

- @@ -262,6 +270,53 @@ foreach my $t (@Tests)

-        and push @$t, {ENV=>'_POSIX2_VERSION=199209'};

-    }

-  

- +if ($mb_locale ne 'C')

- +  {

- +    # Duplicate each test vector, appending "-mb" to the test name and

- +    # inserting {ENV => "LC_ALL=$mb_locale"} in the copy, so that we

- +    # provide coverage for the distro-added multi-byte code paths.

- +    my @new;

- +    foreach my $t (@Tests)

- +      {

- +        my @new_t = @$t;

- +        my $test_name = shift @new_t;

- +

- +        # Depending on whether uniq is multi-byte-patched,

- +        # it emits different diagnostics:

- +        #   non-MB: invalid byte or field list

- +        #   MB:     invalid byte, character or field list

- +        # Adjust the expected error output accordingly.

- +        if (grep {ref $_ eq 'HASH' && exists $_->{ERR} && $_->{ERR} eq $inval}

- +            (@new_t))

- +          {

- +            my $sub = {ERR_SUBST => 's/, character//'};

- +            push @new_t, $sub;

- +            push @$t, $sub;

- +          }

- +        # In test #145, replace the each ‘...’ by '...'.

- +        if ($test_name =~ "145")

- +          {

- +            my $sub = { ERR_SUBST => "s/‘([^’]+)’/'\$1'/g"};

- +            push @new_t, $sub;

- +            push @$t, $sub;

- +          }

- +        next if (   $test_name =~ "schar"

- +                 or $test_name =~ "^obs-plus"

- +                 or $test_name =~ "119");

- +        push @new, ["$test_name-mb", @new_t, {ENV => "LC_ALL=$mb_locale"}];

- +      }

- +    push @Tests, @new;

- +   }

- +

- +# Remember that triple_test creates from each test with exactly one "IN"

- +# file two more tests (.p and .r suffix on name) corresponding to reading

- +# input from a file and from a pipe.  The pipe-reading test would fail

- +# due to a race condition about 1 in 20 times.

- +# Remove the IN_PIPE version of the "output-is-input" test above.

- +# The others aren't susceptible because they have three inputs each.

- +

- +@Tests = grep {$_->[0] ne 'output-is-input.p'} @Tests;

- +

-  @Tests = add_z_variants \@Tests;

-  @Tests = triple_test \@Tests;

-  

  -- 

- 2.43.0

+ 2.44.0

  

@@ -0,0 +1,65 @@ 

+ From cef9cccce395cd80cd5ac42a4fe6c3909be1c0b5 Mon Sep 17 00:00:00 2001

+ From: rpm-build <rpm-build>

+ Date: Tue, 2 Apr 2024 14:11:26 +0100

+ Subject: [PATCH] coreutils-python3.patch

+ 

+ ---

+  init.cfg                              | 4 ++--

+  tests/d_type-check                    | 2 +-

+  tests/du/move-dir-while-traversing.sh | 6 +++---

+  3 files changed, 6 insertions(+), 6 deletions(-)

+ 

+ diff --git a/init.cfg b/init.cfg

+ index b06965a..08413ee 100644

+ --- a/init.cfg

+ +++ b/init.cfg

+ @@ -581,10 +581,10 @@ seek_data_capable_()

+  # Skip the current test if "." lacks d_type support.

+  require_dirent_d_type_()

+  {

+ -  python < /dev/null \

+ +  python3 < /dev/null \

+      || skip_ python missing: assuming no d_type support

+  

+ -  python "$abs_srcdir"/tests/d_type-check \

+ +  python3 "$abs_srcdir"/tests/d_type-check \

+      || skip_ requires d_type support

+  }

+  

+ diff --git a/tests/d_type-check b/tests/d_type-check

+ index 1a2f76f..42d3924 100644

+ --- a/tests/d_type-check

+ +++ b/tests/d_type-check

+ @@ -1,4 +1,4 @@

+ -#!/usr/bin/python

+ +#!/usr/bin/python3

+  # Exit 0 if "." and "./tempfile" have useful d_type information, else 1.

+  # Intended to exit 0 only on Linux/GNU systems.

+  import os

+ diff --git a/tests/du/move-dir-while-traversing.sh b/tests/du/move-dir-while-traversing.sh

+ index 830a69e..7344ddf 100755

+ --- a/tests/du/move-dir-while-traversing.sh

+ +++ b/tests/du/move-dir-while-traversing.sh

+ @@ -21,8 +21,8 @@ print_ver_ du

+  require_trap_signame_

+  

+  # We use a python-inotify script, so...

+ -python -m pyinotify -h > /dev/null \

+ -  || skip_ 'python inotify package not installed'

+ +python3 -m pyinotify -h > /dev/null \

+ +  || skip_ 'python3 inotify package not installed'

+  

+  # Move a directory "up" while du is processing its sub-directories.

+  # While du is processing a hierarchy .../B/C/D/... this script

+ @@ -33,7 +33,7 @@ python -m pyinotify -h > /dev/null \

+  # rename syscall before du finishes processing the subtree under D/.

+  

+  cat <<'EOF' > inotify-watch-for-dir-access.py

+ -#!/usr/bin/env python

+ +#!/usr/bin/env python3

+  import pyinotify as pn

+  import os,sys

+  

+ -- 

+ 2.44.0

+ 

file modified
+6 -6
@@ -1,4 +1,4 @@ 

- From 88ba186955add2b230c017749d5622f7a0d62177 Mon Sep 17 00:00:00 2001

+ From 78970c915b8556fcec4622e948a37dd8e34efe6d Mon Sep 17 00:00:00 2001

  From: rpm-build <rpm-build>

  Date: Wed, 30 Aug 2023 17:19:58 +0200

  Subject: [PATCH] coreutils-selinux.patch
@@ -9,10 +9,10 @@ 

   2 files changed, 29 insertions(+), 2 deletions(-)

  

  diff --git a/src/cp.c b/src/cp.c

- index 04a5cbe..7a364e5 100644

+ index 28b0217..897379f 100644

  --- a/src/cp.c

  +++ b/src/cp.c

- @@ -989,7 +989,7 @@ main (int argc, char **argv)

+ @@ -997,7 +997,7 @@ main (int argc, char **argv)

     selinux_enabled = (0 < is_selinux_enabled ());

     cp_option_init (&x);

   
@@ -21,7 +21,7 @@ 

                              long_opts, nullptr))

            != -1)

       {

- @@ -1041,6 +1041,23 @@ main (int argc, char **argv)

+ @@ -1049,6 +1049,23 @@ main (int argc, char **argv)

             copy_contents = true;

             break;

   
@@ -46,7 +46,7 @@ 

             x.preserve_links = true;

             x.dereference = DEREF_NEVER;

  diff --git a/src/install.c b/src/install.c

- index 31a48f1..ce9fa2d 100644

+ index accd0fd..b686fe9 100644

  --- a/src/install.c

  +++ b/src/install.c

  @@ -807,7 +807,7 @@ main (int argc, char **argv)
@@ -83,5 +83,5 @@ 

             use_default_selinux_context = false;

             break;

  -- 

- 2.41.0

+ 2.44.0

  

file modified
+23 -11
@@ -1,7 +1,7 @@ 

  Summary: A set of basic GNU tools commonly used in shell scripts

  Name:    coreutils

- Version: 9.4

- Release: 6%{?dist}

+ Version: 9.5

+ Release: 1%{?dist}

  # some used parts of gnulib are under various variants of LGPL

  License: GPL-3.0-or-later AND GFDL-1.3-no-invariants-or-later AND LGPL-2.1-or-later AND LGPL-3.0-or-later

  Url:     https://www.gnu.org/software/coreutils/
@@ -26,18 +26,12 @@ 

  # downstream changes to default DIR_COLORS

  Patch102: coreutils-8.32-DIR_COLORS.patch

  

+ # use python3 in tests

+ Patch103: coreutils-python3.patch

+ 

  # df --direct

  Patch104: coreutils-df-direct.patch

  

- # fix crash with --enable-systemd

- Patch105: coreutils-9.4-systemd-coredump.patch

- 

- # fix buffer overflow in split (CVE-2024-0684)

- Patch106: coreutils-9.4-CVE-2024-0684.patch

- 

- # fix tail on kernels with 64k pagesize

- Patch107: coreutils-9.4-tail-64k-pages.patch

- 

  # (sb) lin18nux/lsb compliance - multibyte functionality patch

  Patch800: coreutils-i18n.patch

  
@@ -70,13 +64,26 @@ 

  BuildRequires: gnupg2

  

  # test-only dependencies

+ BuildRequires: acl

+ BuildRequires: gdb

  BuildRequires: perl-interpreter

+ BuildRequires: perl(Expect)

  BuildRequires: perl(FileHandle)

+ BuildRequires: python3

+ %if 0%{?fedora}

+ BuildRequires: python3-inotify

+ %endif

+ BuildRequires: tzdata

+ %ifarch %valgrind_arches

+ BuildRequires: valgrind

+ %endif

+ 

  %if 23 < 0%{?fedora} || 7 < 0%{?rhel}

  # needed by i18n test-cases

  BuildRequires: glibc-langpack-en

  BuildRequires: glibc-langpack-fr

  BuildRequires: glibc-langpack-ko

+ BuildRequires: glibc-langpack-sv

  %endif

  

  Requires: %{name}-common = %{version}-%{release}
@@ -262,6 +269,11 @@ 

  %license COPYING

  

  %changelog

+ * Tue Apr 02 2024 Lukáš Zaoral <lzaoral@redhat.com> - 9.5-1

+ - rebase to latest upstream version (rhbz#2272063)

+ - sync i18n patch with SUSE (Kudos to Berny Völker!)

+ - add some test dependencies to execute additional part of the upstream test-suite

+ 

  * Mon Jan 29 2024 Lukáš Zaoral <lzaoral@redhat.com> - 9.4-6

  - fix tail on kernels with 64k page sizes (RHEL-22866)

  

file modified
+2 -2
@@ -1,2 +1,2 @@ 

- SHA512 (coreutils-9.4.tar.xz) = 7c55ee23b685a0462bbbd118b04d25278c902604a0dcf3bf4f8bf81faa0500dee5a7813cba6f586d676c98e520cafd420f16479619305e94ea6798d8437561f5

- SHA512 (coreutils-9.4.tar.xz.sig) = 9674f783f592c4f3e5c708ff31426ac009bf132fd0005019571bf39c8a1627efb5351c6cecc7faecb1eff8fa2970318666593bffc0eda9c750159e174ef42524

+ SHA512 (coreutils-9.5.tar.xz) = 2ca0deac4dc10a80fd0c6fd131252e99d457fd03b7bd626a6bc74fe5a0529c0a3d48ce1f5da1d3b3a7a150a1ce44f0fbb6b68a6ac543dfd5baa3e71f5d65401c

+ SHA512 (coreutils-9.5.tar.xz.sig) = 029997e0f4ee64e561853cff7c8a124f58cc891598595b44c4a46f9813b4b71c9d677464bc8a26d294e9971832f4b87c23777fea4fac6e8e30f06ad93b9957d5

  • sync i18n patch with SUSE (Kudos to Berny Völker!)
  • add some test dependencies to execute additional parts of the upstream test-suite

Resolves: rhbz#2272063

rebased onto a91df5d

2 months ago

Metadata Update from @lzaoral:
- Request assigned

2 months ago

Pull-Request has been merged by lzaoral

2 months ago