Blob Blame History Raw
diff -urNp coreutils-7.2-orig/lib/gnulib.mk coreutils-7.2/lib/gnulib.mk
--- coreutils-7.2-orig/lib/gnulib.mk	2009-03-26 09:12:33.000000000 +0100
+++ coreutils-7.2/lib/gnulib.mk	2009-09-08 10:08:09.000000000 +0200
@@ -1600,6 +1600,12 @@ EXTRA_DIST += str-kmp.h
 
 ## end   gnulib module mbsstr
 
+## begin gnulib module mbsalign
+
+libcoreutils_a_SOURCES += mbsalign.c mbsalign.h
+
+## end   gnulib module mbsalign
+
 ## begin gnulib module mbswidth
 
 libcoreutils_a_SOURCES += mbswidth.h mbswidth.c
diff -urNp coreutils-7.2-orig/lib/mbsalign.c coreutils-7.2/lib/mbsalign.c
--- coreutils-7.2-orig/lib/mbsalign.c	1970-01-01 01:00:00.000000000 +0100
+++ coreutils-7.2/lib/mbsalign.c	2009-08-15 17:25:32.000000000 +0200
@@ -0,0 +1,236 @@
+/* Align/Truncate a string in a given screen width
+   Copyright (C) 2009 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* Written by Pádraig Brady.  */
+
+#include <config.h>
+#include "mbsalign.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <limits.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#ifndef MIN
+# define MIN(a, b) ((a) < (b) ? (a) : (b))
+#endif
+
+/* Overwrite each character of the NUL-terminated wide string WCHARS
+   that iswprint rejects with U+FFFD, the replacement character.
+   Return true if any character was overwritten, false otherwise.  */
+static bool
+wc_ensure_printable (wchar_t *wchars)
+{
+  bool any_replaced = false;
+  wchar_t *p;
+
+  for (p = wchars; *p != L'\0'; p++)
+    {
+      if (iswprint ((wint_t) *p))
+        continue;
+      *p = 0xFFFD; /* L'\uFFFD' (replacement char) */
+      any_replaced = true;
+    }
+
+  return any_replaced;
+}
+
+/* Shorten the NUL-terminated wide string WC in place so that it occupies
+   at most WIDTH screen cells, as measured by wcwidth.  A character for
+   which wcwidth reports -1 is overwritten with U+FFFD and counted as
+   one cell.  Return the number of cells the shortened string occupies.  */
+static size_t
+wc_truncate (wchar_t *wc, size_t width)
+{
+  size_t used = 0;
+
+  for (; *wc != L'\0'; wc++)
+    {
+      int w = wcwidth (*wc);
+      if (w == -1) /* non printable */
+        {
+          *wc = 0xFFFD; /* L'\uFFFD' (replacement char) */
+          w = 1;
+        }
+      if (used + w > width)
+        break;
+      used += w;
+    }
+
+  *wc = L'\0'; /* terminate right after the last character that fit */
+  return used;
+}
+
+/* FIXME: move this function to gnulib as it's missing on:
+   OpenBSD 3.8, IRIX 5.3, Solaris 2.5.1, mingw, BeOS.
+   Sum the wcwidth of at most N characters of S, stopping at a NUL.
+   Return -1 if a character is non printable or the sum exceeds INT_MAX.  */
+static int
+rpl_wcswidth (const wchar_t *s, size_t n)
+{
+  int total = 0;
+  size_t i;
+
+  for (i = 0; i < n && s[i] != L'\0'; i++)
+    {
+      int w = wcwidth (s[i]);
+      /* Fail on a non printable character, or if adding W would overflow.  */
+      if (w == -1 || total > INT_MAX - w)
+        return -1;
+      total += w;
+    }
+  return total;
+}
+
+/* Store up to N_SPACES blanks at DEST, never storing a blank at or
+   beyond DEST_END, then terminate them with a NUL.  The NUL may land
+   on DEST_END itself, so that byte must be writable.
+   Return a pointer to the terminating NUL.  */
+static char *
+mbs_align_pad (char *dest, const char *dest_end, size_t n_spaces)
+{
+  /* FIXME: Should we pad with "figure space" (\u2007)
+     if non ascii data present?  */
+  size_t room = dest < dest_end ? (size_t) (dest_end - dest) : 0;
+  size_t n = n_spaces < room ? n_spaces : room;
+  memset (dest, ' ', n);
+  dest[n] = '\0';
+  return dest + n;
+}
+
+/* Align a string, SRC, in a field of *WIDTH columns, handling multi-byte
+   characters; write the result into the DEST_SIZE-byte buffer, DEST.
+   ALIGN specifies whether to left- or right-justify or to center.
+   If SRC requires more than *WIDTH columns, truncate it to fit.
+   When centering, the number of trailing spaces may be one less than the
+   number of leading spaces. The FLAGS parameter is unused at present.
+   Return the length in bytes required for the final result, not counting
+   the trailing NUL.  A return value of DEST_SIZE or larger means there
+   wasn't enough space.  DEST is NUL terminated unless DEST_SIZE is 0.
+   Return (size_t) -1, leaving DEST and *WIDTH untouched, upon error
+   (invalid multi-byte sequence in SRC, a replacement character that
+   the locale cannot encode, or malloc failure).
+   Update *WIDTH to indicate how many columns were used before padding.  */
+
+size_t
+mbsalign (const char *src, char *dest, size_t dest_size,
+          size_t *width, mbs_align_t align, int flags)
+{
+  size_t ret = -1;
+  size_t src_size = strlen (src) + 1;
+  char *newstr = NULL;
+  wchar_t *str_wc = NULL;
+  const char *str_to_print = src;
+  size_t n_cols = src_size - 1;
+  size_t n_used_bytes = n_cols; /* Not including NUL */
+  size_t n_spaces = 0;
+  bool conversion = false;
+  bool wc_enabled = false;
+
+  /* In multi-byte locales convert to wide characters
+     to allow easy truncation. Also determine number
+     of screen columns used.  */
+  if (MB_CUR_MAX > 1)
+    {
+      size_t src_chars = mbstowcs (NULL, src, 0);
+      if (src_chars == (size_t) -1)
+        goto mbsalign_cleanup;
+      src_chars += 1; /* make space for NUL */
+      str_wc = malloc (src_chars * sizeof (wchar_t));
+      if (str_wc == NULL)
+        goto mbsalign_cleanup;
+      if (mbstowcs (str_wc, src, src_chars) > 0)
+        {
+          str_wc[src_chars - 1] = L'\0';
+          wc_enabled = true;
+          conversion = wc_ensure_printable (str_wc);
+          n_cols = rpl_wcswidth (str_wc, src_chars);
+        }
+    }
+
+  /* If we transformed or need to truncate the source string
+     then create a modified copy of it.  */
+  if (conversion || (n_cols > *width))
+    {
+      if (wc_enabled)
+        {
+          /* Size the copy after truncating: U+FFFD may take more bytes
+             than the character it replaced, so SRC_SIZE can be too small.  */
+          n_cols = wc_truncate (str_wc, *width);
+          n_used_bytes = wcstombs (NULL, str_wc, 0);
+          if (n_used_bytes == (size_t) -1) /* unencodable replacement */
+            goto mbsalign_cleanup;
+          src_size = n_used_bytes + 1;
+        }
+      else
+        n_used_bytes = n_cols = *width;
+      newstr = malloc (src_size);
+      if (newstr == NULL)
+        goto mbsalign_cleanup;
+      str_to_print = newstr;
+      if (wc_enabled)
+        wcstombs (newstr, str_wc, src_size);
+      else
+        {
+          memcpy (newstr, src, n_cols);
+          newstr[n_cols] = '\0';
+        }
+    }
+
+  if (*width > n_cols)
+    n_spaces = *width - n_cols;
+  /* indicate to caller how many cells needed (not including padding).  */
+  *width = n_cols;
+  /* indicate to caller how many bytes needed (not including NUL).  */
+  ret = n_used_bytes + (n_spaces * 1);
+
+  /* Write as much NUL terminated output to DEST as possible.  */
+  if (dest_size != 0)
+    {
+      char *dest_end = dest + dest_size - 1;
+      size_t start_spaces = n_spaces / 2 + n_spaces % 2;
+      size_t end_spaces = n_spaces / 2; /* centred unless changed below */
+
+      if (align == MBS_ALIGN_LEFT)
+        {
+          start_spaces = 0;
+          end_spaces = n_spaces;
+        }
+      else if (align == MBS_ALIGN_RIGHT)
+        {
+          start_spaces = n_spaces;
+          end_spaces = 0;
+        }
+
+      dest = mbs_align_pad (dest, dest_end, start_spaces);
+      dest = mempcpy (dest, str_to_print,
+                      MIN (n_used_bytes, (size_t) (dest_end - dest)));
+      dest = mbs_align_pad (dest, dest_end, end_spaces);
+    }
+
+mbsalign_cleanup:
+  free (str_wc);
+  free (newstr);
+  return ret;
+}
+/*
+ * Local variables:
+ *  indent-tabs-mode: nil
+ * End:
+ */
diff -urNp coreutils-7.2-orig/lib/mbsalign.h coreutils-7.2/lib/mbsalign.h
--- coreutils-7.2-orig/lib/mbsalign.h	1970-01-01 01:00:00.000000000 +0100
+++ coreutils-7.2/lib/mbsalign.h	2009-08-15 17:25:32.000000000 +0200
@@ -0,0 +1,23 @@
+/* Align/Truncate a string in a given screen width
+   Copyright (C) 2009 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+#ifndef MBSALIGN_H
+# define MBSALIGN_H
+# include <stddef.h>
+/* Where mbsalign places SRC within the field.  */
+typedef enum { MBS_ALIGN_LEFT, MBS_ALIGN_RIGHT, MBS_ALIGN_CENTER } mbs_align_t;
+size_t mbsalign (const char *src, char *dest, size_t dest_size,
+                 size_t *width, mbs_align_t align, int flags);
+#endif /* MBSALIGN_H */
diff -urNp coreutils-7.2-orig/src/ls.c coreutils-7.2/src/ls.c
--- coreutils-7.2-orig/src/ls.c	2009-09-08 10:00:39.000000000 +0200
+++ coreutils-7.2/src/ls.c	2009-09-08 10:13:41.000000000 +0200
@@ -63,6 +63,10 @@
 #include <selinux/selinux.h>
 #include <wchar.h>
 
+#if HAVE_LANGINFO_CODESET
+# include <langinfo.h>
+#endif
+
 /* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
    present.  */
 #ifndef SA_NOCLDSTOP
@@ -105,6 +109,7 @@
 #include "strftime.h"
 #include "xstrtol.h"
 #include "areadlink.h"
+#include "mbsalign.h"
 
 #define PROGRAM_NAME (ls_mode == LS_LS ? "ls" \
 		      : (ls_mode == LS_MULTI_COL \
@@ -719,6 +724,11 @@ static char const *long_time_format[2] =
        screen columns small, because many people work in windows with
        only 80 columns.  But make this as wide as the other string
        below, for recent files.  */
+    /* TRANSLATORS: ls output needs to be aligned for ease of reading,
+       so be wary of using variable width fields from the locale.
+       Note %b is handled specially by ls and aligned correctly.
+       Note also that specifying a width as in %5b is erroneous as strftime
+       will count bytes rather than characters in multibyte locales.  */
     N_("%b %e  %Y"),
     /* strftime format for recent files (younger than 6 months), in -l
        output.  This should contain the month, day and time (at
@@ -727,6 +737,11 @@ static char const *long_time_format[2] =
        screen columns small, because many people work in windows with
        only 80 columns.  But make this as wide as the other string
        above, for non-recent files.  */
+    /* TRANSLATORS: ls output needs to be aligned for ease of reading,
+       so be wary of using variable width fields from the locale.
+       Note %b is handled specially by ls and aligned correctly.
+       Note also that specifying a width as in %5b is erroneous as strftime
+       will count bytes rather than characters in multibyte locales.  */
     N_("%b %e %H:%M")
   };
 
@@ -1007,6 +1022,57 @@ dired_dump_obstack (const char *prefix, 
     }
 }
 
+/* Read the abbreviated month names from the locale, to align them
+   and to determine the max width of the field and to truncate names
+   greater than our max allowed.
+   Note even though this handles multibyte locales correctly
+   it's not restricted to them as single byte locales can have
+   variable width abbreviated months and also precomputing/caching
+   the names was seen to increase the performance of ls significantly.  */
+
+/* max number of display cells to use */
+enum { MAX_MON_WIDTH = 5 };
+/* In the unlikely event that the abmon[] storage is not big enough
+   an error message will be displayed, and we revert to using
+   unmodified abbreviated month names from the locale database.  */
+static char abmon[12][MAX_MON_WIDTH * 2 * MB_LEN_MAX + 1];
+/* minimum width needed to align %b, 0 => don't use precomputed values.  */
+static size_t required_mon_width;
+
+/* Fill abmon[] with the locale's abbreviated month names, each padded
+   or truncated by mbsalign to one common width of at most MAX_MON_WIDTH
+   cells.  Return that width, which is also left in required_mon_width;
+   0 means the precomputed names must not be used.  */
+static size_t
+abmon_init (void)
+{
+#ifdef HAVE_NL_LANGINFO
+  required_mon_width = MAX_MON_WIDTH;
+  size_t curr_max_width;
+  /* Realign to the widest name found until the width stops shrinking.  */
+  do
+    {
+      curr_max_width = required_mon_width;
+      required_mon_width = 0;
+      for (int i = 0; i < 12; i++)
+        {
+          size_t width = curr_max_width;
+          size_t req = mbsalign (nl_langinfo (ABMON_1 + i), abmon[i],
+                                 sizeof (abmon[i]), &width, MBS_ALIGN_LEFT, 0);
+          if (req == (size_t) -1 || req >= sizeof (abmon[i]))
+            {
+              required_mon_width = 0; /* ignore precomputed strings.  */
+              return required_mon_width;
+            }
+          required_mon_width = MAX (required_mon_width, width);
+        }
+    }
+  while (curr_max_width > required_mon_width);
+#endif
+  return required_mon_width;
+}
+
+
 static size_t
 dev_ino_hash (void const *x, size_t table_size)
 {
@@ -1997,6 +2063,10 @@ decode_switches (int argc, char **argv)
 		  }
 	      }
 	  }
+       /* Note we leave %5b etc. alone so user widths/flags are honoured.  */
+       if (strstr(long_time_format[0],"%b") || strstr(long_time_format[1],"%b"))
+        if (!abmon_init())
+          error (0, 0, _("error initializing month strings"));
     }
 
   return optind;
@@ -3375,6 +3445,35 @@ print_current_files (void)
     }
 }
 
+/* Format TM into the SIZE-byte buffer SRC like nstrftime, but first replace
+   the first %b conversion in FMT with the precomputed aligned month name.
+   A "%%" pair is a literal percent sign, so the b in "%%b" is left alone.
+   Note on glibc-2.7 on linux at least this speeds up the whole `ls -lU`
+   process by around 17%, compared to letting strftime() handle the %b.  */
+static size_t
+align_nstrftime (char *src, size_t size, char const *fmt, struct tm const *tm,
+                 int utc, int ns)
+{
+  char const *nfmt = fmt;
+  /* In the unlikely event that rpl_fmt below is not large enough,
+     the replacement is not done.  A malloc here slows ls down by 2%  */
+  char rpl_fmt[sizeof (abmon[0]) + 100];
+  char const *pb = required_mon_width ? fmt : "";
+
+  /* Stop at the first real %b, stepping over "%%" so that it cannot pair.  */
+  for (; *pb && !(pb[0] == '%' && pb[1] == 'b'); pb++)
+    if (pb[0] == '%' && pb[1] == '%')
+      pb++;
+  if (*pb && strlen (fmt) < sizeof (rpl_fmt) - sizeof (abmon[0]) + 2)
+    {
+      char *pfmt = mempcpy (rpl_fmt, fmt, pb - fmt);
+      pfmt = stpcpy (pfmt, abmon[tm->tm_mon]);
+      strcpy (pfmt, pb + 2);
+      nfmt = rpl_fmt;
+    }
+  return nstrftime (src, size, nfmt, tm, utc, ns);
+}
+
 /* Return the expected number of columns in a long-format time stamp,
    or zero if it cannot be calculated.  */
 
@@ -3399,7 +3498,7 @@ long_time_expected_width (void)
       if (tm)
 	{
 	  size_t len =
-	    nstrftime (buf, sizeof buf, long_time_format[0], tm, 0, 0);
+	    align_nstrftime (buf, sizeof buf, long_time_format[0], tm, 0, 0);
 	  if (len != 0)
 	    width = mbsnwidth (buf, len, 0);
 	}
@@ -3740,8 +3839,8 @@ print_long_format (const struct fileinfo
 
       /* We assume here that all time zones are offset from UTC by a
 	 whole number of seconds.  */
-      s = nstrftime (p, TIME_STAMP_LEN_MAXIMUM + 1, fmt,
-		     when_local, 0, when_timespec.tv_nsec);
+      s = align_nstrftime (p, TIME_STAMP_LEN_MAXIMUM + 1, fmt,
+		           when_local, 0, when_timespec.tv_nsec);
     }
 
   if (s || !*p)