[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
coreutils 'ls' fix for multibyte user and group names
From: Paul Eggert
Subject: coreutils 'ls' fix for multibyte user and group names
Date: Mon, 21 Jun 2004 00:21:20 -0700
User-agent: Gnus/5.1006 (Gnus v5.10.6) Emacs/21.3 (gnu/linux)
coreutils 'ls' mishandles user or group names that contain multibyte
characters where the number of columns is not equal to the number of
bytes. For example, if a user name is "castaneda" (except that the
"n" has a tilde over it), and if the current locale is en_US.utf8,
then the n-with-a-tilde consumes two bytes but only one print column.
Current GNU "ls -l" outputs something like this:
-rw-r--r-- 1 eggert     eggert 4127 2004-06-12 23:18 file1
-rw-r--r-- 1 castaneda eggert  753 2004-06-20 23:58 file2
(again, where the "n" in "castaneda" has a tilde over it), so the
columns don't line up. With the patch proposed below, the output
looks like this instead:
-rw-r--r-- 1 eggert    eggert 4127 2004-06-12 23:18 file1
-rw-r--r-- 1 castaneda eggert  753 2004-06-20 23:58 file2
2004-06-21 Paul Eggert <address@hidden>
Fix bug: GNU 'ls' didn't count columns correctly if user or group
names contained multibyte characters where the column count
differed from the byte count. This patch also corrects
some comments.
* src/ls.c (format_user_or_group): New function, which counts
columns correctly.
(format_user, format_group): Use it.
(format_user_or_group_width): New function, which counts columns
correctly.
(format_user_width, format_group_width): Use it.
Index: src/ls.c
===================================================================
RCS file: /home/meyering/coreutils/cu/src/ls.c,v
retrieving revision 1.357
diff -p -u -r1.357 ls.c
--- src/ls.c 15 Jun 2004 18:00:03 -0000 1.357
+++ src/ls.c 21 Jun 2004 07:05:18 -0000
@@ -330,7 +330,7 @@ static struct pending *pending_dirs;
static time_t current_time = TYPE_MINIMUM (time_t);
static int current_time_ns = -1;
-/* The number of bytes to use for columns containing inode numbers,
+/* The number of columns to use for columns containing inode numbers,
block sizes, link counts, owners, groups, authors, major device
numbers, minor device numbers, and file sizes, respectively. */
@@ -804,14 +804,14 @@ static struct column_info *column_info;
/* Maximum number of columns ever possible for this display. */
static size_t max_idx;
-/* The minimum width of a colum is 3: 1 character for the name and 2
+/* The minimum width of a column is 3: 1 character for the name and 2
for the separating white space. */
#define MIN_COLUMN_WIDTH 3
/* This zero-based index is used solely with the --dired option.
When that option is in effect, this counter is incremented for each
- character of output generated by this program so that the beginning
+ byte of output generated by this program so that the beginning
and ending indices (in that output) of every file name can be recorded
and later output themselves. */
static size_t dired_pos;
@@ -3055,19 +3055,44 @@ get_current_time (void)
current_time_ns = 999999999;
}
+/* Print the user or group name NAME, with numeric id ID, using a
+ print width of WIDTH columns. */
+
+static void
+format_user_or_group (char const *name, unsigned long int id, int width)
+{
+ size_t len;
+
+ if (name)
+ {
+ /* The output column count may differ from the byte count.
+ Adjust for this, but don't output garbage if integer overflow
+ occurs during adjustment. */
+ len = strlen (name);
+ width -= mbswidth (name, 0);
+ width += len;
+ if (width < 0)
+ width = 0;
+ printf ("%-*s ", width, name);
+ if (len < width)
+ len = width;
+ }
+ else
+ {
+ printf ("%*lu ", width, id);
+ len = width;
+ }
+
+ dired_pos += len + 1;
+}
+
/* Print the name or id of the user with id U, using a print width of
WIDTH. */
static void
format_user (uid_t u, int width)
{
- char const *name = (numeric_ids ? NULL : getuser (u));
- if (name)
- printf ("%-*s ", width, name);
- else
- printf ("%*lu ", width, (unsigned long int) u);
- dired_pos += width;
- dired_pos++;
+ format_user_or_group (numeric_ids ? NULL : getuser (u), u, width);
}
/* Likewise, for groups. */
@@ -3075,34 +3100,33 @@ format_user (uid_t u, int width)
static void
format_group (gid_t g, int width)
{
- char const *name = (numeric_ids ? NULL : getgroup (g));
- if (name)
- printf ("%-*s ", width, name);
- else
- printf ("%*lu ", width, (unsigned long int) g);
- dired_pos += width;
- dired_pos++;
+ format_user_or_group (numeric_ids ? NULL : getgroup (g), g, width);
}
-/* Return the number of bytes that format_user will print. */
+/* Return the number of columns that format_user_or_group will print. */
static int
-format_user_width (uid_t u)
+format_user_or_group_width (char const *name, unsigned long int id)
{
- char const *name = (numeric_ids ? NULL : getuser (u));
- char buf[INT_BUFSIZE_BOUND (unsigned long int)];
- size_t len;
-
- if (! name)
+ if (name)
{
- sprintf (buf, "%lu", (unsigned long int) u);
- name = buf;
+ int len = mbswidth (name, 0);
+ return MAX (0, len);
}
+ else
+ {
+ char buf[INT_BUFSIZE_BOUND (unsigned long int)];
+ sprintf (buf, "%lu", id);
+ return strlen (buf);
+ }
+}
- len = strlen (name);
- if (INT_MAX < len)
- error (EXIT_FAILURE, 0, _("User name too long"));
- return len;
+/* Return the number of columns that format_user will print. */
+
+static int
+format_user_width (uid_t u)
+{
+ return format_user_or_group_width (numeric_ids ? NULL : getuser (u), u);
}
/* Likewise, for groups. */
@@ -3110,20 +3134,7 @@ format_user_width (uid_t u)
static int
format_group_width (gid_t g)
{
- char const *name = (numeric_ids ? NULL : getgroup (g));
- char buf[INT_BUFSIZE_BOUND (unsigned long int)];
- size_t len;
-
- if (! name)
- {
- sprintf (buf, "%lu", (unsigned long int) g);
- name = buf;
- }
-
- len = strlen (name);
- if (INT_MAX < len)
- error (EXIT_FAILURE, 0, _("Group name too long"));
- return len;
+ return format_user_or_group_width (numeric_ids ? NULL : getgroup (g), g);
}
- coreutils 'ls' fix for multibyte user and group names,
Paul Eggert <=