[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH] read: unsigned char delim issues
From: Grisha Levit
Subject: [PATCH] read: unsigned char delim issues
Date: Tue, 13 Aug 2024 15:00:59 -0400
The new read_mbchar code is missing an (unsigned char) cast, causing an
invalid continuation byte >0x7F to fail to be recognized as a delimiter
on platforms where char is signed.
$ printf '\317_' | { read -d _; echo "${REPLY@Q}"; }
$'\317'
$ printf '\317\360_' | { read -d $'\360'; echo "${REPLY@Q}"; }
$'\317\360_'
Also, the function cannot distinguish between an ignored delimiter and
a delimiter of 0xFF: the ignored case is represented as -1, which becomes
0xFF when stored as an unsigned char, and the separate ignore_delim flag
is not passed along.
This ambiguity affects edit_line as well. For example, due to a separate
issue (rl_num_chars_to_read is treated as a byte count, not a character
count), the 0xFF value can show up in REPLY:
$ read -e -N2; declare -p REPLY
π
declare -- REPLY=$'π\377'
Switching the storage of delim to int seems like the easiest fix.
---
builtins/read.def | 44 ++++++++++++++++++++++++++------------------
1 file changed, 26 insertions(+), 18 deletions(-)
diff --git a/builtins/read.def b/builtins/read.def
index 3734104a..63cf320b 100644
--- a/builtins/read.def
+++ b/builtins/read.def
@@ -122,7 +122,7 @@ struct ttsave
#if defined (READLINE)
static void uw_reset_attempted_completion_function (void *);
static int set_itext (void);
-static char *edit_line (char *, char *, unsigned char, int);
+static char *edit_line (char *, char *, int, int);
static void set_eol_delim (int);
static void reset_eol_delim (void *);
static void set_readline_timeout (sh_timer *t, time_t, long);
@@ -237,7 +237,7 @@ read_builtin (WORD_LIST *list)
long ival, uval;
intmax_t intval;
char c;
- unsigned char delim;
+ int delim;
char *input_string, *orig_input_string, *ifs_chars, *prompt, *arrayname;
char *e, *t, *t1, *ps2, *tofree;
struct stat tsb;
@@ -353,7 +353,6 @@ read_builtin (WORD_LIST *list)
break;
case 'N':
ignore_delim = 1;
- delim = -1;
case 'n':
nflag = 1;
code = valid_number (list_optarg, &intval);
@@ -381,7 +380,7 @@ read_builtin (WORD_LIST *list)
}
break;
case 'd':
- delim = *list_optarg;
+ delim = (unsigned char)*list_optarg;
break;
CASE_HELPOPT;
default:
@@ -826,7 +825,7 @@ read_builtin (WORD_LIST *list)
continue;
}
- if (ignore_delim == 0 && (unsigned char)c == delim)
+ if ((unsigned char)c == delim)
break;
if (c == '\0' && delim != '\0')
@@ -1189,9 +1188,9 @@ read_mbchar (int fd, char *string, int ind, int ch, int
delim, int unbuffered)
multibyte character, we can't just add it to the input string
and treat it as a byte. We need to push it back so a subsequent
zread will pick it up. */
- if (c == delim)
+ if ((unsigned char)c == delim)
{
- zungetc (c);
+ zungetc ((unsigned char)c);
mbchar[--i] = '\0'; /* unget the delimiter */
}
break; /* invalid multibyte character */
@@ -1273,7 +1272,7 @@ set_itext (void)
}
static char *
-edit_line (char *p, char *itext, unsigned char delim, int keep_completion_func)
+edit_line (char *p, char *itext, int delim, int keep_completion_func)
{
char *ret;
size_t len;
@@ -1313,7 +1312,8 @@ edit_line (char *p, char *itext, unsigned char delim, int
keep_completion_func)
len = strlen (ret);
ret = (char *)xrealloc (ret, len + 2);
- ret[len++] = delim;
+ if (delim > 0)
+ ret[len++] = delim;
ret[len] = '\0';
return ret;
}
@@ -1331,7 +1331,7 @@ static rl_command_func_t *old_delim_func;
static int old_newline_ctype;
static rl_command_func_t *old_newline_func;
-static unsigned char delim_char;
+static int delim_char;
static void
set_eol_delim (int c)
@@ -1342,19 +1342,24 @@ set_eol_delim (int c)
initialize_readline ();
cmap = rl_get_keymap ();
- /* Save the old delimiter char binding */
+ /* Save the old newline binding */
old_newline_ctype = cmap[RETURN].type;
old_newline_func = cmap[RETURN].function;
- old_delim_ctype = cmap[c].type;
- old_delim_func = cmap[c].function;
/* Change newline to self-insert */
cmap[RETURN].type = ISFUNC;
cmap[RETURN].function = rl_insert;
- /* Bind the delimiter character to accept-line. */
- cmap[c].type = ISFUNC;
- cmap[c].function = rl_newline;
+ if (c >= 0)
+ {
+ /* Save the old delimiter char binding */
+ old_delim_ctype = cmap[c].type;
+ old_delim_func = cmap[c].function;
+
+ /* Bind the delimiter character to accept-line. */
+ cmap[c].type = ISFUNC;
+ cmap[c].function = rl_newline;
+ }
delim_char = c;
}
@@ -1369,7 +1374,10 @@ reset_eol_delim (void *ignore)
cmap[RETURN].type = old_newline_ctype;
cmap[RETURN].function = old_newline_func;
- cmap[delim_char].type = old_delim_ctype;
- cmap[delim_char].function = old_delim_func;
+ if (delim_char >= 0)
+ {
+ cmap[delim_char].type = old_delim_ctype;
+ cmap[delim_char].function = old_delim_func;
+ }
}
#endif
--
2.46.0
- [PATCH] read: unsigned char delim issues,
Grisha Levit <=