bug-bash
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] read: unsigned char delim issues


From: Grisha Levit
Subject: [PATCH] read: unsigned char delim issues
Date: Tue, 13 Aug 2024 15:00:59 -0400

The new read_mbchar code is missing an (unsigned char) cast, causing an
invalid continuation byte >0x7F to fail to be recognized as a delimiter
on platforms where char is signed.

    $ printf '\317_' | { read -d _; echo "${REPLY@Q}"; }
    $'\317'
    $ printf '\317\360_' | { read -d $'\360'; echo "${REPLY@Q}"; }
    $'\317\360_'

Also, the function cannot distinguish between an ignored delimiter and
a delimiter of 0xFF, since the value is stored as an unsigned char and
the separate ignore_delim flag is not passed along.

This ambiguity affects edit_line as well. E.g., due to a separate issue
(rl_num_chars_to_read treated as a byte count, not a character count),
the 0xFF value can show up in REPLY:

    $ read -e -N2; declare -p REPLY
    π
    declare -- REPLY=$'π\377'

Switching the storage of delim to int seems like the easiest fix.
---
 builtins/read.def | 44 ++++++++++++++++++++++++++------------------
 1 file changed, 26 insertions(+), 18 deletions(-)

diff --git a/builtins/read.def b/builtins/read.def
index 3734104a..63cf320b 100644
--- a/builtins/read.def
+++ b/builtins/read.def
@@ -122,7 +122,7 @@ struct ttsave
 #if defined (READLINE)
 static void uw_reset_attempted_completion_function (void *);
 static int set_itext (void);
-static char *edit_line (char *, char *, unsigned char, int);
+static char *edit_line (char *, char *, int, int);
 static void set_eol_delim (int);
 static void reset_eol_delim (void *);
 static void set_readline_timeout (sh_timer *t, time_t, long);
@@ -237,7 +237,7 @@ read_builtin (WORD_LIST *list)
   long ival, uval;
   intmax_t intval;
   char c;
-  unsigned char delim;
+  int delim;
   char *input_string, *orig_input_string, *ifs_chars, *prompt, *arrayname;
   char *e, *t, *t1, *ps2, *tofree;
   struct stat tsb;
@@ -353,7 +353,6 @@ read_builtin (WORD_LIST *list)
          break;
        case 'N':
          ignore_delim = 1;
-         delim = -1;
        case 'n':
          nflag = 1;
          code = valid_number (list_optarg, &intval);
@@ -381,7 +380,7 @@ read_builtin (WORD_LIST *list)
            }
          break;
        case 'd':
-         delim = *list_optarg;
+         delim = (unsigned char)*list_optarg;
          break;
        CASE_HELPOPT;
        default:
@@ -826,7 +825,7 @@ read_builtin (WORD_LIST *list)
          continue;
        }
 
-      if (ignore_delim == 0 && (unsigned char)c == delim)
+      if ((unsigned char)c == delim)
        break;
 
       if (c == '\0' && delim != '\0')
@@ -1189,9 +1188,9 @@ read_mbchar (int fd, char *string, int ind, int ch, int delim, int unbuffered)
             multibyte character, we can't just add it to the input string
             and treat it as a byte. We need to push it back so a subsequent
             zread will pick it up. */
-         if (c == delim)
+         if ((unsigned char)c == delim)
            {
-             zungetc (c);
+             zungetc ((unsigned char)c);
              mbchar[--i] = '\0';               /* unget the delimiter */
            }
          break;                /* invalid multibyte character */
@@ -1273,7 +1272,7 @@ set_itext (void)
 }
 
 static char *
-edit_line (char *p, char *itext, unsigned char delim, int keep_completion_func)
+edit_line (char *p, char *itext, int delim, int keep_completion_func)
 {
   char *ret;
   size_t len;
@@ -1313,7 +1312,8 @@ edit_line (char *p, char *itext, unsigned char delim, int keep_completion_func)
 
   len = strlen (ret);
   ret = (char *)xrealloc (ret, len + 2);
-  ret[len++] = delim;
+  if (delim > 0)
+    ret[len++] = delim;
   ret[len] = '\0';
   return ret;
 }
@@ -1331,7 +1331,7 @@ static rl_command_func_t *old_delim_func;
 static int old_newline_ctype;
 static rl_command_func_t *old_newline_func;
 
-static unsigned char delim_char;
+static int delim_char;
 
 static void
 set_eol_delim (int c)
@@ -1342,19 +1342,24 @@ set_eol_delim (int c)
     initialize_readline ();
   cmap = rl_get_keymap ();
 
-  /* Save the old delimiter char binding */
+  /* Save the old newline binding */
   old_newline_ctype = cmap[RETURN].type;
   old_newline_func =  cmap[RETURN].function;
-  old_delim_ctype = cmap[c].type;
-  old_delim_func = cmap[c].function;
 
   /* Change newline to self-insert */
   cmap[RETURN].type = ISFUNC;
   cmap[RETURN].function = rl_insert;
 
-  /* Bind the delimiter character to accept-line. */
-  cmap[c].type = ISFUNC;
-  cmap[c].function = rl_newline;
+  if (c >= 0)
+    {
+      /* Save the old delimiter char binding */
+      old_delim_ctype = cmap[c].type;
+      old_delim_func = cmap[c].function;
+
+      /* Bind the delimiter character to accept-line. */
+      cmap[c].type = ISFUNC;
+      cmap[c].function = rl_newline;
+    }
 
   delim_char = c;
 }
@@ -1369,7 +1374,10 @@ reset_eol_delim (void *ignore)
   cmap[RETURN].type = old_newline_ctype;
   cmap[RETURN].function = old_newline_func;
 
-  cmap[delim_char].type = old_delim_ctype;
-  cmap[delim_char].function = old_delim_func;
+  if (delim_char >= 0)
+    {
+      cmap[delim_char].type = old_delim_ctype;
+      cmap[delim_char].function = old_delim_func;
+    }
 }
 #endif
-- 
2.46.0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]