emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] master 5bd8cfc: Fix mishandling of symbols that look like


From: Paul Eggert
Subject: [Emacs-diffs] master 5bd8cfc: Fix mishandling of symbols that look like numbers
Date: Thu, 11 Oct 2018 02:19:47 -0400 (EDT)

branch: master
commit 5bd8cfc14d4b0c78c07e65a583f42a10c4cbc06d
Author: Paul Eggert <address@hidden>
Commit: Paul Eggert <address@hidden>

    Fix mishandling of symbols that look like numbers
    
    * src/bignum.c (make_neg_biguint): New function.
    * src/lread.c (read1): Do not mishandle an unquoted symbol
    with name equal to something like "1\0x", i.e., a string
    of numeric form followed by a NUL byte.
    Formerly these symbols were misread as numbers.
    (string_to_number): Change last argument from an integer flag
    to a pointer to the length.  This lets the caller figure out
    how much of the prefix was used.  All callers changed.
    Add a fast path if the integer (sans sign) fits in uintmax_t.
    Update comments and simplify now that bignums are present.
    * src/print.c (print_object): Fix quoting of symbols that look
    like numbers, by relying on string_to_number for the tricky
    cases rather than trying to redo its logic, incorrectly.  For
    example, (read (prin1-to-string '\1e+NaN)) formerly returned
    "1e+NaN", which was wrong: a backslash is needed in the output
    to prevent it from being read as a NaN.  Escape NO_BREAK_SPACE
    too, since lread.c treats it like SPACE.
    * test/src/print-tests.el (print-read-roundtrip):
    Add tests illustrating the abovementioned bugs.
---
 src/bignum.c            |  10 +++++
 src/data.c              |   2 +-
 src/lisp.h              |   5 ++-
 src/lread.c             | 100 ++++++++++++++++++++++++------------------------
 src/print.c             |  49 ++++++++----------------
 src/process.c           |   7 +++-
 test/src/print-tests.el |  16 +++++++-
 7 files changed, 99 insertions(+), 90 deletions(-)

diff --git a/src/bignum.c b/src/bignum.c
index 0ab8de3..e3db037 100644
--- a/src/bignum.c
+++ b/src/bignum.c
@@ -117,6 +117,16 @@ make_biguint (uintmax_t n)
   return make_bignum ();
 }
 
+/* Return a Lisp integer equal to -N, which must not be in fixnum range.  */
+Lisp_Object
+make_neg_biguint (uintmax_t n)
+{
+  eassert (-MOST_NEGATIVE_FIXNUM < n);
+  mpz_set_uintmax (mpz[0], n);
+  mpz_neg (mpz[0], mpz[0]);
+  return make_bignum ();
+}
+
 /* Return a Lisp integer with value taken from mpz[0].
    Set mpz[0] to a junk value.  */
 Lisp_Object
diff --git a/src/data.c b/src/data.c
index 5f1d059..538081e 100644
--- a/src/data.c
+++ b/src/data.c
@@ -2796,7 +2796,7 @@ If the base used is not 10, STRING is always parsed as an 
integer.  */)
   while (*p == ' ' || *p == '\t')
     p++;
 
-  Lisp_Object val = string_to_number (p, b, S2N_IGNORE_TRAILING);
+  Lisp_Object val = string_to_number (p, b, 0);
   return NILP (val) ? make_fixnum (0) : val;
 }
 
diff --git a/src/lisp.h b/src/lisp.h
index 2c20b48..5ecc48b 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -2506,7 +2506,7 @@ INTEGERP (Lisp_Object x)
   return FIXNUMP (x) || BIGNUMP (x);
 }
 
-/* Return a Lisp integer with value taken from n.  */
+/* Return a Lisp integer with value taken from N.  */
 INLINE Lisp_Object
 make_int (intmax_t n)
 {
@@ -3329,6 +3329,7 @@ extern ptrdiff_t bignum_bufsize (Lisp_Object, int);
 extern ptrdiff_t bignum_to_c_string (char *, ptrdiff_t, Lisp_Object, int);
 extern Lisp_Object bignum_to_string (Lisp_Object, int);
 extern Lisp_Object make_bignum_str (char const *, int);
+extern Lisp_Object make_neg_biguint (uintmax_t);
 extern Lisp_Object double_to_integer (double);
 
 /* Converthe integer NUM to *N.  Return true if successful, false
@@ -3839,7 +3840,7 @@ LOADHIST_ATTACH (Lisp_Object x)
 extern int openp (Lisp_Object, Lisp_Object, Lisp_Object,
                   Lisp_Object *, Lisp_Object, bool);
 enum { S2N_IGNORE_TRAILING = 1 };
-extern Lisp_Object string_to_number (char const *, int, int);
+extern Lisp_Object string_to_number (char const *, int, ptrdiff_t *);
 extern void map_obarray (Lisp_Object, void (*) (Lisp_Object, Lisp_Object),
                          Lisp_Object);
 extern void dir_warning (const char *, Lisp_Object);
diff --git a/src/lread.c b/src/lread.c
index 73e38d8..62616cb 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -2354,12 +2354,14 @@ character_name_to_code (char const *name, ptrdiff_t 
name_len)
 {
   /* For "U+XXXX", pass the leading '+' to string_to_number to reject
      monstrosities like "U+-0000".  */
+  ptrdiff_t len = name_len - 1;
   Lisp_Object code
     = (name[0] == 'U' && name[1] == '+'
-       ? string_to_number (name + 1, 16, 0)
+       ? string_to_number (name + 1, 16, &len)
        : call2 (Qchar_from_name, make_unibyte_string (name, name_len), Qt));
 
   if (! RANGED_FIXNUMP (0, code, MAX_UNICODE_CHAR)
+      || len != name_len - 1
       || char_surrogate_p (XFIXNUM (code)))
     {
       AUTO_STRING (format, "\\N{%s}");
@@ -3531,12 +3533,14 @@ read1 (Lisp_Object readcharfun, int *pch, bool 
first_in_list)
                   || strchr ("\"';()[]#`,", c) == NULL));
 
        *p = 0;
+       ptrdiff_t nbytes = p - read_buffer;
        UNREAD (c);
 
        if (!quoted && !uninterned_symbol)
          {
-           Lisp_Object result = string_to_number (read_buffer, 10, 0);
-           if (! NILP (result))
+           ptrdiff_t len;
+           Lisp_Object result = string_to_number (read_buffer, 10, &len);
+           if (! NILP (result) && len == nbytes)
              return unbind_to (count, result);
          }
         if (!quoted && multibyte)
@@ -3548,7 +3552,6 @@ read1 (Lisp_Object readcharfun, int *pch, bool 
first_in_list)
           }
        {
          Lisp_Object result;
-         ptrdiff_t nbytes = p - read_buffer;
          ptrdiff_t nchars
            = (multibyte
               ? multibyte_chars_in_text ((unsigned char *) read_buffer,
@@ -3700,18 +3703,18 @@ substitute_in_interval (INTERVAL interval, void *arg)
 }
 
 
-/* Convert STRING to a number, assuming base BASE.  When STRING has
-   floating point syntax and BASE is 10, return a nearest float.  When
-   STRING has integer syntax, return a fixnum if the integer fits, or
-   else a bignum.  Otherwise, return nil.  If FLAGS &
-   S2N_IGNORE_TRAILING is nonzero, consider just the longest prefix of
-   STRING that has valid syntax.  */
+/* Convert the initial prefix of STRING to a number, assuming base BASE.
+   If the prefix has floating point syntax and BASE is 10, return a
+   nearest float; otherwise, if the prefix has integer syntax, return
+   the integer; otherwise, return nil.  If PLEN, set *PLEN to the
+   length of the numeric prefix if there is one, otherwise *PLEN is
+   unspecified.  */
 
 Lisp_Object
-string_to_number (char const *string, int base, int flags)
+string_to_number (char const *string, int base, ptrdiff_t *plen)
 {
   char const *cp = string;
-  bool float_syntax = 0;
+  bool float_syntax = false;
   double value = 0;
 
   /* Negate the value ourselves.  This treats 0, NaNs, and infinity properly on
@@ -3797,49 +3800,46 @@ string_to_number (char const *string, int base, int 
flags)
                      || (state & ~INTOVERFLOW) == (LEAD_INT|E_EXP));
     }
 
-  /* Return nil if the number uses invalid syntax.  If FLAGS &
-     S2N_IGNORE_TRAILING, accept any prefix that matches.  Otherwise,
-     the entire string must match.  */
-  if (! (flags & S2N_IGNORE_TRAILING
-        ? ((state & LEAD_INT) != 0 || float_syntax)
-        : (!*cp && ((state & ~(INTOVERFLOW | DOT_CHAR)) == LEAD_INT
-                    || float_syntax))))
-    return Qnil;
+  if (plen)
+    *plen = cp - string;
 
-  /* If the number uses integer and not float syntax, and is in C-language
-     range, use its value, preferably as a fixnum.  */
-  if (leading_digit >= 0 && ! float_syntax)
+  /* Return a float if the number uses float syntax.  */
+  if (float_syntax)
     {
-      if ((state & INTOVERFLOW) == 0
-         && n <= (negative ? -MOST_NEGATIVE_FIXNUM : MOST_POSITIVE_FIXNUM))
-       {
-         EMACS_INT signed_n = n;
-         return make_fixnum (negative ? -signed_n : signed_n);
-       }
-
-      /* Trim any leading "+" and trailing nondigits, then convert to
-        bignum.  */
-      string += positive;
-      if (!*after_digits)
-       return make_bignum_str (string, base);
-      ptrdiff_t trimmed_len = after_digits - string;
-      USE_SAFE_ALLOCA;
-      char *trimmed = SAFE_ALLOCA (trimmed_len + 1);
-      memcpy (trimmed, string, trimmed_len);
-      trimmed[trimmed_len] = '\0';
-      Lisp_Object result = make_bignum_str (trimmed, base);
-      SAFE_FREE ();
-      return result;
+      /* Convert to floating point, unless the value is already known
+        because it is infinite or a NaN.  */
+      if (! value)
+       value = atof (string + signedp);
+      return make_float (negative ? -value : value);
     }
 
-  /* Either the number uses float syntax, or it does not fit into a fixnum.
-     Convert it from string to floating point, unless the value is already
-     known because it is an infinity, a NAN, or its absolute value fits in
-     uintmax_t.  */
-  if (! value)
-    value = atof (string + signedp);
+  /* Return nil if the number uses invalid syntax.  */
+  if (! (state & LEAD_INT))
+    return Qnil;
+
+  /* Fast path if the integer (san sign) fits in uintmax_t.  */
+  if (! (state & INTOVERFLOW))
+    {
+      if (!negative)
+       return make_uint (n);
+      if (-MOST_NEGATIVE_FIXNUM < n)
+       return make_neg_biguint (n);
+      EMACS_INT signed_n = n;
+      return make_fixnum (-signed_n);
+    }
 
-  return make_float (negative ? -value : value);
+  /* Trim any leading "+" and trailing nondigits, then return a bignum.  */
+  string += positive;
+  if (!*after_digits)
+    return make_bignum_str (string, base);
+  ptrdiff_t trimmed_len = after_digits - string;
+  USE_SAFE_ALLOCA;
+  char *trimmed = SAFE_ALLOCA (trimmed_len + 1);
+  memcpy (trimmed, string, trimmed_len);
+  trimmed[trimmed_len] = '\0';
+  Lisp_Object result = make_bignum_str (trimmed, base);
+  SAFE_FREE ();
+  return result;
 }
 
 
diff --git a/src/print.c b/src/print.c
index c0c90bc..d15ff97 100644
--- a/src/print.c
+++ b/src/print.c
@@ -1993,39 +1993,17 @@ print_object (Lisp_Object obj, Lisp_Object 
printcharfun, bool escapeflag)
 
     case Lisp_Symbol:
       {
-       bool confusing;
-       unsigned char *p = SDATA (SYMBOL_NAME (obj));
-       unsigned char *end = p + SBYTES (SYMBOL_NAME (obj));
-       int c;
-       ptrdiff_t i, i_byte;
-       ptrdiff_t size_byte;
-       Lisp_Object name;
-
-       name = SYMBOL_NAME (obj);
-
-       if (p != end && (*p == '-' || *p == '+')) p++;
-       if (p == end)
-         confusing = 0;
-       /* If symbol name begins with a digit, and ends with a digit,
-          and contains nothing but digits and `e', it could be treated
-          as a number.  So set CONFUSING.
-
-          Symbols that contain periods could also be taken as numbers,
-          but periods are always escaped, so we don't have to worry
-          about them here.  */
-       else if (*p >= '0' && *p <= '9'
-                && end[-1] >= '0' && end[-1] <= '9')
-         {
-           while (p != end && ((*p >= '0' && *p <= '9')
-                               /* Needed for \2e10.  */
-                               || *p == 'e' || *p == 'E'))
-             p++;
-           confusing = (end == p);
-         }
-       else
-         confusing = 0;
-
-       size_byte = SBYTES (name);
+       Lisp_Object name = SYMBOL_NAME (obj);
+       ptrdiff_t size_byte = SBYTES (name);
+
+       /* Set CONFUSING if NAME looks like a number, calling
+          string_to_number for non-obvious cases.  */
+       char *p = SSDATA (name);
+       bool signedp = *p == '-' || *p == '+';
+       ptrdiff_t len;
+       bool confusing = ((c_isdigit (p[signedp]) || p[signedp] == '.')
+                         && !NILP (string_to_number (p, 10, &len))
+                         && len == size_byte);
 
        if (! NILP (Vprint_gensym)
             && !SYMBOL_INTERNED_IN_INITIAL_OBARRAY_P (obj))
@@ -2036,10 +2014,12 @@ print_object (Lisp_Object obj, Lisp_Object 
printcharfun, bool escapeflag)
            break;
          }
 
-       for (i = 0, i_byte = 0; i_byte < size_byte;)
+       ptrdiff_t i = 0;
+       for (ptrdiff_t i_byte = 0; i_byte < size_byte; )
          {
            /* Here, we must convert each multi-byte form to the
               corresponding character code before handing it to PRINTCHAR.  */
+           int c;
            FETCH_STRING_CHAR_ADVANCE (c, name, i, i_byte);
            maybe_quit ();
 
@@ -2049,6 +2029,7 @@ print_object (Lisp_Object obj, Lisp_Object printcharfun, 
bool escapeflag)
                    || c == ';' || c == '#' || c == '(' || c == ')'
                    || c == ',' || c == '.' || c == '`'
                    || c == '[' || c == ']' || c == '?' || c <= 040
+                   || c == NO_BREAK_SPACE
                     || confusing
                    || (i == 1 && confusable_symbol_character_p (c)))
                  {
diff --git a/src/process.c b/src/process.c
index a9638df..6cda4f2 100644
--- a/src/process.c
+++ b/src/process.c
@@ -6852,7 +6852,12 @@ SIGCODE may be an integer, or a symbol whose name is a 
signal name.  */)
     {
       Lisp_Object tem = Fget_process (process);
       if (NILP (tem))
-       tem = string_to_number (SSDATA (process), 10, 0);
+       {
+         ptrdiff_t len;
+         tem = string_to_number (SSDATA (process), 10, &len);
+         if (NILP (tem) || len != SBYTES (process))
+           return Qnil;
+       }
       process = tem;
     }
   else if (!NUMBERP (process))
diff --git a/test/src/print-tests.el b/test/src/print-tests.el
index 091f1aa..78e769f 100644
--- a/test/src/print-tests.el
+++ b/test/src/print-tests.el
@@ -95,8 +95,20 @@ otherwise, use a different charset."
                      "--------\n"))))
 
 (ert-deftest print-read-roundtrip ()
-  (let ((sym '\’bar))
-    (should (eq (read (prin1-to-string sym)) sym))))
+  (let ((syms (list '## '& '* '+ '- '/ '0E '0e '< '= '> 'E 'E0 'NaN '\"
+                    '\# '\#x0 '\' '\'\' '\( '\) '\+00 '\, '\-0 '\. '\.0
+                    '\0 '\0.0 '\0E0 '\0e0 '\1E+ '\1E+NaN '\1e+ '\1e+NaN
+                    '\; '\? '\[ '\\ '\] '\` '_ 'a 'e 'e0 'x
+                    '{ '| '} '~ : '\’ '\’bar
+                    (intern "\t") (intern "\n") (intern " ")
+                    (intern "\N{NO-BREAK SPACE}")
+                    (intern "\N{ZERO WIDTH SPACE}")
+                    (intern "\0"))))
+    (dolist (sym syms)
+      (should (eq (read (prin1-to-string sym)) sym))
+      (dolist (sym1 syms)
+        (let ((sym2 (intern (concat (symbol-name sym) (symbol-name sym1)))))
+          (should (eq (read (prin1-to-string sym2)) sym2)))))))
 
 (ert-deftest print-bignum ()
   (let* ((str "999999999999999999999999999999999")



reply via email to

[Prev in Thread] Current Thread [Next in Thread]