--- Begin Message ---
Subject: |
Lisp reader and string-to-number bugs and inconsistencies |
Date: |
Wed, 20 Apr 2011 02:27:54 -0700 |
User-agent: |
Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.14) Gecko/20110223 Thunderbird/3.1.8 |
Emacs has several problems when converting strings to numbers:
1. On a typical 64-bit host, (string-to-number "2305843009213693951")
returns 2305843009213693440, which is off by 511. There are more
subtle numeric errors due to double-rounding.
2. The Lisp reader sometimes reports integer overflow for large
integers, and sometimes silently substitutes a float. For example,
on a typical 32-bit host, the Lisp reader reads 536870912 as if it
were 536870912.0, but reports an overflow if it reads 2147483648.
3. The Lisp reader treats the tokens -. and +. as if they were 0, which
is not documented and surely is not intended.
4. The Lisp reader parses NaNs and infinities, e.g., 0.0e+NaN is
treated as a NaN; but (string-to-number "0.0e+NaN") returns zero.
I plan to install the following patch to fix these problems, after
some further testing and editing (right just now I noticed a stray
comment "Return the length of the floating-point number ...", which I
will remove).
To fix (2), it's plausible to change the code in one of two ways:
either silently treat large integers as floats, or signal an overflow.
I don't care that much one way or another, but Emacs should be
consistent. I mildly prefer reporting the overflow, as that is a
better way to allow an upgrade path to arbitrary precision arithmetic,
so that's what the patch below does; but if the consensus is the other
way, I can easily change this.
# Bazaar merge directive format 2 (Bazaar 0.90)
# revision_id: address@hidden
# target_branch: bzr+ssh://address@hidden/emacs/trunk
# testament_sha1: 4a86d5674868b293852101d1d3ad0a7bc157e65c
# timestamp: 2011-04-20 01:43:56 -0700
# base_revision_id: address@hidden
# vk45j4qhfkv0xz4j
#
# Begin patch
=== modified file 'src/ChangeLog'
--- src/ChangeLog 2011-04-19 10:48:30 +0000
+++ src/ChangeLog 2011-04-20 06:24:51 +0000
@@ -1,3 +1,28 @@
+2011-04-20 Paul Eggert <address@hidden>
+
+ Make the Lisp reader and string-to-float more consistent.
+ * data.c (atof): Remove decl; no longer used or needed.
+ (Fstring_to_number): Use new string_to_float function, to be
+ consistent with how the Lisp reader treats infinities and NaNs.
+ Do not assume that floating-point numbers represent EMACS_INT
+ without losing information; this is not true on most 64-bit hosts.
+ Avoid double-rounding errors, by insisting on integers when
+ parsing non-base-10 numbers, as the documentation specifies.
+ Report integer overflow instead of silently converting to
+ integers.
+ * lisp.h (string_to_float): New decl, replacing ...
+ (isfloat_string): Remove.
+ * lread.c (read1): Do not accept +. and -. as integers; this
+ appears to have been a coding error. Similarly, do not accept
+ strings like +-1e0 as floating point numbers. Do not report
+ overflow for some integer overflows and not others; instead,
+ report them all. Break out the floating-point parsing into a new
+ function string_to_float, so that Fstring_to_number parses
+ floating point numbers consistently with the Lisp reader.
+ (string_to_float): New function, replacing isfloat_string.
+ This function checks for valid syntax and produces the resulting
+ Lisp float number too.
+
2011-04-19 Eli Zaretskii <address@hidden>
* syntax.h (SETUP_SYNTAX_TABLE_FOR_OBJECT): Fix setting of
=== modified file 'src/data.c'
--- src/data.c 2011-04-16 21:48:36 +0000
+++ src/data.c 2011-04-20 06:24:51 +0000
@@ -48,10 +48,6 @@
#include <math.h>
-#if !defined (atof)
-extern double atof (const char *);
-#endif /* !atof */
-
Lisp_Object Qnil, Qt, Qquote, Qlambda, Qunbound;
static Lisp_Object Qsubr;
Lisp_Object Qerror_conditions, Qerror_message, Qtop_level;
@@ -2415,8 +2411,7 @@
{
register char *p;
register int b;
- int sign = 1;
- Lisp_Object val;
+ EMACS_INT n;
CHECK_STRING (string);
@@ -2430,38 +2425,23 @@
xsignal1 (Qargs_out_of_range, base);
}
- /* Skip any whitespace at the front of the number. Some versions of
- atoi do this anyway, so we might as well make Emacs lisp consistent. */
+ /* Skip any whitespace at the front of the number. Typically strtol does
+ this anyway, so we might as well be consistent. */
p = SSDATA (string);
while (*p == ' ' || *p == '\t')
p++;
- if (*p == '-')
- {
- sign = -1;
- p++;
- }
- else if (*p == '+')
- p++;
-
- if (isfloat_string (p, 1) && b == 10)
- val = make_float (sign * atof (p));
- else
- {
- double v = 0;
-
- while (1)
- {
- int digit = digit_to_number (*p++, b);
- if (digit < 0)
- break;
- v = v * b + digit;
- }
-
- val = make_fixnum_or_float (sign * v);
- }
-
- return val;
+ if (b == 10)
+ {
+ Lisp_Object val = string_to_float (p, 1);
+ if (FLOATP (val))
+ return val;
+ }
+
+ n = strtol (p, NULL, b);
+ if (FIXNUM_OVERFLOW_P (n))
+ xsignal (Qoverflow_error, list1 (string));
+ return make_number (n);
}
=== modified file 'src/lisp.h'
--- src/lisp.h 2011-04-15 08:22:34 +0000
+++ src/lisp.h 2011-04-20 06:24:51 +0000
@@ -2782,7 +2782,7 @@
} while (0)
extern int openp (Lisp_Object, Lisp_Object, Lisp_Object,
Lisp_Object *, Lisp_Object);
-extern int isfloat_string (const char *, int);
+Lisp_Object string_to_float (char const *, int);
extern void map_obarray (Lisp_Object, void (*) (Lisp_Object, Lisp_Object),
Lisp_Object);
extern void dir_warning (const char *, Lisp_Object);
=== modified file 'src/lread.c'
--- src/lread.c 2011-04-14 05:04:02 +0000
+++ src/lread.c 2011-04-20 06:24:51 +0000
@@ -3006,85 +3006,32 @@
if (!quoted && !uninterned_symbol)
{
register char *p1;
+ Lisp_Object result;
p1 = read_buffer;
if (*p1 == '+' || *p1 == '-') p1++;
/* Is it an integer? */
- if (p1 != p)
+ if ('0' <= *p1 && *p1 <= '9')
{
- while (p1 != p && (c = *p1) >= '0' && c <= '9') p1++;
+ do
+ p1++;
+ while ('0' <= *p1 && *p1 <= '9');
+
/* Integers can have trailing decimal points. */
- if (p1 > read_buffer && p1 < p && *p1 == '.') p1++;
+ p1 += (*p1 == '.');
if (p1 == p)
- /* It is an integer. */
- {
- if (p1[-1] == '.')
- p1[-1] = '\0';
- {
- /* EMACS_INT n = atol (read_buffer); */
- char *endptr = NULL;
- EMACS_INT n = (errno = 0,
- strtol (read_buffer, &endptr, 10));
- if (errno == ERANGE && endptr)
- {
- Lisp_Object args
- = Fcons (make_string (read_buffer,
- endptr - read_buffer),
- Qnil);
- xsignal (Qoverflow_error, args);
- }
- return make_fixnum_or_float (n);
- }
- }
- }
- if (isfloat_string (read_buffer, 0))
- {
- /* Compute NaN and infinities using 0.0 in a variable,
- to cope with compilers that think they are smarter
- than we are. */
- double zero = 0.0;
-
- double value;
-
- /* Negate the value ourselves. This treats 0, NaNs,
- and infinity properly on IEEE floating point hosts,
- and works around a common bug where atof ("-0.0")
- drops the sign. */
- int negative = read_buffer[0] == '-';
-
- /* The only way p[-1] can be 'F' or 'N', after isfloat_string
- returns 1, is if the input ends in e+INF or e+NaN. */
- switch (p[-1])
- {
- case 'F':
- value = 1.0 / zero;
- break;
- case 'N':
- value = zero / zero;
-
- /* If that made a "negative" NaN, negate it. */
-
- {
- int i;
- union { double d; char c[sizeof (double)]; } u_data,
u_minus_zero;
-
- u_data.d = value;
- u_minus_zero.d = - 0.0;
- for (i = 0; i < sizeof (double); i++)
- if (u_data.c[i] & u_minus_zero.c[i])
- {
- value = - value;
- break;
- }
- }
- /* Now VALUE is a positive NaN. */
- break;
- default:
- value = atof (read_buffer + negative);
- break;
- }
-
- return make_float (negative ? - value : value);
- }
+ {
+ /* It is an integer. */
+ EMACS_INT n = strtol (read_buffer, NULL, 10);
+ if (FIXNUM_OVERFLOW_P (n))
+ xsignal (Qoverflow_error,
+ list1 (build_string (read_buffer)));
+ return make_number (n);
+ }
+ }
+
+ result = string_to_float (read_buffer, 0);
+ if (FLOATP (result))
+ return result;
}
{
Lisp_Object name, result;
@@ -3242,20 +3189,40 @@
}
+/* Return the length of the floating-point number that is the prefix of CP, or
+ zero if there is none. */
+
#define LEAD_INT 1
#define DOT_CHAR 2
#define TRAIL_INT 4
#define E_CHAR 8
#define EXP_INT 16
-int
-isfloat_string (const char *cp, int ignore_trailing)
+
+/* Convert CP to a floating point number. Return a non-float value if CP does
+ not have valid floating point syntax. If IGNORE_TRAILING is nonzero,
+ consider just the longest prefix of CP that has valid floating point
+ syntax. */
+
+Lisp_Object
+string_to_float (char const *cp, int ignore_trailing)
{
int state;
const char *start = cp;
+ /* Compute NaN and infinities using a variable, to cope with compilers that
+ think they are smarter than we are. */
+ double zero = 0;
+
+ /* Negate the value ourselves. This treats 0, NaNs, and infinity properly on
+ IEEE floating point hosts, and works around a formerly-common bug where
+ atof ("-0.0") drops the sign. */
+ int negative = *cp == '-';
+
+ double value = 0;
+
state = 0;
- if (*cp == '+' || *cp == '-')
+ if (negative || *cp == '+')
cp++;
if (*cp >= '0' && *cp <= '9')
@@ -3295,21 +3262,43 @@
{
state |= EXP_INT;
cp += 3;
+ value = 1.0 / zero;
}
else if (cp[-1] == '+' && cp[0] == 'N' && cp[1] == 'a' && cp[2] == 'N')
{
state |= EXP_INT;
cp += 3;
+ value = zero / zero;
+
+ /* If that made a "negative" NaN, negate it. */
+ {
+ int i;
+ union { double d; char c[sizeof (double)]; } u_data, u_minus_zero;
+
+ u_data.d = value;
+ u_minus_zero.d = - 0.0;
+ for (i = 0; i < sizeof (double); i++)
+ if (u_data.c[i] & u_minus_zero.c[i])
+ {
+ value = - value;
+ break;
+ }
+ }
+ /* Now VALUE is a positive NaN. */
}
- return ((ignore_trailing
- || *cp == 0 || *cp == ' ' || *cp == '\t' || *cp == '\n'
- || *cp == '\r' || *cp == '\f')
- && (state == (LEAD_INT|DOT_CHAR|TRAIL_INT)
- || state == (DOT_CHAR|TRAIL_INT)
- || state == (LEAD_INT|E_CHAR|EXP_INT)
- || state == (LEAD_INT|DOT_CHAR|TRAIL_INT|E_CHAR|EXP_INT)
- || state == (DOT_CHAR|TRAIL_INT|E_CHAR|EXP_INT)));
+ if (! (state == (LEAD_INT|DOT_CHAR|TRAIL_INT)
+ || state == (DOT_CHAR|TRAIL_INT)
+ || state == (LEAD_INT|E_CHAR|EXP_INT)
+ || state == (LEAD_INT|DOT_CHAR|TRAIL_INT|E_CHAR|EXP_INT)
+ || state == (DOT_CHAR|TRAIL_INT|E_CHAR|EXP_INT)))
+ return make_number (0); /* Any non-float value will do. */
+
+ if (! value)
+ value = atof (start + negative);
+ if (negative)
+ value = - value;
+ return make_float (value);
}
# Begin bundle
IyBCYXphYXIgcmV2aXNpb24gYnVuZGxlIHY0CiMKQlpoOTFBWSZTWVbWxdIABrrfgHgwcf///3/n
/8q////+YA69Y233exwPaIMFFKHiEx0DthodAUqmjR3aqB0hKCmgI0Ue1J6fqk09Gp6mEfqT00j1
M01MgGyjRpoABKCARppNGVDaqe0TSejyUzUDIwNIaYJk0GTZQinpTyJ+qADQaAAAANBoAAAAAEmp
TRU/RTT0mhk0NPSNDI0MgZAAAAGgaDmE0ZGhoZDCNDIaaNABiMmQDCAYBIkQCNNCZoCNNTNQYpqb
JkTJhPSYCNANNMCAYEwSIiiwVWJAUiyLo/3k65f+cX2osotErCz/5kIgdUrH26ln/VF38dvcuPVO
dNdr1rgUFg/uv5/c4tWslTODQVRnVXguK0pv9Zk3dZCu3G3GNt52tyjvSv/Z272ajtaAsHY4RlF2
5SJ8NLeEuzpSXZarmHATv3ZEhlmkutHv6Fd7V0Ssbz4ksOw60jx2eGjmtnJAmeTiLFFkVEVVDlAM
voKeOoQcCCjMwwxr1XjjQ6wi24o4KnadY8tYxK/9RhyEpHa7yj6KfgPCRy1A9l4KekbApaCf5NDr
x7ly9YaQmmDG2km9/f8nXkX6iO/zWPPGe2odSk+Y3psV09mr1b2Wq1xxXZTDZxyWBS19+FgwtSmQ
kER0EIi2BQ5zIC0CqaywpQVDBJQqCZ/ANDJBXuNQlAQyPlJkx3EFEqCoWI+LjITsB0wDhjZB403m
FTI48sof6Jdwz774P2PUS1z/9fHAsKS5Uc5ZjGZhMydJHkeGNSvKjMXT2GJBkMsD8PdcbT2bgqDE
n8NGcePApoQPz2nGBEkmwodDgbCJOL5UFNDIsVOdloWOEsVMRj9UR4kPAsDwFcEJkBbA7DGXwnlC
xWXq6FTU0H2wYEvzGHLBVQ8Yp2kjh65EzSREfwGoXXkCkSgGGnQt4RMgiYF6TMzNBvMfQ1E0Jn8Q
uW8p+oiMYiXlcCunBc8kNXLm5MhVNFZwk4+/hDgy5pQzFUofJ8EYEY5tnAWCr8PwcLtT4KErw0lI
xhpkw6RaX0A9nrPLyrjmzvzQv6DFAJ+QPlke+eTQlS0lmYcSFQfL6X8c1WM4tC1fPCSiISBmZM2l
aMoNo2FaItcbdvdzyvTvvVEmC0cgVSztdaZrtEWmmYxoFliqq2IB3lps0XJAdb2TfVs3xZr3CEin
CSeeXm49siRJ2MPFfSxTFZA/gmwem0NwUtFII9SgS3MLAWUrV+zx3lC0ZfQCyowLIPG5cfTut42N
V5Z1VVU1NVVSfvr4OebeVlNihmw9RwynAxxG9xoTTHnypwIOxG69ATXYgtDn9n1yiUI9Bq4YhI9f
Lr1VpQ+yiAXiyTERWYnJ17SCnzv+HCDICtmoQmMEOloseRlUTSCgE/qIpeXiC4lsW+f6rJlcTJXQ
SyDguSa6JyL93prOh6gT4gS/rTJL0aiqqB5juZWaC2HSW6zDEcE5xkCPPg3c3kiBHzlfekUC4ISB
L3qCZlbVpxT6oaG0srZEKVKE64WYIqVgvuiTPDqSy2xTkl5ZQfJ7YInfoMgmgoLTY6jxLBg6nI3t
OXv5VWArXw3VrLUEYEj7bIgUBYj6rLOXWUaKI0F8mtbJuoE6GO4m1pM0h3RMrNJECSw13Z1QMIt+
VareN1jKKPRkFVlQbs7HQ2rOaCQAupdUslkhpm6NR4jtRG9opDHj1IniCcJ4yYhbMwz3m2kSQO8k
TMUkc8RTA0wRwkoLZNXzKHKLKQVU98GaQLrTwyGNd17yO1GckQZEjBn7E9ad8sGeGdejWgXbTkm9
NVUEsS6kAmjFXOilUiyxNbGZnxz39gJqni5gl+fJcZbPnGMAR5WUEGLzGnqLSxdVGRNGBSjqGac1
9U5juMazaUJjGjxoXto0kixYzK6aCZRJ0Nzs5lQTLm6DT0bZYOxSrj6lFplE5ucjKtXK2ORgy1TP
lnEwWInkds7+ccROVKPIEVNN6u5424LgxLmdNJBNtS56rFSxscFI3MiXkBP5AnKfNhhSPGzhFemq
vmwFqI8RtDGTDFyTdc+bF1da+1PM27G0t9arMU1Ma3C8TE8GvJsw8U9BpVC1wyUyamjmKY5qcoN1
1LC+B3Oh4QrGAdban2D1zgh+/vJwGh59PjiKMVWkD2Iih+IcmPt+NVgjlaMU/99KZP+UT6F1MxOT
lwYYY/tP/iJ/qOqF9hE/IgjlUpqRCbhttQRdV+nnzCxP5f5pb8VETI9YoJBNTp+YcxxRPvOAolMt
yKHXUiGObG6JYGTRAcz8QVClJk0NFBkiHI4s887ZuXTRD+hl+gumUuwe8OEx251p6TeDoICbidgG
S+MsHd994GKH1AML5qkLakZN+AYi+phlr+Y7vo+76TkL3dmYNl4O4S/NpaPeBBqe8oDafkTTRdcY
yEqAxnruhChM6dJLymYJFmcllJitvmKt/caoJcUUQEDTCTAkw0gVI4k5oRxh0M8uUuJvGqpKA3df
YTqhQoafDQXzWeo1G8H4nm0H4nMvW6ouuJmaWwEWFx7Q8EcmaPo96u18wUH6B3IEzoDDy+I2n5HA
6EjiVFrXhM0BvLDVKRbadXAGeFv29t+oZuMiKYFWgJm2+DxxgR/ZfsgKwqvA8T2QYjWUKYC8ggJD
kSGjQbYMvSZ0MSzELRVCoI2GilBavXbyoNu5yMh132VBnILmjhR60azA3WCOI1ZagKUwWjk9P6HP
kLxR5CV+73+xyxazlBgUK95K0CvaNJFCMjMnWsp9HgjislAoxZS4cBxGQzdjHdMWVx0skhDBIZLT
HCS3gIOpe0LA5nHdCjPv1BCULXlqCADexbxq80EiZxDqDXwJF56UAwW/08TSC22vDYcDgaSv5Gil
I8F4HghhH5FFuGDj/sFPfaC4mvYfgDKjsmUmdphyJkjopi5ndArRh1hgutXhBY1IkcJCjjDtWPcm
pmBjy6zQPSegMw2uMKKWGCyoBZBb6h5DAam85mzWOJC0NDYG04gyaDLTWZGJjGxn1sbIZ06GvjuI
mQcuNHnBczH1G0l3nfyFkBNPBHqTO8PQHaG0NBsXNLuKBkC5hJeVcAdgFk5CxkQQyVEBMSWXfAMq
hAtCTdpkSgRk2JArwl0A0oduxsPZKIza1nkCJ/CSMJANMEaZEHgmH6rwlIPsGOOdamM0YoRHtemg
4S/c2Fj6tpeHHig1RQrlyHcfiN/NAvUjE8rVPISO3RcCpM/bsvPgXe9JhtZM15i3AGEDJgsKFCGJ
GUL6xazughIYEXQG7Su/souFsYXjGjP5llLhoaHcQexL+YGhDBlaDxKKsUsiowjKLSWUCfkuhcDu
x3FviCwUpv0jFzS48KDMuyk4jgwJSoJELuDQlKAlNIOQZA0IMiBpMxwC4uTd33huy4OFGvmqwTRC
AmEIAZXY6QvZvrEq9QLaaaoIUyQK1GaaLARpGY2hBPlnK2YrfYOhVKkMgWTRSSbroILLTBKBFBKr
ODA6rgyUUFsgCCwMUoKJhTocSYUCAKVJQWee9WTR9NKrAuaFyCXAXiMMghKZQpDgCpCgRl0qiU50
VJjETFQvwykejXuekYDwJn3CxU6Cookju6gUKgA2ggoO0moJ6B4CZ31HceZCmspav207dxgVA2BU
BfkySKTEMMlHJWIuqV6KKDBNKop+PfSKkvSqVxFZzViFaa/cC1JWqdIyoPLpI/cZ8+UKhhoNWsDu
JQj7C5qzJNEedgxMKMtCM+kqINvDupgLDraYxjgVcdp0gmNZd11g1wmXnBOzL71DLGhCkoQEYosR
RuahKvxzhOY6SNiMUXi+BXYLOVpFAteuCZ7bJIgftp6qpovAvtj05BCnKY4FrAgXBNUXnOuDt1Ct
3E60iQwuGBdIUiUMgGgIatTqKlToquwxkG81JMMOSjjFooQRxpimQd4DntzZBWg7u7i+ZAGCGadf
al95UiTgcMZAxna0h2a/IZZqLAsSpQSqCn3YCxvmKtAXwesGgWLD5zG7aCqSB4IGES8lJCi9VBwU
/AOM5K5WC1lm6gNO7DNLbOJorIkSkS5ukmXB6GQhX2FInqj0FSAooEhSrGQ8SMZkiUCRZghjCyom
TqQFAbQJAqEpChFjJzRNIyAoSp2WW3gXfTNYMmAF4/vY2SL5wrdBYZCkCY0XDGJqlheFiY0cDaAT
CpTK0qVmphTUxChqClQqZirt1ARlBdouCSYqYLd+6G6Upr4kBalcNLojPmO7AWUEM0limBcstGkC
VSvXwkG23vWSz016Wjz+iIhwEmHsBbCsyDgKM0QGafMk053jcCXGCIiIm8nStoE5bE+m8FKk4gvr
wMBmYF2dWrBBgnQdmU6pcXDC10G0P5UgqeivraWhT5qDJx+2WGdx+aMp9qaYSFRQtf8XckU4UJBW
1sXS
--- End Message ---