[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: To allow hex and oct %token values
From: Paul Eggert
Subject: Re: To allow hex and oct %token values
Date: 08 Mar 2004 12:51:05 -0800
User-agent: Gnus/5.09 (Gnus v5.9.0) Emacs/21.3
Thanks for the suggestion. As we discussed, octal isn't compatible with
POSIX, but hexadecimal is a valid extension. I installed this patch:
2004-03-08 Paul Eggert <address@hidden>
Add support for hex token numbers. Suggested by Odd Arild Olsen in
<http://mail.gnu.org/archive/html/bison-patches/2004-03/msg00000.html>.
* NEWS: Document hexadecimal tokens, no NUL bytes, %destructor
in lalr1.cc.
* doc/bison.texinfo (Token Decl): Add hexadecimal token numbers.
* src/scan-gram.l (scan_integer): New function.
({int}): Use it.
(0[xX][0-9abcdefABCDEF]+): New pattern, to support hex numbers.
(<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>, \\x[0-9abcdefABCDEF]+,
handle_action_dollar, handle_action_at, convert_ucn_to_byte):
Say "long int", not "long", for uniformity with GNU style.
Index: NEWS
===================================================================
RCS file: /cvsroot/bison/bison/NEWS,v
retrieving revision 1.99
diff -p -u -r1.99 NEWS
--- NEWS 5 Oct 2003 07:34:35 -0000 1.99
+++ NEWS 8 Mar 2004 20:45:39 -0000
@@ -1,7 +1,18 @@
Bison News
----------
-Changes in version 1.875c:
+Changes in version 1.875d:
+
+* NUL bytes are no longer allowed in Bison string literals, unfortunately.
+
+* %token numbers can now be hexadecimal integers, e.g., `%token FOO 0x12d'.
+ This is a GNU extension.
+
+* Experimental %destructor support has been added to lalr1.cc.
+
+Changes in version 1.875c, 2003-08-25:
+
+ (Just bug fixes.)
Changes in version 1.875b, 2003-06-17:
@@ -551,7 +562,7 @@ End:
-----
-Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003
+Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004
Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
Index: doc/bison.texinfo
===================================================================
RCS file: /cvsroot/bison/bison/doc/bison.texinfo,v
retrieving revision 1.119
diff -p -u -r1.119 bison.texinfo
--- doc/bison.texinfo 2 Dec 2003 17:08:31 -0000 1.119
+++ doc/bison.texinfo 8 Mar 2004 20:45:53 -0000
@@ -44,7 +44,7 @@ This manual is for @acronym{GNU} Bison (
@value{UPDATED}), the @acronym{GNU} parser generator.
Copyright @copyright{} 1988, 1989, 1990, 1991, 1992, 1993, 1995, 1998,
-1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
@quotation
Permission is granted to copy, distribute and/or modify this document
@@ -3309,10 +3309,12 @@ associativity and precedence. @xref{Pre
Precedence}.
You can explicitly specify the numeric code for a token type by appending
-an integer value in the field immediately following the token name:
+a decimal or hexadecimal integer value in the field immediately
+following the token name:
@example
%token NUM 300
+%token XNUM 0x12d // a GNU extension
@end example
@noindent
Index: src/scan-gram.l
===================================================================
RCS file: /cvsroot/bison/bison/src/scan-gram.l,v
retrieving revision 1.66
diff -p -u -r1.66 scan-gram.l
--- src/scan-gram.l 7 Oct 2003 07:32:57 -0000 1.66
+++ src/scan-gram.l 8 Mar 2004 20:45:54 -0000
@@ -1,6 +1,6 @@
/* Bison Grammar Scanner -*- C -*-
- Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
This file is part of Bison, the GNU Compiler Compiler.
@@ -101,6 +101,7 @@ static int rule_length;
static void handle_dollar (int token_type, char *cp, location loc);
static void handle_at (int token_type, char *cp, location loc);
static void handle_syncline (char *args);
+static unsigned long int scan_integer (char const *p, int base, location loc);
static int convert_ucn_to_byte (char const *hex_text);
static void unexpected_eof (boundary, char const *);
@@ -235,15 +236,11 @@ splice (\\[ \f\t\v]*\n)*
}
{int} {
- unsigned long num;
- set_errno (0);
- num = strtoul (yytext, 0, 10);
- if (INT_MAX < num || get_errno ())
- {
- complain_at (*loc, _("integer out of range: %s"), quote (yytext));
- num = INT_MAX;
- }
- val->integer = num;
+ val->integer = scan_integer (yytext, 10, *loc);
+ return INT;
+ }
+ 0[xX][0-9abcdefABCDEF]+ {
+ val->integer = scan_integer (yytext, 16, *loc);
return INT;
}
@@ -411,7 +408,7 @@ splice (\\[ \f\t\v]*\n)*
<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
{
\\[0-7]{1,3} {
- unsigned long c = strtoul (yytext + 1, 0, 8);
+ unsigned long int c = strtoul (yytext + 1, 0, 8);
if (UCHAR_MAX < c)
complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
else if (! c)
@@ -421,7 +418,7 @@ splice (\\[ \f\t\v]*\n)*
}
\\x[0-9abcdefABCDEF]+ {
- unsigned long c;
+ unsigned long int c;
set_errno (0);
c = strtoul (yytext + 2, 0, 16);
if (UCHAR_MAX < c || get_errno ())
@@ -790,7 +787,7 @@ handle_action_dollar (char *text, locati
}
else
{
- long num;
+ long int num;
set_errno (0);
num = strtol (cp, 0, 10);
@@ -869,7 +866,7 @@ handle_action_at (char *text, location l
obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
else
{
- long num;
+ long int num;
set_errno (0);
num = strtol (cp, 0, 10);
@@ -919,6 +916,25 @@ handle_at (int token_type, char *text, l
}
+/*------------------------------------------------------.
+| Scan NUMBER for a base-BASE integer at location LOC. |
+`------------------------------------------------------*/
+
+static unsigned long int
+scan_integer (char const *number, int base, location loc)
+{
+ unsigned long int num;
+ set_errno (0);
+ num = strtoul (number, 0, base);
+ if (INT_MAX < num || get_errno ())
+ {
+ complain_at (loc, _("integer out of range: %s"), quote (number));
+ num = INT_MAX;
+ }
+ return num;
+}
+
+
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character, |
| and return that character. Return -1 if UCN does not correspond |
@@ -928,7 +944,7 @@ handle_at (int token_type, char *text, l
static int
convert_ucn_to_byte (char const *ucn)
{
- unsigned long code = strtoul (ucn + 2, 0, 16);
+ unsigned long int code = strtoul (ucn + 2, 0, 16);
/* FIXME: Currently we assume Unicode-compatible unibyte characters
on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes). On