[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 5/8] parsers: support translatable token aliases
From: |
Akim Demaille |
Subject: |
[PATCH 5/8] parsers: support translatable token aliases |
Date: |
Sat, 29 Dec 2018 17:30:24 +0100 |
In addition to
%token NUM "number"
accept
%token NUM _("number")
in which case the token will be translated in error messages.
Do not use _() in the output if there are no translatable tokens.
* src/symtab.h, src/symtab.c (symbol): Add a 'translatable' member.
* src/parse-gram.y (TSTRING): New token.
(string_as_id.opt): Replace with...
(alias): this.
Use it.
* src/scan-gram.l (SC_ESCAPED_TSTRING): New start condition, to match
TSTRINGs.
* src/output.c (prepare_symbols): Define b4_translatable if there are
translatable strings.
* data/skeletons/glr.c, data/skeletons/lalr1.cc,
* data/skeletons/yacc.c (yytnamerr): Receive b4_translatable, and use it.
---
data/skeletons/glr.c | 16 +++++++++++-----
data/skeletons/lalr1.cc | 11 +++++++++--
data/skeletons/yacc.c | 12 ++++++++++--
src/output.c | 28 ++++++++++++++++++++++++++--
src/parse-gram.y | 28 ++++++++++++++++++----------
src/scan-gram.l | 25 ++++++++++++++++++++-----
src/symtab.c | 3 ++-
src/symtab.h | 7 ++++++-
8 files changed, 102 insertions(+), 28 deletions(-)
diff --git a/data/skeletons/glr.c b/data/skeletons/glr.c
index 02438887..99321cd7 100644
--- a/data/skeletons/glr.c
+++ b/data/skeletons/glr.c
@@ -541,11 +541,17 @@ typedef int yySymbol;
/** A printable representation of TOKEN. */
static inline const char*
yytokenName (yySymbol yytoken)
-{
+{]m4_ifdef([b4_translatable], [[
+ /* YYTRANSLATABLE[SYMBOL-NUM] -- Whether YYTNAME[SYMBOL-NUM] is
+ internationalizable. */
+ static ]b4_int_type_for([b4_translate])[ yytranslatable[] =
+ {
+ ]b4_translatable[
+ };]])[
if (yytoken == YYEMPTY)
- return "";
- else
- return yytname[yytoken];
+ return "";]m4_ifdef([b4_translatable], [[
+ return yytranslatable[yytoken] ? _(yytname[yytoken]) : yytname[yytoken];]],
[[
+ return yytname[yytoken];]])[
}
#endif
@@ -575,7 +581,7 @@ yystpcpy (char *yydest, const char *yysrc)
/* Copy to YYRES the name of YYTOKEN. If YYRES is null, do not copy;
instead, return the length of what the result would have been. */
static size_t
-yytnamerr (char *yyres, int yytoken)
+yytnamerr (char *yyres, yySymbol yytoken)
{
const char *yystr = yytokenName (yytoken);
if (! yyres)
diff --git a/data/skeletons/lalr1.cc b/data/skeletons/lalr1.cc
index 7944c4e7..d3d9ff37 100644
--- a/data/skeletons/lalr1.cc
+++ b/data/skeletons/lalr1.cc
@@ -511,8 +511,15 @@ m4_if(b4_prefix, [yy], [],
std::string
]b4_parser_class_name[::yytnamerr_ (int yytoken)
- {
- return yytname_[yytoken];
+ {]m4_ifdef([b4_translatable], [[
+ // YYTRANSLATABLE[TOKEN-NUM] -- Whether YYTNAME[TOKEN-NUM] is
+ // internationalizable.
+ static ]b4_int_type_for([b4_translate])[ yytranslatable[] =
+ {
+ ]b4_translatable[
+ };
+ return yytranslatable[yytoken] ? _(yytname_[yytoken]) :
yytname_[yytoken];]], [[
+ return yytname_[yytoken];]])[
}
]])[
diff --git a/data/skeletons/yacc.c b/data/skeletons/yacc.c
index fd10a004..e508633d 100644
--- a/data/skeletons/yacc.c
+++ b/data/skeletons/yacc.c
@@ -1045,8 +1045,16 @@ yy_lac (yytype_int16 *yyesa, yytype_int16 **yyes,
instead, return the length of what the result would have been. */
static YYSIZE_T
yytnamerr (char *yyres, int yytoken)
-{
- const char *yystr = yytname[yytoken];
+{]m4_ifdef([b4_translatable], [[
+ /* YYTRANSLATABLE[SYMBOL-NUM] -- Whether YYTNAME[SYMBOL-NUM] is
+ internationalizable. */
+ static ]b4_int_type_for([b4_translate])[ yytranslatable[] =
+ {
+ ]b4_translatable[
+ };
+ const char *yystr
+ = yytranslatable[yytoken] ? _(yytname[yytoken]) : yytname[yytoken];]], [[
+ const char *yystr = yytname[yytoken];]])[
if (! yyres)
return yystrlen (yystr);
diff --git a/src/output.c b/src/output.c
index a90e8266..eeeb3b81 100644
--- a/src/output.c
+++ b/src/output.c
@@ -54,6 +54,10 @@ static char *relocate_buffer = NULL;
| result of formatting the FIRST and then TABLE_DATA[BEGIN..END[ (of |
| TYPE), and to the muscle NAME_max, the max value of the |
| TABLE_DATA. |
+| |
+| For the typical case of outputting a complete table from 0, pass |
+| TABLE[0] as FIRST, and 1 as BEGIN. For instance |
+| muscle_insert_base_table ("pact", base, base[0], 1, nstates); |
`-------------------------------------------------------------------*/
@@ -156,7 +160,7 @@ prepare_symbols (void)
token_translations[0],
1, max_user_token_number + 1);
- /* tname -- token names. */
+ /* tname -- symbol names. */
{
/* We assume that the table will be output starting at column 2. */
int j = 2;
@@ -193,6 +197,26 @@ prepare_symbols (void)
muscle_insert ("tname", obstack_finish0 (&format_obstack));
}
+ /* translatable -- whether a token is translatable. */
+ {
+ bool translatable = false;
+ for (int i = 0; i < ntokens; ++i)
+ if (symbols[i]->translatable)
+ {
+ translatable = true;
+ break;
+ }
+ if (translatable)
+ {
+ int *values = xnmalloc (nsyms, sizeof *values);
+ for (int i = 0; i < ntokens; ++i)
+ values[i] = symbols[i]->translatable;
+ muscle_insert_int_table ("translatable", values,
+ values[0], 1, ntokens);
+ free (values);
+ }
+ }
+
/* Output YYTOKNUM. */
{
int *values = xnmalloc (ntokens, sizeof *values);
@@ -230,7 +254,7 @@ prepare_rules (void)
prhs[r] = i;
/* RHS of the rule R. */
for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp)
- rhs[i++] = *rhsp;
+ rhs[i++] = *rhsp;
/* Separator in RHS. */
rhs[i++] = -1;
diff --git a/src/parse-gram.y b/src/parse-gram.y
index 366dccb5..f99cb052 100644
--- a/src/parse-gram.y
+++ b/src/parse-gram.y
@@ -120,6 +120,7 @@
/* Define the tokens together with their human representation. */
%token GRAM_EOF 0 "end of file"
%token STRING "string"
+ TSTRING "translatable string"
%token PERCENT_TOKEN "%token"
%token PERCENT_NTERM "%nterm"
@@ -186,8 +187,8 @@
%type <unsigned char> CHAR
%printer { fputs (char_name ($$), yyo); } <unsigned char>
-%type <char*> "{...}" "%?{...}" "%{...%}" EPILOGUE STRING
-%printer { fputs (quotearg_style (c_quoting_style, $$), yyo); } STRING
+%type <char*> "{...}" "%?{...}" "%{...%}" EPILOGUE STRING TSTRING
+%printer { fputs (quotearg_style (c_quoting_style, $$), yyo); } STRING TSTRING
%printer { fprintf (yyo, "{\n%s\n}", $$); } <char*>
%type <uniqstr> BRACKETED_ID ID ID_COLON PERCENT_FLAG TAG tag tag.opt variable
@@ -462,7 +463,7 @@ tag:
`-----------------------*/
// A non empty list of possibly tagged symbols for %nterm.
-//
+//
// Can easily be defined like symbol_decls but restricted to ID, but
// using token_decls allows to reduce the number of rules, and also to
// make nicer error messages on "%nterm 'a'" or '%nterm FOO "foo"'.
@@ -497,7 +498,7 @@ token_decl.1:
// One symbol declaration for %token or %nterm.
token_decl:
- id int.opt[num] string_as_id.opt[alias]
+ id int.opt[num] alias
{
$$ = $id;
symbol_class_set ($id, current_class, @id, true);
@@ -514,6 +515,19 @@ int.opt:
| INT
;
+%type <symbol*> alias; // Optional alias in a token_decl; NULL when absent.
+alias:
+ %empty { $$ = NULL; } // No alias given.
+| string_as_id { $$ = $1; } // Plain string alias.
+| TSTRING // Translatable string alias.
+ {
+ $$ = symbol_get (quotearg_style (c_quoting_style, $1), @1);
+ symbol_class_set ($$, token_sym, @1, false);
+ $$->translatable = true; // Mark the alias symbol as translatable.
+ }
+;
+
+
/*-------------------------------------.
| token_decls_for_prec (%left, etc.). |
`-------------------------------------*/
@@ -727,12 +741,6 @@ string_as_id:
}
;
-%type <symbol*> string_as_id.opt;
-string_as_id.opt:
- %empty { $$ = NULL; }
-| string_as_id
-;
-
epilogue.opt:
%empty
| "%%" EPILOGUE
diff --git a/src/scan-gram.l b/src/scan-gram.l
index 5fe0fc4e..c2ada035 100644
--- a/src/scan-gram.l
+++ b/src/scan-gram.l
@@ -108,8 +108,8 @@ static void unexpected_newline (boundary, char const *);
%}
/* A C-like comment in directives/rules. */
%x SC_YACC_COMMENT
- /* Strings and characters in directives/rules. */
-%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
+ /* Characters and strings in directives/rules. */
+%x SC_ESCAPED_CHARACTER SC_ESCAPED_STRING SC_ESCAPED_TSTRING
/* An identifier was just read in directives/rules. Special state
to capture the sequence 'identifier :'. */
%x SC_AFTER_IDENTIFIER
@@ -309,6 +309,7 @@ eqopt ([[:space:]]*=)?
/* Strings. */
"\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
+ "_(\"" token_start = loc->start; BEGIN SC_ESCAPED_TSTRING;
/* Prologue. */
"%{" code_start = loc->start; BEGIN SC_PROLOGUE;
@@ -369,7 +370,7 @@ eqopt ([[:space:]]*=)?
| added value. |
`--------------------------------------------------------------*/
-<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
+<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_TSTRING,SC_TAG>
{
\0 complain (loc, complaint, _("invalid null character"));
}
@@ -529,6 +530,20 @@ eqopt ([[:space:]]*=)?
"\n" unexpected_newline (token_start, "\"");
}
+<SC_ESCAPED_TSTRING>
+{
+ /* Scanning a translatable string. The opening delimiter was consumed
+ by the rule that entered this start condition; the token ends at a
+ double quote immediately followed by a closing parenthesis. */
+ "\")" {
+ STRING_FINISH;
+ BEGIN INITIAL;
+ loc->start = token_start;
+ complain (loc, Wyacc,
+ _("POSIX Yacc does not support string literals"));
+ RETURN_VALUE (TSTRING, last_string);
+ }
+ /* On premature end, name the delimiter that closes this state (quote
+ plus parenthesis), not the bare quote used by plain strings. */
+ <<EOF>> unexpected_eof (token_start, "\")");
+ "\n" unexpected_newline (token_start, "\")");
+}
+}
+
/*----------------------------------------------------------.
| Scanning a Bison character literal, decoding its escapes. |
| The initial quote is already eaten. |
@@ -591,7 +606,7 @@ eqopt ([[:space:]]*=)?
| Decode escaped characters. |
`----------------------------*/
-<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
+<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_TSTRING>
{
\\[0-7]{1,3} {
verify (UCHAR_MAX < ULONG_MAX);
@@ -787,7 +802,7 @@ eqopt ([[:space:]]*=)?
| By default, grow the string obstack with the input. |
`-----------------------------------------------------*/
-<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>.
|
+<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_TSTRING>.
|
<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE>\n
STRING_GROW;
%%
diff --git a/src/symtab.c b/src/symtab.c
index 2409f41c..72630cd0 100644
--- a/src/symtab.c
+++ b/src/symtab.c
@@ -98,6 +98,7 @@ symbol_new (uniqstr tag, location loc)
_("POSIX Yacc forbids dashes in symbol names: %s"), tag);
res->tag = tag;
+ res->translatable = false;
res->location = loc;
res->location_of_lhs = false;
res->alias = NULL;
@@ -874,7 +875,7 @@ dummy_symbol_get (location loc)
}
bool
-symbol_is_dummy (const symbol *sym)
+symbol_is_dummy (symbol const *sym)
{
return sym->tag[0] == '@' || (sym->tag[0] == '$' && sym->tag[1] == '@');
}
diff --git a/src/symtab.h b/src/symtab.h
index a63b904e..4d8f4321 100644
--- a/src/symtab.h
+++ b/src/symtab.h
@@ -88,6 +88,9 @@ struct symbol
/** The key, name of the symbol. */
uniqstr tag;
+ /** Whether this symbol is translatable. */
+ bool translatable;
+
/** The "defining" location. */
location location;
@@ -111,6 +114,8 @@ struct symbol
struct sym_content
{
+ /** The main symbol that denotes this content (it contains the
+ possible alias). */
symbol *symbol;
/** Its \c \%type.
@@ -173,7 +178,7 @@ symbol *dummy_symbol_get (location loc);
void symbol_print (symbol const *s, FILE *f);
/** Is this a dummy nonterminal? */
-bool symbol_is_dummy (const symbol *sym);
+bool symbol_is_dummy (symbol const *sym);
/** The name of the code_props type: "\%destructor" or "\%printer". */
char const *code_props_type_string (code_props_type kind);
--
2.20.0
- [PATCH 0/8] Revamp the handling token string aliases in error messages, Akim Demaille, 2018/12/29
- [PATCH 6/8] tests: check that internationalization of token works, Akim Demaille, 2018/12/29
- [PATCH 7/8] translate bison's own tokens, Akim Demaille, 2018/12/29
- [PATCH 4/8] parsers: don't double escape tnames, Akim Demaille, 2018/12/29
- [PATCH 2/8] parsers: revamp the interface of yytnamerr, Akim Demaille, 2018/12/29
- [PATCH 1/8] yacc.c: avoid negated if, Akim Demaille, 2018/12/29
- [PATCH 3/8] tests: no longer play with trigraphs, Akim Demaille, 2018/12/29
- [PATCH 5/8] parsers: support translatable token aliases,
Akim Demaille <=
- [PATCH 8/8] regen, Akim Demaille, 2018/12/29