[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[bug #66378] cldr-plurals fails to parse recent CLDR data: 1c6, ...
From: |
Michele Locati |
Subject: |
[bug #66378] cldr-plurals fails to parse recent CLDR data: 1c6, ... |
Date: |
Mon, 28 Oct 2024 07:27:41 -0400 (EDT) |
Follow-up Comment #1, bug #66378 (group gettext):
I managed to get cldr-plurals to work with the following patch:
From fadb2220b6cd04c50d6d2bb5c6c6488c119c5484 Mon Sep 17 00:00:00 2001
From: Michele Locati <michele@locati.it>
Date: Mon, 28 Oct 2024 12:15:15 +0100
Subject: [PATCH] Accept CLDR rules with XcY samples and c and e vars
---
gettext-tools/src/cldr-plural-exp.c | 128 ++++++++++++++++++++++++++--
gettext-tools/src/cldr-plural.y | 4 +-
2 files changed, 126 insertions(+), 6 deletions(-)
diff --git a/gettext-tools/src/cldr-plural-exp.c
b/gettext-tools/src/cldr-plural-exp.c
index 60d3d0baf..dbce9627b 100644
--- a/gettext-tools/src/cldr-plural-exp.c
+++ b/gettext-tools/src/cldr-plural-exp.c
@@ -24,6 +24,7 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include "string-buffer.h"
#include "unistr.h"
#include "xalloc.h"
@@ -101,20 +102,136 @@ cldr_plural_rule_list_free (struct
cldr_plural_rule_list_ty *rules)
free (rules);
}
+static
+const char * get_XcY_end(const char *str)
+{
+ bool found_c = false;
+ if (str[0] < '0' || str[0] > '9')
+ return NULL;
+ str++;
+ while (str[0] != '\0')
+ {
+ if (str[0] == 'c')
+ {
+ if (found_c || str[1] < '0' || str[1] > '9')
+ return NULL;
+ found_c = true;
+ }
+ else if ((str[0] < '0' || str[0] > '9') && str[0] != '.')
+ break;
+ str++;
+ }
+ if (!found_c)
+ return NULL;
+ while(str[0] == ' ')
+ str++;
+ if (str[0] == ',')
+ {
+ str++;
+ while(str[0] == ' ')
+ str++;
+ }
+ return str;
+}
+
+static
+char * remove_XcY(const char *input)
+{
+ struct string_buffer buffer;
+ const char *p;
+ const char *p_next;
+ const char *p_next1;
+ const char *p_next2;
+
+ sb_init (&buffer);
+ p = (char*) input;
+ for (;;) {
+ int comma_and_spaces;
+ p_next1 = strstr(p, "@integer ");
+ p_next2 = strstr(p, "@decimal ");
+ if (p_next1 == NULL && p_next2 == NULL)
+ {
+ sb_append_c(&buffer, p);
+ break;
+ }
+ if (p_next1 != NULL && (p_next2 == NULL || p_next1 < p_next2))
+ p_next = p_next1 + /* strlen("@integer ")*/ 9;
+ else
+ p_next = p_next2 + /* strlen("@decimal ")*/ 9;
+ while (p < p_next)
+ sb_append1(&buffer, *p++);
+ while (p[0] == ' ')
+ sb_append1(&buffer, *p++);
+ comma_and_spaces = -1;
+ for (;;)
+ {
+ const char * XcY_end;
+ if (p[0] < '0' || p[0] > '9')
+ break;
+ XcY_end = get_XcY_end(p);
+ if (XcY_end != NULL)
+ {
+ p = XcY_end;
+ continue;
+ }
+ if (comma_and_spaces >= 0)
+ {
+ sb_append1(&buffer, ',');
+ while (comma_and_spaces > 0)
+ {
+ sb_append1(&buffer, ' ');
+ comma_and_spaces--;
+ }
+ }
+ while ((p[0] >= '0' && p[0] <= '9') || p[0] == '.' || p[0] == '~')
+ {
+ sb_append1(&buffer, p[0]);
+ p++;
+ }
+ if (p[0] != ',')
+ break;
+ comma_and_spaces = 0;
+ p++;
+ while (p[0] == ' ')
+ {
+ comma_and_spaces++;
+ p++;
+ }
+ }
+ if (comma_and_spaces > 0 && p[0] == '\xE2' && p[1] == '\x80' && p[2] ==
'\xA6')
+ {
+ sb_append1(&buffer, ',');
+ while (comma_and_spaces > 0)
+ {
+ sb_append1(&buffer, ' ');
+ comma_and_spaces--;
+ }
+ }
+ }
+ return sb_dupfree_c(&buffer);
+}
+
struct cldr_plural_rule_list_ty *
cldr_plural_parse (const char *input)
{
struct cldr_plural_parse_args arg;
+ char *input_without_XcY;
memset (&arg, 0, sizeof (struct cldr_plural_parse_args));
- arg.cp = input;
- arg.cp_end = input + strlen (input);
+ input_without_XcY = remove_XcY(input);
+ if (input_without_XcY == NULL)
+ return NULL;
+ arg.cp = input_without_XcY;
+ arg.cp_end = input_without_XcY + strlen(input_without_XcY);;
arg.result = XMALLOC (struct cldr_plural_rule_list_ty);
memset (arg.result, 0, sizeof (struct cldr_plural_rule_list_ty));
if (yyparse (&arg) != 0)
- return NULL;
-
+ {
+ free(input_without_XcY);
+ return NULL;
+ }
+ free(input_without_XcY);
return arg.result;
}
@@ -156,10 +273,11 @@ eval_relation (struct cldr_plural_relation_ty
*relation)
break;
case 'f': case 't':
case 'v': case 'w':
+ case 'c': case 'e':
{
/* Since plural expression in gettext only supports unsigned
integer, turn relations whose operand is either 'f', 't',
- 'v', or 'w' into a constant truth value. */
+ 'v', 'w', 'c', or 'e' into a constant truth value. */
/* FIXME: check mod? */
size_t i;
for (i = 0; i < relation->ranges->nitems; i++)
diff --git a/gettext-tools/src/cldr-plural.y
b/gettext-tools/src/cldr-plural.y
index 05c1b56ec..3e28d224b 100644
--- a/gettext-tools/src/cldr-plural.y
+++ b/gettext-tools/src/cldr-plural.y
@@ -263,6 +263,7 @@ at_decimal: %empty
;
sample_list: sample_list1 sample_ellipsis
+ | ELLIPSIS
;
sample_list1: sample_range
| sample_list1 ',' sample_range
@@ -413,7 +414,8 @@ yylex (YYSTYPE *lval, struct cldr_plural_parse_args *arg)
{
switch (ident[0])
{
- case 'n': case 'i': case 'f': case 't': case 'v': case 'w':
+ // See
https://unicode.org/reports/tr35/tr35-numbers.html#table-plural-operand-meanings
+ case 'n': case 'i': case 'f': case 't': case 'v': case 'w':
case 'c': case 'e':
arg->cp = exp;
lval->ival = ident[0];
sb_free (&buffer);
This patch basically:
1. accepts the "e" and "c" variables described in
https://unicode.org/reports/tr35/tr35-numbers.html#table-plural-operand-meanings
(assuming they have a value of zero)
2. strips out the XcY samples (e.g. 1c6, 1.0000001c6), so that the parser
works, for example, on
one: i = 0,1 @integer 0, 1 @decimal 0.0~1.5; many: e = 0 and i != 0 and i %
1000000 = 0 and v = 0 or e != 0..5 @integer 1000000, … @decimal …; other:
@integer 2~17, 100, 1000, 10000, 100000, … @decimal 2.0~3.5, 10.0, 100.0,
1000.0, 10000.0, 100000.0, 1000000.0, …
instead of
one: i = 0,1 @integer 0, 1 @decimal 0.0~1.5; many: e = 0 and i != 0 and i %
1000000 = 0 and v = 0 or e != 0..5 @integer 1000000, 1c6, 2c6, 3c6, 4c6, 5c6,
6c6, … @decimal 1.0000001c6, 1.1c6, 2.0000001c6, 2.1c6, 3.0000001c6, 3.1c6,
…; other: @integer 2~17, 100, 1000, 10000, 100000, 1c3, 2c3, 3c3, 4c3, 5c3,
6c3, … @decimal 2.0~3.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0,
1.0001c3, 1.1c3, 2.0001c3, 2.1c3, 3.0001c3, 3.1c3, …
Generating this output:
nplurals=3; plural=(n==0 || n==1 ? 0 : n!=0 && n%1000000==0 ? 1 : 2);
_______________________________________________________
Reply to this item at:
<https://savannah.gnu.org/bugs/?66378>
_______________________________________________
Message sent via Savannah
https://savannah.gnu.org/
signature.asc
Description: PGP signature