[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
uninorm/composition: Make more maintainable
From: Bruno Haible
Subject: uninorm/composition: Make more maintainable
Date: Fri, 13 Sep 2024 02:48:31 +0200
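In Unicode 16.0.0, there are composed characters whose constituents are
Unicode characters with value >= 0x12000. The hardcoded bound 0x12000 in
uc_composition (lib/uninorm/composition.c) therefore no longer works; this
patch replaces it with bounds emitted by the table generator.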
2024-09-12 Bruno Haible <bruno@clisp.org>
uninorm/composition: Make more maintainable.
* lib/gen-uni-tables.c (output_composition_tables): Add a filename2
parameter. Emit definitions of UNINORM_COMPOSE_MAX_ARG1 and
UNINORM_COMPOSE_MAX_ARG2 to this file.
(main): Invoke it with additional file name
uninorm/composition-table-bounds.h.
* lib/uninorm/composition-table-bounds.h: New generated file.
* lib/uninorm/composition.c: Include it.
(uc_composition): Use UNINORM_COMPOSE_MAX_ARG1 and
UNINORM_COMPOSE_MAX_ARG2 instead of hardcoded bounds.
diff --git a/lib/gen-uni-tables.c b/lib/gen-uni-tables.c
index b4f16da560..c42827ea25 100644
--- a/lib/gen-uni-tables.c
+++ b/lib/gen-uni-tables.c
@@ -10765,11 +10765,17 @@ debug_output_composition_tables (const char *filename)
}
static void
-output_composition_tables (const char *filename, const char *version)
+output_composition_tables (const char *filename, const char *filename2,
+ const char *version)
{
+ unsigned int max_code1;
+ unsigned int max_code2;
FILE *stream;
unsigned int ch;
+ max_code1 = 0;
+ max_code2 = 0;
+
stream = fopen (filename, "w");
if (stream == NULL)
{
@@ -10844,6 +10850,11 @@ output_composition_tables (const char *filename, const char *version)
Verify this. */
assert (strcmp (unicode_attributes[combined].combining, "0") == 0);
+ if (max_code1 < code1)
+ max_code1 = code1;
+ if (max_code2 < code2)
+ max_code2 = code2;
+
fprintf (stream,
"\"\\x%02x\\x%02x\\x%02x\\x%02x\\x%02x\\x%02x\", 0x%04x\n",
(code1 >> 16) & 0xff, (code1 >> 8) & 0xff, code1 & 0xff,
(code2 >> 16) & 0xff, (code2 >> 8) & 0xff, code2 & 0xff,
@@ -10857,6 +10868,37 @@ output_composition_tables (const char *filename, const char *version)
fprintf (stderr, "error writing to '%s'\n", filename);
exit (1);
}
+
+ stream = fopen (filename2, "w");
+ if (stream == NULL)
+ {
+ fprintf (stderr, "cannot open '%s' for writing\n", filename2);
+ exit (1);
+ }
+
+ fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
+ fprintf (stream, "/* Canonical composition of Unicode characters. */\n");
+ fprintf (stream, "/* Generated automatically by gen-uni-tables.c for Unicode %s. */\n",
+ version);
+ fprintf (stream, "\n");
+
+ fprintf (stream, "/* Copyright (C) 2009-2024 Free Software Foundation, Inc.\n");
+ fprintf (stream, "\n");
+ output_library_license (stream, true);
+ fprintf (stream, "\n");
+
+ fprintf (stream, "/* Maximum value of the first argument for which gl_uninorm_compose_lookup\n"
+ " can return a non-NULL value. */\n");
+ fprintf (stream, "#define UNINORM_COMPOSE_MAX_ARG1 0x%x\n", max_code1);
+ fprintf (stream, "/* Maximum value of the second argument for which gl_uninorm_compose_lookup\n"
+ " can return a non-NULL value. */\n");
+ fprintf (stream, "#define UNINORM_COMPOSE_MAX_ARG2 0x%x\n", max_code2);
+
+ if (ferror (stream) || fclose (stream))
+ {
+ fprintf (stderr, "error writing to '%s'\n", filename2);
+ exit (1);
+ }
}
/* ========================================================================= */
@@ -12031,7 +12073,7 @@ main (int argc, char * argv[])
output_decomposition_tables ("uninorm/decomposition-table1.h",
"uninorm/decomposition-table2.h", version);
debug_output_composition_tables ("uninorm/composition.txt");
- output_composition_tables ("uninorm/composition-table.gperf", version);
+ output_composition_tables ("uninorm/composition-table.gperf", "uninorm/composition-table-bounds.h", version);
output_simple_mapping_test ("../tests/unicase/test-uc_toupper.c",
"uc_toupper", to_upper, version);
output_simple_mapping_test ("../tests/unicase/test-uc_tolower.c",
"uc_tolower", to_lower, version);
diff --git a/lib/uninorm/composition-table-bounds.h b/lib/uninorm/composition-table-bounds.h
new file mode 100644
index 0000000000..5eafc478c0
--- /dev/null
+++ b/lib/uninorm/composition-table-bounds.h
@@ -0,0 +1,25 @@
+/* DO NOT EDIT! GENERATED AUTOMATICALLY! */
+/* Canonical composition of Unicode characters. */
+/* Generated automatically by gen-uni-tables.c for Unicode 15.1.0. */
+
+/* Copyright (C) 2009-2024 Free Software Foundation, Inc.
+
+ This file is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as
+ published by the Free Software Foundation; either version 2.1 of the
+ License, or (at your option) any later version.
+
+ This file is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>. */
+
+/* Maximum value of the first argument for which gl_uninorm_compose_lookup
+ can return a non-NULL value. */
+#define UNINORM_COMPOSE_MAX_ARG1 0x11935
+/* Maximum value of the second argument for which gl_uninorm_compose_lookup
+ can return a non-NULL value. */
+#define UNINORM_COMPOSE_MAX_ARG2 0x11930
diff --git a/lib/uninorm/composition.c b/lib/uninorm/composition.c
index df662895df..ecc4ed21e0 100644
--- a/lib/uninorm/composition.c
+++ b/lib/uninorm/composition.c
@@ -25,11 +25,12 @@
struct composition_rule { char codes[6]; unsigned int combined; };
#include "composition-table.h"
+#include "composition-table-bounds.h"
ucs4_t
uc_composition (ucs4_t uc1, ucs4_t uc2)
{
- if (uc1 < 0x12000 && uc2 < 0x12000)
+ if (uc1 <= UNINORM_COMPOSE_MAX_ARG1 && uc2 <= UNINORM_COMPOSE_MAX_ARG2)
{
if (uc2 >= 0x1161 && uc2 < 0x1161 + 21
&& uc1 >= 0x1100 && uc1 < 0x1100 + 19)
- uninorm/composition: Make more maintainable,
Bruno Haible <=