bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

uninorm/composition: Make more maintainable


From: Bruno Haible
Subject: uninorm/composition: Make more maintainable
Date: Fri, 13 Sep 2024 02:48:31 +0200

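In Unicode 16.0.0, there are composed characters whose constituents are
Unicode characters with code point values >= 0x12000. The hardcoded bound
0x12000 in uc_composition (lib/uninorm/composition.c) therefore no longer
works. This patch replaces it with bounds that gen-uni-tables.c computes
from the Unicode data and emits into a generated header file.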


2024-09-12  Bruno Haible  <bruno@clisp.org>

        uninorm/composition: Make more maintainable.
        * lib/gen-uni-tables.c (output_composition_tables): Add a filename2
        parameter. Emit definitions of UNINORM_COMPOSE_MAX_ARG1 and
        UNINORM_COMPOSE_MAX_ARG2 to this file.
        (main): Invoke it with additional file name
        uninorm/composition-table-bounds.h.
        * lib/uninorm/composition-table-bounds.h: New generated file.
        * lib/uninorm/composition.c: Include it.
        (uc_composition): Use UNINORM_COMPOSE_MAX_ARG1 and
        UNINORM_COMPOSE_MAX_ARG2 instead of hardcoded bounds.

diff --git a/lib/gen-uni-tables.c b/lib/gen-uni-tables.c
index b4f16da560..c42827ea25 100644
--- a/lib/gen-uni-tables.c
+++ b/lib/gen-uni-tables.c
@@ -10765,11 +10765,17 @@ debug_output_composition_tables (const char *filename)
 }
 
 static void
-output_composition_tables (const char *filename, const char *version)
+output_composition_tables (const char *filename, const char *filename2,
+                           const char *version)
 {
+  unsigned int max_code1;
+  unsigned int max_code2;
   FILE *stream;
   unsigned int ch;
 
+  max_code1 = 0;
+  max_code2 = 0;
+
   stream = fopen (filename, "w");
   if (stream == NULL)
     {
@@ -10844,6 +10850,11 @@ output_composition_tables (const char *filename, const 
char *version)
                  Verify this.  */
               assert (strcmp (unicode_attributes[combined].combining, "0") == 
0);
 
+              if (max_code1 < code1)
+                max_code1 = code1;
+              if (max_code2 < code2)
+                max_code2 = code2;
+
               fprintf (stream, 
"\"\\x%02x\\x%02x\\x%02x\\x%02x\\x%02x\\x%02x\", 0x%04x\n",
                        (code1 >> 16) & 0xff, (code1 >> 8) & 0xff, code1 & 0xff,
                        (code2 >> 16) & 0xff, (code2 >> 8) & 0xff, code2 & 0xff,
@@ -10857,6 +10868,37 @@ output_composition_tables (const char *filename, const 
char *version)
       fprintf (stderr, "error writing to '%s'\n", filename);
       exit (1);
     }
+
+  stream = fopen (filename2, "w");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "cannot open '%s' for writing\n", filename2);
+      exit (1);
+    }
+
+  fprintf (stream, "/* DO NOT EDIT! GENERATED AUTOMATICALLY! */\n");
+  fprintf (stream, "/* Canonical composition of Unicode characters.  */\n");
+  fprintf (stream, "/* Generated automatically by gen-uni-tables.c for Unicode 
%s.  */\n",
+           version);
+  fprintf (stream, "\n");
+
+  fprintf (stream, "/* Copyright (C) 2009-2024 Free Software Foundation, 
Inc.\n");
+  fprintf (stream, "\n");
+  output_library_license (stream, true);
+  fprintf (stream, "\n");
+
+  fprintf (stream, "/* Maximum value of the first argument for which 
gl_uninorm_compose_lookup\n"
+                   "   can return a non-NULL value.  */\n");
+  fprintf (stream, "#define UNINORM_COMPOSE_MAX_ARG1 0x%x\n", max_code1);
+  fprintf (stream, "/* Maximum value of the second argument for which 
gl_uninorm_compose_lookup\n"
+                   "   can return a non-NULL value.  */\n");
+  fprintf (stream, "#define UNINORM_COMPOSE_MAX_ARG2 0x%x\n", max_code2);
+
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error writing to '%s'\n", filename2);
+      exit (1);
+    }
 }
 
 /* ========================================================================= */
@@ -12031,7 +12073,7 @@ main (int argc, char * argv[])
 
   output_decomposition_tables ("uninorm/decomposition-table1.h", 
"uninorm/decomposition-table2.h", version);
   debug_output_composition_tables ("uninorm/composition.txt");
-  output_composition_tables ("uninorm/composition-table.gperf", version);
+  output_composition_tables ("uninorm/composition-table.gperf", 
"uninorm/composition-table-bounds.h", version);
 
   output_simple_mapping_test ("../tests/unicase/test-uc_toupper.c", 
"uc_toupper", to_upper, version);
   output_simple_mapping_test ("../tests/unicase/test-uc_tolower.c", 
"uc_tolower", to_lower, version);
diff --git a/lib/uninorm/composition-table-bounds.h 
b/lib/uninorm/composition-table-bounds.h
new file mode 100644
index 0000000000..5eafc478c0
--- /dev/null
+++ b/lib/uninorm/composition-table-bounds.h
@@ -0,0 +1,25 @@
+/* DO NOT EDIT! GENERATED AUTOMATICALLY! */
+/* Canonical composition of Unicode characters.  */
+/* Generated automatically by gen-uni-tables.c for Unicode 15.1.0.  */
+
+/* Copyright (C) 2009-2024 Free Software Foundation, Inc.
+
+   This file is free software: you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   This file is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* Maximum value of the first argument for which gl_uninorm_compose_lookup
+   can return a non-NULL value.  */
+#define UNINORM_COMPOSE_MAX_ARG1 0x11935
+/* Maximum value of the second argument for which gl_uninorm_compose_lookup
+   can return a non-NULL value.  */
+#define UNINORM_COMPOSE_MAX_ARG2 0x11930
diff --git a/lib/uninorm/composition.c b/lib/uninorm/composition.c
index df662895df..ecc4ed21e0 100644
--- a/lib/uninorm/composition.c
+++ b/lib/uninorm/composition.c
@@ -25,11 +25,12 @@
 struct composition_rule { char codes[6]; unsigned int combined; };
 
 #include "composition-table.h"
+#include "composition-table-bounds.h"
 
 ucs4_t
 uc_composition (ucs4_t uc1, ucs4_t uc2)
 {
-  if (uc1 < 0x12000 && uc2 < 0x12000)
+  if (uc1 <= UNINORM_COMPOSE_MAX_ARG1 && uc2 <= UNINORM_COMPOSE_MAX_ARG2)
     {
       if (uc2 >= 0x1161 && uc2 < 0x1161 + 21
           && uc1 >= 0x1100 && uc1 < 0x1100 + 19)






reply via email to

[Prev in Thread] Current Thread [Next in Thread]