texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

branch master updated: * tp/Texinfo/XS/convert/convert_html.c (unicode_e


From: Patrice Dumas
Subject: branch master updated: * tp/Texinfo/XS/convert/convert_html.c (unicode_entities) (html_format_setup): setup unicode_entities.
Date: Sun, 29 Sep 2024 18:12:46 -0400

This is an automated email from the git hooks/post-receive script.

pertusus pushed a commit to branch master
in repository texinfo.

The following commit(s) were added to refs/heads/master by this push:
     new 3b483910e4 * tp/Texinfo/XS/convert/convert_html.c (unicode_entities) 
(html_format_setup): setup unicode_entities.
3b483910e4 is described below

commit 3b483910e4c1c6d83b9b83c76fcc41aedbdd351f
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Fri Jul 19 16:37:13 2024 +0200

    * tp/Texinfo/XS/convert/convert_html.c (unicode_entities)
    (html_format_setup): setup unicode_entities.
    
    * tp/Texinfo/XS/convert/convert_html.c (html_format_setup),
    tp/Texinfo/XS/main/unicode.h (ENCODING_CODEPOINTS),
    tp/maintain/setup_converters_code_tables.pl: add
    unicode_character_brace_no_arg_commands css_string field.  Set it up
    with Perl code.  Set default_no_arg_commands_formatting css_string
    based on unicode_character_brace_no_arg_commands css_string and on
    other existing @-commands formatting tables to text.
    
    * tp/Texinfo/XS/convert/convert_html.c (html_initialize_output_state):
    use unicode_entities and line_break_element to setup
    output_no_arg_commands_formatting
---
 ChangeLog                                   | 21 ++++++-
 tp/Texinfo/XS/convert/convert_html.c        | 92 +++++++++++++++++++++++++----
 tp/Texinfo/XS/main/unicode.h                |  1 +
 tp/maintain/setup_converters_code_tables.pl |  7 ++-
 4 files changed, 106 insertions(+), 15 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index ed176bba85..e105ff3324 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+2024-07-19  Patrice Dumas  <pertusus@free.fr>
+
+       * tp/Texinfo/XS/convert/convert_html.c (unicode_entities)
+       (html_format_setup): setup unicode_entities.
+
+       * tp/Texinfo/XS/convert/convert_html.c (html_format_setup),
+       tp/Texinfo/XS/main/unicode.h (ENCODING_CODEPOINTS),
+       tp/maintain/setup_converters_code_tables.pl: add
+       unicode_character_brace_no_arg_commands css_string field.  Set it up
+       with Perl code.  Set default_no_arg_commands_formatting css_string
+       based on unicode_character_brace_no_arg_commands css_string and on
+       other existing @-commands formatting tables to text.
+
+       * tp/Texinfo/XS/convert/convert_html.c (html_initialize_output_state):
+       use unicode_entities and line_break_element to setup
+       output_no_arg_commands_formatting
+
 2024-07-18  Patrice Dumas  <pertusus@free.fr>
 
        * tp/Texinfo/XS/convert/convert_html.c (html_initialize_output_state):
@@ -33,7 +50,7 @@
        (html_converter_initialize_sv), tp/Texinfo/XS/convert/convert_html.c
        (html_free_converter), tp/Texinfo/XS/convert/get_html_perl_info.c
        (html_converter_initialize_sv), tp/Texinfo/XS/main/converter_types.h
-       (CONVERTER): pass default_converted_directions_strings to perl, to
+       (CONVERTER): pass default_converted_directions_strings from Perl to
        default_converted_directions_strings in converter.
 
 2024-07-18  Patrice Dumas  <pertusus@free.fr>
@@ -47,7 +64,7 @@
        * tp/Texinfo/Convert/HTML.pm (%special_characters)
        (conversion_initialization): remove _set_non_breaking_space.  Add
        $xml_named_entity_nbsp as entity of non_breaking_space.  Simplify code
-       setting conf_default_no_arg_commands_formatting_normal too
+       setting conf_default_no_arg_commands_formatting_normal to
        non_breaking_space for space commands.
 
        * tp/Texinfo/XS/main/build_perl_info.c (build_expanded_formats):
diff --git a/tp/Texinfo/XS/convert/convert_html.c 
b/tp/Texinfo/XS/convert/convert_html.c
index db717cf73a..598c128759 100644
--- a/tp/Texinfo/XS/convert/convert_html.c
+++ b/tp/Texinfo/XS/convert/convert_html.c
@@ -16615,6 +16615,8 @@ static COMMAND_STACK def_cmd_list;
 
 static COMMAND_ID_LIST no_arg_formatted_cmd;
 
+static char *unicode_entities[BUILTIN_CMD_NUMBER];
+
 /* set information that is independent of customization, only called once */
 void
 html_format_setup (void)
@@ -16721,7 +16723,8 @@ html_format_setup (void)
     {
       if (xml_text_entity_no_arg_commands_formatting[i])
         {
-          /* need to cast to drop const */
+          /* the value is never modified but the struct field type is not const
+             so need to cast to drop const */
           default_no_arg_commands_formatting[i][HCC_type_normal].text
              = (char *)xml_text_entity_no_arg_commands_formatting[i];
 
@@ -16745,6 +16748,56 @@ html_format_setup (void)
   default_no_arg_commands_formatting[CM_ASTERISK][HCC_type_normal].text = 
"<br>";
   default_no_arg_commands_formatting[CM_ASTERISK][HCC_type_preformatted].text
     = "\n";
+
+  for (i = 0; i < no_arg_formatted_cmd_nr; i++)
+    {
+      enum command_id cmd = no_arg_formatted_cmd.list[i];
+      /* prepare unicode numeric entities.  Freed at exit */
+      if (unicode_character_brace_no_arg_commands[cmd].codepoint)
+        {
+          unsigned long point_nr
+           = strtoul (unicode_character_brace_no_arg_commands[cmd].codepoint,
+                      NULL, 16);
+          char *entity;
+          xasprintf (&entity, "&#%lu;", point_nr);
+          unicode_entities[cmd] = entity;
+        }
+
+      /* css_strings */
+      if (cmd == CM_NEWLINE)
+        default_no_arg_commands_formatting[cmd][HCC_type_css_string].text
+          = "\\A ";
+      else if (cmd == CM_error)
+        default_no_arg_commands_formatting[cmd][HCC_type_css_string].text
+          = 0;
+      else if (unicode_character_brace_no_arg_commands[cmd].css_string)
+        {
+          unsigned long point_nr
+           = strtoul (unicode_character_brace_no_arg_commands[cmd].codepoint,
+                      NULL, 16);
+          if (point_nr < 128) /* 7bit ascii */
+            default_no_arg_commands_formatting[cmd][HCC_type_css_string].text
+              = (char *)point_nr;
+          else
+          /* the value is never modified but the struct field type is not const
+             so need to cast to drop const */
+            default_no_arg_commands_formatting[cmd][HCC_type_css_string].text
+              = (char 
*)unicode_character_brace_no_arg_commands[cmd].css_string;
+        }
+      else if (nobrace_symbol_text[cmd])
+        default_no_arg_commands_formatting[cmd][HCC_type_css_string].text
+          /* the value is never modified but the struct field type is not const
+             so need to cast to drop const */
+          = (char *)nobrace_symbol_text[cmd];
+      else if (text_brace_no_arg_commands[cmd])
+          /* the value is never modified but the struct field type is not const
+             so need to cast to drop const */
+        default_no_arg_commands_formatting[cmd][HCC_type_css_string].text
+          = (char *)text_brace_no_arg_commands[cmd];
+      else
+        fprintf (stderr, "BUG: %s: no css_string\n",
+                         builtin_command_data[cmd].cmdname);
+    }
 }
 
 static int
@@ -17467,10 +17520,9 @@ html_initialize_output_state (CONVERTER *self, const 
char *context)
       fprintf (stderr, "REMARK: html_initialize_output_state: no document");
     }
 
+  /* corresponds with $self->{'no_arg_commands_formatting'} */
   HTML_COMMAND_CONVERSION
    
output_no_arg_commands_formatting[BUILTIN_CMD_NUMBER][HCC_type_css_string+1];
-  memcpy (output_no_arg_commands_formatting, 
default_no_arg_commands_formatting,
-          sizeof (default_no_arg_commands_formatting));
 
   output_encoding = self->conf->OUTPUT_ENCODING_NAME.o.string;
 
@@ -17495,6 +17547,20 @@ html_initialize_output_state (CONVERTER *self, const 
char *context)
       self->special_character[i].len = strlen (special_character_string);
     }
 
+  if (self->conf->USE_XML_SYNTAX.o.integer > 0)
+    {
+      /* here in perl something for rules but we already get that from perl */
+      line_break_element = "<br/>";
+    }
+  else
+    line_break_element = "<br>";
+
+  self->line_break_element.string = line_break_element;
+  self->line_break_element.len = strlen (line_break_element);
+
+  memcpy (output_no_arg_commands_formatting, 
default_no_arg_commands_formatting,
+          sizeof (default_no_arg_commands_formatting));
+
   /* if not the textual entity */
   if (strcmp(self->special_character[SC_non_breaking_space].string,
              special_characters_formatting[SC_non_breaking_space][0]))
@@ -17502,21 +17568,25 @@ html_initialize_output_state (CONVERTER *self, const 
char *context)
       for (i = 0; i < sizeof (spaces_cmd) / sizeof (spaces_cmd[0]); i++)
         {
           enum command_id cmd = spaces_cmd[i];
+          /* cast to drop const */
           output_no_arg_commands_formatting[cmd][HCC_type_normal].text
-            = strdup (self->special_character[SC_non_breaking_space].string);
+            = (char *)self->special_character[SC_non_breaking_space].string;
         }
     }
 
-  if (self->conf->USE_XML_SYNTAX.o.integer > 0)
+  if (self->conf->USE_NUMERIC_ENTITY.o.integer > 0)
     {
-      /* here in perl something for rules but we already get that from perl */
-      line_break_element = "<br/>";
+      for (i = 0; i < no_arg_formatted_cmd.number; i++)
+        {
+          enum command_id cmd = no_arg_formatted_cmd.list[i];
+          if (unicode_entities[cmd])
+            output_no_arg_commands_formatting[cmd][HCC_type_normal].text
+              = unicode_entities[cmd];
+        }
     }
-  else
-    line_break_element = "<br>";
 
-  self->line_break_element.string = line_break_element;
-  self->line_break_element.len = strlen (line_break_element);
+  output_no_arg_commands_formatting[CM_NEWLINE][HCC_type_normal].text
+    = self->line_break_element.string;
 
   sort_css_element_class_styles (self);
 
diff --git a/tp/Texinfo/XS/main/unicode.h b/tp/Texinfo/XS/main/unicode.h
index 34e42b06ee..f59b593e5a 100644
--- a/tp/Texinfo/XS/main/unicode.h
+++ b/tp/Texinfo/XS/main/unicode.h
@@ -17,6 +17,7 @@ typedef struct ENCODING_CODEPOINTS {
 typedef struct COMMAND_UNICODE {
     const char *codepoint;
     const char *text; /* UTF-8 encoded */
+    const char *css_string;
     int is_extra;
 } COMMAND_UNICODE;
 
diff --git a/tp/maintain/setup_converters_code_tables.pl 
b/tp/maintain/setup_converters_code_tables.pl
index 8ec82d9a6c..d7137f7c35 100755
--- a/tp/maintain/setup_converters_code_tables.pl
+++ b/tp/maintain/setup_converters_code_tables.pl
@@ -180,13 +180,16 @@ foreach my $command_name (@commands_order) {
     my $result = $unicode_character_brace_no_arg_commands{$command_name};
     my $protected = '"'.join ('', map {_protect_char($_)} split ('', 
$result)).'"';
     my $codepoint = '"'.$unicode_map{$command_name}.'"';
+    # note that this is not used for ASCII characters and some specific
+    # characters
+    my $css_string = '"\\\\'.$unicode_map{$command_name}.' "';
     my $is_extra = 0;
     if (defined($extra_unicode_map{$command_name})) {
       $is_extra = 1;
     }
-    print UNIC "{$codepoint, $protected, $is_extra},   /* $command */\n";
+    print UNIC "{$codepoint, $protected, $css_string, $is_extra},   /* 
$command */\n";
   } else {
-    print UNIC "{0, 0, -1},\n";
+    print UNIC "{0, 0, 0, -1},\n";
   }
 }
 print UNIC "};\n\n";



reply via email to

[Prev in Thread] Current Thread [Next in Thread]