[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
branch master updated: * tp/Texinfo/XS/convert/convert_html.c (unicode_e
From: |
Patrice Dumas |
Subject: |
branch master updated: * tp/Texinfo/XS/convert/convert_html.c (unicode_entities) (html_format_setup): setup unicode_entities. |
Date: |
Sun, 29 Sep 2024 18:12:46 -0400 |
This is an automated email from the git hooks/post-receive script.
pertusus pushed a commit to branch master
in repository texinfo.
The following commit(s) were added to refs/heads/master by this push:
new 3b483910e4 * tp/Texinfo/XS/convert/convert_html.c (unicode_entities)
(html_format_setup): setup unicode_entities.
3b483910e4 is described below
commit 3b483910e4c1c6d83b9b83c76fcc41aedbdd351f
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Fri Jul 19 16:37:13 2024 +0200
* tp/Texinfo/XS/convert/convert_html.c (unicode_entities)
(html_format_setup): setup unicode_entities.
* tp/Texinfo/XS/convert/convert_html.c (html_format_setup),
tp/Texinfo/XS/main/unicode.h (ENCODING_CODEPOINTS),
tp/maintain/setup_converters_code_tables.pl: add
unicode_character_brace_no_arg_commands css_string field. Set it up
with Perl code. Set default_no_arg_commands_formatting css_string
based on unicode_character_brace_no_arg_commands css_string and on
other existing @-commands formatting tables to text.
* tp/Texinfo/XS/convert/convert_html.c (html_initialize_output_state):
use unicode_entities and line_break_element to setup
output_no_arg_commands_formatting
---
ChangeLog | 21 ++++++-
tp/Texinfo/XS/convert/convert_html.c | 92 +++++++++++++++++++++++++----
tp/Texinfo/XS/main/unicode.h | 1 +
tp/maintain/setup_converters_code_tables.pl | 7 ++-
4 files changed, 106 insertions(+), 15 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index ed176bba85..e105ff3324 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+2024-07-19 Patrice Dumas <pertusus@free.fr>
+
+ * tp/Texinfo/XS/convert/convert_html.c (unicode_entities)
+ (html_format_setup): setup unicode_entities.
+
+ * tp/Texinfo/XS/convert/convert_html.c (html_format_setup),
+ tp/Texinfo/XS/main/unicode.h (ENCODING_CODEPOINTS),
+ tp/maintain/setup_converters_code_tables.pl: add
+ unicode_character_brace_no_arg_commands css_string field. Set it up
+ with Perl code. Set default_no_arg_commands_formatting css_string
+ based on unicode_character_brace_no_arg_commands css_string and on
+ other existing @-commands formatting tables to text.
+
+ * tp/Texinfo/XS/convert/convert_html.c (html_initialize_output_state):
+ use unicode_entities and line_break_element to setup
+ output_no_arg_commands_formatting
+
2024-07-18 Patrice Dumas <pertusus@free.fr>
* tp/Texinfo/XS/convert/convert_html.c (html_initialize_output_state):
@@ -33,7 +50,7 @@
(html_converter_initialize_sv), tp/Texinfo/XS/convert/convert_html.c
(html_free_converter), tp/Texinfo/XS/convert/get_html_perl_info.c
(html_converter_initialize_sv), tp/Texinfo/XS/main/converter_types.h
- (CONVERTER): pass default_converted_directions_strings to perl, to
+ (CONVERTER): pass default_converted_directions_strings from Perl to
default_converted_directions_strings in converter.
2024-07-18 Patrice Dumas <pertusus@free.fr>
@@ -47,7 +64,7 @@
* tp/Texinfo/Convert/HTML.pm (%special_characters)
(conversion_initialization): remove _set_non_breaking_space. Add
$xml_named_entity_nbsp as entity of non_breaking_space. Simplify code
- setting conf_default_no_arg_commands_formatting_normal too
+ setting conf_default_no_arg_commands_formatting_normal to
non_breaking_space for space commands.
* tp/Texinfo/XS/main/build_perl_info.c (build_expanded_formats):
diff --git a/tp/Texinfo/XS/convert/convert_html.c
b/tp/Texinfo/XS/convert/convert_html.c
index db717cf73a..598c128759 100644
--- a/tp/Texinfo/XS/convert/convert_html.c
+++ b/tp/Texinfo/XS/convert/convert_html.c
@@ -16615,6 +16615,8 @@ static COMMAND_STACK def_cmd_list;
static COMMAND_ID_LIST no_arg_formatted_cmd;
+static char *unicode_entities[BUILTIN_CMD_NUMBER];
+
/* set information that is independent of customization, only called once */
void
html_format_setup (void)
@@ -16721,7 +16723,8 @@ html_format_setup (void)
{
if (xml_text_entity_no_arg_commands_formatting[i])
{
- /* need to cast to drop const */
+ /* the value is never modified but the struct field type is not const
+ so need to cast to drop const */
default_no_arg_commands_formatting[i][HCC_type_normal].text
= (char *)xml_text_entity_no_arg_commands_formatting[i];
@@ -16745,6 +16748,56 @@ html_format_setup (void)
default_no_arg_commands_formatting[CM_ASTERISK][HCC_type_normal].text =
"<br>";
default_no_arg_commands_formatting[CM_ASTERISK][HCC_type_preformatted].text
= "\n";
+
+ for (i = 0; i < no_arg_formatted_cmd_nr; i++)
+ {
+ enum command_id cmd = no_arg_formatted_cmd.list[i];
+ /* prepare unicode numeric entities. Freed at exit */
+ if (unicode_character_brace_no_arg_commands[cmd].codepoint)
+ {
+ unsigned long point_nr
+ = strtoul (unicode_character_brace_no_arg_commands[cmd].codepoint,
+ NULL, 16);
+ char *entity;
+ xasprintf (&entity, "&#%lu;", point_nr);
+ unicode_entities[cmd] = entity;
+ }
+
+ /* css_strings */
+ if (cmd == CM_NEWLINE)
+ default_no_arg_commands_formatting[cmd][HCC_type_css_string].text
+ = "\\A ";
+ else if (cmd == CM_error)
+ default_no_arg_commands_formatting[cmd][HCC_type_css_string].text
+ = 0;
+ else if (unicode_character_brace_no_arg_commands[cmd].css_string)
+ {
+ unsigned long point_nr
+ = strtoul (unicode_character_brace_no_arg_commands[cmd].codepoint,
+ NULL, 16);
+ if (point_nr < 128) /* 7bit ascii */
+ default_no_arg_commands_formatting[cmd][HCC_type_css_string].text
+ = (char *)point_nr;
+ else
+ /* the value is never modified but the struct field type is not const
+ so need to cast to drop const */
+ default_no_arg_commands_formatting[cmd][HCC_type_css_string].text
+ = (char
*)unicode_character_brace_no_arg_commands[cmd].css_string;
+ }
+ else if (nobrace_symbol_text[cmd])
+ default_no_arg_commands_formatting[cmd][HCC_type_css_string].text
+ /* the value is never modified but the struct field type is not const
+ so need to cast to drop const */
+ = (char *)nobrace_symbol_text[cmd];
+ else if (text_brace_no_arg_commands[cmd])
+ /* the value is never modified but the struct field type is not const
+ so need to cast to drop const */
+ default_no_arg_commands_formatting[cmd][HCC_type_css_string].text
+ = (char *)text_brace_no_arg_commands[cmd];
+ else
+ fprintf (stderr, "BUG: %s: no css_string\n",
+ builtin_command_data[cmd].cmdname);
+ }
}
static int
@@ -17467,10 +17520,9 @@ html_initialize_output_state (CONVERTER *self, const
char *context)
fprintf (stderr, "REMARK: html_initialize_output_state: no document");
}
+ /* corresponds with $self->{'no_arg_commands_formatting'} */
HTML_COMMAND_CONVERSION
output_no_arg_commands_formatting[BUILTIN_CMD_NUMBER][HCC_type_css_string+1];
- memcpy (output_no_arg_commands_formatting,
default_no_arg_commands_formatting,
- sizeof (default_no_arg_commands_formatting));
output_encoding = self->conf->OUTPUT_ENCODING_NAME.o.string;
@@ -17495,6 +17547,20 @@ html_initialize_output_state (CONVERTER *self, const
char *context)
self->special_character[i].len = strlen (special_character_string);
}
+ if (self->conf->USE_XML_SYNTAX.o.integer > 0)
+ {
+ /* here in perl something for rules but we already get that from perl */
+ line_break_element = "<br/>";
+ }
+ else
+ line_break_element = "<br>";
+
+ self->line_break_element.string = line_break_element;
+ self->line_break_element.len = strlen (line_break_element);
+
+ memcpy (output_no_arg_commands_formatting,
default_no_arg_commands_formatting,
+ sizeof (default_no_arg_commands_formatting));
+
/* if not the textual entity */
if (strcmp(self->special_character[SC_non_breaking_space].string,
special_characters_formatting[SC_non_breaking_space][0]))
@@ -17502,21 +17568,25 @@ html_initialize_output_state (CONVERTER *self, const
char *context)
for (i = 0; i < sizeof (spaces_cmd) / sizeof (spaces_cmd[0]); i++)
{
enum command_id cmd = spaces_cmd[i];
+ /* cast to drop const */
output_no_arg_commands_formatting[cmd][HCC_type_normal].text
- = strdup (self->special_character[SC_non_breaking_space].string);
+ = (char *)self->special_character[SC_non_breaking_space].string;
}
}
- if (self->conf->USE_XML_SYNTAX.o.integer > 0)
+ if (self->conf->USE_NUMERIC_ENTITY.o.integer > 0)
{
- /* here in perl something for rules but we already get that from perl */
- line_break_element = "<br/>";
+ for (i = 0; i < no_arg_formatted_cmd.number; i++)
+ {
+ enum command_id cmd = no_arg_formatted_cmd.list[i];
+ if (unicode_entities[cmd])
+ output_no_arg_commands_formatting[cmd][HCC_type_normal].text
+ = unicode_entities[cmd];
+ }
}
- else
- line_break_element = "<br>";
- self->line_break_element.string = line_break_element;
- self->line_break_element.len = strlen (line_break_element);
+ output_no_arg_commands_formatting[CM_NEWLINE][HCC_type_normal].text
+ = self->line_break_element.string;
sort_css_element_class_styles (self);
diff --git a/tp/Texinfo/XS/main/unicode.h b/tp/Texinfo/XS/main/unicode.h
index 34e42b06ee..f59b593e5a 100644
--- a/tp/Texinfo/XS/main/unicode.h
+++ b/tp/Texinfo/XS/main/unicode.h
@@ -17,6 +17,7 @@ typedef struct ENCODING_CODEPOINTS {
typedef struct COMMAND_UNICODE {
const char *codepoint;
const char *text; /* UTF-8 encoded */
+ const char *css_string;
int is_extra;
} COMMAND_UNICODE;
diff --git a/tp/maintain/setup_converters_code_tables.pl
b/tp/maintain/setup_converters_code_tables.pl
index 8ec82d9a6c..d7137f7c35 100755
--- a/tp/maintain/setup_converters_code_tables.pl
+++ b/tp/maintain/setup_converters_code_tables.pl
@@ -180,13 +180,16 @@ foreach my $command_name (@commands_order) {
my $result = $unicode_character_brace_no_arg_commands{$command_name};
my $protected = '"'.join ('', map {_protect_char($_)} split ('',
$result)).'"';
my $codepoint = '"'.$unicode_map{$command_name}.'"';
+ # note that this is not used for ASCII characters and some specific
+ # characters
+ my $css_string = '"\\\\'.$unicode_map{$command_name}.' "';
my $is_extra = 0;
if (defined($extra_unicode_map{$command_name})) {
$is_extra = 1;
}
- print UNIC "{$codepoint, $protected, $is_extra}, /* $command */\n";
+ print UNIC "{$codepoint, $protected, $css_string, $is_extra}, /*
$command */\n";
} else {
- print UNIC "{0, 0, -1},\n";
+ print UNIC "{0, 0, 0, -1},\n";
}
}
print UNIC "};\n\n";
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- branch master updated: * tp/Texinfo/XS/convert/convert_html.c (unicode_entities) (html_format_setup): setup unicode_entities.,
Patrice Dumas <=