[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
texinfo/tp/Texinfo/Convert Converter.pm NodeNam...
From: |
Patrice Dumas |
Subject: |
texinfo/tp/Texinfo/Convert Converter.pm NodeNam... |
Date: |
Thu, 11 Aug 2011 21:07:16 +0000 |
CVSROOT: /sources/texinfo
Module name: texinfo
Changes by: Patrice Dumas <pertusus> 11/08/11 21:07:16
Modified files:
tp/Texinfo/Convert: Converter.pm NodeNameNormalization.pm
Text.pm Unicode.pm
Log message:
Don't pass a fallback function to unicode_accent.
Stop processing a stack as unicode as soon as it cannot be represented
as unicode.
Already handle upper casing in eight_bit_accents and unicode_accents.
CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/Texinfo/Convert/Converter.pm?cvsroot=texinfo&r1=1.23&r2=1.24
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/Texinfo/Convert/NodeNameNormalization.pm?cvsroot=texinfo&r1=1.10&r2=1.11
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/Texinfo/Convert/Text.pm?cvsroot=texinfo&r1=1.48&r2=1.49
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/Texinfo/Convert/Unicode.pm?cvsroot=texinfo&r1=1.12&r2=1.13
Patches:
Index: Converter.pm
===================================================================
RCS file: /sources/texinfo/texinfo/tp/Texinfo/Convert/Converter.pm,v
retrieving revision 1.23
retrieving revision 1.24
diff -u -b -r1.23 -r1.24
--- Converter.pm 2 Aug 2011 21:01:34 -0000 1.23
+++ Converter.pm 11 Aug 2011 21:07:16 -0000 1.24
@@ -595,10 +595,11 @@
return xml_accent($text, $command, 1);
}
-sub xml_accents($$)
+sub xml_accents($$;$)
{
my $self = shift;
my $accent = shift;
+ my $in_upper_case = shift;
my $format_accents;
if ($self->get_conf('USE_NUMERIC_ENTITY')) {
$format_accents = \&xml_accent_numeric_entities;
Index: NodeNameNormalization.pm
===================================================================
RCS file: /sources/texinfo/texinfo/tp/Texinfo/Convert/NodeNameNormalization.pm,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -b -r1.10 -r1.11
--- NodeNameNormalization.pm 4 Aug 2011 23:22:27 -0000 1.10
+++ NodeNameNormalization.pm 11 Aug 2011 21:07:16 -0000 1.11
@@ -255,8 +255,15 @@
# commands with braces
} elsif ($accent_commands{$root->{'cmdname'}}) {
return '' if (!$root->{'args'});
- my $accented_char =
Texinfo::Convert::Unicode::unicode_accent(_convert($root->{'args'}->[0]),
- $root, \&Texinfo::Convert::Text::ascii_accent);
+ my $accent_text = _convert($root->{'args'}->[0]);
+ my $accented_char
+ = Texinfo::Convert::Unicode::unicode_accent($accent_text,
+ #$root, \&Texinfo::Convert::Text::ascii_accent);
+ $root);
+ if (!defined($accented_char)) {
+ $accented_char = Texinfo::Convert::Text::ascii_accent($accent_text,
+ $root);
+ }
if ($in_sc) {
return uc ($accented_char);
} else {
Index: Text.pm
===================================================================
RCS file: /sources/texinfo/texinfo/tp/Texinfo/Convert/Text.pm,v
retrieving revision 1.48
retrieving revision 1.49
diff -u -b -r1.48 -r1.49
--- Text.pm 4 Aug 2011 23:22:28 -0000 1.48
+++ Text.pm 11 Aug 2011 21:07:16 -0000 1.49
@@ -641,11 +641,12 @@
return ($eight_bit, $codepoint);
}
-sub eight_bit_accents($$$)
+sub eight_bit_accents($$$;$)
{
my $current = shift;
my $encoding = shift;
my $convert_accent = shift;
+ my $in_upper_case = shift;
my $debug;
#$debug = 1;
@@ -664,19 +665,36 @@
my @results_stack;
while (1) {
- $current_result
- = Texinfo::Convert::Unicode::unicode_accent($current_result, $accent,
- $convert_accent);
- push @results_stack, [$current_result, $accent];
+ my $unicode_formatted_accent
+ = Texinfo::Convert::Unicode::unicode_accent($current_result, $accent);
+ if (!defined($unicode_formatted_accent)) {
+ last;
+ }
+ $current_result = $unicode_formatted_accent;
+ $unicode_formatted_accent = uc($unicode_formatted_accent)
+ if ($in_upper_case);
+ push @results_stack, [$unicode_formatted_accent, $accent];
last if ($accent eq $current);
$accent = $accent->{'parent'}->{'parent'};
}
+ if ($accent ne $current) {
+ while (1) {
+ push @results_stack, [undef, $accent];
+ last if ($accent eq $current);
+ $accent = $accent->{'parent'}->{'parent'};
+ }
+ }
+
if ($debug) {
- print STDERR "stack: ".join('|',@$stack)."\nPARTIAL_RESULATS_STACK:\n";
+ print STDERR "stack: ".join('|',@$stack)."\nPARTIAL_RESULTS_STACK:\n";
foreach my $partial_result (@results_stack) {
+ if (defined($partial_result->[0])) {
print STDERR " -> ".Encode::encode('utf8', $partial_result->[0])
."|$partial_result->[1]->{'cmdname'}\n";
+ } else {
+ print STDERR " -> NO UTF8 |$partial_result->[1]->{'cmdname'}\n";
+ }
}
}
@@ -691,6 +709,7 @@
my $eight_bit_command_index = -1;
foreach my $partial_result (@results_stack) {
my $char = $partial_result->[0];
+ last if (!defined($char));
my ($new_eight_bit, $new_codepoint) = _eight_bit_and_unicode_point($char,
$encoding_map_name);
@@ -773,16 +792,31 @@
}
# format a stack of accents as unicode
-sub unicode_accents ($$)
+sub unicode_accents ($$;$)
{
my $current = shift;
- my $format_accents = shift;
+ my $format_accent = shift;
+ my $in_upper_case = shift;
my ($result, $innermost_accent, $stack) = _find_innermost_accent($current,
'utf-8');
+ my @stack_accent_commands = reverse(@$stack);
- foreach my $accent_command (reverse(@$stack)) {
- $result = Texinfo::Convert::Unicode::unicode_accent($result,
- {'cmdname' => $accent_command}, $format_accents);
+ while (@stack_accent_commands) {
+ my $accent_command = shift @stack_accent_commands;
+ my $formatted_result
+ = Texinfo::Convert::Unicode::unicode_accent($result,
+ {'cmdname' => $accent_command});
+ if (!defined($formatted_result)) {
+ push @stack_accent_commands, $accent_command;
+ } else {
+ $result = $formatted_result;
+ }
+ }
+ $result = uc ($result) if ($in_upper_case);
+ while (@stack_accent_commands) {
+ my $accent_command = shift @stack_accent_commands;
+ $result = &$format_accent($result,
+ {'cmdname' => $accent_command}, $in_upper_case);
}
return $result;
}
Index: Unicode.pm
===================================================================
RCS file: /sources/texinfo/texinfo/tp/Texinfo/Convert/Unicode.pm,v
retrieving revision 1.12
retrieving revision 1.13
diff -u -b -r1.12 -r1.13
--- Unicode.pm 31 Jul 2011 23:21:49 -0000 1.12
+++ Unicode.pm 11 Aug 2011 21:07:16 -0000 1.13
@@ -803,14 +803,15 @@
);
-sub unicode_accent($$$)
+sub unicode_accent($$)
{
my $text = shift;
my $command = shift;
- my $fallback_convert_accent = shift;
my $accent = $command->{'cmdname'};
+ my $result;
+
# special handling of @dotless{i}.
# \x{0131}\x{0308} for @dotless{i} @" doesn't lead to NFC 00ef.
# so it is set to a real dotless i only if not in an accent command.
@@ -819,15 +820,19 @@
or !$command->{'parent'}->{'parent'}
or !$command->{'parent'}->{'parent'}->{'cmdname'}
or
!$unicode_accented_letters{$command->{'parent'}->{'parent'}->{'cmdname'}})) {
- return "\x{0131}";
- }
- #return "\x{}" if ($text eq 'j'); # dotless j not known i unicode !
- return $text;
+ $result = "\x{0131}";
+ } else {
+ $result = $text;
+ }
+ return $result;
}
- return Unicode::Normalize::NFC($text .
chr(hex($unicode_diacritics{$accent})))
- if (defined($unicode_diacritics{$accent}));
- return &$fallback_convert_accent($text, $command);
+ if (defined($unicode_diacritics{$accent})) {
+ $result = Unicode::Normalize::NFC($text .
chr(hex($unicode_diacritics{$accent})));
+ return $result;
+ } else {
+ return undef;
+ }
}
sub unicode_text($$$$)
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- texinfo/tp/Texinfo/Convert Converter.pm NodeNam...,
Patrice Dumas <=