texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

texinfo/tp/Texinfo Parser.pm Convert/HTML.pm Co...


From: Patrice Dumas
Subject: texinfo/tp/Texinfo Parser.pm Convert/HTML.pm Co...
Date: Sun, 31 Jul 2011 23:21:49 +0000

CVSROOT:        /sources/texinfo
Module name:    texinfo
Changes by:     Patrice Dumas <pertusus>        11/07/31 23:21:49

Modified files:
        tp/Texinfo     : Parser.pm 
        tp/Texinfo/Convert: HTML.pm NodeNameNormalization.pm Unicode.pm 

Log message:
        Handle better bogus nodes, by associating a file name.
        
        Translate strings when a @documentlanguage appears, and after 
        _set_global_multiple_commands.
        
        Really remove diacritics when transliterating.
        
        Handle right accented letters transliteration when USE_UNIDECODE is
        unset.

CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/Texinfo/Parser.pm?cvsroot=texinfo&r1=1.270&r2=1.271
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/Texinfo/Convert/HTML.pm?cvsroot=texinfo&r1=1.117&r2=1.118
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/Texinfo/Convert/NodeNameNormalization.pm?cvsroot=texinfo&r1=1.7&r2=1.8
http://cvs.savannah.gnu.org/viewcvs/texinfo/tp/Texinfo/Convert/Unicode.pm?cvsroot=texinfo&r1=1.11&r2=1.12

Patches:
Index: Parser.pm
===================================================================
RCS file: /sources/texinfo/texinfo/tp/Texinfo/Parser.pm,v
retrieving revision 1.270
retrieving revision 1.271
diff -u -b -r1.270 -r1.271
--- Parser.pm   31 Jul 2011 16:36:00 -0000      1.270
+++ Parser.pm   31 Jul 2011 23:21:49 -0000      1.271
@@ -957,7 +957,7 @@
   }
 }
 
-# for debugging
+# for debugging
 sub _print_command_args_texi($)
 {
   my $current = shift;

Index: Convert/HTML.pm
===================================================================
RCS file: /sources/texinfo/texinfo/tp/Texinfo/Convert/HTML.pm,v
retrieving revision 1.117
retrieving revision 1.118
diff -u -b -r1.117 -r1.118
--- Convert/HTML.pm     31 Jul 2011 01:52:42 -0000      1.117
+++ Convert/HTML.pm     31 Jul 2011 23:21:49 -0000      1.118
@@ -631,6 +631,7 @@
 sub _translate_names($)
 {
   my $self = shift;
+  #print STDERR "encoding_name: $self->{'encoding_name'} documentlanguage: ".$self->get_conf('documentlanguage')."\n";
 
 
   %BUTTONS_TEXT = (
@@ -2033,7 +2034,9 @@
   if ($cmdname eq 'node') {
     if (!$element or (!$element->{'extra'}->{'section'}
                       and $element->{'extra'}->{'node'}
-                      and $element->{'extra'}->{'node'} eq $command)) {
+                      and $element->{'extra'}->{'node'} eq $command
+                      # bogus node may not have been normalized
+                      and defined($command->{'extra'}->{'normalized'}))) {
       if ($command->{'extra'}->{'normalized'} eq 'Top') {
         $heading_level = 0;
       } else {
@@ -3043,6 +3046,9 @@
        and ! $self->get_conf('set'.$cmdname.'aftertitlepage')) {
     return $self->_contents_inline_element($cmdname, $command);
   }
+  if ($cmdname eq 'documentlanguage') {
+    $self->_translate_names();
+  }
   return '';
 }
 
@@ -3983,7 +3989,7 @@
   $self->{'multiple_pass'} = [],
   $self->_new_document_context('_toplevel_context');
 
-  $self->_translate_names();
+  #$self->_translate_names();
 
   return $self;
 }
@@ -4527,13 +4533,33 @@
           foreach my $root_command (@{$element->{'contents'}}) {
             if ($root_command->{'cmdname'} 
                 and $root_command->{'cmdname'} eq 'node') {
-              # Happens for bogus nodes
+              # Happens for bogus nodes, as bogus nodes are not in 
+              # %{$self->{'labels'}}
               #if (!defined($self->{'targets'}->{$root_command})
               #    or !defined($self->{'targets'}->{$root_command}->{'node_filename'})) {
               #  print STDERR "BUG: no target/filename($root_command): ".Texinfo::Structuring::_print_root_command_texi($root_command)."\n";
               #}
-              $self->_set_page_file($page, 
-                   $self->{'targets'}->{$root_command}->{'node_filename'});
+              my $node_filename;
+              # double node are not normalized
+              if (!defined($root_command->{'extra'}->{'normalized'})
+                  or !defined($self->{'labels'}->{$root_command->{'extra'}->{'normalized'}})) {
+                $node_filename = 'unknown_node';
+                $node_filename .= '.'.$self->get_conf('NODE_FILE_EXTENSION') 
+                  if (defined($self->get_conf('NODE_FILE_EXTENSION')) 
+                    and $self->get_conf('NODE_FILE_EXTENSION') ne '');
+              } else { 
+                if (!defined($self->{'targets'}->{$root_command})
+                    or !defined($self->{'targets'}->{$root_command}->{'node_filename'})) {
+                  # Should normally be a double node.  Use the equivalent node.
+                  # However since double nodes are not normalized, in fact it 
+                  # never happens.
+                  $root_command
+                    = $self->{'labels'}->{$root_command->{'extra'}->{'normalized'}};
+                }
+                $node_filename 
+                  = $self->{'targets'}->{$root_command}->{'node_filename'};
+              }
+              $self->_set_page_file($page, $node_filename);
               next PAGE;
             }
           }
@@ -4616,6 +4642,7 @@
   # do that now to have it available for formatting
   # FIXME set language and documentencoding/encoding_name? If not done already.
   $self->_set_global_multiple_commands(-1);
+  $self->_translate_names();
 
   if ($self->get_conf('USE_NODES')) {
     $elements = Texinfo::Structuring::split_by_node($root);
@@ -4633,13 +4660,16 @@
   #    print STDERR "ELEMENT $element->{'type'}: $element\n";
   #  }
   #}
+
   $self->_set_root_commands_targets_node_files($elements);
+
   foreach my $couple ([$elements, 'elements'], 
                       [$special_elements, 'special_elements'],
                       [$special_pages, 'special_pages']) {
     $self->{$couple->[1]} = $couple->[0]
       if (defined($couple->[0]));
   }
+
   return ($elements, $special_elements, $special_pages);
 }
 
@@ -5909,6 +5939,9 @@
       } else {
         $node_filename = $target->{'node_filename'};
       }
+      #if (!defined($filename)) {
+      #  print STDERR "No filename ".Texinfo::Parser::_print_command_args_texi($node);
+      #}
       if (defined($filename) and $node_filename ne $filename) {
         my $redirection_page 
           = &{$self->{'node_redirection_page'}}($self, $node);

Index: Convert/NodeNameNormalization.pm
===================================================================
RCS file: /sources/texinfo/texinfo/tp/Texinfo/Convert/NodeNameNormalization.pm,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -b -r1.7 -r1.8
--- Convert/NodeNameNormalization.pm    4 Jun 2011 08:11:37 -0000       1.7
+++ Convert/NodeNameNormalization.pm    31 Jul 2011 23:21:49 -0000      1.8
@@ -173,7 +173,7 @@
                and exists($Texinfo::Convert::Unicode::transliterate_map{uc(sprintf("%04x",ord($char)))})) {
         $result .= $Texinfo::Convert::Unicode::transliterate_map{uc(sprintf("%04x",ord($char)))};
       } elsif (ord($char) <= hex(0xFFFF) 
-               and exists($Texinfo::Convert::Unicode::unicode_diacritics{uc(sprintf("%04x",ord($char)))})) {
+               and exists($Texinfo::Convert::Unicode::diacritics_accent_commands{uc(sprintf("%04x",ord($char)))})) {
         $result .= '';
       # in this case, we want to avoid calling unidecode, as we are sure
       # that there is no useful transliteration of the unicode character
@@ -186,11 +186,17 @@
         $result .= $char;
       } else {
         if ($no_unidecode) {
+          if (ord($char) <= hex(0xFFFF)
+              and exists ($Texinfo::Convert::Unicode::transliterate_accent_map{uc(sprintf("%04x",ord($char)))})) {
+            $result .= $Texinfo::Convert::Unicode::transliterate_accent_map{uc(sprintf("%04x",ord($char)))};
+          } else {
           $result .= $char;
+          }
         } else {
           $result .= unidecode($char);
         }
       }
+      #print STDERR " ($no_unidecode) $text -> CHAR: ".ord($char)." ".uc(sprintf("%04x",ord($char)))."\n$result\n";
     } else {
       print STDERR "Bug: unknown character in cross ref transliteration (likely in infinite loop)\n";
       print STDERR "Text: !!$text!!\n";

Index: Convert/Unicode.pm
===================================================================
RCS file: /sources/texinfo/texinfo/tp/Texinfo/Convert/Unicode.pm,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -b -r1.11 -r1.12
--- Convert/Unicode.pm  22 Mar 2011 00:37:04 -0000      1.11
+++ Convert/Unicode.pm  31 Jul 2011 23:21:49 -0000      1.12
@@ -67,6 +67,11 @@
        'ogonek'     => '0328'
 );
 
+our %diacritics_accent_commands;
+foreach my $diacritic(keys(%unicode_diacritics)) {
+  $diacritics_accent_commands{$unicode_diacritics{$diacritic}} = $diacritic;
+}
+
 our %unicode_accented_letters = (
     'dotaccent' => { # dot above
         'A' => '0226', #C moz-1.2 
@@ -597,6 +602,15 @@
   }
 }
 
+our %transliterate_accent_map;
+foreach my $command (keys(%unicode_accented_letters)) {
+  foreach my $letter(keys (%{$unicode_accented_letters{$command}})) {
+    $transliterate_accent_map{$unicode_accented_letters{$command}->{$letter}} = $letter
+      unless (exists($transliterate_map{$unicode_accented_letters{$command}->{$letter}}));
+  }
+}
+
+
 # currently unused
 my %makeinfo_transliterate_map = (
   '0416' => 'ZH',



reply via email to

[Prev in Thread] Current Thread [Next in Thread]