koha-cvs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Koha-cvs] CVS: koha/misc/translator TmplToken.pm,1.5,1.6 TmplTokenizer.


From: Ambrose C. LI
Subject: [Koha-cvs] CVS: koha/misc/translator TmplToken.pm,1.5,1.6 TmplTokenizer.pm,1.33,1.34 text-extract2.pl,1.41,1.42 tmpl_process3.pl,1.19,1.20 xgettext.pl,1.11,1.12
Date: Tue, 09 Mar 2004 23:00:30 -0800

Update of /cvsroot/koha/koha/misc/translator
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv18221

Modified Files:
        TmplToken.pm TmplTokenizer.pm text-extract2.pl 
        tmpl_process3.pl xgettext.pl 
Log Message:
Added hack to extract and translate strings inside JavaScript CDATA blocks,
using C-like _("some translatable string") notation. English templates will
need to be modified.


Index: TmplToken.pm
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/TmplToken.pm,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -r1.5 -r1.6
*** TmplToken.pm        27 Feb 2004 13:26:07 -0000      1.5
--- TmplToken.pm        10 Mar 2004 07:00:27 -0000      1.6
***************
*** 110,113 ****
--- 110,156 ----
  }
  
+ sub has_js_data {
+     my $this = shift;
+     return defined $this->{'_js_data'} && ref($this->{'_js_data'}) eq 'ARRAY';
+ }
+ 
+ sub js_data {
+     my $this = shift;
+     return $this->{'_js_data'};
+ }
+ 
+ sub set_js_data {
+     my $this = shift;
+     $this->{'_js_data'} = $_[0];
+     return $this;
+ }
+ 
+ # predefined tests
+ 
+ sub tag_p {
+     my $this = shift;
+     return $this->type == TmplTokenType::TAG;
+ }
+ 
+ sub cdata_p {
+     my $this = shift;
+     return $this->type == TmplTokenType::CDATA;
+ }
+ 
+ sub text_p {
+     my $this = shift;
+     return $this->type == TmplTokenType::TEXT;
+ }
+ 
+ sub text_parametrized_p {
+     my $this = shift;
+     return $this->type == TmplTokenType::TEXT_PARAMETRIZED;
+ }
+ 
+ sub directive_p {
+     my $this = shift;
+     return $this->type == TmplTokenType::DIRECTIVE;
+ }
+ 
  
###############################################################################
  

Index: TmplTokenizer.pm
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/TmplTokenizer.pm,v
retrieving revision 1.33
retrieving revision 1.34
diff -C2 -r1.33 -r1.34
*** TmplTokenizer.pm    8 Mar 2004 05:00:42 -0000       1.33
--- TmplTokenizer.pm    10 Mar 2004 07:00:27 -0000      1.34
***************
*** 94,97 ****
--- 94,98 ----
  sub CDATA_CLOSE               () {'cdata-close'}
  sub PCDATA_MODE_P     () {'pcdata-mode-p'}    # additional submode for CDATA
+ sub JS_MODE_P         () {'js-mode-p'}        # cdata-mode-p must also be true
  
  sub ALLOW_CFORMAT_P   () {'allow-cformat-p'}
***************
*** 170,173 ****
--- 171,179 ----
  }
  
+ sub js_mode_p {
+     my $this = shift;
+     return $this->{+JS_MODE_P};
+ }
+ 
  sub cdata_close {
      my $this = shift;
***************
*** 241,244 ****
--- 247,256 ----
  }
  
+ sub _set_js_mode {
+     my $this = shift;
+     $this->{+JS_MODE_P} = $_[0];
+     return $this;
+ }
+ 
  sub _set_cdata_close {
      my $this = shift;
***************
*** 255,258 ****
--- 267,364 ----
  
###############################################################################
  
+ use vars qw( $js_EscapeSequence );
+ BEGIN {
+     # Perl quoting is really screwed up, but this common subexp is way too long
+     $js_EscapeSequence = q{\\\\(?:['"\\\\bfnrt]|[^0-7xu]|[0-3]?[0-7]{1,2}|x[\da-fA-F]{2}|u[\da-fA-F]{4})};
+ }
+ sub parenleft  () { '(' }
+ sub parenright () { ')' }
+ 
+ sub split_js ($) {
+     my ($s0) = @_;
+     my @it = ();
+     while (length $s0) {
+       if ($s0 =~ /^\s+/s) {                           # whitespace
+           push @it, $&;
+           $s0 = $';
+       } elsif ($s0 =~ /^\/\/[^\r\n]*(?:[\r\n]|$)/s) { # C++-style comment
+           push @it, $&;
+           $s0 = $';
+       } elsif ($s0 =~ /^\/\*(?:(?!\*\/).)*\*\//s) {   # C-style comment
+           push @it, $&;
+           $s0 = $';
+       # Keyword or identifier, ECMA-262 p.13 (section 7.5)
+       } elsif ($s0 =~ /^[A-Z_\$][A-Z\d_\$]*/is) {     # IdentifierName
+           push @it, $&;
+           $s0 = $';
+       # Punctuator, ECMA-262 p.13 (section 7.6)
+       } elsif ($s0 =~ /^(?:[\(\){}\[\];]|>>>=|<<=|>>=|[-\+\*\/\&\|\^\%]=|>>>|<<|>>|--|\+\+|\|\||\&\&|==|<=|>=|!=|[=><,!~\?:\.\-\+\*\/\&\|\^\%])/s) {
+           push @it, $&;
+           $s0 = $';
+       # DecimalLiteral, ECMA-262 p.14 (section 7.7.3); note: bug in the spec
+       } elsif ($s0 =~ /^(?:0|[1-9]\d+(?:\.\d*(?:[eE][-\+]?\d+)?)?)/s) {
+           push @it, $&;
+           $s0 = $';
+       # HexIntegerLiteral, ECMA-262 p.15 (section 7.7.3)
+       } elsif ($s0 =~ /^0[xX][\da-fA-F]+/s) {
+           push @it, $&;
+           $s0 = $';
+       # OctalIntegerLiteral, ECMA-262 p.15 (section 7.7.3)
+       } elsif ($s0 =~ /^0[\da-fA-F]+/s) {
+           push @it, $&;
+           $s0 = $';
+       # StringLiteral, ECMA-262 p.17 (section 7.7.4)
+       # XXX SourceCharacter doesn't seem to be defined (?)
+       } elsif ($s0 =~ /^(?:"(?:(?!["\\\r\n]).|$js_EscapeSequence)*"|'(?:(?!['\\\r\n]).|$js_EscapeSequence)*')/os) {
+           push @it, $&;
+           $s0 = $';
+       } elsif ($s0 =~ /^./) {                         # UNKNOWN TOKEN !!!
+           push @it, $&;
+           $s0 = $';
+       }
+     }
+     return @it;
+ }
+ 
+ sub STATE_UNDERSCORE     () { 1 }
+ sub STATE_PARENLEFT      () { 2 }
+ sub STATE_STRING_LITERAL () { 3 }
+ 
+ # XXX This is a crazy hack. I don't want to write an ECMAScript parser.
+ # XXX A scanner is one thing; a parser another thing.
+ sub identify_js_translatables (@) {
+     my @input = @_;
+     my @output = ();
+     # We mark a JavaScript translatable string as in C, i.e., _("literal")
+     # For simplicity, we ONLY look for "_" "(" StringLiteral ")"
+     for (my $i = 0, my $state = 0, my($j, $q, $s); $i <= $#input; $i += 1) {
+       my $reset_state_p = 0;
+       push @output, [0, $input[$i]];
+       if ($input[$i] !~ /\S/s) {
+           ;
+       } elsif ($state == 0) {
+           $state = STATE_UNDERSCORE if $input[$i] eq '_';
+       } elsif ($state == STATE_UNDERSCORE) {
+           $state = $input[$i] eq parenleft ? STATE_PARENLEFT : 0;
+       } elsif ($state == STATE_PARENLEFT) {
+           if ($input[$i] =~ /^(['"])(.*)\1$/s) {
+               ($state, $j, $q, $s) = (STATE_STRING_LITERAL, $#output, $1, $2);
+           } else {
+               $state = 0;
+           }
+       } elsif ($state == STATE_STRING_LITERAL) {
+           if ($input[$i] eq parenright) {
+               $output[$j] = [1, $output[$j]->[1], $q, $s];
+           }
+           $state = 0;
+       } else {
+           die "identify_js_translatables internal error: Unknown state $state"
+       }
+     }
+     return \@output;
+ }
+ 
+ 
###############################################################################
+ 
  sub _extract_attributes ($;$) {
      my $this = shift;
***************
*** 431,434 ****
--- 537,541 ----
                $this->_set_cdata_close( "</$1\\s*>" );
                $this->_set_pcdata_mode( 0 );
+               $this->_set_js_mode( lc($1) eq 'script' );
  #         } elsif ($it->string =~ /^<(title)\b/is) {
  #             $this->_set_cdata_mode( 1 );
***************
*** 471,476 ****
                        ($this->pcdata_mode_p?
                            TmplTokenType::TEXT: TmplTokenType::CDATA),
!                       $this->line_number )
                if defined $it;
        $this->_set_pcdata_mode, 0;
        $this->_set_cdata_close, undef unless !defined $it;
--- 578,595 ----
                        ($this->pcdata_mode_p?
                            TmplTokenType::TEXT: TmplTokenType::CDATA),
!                       $this->line_number, $this->filename )
                if defined $it;
+       if ($this->js_mode_p) {
+           my $s0 = $it->string;
+           my @head = ();
+           my @tail = ();
+           if ($s0 =~ /^(\s*<!--\s*)(.*)(\s*--\s*>\s*)$/s) {
+               push @head, $1;
+               push @tail, $3;
+               $s0 = $2;
+           }
+           push @head, split_js $s0;
+           $it->set_js_data( identify_js_translatables(@head, @tail) );
+       }
        $this->_set_pcdata_mode, 0;
        $this->_set_cdata_close, undef unless !defined $it;

Index: text-extract2.pl
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/text-extract2.pl,v
retrieving revision 1.41
retrieving revision 1.42
diff -C2 -r1.41 -r1.42
*** text-extract2.pl    19 Feb 2004 21:24:30 -0000      1.41
--- text-extract2.pl    10 Mar 2004 07:00:28 -0000      1.42
***************
*** 61,64 ****
--- 61,70 ----
            }
        }
+       if ($s->has_js_data) {
+           printf "JavaScript translatable strings:\n";
+           for my $t (@{$s->js_data}) {
+               printf "%dH%s\n", length $t->[3], underline $t->[3] if $t->[0]; # FIXME
+           }
+       }
      }
  }
***************
*** 89,92 ****
--- 95,102 ----
                }
            }
+       } elsif ($s->has_js_data) {
+           for my $t (@{$s->js_data}) {
+               remember( $s, $t->[3] ) if $t->[0]; # FIXME
+           }
        }
      }

Index: tmpl_process3.pl
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/tmpl_process3.pl,v
retrieving revision 1.19
retrieving revision 1.20
diff -C2 -r1.19 -r1.20
*** tmpl_process3.pl    8 Mar 2004 04:59:38 -0000       1.19
--- tmpl_process3.pl    10 Mar 2004 07:00:28 -0000      1.20
***************
*** 98,101 ****
--- 98,111 ----
        } elsif ($kind eq TmplTokenType::TAG && %$attr) {
            print $output text_replace_tag($t, $attr);
+       } elsif ($s->has_js_data) {
+           for my $t (@{$s->js_data}) {
+               # FIXME for this whole block
+               if ($t->[0]) {
+                   printf $output "%s%s%s", $t->[2], find_translation $t->[3],
+                           $t->[2];
+               } else {
+                   print $output $t->[1];
+               }
+           }
        } elsif (defined $t) {
            print $output $t;

Index: xgettext.pl
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/xgettext.pl,v
retrieving revision 1.11
retrieving revision 1.12
diff -C2 -r1.11 -r1.12
*** xgettext.pl 27 Feb 2004 13:26:08 -0000      1.11
--- xgettext.pl 10 Mar 2004 07:00:28 -0000      1.12
***************
*** 108,111 ****
--- 108,115 ----
                }
            }
+       } elsif ($s->has_js_data) {
+           for my $t (@{$s->js_data}) {
+               remember( $s, $t->[3] ) if $t->[0]; # FIXME
+           }
        }
      }
***************
*** 199,202 ****
--- 203,209 ----
                    . (defined $name? " name=$name->[1]": '');
            }
+       } elsif ($text{$t}->[0]->has_js_data) {
+           printf OUTPUT "#. For the first occurrence,\n" if @{$text{$t}} > 1;
+           printf OUTPUT "#. SCRIPT\n";
        }
        my $cformat_p;
***************
*** 377,381 ****
  =item -
  
- (Future goal)
  Translation to non-English-like languages with different word
  order:  gettext's c-format strings can theoretically be
--- 384,387 ----
***************
*** 418,421 ****
--- 424,441 ----
  "update" actions have already been implemented in tmpl_process3.pl.
  
+ =head2 Strings inside JavaScript
+ 
+ In the SCRIPT elements, the script will attempt to scan for
+ _("I<string literal>") patterns, and extract the I<string literal>
+ as a translatable string.
+ 
+ Note that the C-like _(...) notation is required.
+ 
+ The JavaScript must actually define a _ function
+ so that the code remains correct JavaScript.
+ A suitable definition of such a function can be
+ 
+       function _(s) { return s } // dummy function for gettext
+ 
  =head1 SEE ALSO
  




reply via email to

[Prev in Thread] Current Thread [Next in Thread]