gnunet-svn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[GNUnet-SVN] r588 - Extractor-docs/WWW GNUnet-docs/WWW GNUnet-docs/WWW/papers doodle-docs/WWW i18nHTML-docs/WWW


From: grothoff
Subject: [GNUnet-SVN] r588 - Extractor-docs/WWW GNUnet-docs/WWW GNUnet-docs/WWW/papers doodle-docs/WWW i18nHTML-docs/WWW
Date: Sun, 3 Apr 2005 23:51:26 -0700 (PDT)

Author: grothoff
Date: 2005-04-03 23:51:09 -0700 (Sun, 03 Apr 2005)
New Revision: 588

Added:
   GNUnet-docs/WWW/papers/CameraReady_174.pdf
Modified:
   Extractor-docs/WWW/commitMassTranslation.php
   Extractor-docs/WWW/commitTranslation.php
   Extractor-docs/WWW/editor.php
   Extractor-docs/WWW/html_header.php3
   Extractor-docs/WWW/i18nhtml.inc
   Extractor-docs/WWW/i18nhtml_config.inc
   Extractor-docs/WWW/index.php
   Extractor-docs/WWW/translate.php
   Extractor-docs/WWW/vote.php
   GNUnet-docs/WWW/html_header.php3
   GNUnet-docs/WWW/i18nhtml.inc
   GNUnet-docs/WWW/i18nhtml_config.inc
   doodle-docs/WWW/commitMassTranslation.php
   doodle-docs/WWW/commitTranslation.php
   doodle-docs/WWW/editor.php
   doodle-docs/WWW/i18nhtml.inc
   doodle-docs/WWW/i18nhtml_config.inc
   doodle-docs/WWW/index.php
   doodle-docs/WWW/translate.php
   doodle-docs/WWW/vote.php
   i18nHTML-docs/WWW/commitMassTranslation.php
   i18nHTML-docs/WWW/commitTranslation.php
   i18nHTML-docs/WWW/editor.php
   i18nHTML-docs/WWW/i18nhtml.inc
   i18nHTML-docs/WWW/i18nhtml_config.inc
   i18nHTML-docs/WWW/index.php
   i18nHTML-docs/WWW/start.php
   i18nHTML-docs/WWW/status.php
   i18nHTML-docs/WWW/translate.php
   i18nHTML-docs/WWW/vote.php
Log:
i18nHTML update -- keep fingers crossed

Modified: Extractor-docs/WWW/commitMassTranslation.php
===================================================================
--- Extractor-docs/WWW/commitMassTranslation.php        2005-04-04 06:47:24 UTC 
(rev 587)
+++ Extractor-docs/WWW/commitMassTranslation.php        2005-04-04 06:51:09 UTC 
(rev 588)
@@ -1,6 +1,6 @@
 <?php
 /*
-     (C) 2003, 2004 Christian Grothoff
+     (C) 2003, 2004, 2005 Christian Grothoff
 
      This code is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published
@@ -19,22 +19,20 @@
 */
 include("i18nhtml.inc");
 if (!$connection) {
-  echo "<html><head><title>WWW translation: commit</title></head><body>";
   echo "Database is down. Cannot edit translations.";
-  echo "</body></html>";
   die();
 }
 if ($xlang == "English") {
-  echo "<html><head><title>WWW translation: commit</title></head><body>";
   W("Translating to English currently not allowed.\n");
-  echo "</body></html>";
   die();
 }
-echo "<html><head><title>WWW translation: commit</title></head><body>";
+echo "<html><head>";
+TITLE("WWW translation: commit");
+echo "</head><body>";
 W("Processing translations...");
 P();
 $done = 0;
-foreach($_GET as $dec=>$val) {
+foreach($_POST as $dec=>$val) {
   if ($val == "")
     continue;
   if ( ($dec == "xlang") || ($dec == "start") )
@@ -48,7 +46,7 @@
     $num--;
     $row = mysql_fetch_array($result);
     if ($dec == bin2hex(md5(urldecode($row["c"])))) {
-      $enc = $row["c"];
+      $enc = mysql_real_escape_string($row["c"]);
       break;
     }
   }
@@ -60,22 +58,30 @@
   }  
   $query = "DELETE FROM pending WHERE lang=\"$lang\" AND c=\"$enc\"";
   mysql_query($query, $connection); 
-  //$t = urlencode($val);
-  $t = urlencode($val);
-  //  $t = urlencode(htmlentities($val, ENT_QUOTES, $charset));
+  $t = mysql_real_escape_string(to_unicode($val));
   $query = "SELECT ranking FROM map WHERE name=\"$enc\" AND lang=\"$lang\" AND 
translation=\"$t\"";
   $result = mysql_query($query, $connection);
   $num = 0;
   if ($result) 
     $num = mysql_numrows($result);
   if ($num == 0) {
-    $query = "INSERT INTO map VALUES(\"$enc\", \"$lang\", \"$t\", 1, \"" . 
$_SERVER['REMOTE_ADDR'] . "\");";
-    mysql_query($query, $connection);
-    $done++;
-    W("Storing translation for &quot;%s&quot = &quot;%s&quot;.",
-      ARRAY(urldecode($enc),
-           urldecode($t)));
-    BR();  
+    $txtCnt = count_chars(urldecode($enc), 1);
+    $tCnt = count_chars($t, 1);
+    if ($txtCnt[ord('%')] != $tCnt[ord('%')]) {
+      W("Commit '%s->%s' failed.", $enc, $t);
+      W("The number of percent signs in source text and translation do not 
match.");     
+      W("Note that you must preserve all %%s expressions unchanged.");
+      W("Also, a single displayed %% sign must be translated into two (%%%%) 
such signs.");
+      P();
+    } else {
+      $query = "INSERT INTO map VALUES(\"$enc\", \"$lang\", \"$t\", 1, \"" . 
$_SERVER['REMOTE_ADDR'] . "\");";
+      mysql_query($query, $connection);
+      $done++;
+      W("Storing translation for &quot;%s&quot = &quot;%s&quot;.",
+        ARRAY(urldecode($enc),
+             urldecode($t)));
+      BR();  
+    }
   }  
 }
 P();

Modified: Extractor-docs/WWW/commitTranslation.php
===================================================================
--- Extractor-docs/WWW/commitTranslation.php    2005-04-04 06:47:24 UTC (rev 
587)
+++ Extractor-docs/WWW/commitTranslation.php    2005-04-04 06:51:09 UTC (rev 
588)
@@ -1,6 +1,6 @@
 <?php
 /*
-     (C) 2003, 2004 Christian Grothoff
+     (C) 2003, 2004, 2005 Christian Grothoff
 
      This code is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published
@@ -18,7 +18,7 @@
      Boston, MA 02111-1307, USA.
 */
 include("i18nhtml.inc");
-$text = $_REQUEST['text'];
+$text = mysql_real_escape_string($_REQUEST['text']);
 $translation = $_REQUEST['translation'];
 $back = $_REQUEST['back'];
 if (!$connection) {
@@ -30,12 +30,7 @@
   die();
 }
 
-// note: $text is already urlencoded (by submitting via form) and html 
compatible
-// ensure translation is stored in encoded form and html compatible
-// if (get_magic_quotes_gpc()) $translation = stripslashes($translation);
-
-// $t = urlencode(htmlentities($translation, ENT_QUOTES, $charset));
-$t = urlencode($translation);
+$t = mysql_real_escape_string(to_unicode($translation));
 // check for identical translation
 $query = "SELECT ranking FROM map WHERE name=\"$text\" AND lang=\"$lang\" AND 
translation=\"$t\"";
 $result = mysql_query($query, $connection);
@@ -44,22 +39,38 @@
   $num = mysql_numrows($result);
 }
 if ($num > 0) {
-  echo "<html><body>";
+  echo "<html><head>";
+  TITLE("Translation exists.");
+  echo "</head><body>";
   W("Translation exists.");   
   extlink($back, "Back...");
   generateFooter();
   echo "</body></html>";
 } else {
-  //  if (!get_magic_quotes_gpc()) $t = addslashes($t); // ensure escaped 
before adding to DB
-  $query = "INSERT INTO map VALUES(\"$text\", \"$lang\", \"$t\", 1, \"" . 
$_SERVER['REMOTE_ADDR'] . "\");";
-  $result = mysql_query($query, $connection);
-  if ($result) {
-    header("Location: " . $back); /* Redirect browser */
+  $txtCnt = count_chars(urldecode($text), 1);
+  $tCnt = count_chars($t, 1);
+  if ($txtCnt[ord('%')] != $tCnt[ord('%')]) {
+      echo "<html><head>";
+      TITLE("Commit failed.");
+      echo "</head><body>";
+      W("Commit failed.");
+      W("The number of percent signs in source text and translation do not 
match.");     
+      W("Note that you must preserve all %%s expressions unchanged.");
+      W("Also, a single displayed %% sign must be translated into two (%%%%) 
such signs.");
+      echo "</body></html>";
   } else {
-    echo "<html><body>";
-    W("Commit ('%s') failed: ", $query);
-    echo mysql_error();
-    echo "</body></html>";
+    $query = "INSERT INTO map VALUES(\"$text\", \"$lang\", \"$t\", 1, \"" . 
$_SERVER['REMOTE_ADDR'] . "\");";
+    $result = mysql_query($query, $connection);
+    if ($result) {
+      header("Location: " . $back); /* Redirect browser */
+    } else {
+      echo "<html><head>";
+      TITLE("Commit failed.");
+      echo "</head><body>";
+      W("Commit ('%s') failed: ", $query);
+      echo mysql_error();
+      echo "</body></html>";
+    }
   } 
 }
 ?>
\ No newline at end of file

Modified: Extractor-docs/WWW/editor.php
===================================================================
--- Extractor-docs/WWW/editor.php       2005-04-04 06:47:24 UTC (rev 587)
+++ Extractor-docs/WWW/editor.php       2005-04-04 06:51:09 UTC (rev 588)
@@ -1,6 +1,6 @@
 <?php
 /*
-     (C) 2003, 2004 Christian Grothoff
+     (C) 2003, 2004, 2005 Christian Grothoff
 
      This code is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published
@@ -65,7 +65,7 @@
     ARRAY($start, $end));
   P();
 
-  echo "<form action=\"" . $i18nHTMLbase . "commitMassTranslation.php\">";
+  echo "<form method=\"POST\" action=\"" . $i18nHTMLbase . 
"commitMassTranslation.php\">";
   echo "<input type=hidden name=\"xlang\" value=\"$xlang\">";
   $endp = $end + 1;
   echo "<input type=hidden name=\"start\" value=\"$endp\">";

Modified: Extractor-docs/WWW/html_header.php3
===================================================================
--- Extractor-docs/WWW/html_header.php3 2005-04-04 06:47:24 UTC (rev 587)
+++ Extractor-docs/WWW/html_header.php3 2005-04-04 06:51:09 UTC (rev 588)
@@ -3,9 +3,9 @@
 DOCTYPE("HTML", "Transitional");
 echo "<html><head>\n";
 if ($title) {
-  echo "<title>";
-  TRANSLATE($title);
-  echo "</title>";
+  TITLE($title);
+ } else {
+  TITLE("libextractor");
  }
 if ($description) {
   echo "<meta name=\"description\" content=\"";
@@ -14,7 +14,7 @@
  }
 if ($author) {
   echo "<meta name=\"author\" content=\"$author\">\n";
-  echo "<meta name=\"rights\" content=\"(C) 2002,2003,2004 by $author\">\n";
+  echo "<meta name=\"rights\" content=\"(C) 2002,2003,2004,2005 by 
$author\">\n";
  }
 if ($date) 
   echo "<meta name=\"date\" content=\"$date\">\n";

Modified: Extractor-docs/WWW/i18nhtml.inc
===================================================================
--- Extractor-docs/WWW/i18nhtml.inc     2005-04-04 06:47:24 UTC (rev 587)
+++ Extractor-docs/WWW/i18nhtml.inc     2005-04-04 06:51:09 UTC (rev 588)
@@ -1,6 +1,6 @@
 <?php
 /*
-     (C) 2003, 2004 Christian Grothoff and other contributing authors.
+     (C) 2003, 2004, 2005 Christian Grothoff and other contributing authors.
 
      This code is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published
@@ -27,6 +27,7 @@
   // $editor can be set to create a translation
   // tag even if a translation is already available.
   // there is currently no security.
+  //
   // An "_" is used for functions that return the
   // translated string instead of printing it directly.
   // These functions are used for "%s" printing with W().
@@ -39,6 +40,8 @@
 // obtain user db specific configuration parameters
 include("i18nhtml_config.inc");
 
+header("Content-type: text/html; charset=utf-8");
+
 // establish default connection to database server
 $connection = @mysql_connect($i18nHTMLsqlServer,
                             $i18nHTMLsqlUser,
@@ -119,6 +122,7 @@
 if ($xlang)
   $lang = $xlang;
 $lang = ucfirst(strtolower($lang));
+$lang = mysql_real_escape_string($lang);
 $editor = $_REQUEST['editor'];
 
 
@@ -241,6 +245,464 @@
   }
 }
 
+
+/**
+ * restore UTF-8 from HTML Unicode entities
+ *
+ * This function is triggered by the YACS handler during page
+ * rendering.  It is aiming to transcode HTML Unicode entities
+ * (eg, &amp;#8364;) back to actual UTF-8 encoding (eg, €).
+ *
+ * @param string a string with a mix of UTF-8 and of HTML Unicode entities
+ * @return an UTF-8 string
+ */
+function from_unicode($text) {
+  // translate extended ISO8859-1 chars, if any
+  $text = utf8_encode($text);
+  
+  // translate Unicode entities
+  $areas = preg_split('/&#(\d+?);/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
+  $text = '';
+  $index = 0;
+  foreach($areas as $area) {
+    switch($index%2) {
+    case 0: // before entity
+      $text .= $area;
+      break;
+    case 1: // the entity itself
+      
+      // get the integer value
+      $unicode = intval($area);
+      
+      // one byte
+      if($unicode < 0x80) {
+       
+       $text .= chr($unicode);
+       
+       // two bytes
+      } elseif($unicode < 0x800) {
+       
+       $text .= chr( 0xC0 +  ( ( $unicode - ( $unicode % 0x40 ) ) / 0x40 ) );
+       $text .= chr( 0x80 + ( $unicode % 0x40 ) );
+       
+       // three bytes
+      } elseif($unicode < 0x10000) {
+       
+       $text .= chr( 0xE0 + ( ( $unicode - ( $unicode % 0x1000 ) ) / 0x1000 ) 
);
+       $text .= chr( 0x80 + ( ( ( $unicode % 0x1000 ) - ( $unicode % 0x40 ) ) 
/ 0x40 ) );
+       $text .= chr( 0x80 + ( $unicode % 0x40 ) );
+       
+       // more bytes, keep it as it is...
+      } else
+         $text .= '&#'.$unicode.';';
+      
+      break;
+    }
+    $index++;
+  }
+  
+  // the updated string
+  return $text;
+}
+
+
+/**
+ * transcode unicode entities to/from HTML entities
+ *
+ * Also, this function transforms HTML entities into their equivalent Unicode 
entities.
+ * For example, w.bloggar posts pages using HTML entities.
+ * If you have to modify these pages using web forms, you would like to get 
UTF-8 instead.
+ *
+ * @link 
http://www.evolt.org/article/A_Simple_Character_Entity_Chart/17/21234/ A Simple 
Character Entity Chart
+ *
+ * @param string the string to be transcoded
+ * @param boolean TRUE to transcode to Unicode, FALSE to transcode to HTML
+ * @return a transcoded string
+ */
+function transcode($input, $to_unicode=TRUE) {
+  
+  // initialize tables only once
+  static $html_entities, $unicode_entities;
+  if(!is_array($html_entities)) {
+    
+    
+    // numerical order
+    $codes = array(
+                  '&#160;'     => '&nbsp;',    // non-breaking space
+                  '&#161;'     => '&iexcl;',   // inverted exclamation mark
+                  '&#162;'     => '&cent;',    // cent sign
+                  '&#163;'     => '&pound;',   // pound sign
+                  '&#164;'     => '&curren;',  // currency sign
+                  '&#165;'     => '&yen;',             // yen sign
+                  '&#166;'     => '&brvbar;',  // broken bar
+                  '&#167;'     => '&sect;',    // section sign
+                  '&#168;'     => '&uml;',             // diaeresis
+                  '&#169;'     => '&copy;',    // copyright sign
+                  '&#170;'     => '&ordf;',    // feminine ordinal indicator
+                  '&#171;'     => '&laquo;',   // left-pointing double angle 
quotation mark
+                  '&#172;'     => '&not;',             // not sign
+                  '&#173;'     => '&shy;',             // soft hyphen
+                  '&#174;'     => '&reg;',             // registered sign
+                  '&#175;'     => '&macr;',    // macron
+                  '&#176;'     => '&deg;',             // degree sign
+                  '&#177;'     => '&plusmn;',  // plus-minus sign
+                  '&#178;'     => '&sup2;',    // superscript two
+                  '&#179;'     => '&sup3;',    // superscript three
+                  '&#180;'     => '&acute;',   // acute accent
+                  '&#181;'     => '&micro;',   // micro sign
+                  '&#182;'     => '&para;',    // pilcrow sign
+                  '&#183;'     => '&middot;',  // middle dot
+                  '&#184;'     => '&cedil;',   // cedilla
+                  '&#185;'     => '&sup1;',    // superscript one
+                  '&#186;'     => '&ordm;',    // masculine ordinal indicator
+                  '&#187;'     => '&raquo;',   // right-pointing double angle 
quotation mark
+                  '&#188;'     => '&frac14;',  // vulgar fraction one quarter
+                  '&#189;'     => '&frac12;',  // vulgar fraction one half
+                  '&#190;'     => '&frac34;',  // vulgar fraction three 
quarters
+                  '&#191;'     => '&iquest;',  // inverted question mark
+                  '&#192;'     => '&Agrave;',  // latin capital letter A with 
grave
+                  '&#193;'     => '&Aacute;',  // latin capital letter A with 
acute
+                  '&#194;'     => '&Acirc;',   // latin capital letter A with 
circumflex
+                  '&#195;'     => '&Atilde;',  // latin capital letter A with 
tilde
+                  '&#196;'     => '&Auml;',    // latin capital letter A with 
diaeresis
+                  '&#197;'     => '&Aring;',   // latin capital letter A with 
ring above
+                  '&#198;'     => '&AElig;',   // latin capital letter AE
+                  '&#199;'     => '&Ccedil;',  // latin capital letter C with 
cedilla
+                  '&#200;'     => '&Egrave;',  // latin capital letter E with 
grave
+                  '&#201;'     => '&Eacute;',  // latin capital letter E with 
acute
+                  '&#202;'     => '&Ecirc;',   // latin capital letter E with 
circumflex
+                  '&#203;'     => '&Euml;',    // latin capital letter E with 
diaeresis
+                  '&#204;'     => '&Igrave;',  // latin capital letter I with 
grave
+                  '&#205;'     => '&Iacute;',  // latin capital letter I with 
acute
+                  '&#206;'     => '&Icirc;',   // latin capital letter I with 
circumflex
+                  '&#207;'     => '&Iuml;',    // latin capital letter I with 
diaeresis
+                  '&#208;'     => '&ETH;',             // latin capital letter 
ETH
+                  '&#209;'     => '&Ntilde;',  // latin capital letter N with 
tilde
+                  '&#210;'     => '&Ograve;',  // latin capital letter O with 
grave
+                  '&#211;'     => '&Oacute;',  // latin capital letter O with 
acute
+                  '&#212;'     => '&Ocirc;',   // latin capital letter O with 
circumflex
+                  '&#213;'     => '&Otilde;',  // latin capital letter O with 
tilde
+                  '&#214;'     => '&Ouml;',    // latin capital letter O with 
diaeresis
+                  '&#215;'     => '&times;',   // multiplication sign
+                  '&#216;'     => '&Oslash;',  // latin capital letter O with 
stroke
+                  '&#217;'     => '&Ugrave;',  // latin capital letter U with 
grave
+                  '&#218;'     => '&Uacute;',  // latin capital letter U with 
acute
+                  '&#219;'     => '&Ucirc;',   // latin capital letter U with 
circumflex
+                  '&#220;'     => '&Uuml;',    // latin capital letter U with 
diaeresis
+                  '&#221;'     => '&Yacute;',  // latin capital letter Y with 
acute
+                  '&#222;'     => '&THORN;',   // latin capital letter THORN
+                  '&#223;'     => '&szlig;',   // latin small letter sharp s
+                  '&#224;'     => '&agrave;',  // latin small letter a with 
grave
+                  '&#225;'     => '&aacute;',  // latin small letter a with 
acute
+                  '&#226;'     => '&acirc;',   // latin small letter a with 
circumflex
+                  '&#227;'     => '&atilde;',  // latin small letter a with 
tilde
+                  '&#228;'     => '&auml;',    // latin small letter a with 
diaeresis
+                  '&#229;'     => '&aring;',   // latin small letter a with 
ring above
+                  '&#230;'     => '&aelig;',   // latin small letter ae
+                  '&#231;'     => '&ccedil;',  // latin small letter c with 
cedilla
+                  '&#232;'     => '&egrave;',  // latin small letter e with 
grave
+                  '&#233;'     => '&eacute;',  // latin small letter e with 
acute
+                  '&#234;'     => '&ecirc;',   // latin small letter e with 
circumflex
+                  '&#235;'     => '&euml;',    // latin small letter e with 
diaeresis
+                  '&#236;'     => '&igrave;',  // latin small letter i with 
grave
+                  '&#237;'     => '&iacute;',  // latin small letter i with 
acute
+                  '&#238;'     => '&icirc;',   // latin small letter i with 
circumflex
+                  '&#239;'     => '&iuml;',    // latin small letter i with 
diaeresis
+                  '&#240;'     => '&eth;',             // latin small letter 
eth
+                  '&#241;'     => '&ntilde;',  // latin small letter n with 
tilde
+                  '&#242;'     => '&ograve;',  // latin small letter o with 
grave
+                  '&#243;'     => '&oacute;',  // latin small letter o with 
acute
+                  '&#244;'     => '&ocirc;',   // latin small letter o with 
circumflex
+                  '&#245;'     => '&otilde;',  // latin small letter o with 
tilde
+                  '&#246;'     => '&ouml;',    // latin small letter o with 
diaeresis
+                  '&#247;'     => '&divide;',  // division sign
+                  '&#248;'     => '&oslash;',  // latin small letter o with 
stroke
+                  '&#249;'     => '&ugrave;',  // latin small letter u with 
grave
+                  '&#250;'     => '&uacute;',  // latin small letter u with 
acute
+                  '&#251;'     => '&ucirc;',   // latin small letter u with 
circumflex
+                  '&#252;'     => '&uuml;',    // latin small letter u with 
diaeresis
+                  '&#253;'     => '&yacute;',  // latin small letter y with 
acute
+                  '&#254;'     => '&thorn;',   // latin small letter thorn
+                  '&#255;'     => '&yuml;',    //
+                  '&#338;'     => '&OElig;',   // latin capital ligature OE
+                  '&#339;'     => '&oelig;',   // latin small ligature oe
+                  '&#352;'     => '&Scaron;',  // latin capital letter S with 
caron
+                  '&#353;'     => '&scaron;',  // latin small letter s with 
caron
+                  '&#376;'     => '&Yuml;',    // latin capital letter Y with 
diaeresis
+                  '&#402;'     => '&fnof;' ,   // latin small f with hook
+                  '&#710;'     => '&circ;',    // modifier letter circumflex 
accent
+                  '&#732;'     => '&tilde;',   // small tilde
+                  '&#913;'     => '&Alpha;',   // greek capital letter alpha
+                  '&#914;'     => '&Beta;',    // greek capital letter beta
+                  '&#915;'     => '&Gamma;',   // greek capital letter gamma
+                  '&#916;'     => '&Delta;',   // greek capital letter delta
+                  '&#917;'     => '&Epsilon;', // greek capital letter epsilon
+                  '&#918;'     => '&Zeta;',    // greek capital letter zeta
+                  '&#919;'     => '&Eta;',             // greek capital letter 
eta
+                  '&#920;'     => '&Theta;',   // greek capital letter theta
+                  '&#921;'     => '&Iota;',    // greek capital letter iota
+                  '&#922;'     => '&Kappa;',   // greek capital letter kappa
+                  '&#923;'     => '&Lambda;',  // greek capital letter lambda
+                  '&#924;'     => '&Mu;',              // greek capital letter 
mu
+                  '&#925;'     => '&Nu;',              // greek capital letter 
nu
+                  '&#926;'     => '&Xi;',              // greek capital letter 
xi
+                  '&#927;'     => '&Omicron;', // greek capital letter omicron
+                  '&#928;'     => '&Pi;',              // greek capital letter 
pi
+                  '&#929;'     => '&Rho;',             // greek capital letter 
rho
+                  '&#931;'     => '&Sigma;',   // greek capital letter sigma
+                  '&#932;'     => '&Tau;',             // greek capital letter 
tau
+                  '&#933;'     => '&Upsilon;', // greek capital letter upsilon
+                  '&#934;'     => '&Phi;',             // greek capital letter 
phi
+                  '&#935;'     => '&Chi;',             // greek capital letter 
chi
+                  '&#936;'     => '&Psi;',             // greek capital letter 
psi
+                  '&#937;'     => '&Omega;',   // greek capital letter omega
+                  '&#945;'     => '&alpha;',   // greek small letter alpha
+                  '&#946;'     => '&beta;',    // greek small letter beta
+                  '&#947;'     => '&gamma;',   // greek small letter gamma
+                  '&#948;'     => '&delta;',   // greek small letter delta
+                  '&#949;'     => '&epsilon;', // greek small letter epsilon
+                  '&#950;'     => '&zeta;',    // greek small letter zeta
+                  '&#951;'     => '&eta;',             // greek small letter 
eta
+                  '&#952;'     => '&theta;',   // greek small letter theta
+                  '&#953;'     => '&iota;',    // greek small letter iota
+                  '&#954;'     => '&kappa;',   // greek small letter kappa
+                  '&#955;'     => '&lambda;',  // greek small letter lambda
+                  '&#956;'     => '&mu;',              // greek small letter mu
+                  '&#957;'     => '&nu;',              // greek small letter nu
+                  '&#958;'     => '&xi;',              // greek small letter xi
+                  '&#959;'     => '&omicron;', // greek small letter omicron
+                  '&#960;'     => '&pi;',              // greek small letter pi
+                  '&#961;'     => '&rho;',             // greek small letter 
rho
+                  '&#962;'     => '&sigmaf;',  // greek small letter final 
sigma
+                  '&#963;'     => '&sigma;',   // greek small letter sigma
+                  '&#964;'     => '&tau;',             // greek small letter 
tau
+                  '&#965;'     => '&upsilon;', // greek small letter upsilon
+                  '&#966;'     => '&phi;',             // greek small letter 
phi
+                  '&#967;'     => '&chi;',             // greek small letter 
chi
+                  '&#968;'     => '&psi;',             // greek small letter 
psi
+                  '&#969;'     => '&omega;',   // greek small letter omega
+                  '&#977;'     => '&thetasym;',        // greek small letter 
theta symbol
+                  '&#978;'     => '&upsih;',   // greek upsilon with hook 
symbol
+                  '&#982;'     => '&piv;',             // greek pi symbol
+                  '&#8194;'    => '&ensp;',    // en space
+                  '&#8195;'    => '&emsp;',    // em space
+                  '&#8201;'    => '&thinsp;',  // thin space
+                  '&#8204;'    => '&zwnj;',    // zero width non-joiner
+                  '&#8205;'    => '&zwj;',             // zero width joiner
+                  '&#8206;'    => '&lrm;',             // left-to-right mark
+                  '&#8207;'    => '&rlm;',             // right-to-left mark
+                  '&#8211;'    => '&ndash;',   // en dash
+                  '&#8212;'    => '&mdash;',   // em dash
+                  '&#8216;'    => '&lsquo;',   // left single quotation mark
+                  '&#8217;'    => '&rsquo;',   // right single quotation mark
+                  '&#8218;'    => '&sbquo;',   // single low-9 quotation mark
+                  '&#8220;'    => '&ldquo;',   // left double quotation mark
+                  '&#8221;'    => '&rdquo;',   // right double quotation mark
+                  '&#8222;'    => '&bdquo;',   // double low-9 quotation mark
+                  '&#8224;'    => '&dagger;',  // dagger
+                  '&#8225;'    => '&Dagger;',  // double dagger
+                  '&#8226;'    => '&bull;',    // bullet
+                  '&#8230;'    => '&hellip;',  // horizontal ellipsis
+                  '&#8240;'    => '&permil;',  // per mille sign
+                  '&#8242;'    => '&prime;',   // primeminutes
+                  '&#8243;'    => '&Prime;',   // double prime
+                  '&#8249;'    => '&lsaquo;',  // single left-pointing angle 
quotation mark
+                  '&#8250;'    => '&rsaquo;',  // single right-pointing angle 
quotation mark
+                  '&#8254;'    => '&oline;',   // overline
+                  '&#8260;'    => '&frasl;',   // fraction slash
+                  '&#8364;'    => '&euro;',    // euro sign
+                  '&#8465;'    => '&image;',   // blackletter capital I
+                  '&#8472;'    => '&weierp;',  // script capital P
+                  '&#8476;'    => '&real;',    // blackletter capital R
+                  '&#8482;'    => '&trade;',   // trade mark sign
+                  '&#8501;'    => '&alefsym;', // alef symbol
+                  '&#8592;'    => '&larr;',    // leftwards arrow
+                  '&#8593;'    => '&uarr;',    // upwards arrow
+                  '&#8594;'    => '&rarr;',    // rightwards arrow
+                  '&#8595;'    => '&darr;',    // downwards arrow
+                  '&#8596;'    => '&harr;',    // left right arrow
+                  '&#8629;'    => '&crarr;',   // downwards arrow with corner 
leftwards
+                  '&#8656;'    => '&lArr;',    // leftwards double arrow
+                  '&#8657;'    => '&uArr;',    // upwards double arrow
+                  '&#8658;'    => '&rArr;',    // rightwards double arrow
+                  '&#8659;'    => '&dArr;',    // downwards double arrow
+                  '&#8660;'    => '&hArr;',    // left right double arrow
+                  '&#8704;'    => '&forall;',  // for all
+                  '&#8706;'    => '&part;',    // partial differential
+                  '&#8707;'    => '&exist;',   // there exists
+                  '&#8709;'    => '&empty;',   // empty set
+                  '&#8711;'    => '&nabla;',   // nabla
+                  '&#8712;'    => '&isin;',    // element of
+                  '&#8713;'    => '&notin;',   // not an element of
+                  '&#8715;'    => '&ni;',              // contains as member
+                  '&#8719;'    => '&prod;',    // n-ary product
+                  '&#8721;'    => '&sum;',             // n-ary sumation
+                  '&#8722;'    => '&minus;',   // minus sign
+                  '&#8727;'    => '&lowast;',  // asterisk operator
+                  '&#8730;'    => '&radic;',   // square root
+                  '&#8733;'    => '&prop;',    // proportional to
+                  '&#8734;'    => '&infin;',   // infinity
+                  '&#8736;'    => '&ang;',             // angle
+                  '&#8743;'    => '&and;',             // logical and
+                  '&#8744;'    => '&or;',              // logical or
+                  '&#8745;'    => '&cap;',             // intersection
+                  '&#8746;'    => '&cup;',             // union
+                  '&#8747;'    => '&int;',             // integral
+                  '&#8756;'    => '&there4;',  // therefore
+                  '&#8764;'    => '&sim;',             // tilde operator
+                  '&#8773;'    => '&cong;',    // approximately equal to
+                  '&#8776;'    => '&asymp;',   // almost equal to
+                  '&#8800;'    => '&ne;',              // not equal to
+                  '&#8801;'    => '&equiv;',   // identical to
+                  '&#8804;'    => '&le;',              // less-than or equal to
+                  '&#8805;'    => '&ge;',              // greater-than or 
equal to
+                  '&#8834;'    => '&sub;',             // subset of
+                  '&#8835;'    => '&sup;',             // superset of
+                  '&#8836;'    => '&nsub;',    // not a subset of
+                  '&#8838;'    => '&sube;',    // subset of or equal to
+                  '&#8839;'    => '&supe;',    // superset of or equal to
+                  '&#8853;'    => '&oplus;',   // circled plus
+                  '&#8855;'    => '&otimes;',  // circled times
+                  '&#8869;'    => '&perp;',    // up tack
+                  '&#8901;'    => '&sdot;',    // dot operator
+                  '&#8968;'    => '&lceil;',   // left ceiling
+                  '&#8969;'    => '&rceil;',   // right ceiling
+                  '&#8970;'    => '&lfloor;',  // left floor
+                  '&#8971;'    => '&rfloor;',  // right floor
+                  '&#9001;'    => '&lang;',    // left-pointing angle bracket
+                  '&#9002;'    => '&rang;',    // right-pointing angle bracket
+                  '&#9674;'    => '&loz;',             // lozenge
+                  '&#9824;'    => '&spades;',  // black spade suit
+                  '&#9827;'    => '&clubs;',   // black club suit
+                  '&#9829;'    => '&hearts;',  // black heart suit
+                  '&#9830;'    => '&diams;'    // black diam suit
+                  );
+    
+    // split entities for use in str_replace()
+    foreach($codes as  $unicode_entity => $html_entity) {
+      $unicode_entities[] = $unicode_entity;
+      $html_entities[] = $html_entity;
+    }
+  }
+  
+  // transcode HTML entities to Unicode
+  if($to_unicode)
+    return str_replace($html_entities, $unicode_entities, $input);
+  
+  // transcode Unicode entities to HTML entities
+  else
+    return str_replace($unicode_entities, $html_entities, $input);
+}
+
+
+
+
+/**
+ * transcode multi-byte characters to HTML representations for Unicode
+ *
+ * This function is aiming to preserve Unicode characters through storage
+ * in an ISO-8859-1 compliant system.
+ *
+ * Every multi-byte UTF-8 character is transformed to its equivalent HTML
+ * numerical entity (eg, &amp;#4568;) that may be handled safely by PHP and
+ * by MySQL.  Single-byte (ASCII) characters are copied unchanged.
+ *
+ * Of course, this solution does not allow for full-text search in the
+ * database and therefore, is not a definitive solution to
+ * internationalization issues.  It does enable, however, practical use of
+ * Unicode to build pages in foreign languages.
+ *
+ * Before the byte scan, the input is passed through transcode(), so named
+ * HTML entities are first rewritten into their numeric equivalents.
+ *
+ * The decoder is lenient: continuation bytes are not validated, any lead
+ * byte from 0x80 to 0xDF is handled as the start of a two-byte sequence,
+ * and a sequence cut short by the end of the string is completed with
+ * zero bits.  Sequences that decode to 0 (eg, C0 80) are dropped.
+ *
+ * @link http://www.evolt.org/article/A_Simple_Character_Entity_Chart/17/21234/ A Simple Character Entity Chart
+ *
+ * @param string the original UTF-8 string
+ * @return a string acceptable in an ISO-8859-1 storage system (ie., PHP4 + MySQL 3)
+ */
+function to_unicode($input) {
+  
+  // transcode HTML entities to Unicode entities
+  $input = transcode($input);
+  
+  // scan the whole string, one UTF-8 sequence per iteration
+  $output = '';
+  $length = strlen($input);
+  $index = 0;
+  while($index < $length) {
+    
+    // look at the lead byte
+    $char = ord($input[$index]);
+    
+    // one byte (0xxxxxxx): copied as is
+    if($char < 0x80) {
+      $output .= chr($char);
+      $index += 1;
+      continue;
+    }
+    
+    // the lead byte gives the sequence size; $modulus keeps only the
+    // payload bits that the lead byte itself carries
+    if($char < 0xE0) {          // two bytes (110xxxxx 10xxxxxx)
+      $size = 2;
+      $modulus = 0x20;
+    } elseif($char < 0xF0) {    // three bytes (1110xxxx 10xxxxxx 10xxxxxx) example: euro sign = \xE2\x82\xAC -> &#8364;
+      $size = 3;
+      $modulus = 0x10;
+    } elseif($char < 0xF8) {    // four bytes (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
+      $size = 4;
+      $modulus = 0x08;
+    } elseif($char < 0xFC) {    // five bytes (111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx)
+      $size = 5;
+      $modulus = 0x04;
+    } else {                    // six bytes (1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx)
+      $size = 6;
+      $modulus = 0x02;
+    }
+    
+    // every following byte of the sequence contributes its six low bits;
+    // iterating over the offsets guarantees each byte is read exactly once
+    $value = $char % $modulus;
+    for($offset = 1; $offset < $size; $offset++) {
+      
+      // bytes missing at the end of the string count as zero
+      $byte = ($index + $offset < $length) ? ord($input[$index + $offset]) : 0;
+      $value = ($value * 0x40) + ($byte % 0x40);
+    }
+    
+    // strip weird sequences (eg, C0 80 -> NUL)
+    if($value)
+      $output .= '&#' . $value . ';';
+    $index += $size;
+  }
+  
+  // return the translated string
+  return $output;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 // returns either the translated string
 // or the original string.  Assumes we
 // are passed the original string as occurs
@@ -264,8 +726,7 @@
   if ($a == "")
     return $a;
 
-  // $a = htmlentities($a, ENT_QUOTES, $charset);
-  $u = urlencode($a);
+  $u = mysql_real_escape_string(urlencode($a));
   
   if (!$connection) {
     // database not available, just print English
@@ -319,7 +780,7 @@
     return fix($a);              // just return English string
   } else { // translation available
     $row = mysql_fetch_array($result);
-    return fix(urldecode($row["translation"]));
+    return $row["translation"];
   }
 }
 
@@ -361,7 +822,16 @@
 }
 
 function TITLE($a,$b="") {
-  echo "<title>" . W_($a,$b) . "</title>\n";
+  global $lang;
+  global $languagecodes;
+  echo "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\" >";
+  echo "<title>" . TRANSLATE_($a,$b) . "</title>\n";
+  if (isset($languagecodes[$lang])) {
+    echo "<meta name=\"content-language\" content=\"" . 
+         $languagecodes[$lang] . "\">";
+    echo "<meta name=\"language\" content=\"" . 
+         $languagecodes[$lang] . "\">";
+  }
 }
 
 

Modified: Extractor-docs/WWW/i18nhtml_config.inc
===================================================================
--- Extractor-docs/WWW/i18nhtml_config.inc      2005-04-04 06:47:24 UTC (rev 
587)
+++ Extractor-docs/WWW/i18nhtml_config.inc      2005-04-04 06:51:09 UTC (rev 
588)
@@ -1,6 +1,6 @@
 <?php
 /*
-     (C) 2003, 2004 Christian Grothoff
+     (C) 2003, 2004, 2005 Christian Grothoff
 
      This code is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published
@@ -24,12 +24,18 @@
 $i18nHTMLrecordMode = 2; // 1: only missing, 2: everything, 0: disable
 
 $i18nHTMLsqlServer = "localhost";
-$i18nHTMLsqlUser = "GNUnetWWW";
-$i18nHTMLsqlPass = "garlic";
+$i18nHTMLsqlUser = "i18nHTML";
+$i18nHTMLsqlPass = "pass";
 
-$i18nHTMLsqlDB = "translations"; // default is "translation"
+$i18nHTMLsqlDB = "translation"; // default is "translation"
 $i18nHTMLbase = "";  // base directory prepended to i18nHTML php pages used in 
links
 $i18nHTMLmarker = "*";  // default value if never changed
 
+// Note that if you enable debug, the scripts may print
+// warnings even if everything is ok!
+$i18nHTMLdebug = 0; // 0 = no, 1 = yes
 
+// for selectively cloning a DB
+//  $i18nHTMLclone = "/tmp/cloneFile.sql";
+
 ?>

Modified: Extractor-docs/WWW/index.php
===================================================================
--- Extractor-docs/WWW/index.php        2005-04-04 06:47:24 UTC (rev 587)
+++ Extractor-docs/WWW/index.php        2005-04-04 06:51:09 UTC (rev 588)
@@ -1,108 +1,68 @@
 <?php
-$title="libextractor - a simple library for keyword extraction";
-$description="a simple library for keyword extraction";
-$email="address@hidden";       
-$keywords="keyword, extraction, mp3, html, pdf, images, jpeg, gif, ps, mime, 
real, qt, asf, mpeg, avi, riff, tiff, summary, summaries, kbps, format, 
mime-type, zip, elf, doc, ppt, xls, sha-1, md5, open office, sxw, dvi, id3, 
id3v2, id3v2.3, id3v2.4, thumbnails";
-$author="Vids Samanta and Christian Grothoff"; 
-$page="home";  
-include("html_header.php3");
-
-ANCHOR("about");
-H2("About libextractor");
-
-IMG("extractor_logo.png", "libextractor", "right", "136", "94", "0");
+include("i18nhtml.inc");
+DOCTYPE("HTML", "Transitional");
+echo "<html><head>\n";
+TITLE("i18nHTML - enabling collaborative webpage translation");
+echo "<meta name=\"description\" content=\"";
+TRANSLATE("i18nHTML is a collection of PHP scripts that allow visitors of a 
webpage to help translating it.");
+echo "\">";
+?>
+<meta name="author" content="Christian Grothoff">
+<meta name="keywords" 
content="i18n,HTML,PHP,translation,languages,mysql,database,internationalization,www,free,GNU,GPL">
+<meta name="robots" content="index,follow">
+<meta name="revisit-after" content="28 days">
+<meta name="publisher" content="Christian Grothoff">
+<meta name="date" content="2005-01-03">
+<meta name="rights" content="(C) 2004,2005 by Christian Grothoff">
+<meta http-equiv="expires" content="43200">
+<meta http-equiv="content-type" content="text/html">
+</head>
+<body>
+<?php
+generateLanguageBar();
+H1("i18nHTML");
+H2("About");
+W("i18nHTML is a collection of PHP files that can be used to write webpages 
that visitors can translate into their respective native languages.");
+W("i18nHTML uses a database to match sentences from the webpage against 
translations.");
+W("i18nHTML defines a set of PHP functions that generate either the translated 
HTML sentences or the original (typically English) text with decorations that 
allow users to provide translations.");
+W("i18nHTML requires the internationalized webpages to be written using the 
provided PHP functions but does not constrain the page design in any way.");
+W("Webpages internationalized with i18nHTML can be updated without loosing 
existing translations for sentences that were not changed.");
+W("Note that it is important that you use the i18nHTML <tt>TITLE</tt> command 
in your documents in order to ensure that the character set and other meta-data 
is set properly.");
 P();
-W("libextractor is a library used to extract meta-data from files of arbitrary 
type.");
-W("It is designed to use helper-libraries to perform the actual extraction, 
and to be trivially extendable by linking against external extractors for 
additional file types.");
-W("libextractor is part of the %s.",
-  extlink_("http://www.gnu.org/";, "GNU project"));
-//W("Our official GNU website can be found at %s.",
-//  
extlink_("http://www.gnu.org/software/libextractor/","http://www.gnu.org/software/libextractor/";));
-W("libextractor can be downloaded from this site or the %s.",
-  extlink_("http://www.gnu.org/prep/ftp.html","GNU mirrors"));
-
+H2("Download");
+W("You can find the latest version %s.",
+  extlink_("https://gnunet.org/i18nHTML/download/", "here"));
+W("The latest CVS version can be obtained using");
+PRE("$ svn checkout https://gnunet.org/svn/i18nHTML/");
+P();    
+W("If you want to be notified about updates, subscribe to %s",
+  extlink_("http://freshmeat.net/projects/i18nHTML/", "i18nHTML on freshmeat"));
 P();
-W("The goal is to provide developers of file-sharing networks or WWW-indexing 
bots with a universal library to obtain simple keywords to match against 
queries.");
-W("libextractor contains a shell-command &quot;extract&quot; that, similar to 
the well-known &quot;file&quot; command, can extract meta-data from a file an 
print the results to stdout.");
-P();
 
-W("Currently, libextractor supports the following formats:");
-include("plugins_list");
-BR();
-W("Also, various additional MIME types are detected.");
-P();
 
-W("libextractor is free software; you can redistribute it and/or modify it 
under the terms of the GNU General Public License as published by the Free 
Software Foundation; either version 2 of the License, or (at your option) any 
later version.");
-ANCHOR("news");
-H2("Recent News");
-P();
-echo "<dl>";
-DTDD("Thu Feb 24 01:23:31 EST 2005 | libextractor v0.4.2 released.",
-     "This release fixes some bugs in the ID3, PDF, PNG and REAL extractors.  
The REAL extractor now also handles the new Helix formats.  libextractor can 
now also be used to extract thumbnails from images (using ImageMagick).");
-DTDD("Wed Jan 26 19:51:44 EST 2005 | libextractor v0.4.1 released.",
-     "This release fixes a security issue (inherited from xpdf).  It also 
extracts more meta-data from files of TAR or QuickTime format.");
-DTDD("Sat Dec 25 21:42:26 CET 2004 | libextractor v0.4.0 released.",
-     "This release improves support for character sets (plugins are now 
expected to convert to UTF-8).  It also improves support for mp3 (adding 
genres) and png (handling of compressed comments).");
-DTDD("Sat Nov 13 13:23:23 EST 2004 | libextractor v0.3.11 released.",
-     "This release fixes bugs in the dvi, man, ID3v2.3, ole2 and pdf 
extractors.");
-DTDD("Sun Oct 18 13:23:35 EST 2004 | libextractor v0.3.10 released.", 
-     "This release adds support for ID3v2.3 and ID3v2.4.  It fixes bugs in the 
tar, man, deb, mp3 and ole2 extractors.");
-DTDD("Sat Oct 17 18:12:11 EST 2004 | libextractor v0.3.9 released.", 
-     "This release adds support for the man, tar (including tar.gz) and deb 
formats.  It fixes bugs in the id3v2 and jpeg extractors.  The size of jpeg 
images is now also extracted.  This version adds support for 64-bit file 
sizes.");
-DTDD("Sat Oct 02 20:00:04 EST 2004 | libextractor v0.3.8 released.",
- "This release adds support for dvi (from TeX).  The plugins are now installed 
in a separate plugin directory.  libextractor now works under OS X (10.3)."); 
-DTDD("Fri Sep 23 23:30:33 EST 2004 | libextractor v0.3.7 released.",
-     "This release adds support for StarOffice formats, ID3v2 tags and the 
Ripe160MD hash function.  It also improves the performance of the HTML and ZIP 
extractors.");
-DTDD("Fri Sep 10 20:10:38 EST 2004 | libextractor v0.3.6 released.",
-     "This release adds support for OpenOffice formats, hash functions (md5, 
sha-1) and fixes some build problems.");
-DTDD("Mon Aug 30 23:18:49 IST 2004 | libextractor v0.3.5 released.",
-     "This release adds support for OLE2 (WinWord, PowerPoint, Excel formats) 
and fixes various minor bugs.  For OLE2 support you will have to have glib 2.0 
installed (yes, that is glib from GTK/Gnome, not glibc!).");
-DTDD("Thu Aug 26 20:27:24 IST 2004 | Bugtracking using Mantis enabled.",
-     "You can now report and view bug-reports about libextractor on %s.",
-     extlink_("https://gnunet.org/mantis/","Mantis";));
-DTDD("Wed Aug 25 19:02:07 IST 2004 | libextractor v0.3.4 released.",
-     "This release fixes a minor linking error (<tt>-lm</tt> for 
<tt>floor</tt>), improves performance and adds support for GNU gettext 
(internationalization).");
-DTDD("Wed May 31 19:22:07 EST 2004 | libextractor v0.3.3 released.",
-     "This release fixes various minor bugs (segmentation faults and 
non-termination of mpeg and riff extractors for malformed files) and adds 
support for WAV files.");
-DTDD("Wed May 31 19:22:07 EST 2004 | libextractor v0.3.2 released.",
-     "This release fixes various minor bugs (plugins misbehaving for malformed 
files) and improves portability to Cygwin/MinGW.");
-echo "</dl>";
-P();
-W("%s",
-  intlink_("oldnews","Older news archive"));
 
-ANCHOR("links");
-H2("Links");
 P();
-W("Related work:");
-echo "<ul>";
-LILI("http://www.wotsit.org","File format database");
-LILI("http://getid3.sf.net/","getid3, similar project for PHP");
-LILI("download/php/",
-     "PHP wrapper for libextractor (mirrored, not written by us, see README)");
-LILI("http://dublincore.org/documents/dcmi-terms/","Meta-data categorization 
standard");
-LILI("http://hul.harvard.edu/jhove/","JHOVE, Harvard Object Validation 
Environment");
-echo "</ul>";
-W("Projects that use libextractor:");
-echo "<ul>";
-LILI("http://witme.sourceforge.net/libferris.web/","libferris, a virtual file 
system");
-LILI("http://evidence.sf.net/","Evidence, enlightened file manager");
-LILI("http://gnunet.org/","GNUnet, secure P2P file sharing");
-LILI("http://gnunet.org/doodle/","doodle, index your disk");
-echo "</ul>";
+if ( ($xlang) && ($xlang != "English")) {
+  H2("Mass translation");
+  W("The mass-translation page for translating many sentences at once is %s.",
+    intlink_("editor.php", "here"));
+  W("Note that the sentence database is shared with the %s, %s and %s 
projects.",
+    ARRAY(extlink_("http://gnunet.org/", "GNUnet"),
+         extlink_("http://gnunet.org/doodle/", "doodle"),
+         extlink_("http://gnunet.org/libextractor/", "libExtractor")));
+ }
 
-ANCHOR("contact");
-H2("Contact");
+H2("Bugtrack");
+W("i18nHTML uses Mantis for bugtracking.");
+W("Visit %s to report bugs.",
+  extlink_("https://gnunet.org/mantis/","https://gnunet.org/mantis/"));
+W("You need to sign up for a reporter account.");
+W("Please make sure you report bugs under <strong>I18nHTML</strong> and not 
under any of the other projects.");
 P();
-W("libextractor is developed by %s and %s.",
-  ARRAY(extlink_("http://grothoff.org/christian/";,
-                "Christian Grothoff"),
-       extlink_("http://compilers.cs.purdue.edu/~vids/";,
-                "Vids Samanta")));
-W("For questions about libextractor send email to %s.",
-  extlink_("mailto:address@hidden";,
-          "address@hidden"));
+W("If you dislike Mantis and need to report a bug contact %s via e-mail (good 
luck getting by the spam-filter).",
+  extlink_("mailto:address@hidden","address@hidden"));
 
-include("html_footer.php3");
+HR();
+generateFooter();
+echo "</body></html>\n";
 ?>
-

Modified: Extractor-docs/WWW/translate.php
===================================================================
--- Extractor-docs/WWW/translate.php    2005-04-04 06:47:24 UTC (rev 587)
+++ Extractor-docs/WWW/translate.php    2005-04-04 06:51:09 UTC (rev 588)
@@ -27,9 +27,7 @@
 
 DOCTYPE("HTML", "Transitional");
 echo "<html><head>\n";
-echo "<title>";
-TRANSLATE("WWW translation");
-echo "</title>";
+TITLE("WWW translation");
 echo "<meta name=\"description\" content=\"";
 TRANSLATE("Help translating this webpage.");
 echo "\">";
@@ -45,7 +43,7 @@
 W("Destination language: ");
 W($lang);
 P();
-echo "<form action=\"" . $i18nHTMLbase . "commitTranslation.php\">\n";
+echo "<form method=\"POST\" action=\"" . $i18nHTMLbase . 
"commitTranslation.php\">\n";
 echo "<input type=hidden name=\"text\" value=\"" . urlencode($text) . "\">\n";
 echo "<input type=hidden name=\"xlang\" value=\"$xlang\">\n";
 echo "<input type=hidden name=\"back\" value=\"$back\">\n";
@@ -93,9 +91,9 @@
    printf("<tr><td>%s</td><td><a href=\"" . $i18nHTMLbase . 
"vote.php?xlang=%s&text=%s&translation=%s\">%s</a></td></tr>\n",
           W_($row["lang"]),
           urlencode($row["lang"]),
-         urlencode($text),
-         $translation,
-         urldecode($translation));
+         $u,
+         urlencode(from_unicode($translation)),
+         fix(from_unicode($translation)));
  }
 echo "</table>";
 
@@ -121,4 +119,4 @@
 generateFooter();
 echo "</body></html>";
 
-?>
\ No newline at end of file
+?>

Modified: Extractor-docs/WWW/vote.php
===================================================================
--- Extractor-docs/WWW/vote.php 2005-04-04 06:47:24 UTC (rev 587)
+++ Extractor-docs/WWW/vote.php 2005-04-04 06:51:09 UTC (rev 588)
@@ -1,6 +1,6 @@
 <?php
 /*
-     (C) 2003, 2004 Christian Grothoff
+     (C) 2003, 2004, 2005 Christian Grothoff
 
      This code is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published
@@ -21,17 +21,18 @@
   // For sentences with multiple translations, the one with the most
   // votes is displayed.
 include("i18nhtml.inc");
-echo "<html><head><title>";
-W("WWW translation: vote");
-echo "</title></head><body>";
+echo "<html><head>";
+echo "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\" >";
+TITLE("WWW translation: vote");
+echo "</head><body>";
 if (!$connection) {
   echo "Database is down. Cannot edit translations.";
   die();
  } 
 $text = $_REQUEST['text'];
 $translation = $_REQUEST['translation'];
-$u = urlencode($text);
-$t = urlencode($translation);
+$u = mysql_real_escape_string($text);
+$t = mysql_real_escape_string(to_unicode($translation));
 echo "text = " . $text . "<br>\n";
 echo "translation = " . $translation . "<br>\n";
 

Modified: GNUnet-docs/WWW/html_header.php3
===================================================================
--- GNUnet-docs/WWW/html_header.php3    2005-04-04 06:47:24 UTC (rev 587)
+++ GNUnet-docs/WWW/html_header.php3    2005-04-04 06:51:09 UTC (rev 588)
@@ -3,9 +3,9 @@
 echo "<html><head>\n";
 $haveNBO = 0;
 if ($title) {
-  echo "<title>";
-  TRANSLATE($title);
-  echo "</title>";
+  TITLE($title);
+ } else {
+  TITLE("GNUnet");
  }
 if ($description) {
   echo "<meta name=\"description\" content=\"";

Modified: GNUnet-docs/WWW/i18nhtml.inc
===================================================================
--- GNUnet-docs/WWW/i18nhtml.inc        2005-04-04 06:47:24 UTC (rev 587)
+++ GNUnet-docs/WWW/i18nhtml.inc        2005-04-04 06:51:09 UTC (rev 588)
@@ -1,6 +1,6 @@
 <?php
 /*
-     (C) 2003, 2004 Christian Grothoff and other contributing authors.
+     (C) 2003, 2004, 2005 Christian Grothoff and other contributing authors.
 
      This code is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published
@@ -27,6 +27,7 @@
   // $editor can be set to create a translation
   // tag even if a translation is already available.
   // there is currently no security.
+  //
   // An "_" is used for functions that return the
   // translated string instead of printing it directly.
   // These functions are used for "%s" printing with W().
@@ -39,6 +40,8 @@
 // obtain user db specific configuration parameters
 include("i18nhtml_config.inc");
 
+header("Content-type: text/html; charset=utf-8");
+
 // establish default connection to database server
 $connection = @mysql_connect($i18nHTMLsqlServer,
                             $i18nHTMLsqlUser,
@@ -119,6 +122,7 @@
 if ($xlang)
   $lang = $xlang;
 $lang = ucfirst(strtolower($lang));
+$lang = mysql_real_escape_string($lang);
 $editor = $_REQUEST['editor'];
 
 
@@ -241,6 +245,464 @@
   }
 }
 
+
+/**
+ * restore UTF-8 from numeric HTML entities
+ *
+ * The input is first run through utf8_encode(), so raw ISO-8859-1 bytes
+ * become UTF-8.  Each decimal entity (eg, &amp;#8364;) whose code point is
+ * below 0x10000 is then replaced by the matching UTF-8 byte sequence
+ * (eg, €).  Entities for larger code points are re-emitted as decimal
+ * entities instead of being expanded.
+ *
+ * @param string text whose bytes are treated as ISO-8859-1, possibly containing numeric HTML entities
+ * @return an UTF-8 string
+ */
+function from_unicode($text) {
+  // translate extended ISO8859-1 chars, if any
+  $encoded = utf8_encode($text);
+  
+  // with PREG_SPLIT_DELIM_CAPTURE the pieces alternate: even positions
+  // hold literal text, odd positions hold the digits of one entity
+  $pieces = preg_split('/&#(\d+?);/', $encoded, -1, PREG_SPLIT_DELIM_CAPTURE);
+  
+  $result = '';
+  foreach($pieces as $position => $piece) {
+    
+    // literal text between entities is copied verbatim
+    if(($position & 1) == 0) {
+      $result .= $piece;
+      continue;
+    }
+    
+    // numeric value of the entity
+    $code = intval($piece);
+    
+    if($code < 0x80) {
+      // one byte: plain ASCII
+      $result .= chr($code);
+      
+    } elseif($code < 0x800) {
+      // two bytes: 110xxxxx 10xxxxxx
+      $result .= chr(0xC0 | ($code >> 6))
+        . chr(0x80 | ($code & 0x3F));
+      
+    } elseif($code < 0x10000) {
+      // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
+      $result .= chr(0xE0 | ($code >> 12))
+        . chr(0x80 | (($code >> 6) & 0x3F))
+        . chr(0x80 | ($code & 0x3F));
+      
+    } else {
+      // beyond three bytes: keep it as an entity
+      $result .= '&#' . $code . ';';
+    }
+  }
+  
+  // the updated string
+  return $result;
+}
+
+
+/**
+ * transcode unicode entities to/from HTML entities
+ *
+ * Also, this function transforms HTML entities into their equivalent Unicode 
entities.
+ * For example, w.bloggar posts pages using HTML entities.
+ * If you have to modify these pages using web forms, you would like to get 
UTF-8 instead.
+ *
+ * @link 
http://www.evolt.org/article/A_Simple_Character_Entity_Chart/17/21234/ A Simple 
Character Entity Chart
+ *
+ * @param string the string to be transcoded
+ * @param boolean TRUE to transcode to Unicode, FALSE to transcode to HTML
+ * @return a transcoded string
+ */
+function transcode($input, $to_unicode=TRUE) {
+  
+  // initialize tables only once
+  static $html_entities, $unicode_entities;
+  if(!is_array($html_entities)) {
+    
+    
+    // numerical order
+    $codes = array(
+                  '&#160;'     => '&nbsp;',    // non-breaking space
+                  '&#161;'     => '&iexcl;',   // inverted exclamation mark
+                  '&#162;'     => '&cent;',    // cent sign
+                  '&#163;'     => '&pound;',   // pound sign
+                  '&#164;'     => '&curren;',  // currency sign
+                  '&#165;'     => '&yen;',             // yen sign
+                  '&#166;'     => '&brvbar;',  // broken bar
+                  '&#167;'     => '&sect;',    // section sign
+                  '&#168;'     => '&uml;',             // diaeresis
+                  '&#169;'     => '&copy;',    // copyright sign
+                  '&#170;'     => '&ordf;',    // feminine ordinal indicator
+                  '&#171;'     => '&laquo;',   // left-pointing double angle 
quotation mark
+                  '&#172;'     => '&not;',             // not sign
+                  '&#173;'     => '&shy;',             // soft hyphen
+                  '&#174;'     => '&reg;',             // registered sign
+                  '&#175;'     => '&macr;',    // macron
+                  '&#176;'     => '&deg;',             // degree sign
+                  '&#177;'     => '&plusmn;',  // plus-minus sign
+                  '&#178;'     => '&sup2;',    // superscript two
+                  '&#179;'     => '&sup3;',    // superscript three
+                  '&#180;'     => '&acute;',   // acute accent
+                  '&#181;'     => '&micro;',   // micro sign
+                  '&#182;'     => '&para;',    // pilcrow sign
+                  '&#183;'     => '&middot;',  // middle dot
+                  '&#184;'     => '&cedil;',   // cedilla
+                  '&#185;'     => '&sup1;',    // superscript one
+                  '&#186;'     => '&ordm;',    // masculine ordinal indicator
+                  '&#187;'     => '&raquo;',   // right-pointing double angle 
quotation mark
+                  '&#188;'     => '&frac14;',  // vulgar fraction one quarter
+                  '&#189;'     => '&frac12;',  // vulgar fraction one half
+                  '&#190;'     => '&frac34;',  // vulgar fraction three 
quarters
+                  '&#191;'     => '&iquest;',  // inverted question mark
+                  '&#192;'     => '&Agrave;',  // latin capital letter A with 
grave
+                  '&#193;'     => '&Aacute;',  // latin capital letter A with 
acute
+                  '&#194;'     => '&Acirc;',   // latin capital letter A with 
circumflex
+                  '&#195;'     => '&Atilde;',  // latin capital letter A with 
tilde
+                  '&#196;'     => '&Auml;',    // latin capital letter A with 
diaeresis
+                  '&#197;'     => '&Aring;',   // latin capital letter A with 
ring above
+                  '&#198;'     => '&AElig;',   // latin capital letter AE
+                  '&#199;'     => '&Ccedil;',  // latin capital letter C with 
cedilla
+                  '&#200;'     => '&Egrave;',  // latin capital letter E with 
grave
+                  '&#201;'     => '&Eacute;',  // latin capital letter E with 
acute
+                  '&#202;'     => '&Ecirc;',   // latin capital letter E with 
circumflex
+                  '&#203;'     => '&Euml;',    // latin capital letter E with 
diaeresis
+                  '&#204;'     => '&Igrave;',  // latin capital letter I with 
grave
+                  '&#205;'     => '&Iacute;',  // latin capital letter I with 
acute
+                  '&#206;'     => '&Icirc;',   // latin capital letter I with 
circumflex
+                  '&#207;'     => '&Iuml;',    // latin capital letter I with 
diaeresis
+                  '&#208;'     => '&ETH;',             // latin capital letter 
ETH
+                  '&#209;'     => '&Ntilde;',  // latin capital letter N with 
tilde
+                  '&#210;'     => '&Ograve;',  // latin capital letter O with 
grave
+                  '&#211;'     => '&Oacute;',  // latin capital letter O with 
acute
+                  '&#212;'     => '&Ocirc;',   // latin capital letter O with 
circumflex
+                  '&#213;'     => '&Otilde;',  // latin capital letter O with 
tilde
+                  '&#214;'     => '&Ouml;',    // latin capital letter O with 
diaeresis
+                  '&#215;'     => '&times;',   // multiplication sign
+                  '&#216;'     => '&Oslash;',  // latin capital letter O with 
stroke
+                  '&#217;'     => '&Ugrave;',  // latin capital letter U with 
grave
+                  '&#218;'     => '&Uacute;',  // latin capital letter U with 
acute
+                  '&#219;'     => '&Ucirc;',   // latin capital letter U with 
circumflex
+                  '&#220;'     => '&Uuml;',    // latin capital letter U with 
diaeresis
+                  '&#221;'     => '&Yacute;',  // latin capital letter Y with 
acute
+                  '&#222;'     => '&THORN;',   // latin capital letter THORN
+                  '&#223;'     => '&szlig;',   // latin small letter sharp s
+                  '&#224;'     => '&agrave;',  // latin small letter a with 
grave
+                  '&#225;'     => '&aacute;',  // latin small letter a with 
acute
+                  '&#226;'     => '&acirc;',   // latin small letter a with 
circumflex
+                  '&#227;'     => '&atilde;',  // latin small letter a with 
tilde
+                  '&#228;'     => '&auml;',    // latin small letter a with 
diaeresis
+                  '&#229;'     => '&aring;',   // latin small letter a with 
ring above
+                  '&#230;'     => '&aelig;',   // latin small letter ae
+                  '&#231;'     => '&ccedil;',  // latin small letter c with 
cedilla
+                  '&#232;'     => '&egrave;',  // latin small letter e with 
grave
+                  '&#233;'     => '&eacute;',  // latin small letter e with 
acute
+                  '&#234;'     => '&ecirc;',   // latin small letter e with 
circumflex
+                  '&#235;'     => '&euml;',    // latin small letter e with 
diaeresis
+                  '&#236;'     => '&igrave;',  // latin small letter i with 
grave
+                  '&#237;'     => '&iacute;',  // latin small letter i with 
acute
+                  '&#238;'     => '&icirc;',   // latin small letter i with 
circumflex
+                  '&#239;'     => '&iuml;',    // latin small letter i with 
diaeresis
+                  '&#240;'     => '&eth;',             // latin small letter 
eth
+                  '&#241;'     => '&ntilde;',  // latin small letter n with 
tilde
+                  '&#242;'     => '&ograve;',  // latin small letter o with 
grave
+                  '&#243;'     => '&oacute;',  // latin small letter o with 
acute
+                  '&#244;'     => '&ocirc;',   // latin small letter o with 
circumflex
+                  '&#245;'     => '&otilde;',  // latin small letter o with 
tilde
+                  '&#246;'     => '&ouml;',    // latin small letter o with 
diaeresis
+                  '&#247;'     => '&divide;',  // division sign
+                  '&#248;'     => '&oslash;',  // latin small letter o with 
stroke
+                  '&#249;'     => '&ugrave;',  // latin small letter u with 
grave
+                  '&#250;'     => '&uacute;',  // latin small letter u with 
acute
+                  '&#251;'     => '&ucirc;',   // latin small letter u with 
circumflex
+                  '&#252;'     => '&uuml;',    // latin small letter u with 
diaeresis
+                  '&#253;'     => '&yacute;',  // latin small letter y with 
acute
+                  '&#254;'     => '&thorn;',   // latin small letter thorn
+                  '&#255;'     => '&yuml;',    // latin small letter y with diaeresis
+                  '&#338;'     => '&OElig;',   // latin capital ligature OE
+                  '&#339;'     => '&oelig;',   // latin small ligature oe
+                  '&#352;'     => '&Scaron;',  // latin capital letter S with 
caron
+                  '&#353;'     => '&scaron;',  // latin small letter s with 
caron
+                  '&#376;'     => '&Yuml;',    // latin capital letter Y with 
diaeresis
+                  '&#402;'     => '&fnof;' ,   // latin small f with hook
+                  '&#710;'     => '&circ;',    // modifier letter circumflex 
accent
+                  '&#732;'     => '&tilde;',   // small tilde
+                  '&#913;'     => '&Alpha;',   // greek capital letter alpha
+                  '&#914;'     => '&Beta;',    // greek capital letter beta
+                  '&#915;'     => '&Gamma;',   // greek capital letter gamma
+                  '&#916;'     => '&Delta;',   // greek capital letter delta
+                  '&#917;'     => '&Epsilon;', // greek capital letter epsilon
+                  '&#918;'     => '&Zeta;',    // greek capital letter zeta
+                  '&#919;'     => '&Eta;',             // greek capital letter 
eta
+                  '&#920;'     => '&Theta;',   // greek capital letter theta
+                  '&#921;'     => '&Iota;',    // greek capital letter iota
+                  '&#922;'     => '&Kappa;',   // greek capital letter kappa
+                  '&#923;'     => '&Lambda;',  // greek capital letter lambda
+                  '&#924;'     => '&Mu;',              // greek capital letter 
mu
+                  '&#925;'     => '&Nu;',              // greek capital letter 
nu
+                  '&#926;'     => '&Xi;',              // greek capital letter 
xi
+                  '&#927;'     => '&Omicron;', // greek capital letter omicron
+                  '&#928;'     => '&Pi;',              // greek capital letter 
pi
+                  '&#929;'     => '&Rho;',             // greek capital letter 
rho
+                  '&#931;'     => '&Sigma;',   // greek capital letter sigma
+                  '&#932;'     => '&Tau;',             // greek capital letter 
tau
+                  '&#933;'     => '&Upsilon;', // greek capital letter upsilon
+                  '&#934;'     => '&Phi;',             // greek capital letter 
phi
+                  '&#935;'     => '&Chi;',             // greek capital letter 
chi
+                  '&#936;'     => '&Psi;',             // greek capital letter 
psi
+                  '&#937;'     => '&Omega;',   // greek capital letter omega
+                  '&#945;'     => '&alpha;',   // greek small letter alpha
+                  '&#946;'     => '&beta;',    // greek small letter beta
+                  '&#947;'     => '&gamma;',   // greek small letter gamma
+                  '&#948;'     => '&delta;',   // greek small letter delta
+                  '&#949;'     => '&epsilon;', // greek small letter epsilon
+                  '&#950;'     => '&zeta;',    // greek small letter zeta
+                  '&#951;'     => '&eta;',             // greek small letter 
eta
+                  '&#952;'     => '&theta;',   // greek small letter theta
+                  '&#953;'     => '&iota;',    // greek small letter iota
+                  '&#954;'     => '&kappa;',   // greek small letter kappa
+                  '&#955;'     => '&lambda;',  // greek small letter lambda
+                  '&#956;'     => '&mu;',              // greek small letter mu
+                  '&#957;'     => '&nu;',              // greek small letter nu
+                  '&#958;'     => '&xi;',              // greek small letter xi
+                  '&#959;'     => '&omicron;', // greek small letter omicron
+                  '&#960;'     => '&pi;',              // greek small letter pi
+                  '&#961;'     => '&rho;',             // greek small letter 
rho
+                  '&#962;'     => '&sigmaf;',  // greek small letter final 
sigma
+                  '&#963;'     => '&sigma;',   // greek small letter sigma
+                  '&#964;'     => '&tau;',             // greek small letter 
tau
+                  '&#965;'     => '&upsilon;', // greek small letter upsilon
+                  '&#966;'     => '&phi;',             // greek small letter 
phi
+                  '&#967;'     => '&chi;',             // greek small letter 
chi
+                  '&#968;'     => '&psi;',             // greek small letter 
psi
+                  '&#969;'     => '&omega;',   // greek small letter omega
+                  '&#977;'     => '&thetasym;',        // greek small letter 
theta symbol
+                  '&#978;'     => '&upsih;',   // greek upsilon with hook 
symbol
+                  '&#982;'     => '&piv;',             // greek pi symbol
+                  '&#8194;'    => '&ensp;',    // en space
+                  '&#8195;'    => '&emsp;',    // em space
+                  '&#8201;'    => '&thinsp;',  // thin space
+                  '&#8204;'    => '&zwnj;',    // zero width non-joiner
+                  '&#8205;'    => '&zwj;',             // zero width joiner
+                  '&#8206;'    => '&lrm;',             // left-to-right mark
+                  '&#8207;'    => '&rlm;',             // right-to-left mark
+                  '&#8211;'    => '&ndash;',   // en dash
+                  '&#8212;'    => '&mdash;',   // em dash
+                  '&#8216;'    => '&lsquo;',   // left single quotation mark
+                  '&#8217;'    => '&rsquo;',   // right single quotation mark
+                  '&#8218;'    => '&sbquo;',   // single low-9 quotation mark
+                  '&#8220;'    => '&ldquo;',   // left double quotation mark
+                  '&#8221;'    => '&rdquo;',   // right double quotation mark
+                  '&#8222;'    => '&bdquo;',   // double low-9 quotation mark
+                  '&#8224;'    => '&dagger;',  // dagger
+                  '&#8225;'    => '&Dagger;',  // double dagger
+                  '&#8226;'    => '&bull;',    // bullet
+                  '&#8230;'    => '&hellip;',  // horizontal ellipsis
+                  '&#8240;'    => '&permil;',  // per mille sign
+                  '&#8242;'    => '&prime;',   // prime (minutes, feet)
+                  '&#8243;'    => '&Prime;',   // double prime
+                  '&#8249;'    => '&lsaquo;',  // single left-pointing angle 
quotation mark
+                  '&#8250;'    => '&rsaquo;',  // single right-pointing angle 
quotation mark
+                  '&#8254;'    => '&oline;',   // overline
+                  '&#8260;'    => '&frasl;',   // fraction slash
+                  '&#8364;'    => '&euro;',    // euro sign
+                  '&#8465;'    => '&image;',   // blackletter capital I
+                  '&#8472;'    => '&weierp;',  // script capital P
+                  '&#8476;'    => '&real;',    // blackletter capital R
+                  '&#8482;'    => '&trade;',   // trade mark sign
+                  '&#8501;'    => '&alefsym;', // alef symbol
+                  '&#8592;'    => '&larr;',    // leftwards arrow
+                  '&#8593;'    => '&uarr;',    // upwards arrow
+                  '&#8594;'    => '&rarr;',    // rightwards arrow
+                  '&#8595;'    => '&darr;',    // downwards arrow
+                  '&#8596;'    => '&harr;',    // left right arrow
+                  '&#8629;'    => '&crarr;',   // downwards arrow with corner 
leftwards
+                  '&#8656;'    => '&lArr;',    // leftwards double arrow
+                  '&#8657;'    => '&uArr;',    // upwards double arrow
+                  '&#8658;'    => '&rArr;',    // rightwards double arrow
+                  '&#8659;'    => '&dArr;',    // downwards double arrow
+                  '&#8660;'    => '&hArr;',    // left right double arrow
+                  '&#8704;'    => '&forall;',  // for all
+                  '&#8706;'    => '&part;',    // partial differential
+                  '&#8707;'    => '&exist;',   // there exists
+                  '&#8709;'    => '&empty;',   // empty set
+                  '&#8711;'    => '&nabla;',   // nabla
+                  '&#8712;'    => '&isin;',    // element of
+                  '&#8713;'    => '&notin;',   // not an element of
+                  '&#8715;'    => '&ni;',              // contains as member
+                  '&#8719;'    => '&prod;',    // n-ary product
+                  '&#8721;'    => '&sum;',             // n-ary summation
+                  '&#8722;'    => '&minus;',   // minus sign
+                  '&#8727;'    => '&lowast;',  // asterisk operator
+                  '&#8730;'    => '&radic;',   // square root
+                  '&#8733;'    => '&prop;',    // proportional to
+                  '&#8734;'    => '&infin;',   // infinity
+                  '&#8736;'    => '&ang;',             // angle
+                  '&#8743;'    => '&and;',             // logical and
+                  '&#8744;'    => '&or;',              // logical or
+                  '&#8745;'    => '&cap;',             // intersection
+                  '&#8746;'    => '&cup;',             // union
+                  '&#8747;'    => '&int;',             // integral
+                  '&#8756;'    => '&there4;',  // therefore
+                  '&#8764;'    => '&sim;',             // tilde operator
+                  '&#8773;'    => '&cong;',    // approximately equal to
+                  '&#8776;'    => '&asymp;',   // almost equal to
+                  '&#8800;'    => '&ne;',              // not equal to
+                  '&#8801;'    => '&equiv;',   // identical to
+                  '&#8804;'    => '&le;',              // less-than or equal to
+                  '&#8805;'    => '&ge;',              // greater-than or 
equal to
+                  '&#8834;'    => '&sub;',             // subset of
+                  '&#8835;'    => '&sup;',             // superset of
+                  '&#8836;'    => '&nsub;',    // not a subset of
+                  '&#8838;'    => '&sube;',    // subset of or equal to
+                  '&#8839;'    => '&supe;',    // superset of or equal to
+                  '&#8853;'    => '&oplus;',   // circled plus
+                  '&#8855;'    => '&otimes;',  // circled times
+                  '&#8869;'    => '&perp;',    // up tack
+                  '&#8901;'    => '&sdot;',    // dot operator
+                  '&#8968;'    => '&lceil;',   // left ceiling
+                  '&#8969;'    => '&rceil;',   // right ceiling
+                  '&#8970;'    => '&lfloor;',  // left floor
+                  '&#8971;'    => '&rfloor;',  // right floor
+                  '&#9001;'    => '&lang;',    // left-pointing angle bracket
+                  '&#9002;'    => '&rang;',    // right-pointing angle bracket
+                  '&#9674;'    => '&loz;',             // lozenge
+                  '&#9824;'    => '&spades;',  // black spade suit
+                  '&#9827;'    => '&clubs;',   // black club suit
+                  '&#9829;'    => '&hearts;',  // black heart suit
+                  '&#9830;'    => '&diams;'    // black diam suit
+                  );
+    
+    // split entities for use in str_replace()
+    foreach($codes as  $unicode_entity => $html_entity) {
+      $unicode_entities[] = $unicode_entity;
+      $html_entities[] = $html_entity;
+    }
+  }
+  
+  // transcode HTML entities to Unicode
+  if($to_unicode)
+    return str_replace($html_entities, $unicode_entities, $input);
+  
+  // transcode Unicode entities to HTML entities
+  else
+    return str_replace($unicode_entities, $html_entities, $input);
+}
+
+
+
+
+/**
+ * transcode multi-byte characters to HTML representations for Unicode
+ *
+ * This function aims to preserve Unicode characters through storage in an
+ * ISO-8859-1 compliant system.
+ *
+ * Every multi-byte UTF-8 character is transformed to its equivalent HTML
+ * numerical entity (eg, &amp;#4568;) that may be handled safely by PHP and
+ * by MySQL.  Single-byte characters (below 0x80) are copied unchanged.
+ * A multi-byte sequence whose decoded value is 0 (eg, C0 80) is dropped.
+ *
+ * Of course, this solution does not allow for full-text search in the
+ * database and therefore is not a definitive solution to
+ * internationalization issues.
+ * It does enable, however, practical use of Unicode to build pages in
+ * foreign languages.
+ *
+ * Also, this function transforms HTML entities into their equivalent
+ * Unicode entities (the input is first passed through transcode()).
+ * For example, w.bloggar posts pages using HTML entities.
+ * If you have to modify these pages using web forms, you would like to get
+ * UTF-8 instead.
+ *
+ * NOTE(review): the input is not validated as UTF-8.  The length of each
+ * sequence is derived from its first byte only, so stray continuation
+ * bytes (0x80-0xBF) are decoded as two-byte lead bytes, and a sequence
+ * truncated at the end of the string is decoded from offsets past the end.
+ *
+ * @link http://www.evolt.org/article/A_Simple_Character_Entity_Chart/17/21234/ A Simple Character Entity Chart
+ *
+ * @param string $input the original UTF-8 string
+ * @return string a string acceptable in an ISO-8859-1 storage system (ie., PHP4 + MySQL 3)
+ */
+function to_unicode($input) {
+
+  // transcode HTML entities to Unicode entities
+  $input = transcode($input);
+
+  // scan the whole string; the length cannot change while we scan
+  $output = '';
+  $index = 0;
+  $length = strlen($input);
+  while($index < $length) {
+
+    // look at one char
+    $char = ord($input[$index]);
+
+    // one byte (0xxxxxxx)
+    if($char < 0x80) {
+
+      // some chars may be undefined
+      $output .= chr($char);
+      $index += 1;
+
+      // two bytes (110xxxxx 10xxxxxx)
+    } elseif($char < 0xE0) {
+
+      // strip weird sequences (eg, C0 80 -> NUL)
+      if($value = (($char % 0x20) * 0x40) + (ord($input[$index + 1]) % 0x40))
+       $output .= '&#' . $value . ';';
+      $index += 2;
+
+      // three bytes (1110xxxx 10xxxxxx 10xxxxxx) example: euro sign = \xE2\x82\xAC -> &#8364;
+    } elseif($char < 0xF0) {
+
+      // strip weird sequences
+      if($value = (($char % 0x10) * 0x1000) + ((ord($input[$index + 1]) % 0x40) * 0x40) + (ord($input[$index + 2]) % 0x40))
+       $output .= '&#' . $value . ';';
+      $index += 3;
+
+      // four bytes (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
+    } elseif($char < 0xF8) {
+
+      // strip weird sequences
+      if($value = (($char % 0x08) * 0x40000) + ((ord($input[$index + 1]) % 0x40) * 0x1000) + ((ord($input[$index + 2]) % 0x40) * 0x40)
+        + (ord($input[$index + 3]) % 0x40))
+       $output .= '&#' . $value . ';';
+      $index += 4;
+
+      // five bytes (111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx)
+    } elseif($char < 0xFC) {
+
+      // strip weird sequences
+      if($value = (($char % 0x04) * 0x1000000) + ((ord($input[$index + 1]) % 0x40) * 0x40000) + ((ord($input[$index + 2]) % 0x40) * 0x1000)
+        + ((ord($input[$index + 3]) % 0x40) * 0x40) + (ord($input[$index + 4]) % 0x40))
+       $output .= '&#' . $value . ';';
+      $index += 5;
+
+      // six bytes (1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx)
+    } else {
+
+      // strip weird sequences
+      // the lowest six bits come from the last continuation byte, at offset 5
+      if($value = (($char % 0x02) * 0x40000000) + ((ord($input[$index + 1]) % 0x40) * 0x1000000) + ((ord($input[$index + 2]) % 0x40) * 0x40000)
+        + ((ord($input[$index + 3]) % 0x40) * 0x1000) + ((ord($input[$index + 4]) % 0x40) * 0x40) + (ord($input[$index + 5]) % 0x40))
+       $output .= '&#' . $value . ';';
+      $index += 6;
+    }
+
+  }
+
+  // return the translated string
+  return $output;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 // returns either the translated string
 // or the original string.  Assumes we
 // are passed the original string as occurs
@@ -264,8 +726,7 @@
   if ($a == "")
     return $a;
 
-  // $a = htmlentities($a, ENT_QUOTES, $charset);
-  $u = urlencode($a);
+  $u = mysql_real_escape_string(urlencode($a));
   
   if (!$connection) {
     // database not available, just print English
@@ -319,7 +780,7 @@
     return fix($a);              // just return English string
   } else { // translation available
     $row = mysql_fetch_array($result);
-    return fix(urldecode($row["translation"]));
+    return $row["translation"];
   }
 }
 
@@ -361,7 +822,16 @@
 }
 
 function TITLE($a,$b="") {
-  echo "<title>" . W_($a,$b) . "</title>\n";
+  global $lang;
+  global $languagecodes;
+  echo "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\" 
>";
+  echo "<title>" . TRANSLATE_($a,$b) . "</title>\n";
+  if (isset($languagecodes[$lang])) {
+    echo "<meta name=\"content-language\" content=\"" . 
+         $languagecodes[$lang] . "\">";
+    echo "<meta name=\"language\" content=\"" . 
+         $languagecodes[$lang] . "\">";
+  }
 }
 
 

Modified: GNUnet-docs/WWW/i18nhtml_config.inc
===================================================================
--- GNUnet-docs/WWW/i18nhtml_config.inc 2005-04-04 06:47:24 UTC (rev 587)
+++ GNUnet-docs/WWW/i18nhtml_config.inc 2005-04-04 06:51:09 UTC (rev 588)
@@ -1,6 +1,6 @@
 <?php
 /*
-     (C) 2003, 2004 Christian Grothoff
+     (C) 2003, 2004, 2005 Christian Grothoff
 
      This code is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published
@@ -24,12 +24,18 @@
 $i18nHTMLrecordMode = 2; // 1: only missing, 2: everything, 0: disable
 
 $i18nHTMLsqlServer = "localhost";
-$i18nHTMLsqlUser = "GNUnetWWW";
-$i18nHTMLsqlPass = "garlic";
+$i18nHTMLsqlUser = "i18nHTML";
+$i18nHTMLsqlPass = "pass";
 
-$i18nHTMLsqlDB = "translations"; // default is "translation"
+$i18nHTMLsqlDB = "translation"; // default is "translation"
 $i18nHTMLbase = "";  // base directory prepended to i18nHTML php pages used in 
links
 $i18nHTMLmarker = "*";  // default value if never changed
 
+// Note that if you enable debug, the scripts may print
+// warnings even if everything is ok!
+$i18nHTMLdebug = 0; // 0 = no, 1 = yes
 
+// for selectively cloning a DB
+//  $i18nHTMLclone = "/tmp/cloneFile.sql";
+
 ?>

Added: GNUnet-docs/WWW/papers/CameraReady_174.pdf
===================================================================
(Binary files differ)


Property changes on: GNUnet-docs/WWW/papers/CameraReady_174.pdf
___________________________________________________________________
Name: svn:mime-type
   + application/octet-stream

Modified: doodle-docs/WWW/commitMassTranslation.php
===================================================================
--- doodle-docs/WWW/commitMassTranslation.php   2005-04-04 06:47:24 UTC (rev 
587)
+++ doodle-docs/WWW/commitMassTranslation.php   2005-04-04 06:51:09 UTC (rev 
588)
@@ -1,6 +1,6 @@
 <?php
 /*
-     (C) 2003, 2004 Christian Grothoff
+     (C) 2003, 2004, 2005 Christian Grothoff
 
      This code is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published
@@ -19,22 +19,20 @@
 */
 include("i18nhtml.inc");
 if (!$connection) {
-  echo "<html><head><title>WWW translation: commit</title></head><body>";
   echo "Database is down. Cannot edit translations.";
-  echo "</body></html>";
   die();
 }
 if ($xlang == "English") {
-  echo "<html><head><title>WWW translation: commit</title></head><body>";
   W("Translating to English currently not allowed.\n");
-  echo "</body></html>";
   die();
 }
-echo "<html><head><title>WWW translation: commit</title></head><body>";
+echo "<html><head>";
+TITLE("WWW translation: commit");
+echo "</head><body>";
 W("Processing translations...");
 P();
 $done = 0;
-foreach($_GET as $dec=>$val) {
+foreach($_POST as $dec=>$val) {
   if ($val == "")
     continue;
   if ( ($dec == "xlang") || ($dec == "start") )
@@ -48,7 +46,7 @@
     $num--;
     $row = mysql_fetch_array($result);
     if ($dec == bin2hex(md5(urldecode($row["c"])))) {
-      $enc = $row["c"];
+      $enc = mysql_real_escape_string($row["c"]);
       break;
     }
   }
@@ -60,22 +58,30 @@
   }  
   $query = "DELETE FROM pending WHERE lang=\"$lang\" AND c=\"$enc\"";
   mysql_query($query, $connection); 
-  //$t = urlencode($val);
-  $t = urlencode($val);
-  //  $t = urlencode(htmlentities($val, ENT_QUOTES, $charset));
+  $t = mysql_real_escape_string(to_unicode($val));
   $query = "SELECT ranking FROM map WHERE name=\"$enc\" AND lang=\"$lang\" AND 
translation=\"$t\"";
   $result = mysql_query($query, $connection);
   $num = 0;
   if ($result) 
     $num = mysql_numrows($result);
   if ($num == 0) {
-    $query = "INSERT INTO map VALUES(\"$enc\", \"$lang\", \"$t\", 1, \"" . 
$_SERVER['REMOTE_ADDR'] . "\");";
-    mysql_query($query, $connection);
-    $done++;
-    W("Storing translation for &quot;%s&quot = &quot;%s&quot;.",
-      ARRAY(urldecode($enc),
-           urldecode($t)));
-    BR();  
+    $txtCnt = count_chars(urldecode($enc), 1);
+    $tCnt = count_chars($t, 1);
+    if ($txtCnt[ord('%')] != $tCnt[ord('%')]) {
+      W("Commit '%s->%s' failed.", $enc, $t);
+      W("The number of percent signs in source text and translation do not 
match.");     
+      W("Note that you must preserve all %%s expressions unchanged.");
+      W("Also, a single displayed %% sign must be translated into two (%%%%) 
such signs.");
+      P();
+    } else {
+      $query = "INSERT INTO map VALUES(\"$enc\", \"$lang\", \"$t\", 1, \"" . 
$_SERVER['REMOTE_ADDR'] . "\");";
+      mysql_query($query, $connection);
+      $done++;
+      W("Storing translation for &quot;%s&quot = &quot;%s&quot;.",
+        ARRAY(urldecode($enc),
+             urldecode($t)));
+      BR();  
+    }
   }  
 }
 P();

Modified: doodle-docs/WWW/commitTranslation.php
===================================================================
--- doodle-docs/WWW/commitTranslation.php       2005-04-04 06:47:24 UTC (rev 
587)
+++ doodle-docs/WWW/commitTranslation.php       2005-04-04 06:51:09 UTC (rev 
588)
@@ -1,6 +1,6 @@
 <?php
 /*
-     (C) 2003, 2004 Christian Grothoff
+     (C) 2003, 2004, 2005 Christian Grothoff
 
      This code is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published
@@ -18,7 +18,7 @@
      Boston, MA 02111-1307, USA.
 */
 include("i18nhtml.inc");
-$text = $_REQUEST['text'];
+$text = mysql_real_escape_string($_REQUEST['text']);
 $translation = $_REQUEST['translation'];
 $back = $_REQUEST['back'];
 if (!$connection) {
@@ -30,12 +30,7 @@
   die();
 }
 
-// note: $text is already urlencoded (by submitting via form) and html 
compatible
-// ensure translation is stored in encoded form and html compatible
-// if (get_magic_quotes_gpc()) $translation = stripslashes($translation);
-
-// $t = urlencode(htmlentities($translation, ENT_QUOTES, $charset));
-$t = urlencode($translation);
+$t = mysql_real_escape_string(to_unicode($translation));
 // check for identical translation
 $query = "SELECT ranking FROM map WHERE name=\"$text\" AND lang=\"$lang\" AND 
translation=\"$t\"";
 $result = mysql_query($query, $connection);
@@ -44,22 +39,38 @@
   $num = mysql_numrows($result);
 }
 if ($num > 0) {
-  echo "<html><body>";
+  echo "<html><head>";
+  TITLE("Translation exists.");
+  echo "</head><body>";
   W("Translation exists.");   
   extlink($back, "Back...");
   generateFooter();
   echo "</body></html>";
 } else {
-  //  if (!get_magic_quotes_gpc()) $t = addslashes($t); // ensure escaped 
before adding to DB
-  $query = "INSERT INTO map VALUES(\"$text\", \"$lang\", \"$t\", 1, \"" . 
$_SERVER['REMOTE_ADDR'] . "\");";
-  $result = mysql_query($query, $connection);
-  if ($result) {
-    header("Location: " . $back); /* Redirect browser */
+  $txtCnt = count_chars(urldecode($text), 1);
+  $tCnt = count_chars($t, 1);
+  if ($txtCnt[ord('%')] != $tCnt[ord('%')]) {
+      echo "<html><head>";
+      TITLE("Commit failed.");
+      echo "</head><body>";
+      W("Commit failed.");
+      W("The number of percent signs in source text and translation do not 
match.");     
+      W("Note that you must preserve all %%s expressions unchanged.");
+      W("Also, a single displayed %% sign must be translated into two (%%%%) 
such signs.");
+      echo "</body></html>";
   } else {
-    echo "<html><body>";
-    W("Commit ('%s') failed: ", $query);
-    echo mysql_error();
-    echo "</body></html>";
+    $query = "INSERT INTO map VALUES(\"$text\", \"$lang\", \"$t\", 1, \"" . 
$_SERVER['REMOTE_ADDR'] . "\");";
+    $result = mysql_query($query, $connection);
+    if ($result) {
+      header("Location: " . $back); /* Redirect browser */
+    } else {
+      echo "<html><head>";
+      TITLE("Commit failed.");
+      echo "</head><body>";
+      W("Commit ('%s') failed: ", $query);
+      echo mysql_error();
+      echo "</body></html>";
+    }
   } 
 }
 ?>
\ No newline at end of file

Modified: doodle-docs/WWW/editor.php
===================================================================
--- doodle-docs/WWW/editor.php  2005-04-04 06:47:24 UTC (rev 587)
+++ doodle-docs/WWW/editor.php  2005-04-04 06:51:09 UTC (rev 588)
@@ -1,6 +1,6 @@
 <?php
 /*
-     (C) 2003, 2004 Christian Grothoff
+     (C) 2003, 2004, 2005 Christian Grothoff
 
      This code is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published
@@ -65,7 +65,7 @@
     ARRAY($start, $end));
   P();
 
-  echo "<form action=\"" . $i18nHTMLbase . "commitMassTranslation.php\">";
+  echo "<form method=\"POST\" action=\"" . $i18nHTMLbase . 
"commitMassTranslation.php\">";
   echo "<input type=hidden name=\"xlang\" value=\"$xlang\">";
   $endp = $end + 1;
   echo "<input type=hidden name=\"start\" value=\"$endp\">";

Modified: doodle-docs/WWW/i18nhtml.inc
===================================================================
--- doodle-docs/WWW/i18nhtml.inc        2005-04-04 06:47:24 UTC (rev 587)
+++ doodle-docs/WWW/i18nhtml.inc        2005-04-04 06:51:09 UTC (rev 588)
@@ -1,6 +1,6 @@
 <?php
 /*
-     (C) 2003, 2004 Christian Grothoff and other contributing authors.
+     (C) 2003, 2004, 2005 Christian Grothoff and other contributing authors.
 
      This code is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published
@@ -27,6 +27,7 @@
   // $editor can be set to create a translation
   // tag even if a translation is already available.
   // there is currently no security.
+  //
   // An "_" is used for functions that return the
   // translated string instead of printing it directly.
   // These functions are used for "%s" printing with W().
@@ -39,6 +40,8 @@
 // obtain user db specific configuration parameters
 include("i18nhtml_config.inc");
 
+header("Content-type: text/html; charset=utf-8");
+
 // establish default connection to database server
 $connection = @mysql_connect($i18nHTMLsqlServer,
                             $i18nHTMLsqlUser,
@@ -119,6 +122,7 @@
 if ($xlang)
   $lang = $xlang;
 $lang = ucfirst(strtolower($lang));
+$lang = mysql_real_escape_string($lang);
 $editor = $_REQUEST['editor'];
 
 
@@ -241,6 +245,464 @@
   }
 }
 
+
+/**
+ * turn decimal HTML character references back into UTF-8 bytes
+ *
+ * The input is first passed through utf8_encode(), i.e. it is read as
+ * ISO-8859-1 and re-encoded as UTF-8.  Every decimal reference of the
+ * form &amp;#NNN; is then replaced by the UTF-8 encoding of code point NNN
+ * (eg, &amp;#8364; becomes the euro sign, bytes E2 82 AC).
+ *
+ * Only code points below 0x10000 (one to three UTF-8 bytes) are converted;
+ * anything larger is written back as a decimal reference built from the
+ * parsed integer.
+ *
+ * The original comment states that this function is triggered by the
+ * YACS handler during page rendering.
+ *
+ * @param string a string that may contain decimal HTML character references
+ * @return an UTF-8 string
+ */
+function from_unicode($text) {
+
+  // read the input as ISO-8859-1 and re-encode it as UTF-8
+  $encoded = utf8_encode($text);
+
+  // with PREG_SPLIT_DELIM_CAPTURE the pieces alternate between literal
+  // text (even offsets) and the digits of a reference (odd offsets)
+  $pieces = preg_split('/&#(\d+?);/', $encoded, -1, PREG_SPLIT_DELIM_CAPTURE);
+
+  $result = '';
+  foreach($pieces as $offset => $piece) {
+
+    // literal text between two references is copied through untouched
+    if($offset % 2 == 0) {
+      $result .= $piece;
+      continue;
+    }
+
+    // the digits captured from &#NNN;
+    $code = intval($piece);
+
+    if($code < 0x80) {
+
+      // one byte (0xxxxxxx)
+      $result .= chr($code);
+
+    } elseif($code < 0x800) {
+
+      // two bytes (110xxxxx 10xxxxxx)
+      $result .= chr(0xC0 | ($code >> 6));
+      $result .= chr(0x80 | ($code & 0x3F));
+
+    } elseif($code < 0x10000) {
+
+      // three bytes (1110xxxx 10xxxxxx 10xxxxxx)
+      $result .= chr(0xE0 | ($code >> 12));
+      $result .= chr(0x80 | (($code >> 6) & 0x3F));
+      $result .= chr(0x80 | ($code & 0x3F));
+
+    } else {
+
+      // more bytes would be needed: keep it as a reference
+      $result .= '&#'.$code.';';
+    }
+  }
+
+  // the updated string
+  return $result;
+}
+
+
+/**
+ * convert between named HTML entities and decimal character references
+ *
+ * With $to_unicode set (the default) every named entity listed in the
+ * table below (eg, &amp;eacute;) is replaced by its decimal reference
+ * (eg, &amp;#233;).  With $to_unicode cleared the replacement runs the
+ * other way round.
+ *
+ * This is a plain, case-sensitive str_replace(): text that matches no
+ * table entry is left alone.  The table has no entries for &amp;amp;,
+ * &amp;lt;, &amp;gt; and &amp;quot;, so these are never touched.
+ *
+ * @link http://www.evolt.org/article/A_Simple_Character_Entity_Chart/17/21234/ A Simple Character Entity Chart
+ *
+ * @param string the string to be transcoded
+ * @param boolean TRUE to transcode to Unicode, FALSE to transcode to HTML
+ * @return a transcoded string
+ */
+function transcode($input, $to_unicode=TRUE) {
+
+  // both lookup lists are built on the first call and reused afterwards
+  static $named_refs, $numeric_refs;
+  if(!is_array($named_refs)) {
+
+    // decimal reference => named entity, in numerical order
+    $codes = array(
+      '&#160;' => '&nbsp;', // non-breaking space
+      '&#161;' => '&iexcl;', // inverted exclamation mark
+      '&#162;' => '&cent;', // cent sign
+      '&#163;' => '&pound;', // pound sign
+      '&#164;' => '&curren;', // currency sign
+      '&#165;' => '&yen;', // yen sign
+      '&#166;' => '&brvbar;', // broken bar
+      '&#167;' => '&sect;', // section sign
+      '&#168;' => '&uml;', // diaeresis
+      '&#169;' => '&copy;', // copyright sign
+      '&#170;' => '&ordf;', // feminine ordinal indicator
+      '&#171;' => '&laquo;', // left-pointing double angle quotation mark
+      '&#172;' => '&not;', // not sign
+      '&#173;' => '&shy;', // soft hyphen
+      '&#174;' => '&reg;', // registered sign
+      '&#175;' => '&macr;', // macron
+      '&#176;' => '&deg;', // degree sign
+      '&#177;' => '&plusmn;', // plus-minus sign
+      '&#178;' => '&sup2;', // superscript two
+      '&#179;' => '&sup3;', // superscript three
+      '&#180;' => '&acute;', // acute accent
+      '&#181;' => '&micro;', // micro sign
+      '&#182;' => '&para;', // pilcrow sign
+      '&#183;' => '&middot;', // middle dot
+      '&#184;' => '&cedil;', // cedilla
+      '&#185;' => '&sup1;', // superscript one
+      '&#186;' => '&ordm;', // masculine ordinal indicator
+      '&#187;' => '&raquo;', // right-pointing double angle quotation mark
+      '&#188;' => '&frac14;', // vulgar fraction one quarter
+      '&#189;' => '&frac12;', // vulgar fraction one half
+      '&#190;' => '&frac34;', // vulgar fraction three quarters
+      '&#191;' => '&iquest;', // inverted question mark
+      '&#192;' => '&Agrave;', // latin capital letter A with grave
+      '&#193;' => '&Aacute;', // latin capital letter A with acute
+      '&#194;' => '&Acirc;', // latin capital letter A with circumflex
+      '&#195;' => '&Atilde;', // latin capital letter A with tilde
+      '&#196;' => '&Auml;', // latin capital letter A with diaeresis
+      '&#197;' => '&Aring;', // latin capital letter A with ring above
+      '&#198;' => '&AElig;', // latin capital letter AE
+      '&#199;' => '&Ccedil;', // latin capital letter C with cedilla
+      '&#200;' => '&Egrave;', // latin capital letter E with grave
+      '&#201;' => '&Eacute;', // latin capital letter E with acute
+      '&#202;' => '&Ecirc;', // latin capital letter E with circumflex
+      '&#203;' => '&Euml;', // latin capital letter E with diaeresis
+      '&#204;' => '&Igrave;', // latin capital letter I with grave
+      '&#205;' => '&Iacute;', // latin capital letter I with acute
+      '&#206;' => '&Icirc;', // latin capital letter I with circumflex
+      '&#207;' => '&Iuml;', // latin capital letter I with diaeresis
+      '&#208;' => '&ETH;', // latin capital letter ETH
+      '&#209;' => '&Ntilde;', // latin capital letter N with tilde
+      '&#210;' => '&Ograve;', // latin capital letter O with grave
+      '&#211;' => '&Oacute;', // latin capital letter O with acute
+      '&#212;' => '&Ocirc;', // latin capital letter O with circumflex
+      '&#213;' => '&Otilde;', // latin capital letter O with tilde
+      '&#214;' => '&Ouml;', // latin capital letter O with diaeresis
+      '&#215;' => '&times;', // multiplication sign
+      '&#216;' => '&Oslash;', // latin capital letter O with stroke
+      '&#217;' => '&Ugrave;', // latin capital letter U with grave
+      '&#218;' => '&Uacute;', // latin capital letter U with acute
+      '&#219;' => '&Ucirc;', // latin capital letter U with circumflex
+      '&#220;' => '&Uuml;', // latin capital letter U with diaeresis
+      '&#221;' => '&Yacute;', // latin capital letter Y with acute
+      '&#222;' => '&THORN;', // latin capital letter THORN
+      '&#223;' => '&szlig;', // latin small letter sharp s
+      '&#224;' => '&agrave;', // latin small letter a with grave
+      '&#225;' => '&aacute;', // latin small letter a with acute
+      '&#226;' => '&acirc;', // latin small letter a with circumflex
+      '&#227;' => '&atilde;', // latin small letter a with tilde
+      '&#228;' => '&auml;', // latin small letter a with diaeresis
+      '&#229;' => '&aring;', // latin small letter a with ring above
+      '&#230;' => '&aelig;', // latin small letter ae
+      '&#231;' => '&ccedil;', // latin small letter c with cedilla
+      '&#232;' => '&egrave;', // latin small letter e with grave
+      '&#233;' => '&eacute;', // latin small letter e with acute
+      '&#234;' => '&ecirc;', // latin small letter e with circumflex
+      '&#235;' => '&euml;', // latin small letter e with diaeresis
+      '&#236;' => '&igrave;', // latin small letter i with grave
+      '&#237;' => '&iacute;', // latin small letter i with acute
+      '&#238;' => '&icirc;', // latin small letter i with circumflex
+      '&#239;' => '&iuml;', // latin small letter i with diaeresis
+      '&#240;' => '&eth;', // latin small letter eth
+      '&#241;' => '&ntilde;', // latin small letter n with tilde
+      '&#242;' => '&ograve;', // latin small letter o with grave
+      '&#243;' => '&oacute;', // latin small letter o with acute
+      '&#244;' => '&ocirc;', // latin small letter o with circumflex
+      '&#245;' => '&otilde;', // latin small letter o with tilde
+      '&#246;' => '&ouml;', // latin small letter o with diaeresis
+      '&#247;' => '&divide;', // division sign
+      '&#248;' => '&oslash;', // latin small letter o with stroke
+      '&#249;' => '&ugrave;', // latin small letter u with grave
+      '&#250;' => '&uacute;', // latin small letter u with acute
+      '&#251;' => '&ucirc;', // latin small letter u with circumflex
+      '&#252;' => '&uuml;', // latin small letter u with diaeresis
+      '&#253;' => '&yacute;', // latin small letter y with acute
+      '&#254;' => '&thorn;', // latin small letter thorn
+      '&#255;' => '&yuml;', // latin small letter y with diaeresis
+      '&#338;' => '&OElig;', // latin capital ligature OE
+      '&#339;' => '&oelig;', // latin small ligature oe
+      '&#352;' => '&Scaron;', // latin capital letter S with caron
+      '&#353;' => '&scaron;', // latin small letter s with caron
+      '&#376;' => '&Yuml;', // latin capital letter Y with diaeresis
+      '&#402;' => '&fnof;', // latin small f with hook
+      '&#710;' => '&circ;', // modifier letter circumflex accent
+      '&#732;' => '&tilde;', // small tilde
+      '&#913;' => '&Alpha;', // greek capital letter alpha
+      '&#914;' => '&Beta;', // greek capital letter beta
+      '&#915;' => '&Gamma;', // greek capital letter gamma
+      '&#916;' => '&Delta;', // greek capital letter delta
+      '&#917;' => '&Epsilon;', // greek capital letter epsilon
+      '&#918;' => '&Zeta;', // greek capital letter zeta
+      '&#919;' => '&Eta;', // greek capital letter eta
+      '&#920;' => '&Theta;', // greek capital letter theta
+      '&#921;' => '&Iota;', // greek capital letter iota
+      '&#922;' => '&Kappa;', // greek capital letter kappa
+      '&#923;' => '&Lambda;', // greek capital letter lambda
+      '&#924;' => '&Mu;', // greek capital letter mu
+      '&#925;' => '&Nu;', // greek capital letter nu
+      '&#926;' => '&Xi;', // greek capital letter xi
+      '&#927;' => '&Omicron;', // greek capital letter omicron
+      '&#928;' => '&Pi;', // greek capital letter pi
+      '&#929;' => '&Rho;', // greek capital letter rho
+      '&#931;' => '&Sigma;', // greek capital letter sigma
+      '&#932;' => '&Tau;', // greek capital letter tau
+      '&#933;' => '&Upsilon;', // greek capital letter upsilon
+      '&#934;' => '&Phi;', // greek capital letter phi
+      '&#935;' => '&Chi;', // greek capital letter chi
+      '&#936;' => '&Psi;', // greek capital letter psi
+      '&#937;' => '&Omega;', // greek capital letter omega
+      '&#945;' => '&alpha;', // greek small letter alpha
+      '&#946;' => '&beta;', // greek small letter beta
+      '&#947;' => '&gamma;', // greek small letter gamma
+      '&#948;' => '&delta;', // greek small letter delta
+      '&#949;' => '&epsilon;', // greek small letter epsilon
+      '&#950;' => '&zeta;', // greek small letter zeta
+      '&#951;' => '&eta;', // greek small letter eta
+      '&#952;' => '&theta;', // greek small letter theta
+      '&#953;' => '&iota;', // greek small letter iota
+      '&#954;' => '&kappa;', // greek small letter kappa
+      '&#955;' => '&lambda;', // greek small letter lambda
+      '&#956;' => '&mu;', // greek small letter mu
+      '&#957;' => '&nu;', // greek small letter nu
+      '&#958;' => '&xi;', // greek small letter xi
+      '&#959;' => '&omicron;', // greek small letter omicron
+      '&#960;' => '&pi;', // greek small letter pi
+      '&#961;' => '&rho;', // greek small letter rho
+      '&#962;' => '&sigmaf;', // greek small letter final sigma
+      '&#963;' => '&sigma;', // greek small letter sigma
+      '&#964;' => '&tau;', // greek small letter tau
+      '&#965;' => '&upsilon;', // greek small letter upsilon
+      '&#966;' => '&phi;', // greek small letter phi
+      '&#967;' => '&chi;', // greek small letter chi
+      '&#968;' => '&psi;', // greek small letter psi
+      '&#969;' => '&omega;', // greek small letter omega
+      '&#977;' => '&thetasym;', // greek small letter theta symbol
+      '&#978;' => '&upsih;', // greek upsilon with hook symbol
+      '&#982;' => '&piv;', // greek pi symbol
+      '&#8194;' => '&ensp;', // en space
+      '&#8195;' => '&emsp;', // em space
+      '&#8201;' => '&thinsp;', // thin space
+      '&#8204;' => '&zwnj;', // zero width non-joiner
+      '&#8205;' => '&zwj;', // zero width joiner
+      '&#8206;' => '&lrm;', // left-to-right mark
+      '&#8207;' => '&rlm;', // right-to-left mark
+      '&#8211;' => '&ndash;', // en dash
+      '&#8212;' => '&mdash;', // em dash
+      '&#8216;' => '&lsquo;', // left single quotation mark
+      '&#8217;' => '&rsquo;', // right single quotation mark
+      '&#8218;' => '&sbquo;', // single low-9 quotation mark
+      '&#8220;' => '&ldquo;', // left double quotation mark
+      '&#8221;' => '&rdquo;', // right double quotation mark
+      '&#8222;' => '&bdquo;', // double low-9 quotation mark
+      '&#8224;' => '&dagger;', // dagger
+      '&#8225;' => '&Dagger;', // double dagger
+      '&#8226;' => '&bull;', // bullet
+      '&#8230;' => '&hellip;', // horizontal ellipsis
+      '&#8240;' => '&permil;', // per mille sign
+      '&#8242;' => '&prime;', // prime (minutes)
+      '&#8243;' => '&Prime;', // double prime
+      '&#8249;' => '&lsaquo;', // single left-pointing angle quotation mark
+      '&#8250;' => '&rsaquo;', // single right-pointing angle quotation mark
+      '&#8254;' => '&oline;', // overline
+      '&#8260;' => '&frasl;', // fraction slash
+      '&#8364;' => '&euro;', // euro sign
+      '&#8465;' => '&image;', // blackletter capital I
+      '&#8472;' => '&weierp;', // script capital P
+      '&#8476;' => '&real;', // blackletter capital R
+      '&#8482;' => '&trade;', // trade mark sign
+      '&#8501;' => '&alefsym;', // alef symbol
+      '&#8592;' => '&larr;', // leftwards arrow
+      '&#8593;' => '&uarr;', // upwards arrow
+      '&#8594;' => '&rarr;', // rightwards arrow
+      '&#8595;' => '&darr;', // downwards arrow
+      '&#8596;' => '&harr;', // left right arrow
+      '&#8629;' => '&crarr;', // downwards arrow with corner leftwards
+      '&#8656;' => '&lArr;', // leftwards double arrow
+      '&#8657;' => '&uArr;', // upwards double arrow
+      '&#8658;' => '&rArr;', // rightwards double arrow
+      '&#8659;' => '&dArr;', // downwards double arrow
+      '&#8660;' => '&hArr;', // left right double arrow
+      '&#8704;' => '&forall;', // for all
+      '&#8706;' => '&part;', // partial differential
+      '&#8707;' => '&exist;', // there exists
+      '&#8709;' => '&empty;', // empty set
+      '&#8711;' => '&nabla;', // nabla
+      '&#8712;' => '&isin;', // element of
+      '&#8713;' => '&notin;', // not an element of
+      '&#8715;' => '&ni;', // contains as member
+      '&#8719;' => '&prod;', // n-ary product
+      '&#8721;' => '&sum;', // n-ary summation
+      '&#8722;' => '&minus;', // minus sign
+      '&#8727;' => '&lowast;', // asterisk operator
+      '&#8730;' => '&radic;', // square root
+      '&#8733;' => '&prop;', // proportional to
+      '&#8734;' => '&infin;', // infinity
+      '&#8736;' => '&ang;', // angle
+      '&#8743;' => '&and;', // logical and
+      '&#8744;' => '&or;', // logical or
+      '&#8745;' => '&cap;', // intersection
+      '&#8746;' => '&cup;', // union
+      '&#8747;' => '&int;', // integral
+      '&#8756;' => '&there4;', // therefore
+      '&#8764;' => '&sim;', // tilde operator
+      '&#8773;' => '&cong;', // approximately equal to
+      '&#8776;' => '&asymp;', // almost equal to
+      '&#8800;' => '&ne;', // not equal to
+      '&#8801;' => '&equiv;', // identical to
+      '&#8804;' => '&le;', // less-than or equal to
+      '&#8805;' => '&ge;', // greater-than or equal to
+      '&#8834;' => '&sub;', // subset of
+      '&#8835;' => '&sup;', // superset of
+      '&#8836;' => '&nsub;', // not a subset of
+      '&#8838;' => '&sube;', // subset of or equal to
+      '&#8839;' => '&supe;', // superset of or equal to
+      '&#8853;' => '&oplus;', // circled plus
+      '&#8855;' => '&otimes;', // circled times
+      '&#8869;' => '&perp;', // up tack
+      '&#8901;' => '&sdot;', // dot operator
+      '&#8968;' => '&lceil;', // left ceiling
+      '&#8969;' => '&rceil;', // right ceiling
+      '&#8970;' => '&lfloor;', // left floor
+      '&#8971;' => '&rfloor;', // right floor
+      '&#9001;' => '&lang;', // left-pointing angle bracket
+      '&#9002;' => '&rang;', // right-pointing angle bracket
+      '&#9674;' => '&loz;', // lozenge
+      '&#9824;' => '&spades;', // black spade suit
+      '&#9827;' => '&clubs;', // black club suit
+      '&#9829;' => '&hearts;', // black heart suit
+      '&#9830;' => '&diams;' // black diamond suit
+    );
+
+    // two parallel lists, in table order, as expected by str_replace()
+    $numeric_refs = array_keys($codes);
+    $named_refs = array_values($codes);
+  }
+
+  // named entities -> decimal references, or the reverse direction
+  return $to_unicode
+    ? str_replace($named_refs, $numeric_refs, $input)
+    : str_replace($numeric_refs, $named_refs, $input);
+}
+
+
+
+
+/**
+ * transcode multi-byte characters to HTML representations for Unicode
+ *
+ * This function is aiming to preserve Unicode characters through storage
+ * in a ISO-8859-1 compliant system.
+ *
+ * Named HTML entities are first turned into decimal references by
+ * transcode().  Then every byte sequence that starts with a byte >= 0x80
+ * is replaced by the decimal reference of the value it encodes
+ * (eg, the euro sign \xE2\x82\xAC becomes &amp;#8364;).  Bytes below 0x80
+ * are copied through unchanged.
+ *
+ * Of course, this solution does not allow for full-text search in the
+ * database and therefore, is not a definitive solution to
+ * internationalization issues.  It does enable, however, practical use
+ * of Unicode to build pages in foreign languages.
+ *
+ * The input is not validated as UTF-8:
+ * - the sequence length is taken from the first byte alone, and bytes
+ *   0x80-0xBF are handled like the lead byte of a two-byte sequence;
+ * - following bytes contribute their low six bits whatever their value;
+ * - bytes missing at the end of the string count as zero;
+ * - sequences that decode to zero (eg, C0 80) are dropped.
+ *
+ * @link http://www.evolt.org/article/A_Simple_Character_Entity_Chart/17/21234/ A Simple Character Entity Chart
+ *
+ * @param string the original UTF-8 string
+ * @return a string acceptable in an ISO-8859-1 storage system (ie., PHP4 + MySQL 3)
+ */
+function to_unicode($input) {
+
+  // transcode HTML entities to Unicode entities
+  $input = transcode($input);
+
+  // scan the whole string
+  $length = strlen($input);
+  $output = '';
+  $index = 0;
+  while($index < $length) {
+
+    // look at one char
+    $char = ord($input[$index]);
+
+    // one byte (0xxxxxxx)
+    if($char < 0x80) {
+      $output .= chr($char);
+      $index += 1;
+      continue;
+    }
+
+    // the first byte gives the sequence size and the top bits of the value
+    if($char < 0xE0) {         // two bytes (110xxxxx 10xxxxxx)
+      $size = 2;
+      $value = $char % 0x20;
+    } elseif($char < 0xF0) {   // three bytes (1110xxxx 10xxxxxx 10xxxxxx)
+      $size = 3;
+      $value = $char % 0x10;
+    } elseif($char < 0xF8) {   // four bytes (11110xxx 10xxxxxx ...)
+      $size = 4;
+      $value = $char % 0x08;
+    } elseif($char < 0xFC) {   // five bytes (111110xx 10xxxxxx ...)
+      $size = 5;
+      $value = $char % 0x04;
+    } else {                   // six bytes (1111110x 10xxxxxx ...)
+      $size = 6;
+      $value = $char % 0x02;
+    }
+
+    // append six bits per following byte; one loop serves every size, so
+    // each byte is read from its own offset (including offset 5 of a
+    // six-byte sequence) and nothing is read past the end of the string
+    for($offset = 1; $offset < $size; $offset++) {
+      $position = $index + $offset;
+      $byte = ($position < $length) ? ord($input[$position]) : 0;
+      $value = ($value * 0x40) + ($byte % 0x40);
+    }
+
+    // strip weird sequences (eg, C0 80 -> NUL)
+    if($value)
+      $output .= '&#' . $value . ';';
+    $index += $size;
+  }
+
+  // return the translated string
+  return $output;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 // returns either the translated string
 // or the original string.  Assumes we
 // are passed the original string as occurs
@@ -264,8 +726,7 @@
   if ($a == "")
     return $a;
 
-  // $a = htmlentities($a, ENT_QUOTES, $charset);
-  $u = urlencode($a);
+  $u = mysql_real_escape_string(urlencode($a));
   
   if (!$connection) {
     // database not available, just print English
@@ -319,7 +780,7 @@
     return fix($a);              // just return English string
   } else { // translation available
     $row = mysql_fetch_array($result);
-    return fix(urldecode($row["translation"]));
+    return $row["translation"];
   }
 }
 
@@ -361,7 +822,16 @@
 }
 
 function TITLE($a,$b="") {
-  echo "<title>" . W_($a,$b) . "</title>\n";
+  global $lang; // new revision: prints a charset <meta>, the <title> and, when a code is known, two language <meta> tags
+  global $languagecodes; // presumably maps a language name to its language code; defined outside this hunk -- TODO confirm
+  echo "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\" 
>";
+  echo "<title>" . TRANSLATE_($a,$b) . "</title>\n"; // TRANSLATE_ replaces W_ here; per the "_" convention noted in this file it returns the string
+  if (isset($languagecodes[$lang])) { // skip the language metas when $lang has no entry in $languagecodes
+    echo "<meta name=\"content-language\" content=\"" . 
+         $languagecodes[$lang] . "\">";
+    echo "<meta name=\"language\" content=\"" . 
+         $languagecodes[$lang] . "\">";
+  }
 }
 
 

Modified: doodle-docs/WWW/i18nhtml_config.inc
===================================================================
--- doodle-docs/WWW/i18nhtml_config.inc 2005-04-04 06:47:24 UTC (rev 587)
+++ doodle-docs/WWW/i18nhtml_config.inc 2005-04-04 06:51:09 UTC (rev 588)
@@ -1,6 +1,6 @@
 <?php
 /*
-     (C) 2003, 2004 Christian Grothoff
+     (C) 2003, 2004, 2005 Christian Grothoff
 
      This code is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published
@@ -24,12 +24,18 @@
 $i18nHTMLrecordMode = 2; // 1: only missing, 2: everything, 0: disable
 
 $i18nHTMLsqlServer = "localhost";
-$i18nHTMLsqlUser = "GNUnetWWW";
-$i18nHTMLsqlPass = "garlic";
+$i18nHTMLsqlUser = "i18nHTML";
+$i18nHTMLsqlPass = "pass";
 
-$i18nHTMLsqlDB = "translations"; // default is "translation"
+$i18nHTMLsqlDB = "translation"; // default is "translation"
 $i18nHTMLbase = "";  // base directory prepended to i18nHTML php pages used in 
links
 $i18nHTMLmarker = "*";  // default value if never changed
 
+// Note that if you enable debug, the scripts may print
+// warnings even if everything is ok!
+$i18nHTMLdebug = 0; // 0 = no, 1 = yes
 
+// for selectively cloning a DB
+//  $i18nHTMLclone = "/tmp/cloneFile.sql";
+
 ?>

Modified: doodle-docs/WWW/index.php
===================================================================
--- doodle-docs/WWW/index.php   2005-04-04 06:47:24 UTC (rev 587)
+++ doodle-docs/WWW/index.php   2005-04-04 06:51:09 UTC (rev 588)
@@ -2,198 +2,67 @@
 include("i18nhtml.inc");
 DOCTYPE("HTML", "Transitional");
 echo "<html><head>\n";
-if ($title) {
-  echo "<title>";
-  TRANSLATE("Doodle: find information on your computer");
-  echo "</title>";
- }
-if ($description) {
-  echo "<meta name=\"description\" content=\"";
-  TRANSLATE("A tool to index and search the files on your disk.");
-  echo "\">";
- }
-echo "<meta name=\"content-language\" content=\"" . 
-     $languagecodes[$lang] . "\">";
-echo "<meta name=\"language\" content=\"" . 
-     $languagecodes[$lang] . "\">";
+TITLE("i18nHTML - enabling collaborative webpage translation");
+echo "<meta name=\"description\" content=\"";
+TRANSLATE("i18nHTML is a collection of PHP scripts that allow visitors of a 
webpage to help translating it.");
+echo "\">";
 ?>
 <meta name="author" content="Christian Grothoff">
-<meta name="keywords" 
content="doodle,desktop,search,suffix,tree,meta,data,libextractor,keywords,fam,index,database,free,Linux,GNU,GPL">
+<meta name="keywords" 
content="i18n,HTML,PHP,translation,languages,mysql,database,internationalization,www,free,GNU,GPL">
 <meta name="robots" content="index,follow">
 <meta name="revisit-after" content="28 days">
 <meta name="publisher" content="Christian Grothoff">
-<meta name="date" content="2004-12-31">
-<meta name="rights" content="(C) 2004 by Christian Grothoff>";
+<meta name="date" content="2005-01-03">
+<meta name="rights" content="(C) 2004,2005 by Christian Grothoff>";
 <meta http-equiv="expires" content="43200">
 <meta http-equiv="content-type" content="text/html">
 </head>
 <body>
 <?php
 generateLanguageBar();
-
-H1("Doodle");
-ANCHOR("about");
+H1("i18nHTML");
 H2("About");
-
-W("Doodle is a tool to quickly search the documents on a computer.");
-W("Doodle builds an index using meta-data contained in the documents and 
allows fast searches on the resulting database.");
-W("Doodle uses %s to support obtaining meta-data from various file-formats.",
-  extlink_("/libextractor/","libextractor"));
-W("The database used by doodle is a %s, resulting in fast lookups.",
-  extlink_("http://www.nist.gov/dads/HTML/suffixtree.html","suffix tree"));
-W("Doodle supports approximate searches.");
-BR();
-W("Features that Doodle does not have at the moment include:");
-echo "<ul>";
-LI("A web interface");
-LI("Ordering of search results");
-LI("Spidering (indexing the Internet or websites)");
-echo "</ul>";
-W("If you need these features, have a look at the %s section.",
-  extlink_("#links", "links"));
-
+W("i18nHTML is a collection of PHP files that can be used to write webpages 
that visitors can translate into their respective native languages.");
+W("i18nHTML uses a database to match sentences from the webpage against 
translations.");
+W("i18nHTML defines a set of PHP functions that generate either the translated 
HTML sentences or the original (typically English) text with decorations that 
allow users to provide translations.");
+W("i18nHTML requires the internationalized webpages to be written using the 
provided PHP functions but does not constrain the page design in any way.");
+W("Webpages internationalized with i18nHTML can be updated without loosing 
existing translations for sentences that were not changed.");
+W("Note that it is important that you use the i18nHTML <tt>TITLE</tt> command 
in your documents in order to ensure that the character set and other meta-data 
is set properly.");
 P();
-W("Doodle is licensed under the %s.",
-  extlink_("http://www.gnu.org/licenses/gpl.html","GNU GPL"));
-W("Indexing large volumes can take several hundred MBs of memory (depending on 
the amount of meta-data found).");
-W("Searching should nevertheless require almost no memory.");
-W("Using the latest version of libextractor is recommended.");
-W("Doodle has so far only been tested under %s and %s %s %s.",
-  ARRAY(extlink_("http://www.debian.org/", "Debian"),
-        extlink_("http://www.redhat.com/", "RedHat"),
-        extlink_("http://www.gnu.org/", "GNU"),
-        extlink_("http://www.linux.org/", "Linux")));
-W("Doodle is expected it to work under any platform supported by %s.",
-  extlink_("/libextractor/", "libextractor"));
-P();
-
-ANCHOR("download");
 H2("Download");
-W("You can find the current release %s.",
-  extlink_("download/doodle-0.6.2.tar.gz", "here"));
-W("Man-pages for %s, %s and %s are also on-line.",
-  ARRAY(extlink_("man/doodle.html", "doodle"),
-        extlink_("man/doodled.html", "doodled"),
-        extlink_("man/libdoodle", "libdoodle")));
-
-W("The latest version can be obtained using");
-PRE("$ svn checkout https://gnunet.org/svn/doodle/");
+W("You can find the latest version %s.",
+  extlink_("https://gnunet.org/i18nHTML/download/", "here"));
+W("The latest CVS version can be obtained using");
+PRE("$ svn checkout https://gnunet.org/svn/i18nHTML/");
 P();    
 W("If you want to be notified about updates, subscribe to %s",
-  extlink_("http://freshmeat.net/projects/doodle/", "doodle on freshmeat"));
+  extlink_("http://freshmeat.net/projects/i18nHTML/", "i18nHTML on freshmeat"));
 P();
-W("Debian packages provided by Daniel Baumann can be found %s.",
-  extlink_("http://packages.debian.org/doodle", "here"));
-W("RedHat/Fedora RPM packages provided by Dag Wieers can be found %s.",
-  extlink_("http://dag.wieers.com/packages/doodle/","here"));
-P();
 
-ANCHOR("using");
-H2("Using doodle");
-W("First the doodle database needs to be created.");
-W("The simplest way to create the database is to run doodle with the 
<tt>-b</tt> option on the directories that are to be indexed.");
-W("For example:");
-PRE("$ doodle -b $HOME");
-W("This will create the doodle database under <tt>~/.doodle</tt>.");
-BR();
-W("After creating the doodle database, you can search it.");
-W("For example:");
-PRE("$ doodle keyword");
-BR();
-H3("Keeping the database up-to-date");
-W("If you want to keep your doodle database up-to-date, you can either 
periodically re-run doodle with the <tt>-b</tt> option, or you can use doodled, 
the doodle daemon.");
-W("doodled uses %s to notice whenever a file is changed and instantly updates 
the doodle database.",
-  "fam");
-W("In order to use doodled, you must have famd running.");
-W("If famd is running, you can start doodled by passing the same arguments 
that you would pass to doodle to construct the database, but without the 
<tt>-b</tt> option:");
-PRE("$ doodled $HOME");
-W("You can also use doodled to construct the initial database.");
-W("While doodled is updating the database, any doodle search will block until 
the update is complete.");
-W("Note that while you may want to index your entire disk (i.e., <tt>doodle -b 
/</tt>), it is typically not a great idea to have doodled monitor your entire 
system for changes -- especially since <tt>/usr</tt> is unlikely to change 
frequently.");
-W("You can address this issue by first indexing <tt>/</tt> and then using 
doodled to monitor only directories that change frequently:");
-PRE("$ doodle -b /\n" .
-    "$ doodled $HOME");
-W("This way, your entire system will be in the index, and your home directory 
will be always up-to-date.");
-P();
-H3("Full-text search");
-W("You can achieve a (limited) form of full-text search with doodle.");
-W("For that, the dictionary-based plaintext extractors from %s are used.",
-  extlink_("/libextractor/","libextractor"));
-W("In order to use them, you need to pass the option <tt>-b LANG</tt> to 
doodle.");
-W("LANG is a two letter language code that selects the dictionary.");
-W("Available languages at the moment are en, es, fr, it and no.");
-W("Words and sentences that are available in the respective dictionaries for 
these languages will then be added to the index.");
-W("While libextractor attempts to avoid full-text extraction for certain kown 
binary formats, it may still find words in non-text files.");
-W("Running with this option will dramatically increase the size of the index 
and the time it takes to build the index.");
-W("Note that if you change the options used to build a database will not (!) 
result in doodle re-indexing files that were processed with other options 
previously.");
-W("The only way to force doodle to re-index files with different options is to 
either touch the files (change modification timestamp) or to delete the old 
database and start from scratch.");
 
-P();
-H3("Hints of the system administrator");
-W("If you are the system administrator, you might want to run doodle on the 
entire system periodically (cron job) and have doodled monitor the home 
directories in the background.");
-W("In that case, it is suggested to have the doodle database be group-readable 
for a group doodle.");
-W("Set the permissions for the doodle binary to SGID to allow users to poll 
the database.");
-W("Doodle will ensure that information about files not accessible to the user 
are not leaked by checking if files found in the database are accessible to the 
user.");
-W("doodled has to run as root since otherwise it would be impossible to index 
the personal files of all users.");
-W("If that it too risky, doodled will still work, but only index the files 
readable to the user that runs doodled.");
 
-
 P();
-H3("Using different options for different directories");
-W("You can build a database from multiple doodle runs over distinct sets of 
files with different options.");
-W("For example, the following can make sense:");
-PRE("$ doodle -B en -b /usr/share/doc # full-text index over documentation\n" .
-    "$ doodle -b /usr /opt /bin /lib # normal index over other system files\n" 
.
-    "$ doodled -B en /home # monitor /home, with full-text support\n");
-W("A simple doodle search will then find files in all listed directories.");
-W("You can also build multiple disjoint databases and search all of them in 
one run (see %s for option <tt>-d</tt>).",
-  extlink_("man/doodle.html", "doodle"));
+if ( ($xlang) && ($xlang != "English")) {
+  H2("Mass translation");
+  W("The mass-translation page for translating many sentences at once is %s.",
+    intlink_("editor.php", "here"));
+  W("Note that the sentence database is shared with the %s, %s and %s 
projects.",
+    ARRAY(extlink_("http://gnunet.org/", "GNUnet"),
+         extlink_("http://gnunet.org/doodle/", "doodle"),
+         extlink_("http://gnunet.org/libextractor/", "libExtractor")));  
+ }
 
-P();
-ANCHOR("mantis");
 H2("Bugtrack");
-W("Doodle uses Mantis for bugtracking.");
+W("i18nHTML uses Mantis for bugtracking.");
 W("Visit %s to report bugs.",
   extlink_("https://gnunet.org/mantis/","https://gnunet.org/mantis/"));
 W("You need to sign up for a reporter account.");
-W("Please make sure you report bugs under <strong>Doodle</strong> and not 
under any of the other projects.");
+W("Please make sure you report bugs under <strong>I18nHTML</strong> and not 
under any of the other projects.");
 P();
-W("If you dislike Mantis and need to report a bug contact %s via e-mail.",
+W("If you dislike Mantis and need to report a bug contact %s via e-mail (good 
luck getting by the spam-filter).",
   extlink_("mailto:address@hidden","address@hidden"));
 
-
-ANCHOR("links");
-H2("Links");
-echo "<ul>";
-LI(extlink_("http://members.cox.net/sinzui/medusa/", 
-            "Medusa, similar project for Gnome"));
-LI(extlink_("http://swish-e.org/", 
-            "Swish-e, indexing tool with focus on the WWW"));
-LI(extlink_("http://www.gnome.org/~seth/storage/",
-            "GNOME Storage"));
-LI(extlink_("http://www.gnome.org/projects/beagle/",
-            "Beagle"));
-LI(extlink_("http://sourceforge.net/projects/rlocate/",
-            "rlocate, version of locate that is always up-to-date"));
-LI(extlink_("http://www.htdig.org/",
-            "HTDig"));
-LI(extlink_("http://jakarta.apache.org/lucene/",
-            "Lucene"));
-LI(extlink_("http://homepage.mac.com/pauljlucas/software/swish/",
-            "Swish++"));
-LI(extlink_("http://webglimpse.net/",
-            "Glimpse"));
-LI(extlink_("http://evidence.sf.net/",
-            "Evidence, file-manager with support for doodle"));
-LI(extlink_("http://www.linux-magazin.de/Artikel/ausgabe/2004/09/bgw/bgw.html",
-            "Article about doodle (and other things) in the German Linux-Magazin"));
-echo "</ul>";
 HR();
-echo "<address><a href=\"mailto:address@hidden";>Christian 
Grothoff</a></address>";
-PRE("Copyright (C) 2004 Christian Grothoff.\n" .
-    "Verbatim copying and distribution of this entire article\n" .
-    "is permitted in any medium, provided this notice is preserved.");
-BR();
 generateFooter();
 echo "</body></html>\n";
 ?>

Modified: doodle-docs/WWW/translate.php
===================================================================
--- doodle-docs/WWW/translate.php       2005-04-04 06:47:24 UTC (rev 587)
+++ doodle-docs/WWW/translate.php       2005-04-04 06:51:09 UTC (rev 588)
@@ -27,9 +27,7 @@
 
 DOCTYPE("HTML", "Transitional");
 echo "<html><head>\n";
-echo "<title>";
-TRANSLATE("WWW translation");
-echo "</title>";
+TITLE("WWW translation");
 echo "<meta name=\"description\" content=\"";
 TRANSLATE("Help translating this webpage.");
 echo "\">";
@@ -45,7 +43,7 @@
 W("Destination language: ");
 W($lang);
 P();
-echo "<form action=\"" . $i18nHTMLbase . "commitTranslation.php\">\n";
+echo "<form method=\"POST\" action=\"" . $i18nHTMLbase . 
"commitTranslation.php\">\n";
 echo "<input type=hidden name=\"text\" value=\"" . urlencode($text) . "\">\n";
 echo "<input type=hidden name=\"xlang\" value=\"$xlang\">\n";
 echo "<input type=hidden name=\"back\" value=\"$back\">\n";
@@ -93,9 +91,9 @@
    printf("<tr><td>%s</td><td><a href=\"" . $i18nHTMLbase . 
"vote.php?xlang=%s&text=%s&translation=%s\">%s</a></td></tr>\n",
           W_($row["lang"]),
           urlencode($row["lang"]),
-         urlencode($text),
-         $translation,
-         urldecode($translation));
+         $u,
+         urlencode(from_unicode($translation)),
+         fix(from_unicode($translation)));
  }
 echo "</table>";
 
@@ -121,4 +119,4 @@
 generateFooter();
 echo "</body></html>";
 
-?>
\ No newline at end of file
+?>

Modified: doodle-docs/WWW/vote.php
===================================================================
--- doodle-docs/WWW/vote.php    2005-04-04 06:47:24 UTC (rev 587)
+++ doodle-docs/WWW/vote.php    2005-04-04 06:51:09 UTC (rev 588)
@@ -1,6 +1,6 @@
 <?php
 /*
-     (C) 2003, 2004 Christian Grothoff
+     (C) 2003, 2004, 2005 Christian Grothoff
 
      This code is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published
@@ -21,17 +21,18 @@
   // For sentences with multiple translations, the one with the most
   // votes is displayed.
 include("i18nhtml.inc");
-echo "<html><head><title>";
-W("WWW translation: vote");
-echo "</title></head><body>";
+echo "<html><head>";
+echo "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\" >";
+TITLE("WWW translation: vote");
+echo "</head><body>";
 if (!$connection) {
   echo "Database is down. Cannot edit translations.";
   die();
  } 
 $text = $_REQUEST['text'];
 $translation = $_REQUEST['translation'];
-$u = urlencode($text);
-$t = urlencode($translation);
+$u = mysql_real_escape_string($text);
+$t = mysql_real_escape_string(to_unicode($translation));
 echo "text = " . $text . "<br>\n";
 echo "translation = " . $translation . "<br>\n";
 

Modified: i18nHTML-docs/WWW/commitMassTranslation.php
===================================================================
--- i18nHTML-docs/WWW/commitMassTranslation.php 2005-04-04 06:47:24 UTC (rev 587)
+++ i18nHTML-docs/WWW/commitMassTranslation.php 2005-04-04 06:51:09 UTC (rev 588)
@@ -1,6 +1,6 @@
 <?php
 /*
-     (C) 2003, 2004 Christian Grothoff
+     (C) 2003, 2004, 2005 Christian Grothoff
 
      This code is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published
@@ -26,11 +26,13 @@
   W("Translating to English currently not allowed.\n");
   die();
 }
-echo "<html><head><title>WWW translation: commit</title></head><body>";
+echo "<html><head>";
+TITLE("WWW translation: commit");
+echo "</head><body>";
 W("Processing translations...");
 P();
 $done = 0;
-foreach($_GET as $dec=>$val) {
+foreach($_POST as $dec=>$val) {
   if ($val == "")
     continue;
   if ( ($dec == "xlang") || ($dec == "start") )
@@ -44,7 +46,7 @@
     $num--;
     $row = mysql_fetch_array($result);
     if ($dec == bin2hex(md5(urldecode($row["c"])))) {
-      $enc = $row["c"];
+      $enc = mysql_real_escape_string($row["c"]);
       break;
     }
   }
@@ -56,21 +58,30 @@
   }  
   $query = "DELETE FROM pending WHERE lang=\"$lang\" AND c=\"$enc\"";
   mysql_query($query, $connection); 
-  $t = urlencode($val);
-  // $t = urlencode(htmlentities($val, ENT_QUOTES, $charset));
+  $t = mysql_real_escape_string(to_unicode($val));
   $query = "SELECT ranking FROM map WHERE name=\"$enc\" AND lang=\"$lang\" AND 
translation=\"$t\"";
   $result = mysql_query($query, $connection);
   $num = 0;
   if ($result) 
     $num = mysql_numrows($result);
   if ($num == 0) {
-    $query = "INSERT INTO map VALUES(\"$enc\", \"$lang\", \"$t\", 1, \"" . 
$_SERVER['REMOTE_ADDR'] . "\");";
-    mysql_query($query, $connection);
-    $done++;
-    W("Storing translation for &quot;%s&quot = &quot;%s&quot;.",
-      ARRAY(urldecode($enc),
-           urldecode($t)));
-    BR();  
+    $txtCnt = count_chars(urldecode($enc), 1);
+    $tCnt = count_chars($t, 1);
+    if ($txtCnt[ord('%')] != $tCnt[ord('%')]) {
+      W("Commit '%s->%s' failed.", $enc, $t);
+      W("The number of percent signs in source text and translation do not 
match.");     
+      W("Note that you must preserve all %%s expressions unchanged.");
+      W("Also, a single displayed %% sign must be translated into two (%%%%) 
such signs.");
+      P();
+    } else {
+      $query = "INSERT INTO map VALUES(\"$enc\", \"$lang\", \"$t\", 1, \"" . 
$_SERVER['REMOTE_ADDR'] . "\");";
+      mysql_query($query, $connection);
+      $done++;
+      W("Storing translation for &quot;%s&quot = &quot;%s&quot;.",
+        ARRAY(urldecode($enc),
+             urldecode($t)));
+      BR();  
+    }
   }  
 }
 P();

Modified: i18nHTML-docs/WWW/commitTranslation.php
===================================================================
--- i18nHTML-docs/WWW/commitTranslation.php     2005-04-04 06:47:24 UTC (rev 587)
+++ i18nHTML-docs/WWW/commitTranslation.php     2005-04-04 06:51:09 UTC (rev 588)
@@ -1,6 +1,6 @@
 <?php
 /*
-     (C) 2003, 2004 Christian Grothoff
+     (C) 2003, 2004, 2005 Christian Grothoff
 
      This code is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published
@@ -18,7 +18,7 @@
      Boston, MA 02111-1307, USA.
 */
 include("i18nhtml.inc");
-$text = $_REQUEST['text'];
+$text = mysql_real_escape_string($_REQUEST['text']);
 $translation = $_REQUEST['translation'];
 $back = $_REQUEST['back'];
 if (!$connection) {
@@ -30,12 +30,7 @@
   die();
 }
 
-// note: $text is already urlencoded (by submitting via form) and html 
compatible
-// ensure translation is stored in encoded form and html compatible
-// if (get_magic_quotes_gpc()) $translation = stripslashes($translation);
-
-// $t = urlencode(htmlentities($translation, ENT_QUOTES, $charset));
-$t = urlencode($translation);
+$t = mysql_real_escape_string(to_unicode($translation));
 // check for identical translation
 $query = "SELECT ranking FROM map WHERE name=\"$text\" AND lang=\"$lang\" AND 
translation=\"$t\"";
 $result = mysql_query($query, $connection);
@@ -44,22 +39,38 @@
   $num = mysql_numrows($result);
 }
 if ($num > 0) {
-  echo "<html><body>";
+  echo "<html><head>";
+  TITLE("Translation exists.");
+  echo "</head><body>";
   W("Translation exists.");   
   extlink($back, "Back...");
   generateFooter();
   echo "</body></html>";
 } else {
-  //  if (!get_magic_quotes_gpc()) $t = addslashes($t); // ensure escaped 
before adding to DB
-  $query = "INSERT INTO map VALUES(\"$text\", \"$lang\", \"$t\", 1, \"" . 
$_SERVER['REMOTE_ADDR'] . "\");";
-  $result = mysql_query($query, $connection);
-  if ($result) {
-    header("Location: " . $back); /* Redirect browser */
+  $txtCnt = count_chars(urldecode($text), 1);
+  $tCnt = count_chars($t, 1);
+  if ($txtCnt[ord('%')] != $tCnt[ord('%')]) {
+      echo "<html><head>";
+      TITLE("Commit failed.");
+      echo "</head><body>";
+      W("Commit failed.");
+      W("The number of percent signs in source text and translation do not 
match.");     
+      W("Note that you must preserve all %%s expressions unchanged.");
+      W("Also, a single displayed %% sign must be translated into two (%%%%) 
such signs.");
+      echo "</body></html>";
   } else {
-    echo "<html><body>";
-    W("Commit ('%s') failed: ", $query);
-    echo mysql_error();
-    echo "</body></html>";
+    $query = "INSERT INTO map VALUES(\"$text\", \"$lang\", \"$t\", 1, \"" . 
$_SERVER['REMOTE_ADDR'] . "\");";
+    $result = mysql_query($query, $connection);
+    if ($result) {
+      header("Location: " . $back); /* Redirect browser */
+    } else {
+      echo "<html><head>";
+      TITLE("Commit failed.");
+      echo "</head><body>";
+      W("Commit ('%s') failed: ", $query);
+      echo mysql_error();
+      echo "</body></html>";
+    }
   } 
 }
 ?>
\ No newline at end of file

Modified: i18nHTML-docs/WWW/editor.php
===================================================================
--- i18nHTML-docs/WWW/editor.php        2005-04-04 06:47:24 UTC (rev 587)
+++ i18nHTML-docs/WWW/editor.php        2005-04-04 06:51:09 UTC (rev 588)
@@ -1,6 +1,6 @@
 <?php
 /*
-     (C) 2003, 2004 Christian Grothoff
+     (C) 2003, 2004, 2005 Christian Grothoff
 
      This code is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published
@@ -65,7 +65,7 @@
     ARRAY($start, $end));
   P();
 
-  echo "<form action=\"" . $i18nHTMLbase . "commitMassTranslation.php\">";
+  echo "<form method=\"POST\" action=\"" . $i18nHTMLbase . 
"commitMassTranslation.php\">";
   echo "<input type=hidden name=\"xlang\" value=\"$xlang\">";
   $endp = $end + 1;
   echo "<input type=hidden name=\"start\" value=\"$endp\">";

Modified: i18nHTML-docs/WWW/i18nhtml.inc
===================================================================
--- i18nHTML-docs/WWW/i18nhtml.inc      2005-04-04 06:47:24 UTC (rev 587)
+++ i18nHTML-docs/WWW/i18nhtml.inc      2005-04-04 06:51:09 UTC (rev 588)
@@ -1,6 +1,6 @@
 <?php
 /*
-     (C) 2003, 2004 Christian Grothoff and other contributing authors.
+     (C) 2003, 2004, 2005 Christian Grothoff and other contributing authors.
 
      This code is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published
@@ -27,6 +27,7 @@
   // $editor can be set to create a translation
   // tag even if a translation is already available.
   // there is currently no security.
+  //
   // An "_" is used for functions that return the
   // translated string instead of printing it directly.
   // These functions are used for "%s" printing with W().
@@ -39,6 +40,8 @@
 // obtain user db specific configuration parameters
 include("i18nhtml_config.inc");
 
+header("Content-type: text/html; charset=utf-8");
+
 // establish default connection to database server
 $connection = @mysql_connect($i18nHTMLsqlServer,
                             $i18nHTMLsqlUser,
@@ -119,6 +122,7 @@
 if ($xlang)
   $lang = $xlang;
 $lang = ucfirst(strtolower($lang));
+$lang = mysql_real_escape_string($lang);
 $editor = $_REQUEST['editor'];
 
 
@@ -241,6 +245,464 @@
   }
 }
 
+
+/**
+ * restore UTF-8 from HTML Unicode entities
+ *
+ * This function is triggered by the YACS handler during page
+ * rendering.  It is aiming to transcode HTML Unicode entities
+ * (eg, &amp;#8364;) back to actual UTF-8 encoding (eg, €).
+ *
+ * @param string a string with a mix of UTF-8 and of HTML Unicode entities
+ * @return an UTF-8 string
+ */
+function from_unicode($text) {
+  // translate extended ISO8859-1 chars, if any
+  $text = utf8_encode($text);
+  
+  // translate Unicode entities
+  $areas = preg_split('/&#(\d+?);/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
+  $text = '';
+  $index = 0;
+  foreach($areas as $area) {
+    switch($index%2) {
+    case 0: // before entity
+      $text .= $area;
+      break;
+    case 1: // the entity itself
+      
+      // get the integer value
+      $unicode = intval($area);
+      
+      // one byte
+      if($unicode < 0x80) {
+       
+       $text .= chr($unicode);
+       
+       // two bytes
+      } elseif($unicode < 0x800) {
+       
+       $text .= chr( 0xC0 +  ( ( $unicode - ( $unicode % 0x40 ) ) / 0x40 ) );
+       $text .= chr( 0x80 + ( $unicode % 0x40 ) );
+       
+       // three bytes
+      } elseif($unicode < 0x10000) {
+       
+       $text .= chr( 0xE0 + ( ( $unicode - ( $unicode % 0x1000 ) ) / 0x1000 ) );
+       $text .= chr( 0x80 + ( ( ( $unicode % 0x1000 ) - ( $unicode % 0x40 ) ) / 0x40 ) );
+       $text .= chr( 0x80 + ( $unicode % 0x40 ) );
+       
+       // more bytes, keep it as it is...
+      } else
+         $text .= '&#'.$unicode.';';
+      
+      break;
+    }
+    $index++;
+  }
+  
+  // the updated string
+  return $text;
+}
+
+
+/**
+ * transcode unicode entities to/from HTML entities
+ *
+ * Also, this function transforms HTML entities into their equivalent Unicode 
entities.
+ * For example, w.bloggar posts pages using HTML entities.
+ * If you have to modify these pages using web forms, you would like to get 
UTF-8 instead.
+ *
+ * @link 
http://www.evolt.org/article/A_Simple_Character_Entity_Chart/17/21234/ A Simple 
Character Entity Chart
+ *
+ * @param string the string to be transcoded
+ * @param boolean TRUE to transcode to Unicode, FALSE to transcode to HTML
+ * @return a transcoded string
+ */
+function transcode($input, $to_unicode=TRUE) {
+  
+  // initialize tables only once
+  static $html_entities, $unicode_entities;
+  if(!is_array($html_entities)) {
+    
+    
+    // numerical order
+    $codes = array(
+                  '&#160;'     => '&nbsp;',    // non-breaking space
+                  '&#161;'     => '&iexcl;',   // inverted exclamation mark
+                  '&#162;'     => '&cent;',    // cent sign
+                  '&#163;'     => '&pound;',   // pound sign
+                  '&#164;'     => '&curren;',  // currency sign
+                  '&#165;'     => '&yen;',             // yen sign
+                  '&#166;'     => '&brvbar;',  // broken bar
+                  '&#167;'     => '&sect;',    // section sign
+                  '&#168;'     => '&uml;',             // diaeresis
+                  '&#169;'     => '&copy;',    // copyright sign
+                  '&#170;'     => '&ordf;',    // feminine ordinal indicator
+                  '&#171;'     => '&laquo;',   // left-pointing double angle 
quotation mark
+                  '&#172;'     => '&not;',             // not sign
+                  '&#173;'     => '&shy;',             // soft hyphen
+                  '&#174;'     => '&reg;',             // registered sign
+                  '&#175;'     => '&macr;',    // macron
+                  '&#176;'     => '&deg;',             // degree sign
+                  '&#177;'     => '&plusmn;',  // plus-minus sign
+                  '&#178;'     => '&sup2;',    // superscript two
+                  '&#179;'     => '&sup3;',    // superscript three
+                  '&#180;'     => '&acute;',   // acute accent
+                  '&#181;'     => '&micro;',   // micro sign
+                  '&#182;'     => '&para;',    // pilcrow sign
+                  '&#183;'     => '&middot;',  // middle dot
+                  '&#184;'     => '&cedil;',   // cedilla
+                  '&#185;'     => '&sup1;',    // superscript one
+                  '&#186;'     => '&ordm;',    // masculine ordinal indicator
+                  '&#187;'     => '&raquo;',   // right-pointing double angle 
quotation mark
+                  '&#188;'     => '&frac14;',  // vulgar fraction one quarter
+                  '&#189;'     => '&frac12;',  // vulgar fraction one half
+                  '&#190;'     => '&frac34;',  // vulgar fraction three 
quarters
+                  '&#191;'     => '&iquest;',  // inverted question mark
+                  '&#192;'     => '&Agrave;',  // latin capital letter A with 
grave
+                  '&#193;'     => '&Aacute;',  // latin capital letter A with 
acute
+                  '&#194;'     => '&Acirc;',   // latin capital letter A with 
circumflex
+                  '&#195;'     => '&Atilde;',  // latin capital letter A with 
tilde
+                  '&#196;'     => '&Auml;',    // latin capital letter A with 
diaeresis
+                  '&#197;'     => '&Aring;',   // latin capital letter A with 
ring above
+                  '&#198;'     => '&AElig;',   // latin capital letter AE
+                  '&#199;'     => '&Ccedil;',  // latin capital letter C with 
cedilla
+                  '&#200;'     => '&Egrave;',  // latin capital letter E with 
grave
+                  '&#201;'     => '&Eacute;',  // latin capital letter E with 
acute
+                  '&#202;'     => '&Ecirc;',   // latin capital letter E with 
circumflex
+                  '&#203;'     => '&Euml;',    // latin capital letter E with 
diaeresis
+                  '&#204;'     => '&Igrave;',  // latin capital letter I with 
grave
+                  '&#205;'     => '&Iacute;',  // latin capital letter I with 
acute
+                  '&#206;'     => '&Icirc;',   // latin capital letter I with 
circumflex
+                  '&#207;'     => '&Iuml;',    // latin capital letter I with 
diaeresis
+                  '&#208;'     => '&ETH;',             // latin capital letter 
ETH
+                  '&#209;'     => '&Ntilde;',  // latin capital letter N with 
tilde
+                  '&#210;'     => '&Ograve;',  // latin capital letter O with 
grave
+                  '&#211;'     => '&Oacute;',  // latin capital letter O with 
acute
+                  '&#212;'     => '&Ocirc;',   // latin capital letter O with 
circumflex
+                  '&#213;'     => '&Otilde;',  // latin capital letter O with 
tilde
+                  '&#214;'     => '&Ouml;',    // latin capital letter O with 
diaeresis
+                  '&#215;'     => '&times;',   // multiplication sign
+                  '&#216;'     => '&Oslash;',  // latin capital letter O with 
stroke
+                  '&#217;'     => '&Ugrave;',  // latin capital letter U with 
grave
+                  '&#218;'     => '&Uacute;',  // latin capital letter U with 
acute
+                  '&#219;'     => '&Ucirc;',   // latin capital letter U with 
circumflex
+                  '&#220;'     => '&Uuml;',    // latin capital letter U with 
diaeresis
+                  '&#221;'     => '&Yacute;',  // latin capital letter Y with 
acute
+                  '&#222;'     => '&THORN;',   // latin capital letter THORN
+                  '&#223;'     => '&szlig;',   // latin small letter sharp s
+                  '&#224;'     => '&agrave;',  // latin small letter a with 
grave
+                  '&#225;'     => '&aacute;',  // latin small letter a with 
acute
+                  '&#226;'     => '&acirc;',   // latin small letter a with 
circumflex
+                  '&#227;'     => '&atilde;',  // latin small letter a with 
tilde
+                  '&#228;'     => '&auml;',    // latin small letter a with 
diaeresis
+                  '&#229;'     => '&aring;',   // latin small letter a with 
ring above
+                  '&#230;'     => '&aelig;',   // latin small letter ae
+                  '&#231;'     => '&ccedil;',  // latin small letter c with 
cedilla
+                  '&#232;'     => '&egrave;',  // latin small letter e with 
grave
+                  '&#233;'     => '&eacute;',  // latin small letter e with 
acute
+                  '&#234;'     => '&ecirc;',   // latin small letter e with 
circumflex
+                  '&#235;'     => '&euml;',    // latin small letter e with 
diaeresis
+                  '&#236;'     => '&igrave;',  // latin small letter i with 
grave
+                  '&#237;'     => '&iacute;',  // latin small letter i with 
acute
+                  '&#238;'     => '&icirc;',   // latin small letter i with 
circumflex
+                  '&#239;'     => '&iuml;',    // latin small letter i with 
diaeresis
+                  '&#240;'     => '&eth;',             // latin small letter 
eth
+                  '&#241;'     => '&ntilde;',  // latin small letter n with 
tilde
+                  '&#242;'     => '&ograve;',  // latin small letter o with 
grave
+                  '&#243;'     => '&oacute;',  // latin small letter o with 
acute
+                  '&#244;'     => '&ocirc;',   // latin small letter o with 
circumflex
+                  '&#245;'     => '&otilde;',  // latin small letter o with 
tilde
+                  '&#246;'     => '&ouml;',    // latin small letter o with 
diaeresis
+                  '&#247;'     => '&divide;',  // division sign
+                  '&#248;'     => '&oslash;',  // latin small letter o with 
stroke
+                  '&#249;'     => '&ugrave;',  // latin small letter u with 
grave
+                  '&#250;'     => '&uacute;',  // latin small letter u with 
acute
+                  '&#251;'     => '&ucirc;',   // latin small letter u with 
circumflex
+                  '&#252;'     => '&uuml;',    // latin small letter u with 
diaeresis
+                  '&#253;'     => '&yacute;',  // latin small letter y with 
acute
+                  '&#254;'     => '&thorn;',   // latin small letter thorn
+                  '&#255;'     => '&yuml;',    //
+                  '&#338;'     => '&OElig;',   // latin capital ligature OE
+                  '&#339;'     => '&oelig;',   // latin small ligature oe
+                  '&#352;'     => '&Scaron;',  // latin capital letter S with 
caron
+                  '&#353;'     => '&scaron;',  // latin small letter s with 
caron
+                  '&#376;'     => '&Yuml;',    // latin capital letter Y with 
diaeresis
+                  '&#402;'     => '&fnof;' ,   // latin small f with hook
+                  '&#710;'     => '&circ;',    // modifier letter circumflex 
accent
+                  '&#732;'     => '&tilde;',   // small tilde
+                  '&#913;'     => '&Alpha;',   // greek capital letter alpha
+                  '&#914;'     => '&Beta;',    // greek capital letter beta
+                  '&#915;'     => '&Gamma;',   // greek capital letter gamma
+                  '&#916;'     => '&Delta;',   // greek capital letter delta
+                  '&#917;'     => '&Epsilon;', // greek capital letter epsilon
+                  '&#918;'     => '&Zeta;',    // greek capital letter zeta
+                  '&#919;'     => '&Eta;',             // greek capital letter 
eta
+                  '&#920;'     => '&Theta;',   // greek capital letter theta
+                  '&#921;'     => '&Iota;',    // greek capital letter iota
+                  '&#922;'     => '&Kappa;',   // greek capital letter kappa
+                  '&#923;'     => '&Lambda;',  // greek capital letter lambda
+                  '&#924;'     => '&Mu;',              // greek capital letter 
mu
+                  '&#925;'     => '&Nu;',              // greek capital letter 
nu
+                  '&#926;'     => '&Xi;',              // greek capital letter 
xi
+                  '&#927;'     => '&Omicron;', // greek capital letter omicron
+                  '&#928;'     => '&Pi;',              // greek capital letter 
pi
+                  '&#929;'     => '&Rho;',             // greek capital letter 
rho
+                  '&#931;'     => '&Sigma;',   // greek capital letter sigma
+                  '&#932;'     => '&Tau;',             // greek capital letter 
tau
+                  '&#933;'     => '&Upsilon;', // greek capital letter upsilon
+                  '&#934;'     => '&Phi;',             // greek capital letter 
phi
+                  '&#935;'     => '&Chi;',             // greek capital letter 
chi
+                  '&#936;'     => '&Psi;',             // greek capital letter 
psi
+                  '&#937;'     => '&Omega;',   // greek capital letter omega
+                  '&#945;'     => '&alpha;',   // greek small letter alpha
+                  '&#946;'     => '&beta;',    // greek small letter beta
+                  '&#947;'     => '&gamma;',   // greek small letter gamma
+                  '&#948;'     => '&delta;',   // greek small letter delta
+                  '&#949;'     => '&epsilon;', // greek small letter epsilon
+                  '&#950;'     => '&zeta;',    // greek small letter zeta
+                  '&#951;'     => '&eta;',             // greek small letter 
eta
+                  '&#952;'     => '&theta;',   // greek small letter theta
+                  '&#953;'     => '&iota;',    // greek small letter iota
+                  '&#954;'     => '&kappa;',   // greek small letter kappa
+                  '&#955;'     => '&lambda;',  // greek small letter lambda
+                  '&#956;'     => '&mu;',              // greek small letter mu
+                  '&#957;'     => '&nu;',              // greek small letter nu
+                  '&#958;'     => '&xi;',              // greek small letter xi
+                  '&#959;'     => '&omicron;', // greek small letter omicron
+                  '&#960;'     => '&pi;',              // greek small letter pi
+                  '&#961;'     => '&rho;',             // greek small letter 
rho
+                  '&#962;'     => '&sigmaf;',  // greek small letter final 
sigma
+                  '&#963;'     => '&sigma;',   // greek small letter sigma
+                  '&#964;'     => '&tau;',             // greek small letter 
tau
+                  '&#965;'     => '&upsilon;', // greek small letter upsilon
+                  '&#966;'     => '&phi;',             // greek small letter 
phi
+                  '&#967;'     => '&chi;',             // greek small letter 
chi
+                  '&#968;'     => '&psi;',             // greek small letter 
psi
+                  '&#969;'     => '&omega;',   // greek small letter omega
+                  '&#977;'     => '&thetasym;',        // greek small letter 
theta symbol
+                  '&#978;'     => '&upsih;',   // greek upsilon with hook 
symbol
+                  '&#982;'     => '&piv;',             // greek pi symbol
+                  '&#8194;'    => '&ensp;',    // en space
+                  '&#8195;'    => '&emsp;',    // em space
+                  '&#8201;'    => '&thinsp;',  // thin space
+                  '&#8204;'    => '&zwnj;',    // zero width non-joiner
+                  '&#8205;'    => '&zwj;',             // zero width joiner
+                  '&#8206;'    => '&lrm;',             // left-to-right mark
+                  '&#8207;'    => '&rlm;',             // right-to-left mark
+                  '&#8211;'    => '&ndash;',   // en dash
+                  '&#8212;'    => '&mdash;',   // em dash
+                  '&#8216;'    => '&lsquo;',   // left single quotation mark
+                  '&#8217;'    => '&rsquo;',   // right single quotation mark
+                  '&#8218;'    => '&sbquo;',   // single low-9 quotation mark
+                  '&#8220;'    => '&ldquo;',   // left double quotation mark
+                  '&#8221;'    => '&rdquo;',   // right double quotation mark
+                  '&#8222;'    => '&bdquo;',   // double low-9 quotation mark
+                  '&#8224;'    => '&dagger;',  // dagger
+                  '&#8225;'    => '&Dagger;',  // double dagger
+                  '&#8226;'    => '&bull;',    // bullet
+                  '&#8230;'    => '&hellip;',  // horizontal ellipsis
+                  '&#8240;'    => '&permil;',  // per mille sign
+                  '&#8242;'    => '&prime;',   // primeminutes
+                  '&#8243;'    => '&Prime;',   // double prime
+                  '&#8249;'    => '&lsaquo;',  // single left-pointing angle 
quotation mark
+                  '&#8250;'    => '&rsaquo;',  // single right-pointing angle 
quotation mark
+                  '&#8254;'    => '&oline;',   // overline
+                  '&#8260;'    => '&frasl;',   // fraction slash
+                  '&#8364;'    => '&euro;',    // euro sign
+                  '&#8465;'    => '&image;',   // blackletter capital I
+                  '&#8472;'    => '&weierp;',  // script capital P
+                  '&#8476;'    => '&real;',    // blackletter capital R
+                  '&#8482;'    => '&trade;',   // trade mark sign
+                  '&#8501;'    => '&alefsym;', // alef symbol
+                  '&#8592;'    => '&larr;',    // leftwards arrow
+                  '&#8593;'    => '&uarr;',    // upwards arrow
+                  '&#8594;'    => '&rarr;',    // rightwards arrow
+                  '&#8595;'    => '&darr;',    // downwards arrow
+                  '&#8596;'    => '&harr;',    // left right arrow
+                  '&#8629;'    => '&crarr;',   // downwards arrow with corner 
leftwards
+                  '&#8656;'    => '&lArr;',    // leftwards double arrow
+                  '&#8657;'    => '&uArr;',    // upwards double arrow
+                  '&#8658;'    => '&rArr;',    // rightwards double arrow
+                  '&#8659;'    => '&dArr;',    // downwards double arrow
+                  '&#8660;'    => '&hArr;',    // left right double arrow
+                  '&#8704;'    => '&forall;',  // for all
+                  '&#8706;'    => '&part;',    // partial differential
+                  '&#8707;'    => '&exist;',   // there exists
+                  '&#8709;'    => '&empty;',   // empty set
+                  '&#8711;'    => '&nabla;',   // nabla
+                  '&#8712;'    => '&isin;',    // element of
+                  '&#8713;'    => '&notin;',   // not an element of
+                  '&#8715;'    => '&ni;',              // contains as member
+                  '&#8719;'    => '&prod;',    // n-ary product
+                  '&#8721;'    => '&sum;',             // n-ary sumation
+                  '&#8722;'    => '&minus;',   // minus sign
+                  '&#8727;'    => '&lowast;',  // asterisk operator
+                  '&#8730;'    => '&radic;',   // square root
+                  '&#8733;'    => '&prop;',    // proportional to
+                  '&#8734;'    => '&infin;',   // infinity
+                  '&#8736;'    => '&ang;',             // angle
+                  '&#8743;'    => '&and;',             // logical and
+                  '&#8744;'    => '&or;',              // logical or
+                  '&#8745;'    => '&cap;',             // intersection
+                  '&#8746;'    => '&cup;',             // union
+                  '&#8747;'    => '&int;',             // integral
+                  '&#8756;'    => '&there4;',  // therefore
+                  '&#8764;'    => '&sim;',             // tilde operator
+                  '&#8773;'    => '&cong;',    // approximately equal to
+                  '&#8776;'    => '&asymp;',   // almost equal to
+                  '&#8800;'    => '&ne;',              // not equal to
+                  '&#8801;'    => '&equiv;',   // identical to
+                  '&#8804;'    => '&le;',              // less-than or equal to
+                  '&#8805;'    => '&ge;',              // greater-than or 
equal to
+                  '&#8834;'    => '&sub;',             // subset of
+                  '&#8835;'    => '&sup;',             // superset of
+                  '&#8836;'    => '&nsub;',    // not a subset of
+                  '&#8838;'    => '&sube;',    // subset of or equal to
+                  '&#8839;'    => '&supe;',    // superset of or equal to
+                  '&#8853;'    => '&oplus;',   // circled plus
+                  '&#8855;'    => '&otimes;',  // circled times
+                  '&#8869;'    => '&perp;',    // up tack
+                  '&#8901;'    => '&sdot;',    // dot operator
+                  '&#8968;'    => '&lceil;',   // left ceiling
+                  '&#8969;'    => '&rceil;',   // right ceiling
+                  '&#8970;'    => '&lfloor;',  // left floor
+                  '&#8971;'    => '&rfloor;',  // right floor
+                  '&#9001;'    => '&lang;',    // left-pointing angle bracket
+                  '&#9002;'    => '&rang;',    // right-pointing angle bracket
+                  '&#9674;'    => '&loz;',             // lozenge
+                  '&#9824;'    => '&spades;',  // black spade suit
+                  '&#9827;'    => '&clubs;',   // black club suit
+                  '&#9829;'    => '&hearts;',  // black heart suit
+                  '&#9830;'    => '&diams;'    // black diam suit
+                  );
+    
+    // split entities for use in str_replace()
+    foreach($codes as  $unicode_entity => $html_entity) {
+      $unicode_entities[] = $unicode_entity;
+      $html_entities[] = $html_entity;
+    }
+  }
+  
+  // transcode HTML entities to Unicode
+  if($to_unicode)
+    return str_replace($html_entities, $unicode_entities, $input);
+  
+  // transcode Unicode entities to HTML entities
+  else
+    return str_replace($unicode_entities, $html_entities, $input);
+}
+
+
+
+
+/**
+ * transcode multi-byte characters to HTML representations for Unicode
+ *
+ * This function is aiming to preserve Unicode characters through storage in
+ * an ISO-8859-1 compliant system.
+ *
+ * Every multi-byte UTF-8 character is transformed to its equivalent HTML
+ * numerical entity (eg, &amp;#4568;) that may be handled safely by PHP and
+ * by MySQL.
+ *
+ * Of course, this solution does not allow for full-text search in the
+ * database and therefore, is not a definitive solution to
+ * internationalization issues.
+ * It does enable, however, practical use of Unicode to build pages in
+ * foreign languages.
+ *
+ * Also, this function first passes the input through transcode(), which
+ * transforms HTML entities into their equivalent Unicode entities.
+ * For example, w.bloggar posts pages using HTML entities.
+ * If you have to modify these pages using web forms, you would like to get
+ * UTF-8 instead.
+ *
+ * Single-byte characters (below 0x80) are copied unchanged. Sequences that
+ * decode to the value 0 (eg, C0 80) are dropped from the output.
+ *
+ * NOTE(review): the length of a sequence is taken from the lead byte only;
+ * continuation bytes are not validated and a truncated trailing sequence
+ * reads past the end of the string -- callers should pass well-formed UTF-8.
+ *
+ * @link http://www.evolt.org/article/A_Simple_Character_Entity_Chart/17/21234/ A Simple Character Entity Chart
+ *
+ * @param string the original UTF-8 string
+ * @return a string acceptable in an ISO-8859-1 storage system
+ *         (ie., PHP4 + MySQL 3)
+ */
+function to_unicode($input) {
+  
+  // transcode HTML entities to Unicode entities
+  $input = transcode($input);
+  
+  // scan the whole string
+  $output = '';
+  $index = 0;
+  while($index < strlen($input)) {
+    
+    // look at one char
+    $char = ord($input[$index]);
+    
+    // one byte (0xxxxxxx)
+    if($char < 0x80) {
+      
+      // some chars may be undefined
+      $output .= chr($char);
+      $index += 1;
+      
+      // two bytes (110xxxxx 10xxxxxx)
+    } elseif($char < 0xE0) {
+      
+      // strip weird sequences (eg, C0 80 -> NUL)
+      if($value = (($char % 0x20) * 0x40)
+        + (ord($input[$index + 1]) % 0x40))
+       $output .= '&#' . $value . ';';
+      $index += 2;
+      
+      // three bytes (1110xxxx 10xxxxxx 10xxxxxx)
+      // example: euro sign = \xE2\x82\xAC -> &#8364;
+    } elseif($char < 0xF0) {
+      
+      // strip weird sequences
+      if($value = (($char % 0x10) * 0x1000)
+        + ((ord($input[$index + 1]) % 0x40) * 0x40)
+        + (ord($input[$index + 2]) % 0x40))
+       $output .= '&#' . $value . ';';
+      $index += 3;
+      
+      // four bytes (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
+    } elseif($char < 0xF8) {
+      
+      // strip weird sequences
+      if($value = (($char % 0x08) * 0x40000)
+        + ((ord($input[$index + 1]) % 0x40) * 0x1000)
+        + ((ord($input[$index + 2]) % 0x40) * 0x40)
+        + (ord($input[$index + 3]) % 0x40))
+       $output .= '&#' . $value . ';';
+      $index += 4;
+      
+      // five bytes (111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx)
+    } elseif($char < 0xFC) {
+      
+      // strip weird sequences
+      if($value = (($char % 0x04) * 0x1000000)
+        + ((ord($input[$index + 1]) % 0x40) * 0x40000)
+        + ((ord($input[$index + 2]) % 0x40) * 0x1000)
+        + ((ord($input[$index + 3]) % 0x40) * 0x40)
+        + (ord($input[$index + 4]) % 0x40))
+       $output .= '&#' . $value . ';';
+      $index += 5;
+      
+      // six bytes (1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx)
+    } else {
+      
+      // strip weird sequences
+      // the lowest six bits come from the sixth byte ($index + 5);
+      // the fifth byte ($index + 4) only contributes the 0x40 term
+      if($value = (($char % 0x02) * 0x40000000)
+        + ((ord($input[$index + 1]) % 0x40) * 0x1000000)
+        + ((ord($input[$index + 2]) % 0x40) * 0x40000)
+        + ((ord($input[$index + 3]) % 0x40) * 0x1000)
+        + ((ord($input[$index + 4]) % 0x40) * 0x40)
+        + (ord($input[$index + 5]) % 0x40))
+       $output .= '&#' . $value . ';';
+      $index += 6;
+    }
+    
+  }
+ 
+  // return the translated string
+  return $output;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 // returns either the translated string
 // or the original string.  Assumes we
 // are passed the original string as occurs
@@ -264,8 +726,7 @@
   if ($a == "")
     return $a;
 
-  // $a = htmlentities($a, ENT_QUOTES, $charset);
-  $u = urlencode($a);
+  $u = mysql_real_escape_string(urlencode($a));
   
   if (!$connection) {
     // database not available, just print English
@@ -319,7 +780,7 @@
     return fix($a);              // just return English string
   } else { // translation available
     $row = mysql_fetch_array($result);
-    return fix(urldecode($row["translation"]));
+    return $row["translation"];
   }
 }
 
@@ -361,7 +822,16 @@
 }
 
 function TITLE($a,$b="") {
-  echo "<title>" . W_($a,$b) . "</title>\n";
+  global $lang;
+  global $languagecodes;
+  echo "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\" 
>";
+  echo "<title>" . TRANSLATE_($a,$b) . "</title>\n";
+  if (isset($languagecodes[$lang])) {
+    echo "<meta name=\"content-language\" content=\"" . 
+         $languagecodes[$lang] . "\">";
+    echo "<meta name=\"language\" content=\"" . 
+         $languagecodes[$lang] . "\">";
+  }
 }
 
 

Modified: i18nHTML-docs/WWW/i18nhtml_config.inc
===================================================================
--- i18nHTML-docs/WWW/i18nhtml_config.inc       2005-04-04 06:47:24 UTC (rev 
587)
+++ i18nHTML-docs/WWW/i18nhtml_config.inc       2005-04-04 06:51:09 UTC (rev 
588)
@@ -1,6 +1,6 @@
 <?php
 /*
-     (C) 2003, 2004 Christian Grothoff
+     (C) 2003, 2004, 2005 Christian Grothoff
 
      This code is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published
@@ -24,12 +24,18 @@
 $i18nHTMLrecordMode = 2; // 1: only missing, 2: everything, 0: disable
 
 $i18nHTMLsqlServer = "localhost";
-$i18nHTMLsqlUser = "GNUnetWWW";
-$i18nHTMLsqlPass = "garlic";
+$i18nHTMLsqlUser = "i18nHTML";
+$i18nHTMLsqlPass = "pass";
 
-$i18nHTMLsqlDB = "translations"; // default is "translation"
+$i18nHTMLsqlDB = "translation"; // default is "translation"
 $i18nHTMLbase = "";  // base directory prepended to i18nHTML php pages used in 
links
 $i18nHTMLmarker = "*";  // default value if never changed
 
+// Note that if you enable debug, the scripts may print
+// warnings even if everything is ok!
+$i18nHTMLdebug = 0; // 0 = no, 1 = yes
 
+// for selectively cloning a DB
+//  $i18nHTMLclone = "/tmp/cloneFile.sql";
+
 ?>

Modified: i18nHTML-docs/WWW/index.php
===================================================================
--- i18nHTML-docs/WWW/index.php 2005-04-04 06:47:24 UTC (rev 587)
+++ i18nHTML-docs/WWW/index.php 2005-04-04 06:51:09 UTC (rev 588)
@@ -2,17 +2,10 @@
 include("i18nhtml.inc");
 DOCTYPE("HTML", "Transitional");
 echo "<html><head>\n";
-echo "<title>";
-TRANSLATE("i18nHTML - enabling collaborative webpage translation");
-echo "</title>";
+TITLE("i18nHTML - enabling collaborative webpage translation");
 echo "<meta name=\"description\" content=\"";
 TRANSLATE("i18nHTML is a collection of PHP scripts that allow visitors of a 
webpage to help translating it.");
 echo "\">";
-
-echo "<meta name=\"content-language\" content=\"" . 
-     $languagecodes[$lang] . "\">";
-echo "<meta name=\"language\" content=\"" . 
-     $languagecodes[$lang] . "\">";
 ?>
 <meta name="author" content="Christian Grothoff">
 <meta name="keywords" 
content="i18n,HTML,PHP,translation,languages,mysql,database,internationalization,www,free,GNU,GPL">
@@ -34,11 +27,12 @@
 W("i18nHTML defines a set of PHP functions that generate either the translated 
HTML sentences or the original (typically English) text with decorations that 
allow users to provide translations.");
 W("i18nHTML requires the internationalized webpages to be written using the 
provided PHP functions but does not constrain the page design in any way.");
 W("Webpages internationalized with i18nHTML can be updated without loosing 
existing translations for sentences that were not changed.");
+W("Note that it is important that you use the i18nHTML <tt>TITLE</tt> command 
in your documents in order to ensure that the character set and other meta-data 
is set properly.");
 P();
 H2("Download");
 W("You can find the latest version %s.",
-  extlink_("download/i18nHTML-0.0.2.tar.gz", "here"));
-W("The latest version can be obtained using");
+  extlink_("https://gnunet.org/i18nHTML/download/";, "here"));
+W("The latest CVS version can be obtained using");
 PRE("$ svn checkout https://gnunet.org/svn/i18nHTML/";);
 P();    
 W("If you want to be notified about updates, subscribe to %s",
@@ -54,18 +48,18 @@
     intlink_("editor.php", "here"));
   W("Note that the sentence database is shared with the %s, %s and %s 
projects.",
     ARRAY(extlink_("http://gnunet.org/";, "GNUnet"),
-         extlink_("/doodle/", "doodle"),
-         extlink_("/libextractor/", "libExtractor")));  
+         extlink_("http://gnunet.org/doodle/";, "doodle"),
+         extlink_("http://gnunet.org/libextractor/";, "libExtractor")));  
  }
 
 H2("Bugtrack");
-W("I18nHTML uses Mantis for bugtracking.");
+W("i18nHTML uses Mantis for bugtracking.");
 W("Visit %s to report bugs.",
   extlink_("https://gnunet.org/mantis/","https://gnunet.org/mantis/";));
 W("You need to sign up for a reporter account.");
 W("Please make sure you report bugs under <strong>I18nHTML</strong> and not 
under any of the other projects.");
 P();
-W("If you dislike Mantis and need to report a bug contact %s via e-mail.",
+W("If you dislike Mantis and need to report a bug contact %s via e-mail (good 
luck getting by the spam-filter).",
   extlink_("mailto:address@hidden","address@hidden";));
 
 HR();

Modified: i18nHTML-docs/WWW/start.php
===================================================================
--- i18nHTML-docs/WWW/start.php 2005-04-04 06:47:24 UTC (rev 587)
+++ i18nHTML-docs/WWW/start.php 2005-04-04 06:51:09 UTC (rev 588)
@@ -2,9 +2,7 @@
 include("i18nhtml.inc");
 DOCTYPE("HTML", "Transitional");
 echo "<html><head>\n";
-echo "<title>";
-TRANSLATE("Documentation");
-echo "</title>";
+TITLE("Documentation");
 echo "</head><body>";
 generateLanguageBar();
 H2("Documentation");

Modified: i18nHTML-docs/WWW/status.php
===================================================================
--- i18nHTML-docs/WWW/status.php        2005-04-04 06:47:24 UTC (rev 587)
+++ i18nHTML-docs/WWW/status.php        2005-04-04 06:51:09 UTC (rev 588)
@@ -30,9 +30,7 @@
 
 DOCTYPE("HTML", "Transitional");
 echo "<html><head>\n";
-echo "<title>";
-TRANSLATE("Translation: status");
-echo "</title>";
+TITLE("Translation: status");
 echo "</head><body>";
 W("Language setting is %s.",
   $lang);

Modified: i18nHTML-docs/WWW/translate.php
===================================================================
--- i18nHTML-docs/WWW/translate.php     2005-04-04 06:47:24 UTC (rev 587)
+++ i18nHTML-docs/WWW/translate.php     2005-04-04 06:51:09 UTC (rev 588)
@@ -27,9 +27,7 @@
 
 DOCTYPE("HTML", "Transitional");
 echo "<html><head>\n";
-echo "<title>";
-TRANSLATE("WWW translation");
-echo "</title>";
+TITLE("WWW translation");
 echo "<meta name=\"description\" content=\"";
 TRANSLATE("Help translating this webpage.");
 echo "\">";
@@ -45,7 +43,7 @@
 W("Destination language: ");
 W($lang);
 P();
-echo "<form action=\"" . $i18nHTMLbase . "commitTranslation.php\">\n";
+echo "<form method=\"POST\" action=\"" . $i18nHTMLbase . 
"commitTranslation.php\">\n";
 echo "<input type=hidden name=\"text\" value=\"" . urlencode($text) . "\">\n";
 echo "<input type=hidden name=\"xlang\" value=\"$xlang\">\n";
 echo "<input type=hidden name=\"back\" value=\"$back\">\n";
@@ -93,9 +91,9 @@
    printf("<tr><td>%s</td><td><a href=\"" . $i18nHTMLbase . 
"vote.php?xlang=%s&text=%s&translation=%s\">%s</a></td></tr>\n",
           W_($row["lang"]),
           urlencode($row["lang"]),
-         urlencode($text),
-         $translation,
-         urldecode($translation));
+         $u,
+         urlencode(from_unicode($translation)),
+         fix(from_unicode($translation)));
  }
 echo "</table>";
 
@@ -121,4 +119,4 @@
 generateFooter();
 echo "</body></html>";
 
-?>
\ No newline at end of file
+?>

Modified: i18nHTML-docs/WWW/vote.php
===================================================================
--- i18nHTML-docs/WWW/vote.php  2005-04-04 06:47:24 UTC (rev 587)
+++ i18nHTML-docs/WWW/vote.php  2005-04-04 06:51:09 UTC (rev 588)
@@ -1,6 +1,6 @@
 <?php
 /*
-     (C) 2003, 2004 Christian Grothoff
+     (C) 2003, 2004, 2005 Christian Grothoff
 
      This code is free software; you can redistribute it and/or modify
      it under the terms of the GNU General Public License as published
@@ -21,17 +21,18 @@
   // For sentences with multiple translations, the one with the most
   // votes is displayed.
 include("i18nhtml.inc");
-echo "<html><head><title>";
-W("WWW translation: vote");
-echo "</title></head><body>";
+echo "<html><head>";
+echo "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\" >";
+TITLE("WWW translation: vote");
+echo "</head><body>";
 if (!$connection) {
   echo "Database is down. Cannot edit translations.";
   die();
  } 
 $text = $_REQUEST['text'];
 $translation = $_REQUEST['translation'];
-$u = urlencode($text);
-$t = urlencode($translation);
+$u = mysql_real_escape_string($text);
+$t = mysql_real_escape_string(to_unicode($translation));
 echo "text = " . $text . "<br>\n";
 echo "translation = " . $translation . "<br>\n";
 





reply via email to

[Prev in Thread] Current Thread [Next in Thread]