bug-wget
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Bug-wget] [PATCH] Use u8_check() instead of our own utf8 checking


From: Ángel González
Subject: [Bug-wget] [PATCH] Use u8_check() instead of our own utf8 checking
Date: Mon, 06 Jul 2015 02:05:57 +0200
User-agent: Thunderbird

 * bootstrap.conf: Enable u8-check module
 * src/iri.c: Remove _utf8_is_valid()
---

This is probably the shortest-lived function in wget :)

I didn't change the URLs, but there are probably more suitable ones.

 bootstrap.conf |  1 +
 src/iri.c      | 62 ++++++++++++++--------------------------------------------
 2 files changed, 16 insertions(+), 47 deletions(-)

diff --git a/bootstrap.conf b/bootstrap.conf
index 4fff711..376a549 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -85,6 +85,7 @@ strtoll
 timegm
 tmpdir
 unlocked-io
+unistr/u8-check
 update-copyright
 vasprintf
 vsnprintf
diff --git a/src/iri.c b/src/iri.c
index a6b1c6e..7d66e9d 100644
--- a/src/iri.c
+++ b/src/iri.c
@@ -43,6 +43,7 @@ as that of the covered work.  */
 #include "url.h"
 #include "c-strcase.h"
 #include "c-strcasestr.h"
+#include "unistr.h"
 #include "xstrndup.h"

 /* RFC3987 section 3.1 mandates STD3 ASCII RULES */
@@ -220,50 +221,6 @@ locale_to_utf8 (const char *str)
   return str;
 }

-/*
- * Work around a libidn <= 1.30 vulnerability.
- *
- * The function checks for a valid UTF-8 character sequence before
- * passing it to idna_to_ascii_8z().
- *
- * [1] http://lists.gnu.org/archive/html/help-libidn/2015-05/msg00002.html
- * [2] https://lists.gnu.org/archive/html/bug-wget/2015-06/msg00002.html
- * [3] http://curl.haxx.se/mail/lib-2015-06/0143.html
- */
-static bool
-_utf8_is_valid(const char *utf8)
-{
-  const unsigned char *s = (const unsigned char *) utf8;
-
-  while (*s)
-    {
-      if ((*s & 0x80) == 0) /* 0xxxxxxx ASCII char */
-        s++;
-      else if ((*s & 0xE0) == 0xC0) /* 110xxxxx 10xxxxxx */
-        {
-          if ((s[1] & 0xC0) != 0x80)
-            return false;
-          s+=2;
-        }
-      else if ((*s & 0xF0) == 0xE0) /* 1110xxxx 10xxxxxx 10xxxxxx */
-        {
-          if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80)
-            return false;
-          s+=3;
-        }
-      else if ((*s & 0xF8) == 0xF0) /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
-        {
-          if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80 || (s[3] & 0xC0) != 0x80)
-            return false;
-          s+=4;
-        }
-      else
-        return false;
-    }
-
-  return true;
-}
-
 /* Try to "ASCII encode" UTF-8 host. Return the new domain on success or NULL
    on error. */
 char *
@@ -272,6 +229,7 @@ idn_encode (const struct iri *i, const char *host)
   int ret;
   char *ascii_encoded;
   char *utf8_encoded = NULL;
+  const char *utf8_host;

   /* Encode to UTF-8 if not done */
   if (!i->utf8_encode)
@@ -280,16 +238,26 @@ idn_encode (const struct iri *i, const char *host)
           return NULL;  /* Nothing to encode or an error occured */
     }

-  if (!_utf8_is_valid(utf8_encoded ? utf8_encoded : host))
+  utf8_host = utf8_encoded ? utf8_encoded : host;
+
+ /*
+  * Verify that utf8_host is a valid UTF-8 character sequence before
+  * passing it to idna_to_ascii_8z().
+  *
+  * [1] https://lists.gnu.org/archive/html/help-libidn/2015-05/msg00002.html
+  * [2] https://lists.gnu.org/archive/html/bug-wget/2015-06/msg00002.html
+  * [3] http://curl.haxx.se/mail/lib-2015-06/0143.html
+  */
+  if (u8_check (utf8_host, strlen(utf8_host)))
     {
       logprintf (LOG_VERBOSE, _("Invalid UTF-8 sequence: %s\n"),
-                 quote(utf8_encoded ? utf8_encoded : host));
+                 quote (utf8_host));
       xfree (utf8_encoded);
       return NULL;
     }

   /* Store in ascii_encoded the ASCII UTF-8 NULL terminated string */
-  ret = idna_to_ascii_8z (utf8_encoded ? utf8_encoded : host, &ascii_encoded, IDNA_FLAGS);
+  ret = idna_to_ascii_8z (utf8_host, &ascii_encoded, IDNA_FLAGS);
   xfree (utf8_encoded);

   if (ret != IDNA_SUCCESS)
--
2.4.3




reply via email to

[Prev in Thread] Current Thread [Next in Thread]