From cbdd976dea6289a1f167c2b50cc1d4b1ff878686 Mon Sep 17 00:00:00 2001 From: Tim Schlueter Date: Mon, 24 Jul 2017 20:39:24 -0700 Subject: [PATCH 1/3] Adjust Extension based on Content-Encoding * doc/wget.texi (--adjust-extension, adjust_extension): Updated documentation. * src/http.c (encoding_t): New enum. (struct http_stat): Add local_encoding field. (gethttp): --adjust-extension based on Content-Encoding. --- doc/wget.texi | 10 +++++-- src/http.c | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+), 2 deletions(-) diff --git a/doc/wget.texi b/doc/wget.texi index 6453c35..e582d4f 100644 --- a/doc/wget.texi +++ b/doc/wget.texi @@ -1346,6 +1346,11 @@ renamed from @samp{--html-extension}, to better reflect its new behavior. The old option name is still acceptable, but should now be considered deprecated. +As of version 1.20, Wget will also ensure that any downloaded files with +a @code{Content-Encoding} of @samp{br}, @samp{compress}, @samp{deflate} +or @samp{gzip} end in the suffix @samp{.br}, @samp{.Z}, @samp{.zlib} +and @samp{.gz} respectively. + At some point in the future, this option may well be expanded to include suffixes for other types of content, including content types that are not parsed by Wget. @@ -3365,8 +3370,9 @@ Define a header for HTTP downloads, like using @item adjust_extension = on/off Add a @samp{.html} extension to @samp{text/html} or -@samp{application/xhtml+xml} files that lack one, or a @samp{.css} -extension to @samp{text/css} files that lack one, like +@samp{application/xhtml+xml} files that lack one, a @samp{.css} +extension to @samp{text/css} files that lack one, and a @samp{.br}, +@samp{.Z}, @samp{.zlib} or @samp{.gz} to compressed files like @samp{-E}. Previously named @samp{html_extension} (still acceptable, but deprecated). 
diff --git a/src/http.c b/src/http.c index f5d9caf..a8c6e18 100644 --- a/src/http.c +++ b/src/http.c @@ -1539,6 +1539,16 @@ persistent_available_p (const char *host, int port, bool ssl, fd = -1; \ } while (0) +typedef enum +{ + ENC_INVALID = -1, /* invalid encoding */ + ENC_NONE = 0, /* no special encoding */ + ENC_GZIP, /* gzip compression */ + ENC_DEFLATE, /* deflate compression */ + ENC_COMPRESS, /* compress compression */ + ENC_BROTLI /* brotli compression */ +} encoding_t; + struct http_stat { wgint len; /* received length */ @@ -1569,6 +1579,9 @@ struct http_stat #ifdef HAVE_METALINK metalink_t *metalink; #endif + + encoding_t local_encoding; /* the encoding of the local file */ + bool temporary; /* downloading a temporary file */ }; @@ -3189,6 +3202,7 @@ gethttp (const struct url *u, struct url *original_url, struct http_stat *hs, xfree (hs->remote_time); hs->error = NULL; hs->message = NULL; + hs->local_encoding = ENC_NONE; conn = u; @@ -3639,6 +3653,49 @@ gethttp (const struct url *u, struct url *original_url, struct http_stat *hs, } } + if (resp_header_copy (resp, "Content-Encoding", hdrval, sizeof (hdrval))) + { + hs->local_encoding = ENC_INVALID; + + switch (hdrval[0]) + { + case 'b': case 'B': + if (0 == c_strcasecmp(hdrval, "br")) + hs->local_encoding = ENC_BROTLI; + break; + case 'c': case 'C': + if (0 == c_strcasecmp(hdrval, "compress")) + hs->local_encoding = ENC_COMPRESS; + break; + case 'd': case 'D': + if (0 == c_strcasecmp(hdrval, "deflate")) + hs->local_encoding = ENC_DEFLATE; + break; + case 'g': case 'G': + if (0 == c_strcasecmp(hdrval, "gzip")) + hs->local_encoding = ENC_GZIP; + break; + case 'i': case 'I': + if (0 == c_strcasecmp(hdrval, "identity")) + hs->local_encoding = ENC_NONE; + break; + case 'x': case 'X': + if (0 == c_strcasecmp(hdrval, "x-compress")) + hs->local_encoding = ENC_COMPRESS; + else if (0 == c_strcasecmp(hdrval, "x-gzip")) + hs->local_encoding = ENC_GZIP; + break; + case '\0': + hs->local_encoding = ENC_NONE; + } + + if 
(hs->local_encoding == ENC_INVALID) + { + DEBUGP (("Unrecognized Content-Encoding: %s\n", hdrval)); + hs->local_encoding = ENC_NONE; + } + } + /* 20x responses are counted among successful by default. */ if (H_20X (statcode)) *dt |= RETROKF; @@ -3767,6 +3824,35 @@ gethttp (const struct url *u, struct url *original_url, struct http_stat *hs, if (opt.adjust_extension) { + const char *encoding_ext = NULL; + switch (hs->local_encoding) + { + case ENC_INVALID: + case ENC_NONE: + break; + case ENC_BROTLI: + encoding_ext = ".br"; + break; + case ENC_COMPRESS: + encoding_ext = ".Z"; + break; + case ENC_DEFLATE: + encoding_ext = ".zlib"; + break; + case ENC_GZIP: + encoding_ext = ".gz"; + break; + default: + DEBUGP (("No extension found for encoding %d\n", + hs->local_encoding)); + } + if (encoding_ext != NULL) + { + char *file_ext = strrchr (hs->local_file, '.'); + /* strip Content-Encoding extension (it will be re-added later) */ + if (file_ext != NULL && 0 == strcasecmp (file_ext, encoding_ext)) + *file_ext = '\0'; + } if (*dt & TEXTHTML) /* -E / --adjust-extension / adjust_extension = on was specified, and this is a text/html file. If some case-insensitive @@ -3779,6 +3865,10 @@ gethttp (const struct url *u, struct url *original_url, struct http_stat *hs, { ensure_extension (hs, ".css", dt); } + if (encoding_ext != NULL) + { + ensure_extension (hs, encoding_ext, dt); + } } if (cond_get) -- 2.7.4