[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Bug-wget] [PATCH] Disable automatic wget headers.
From: |
adham elkarn |
Subject: |
[Bug-wget] [PATCH] Disable automatic wget headers. |
Date: |
Sat, 4 May 2019 15:30:54 +0000 |
From: sulfastor <address@hidden>
Hi, Thank you Darshit for your feedback and code review.
Those are the updates:
* doc/wget.texi: Added --disable-header documentation.
* fuzz/wget_options_fuzzer.dict: Update with --disable-header inputs.
* src/http.c (disabled_header): Checks for disabled headers
(request_set_header): Doesn't let header to be set if disabled
(gethttp): frees disabled header to let overriding
* src/init.c (cmd_dis_header), (check_user_disabled_header),
(vec_remove_header): added new option disabled_headers.
* src/main.c: added new option --disable-header, added help description
* src/options.h: added new option --disable-header
* testenv/Makefile.am: Added new test files
* testenv/server/http/http_server.py: Added new rule RejectHeaderField
* testenv/conf/reject_header_field.py: Added new rule RejectHeaderField
* testenv/README: Added help description for new rule
* testenv/Test-disable-default-headers.py: Test without using --header
* testenv/Test-disable-headers-after.py: Test using --header before
--disable-header
* testenv/Test-disable-headers-before.py: Test using --header after
--disable-header
Signed-off-by: sulfastor <address@hidden>, adham elkarn <address@hidden>
---
doc/wget.texi | 19 ++++++
fuzz/wget_options_fuzzer.dict | 17 +++++
src/http.c | 31 +++++++++
src/init.c | 88 +++++++++++++++++++++++++
src/main.c | 4 ++
src/options.h | 1 +
testenv/Makefile.am | 3 +
testenv/README | 4 ++
testenv/Test-disable-default-headers.py | 73 ++++++++++++++++++++
testenv/Test-disable-headers-after.py | 77 ++++++++++++++++++++++
testenv/Test-disable-headers-before.py | 77 ++++++++++++++++++++++
testenv/conf/reject_header_field.py | 12 ++++
testenv/server/http/http_server.py | 8 +++
13 files changed, 414 insertions(+)
create mode 100644 testenv/Test-disable-default-headers.py
create mode 100644 testenv/Test-disable-headers-after.py
create mode 100644 testenv/Test-disable-headers-before.py
create mode 100644 testenv/conf/reject_header_field.py
diff --git a/doc/wget.texi b/doc/wget.texi
index 7eada2dd..a43224a3 100644
--- a/doc/wget.texi
+++ b/doc/wget.texi
@@ -1542,6 +1542,25 @@ wget --header="Host: foo.bar" http://localhost/
In versions of Wget prior to 1.10 such use of @samp{--header} caused
sending of duplicate headers.
address@hidden disable header, choose
address@hidden address@hidden
+Remove @var{header-field} among the headers in each @sc{http} request.
+
+You may define more than one additional header field by specifying
address@hidden more than once as in @samp{--header}.
+
address@hidden
address@hidden
+wget --disable-header='Accept' \
+ --disable-header='User-Agent' \
+ --disable-header='Accept-Encoding' \
+ https://example.com/
address@hidden group
address@hidden example
+
+Specifying a header field with @samp{--header} after disabling it
+will override it and include it in the @sc{http} request headers.
+
@cindex Content-Encoding, choose
@item address@hidden
Choose the type of compression to be used. Legal values are
diff --git a/fuzz/wget_options_fuzzer.dict b/fuzz/wget_options_fuzzer.dict
index 9a2dbd8e..12d54d60 100644
--- a/fuzz/wget_options_fuzzer.dict
+++ b/fuzz/wget_options_fuzzer.dict
@@ -30,6 +30,22 @@
"human"
"csv"
"json"
+"Authorization"
+"User-Agent"
+"Referer"
+"Cache-Control"
+"Pragma"
+"If-Modified-Since"
+"Range"
+"Accept"
+"Accept-Encoding"
+"Host"
+"Connection"
+"Proxy-Connection"
+"Content-Type"
+"Content-Length"
+"Proxy-Authorization"
+"Cookie"
"accept="
"accept-regex="
"adjust-extension="
@@ -66,6 +82,7 @@
"delete-after="
"directories="
"directory-prefix="
+"disable-header="
"dns-caching="
"dns-timeout="
"domains="
diff --git a/src/http.c b/src/http.c
index 289d1101..225be265 100644
--- a/src/http.c
+++ b/src/http.c
@@ -88,6 +88,7 @@ static char *basic_authentication_encode (const char *, const
char *);
static bool known_authentication_scheme_p (const char *, const char *);
static void ensure_extension (struct http_stat *, const char *, int *);
static void load_cookies (void);
+static bool disabled_header (char*);
static bool cookies_loaded_p;
static struct cookie_jar *wget_cookie_jar;
@@ -236,6 +237,9 @@ request_set_header (struct request *req, const char *name,
const char *value,
struct request_header *hdr;
int i;
+ if (disabled_header ((char*) name))
+ return;
+
if (!value)
{
/* A NULL value is a no-op; if freeing the name is requested,
@@ -1842,6 +1846,27 @@ time_to_rfc1123 (time_t time, char *buf, size_t bufsize)
return RETROK;
}
+static bool
+disabled_header (char* header_name)
+{
+ char** p = opt.disabled_headers;
+ char *s;
+ size_t n;
+
+ if (!p)
+ return 0;
+
+ for (; *p; ++p)
+ {
+ s = strchrnul (header_name, ':');
+ n = (size_t) (s - header_name);
+ if (n == strlen (*p) && 0 == strncmp (header_name, *p, n))
+ return 1;
+ }
+
+ return 0;
+}
+
static struct request *
initialize_request (const struct url *u, struct http_stat *hs, int *dt, struct
url *proxy,
bool inhibit_keep_alive, bool *basic_auth_finished,
@@ -3263,6 +3288,11 @@ gethttp (const struct url *u, struct url *original_url,
struct http_stat *hs,
if (opt.user_headers)
{
int i;
+ /* Empty the disabled headers as they are no longer used
+ and this will let headers to be overriden by the user */
+ free_vec (opt.disabled_headers);
+ opt.disabled_headers = NULL;
+
for (i = 0; opt.user_headers[i]; i++)
request_set_user_header (req, opt.user_headers[i]);
}
@@ -5274,6 +5304,7 @@ ensure_extension (struct http_stat *hs, const char *ext,
int *dt)
shortext[0] = '\0';
len = strlen (ext);
if (len == 5)
+
{
memcpy (shortext, ext, len - 1);
shortext[len - 1] = '\0';
diff --git a/src/init.c b/src/init.c
index 9b6665a6..aaf99ba6 100644
--- a/src/init.c
+++ b/src/init.c
@@ -101,6 +101,7 @@ CMD_DECLARE (cmd_spec_compression);
#endif
CMD_DECLARE (cmd_spec_dirstruct);
CMD_DECLARE (cmd_spec_header);
+CMD_DECLARE (cmd_dis_header);
CMD_DECLARE (cmd_spec_warc_header);
CMD_DECLARE (cmd_spec_htmlify);
CMD_DECLARE (cmd_spec_mirror);
@@ -183,6 +184,7 @@ static const struct {
{ "deleteafter", &opt.delete_after, cmd_boolean },
{ "dirprefix", &opt.dir_prefix, cmd_directory },
{ "dirstruct", NULL, cmd_spec_dirstruct },
+ { "disableheader", NULL, cmd_dis_header},
{ "dnscache", &opt.dns_cache, cmd_boolean },
#ifdef HAVE_LIBCARES
{ "dnsservers", &opt.dns_servers, cmd_string },
@@ -398,6 +400,7 @@ defaults (void)
opt.metalink_index = -1;
#endif
+ opt.disabled_headers = NULL;
opt.cookies = true;
opt.verbose = -1;
opt.ntry = 20;
@@ -990,6 +993,7 @@ struct decode_item {
static bool decode_string (const char *, const struct decode_item *, int, int
*);
static bool simple_atof (const char *, const char *, double *);
+
#define CMP1(p, c0) (c_tolower((p)[0]) == (c0) && (p)[1] == '\0')
#define CMP2(p, c0, c1) (c_tolower((p)[0]) == (c0) \
@@ -1459,6 +1463,8 @@ cmd_cert_type (const char *com, const char *val, void
*place)
options specially. */
static bool check_user_specified_header (const char *);
+static bool check_user_disabled_header (const char *);
+static char ** vec_remove_header (char **, const char *);
#ifdef HAVE_LIBZ
static bool
@@ -1493,6 +1499,29 @@ cmd_spec_dirstruct (const char *com, const char *val,
void *place_ignored _GL_UN
return true;
}
+static bool
+cmd_dis_header (const char *com, const char *val, void *place_ignored
_GL_UNUSED)
+{
+ /* Empty value means reset the list of headers. */
+ if (*val == '\0')
+ {
+ free_vec (opt.disabled_headers);
+ opt.disabled_headers = NULL;
+ return true;
+ }
+
+ if (!check_user_disabled_header (val))
+ {
+ fprintf (stderr, _("%s: %s: Invalid header %s.\n"),
+ exec_name, com, quote (val));
+ return false;
+ }
+ /* Removes disabled headers from user defined headers */
+ opt.user_headers = vec_remove_header (opt.user_headers, val);
+ opt.disabled_headers = vec_append (opt.disabled_headers, val);
+ return true;
+}
+
static bool
cmd_spec_header (const char *com, const char *val, void *place_ignored
_GL_UNUSED)
{
@@ -1850,6 +1879,23 @@ simple_atof (const char *beg, const char *end, double
*dest)
contain a colon preceded by non-white-space characters and must not
contain newlines. */
+
+static bool
+check_user_disabled_header (const char* s)
+{
+ const char *p;
+
+ for (p = s; *p && !c_isspace (*p); p++)
+ ;
+
+ if (p == s)
+ return false;
+ /* The header MUST NOT contain newlines. */
+ if (strchr (s, '\n'))
+ return false;
+ return true;
+}
+
static bool
check_user_specified_header (const char *s)
{
@@ -1867,6 +1913,47 @@ check_user_specified_header (const char *s)
return true;
}
+/* Removes a header from a request headers vector */
+
+static char **
+vec_remove_header (char **vec, const char *str)
+{
+ char* s;
+ int i, cnt; /* count of vector elements */
+ size_t n;
+
+ if (vec != NULL)
+ {
+ for (cnt = 0; vec[cnt]; cnt++)
+ ;
+ /* remove all duplicates */
+ i = 0;
+ while (vec[i])
+ {
+ s = strchrnul (vec[i], ':');
+ n = (size_t) (s - vec[i]);
+ if (n == strlen (str) && 0 == strncmp (vec[i], str, n))
+ {
+ if (cnt == 1)
+ {
+ vec[i] = NULL;
+ return vec;
+ }
+ else
+ {
+ vec[i] = xstrdup (vec[cnt - 1]);
+ vec = xrealloc (vec, (cnt + 1) * sizeof (char *));
+ vec[cnt] = NULL;
+ --cnt;
+ }
+ }
+ else
+ ++i;
+ }
+ }
+ return vec;
+}
+
/* Decode VAL into a number, according to ITEMS. */
static bool
@@ -1977,6 +2064,7 @@ cleanup (void)
xfree (opt.http_passwd);
xfree (opt.dot_style);
free_vec (opt.user_headers);
+ free_vec (opt.disabled_headers);
free_vec (opt.warc_user_headers);
# ifdef HAVE_SSL
xfree (opt.cert_file);
diff --git a/src/main.c b/src/main.c
index 65b7f3f3..a73b39d9 100644
--- a/src/main.c
+++ b/src/main.c
@@ -304,6 +304,7 @@ static struct cmdline_option option_data[] =
{ "delete-after", 0, OPT_BOOLEAN, "deleteafter", -1 },
{ "directories", 0, OPT_BOOLEAN, "dirstruct", -1 },
{ "directory-prefix", 'P', OPT_VALUE, "dirprefix", -1 },
+ { "disable-header", 0, OPT_VALUE, "disableheader", -1 },
{ "dns-cache", 0, OPT_BOOLEAN, "dnscache", -1 },
#ifdef HAVE_LIBCARES
{ "dns-servers", 0, OPT_VALUE, "dnsservers", -1 },
@@ -544,6 +545,7 @@ init_switches (void)
identical to "--foo", except it has opposite meaning and
it doesn't allow an argument. */
longopt = &long_options[o++];
+
longopt->name = no_prefix (cmdopt->long_name);
longopt->has_arg = no_argument;
/* Mask the value so we'll be able to recognize that we're
@@ -792,6 +794,8 @@ HTTP options:\n"),
--ignore-length ignore 'Content-Length' header field\n"),
N_("\
--header=STRING insert STRING among the headers\n"),
+ N_("\
+ --disable-header=STRING disable STRING among the headers\n"),
#ifdef HAVE_LIBZ
N_("\
--compression=TYPE choose compression, one of auto, gzip and
none. (default: none)\n"),
diff --git a/src/options.h b/src/options.h
index 881e2b2e..5559c6fa 100644
--- a/src/options.h
+++ b/src/options.h
@@ -147,6 +147,7 @@ struct options
char *http_user; /* HTTP username. */
char *http_passwd; /* HTTP password. */
char **user_headers; /* User-defined header(s). */
+ char **disabled_headers; /* User-disabled header(s) */
bool http_keep_alive; /* whether we use keep-alive */
bool use_proxy; /* Do we use proxy? */
diff --git a/testenv/Makefile.am b/testenv/Makefile.am
index b5a39ad2..4b3e2d08 100644
--- a/testenv/Makefile.am
+++ b/testenv/Makefile.am
@@ -95,6 +95,9 @@ if HAVE_PYTHON3
Test-cookie-domain-mismatch.py \
Test-cookie-expires.py \
Test-cookie.py \
+ Test-disable-default-headers.py \
+ Test-disable-headers-after.py \
+ Test-disable-headers-before.py \
Test-Head.py \
Test-hsts.py \
Test--https.py \
diff --git a/testenv/README b/testenv/README
index 6580bc99..d2f38a67 100644
--- a/testenv/README
+++ b/testenv/README
@@ -182,6 +182,10 @@ This section lists the currently supported File Rules and
their structure.
* RejectHeader : This list of Headers must NEVER occur in a request. It
uses the same value format as ExpectHeader.
+ * RejectHeaderField : This list of Headers Fields must NOT appear in a
request.
+ The value for this key is a list of strings where each header field is
represented as:
+ |-->Header Field: <Header Field Name>
+
* SendHeader : This list of Headers will be sent in EVERY response to a
request for the respective file. It follows the same value format as
ExpectHeader. Additionally you can specify a list of strings as <Header
Data>
diff --git a/testenv/Test-disable-default-headers.py
b/testenv/Test-disable-default-headers.py
new file mode 100644
index 00000000..22ea54ad
--- /dev/null
+++ b/testenv/Test-disable-default-headers.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+from sys import exit
+from test.http_test import HTTPTest
+from test.base_test import HTTP, HTTPS
+from misc.wget_file import WgetFile
+
+"""
+ This is test ensures that the --disable-header option removes default
request
+ headers. There aren't any user defined header.
+"""
+############# File Definitions ###############################################
+file_content = """Les paroles de la bouche d'un homme sont des eaux profondes;
+La source de la sagesse est un torrent qui jaillit."""
+
+Headers = {
+ 'Authorization',
+ 'User-Agent',
+ 'Referer',
+ 'Cache-Control',
+ 'Pragma',
+ 'If-Modified-Since',
+ 'Range',
+ 'Accept',
+ 'Accept-Encoding',
+ 'Host',
+ 'Connection',
+ 'Proxy-Connection',
+ 'Content-Type',
+ 'Content-Length',
+ 'Proxy-Authorization',
+ 'Cookie',
+ 'MyHeader',
+}
+
+WGET_OPTIONS = ''
+WGET_URLS = [[]]
+Files = [[]]
+
+for index, header in enumerate(Headers, start=1):
+ File_rules = {
+ "RejectHeaderField" : {
+ header
+ }
+ }
+ file_name = "File" + str(index)
+ Files[0].append (WgetFile(file_name, file_content, rules=File_rules))
+ WGET_OPTIONS += ' --disable-header="' + header + '"'
+ WGET_URLS[0].append (file_name)
+
+Servers = [HTTP]
+
+ExpectedReturnCode = 0
+
+################ Pre and Post Test Hooks #####################################
+pre_test = {
+ "ServerFiles" : Files
+}
+test_options = {
+ "WgetCommands" : WGET_OPTIONS,
+ "Urls" : WGET_URLS
+}
+post_test = {
+ "ExpectedRetcode" : ExpectedReturnCode
+}
+
+err = HTTPTest (
+ pre_hook=pre_test,
+ test_params=test_options,
+ post_hook=post_test,
+ protocols=Servers
+).begin ()
+
+exit (err)
diff --git a/testenv/Test-disable-headers-after.py
b/testenv/Test-disable-headers-after.py
new file mode 100644
index 00000000..c0ffc84d
--- /dev/null
+++ b/testenv/Test-disable-headers-after.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+from sys import exit
+from test.http_test import HTTPTest
+from test.base_test import HTTP, HTTPS
+from misc.wget_file import WgetFile
+
+"""
+ This is test ensures that the --disable-header option removes user headers
+ from the HTTP request when it's placed after --header="header: value".
+"""
+############# File Definitions ###############################################
+file_content = """Les paroles de la bouche d'un homme sont des eaux profondes;
+La source de la sagesse est un torrent qui jaillit."""
+
+Headers = {
+ 'Authorization',
+ 'User-Agent',
+ 'Referer',
+ 'Cache-Control',
+ 'Pragma',
+ 'If-Modified-Since',
+ 'Range',
+ 'Accept',
+ 'Accept-Encoding',
+ 'Host',
+ 'Connection',
+ 'Proxy-Connection',
+ 'Content-Type',
+ 'Content-Length',
+ 'Proxy-Authorization',
+ 'Cookie',
+ 'MyHeader',
+}
+
+WGET_OPTIONS = ''
+WGET_URLS = [[]]
+Files = [[]]
+
+# Define user defined headers
+for header in Headers:
+ WGET_OPTIONS += ' --header="' + header + ': any"'
+
+for index, header in enumerate(Headers, start=1):
+ File_rules = {
+ "RejectHeader" : {
+ header : 'any'
+ }
+ }
+ file_name = "File" + str(index)
+ Files[0].append(WgetFile(file_name, file_content, rules=File_rules))
+ WGET_OPTIONS += ' --disable-header="' + header + '"'
+ WGET_URLS[0].append(file_name)
+
+Servers = [HTTP]
+
+ExpectedReturnCode = 0
+
+################ Pre and Post Test Hooks #####################################
+pre_test = {
+ "ServerFiles" : Files
+}
+test_options = {
+ "WgetCommands" : WGET_OPTIONS,
+ "Urls" : WGET_URLS
+}
+post_test = {
+ "ExpectedRetcode" : ExpectedReturnCode
+}
+
+err = HTTPTest (
+ pre_hook=pre_test,
+ test_params=test_options,
+ post_hook=post_test,
+ protocols=Servers
+).begin ()
+
+exit (err)
diff --git a/testenv/Test-disable-headers-before.py
b/testenv/Test-disable-headers-before.py
new file mode 100644
index 00000000..d442b008
--- /dev/null
+++ b/testenv/Test-disable-headers-before.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+from sys import exit
+from test.http_test import HTTPTest
+from test.base_test import HTTP, HTTPS
+from misc.wget_file import WgetFile
+
+"""
+ This is test ensures that the --disable-header option doesn't remove user
headers
+ from the HTTP request when it's placed before --header="header: value".
+"""
+############# File Definitions ###############################################
+file_content = """Les paroles de la bouche d'un homme sont des eaux profondes;
+La source de la sagesse est un torrent qui jaillit."""
+
+Headers = {
+ 'Authorization',
+ 'User-Agent',
+ 'Referer',
+ 'Cache-Control',
+ 'Pragma',
+ 'If-Modified-Since',
+ 'Range',
+ 'Accept',
+ 'Accept-Encoding',
+ 'Host',
+ 'Connection',
+ 'Proxy-Connection',
+ 'Content-Type',
+ 'Content-Length',
+ 'Proxy-Authorization',
+ 'Cookie',
+ 'MyHeader',
+}
+
+WGET_OPTIONS = ''
+WGET_URLS = [[]]
+Files = [[]]
+
+for index, header in enumerate(Headers, start=1):
+ File_rules = {
+ "ExpectHeader" : {
+ header : 'any'
+ }
+ }
+ file_name = "File" + str(index)
+ Files[0].append (WgetFile(file_name, file_content, rules=File_rules))
+ WGET_OPTIONS += ' --disable-header="' + header + '"'
+ WGET_URLS[0].append (file_name)
+
+# Define user defined headers
+for header in Headers:
+ WGET_OPTIONS += ' --header="' + header + ': any"'
+
+Servers = [HTTP]
+
+ExpectedReturnCode = 0
+
+################ Pre and Post Test Hooks #####################################
+pre_test = {
+ "ServerFiles" : Files
+}
+test_options = {
+ "WgetCommands" : WGET_OPTIONS,
+ "Urls" : WGET_URLS
+}
+post_test = {
+ "ExpectedRetcode" : ExpectedReturnCode
+}
+
+err = HTTPTest (
+ pre_hook=pre_test,
+ test_params=test_options,
+ post_hook=post_test,
+ protocols=Servers
+).begin ()
+
+exit (err)
diff --git a/testenv/conf/reject_header_field.py
b/testenv/conf/reject_header_field.py
new file mode 100644
index 00000000..e1009cdd
--- /dev/null
+++ b/testenv/conf/reject_header_field.py
@@ -0,0 +1,12 @@
+from conf import rule
+
+""" Rule: RejectHeaderField
+This is a server side rule which expects a string list of Header Fields
+which should be blacklisted by the server for a particular file's requests.
+"""
+
+
address@hidden()
+class RejectHeaderField:
+ def __init__(self, header_fields):
+ self.header_fields = header_fields
diff --git a/testenv/server/http/http_server.py
b/testenv/server/http/http_server.py
index 2cc82fb9..6f358335 100644
--- a/testenv/server/http/http_server.py
+++ b/testenv/server/http/http_server.py
@@ -370,6 +370,14 @@ class _Handler(BaseHTTPRequestHandler):
header_line)
raise ServerError("Header " + header_line + ' received')
+ def RejectHeaderField(self, header_fields_obj):
+ rej_header_fields = header_fields_obj.header_fields
+ for field in rej_header_fields:
+ if field in self.headers:
+ self.send_error(400, 'Blacklisted Header Field %s received' %
+ field)
+ raise ServerError('Header Field %s received' % field)
+
def __log_request(self, method):
req = method + " " + self.path
self.server.request_headers.append(req)
--
2.21.0