From 4fbe0ee58f41e9d6bceb665739cdab5210ed5602 Mon Sep 17 00:00:00 2001
From: Hubert Tarasiuk
Date: Sat, 9 May 2015 22:47:24 +0200
Subject: [PATCH 5/6] Prototype of If-Modified-Since.
* src/wget.h: Add COND_GET enum for dt.
* src/http.c (time_to_rfc1123): Convert time_t to HTTP time.
* src/http.c (initialize_request): Include If-Modified-Since header
if appropriate.
* src/http.c (set_file_timestamp): Separate this code from check_file_output.
* src/http.c (check_file_output): Use set_file_timestamp.
* src/http.c (gethttp): Treat 304 code as if entire file was downloaded (to
be improved).
* src/http.c (http_loop): Load filename to hstat if condget was requested,
use COND_GET if requested and current timestamp can be obtained.
---
src/http.c | 213 ++++++++++++++++++++++++++++++++++++++++++++-----------------
src/url.h | 3 +-
src/wget.h | 3 +-
3 files changed, 158 insertions(+), 61 deletions(-)
diff --git a/src/http.c b/src/http.c
index 54eb106..5b95c23 100644
--- a/src/http.c
+++ b/src/http.c
@@ -1681,6 +1681,59 @@ read_response_body (struct http_stat *hs, int sock, FILE *fp, wgint contlen,
} while (0)
#endif /* def __VMS [else] */
+/*
+   Format TIME as an rfc1123-date (one of the valid HTTP date formats)
+   into BUF, writing at most BUFSIZE bytes including the final NUL;
+   longer output is silently truncated by snprintf.  Logs a message and
+   aborts if gmtime cannot convert TIME; otherwise returns RETROK.
+
+   HTTP-date    = rfc1123-date | rfc850-date | asctime-date
+   rfc1123-date = wkday "," SP date1 SP time SP "GMT"
+   rfc850-date  = weekday "," SP date2 SP time SP "GMT"
+   asctime-date = wkday SP date3 SP time SP 4DIGIT
+   date1        = 2DIGIT SP month SP 4DIGIT
+                  ; day month year (e.g., 02 Jun 1982)
+   date2        = 2DIGIT "-" month "-" 2DIGIT
+                  ; day-month-year (e.g., 02-Jun-82)
+   date3        = month SP ( 2DIGIT | ( SP 1DIGIT ))
+                  ; month day (e.g., Jun  2)
+   time         = 2DIGIT ":" 2DIGIT ":" 2DIGIT
+                  ; 00:00:00 - 23:59:59
+   wkday        = "Mon" | "Tue" | "Wed" | "Thu" | "Fri" | "Sat" | "Sun"
+   weekday      = "Monday" | "Tuesday" | "Wednesday"
+                  | "Thursday" | "Friday" | "Saturday" | "Sunday"
+   month        = "Jan" | "Feb" | "Mar" | "Apr" | "May" | "Jun"
+                  | "Jul" | "Aug" | "Sep" | "Oct" | "Nov" | "Dec"
+
+   source: RFC2616 */
+static uerr_t
+time_to_rfc1123 (time_t time, char *buf, size_t bufsize)
+{
+  struct tm *gtm;
+  static const char *wkday[] = { "Sun", "Mon", "Tue", "Wed",
+                                 "Thu", "Fri", "Sat" };
+  static const char *month[] = { "Jan", "Feb", "Mar", "Apr",
+                                 "May", "Jun", "Jul", "Aug",
+                                 "Sep", "Oct", "Nov", "Dec" };
+  /* rfc1123 example: Thu, 01 Jan 1998 22:12:57 GMT */
+  static const char *time_format = "%s, %02d %s %04d %02d:%02d:%02d GMT";
+
+  gtm = gmtime (&time);
+  if (!gtm)
+    {
+      logprintf (LOG_NOTQUIET,
+                 _("gmtime failed. This is probably a bug.\n"));
+      abort ();
+    }
+
+  snprintf (buf, bufsize, time_format, wkday[gtm->tm_wday],
+            gtm->tm_mday, month[gtm->tm_mon],
+            gtm->tm_year + 1900, gtm->tm_hour,
+            gtm->tm_min, gtm->tm_sec);
+
+  return RETROK;
+}
+
static struct request *
initialize_request (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
bool inhibit_keep_alive, bool *basic_auth_finished,
@@ -1723,6 +1776,12 @@ initialize_request (struct url *u, struct http_stat *hs, int *dt, struct url *pr
/* ... but some HTTP/1.0 caches doesn't implement Cache-Control. */
request_set_header (req, "Pragma", "no-cache", rel_none);
}
+ if (*dt & COND_GET)
+ {
+ char strtime[32];
+ time_to_rfc1123 (hs->orig_file_tstamp, strtime, countof (strtime));
+ request_set_header (req, "If-Modified-Since", xstrdup(strtime), rel_value);
+ }
if (hs->restval)
request_set_header (req, "Range",
aprintf ("bytes=%s-",
@@ -2025,6 +2084,69 @@ establish_connection (struct url *u, struct url **conn_ref,
}
+/* Stat the local copy of HS->local_file (with -K, its ORIG_SFX backup if
+   that exists) and, if found, store its name, size and mtime in HS and
+   set HS->timestamp_checked.  Does nothing when HS->local_file is NULL
+   or no such file exists.  Always returns RETROK.  */
static uerr_t
+set_file_timestamp (struct http_stat *hs)
+{
+  size_t filename_len;
+  char *orig_name;
+  char *local_filename = NULL;
+  struct_stat st;
+
+  if (hs->local_file == NULL)
+    return RETROK;
+
+  if (opt.backup_converted)
+    /* If -K is specified, we'll act on the assumption that it was specified
+       last time these files were downloaded as well, and instead of just
+       comparing local file X against server file X, we'll compare local
+       file X.orig (if extant, else X) against server file X.  If -K
+       _wasn't_ specified last time, or the server contains files called
+       *.orig, -N will be back to not operating correctly with -k. */
+    {
+      /* Would a single s[n]printf() call be faster?  --dan
+
+         Definitely not.  sprintf() is horribly slow.  It's a
+         different question whether the difference between the two
+         affects a program.  Usually I'd say "no", but at one
+         point I profiled Wget, and found that a measurable and
+         non-negligible amount of time was lost calling sprintf()
+         in url.c.  Replacing sprintf with inline calls to
+         strcpy() and number_to_string() made a difference.
+         --hniksic */
+      filename_len = strlen (hs->local_file);
+      orig_name = alloca (filename_len + sizeof (ORIG_SFX));
+      memcpy (orig_name, hs->local_file, filename_len);
+      memcpy (orig_name + filename_len, ORIG_SFX, sizeof (ORIG_SFX));
+      /* Try to stat() the .orig file. */
+      if (stat (orig_name, &st) == 0)
+        local_filename = orig_name;
+    }
+
+  /* Couldn't stat() X.orig (or -K is off), so try to stat() X itself. */
+  if (local_filename == NULL && stat (hs->local_file, &st) == 0)
+    local_filename = hs->local_file;
+
+  if (local_filename != NULL)
+    /* There was a local file; remember it so we can check later whether
+       the server's version is the one we already have and skip it. */
+    {
+      hs->orig_file_name = xstrdup (local_filename);
+      hs->orig_file_size = st.st_size;
+      hs->orig_file_tstamp = st.st_mtime;
+#ifdef WINDOWS
+      /* Windows mtime granularity is 2 seconds, so add 1 for comparison. */
+      ++hs->orig_file_tstamp;
+#endif
+      hs->timestamp_checked = true;
+    }
+
+  return RETROK;
+}
+
+static uerr_t
check_file_output (struct url *u, struct http_stat *hs,
struct response *resp, char *hdrval, size_t hdrsize)
{
@@ -2077,61 +2199,9 @@ check_file_output (struct url *u, struct http_stat *hs,
/* Support timestamping */
if (opt.timestamping && !hs->timestamp_checked)
{
- size_t filename_len = strlen (hs->local_file);
- char *filename_plus_orig_suffix = alloca (filename_len + sizeof (ORIG_SFX));
- bool local_dot_orig_file_exists = false;
- char *local_filename = NULL;
- struct_stat st;
-
- if (opt.backup_converted)
- /* If -K is specified, we'll act on the assumption that it was specified
- last time these files were downloaded as well, and instead of just
- comparing local file X against server file X, we'll compare local
- file X.orig (if extant, else X) against server file X. If -K
- _wasn't_ specified last time, or the server contains files called
- *.orig, -N will be back to not operating correctly with -k. */
- {
- /* Would a single s[n]printf() call be faster? --dan
-
- Definitely not. sprintf() is horribly slow. It's a
- different question whether the difference between the two
- affects a program. Usually I'd say "no", but at one
- point I profiled Wget, and found that a measurable and
- non-negligible amount of time was lost calling sprintf()
- in url.c. Replacing sprintf with inline calls to
- strcpy() and number_to_string() made a difference.
- --hniksic */
- memcpy (filename_plus_orig_suffix, hs->local_file, filename_len);
- memcpy (filename_plus_orig_suffix + filename_len,
- ORIG_SFX, sizeof (ORIG_SFX));
-
- /* Try to stat() the .orig file. */
- if (stat (filename_plus_orig_suffix, &st) == 0)
- {
- local_dot_orig_file_exists = true;
- local_filename = filename_plus_orig_suffix;
- }
- }
-
- if (!local_dot_orig_file_exists)
- /* Couldn't stat() .orig, so try to stat() . */
- if (stat (hs->local_file, &st) == 0)
- local_filename = hs->local_file;
-
- if (local_filename != NULL)
- /* There was a local file, so we'll check later to see if the version
- the server has is the same version we already have, allowing us to
- skip a download. */
- {
- hs->orig_file_name = xstrdup (local_filename);
- hs->orig_file_size = st.st_size;
- hs->orig_file_tstamp = st.st_mtime;
-#ifdef WINDOWS
- /* Modification time granularity is 2 seconds for Windows, so
- increase local time by 1 second for later comparison. */
- ++hs->orig_file_tstamp;
-#endif
- }
+ uerr_t timestamp_err = set_file_timestamp (hs);
+ if (timestamp_err != RETROK)
+ return timestamp_err;
}
return RETROK;
}
@@ -2421,6 +2491,9 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
POST). */
bool head_only = !!(*dt & HEAD_ONLY);
+ /* Whether conditional get request will be issued */
+ bool cond_get = !!(*dt & COND_GET);
+
char *head = NULL;
struct response *resp = NULL;
char hdrval[512];
@@ -3020,6 +3093,16 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
}
}
+ if (cond_get && statcode == HTTP_STATUS_NOT_MODIFIED)
+ {
+ logprintf (LOG_VERBOSE, _("File %s not modified on server. "
+ "Ommiting download.\n\n"), quote(hs->local_file));
+ *dt |= RETROKF;
+ CLOSE_FINISH (sock);
+ retval = RETRUNNEEDED;
+ goto cleanup;
+ }
+
if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE
|| (!opt.timestamping && hs->restval > 0 && statcode == HTTP_STATUS_OK
&& contrange == 0 && contlen >= 0 && hs->restval >= contlen))
@@ -3263,14 +3346,26 @@ http_loop (struct url *u, struct url *original_url, char **newloc,
if (opt.content_disposition && opt.always_rest)
send_head_first = true;
- /* Send preliminary HEAD request if -N is given and we have an existing
- * destination file. */
if (!opt.output_document)
file_name = url_file_name (opt.trustservernames ? u : original_url, NULL);
else
file_name = xstrdup (opt.output_document);
- if (opt.timestamping && (file_exists_p (file_name)
- || opt.content_disposition))
+
+ /* Use conditional get request if requested
+ * and if timestamp is known. */
+ if (opt.cond_get && file_exists_p (file_name) && !send_head_first)
+ {
+ *dt |= COND_GET;
+ {
+ uerr_t timestamp_err = set_file_timestamp (&hstat);
+ if (timestamp_err != RETROK)
+ return timestamp_err;
+ }
+ }
+ /* Send preliminary HEAD request if -N is given and we have an existing
+ * destination file. */
+ else if ((opt.timestamping && file_exists_p (file_name))
+ || opt.content_disposition)
send_head_first = true;
xfree (file_name);
diff --git a/src/url.h b/src/url.h
index a543f3d..dbb06a1 100644
--- a/src/url.h
+++ b/src/url.h
@@ -51,7 +51,8 @@ as that of the covered work. */
Defined here to avoid repetition later. #### This will require
rework. */
#define ALLOW_CLOBBER (opt.noclobber || opt.always_rest || opt.timestamping \
- || opt.dirstruct || opt.output_document || opt.backups > 0)
+ || opt.dirstruct || opt.output_document || opt.backups > 0\
+ || opt.cond_get)
/* Specifies how, or whether, user auth information should be included
* in URLs regenerated from URL parse structures. */
diff --git a/src/wget.h b/src/wget.h
index 8d2b0f1..1440123 100644
--- a/src/wget.h
+++ b/src/wget.h
@@ -331,7 +331,8 @@ enum
SEND_NOCACHE = 0x0008, /* send Pragma: no-cache directive */
ACCEPTRANGES = 0x0010, /* Accept-ranges header was found */
ADDED_HTML_EXTENSION = 0x0020, /* added ".html" extension due to -E */
- TEXTCSS = 0x0040 /* document is of type text/css */
+ TEXTCSS = 0x0040, /* document is of type text/css */
+ COND_GET = 0x0080, /* conditional get request */
};
/* Universal error type -- used almost everywhere. Error reporting of
--
2.4.0