bug-wget
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Bug-wget] [PATCH 06/27] Bugfix: Keep the download progress when alterna


From: Matthew White
Subject: [Bug-wget] [PATCH 06/27] Bugfix: Keep the download progress when alternating metalink:url
Date: Thu, 29 Sep 2016 06:02:46 +0200

* NEWS: Mention the effects of --continue over Metalink
* src/metalink.c (retrieve_from_metalink): On download error, resume
  output_stream with the next mres->url. Keep fully downloaded files
  started with --continue, otherwise rename/remove the file
* testenv/Makefile.am: Add new file
* testenv/Test-metalink-xml-continue.py: New file. Metalink/XML
  continue/keep existing files (HTTP 416) with --continue tests

Before this patch, with --continue, existing and/or fully retrieved
files which fail the sanity tests were renamed (--keep-badhash), or
removed.

This patch ensures that --continue doesn't rename/remove existing
and/or fully retrieved files (HTTP 416) which fail the sanity tests.
---
 NEWS                                  |   4 +
 src/metalink.c                        |  62 ++++++++----
 testenv/Makefile.am                   |   1 +
 testenv/Test-metalink-xml-continue.py | 185 ++++++++++++++++++++++++++++++++++
 4 files changed, 230 insertions(+), 22 deletions(-)
 create mode 100644 testenv/Test-metalink-xml-continue.py

diff --git a/NEWS b/NEWS
index 0299418..6011250 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,10 @@ Please send GNU Wget bug reports to <address@hidden>.
 
 * Changes in Wget X.Y.Z
 
+* When processing a Metalink file, with --continue resume partially
+  downloaded files and keep fully downloaded files even if they fail
+  the verification.
+
 * When processing a Metalink file, create the parent directories of a
   "path/file" destination file name:
   https://tools.ietf.org/html/rfc5854#section-4.1.2.1
diff --git a/src/metalink.c b/src/metalink.c
index 8294a7e..03a0bb1 100644
--- a/src/metalink.c
+++ b/src/metalink.c
@@ -96,12 +96,14 @@ retrieve_from_metalink (const metalink_t* metalink)
          1 -> verified successfully  */
       char sig_status = 0;
 
+      bool skip_mfile = false;
+
       output_stream = NULL;
 
       DEBUGP (("Processing metalink file %s...\n", quote (mfile->name)));
 
       /* Resources are sorted by priority.  */
-      for (mres_ptr = mfile->resources; *mres_ptr; mres_ptr++)
+      for (mres_ptr = mfile->resources; *mres_ptr && !skip_mfile; mres_ptr++)
         {
           metalink_resource_t *mres = *mres_ptr;
           metalink_checksum_t **mchksum_ptr, *mchksum;
@@ -117,26 +119,30 @@ retrieve_from_metalink (const metalink_t* metalink)
               continue;
             }
 
-          retr_err = METALINK_RETR_ERROR;
-
-          /* If output_stream is not NULL, then we have failed on
-             previous resource and are retrying. Thus, rename/remove
-             the file.  */
-          if (output_stream)
+          /* The file is fully downloaded, but some problems were
+             encountered (checksum failure?).  The loop had been
+             continued to switch to the next url.  */
+          if (output_stream && retr_err == RETROK)
             {
+              /* Do not rename/remove a continued file. Skip it.  */
+              if (opt.always_rest)
+                {
+                  skip_mfile = true;
+                  continue;
+                }
+
               fclose (output_stream);
               output_stream = NULL;
               badhash_or_remove (filename);
               xfree (filename);
             }
-          else if (filename)
+          else if (!output_stream && filename)
             {
-              /* Rename/remove the file downloaded previously before
-                 downloading it again.  */
-              badhash_or_remove (filename);
               xfree (filename);
             }
 
+          retr_err = METALINK_RETR_ERROR;
+
           /* Parse our resource URL.  */
           iri = iri_new ();
           set_uri_encoding (iri, opt.locale, true);
@@ -156,17 +162,29 @@ retrieve_from_metalink (const metalink_t* metalink)
               /* Avoid recursive Metalink from HTTP headers.  */
               bool _metalink_http = opt.metalink_over_http;
 
-              /* Assure proper local file name regardless of the URL
-                 of particular Metalink resource.
-                 To do that we create the local file here and put
-                 it as output_stream. We restore the original configuration
-                 after we are finished with the file.  */
-              if (opt.always_rest)
-                /* continue previous download */
-                output_stream = fopen (mfile->name, "ab");
+              /* If output_stream is not NULL, then we have failed on
+                 previous resource and are retrying. Thus, continue
+                 with the next resource.  Do not close output_stream
+                 while iterating over the resources, or the download
+                 progress will be lost.  */
+              if (output_stream)
+                {
+                  DEBUGP (("Previous resource failed, continue with next 
resource.\n"));
+                }
               else
-                /* create a file with an unique name */
-                output_stream = unique_create (mfile->name, true, &filename);
+                {
+                  /* Assure proper local file name regardless of the URL
+                     of particular Metalink resource.
+                     To do that we create the local file here and put
+                     it as output_stream. We restore the original configuration
+                     after we are finished with the file.  */
+                  if (opt.always_rest)
+                    /* continue previous download */
+                    output_stream = fopen (mfile->name, "ab");
+                  else
+                    /* create a file with an unique name */
+                    output_stream = unique_create (mfile->name, true, 
&filename);
+                }
 
               output_stream_regular = true;
 
@@ -563,7 +581,7 @@ gpg_skip_verification:
       /* Rename the file if error encountered; remove if option specified.
          Note: the file has been downloaded using *_loop. Therefore, it
          is not necessary to keep the file for continuated download.  */
-      if ((retr_err != RETROK || opt.delete_after)
+      if (((retr_err != RETROK && !opt.always_rest) || opt.delete_after)
            && filename != NULL && file_exists_p (filename))
         {
           badhash_or_remove (filename);
diff --git a/testenv/Makefile.am b/testenv/Makefile.am
index b68ef8f..94be023 100644
--- a/testenv/Makefile.am
+++ b/testenv/Makefile.am
@@ -29,6 +29,7 @@
 if METALINK_IS_ENABLED
   METALINK_TESTS = Test-metalink-http.py            \
     Test-metalink-xml.py                            \
+    Test-metalink-xml-continue.py                   \
     Test-metalink-xml-relpath.py                    \
     Test-metalink-xml-abspath.py
 else
diff --git a/testenv/Test-metalink-xml-continue.py 
b/testenv/Test-metalink-xml-continue.py
new file mode 100644
index 0000000..8e3c40a
--- /dev/null
+++ b/testenv/Test-metalink-xml-continue.py
@@ -0,0 +1,185 @@
+#!/usr/bin/env python3
+from sys import exit
+from test.http_test import HTTPTest
+from misc.wget_file import WgetFile
+import hashlib
+
+"""
+    This is to test Metalink/XML --continue support in Wget.
+"""
+############# File Definitions ###############################################
+bad = "Ouch!"
+
+# partial File1 to continue
+File0 = "Would you like"
+
+File1 = "Would you like some Tea?"
+File1_lowPref = "Do not take this"
+File1_sha256 = hashlib.sha256 (File1.encode ('UTF-8')).hexdigest ()
+
+File2 = "This is gonna be good"
+File2_lowPref = "Not this one too"
+File2_sha256 = hashlib.sha256 (File2.encode ('UTF-8')).hexdigest ()
+
+File3 = "A little more, please"
+File3_lowPref = "That's just too much"
+File3_sha256 = hashlib.sha256 (File3.encode ('UTF-8')).hexdigest ()
+
+File4 = "Maybe a biscuit?"
+File4_lowPref = "No, thanks"
+File4_sha256 = hashlib.sha256 (File4.encode ('UTF-8')).hexdigest ()
+
+File5 = "More Tea...?"
+File5_lowPref = "I have to go..."
+File5_sha256 = hashlib.sha256 (File5.encode ('UTF-8')).hexdigest ()
+
+MetaXml = \
+"""<?xml version="1.0" encoding="utf-8"?>
+<metalink version="3.0" xmlns="http://www.metalinker.org/";>
+  <publisher>
+    <name>GNU Wget</name>
+  </publisher>
+  <license>
+    <name>GNU GPL</name>
+    <url>http://www.gnu.org/licenses/gpl.html</url>
+  </license>
+  <identity>Wget Test Files</identity>
+  <version>1.2.3</version>
+  <description>Wget Test Files description</description>
+  <files>
+    <file name="File1">
+      <verification>
+        <hash type="sha256">{{FILE1_HASH}}</hash>
+      </verification>
+      <resources>
+        <url type="http" 
preference="25">http://{{SRV_HOST}}:{{SRV_PORT}}/File1_lowPref</url>
+        <url type="http" 
preference="30">http://{{SRV_HOST}}:{{SRV_PORT}}/File1</url>
+      </resources>
+    </file>
+    <file name="File2"> <!-- no good resources on purpose, this file shall be 
kept -->
+      <verification>
+        <hash type="sha256">{{FILE2_HASH}}</hash>
+      </verification>
+      <resources>
+        <url type="http" 
preference="35">http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file</url>
+        <url type="http" 
preference="40">http://{{SRV_HOST}}:{{SRV_PORT}}/404</url>
+      </resources>
+    </file>
+    <file name="File3">
+      <verification>
+        <hash type="sha256">{{FILE3_HASH}}</hash>
+      </verification>
+      <resources>
+        <url type="http" 
preference="25">http://{{SRV_HOST}}:{{SRV_PORT}}/File3_lowPref</url>
+        <url type="http" 
preference="30">http://{{SRV_HOST}}:{{SRV_PORT}}/File3</url>
+      </resources>
+    </file>
+    <file name="File4"> <!-- no good resources on purpose, this file shall be 
kept -->
+      <verification>
+        <hash type="sha256">{{FILE4_HASH}}</hash>
+      </verification>
+      <resources>
+        <url type="http" 
preference="35">http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file</url>
+      </resources>
+    </file>
+    <file name="File5">
+      <verification>
+        <hash type="sha256">{{FILE5_HASH}}</hash>
+      </verification>
+      <resources>
+        <url type="http" 
preference="25">http://{{SRV_HOST}}:{{SRV_PORT}}/File5_lowPref</url>
+        <url type="http" 
preference="30">http://{{SRV_HOST}}:{{SRV_PORT}}/File5</url>
+      </resources>
+    </file>
+  </files>
+</metalink>
+"""
+
+wrong_file = WgetFile ("wrong_file", bad)
+
+# partial File1_down to continue
+File0_part = WgetFile ("File1", File0)
+
+File1_orig = WgetFile ("File1", File1)
+File1_down = WgetFile ("File1", File1)
+File1_nono = WgetFile ("File1_lowPref", File1_lowPref)
+
+# no good resources on purpose, this file shall be kept
+File2_ouch = WgetFile ("File2", bad)
+
+File3_orig = WgetFile ("File3", File3)
+File3_down = WgetFile ("File3", File3)
+File3_nono = WgetFile ("File3_lowPref", File3_lowPref)
+
+# no good resources on purpose, this file shall be kept
+File4_ouch = WgetFile ("File4", bad)
+
+File5_orig = WgetFile ("File5", File5)
+File5_down = WgetFile ("File5", File5)
+File5_nono = WgetFile ("File5_lowPref", File5_lowPref)
+
+MetaFile = WgetFile ("test.metalink", MetaXml)
+
+WGET_OPTIONS = "--continue --input-metalink test.metalink"
+WGET_URLS = [[]]
+
+Files = [[
+    wrong_file,
+    File1_orig, File1_nono,
+    File3_orig, File3_nono,
+    File5_orig, File5_nono
+]]
+Existing_Files = [
+    File0_part, # partial File1_down to continue
+    File2_ouch, # wrong but fully downloaded file
+    File3_down, # right and fully downloaded file
+    File4_ouch, # wrong but fully downloaded file
+    MetaFile
+]
+
+ExpectedReturnCode = 1
+ExpectedDownloadedFiles = [
+    File1_down, # continued file from File0_part
+    File2_ouch, # wrong but fully downloaded file
+    File3_down, # right and fully downloaded file
+    File4_ouch, # wrong but fully downloaded file
+    File5_down, # newly fully donwloaded file
+    MetaFile
+]
+
+################ Pre and Post Test Hooks #####################################
+pre_test = {
+    "ServerFiles"       : Files,
+    "LocalFiles"        : Existing_Files
+}
+test_options = {
+    "WgetCommands"      : WGET_OPTIONS,
+    "Urls"              : WGET_URLS
+}
+post_test = {
+    "ExpectedFiles"     : ExpectedDownloadedFiles,
+    "ExpectedRetcode"   : ExpectedReturnCode
+}
+
+http_test = HTTPTest (
+                pre_hook=pre_test,
+                test_params=test_options,
+                post_hook=post_test,
+)
+
+http_test.server_setup()
+### Get and use dynamic server sockname
+srv_host, srv_port = http_test.servers[0].server_inst.socket.getsockname ()
+
+MetaXml = MetaXml.replace('{{FILE1_HASH}}', File1_sha256)
+MetaXml = MetaXml.replace('{{FILE2_HASH}}', File2_sha256)
+MetaXml = MetaXml.replace('{{FILE3_HASH}}', File3_sha256)
+MetaXml = MetaXml.replace('{{FILE4_HASH}}', File4_sha256)
+MetaXml = MetaXml.replace('{{FILE5_HASH}}', File5_sha256)
+MetaXml = MetaXml.replace('{{SRV_HOST}}', srv_host)
+MetaXml = MetaXml.replace('{{SRV_PORT}}', str (srv_port))
+MetaFile.content = MetaXml
+
+err = http_test.begin ()
+
+exit (err)
-- 
2.7.3




reply via email to

[Prev in Thread] Current Thread [Next in Thread]