bug-wget
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Bug-wget] [PATCH] Fixed problem under content-disposition filename and


From: Dennis, CHENG Renquan
Subject: [Bug-wget] [PATCH] Fixed problem under content-disposition filename and recursive downloading
Date: Thu, 16 Sep 2010 03:11:13 +0800

From: "Dennis, CHENG Renquan" <address@hidden>

The original wget-1.11.4 has a problem when both the content-disposition
filename and recursive downloading are enabled:

  $ wget -P ~/archives/ -e contentdisposition=on -mS 
'http://tree.celinuxforum.org/CelfPubWiki/ELC2009Presentations?action=AttachFile&do=get&target=LTTng-presentation-celf-2009-0.2.pdf'
it writes out the file
  ~/archives/LTTng-presentation-celf-2009-0.2.pdf
so it seems that mirror downloading (which enables recursion) has no effect,
while the result actually wanted is
  
~/archives/tree.celinuxforum.org/CelfPubWiki/LTTng-presentation-celf-2009-0.2.pdf

Changes:
1. The url_file_name interface gains a replaced_filename parameter, so it can
   return either the default filename or a custom one supplied by the caller;
   in this example, that is the filename parsed from the Content-Disposition
   header.
2. The return value of parse_content_disposition is no longer prefixed with
   opt.dir_prefix; the function now returns just the bare parsed filename,
   which makes it tidier.
3. Accordingly, the unit test of parse_content_disposition has also been
   changed: the opt.dir_prefix test items are no longer used, so they have
   been removed.

I have run "make check", which gives these results:
  ==================
  64 tests were run
  51 PASS, 0 FAIL
  13 SKIP, 0 UNKNOWN
  ==================
  
Signed-off-by: "Dennis, CHENG Renquan" <address@hidden>

---
 wget-1.12/src/ftp.c  |   10 ++++----
 wget-1.12/src/http.c |   58 +++++++++++++++++++++------------------------------
 wget-1.12/src/url.c  |   35 ++++++++++++++++++++----------
 wget-1.12/src/url.h  |    2 -
 4 files changed, 53 insertions(+), 52 deletions(-)

diff -uNrp wget-1.12.orig/src/ftp.c wget-1.12/src/ftp.c
--- wget-1.12.orig/src/ftp.c    2009-09-22 10:59:21.000000000 +0800
+++ wget-1.12/src/ftp.c 2010-09-16 02:17:33.005589521 +0800
@@ -1366,7 +1366,7 @@ ftp_loop_internal (struct url *u, struct
   else
     {
       /* URL-derived file.  Consider "-O file" name. */
-      con->target = url_file_name (u);
+      con->target = url_file_name (u, NULL);
       if (!opt.output_document)
         locf = con->target;
       else
@@ -1480,7 +1480,7 @@ ftp_loop_internal (struct url *u, struct
             {
               /* Re-determine the file name. */
               xfree_null (con->target);
-              con->target = url_file_name (u);
+              con->target = url_file_name (u, NULL);
               locf = con->target;
             }
           continue;
@@ -1604,7 +1604,7 @@ ftp_get_listing (struct url *u, ccon *co
   /* Find the listing file name.  We do it by taking the file name of
      the URL and replacing the last component with the listing file
      name.  */
-  uf = url_file_name (u);
+  uf = url_file_name (u, NULL);
   lf = file_merge (uf, LIST_FILENAME);
   xfree (uf);
   DEBUGP ((_("Using %s as listing tmp file.\n"), quote (lf)));
@@ -1698,7 +1698,7 @@ ftp_retrieve_list (struct url *u, struct
       ofile = xstrdup (u->file);
       url_set_file (u, f->name);
 
-      con->target = url_file_name (u);
+      con->target = url_file_name (u, NULL);
       err = RETROK;
 
       dlthis = true;
@@ -2146,7 +2146,7 @@ ftp_loop (struct url *u, int *dt, struct
               char *filename = (opt.output_document
                                 ? xstrdup (opt.output_document)
                                 : (con.target ? xstrdup (con.target)
-                                   : url_file_name (u)));
+                                   : url_file_name (u, NULL)));
               res = ftp_index (filename, u, f);
               if (res == FTPOK && opt.verbose)
                 {
diff -uNrp wget-1.12.orig/src/http.c wget-1.12/src/http.c
--- wget-1.12.orig/src/http.c   2009-09-22 11:02:18.000000000 +0800
+++ wget-1.12/src/http.c        2010-09-16 02:42:42.333592252 +0800
@@ -1034,7 +1034,13 @@ extract_param (const char **source, para
    false.
 
    The file name is stripped of directory components and must not be
-   empty.  */
+   empty.
+
+   Historically, this function returned filename prefixed with opt.dir_prefix,
+   now that logic is handled by the caller, new code should pay attention,
+   changed by crq, Sep 2010.
+
+*/
 
 static bool
 parse_content_disposition (const char *hdr, char **filename)
@@ -1052,25 +1058,7 @@ parse_content_disposition (const char *h
           value.b = 1 + (last_slash ? last_slash : last_bs);
         if (value.b == value.e)
           continue;
-        /* Start with the directory prefix, if specified. */
-        if (opt.dir_prefix)
-          {
-            int prefix_length = strlen (opt.dir_prefix);
-            bool add_slash = (opt.dir_prefix[prefix_length - 1] != '/');
-            int total_length;
-
-            if (add_slash)
-              ++prefix_length;
-            total_length = prefix_length + (value.e - value.b);
-            *filename = xmalloc (total_length + 1);
-            strcpy (*filename, opt.dir_prefix);
-            if (add_slash)
-              (*filename)[prefix_length - 1] = '/';
-            memcpy (*filename + prefix_length, value.b, (value.e - value.b));
-            (*filename)[total_length] = '\0';
-          }
-        else
-          *filename = strdupdelim (value.b, value.e);
+       *filename = strdupdelim (value.b, value.e);
         return true;
       }
   return false;
@@ -1981,16 +1969,24 @@ gethttp (struct url *u, struct http_stat
    * hstat.local_file is set by http_loop to the argument of -O. */
   if (!hs->local_file)
     {
+      char *local_file = NULL;
+
       /* Honor Content-Disposition whether possible. */
       if (!opt.content_disposition
           || !resp_header_copy (resp, "Content-Disposition",
                                 hdrval, sizeof (hdrval))
-          || !parse_content_disposition (hdrval, &hs->local_file))
+          || !parse_content_disposition (hdrval, &local_file))
         {
           /* The Content-Disposition header is missing or broken.
            * Choose unique file name according to given URL. */
-          hs->local_file = url_file_name (u);
+          hs->local_file = url_file_name (u, NULL);
         }
+      else
+       {
+         DEBUGP(("Parsed filename from Content-Disposition: %s\n",
+                 local_file));
+         hs->local_file = url_file_name(u, local_file);
+       }
     }
 
   /* TODO: perform this check only once. */
@@ -2457,7 +2453,7 @@ http_loop (struct url *u, char **newloc,
     }
   else if (!opt.content_disposition)
     {
-      hstat.local_file = url_file_name (u);
+      hstat.local_file = url_file_name (u, NULL);
       got_name = true;
     }
 
@@ -2497,7 +2493,7 @@ File %s already there; not retrieving.\n
 
   /* Send preliminary HEAD request if -N is given and we have an existing
    * destination file. */
-  file_name = url_file_name (u);
+  file_name = url_file_name (u, NULL);
   if (opt.timestamping
       && !opt.content_disposition
       && file_exists_p (file_name))
@@ -3356,18 +3352,13 @@ test_parse_content_disposition()
   int i;
   struct {
     char *hdrval;
-    char *opt_dir_prefix;
     char *filename;
     bool result;
   } test_array[] = {
-    { "filename=\"file.ext\"", NULL, "file.ext", true },
-    { "filename=\"file.ext\"", "somedir", "somedir/file.ext", true },
-    { "attachment; filename=\"file.ext\"", NULL, "file.ext", true },
-    { "attachment; filename=\"file.ext\"", "somedir", "somedir/file.ext", true 
},
-    { "attachment; filename=\"file.ext\"; dummy", NULL, "file.ext", true },
-    { "attachment; filename=\"file.ext\"; dummy", "somedir", 
"somedir/file.ext", true },
-    { "attachment", NULL, NULL, false },
-    { "attachment", "somedir", NULL, false },
+    { "filename=\"file.ext\"", "file.ext", true },
+    { "attachment; filename=\"file.ext\"", "file.ext", true },
+    { "attachment; filename=\"file.ext\"; dummy", "file.ext", true },
+    { "attachment", NULL, false },
   };
 
   for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
@@ -3375,7 +3366,6 @@ test_parse_content_disposition()
       char *filename;
       bool res;
 
-      opt.dir_prefix = test_array[i].opt_dir_prefix;
       res = parse_content_disposition (test_array[i].hdrval, &filename);
 
       mu_assert ("test_parse_content_disposition: wrong result",
diff -uNrp wget-1.12.orig/src/url.c wget-1.12/src/url.c
--- wget-1.12.orig/src/url.c    2009-09-22 11:05:53.000000000 +0800
+++ wget-1.12/src/url.c 2010-09-16 02:15:18.005592216 +0800
@@ -1492,7 +1492,7 @@ append_dir_structure (const struct url *
    possible.  Does not create directories on the file system.  */
 
 char *
-url_file_name (const struct url *u)
+url_file_name (const struct url *u, char *replaced_filename)
 {
   struct growable fnres;        /* stands for "file name result" */
 
@@ -1547,18 +1547,29 @@ url_file_name (const struct url *u)
       append_dir_structure (u, &fnres);
     }
 
-  /* Add the file name. */
-  if (fnres.tail)
-    append_char ('/', &fnres);
-  u_file = *u->file ? u->file : index_filename;
-  append_uri_pathel (u_file, u_file + strlen (u_file), false, &fnres);
-
-  /* Append "?query" to the file name. */
-  u_query = u->query && *u->query ? u->query : NULL;
-  if (u_query)
+  if (!replaced_filename)
     {
-      append_char (FN_QUERY_SEP, &fnres);
-      append_uri_pathel (u_query, u_query + strlen (u_query), true, &fnres);
+      /* Add the file name. */
+      if (fnres.tail)
+       append_char ('/', &fnres);
+      u_file = *u->file ? u->file : index_filename;
+      append_uri_pathel (u_file, u_file + strlen (u_file), false, &fnres);
+
+      /* Append "?query" to the file name. */
+      u_query = u->query && *u->query ? u->query : NULL;
+      if (u_query)
+       {
+         append_char (FN_QUERY_SEP, &fnres);
+         append_uri_pathel (u_query, u_query + strlen (u_query),
+                            true, &fnres);
+       }
+    }
+  else
+    {
+      if (fnres.tail)
+       append_char ('/', &fnres);
+      u_file = replaced_filename;
+      append_uri_pathel (u_file, u_file + strlen (u_file), false, &fnres);
     }
 
   /* Zero-terminate the file name. */
diff -uNrp wget-1.12.orig/src/url.h wget-1.12/src/url.h
--- wget-1.12.orig/src/url.h    2009-09-05 00:31:54.000000000 +0800
+++ wget-1.12/src/url.h 2010-09-16 02:15:42.209591194 +0800
@@ -98,8 +98,7 @@ int scheme_default_port (enum url_scheme
 void scheme_disable (enum url_scheme);
 
 char *url_string (const struct url *, enum url_auth_mode);
-char *url_file_name (const struct url *);
+char *url_file_name (const struct url *, char *);
 
 char *uri_merge (const char *, const char *);
 
--
Git 1.7.1.1

CHENG Renquan
38 St Thomas Walk, Singapore 238118      http://crquan.fedorapeople.org




reply via email to

[Prev in Thread] Current Thread [Next in Thread]