[Top][All Lists]

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

RFC: new cp option: --efficient-sparse=HOW

From: Jim Meyering
Subject: RFC: new cp option: --efficient-sparse=HOW
Date: Mon, 31 Jan 2011 22:46:58 +0100

Now that we can read sparse files efficiently,
what if I want to copy a 20PiB sparse file, and I want to
be sure that cp does so efficiently?  Few people can afford
to wait around while a normal processor and storage system process
that much raw data.  But if it's a sparse file and the src and dest
file systems have the right support (FIEMAP ioctl), then it'll be
copied in the time it takes to make a few syscalls.

Currently, when the efficient sparse copy fails, cp falls back
on the regular, expensive, read-every-byte approach.

This proposal adds an option, --efficient-sparse=required,
to make cp fail if the initial attempt to read the sparse file fails,
rather than resorting to the regular (very slow in the above case) copy.

The default is --efficient-sparse=auto, and for symmetry,
I've provided --efficient-sparse=never, in case someone finds
a reason to want to skip the ioctl.

You can demonstrate this new feature on a tmpfs file system,
since it supports sparse files, but not the FIEMAP ioctl:

    $ cd /dev/shm
    $ truncate -s128K k
    $ cp --efficient=required k kk
    cp: unable to read sparse `k' efficiently
    [Exit 1]

Here's a preliminary patch
(not including texinfo changes).
I'll add tests, too, of course.

Feedback on the option name or anything else would be most welcome.
The sooner the better.

diff --git a/src/copy.c b/src/copy.c
index 04c678d..72425af 100644
--- a/src/copy.c
+++ b/src/copy.c
@@ -305,8 +305,8 @@ write_zeros (int fd, uint64_t n_bytes)
    copy, and thus makes copying sparse files much more efficient.
    Upon a successful copy, return true.  If the initial extent scan
    fails, set *NORMAL_COPY_REQUIRED to true and return false.
-   Upon any other failure, set *NORMAL_COPY_REQUIRED to false and
-   return false.  */
+   Upon any other failure, give a diagnostic, set *NORMAL_COPY_REQUIRED
+   to false and return false.  */
 static bool
 extent_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
              off_t src_total_size, bool make_holes,
@@ -931,6 +931,7 @@ copy_reg (char const *src_name, char const *dst_name,
       /* Deal with sparse files.  */
       bool make_holes = false;

+      bool src_is_sparse = false;
       if (S_ISREG (sb.st_mode))
           /* Even with --sparse=always, try to create holes only
@@ -943,9 +944,13 @@ copy_reg (char const *src_name, char const *dst_name,
              blocks.  If the file has fewer blocks than would normally be
              needed for a file of its size, then at least one of the blocks in
              the file is a hole.  */
           if (x->sparse_mode == SPARSE_AUTO && S_ISREG (src_open_sb.st_mode)
               && ST_NBLOCKS (src_open_sb) < src_open_sb.st_size / 
-            make_holes = true;
+            {
+              make_holes = true;
+              src_is_sparse = true;
+            }

@@ -977,18 +982,30 @@ copy_reg (char const *src_name, char const *dst_name,
       buf_alloc = xmalloc (buf_size + buf_alignment_slop);
       buf = ptr_align (buf_alloc, buf_alignment);

-      bool normal_copy_required;
+      bool normal_copy_required = true;
       /* Perform an efficient extent-based copy, falling back to the
          standard copy only if the initial extent scan fails.  If the
          '--sparse=never' option is specified, write all data but use
          any extents to read more efficiently.  */
-      if (extent_copy (source_desc, dest_desc, buf, buf_size,
-                       src_open_sb.st_size, make_holes,
-                       src_name, dst_name, &normal_copy_required))
+      if (x->sparse_efficiency != SPARSE_EFF_NEVER
+          && extent_copy (source_desc, dest_desc, buf, buf_size,
+                          src_open_sb.st_size, make_holes,
+                          src_name, dst_name, &normal_copy_required))
         goto preserve_metadata;

       if (! normal_copy_required)
+          /* extent_copy already diagnosed the failure */
+          return_val = false;
+          goto close_src_and_dst_desc;
+        }
+      /* extent_copy failed, and we are instructed not to fall-back */
+      if (src_is_sparse && x->sparse_efficiency == SPARSE_EFF_REQUIRED)
+        {
+          error (0, 0, _("unable to read sparse %s efficiently"),
+                 quote (src_name));
           return_val = false;
           goto close_src_and_dst_desc;
@@ -2519,6 +2536,7 @@ cp_options_default (struct cp_options *x)
   x->chown_privileges = x->owner_privileges = (geteuid () == 0);
+  x->sparse_efficiency = SPARSE_EFF_AUTO;

 /* Return true if it's OK for chown to fail, where errno is
diff --git a/src/copy.h b/src/copy.h
index 5014ea9..fab131b 100644
--- a/src/copy.h
+++ b/src/copy.h
@@ -22,6 +22,22 @@
 # include <stdbool.h>
 # include "hash.h"

+/* Control efficient reading of sparse files.  On some systems, you can
+   use the FIEMAP ioctl to read only the non-sparse parts of a file.  */
+enum Sparse_efficiency
+  /* Do not attempt to treat sparse source files specially.  */
+  /* Attempt to read sparse files efficiently, but if that is not
+     possible, fall back on the regular, less-efficient approach.  */
+  /* Read sparse files efficiently, and if that is not possible,
+     then treat it as failure to copy.  */
 /* Control creation of sparse files (files with holes).  */
 enum Sparse_type
@@ -110,6 +126,9 @@ struct cp_options
   /* Control creation of sparse files.  */
   enum Sparse_type sparse_mode;

+  /* Control efficient reading of sparse files.  */
+  enum Sparse_efficiency sparse_efficiency;
   /* Set the mode of the destination file to exactly this value
      if SET_MODE is nonzero.  */
   mode_t mode;
diff --git a/src/cp.c b/src/cp.c
index 859f21b..711e229 100644
--- a/src/cp.c
+++ b/src/cp.c
@@ -74,6 +74,7 @@ enum
@@ -93,6 +94,16 @@ static bool parents_option = false;
 /* Remove any trailing slashes from each SOURCE argument.  */
 static bool remove_trailing_slashes;

+static char const *const eff_sparse_type_string[] =
+  "never", "auto", "required", NULL
+static enum Sparse_type const eff_sparse_type[] =
+ARGMATCH_VERIFY (eff_sparse_type_string, eff_sparse_type);
 static char const *const sparse_type_string[] =
   "never", "auto", "always", NULL
@@ -120,6 +131,7 @@ static struct option const long_opts[] =
   {"backup", optional_argument, NULL, 'b'},
   {"copy-contents", no_argument, NULL, COPY_CONTENTS_OPTION},
   {"dereference", no_argument, NULL, 'L'},
+  {"efficient-sparse", required_argument, NULL, EFFICIENT_SPARSE_OPTION},
   {"force", no_argument, NULL, 'f'},
   {"interactive", no_argument, NULL, 'i'},
   {"link", no_argument, NULL, 'l'},
@@ -177,6 +189,9 @@ Mandatory arguments to long options are mandatory for short 
options too.\n\
   -d                           same as --no-dereference --preserve=links\n\
 "), stdout);
       fputs (_("\
+      --efficient-sparse=HOW   control efficient reading of sparse files.\n\
+"), stdout);
+      fputs (_("\
   -f, --force                  if an existing destination file cannot be\n\
                                  opened, remove it and try again (redundant if\
@@ -247,6 +262,21 @@ fails, or if --reflink=auto is specified, fall back to a 
standard copy.\n\
 "), stdout);
       fputs (_("\
+By default, cp tries to read sparse SOURCE files efficiently, but if the\n\
+required capability is not available it resorts to copying the usual way.\n\
+--efficient-sparse=auto is the default.  One case in which you would not\n\
+want to fall back on the usual method is when you are copying a very large,\n\
+mostly-sparse file, and processing all bytes in the nominal size would take\n\
+too long.\
+"), stdout);
+      fputs (_("\
+  In that case, use --efficient-sparse=required to make cp fail if\n\
+the efficient method does not work.  I.e., tell cp not to resort to the\n\
+less-efficient method. Finally, --efficient-sparse=never makes cp skip the\n\
+attempt to copy efficiently.\n\
+"), stdout);
+      fputs (_("\
 The backup suffix is `~', unless set with --suffix or SIMPLE_BACKUP_SUFFIX.\n\
 The version control method may be selected via the --backup option or 
 the VERSION_CONTROL environment variable.  Here are the values:\n\
@@ -944,6 +974,12 @@ main (int argc, char **argv)
                                      sparse_type_string, sparse_type);

+          x.sparse_efficiency = XARGMATCH ("--efficient-sparse", optarg,
+                                           eff_sparse_type_string,
+                                           eff_sparse_type);
+          break;
         case REFLINK_OPTION:
           if (optarg == NULL)
             x.reflink_mode = REFLINK_ALWAYS;

reply via email to

[Prev in Thread] Current Thread [Next in Thread]