[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: better buffer size for copy
From: |
Paul Eggert |
Subject: |
Re: better buffer size for copy |
Date: |
Wed, 23 Nov 2005 23:07:26 -0800 |
User-agent: |
Gnus/5.1007 (Gnus v5.10.7) Emacs/21.4 (gnu/linux) |
address@hidden (Robert Latham) writes:
> That's what i thought you'd say. Ok, this patch vs. today's
> CVS adds buffer-lcm.h and buffer-lcm.c, adds those files to
> Makefile.am, and makes copy.c call
> buffer_lcm.
That patch is a reasonable first cut, but it mishandles sparse files
among other things. I installed the following instead. Thanks for
prompting us to look into the problem.
2005-11-23 Paul Eggert <address@hidden>
* src/copy.c: Improve performance a bit by optimizing away
unnecessary system calls and going to a block size of at least
8192 (on normal hosts, anyway). This improved performance 5% on my
Debian stable host (2.4.27 kernel, x86, copying from root
ext3 file system to itself).
Include "buffer-lcm.h".
(copy_reg): Omit last argument. All callers changed.
Use xmalloc to allocate rather than trusting alloca
(which is unwise with large block sizes).
Declare locals more locally, if possible.
Use uintptr_t words instead of int words, for a bit more speed
when looking for null blocks on 64-bit hosts.
Optimize away reads of zero bytes on regular files.
In the typical case, insist on 8 KiB buffers, at least.
Avoid unnecessary extra call to fstat when checking for sparse files.
Avoid now-unnecessary cast to off_t, and "0L".
Avoid unnecessary test of *new_dst when checking for same owner
and group.
* Makefile.am (libcoreutils_a_SOURCES): Add buffer-lcm.c, buffer-lcm.h.
* buffer-lcm.c, buffer-lcm.h: New files, from diffutils.
Index: src/copy.c
===================================================================
RCS file: /fetish/cu/src/copy.c,v
retrieving revision 1.190
diff -p -u -r1.190 copy.c
--- src/copy.c 25 Sep 2005 03:07:33 -0000 1.190
+++ src/copy.c 24 Nov 2005 06:40:25 -0000
@@ -31,6 +31,7 @@
#include "system.h"
#include "backupfile.h"
+#include "buffer-lcm.h"
#include "copy.h"
#include "cp-hash.h"
#include "dirname.h"
@@ -199,29 +200,21 @@ copy_dir (char const *src_name_in, char
X provides many option settings.
Return true if successful.
*NEW_DST and *CHOWN_SUCCEEDED are as in copy_internal.
- SRC_SB and DST_SB are the results of calling XSTAT (aka stat for
- SRC_SB) on SRC_NAME and DST_NAME. */
+ SRC_SB is the result of calling XSTAT (aka stat) on SRC_NAME. */
static bool
copy_reg (char const *src_name, char const *dst_name,
const struct cp_options *x, mode_t dst_mode, bool *new_dst,
bool *chown_succeeded,
- struct stat const *src_sb,
- struct stat const *dst_sb)
+ struct stat const *src_sb)
{
char *buf;
- size_t buf_size;
- size_t buf_alignment;
+ char *buf_alloc = NULL;
int dest_desc;
int source_desc;
struct stat sb;
struct stat src_open_sb;
- char *cp;
- int *ip;
bool return_val = true;
- off_t n_read_total = 0;
- bool last_write_made_hole = false;
- bool make_holes = false;
source_desc = open (src_name, O_RDONLY | O_BINARY);
if (source_desc < 0)
@@ -282,8 +275,6 @@ copy_reg (char const *src_name, char con
goto close_src_desc;
}
- /* Determine the optimal buffer size. */
-
if (fstat (dest_desc, &sb))
{
error (0, errno, _("cannot fstat %s"), quote (dst_name));
@@ -291,126 +282,167 @@ copy_reg (char const *src_name, char con
goto close_src_and_dst_desc;
}
- buf_size = ST_BLKSIZE (sb);
-
- /* Even with --sparse=always, try to create holes only
- if the destination is a regular file. */
- if (x->sparse_mode == SPARSE_ALWAYS && S_ISREG (sb.st_mode))
- make_holes = true;
-
-#if HAVE_STRUCT_STAT_ST_BLOCKS
- if (x->sparse_mode == SPARSE_AUTO && S_ISREG (sb.st_mode))
+ if (! (S_ISREG (src_open_sb.st_mode) && src_open_sb.st_size == 0))
{
- /* Use a heuristic to determine whether SRC_NAME contains any
- sparse blocks. */
+ typedef uintptr_t word;
+ off_t n_read_total = 0;
- if (fstat (source_desc, &sb))
- {
- error (0, errno, _("cannot fstat %s"), quote (src_name));
- return_val = false;
- goto close_src_and_dst_desc;
- }
+ /* Choose a suitable buffer size; it may be adjusted later. */
+ size_t buf_alignment = lcm (getpagesize (), sizeof (word));
+ size_t buf_alignment_slop = sizeof (word) + buf_alignment - 1;
+ size_t buf_size = ST_BLKSIZE (sb);
+
+ /* Deal with sparse files. */
+ bool last_write_made_hole = false;
+ bool make_holes = false;
+
+ if (S_ISREG (sb.st_mode))
+ {
+ /* Even with --sparse=always, try to create holes only
+ if the destination is a regular file. */
+ if (x->sparse_mode == SPARSE_ALWAYS)
+ make_holes = true;
- /* If the file has fewer blocks than would normally
- be needed for a file of its size, then
- at least one of the blocks in the file is a hole. */
- if (S_ISREG (sb.st_mode)
- && sb.st_size / ST_NBLOCKSIZE > ST_NBLOCKS (sb))
- make_holes = true;
- }
+#if HAVE_STRUCT_STAT_ST_BLOCKS
+ /* Use a heuristic to determine whether SRC_NAME contains any sparse
+ blocks. If the file has fewer blocks than would normally be
+ needed for a file of its size, then at least one of the blocks in
+ the file is a hole. */
+ if (x->sparse_mode == SPARSE_AUTO && S_ISREG (src_open_sb.st_mode)
+ && ST_NBLOCKS (src_open_sb) < src_open_sb.st_size / ST_NBLOCKSIZE)
+ make_holes = true;
#endif
+ }
- /* Make a buffer with space for a sentinel at the end. */
-
- buf_alignment = lcm (getpagesize (), sizeof (int));
- buf = alloca (buf_size + sizeof (int) + buf_alignment - 1);
- buf = ptr_align (buf, buf_alignment);
+ /* If not making a sparse file, try to use a more-efficient
+ buffer size. */
+ if (! make_holes)
+ {
+ /* These days there's no point ever messing with buffers smaller
+ than 8 KiB. It would be nice to configure SMALL_BUF_SIZE
+ dynamically for this host and pair of files, but there doesn't
+ seem to be a good way to get readahead info portably. */
+ enum { SMALL_BUF_SIZE = 8 * 1024 };
+
+ /* Compute the least common multiple of the input and output
+ buffer sizes, adjusting for outlandish values. */
+ size_t blcm_max = MIN (SIZE_MAX, SSIZE_MAX) - buf_alignment_slop;
+ size_t blcm = buffer_lcm (ST_BLKSIZE (src_open_sb), buf_size,
+ blcm_max);
+
+ /* Do not use a block size that is too small. */
+ buf_size = MAX (SMALL_BUF_SIZE, blcm);
+
+ /* Do not bother with a buffer larger than the input file, plus one
+ byte to make sure the file has not grown while reading it. */
+ if (S_ISREG (src_open_sb.st_mode) && src_open_sb.st_size < buf_size)
+ buf_size = src_open_sb.st_size + 1;
+
+ /* However, stick with a block size that is a positive multiple of
+ blcm, overriding the above adjustments. Watch out for
+ overflow. */
+ buf_size += blcm - 1;
+ buf_size -= buf_size % blcm;
+ if (buf_size == 0 || blcm_max < buf_size)
+ buf_size = blcm;
+ }
+
+ /* Make a buffer with space for a sentinel at the end. */
+ buf_alloc = xmalloc (buf_size + buf_alignment_slop);
+ buf = ptr_align (buf_alloc, buf_alignment);
- for (;;)
- {
- ssize_t n_read = read (source_desc, buf, buf_size);
- if (n_read < 0)
+ for (;;)
{
+ word *wp = NULL;
+
+ ssize_t n_read = read (source_desc, buf, buf_size);
+ if (n_read < 0)
+ {
#ifdef EINTR
- if (errno == EINTR)
- continue;
+ if (errno == EINTR)
+ continue;
#endif
- error (0, errno, _("reading %s"), quote (src_name));
- return_val = false;
- goto close_src_and_dst_desc;
- }
- if (n_read == 0)
- break;
+ error (0, errno, _("reading %s"), quote (src_name));
+ return_val = false;
+ goto close_src_and_dst_desc;
+ }
+ if (n_read == 0)
+ break;
- n_read_total += n_read;
+ n_read_total += n_read;
- ip = NULL;
- if (make_holes)
- {
- buf[n_read] = 1; /* Sentinel to stop loop. */
+ if (make_holes)
+ {
+ char *cp;
- /* Find first nonzero *word*, or the word with the sentinel. */
+ buf[n_read] = 1; /* Sentinel to stop loop. */
- ip = (int *) buf;
- while (*ip++ == 0)
- ;
+ /* Find first nonzero *word*, or the word with the sentinel. */
- /* Find the first nonzero *byte*, or the sentinel. */
+ wp = (word *) buf;
+ while (*wp++ == 0)
+ continue;
- cp = (char *) (ip - 1);
- while (*cp++ == 0)
- ;
+ /* Find the first nonzero *byte*, or the sentinel. */
- /* If we found the sentinel, the whole input block was zero,
- and we can make a hole. */
+ cp = (char *) (wp - 1);
+ while (*cp++ == 0)
+ continue;
+
+ if (cp <= buf + n_read)
+ /* Clear to indicate that a normal write is needed. */
+ wp = NULL;
+ else
+ {
+ /* We found the sentinel, so the whole input block was zero.
+ Make a hole. */
+ if (lseek (dest_desc, n_read, SEEK_CUR) < 0)
+ {
+ error (0, errno, _("cannot lseek %s"), quote (dst_name));
+ return_val = false;
+ goto close_src_and_dst_desc;
+ }
+ last_write_made_hole = true;
+ }
+ }
- if (cp > buf + n_read)
+ if (!wp)
{
- /* Make a hole. */
- if (lseek (dest_desc, (off_t) n_read, SEEK_CUR) < 0L)
+ size_t n = n_read;
+ if (full_write (dest_desc, buf, n) != n)
{
- error (0, errno, _("cannot lseek %s"), quote (dst_name));
+ error (0, errno, _("writing %s"), quote (dst_name));
return_val = false;
goto close_src_and_dst_desc;
}
- last_write_made_hole = true;
+ last_write_made_hole = false;
+
+ /* A short read on a regular file means EOF. */
+ if (n_read != buf_size && S_ISREG (src_open_sb.st_mode))
+ break;
}
- else
- /* Clear to indicate that a normal write is needed. */
- ip = NULL;
}
- if (ip == NULL)
+
+ /* If the file ends with a `hole', something needs to be written at
+ the end. Otherwise the kernel would truncate the file at the end
+ of the last write operation. */
+
+ if (last_write_made_hole)
{
- size_t n = n_read;
- if (full_write (dest_desc, buf, n) != n)
+#if HAVE_FTRUNCATE
+ /* Write a null character and truncate it again. */
+ if (full_write (dest_desc, "", 1) != 1
+ || ftruncate (dest_desc, n_read_total) < 0)
+#else
+ /* Seek backwards one character and write a null. */
+ if (lseek (dest_desc, (off_t) -1, SEEK_CUR) < 0L
+ || full_write (dest_desc, "", 1) != 1)
+#endif
{
error (0, errno, _("writing %s"), quote (dst_name));
return_val = false;
goto close_src_and_dst_desc;
}
- last_write_made_hole = false;
- }
- }
-
- /* If the file ends with a `hole', something needs to be written at
- the end. Otherwise the kernel would truncate the file at the end
- of the last write operation. */
-
- if (last_write_made_hole)
- {
-#if HAVE_FTRUNCATE
- /* Write a null character and truncate it again. */
- if (full_write (dest_desc, "", 1) != 1
- || ftruncate (dest_desc, n_read_total) < 0)
-#else
- /* Seek backwards one character and write a null. */
- if (lseek (dest_desc, (off_t) -1, SEEK_CUR) < 0L
- || full_write (dest_desc, "", 1) != 1)
-#endif
- {
- error (0, errno, _("writing %s"), quote (dst_name));
- return_val = false;
- goto close_src_and_dst_desc;
}
}
@@ -432,8 +464,7 @@ copy_reg (char const *src_name, char con
}
#if HAVE_FCHOWN
- if (x->preserve_ownership
- && (*new_dst || !SAME_OWNER_AND_GROUP (*src_sb, *dst_sb)))
+ if (x->preserve_ownership && ! SAME_OWNER_AND_GROUP (*src_sb, sb))
{
if (fchown (dest_desc, src_sb->st_uid, src_sb->st_gid) == 0)
*chown_succeeded = true;
@@ -488,6 +519,7 @@ close_src_desc:
return_val = false;
}
+ free (buf_alloc);
return return_val;
}
@@ -1524,7 +1556,7 @@ copy_internal (char const *src_name, cha
with historical practice. */
if (! copy_reg (src_name, dst_name, x,
get_dest_mode (x, src_mode), &new_dst, &chown_succeeded,
- &src_sb, &dst_sb))
+ &src_sb))
goto un_backup;
}
else
Index: lib/Makefile.am
===================================================================
RCS file: /fetish/cu/lib/Makefile.am,v
retrieving revision 1.235
diff -p -u -r1.235 Makefile.am
--- lib/Makefile.am 5 Oct 2005 14:54:17 -0000 1.235
+++ lib/Makefile.am 24 Nov 2005 06:40:24 -0000
@@ -27,6 +27,7 @@ DEFS += -DLIBDIR=\"$(libdir)\"
libcoreutils_a_SOURCES = \
allocsa.c allocsa.h \
+ buffer-lcm.c buffer-lcm.h \
euidaccess.h \
exit.h \
fprintftime.c fprintftime.h \
--- /dev/null 2005-09-24 22:00:15.000000000 -0700
+++ lib/buffer-lcm.h 2005-11-23 15:00:03.000000000 -0800
@@ -0,0 +1,2 @@
+#include <stddef.h>
+size_t buffer_lcm (size_t, size_t, size_t);
--- /dev/null 2005-09-24 22:00:15.000000000 -0700
+++ lib/buffer-lcm.c 2005-11-23 15:02:09.000000000 -0800
@@ -0,0 +1,59 @@
+/* buffer-lcm.c - compute a good buffer size for dealing with two files
+
+ Copyright (C) 2002, 2005 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* Written by Paul Eggert. */
+
+#include "buffer-lcm.h"
+
+/* Return a buffer size suitable for doing I/O with files whose block
+ sizes are A and B. However, never return a value greater than
+ LCM_MAX. */
+
+size_t
+buffer_lcm (size_t a, size_t b, size_t lcm_max)
+{
+ size_t size;
+
+ /* Use reasonable values if buffer sizes are zero. */
+ if (!a)
+ size = b ? b : 8 * 1024;
+ else
+ {
+ if (b)
+ {
+ /* Return lcm (A, B) if it is in range; otherwise, fall back
+ on A. */
+
+ size_t lcm, m, n, q, r;
+
+ /* N = gcd (A, B). */
+ for (m = a, n = b; (r = m % n) != 0; m = n, n = r)
+ continue;
+
+ /* LCM = lcm (A, B), if in range. */
+ q = a / n;
+ lcm = q * b;
+ if (lcm <= lcm_max && lcm / b == q)
+ return lcm;
+ }
+
+ size = a;
+ }
+
+ return size <= lcm_max ? size : lcm_max;
+}
- Re: better buffer size for copy, (continued)
- Re: better buffer size for copy, Paul Eggert, 2005/11/05
- Re: better buffer size for copy, Robert Latham, 2005/11/07
- Re: better buffer size for copy, Paul Eggert, 2005/11/07
- Re: better buffer size for copy, Robert Latham, 2005/11/07
- Re: better buffer size for copy, Robert Latham, 2005/11/18
- Re: better buffer size for copy, Phillip Susi, 2005/11/19
- Re: better buffer size for copy, Robert Latham, 2005/11/20
- Re: better buffer size for copy, Phillip Susi, 2005/11/21
- Re: better buffer size for copy, Robert Latham, 2005/11/22
- Re: better buffer size for copy, Phillip Susi, 2005/11/22
- Re: better buffer size for copy,
Paul Eggert <=
- Re: better buffer size for copy, Jim Meyering, 2005/11/24