[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH] copy,dd: simplify and optimize NUL bytes detection
From: Pádraig Brady
Subject: [PATCH] copy,dd: simplify and optimize NUL bytes detection
Date: Thu, 22 Oct 2015 15:00:53 +0100
* src/system.h (is_nul): Reimplement with a version
that doesn't require a sentinel after the buffer,
and which calls down to (the system optimized) memcmp.
Performance analyzed at http://rusty.ozlabs.org/?p=560
* src/dd.c (alloc_obuf): Simplify the is_nul() call by
not needing to write the sentinel.
* src/copy.c (sparse_copy): Likewise.
(copy_reg): Simplify the buffer allocation by avoiding
consideration of the sentinel in the buffer size calculation.
---
src/copy.c | 22 ++++------------------
src/dd.c | 6 ------
src/system.h | 35 ++++++++++++++++++-----------------
3 files changed, 22 insertions(+), 41 deletions(-)
diff --git a/src/copy.c b/src/copy.c
index 5fe69ea..edf022e 100644
--- a/src/copy.c
+++ b/src/copy.c
@@ -245,17 +245,7 @@ sparse_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
csize = MIN (csize, n_read);
if (hole_size && csize)
- {
- /* Setup sentinel required by is_nul(). */
- typedef uintptr_t word;
- word isnul_tmp;
- memcpy (&isnul_tmp, cbuf + csize, sizeof (word));
- memset (cbuf + csize, 1, sizeof (word));
-
- make_hole = is_nul (cbuf, csize);
-
- memcpy (cbuf + csize, &isnul_tmp, sizeof (word));
- }
+ make_hole = is_nul (cbuf, csize);
bool transition = (make_hole != prev_hole) && psize;
bool last_chunk = (n_read == csize && ! make_hole) || ! csize;
@@ -1201,11 +1191,8 @@ copy_reg (char const *src_name, char const *dst_name,
if (data_copy_required)
{
- typedef uintptr_t word;
-
/* Choose a suitable buffer size; it may be adjusted later. */
- size_t buf_alignment = lcm (getpagesize (), sizeof (word));
- size_t buf_alignment_slop = sizeof (word) + buf_alignment - 1;
+ size_t buf_alignment = getpagesize ();
size_t buf_size = io_blksize (sb);
size_t hole_size = ST_BLKSIZE (sb);
@@ -1236,7 +1223,7 @@ copy_reg (char const *src_name, char const *dst_name,
{
/* Compute the least common multiple of the input and output
buffer sizes, adjusting for outlandish values. */
- size_t blcm_max = MIN (SIZE_MAX, SSIZE_MAX) - buf_alignment_slop;
+ size_t blcm_max = MIN (SIZE_MAX, SSIZE_MAX) - buf_alignment;
size_t blcm = buffer_lcm (io_blksize (src_open_sb), buf_size,
blcm_max);
@@ -1254,8 +1241,7 @@ copy_reg (char const *src_name, char const *dst_name,
buf_size = blcm;
}
- /* Make a buffer with space for a sentinel at the end. */
- buf_alloc = xmalloc (buf_size + buf_alignment_slop);
+ buf_alloc = xmalloc (buf_size + buf_alignment);
buf = ptr_align (buf_alloc, buf_alignment);
if (sparse_src)
diff --git a/src/dd.c b/src/dd.c
index e647294..054cf76 100644
--- a/src/dd.c
+++ b/src/dd.c
@@ -20,7 +20,6 @@
#define SWAB_ALIGN_OFFSET 2
-#include <assert.h>
#include <sys/types.h>
#include <signal.h>
#include <getopt.h>
@@ -728,11 +727,6 @@ alloc_obuf (void)
alloc_ibuf ();
obuf = ibuf;
}
-
- /* Write a sentinel to the slop after the buffer,
- to allow efficient checking for NUL blocks. */
- assert (sizeof (uintptr_t) <= OUTPUT_BLOCK_SLOP);
- memset (obuf + output_blocksize, 1, sizeof (uintptr_t));
}
static void
diff --git a/src/system.h b/src/system.h
index 8f6a2ea..de46e33 100644
--- a/src/system.h
+++ b/src/system.h
@@ -487,27 +487,28 @@ ptr_align (void const *ptr, size_t alignment)
}
/* Return whether the buffer consists entirely of NULs.
- Note the word after the buffer must be non NUL. */
+ From CCAN by Rusty Russell <address@hidden>
+ released under CC0 (Public domain). */
static inline bool _GL_ATTRIBUTE_PURE
is_nul (void const *buf, size_t bufsize)
{
- typedef uintptr_t word;
- void const *vp;
- char const *cbuf = buf;
- word const *wp = buf;
-
- /* Find first nonzero *word*, or the word with the sentinel. */
- while (*wp++ == 0)
- continue;
-
- /* Find the first nonzero *byte*, or the sentinel. */
- vp = wp - 1;
- char const *cp = vp;
- while (*cp++ == 0)
- continue;
-
- return cbuf + bufsize < cp;
+ const unsigned char *p = buf;
+ size_t len;
+
+ /* Check first 16 bytes manually. */
+ for (len = 0; len < 16; len++)
+ {
+ if (! bufsize)
+ return true;
+ if (*p)
+ return false;
+ p++;
+ bufsize--;
+ }
+
+ /* Now we know that's zero, memcmp with self. */
+ return memcmp (buf, p, bufsize) == 0;
}
/* If 10*Accum + Digit_val is larger than the maximum value for Type,
--
2.5.0
- [PATCH] copy,dd: simplify and optimize NUL bytes detection, Pádraig Brady <=
- Re: [PATCH] copy,dd: simplify and optimize NUL bytes detection, Eric Blake, 2015/10/22
- Re: [PATCH] copy, dd: simplify and optimize NUL bytes detection, Paolo Bonzini, 2015/10/22
- Re: [PATCH] copy, dd: simplify and optimize NUL bytes detection, Pádraig Brady, 2015/10/22
- Re: [PATCH] copy, dd: simplify and optimize NUL bytes detection, Paolo Bonzini, 2015/10/22
- Re: [PATCH] copy, dd: simplify and optimize NUL bytes detection, Eric Blake, 2015/10/22
- Re: [PATCH] copy, dd: simplify and optimize NUL bytes detection, Paolo Bonzini, 2015/10/22
- Re: [Qemu-devel] [PATCH] copy, dd: simplify and optimize NUL bytes detection, Radim Krčmář, 2015/10/22
- Re: [Qemu-devel] [PATCH] copy, dd: simplify and optimize NUL bytes detection, Paolo Bonzini, 2015/10/22
- Message not available
- Re: [Qemu-devel] [PATCH] copy, dd: simplify and optimize NUL bytes detection, Paolo Bonzini, 2015/10/23
- Re: [Qemu-devel] [PATCH] copy, dd: simplify and optimize NUL bytes detection, Pádraig Brady, 2015/10/23