>From 4698cf3cb3981c013e412a6080fe3ab1cab34656 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A1draig=20Brady?= Date: Mon, 27 Feb 2012 13:53:07 +0000 Subject: [PATCH] dd: add support for the conv=sparse option Small seeks are not coalesced to larger ones (as is done in cache_round() for example), for the moment at least. conv= is used rather than oflag= for FreeBSD compatibility. * src/dd.c (last_seek): A new global boolean to flag whether the last "write" was converted to a seek. (usage): Describe the new conv=sparse option. (scanargs): Ignore conv=sparse in some combinations. (iwrite): Convert a write of a NUL block to a seek if requested. (dd_copy): Initialize the output buffer to have a sentinel, to allow for efficient testing for NUL output blocks. If the last block in the file was converted to a seek, then extend a regular output file with ftruncate so the size is updated. * NEWS: Mention the new feature. * tests/dd/sparse: A new test for the feature. * tests/Makefile.am: Reference the new test. --- NEWS | 3 + doc/coreutils.texi | 7 +++ src/dd.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++++-- tests/Makefile.am | 1 + tests/dd/sparse | 57 ++++++++++++++++++++++++++++ 5 files changed, 168 insertions(+), 4 deletions(-) create mode 100755 tests/dd/sparse diff --git a/NEWS b/NEWS index e2e8fc5..8006669 100644 --- a/NEWS +++ b/NEWS @@ -7,6 +7,9 @@ GNU coreutils NEWS -*- outline -*- dd now accepts the count_bytes, skip_bytes iflags and the seek_bytes oflag, to more easily allow processing portions of a file. + dd now accepts the conv=sparse flag to attempt to create sparse + output, by seeking rather than writing to the output file. + split now accepts an optional "from" argument to --numeric-suffixes, which changes the start number from the default of 0. diff --git a/doc/coreutils.texi b/doc/coreutils.texi index 414626d..f22e7d2 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -8140,6 +8140,13 @@ Change lowercase letters to uppercase. 
The @samp{lcase} and @samp{ucase} conversions are mutually exclusive. +@item sparse +@opindex sparse +Try to seek rather than write @sc{nul} output blocks. +This will create sparse output when extending. +This option is ignored in conjunction with +@samp{conv=notrunc} or @samp{oflag=append}. + @item swab @opindex swab @r{(byte-swapping)} @cindex byte-swapping diff --git a/src/dd.c b/src/dd.c index fe44a30..903346f 100644 --- a/src/dd.c +++ b/src/dd.c @@ -94,7 +94,7 @@ malloc. See dd_copy for details. INPUT_BLOCK_SLOP must be no less than OUTPUT_BLOCK_SLOP. */ #define INPUT_BLOCK_SLOP (2 * SWAB_ALIGN_OFFSET + 2 * page_size - 1) -#define OUTPUT_BLOCK_SLOP (page_size - 1) +#define OUTPUT_BLOCK_SLOP MAX (sizeof (uintptr_t), page_size - 1) /* Maximum blocksize for the given SLOP. Keep it smaller than SIZE_MAX - SLOP, so that we can @@ -126,7 +126,9 @@ enum C_NOCREAT = 010000, C_EXCL = 020000, C_FDATASYNC = 040000, - C_FSYNC = 0100000 + C_FSYNC = 0100000, + + C_SPARSE = 0200000 }; /* Status bit masks. */ @@ -167,6 +169,9 @@ static uintmax_t seek_records = 0; output. */ static uintmax_t seek_bytes = 0; +/* Whether the last output was done with a seek (rather than a write). */ +static bool last_seek; + /* Copy only this many records. The default is effectively infinity. */ static uintmax_t max_records = (uintmax_t) -1; @@ -271,6 +276,7 @@ static struct symbol_value const conversions[] = {"unblock", C_UNBLOCK | C_TWOBUFS}, /* Fixed to variable length records. */ {"lcase", C_LCASE | C_TWOBUFS}, /* Translate upper to lower case. */ {"ucase", C_UCASE | C_TWOBUFS}, /* Translate lower to upper case. */ + {"sparse", C_SPARSE}, /* Try to sparsely write output. */ {"swab", C_SWAB | C_TWOBUFS}, /* Swap bytes of input. */ {"noerror", C_NOERROR}, /* Ignore i/o errors. */ {"nocreat", C_NOCREAT}, /* Do not create output file. 
*/ @@ -548,6 +554,7 @@ Each CONV symbol may be:\n\ unblock replace trailing spaces in cbs-size records with newline\n\ lcase change upper case to lower case\n\ ucase change lower case to upper case\n\ + sparse try to seek rather than write the output for NUL input blocks\n\ swab swap every pair of input bytes\n\ sync pad every input block with NULs to ibs-size; when used\n\ with block or unblock, pad with spaces rather than NULs\n\ @@ -989,6 +996,27 @@ iread_fullblock (int fd, char *buf, size_t size) return nread; } +/* Return whether the buffer consists entirely of NULs. + Note the word after the buffer must be non NUL. */ + +static bool _GL_ATTRIBUTE_PURE +is_nul (const char *buf, size_t bufsize) +{ + typedef uintptr_t word; + + /* Find first nonzero *word*, or the word with the sentinel. */ + word *wp = (word *) buf; + while (*wp++ == 0) + continue; + + /* Find the first nonzero *byte*, or the sentinel. */ + char *cp = (char *) (wp - 1); + while (*cp++ == 0) + continue; + + return cp > buf + bufsize; +} + /* Write to FD the buffer BUF of size SIZE, processing any signals that arrive. Return the number of bytes written, setting errno if this is less than SIZE. Keep trying if there are partial @@ -1020,9 +1048,28 @@ iwrite (int fd, char const *buf, size_t size) while (total_written < size) { - ssize_t nwritten; + ssize_t nwritten = 0; process_signals (); - nwritten = write (fd, buf + total_written, size - total_written); + + /* Perform a seek for a NUL block if sparse output is enabled. */ + last_seek = false; + if ((conversions_mask & C_SPARSE) && is_nul (buf, size)) + { + if (lseek (fd, size, SEEK_CUR) < 0) + { + conversions_mask &= ~C_SPARSE; + /* Don't warn about the advisory sparse request. 
*/ + } + else + { + last_seek = true; + nwritten = size; + } + } + + if (!nwritten) + nwritten = write (fd, buf + total_written, size - total_written); + if (nwritten < 0) { if (errno != EINTR) @@ -1321,6 +1368,24 @@ scanargs (int argc, char *const *argv) || multiple_bits_set (output_flags & (O_DIRECT | O_NOCACHE))) error (EXIT_FAILURE, 0, _("cannot combine direct and nocache")); + if ((output_flags & O_APPEND) && (conversions_mask & C_SPARSE)) + { + /* Disable sparse seeks as they'll be noops with O_APPEND. + Also they will be relative to the last write + (start of file if no writes) rather than EOF. + This might be catered for with ftruncate and SEEK_END, + but not in an atomic way across processes. */ + error (0, 0, _("oflag=append disables conv=sparse")); + conversions_mask &= ~C_SPARSE; + } + if ((conversions_mask & C_NOTRUNC) && (conversions_mask & C_SPARSE)) + { + /* Disable sparse writing when not truncating the output file, + so that non NUL parts of the output are overwritten by NUL input. */ + error (0, 0, _("conv=notrunc disables conv=sparse")); + conversions_mask &= ~C_SPARSE; + } + if (input_flags & O_NOCACHE) { i_nocache = true; @@ -1861,6 +1926,10 @@ dd_copy (void) obuf = ibuf; } + /* Write sentinel to slop after the buffer, + to allow efficient checking for NUL blocks. */ + memset (obuf + output_blocksize, 1, sizeof (uintptr_t)); + if (skip_records != 0 || skip_bytes != 0) { uintmax_t us_bytes = input_offset + (skip_records * input_blocksize) @@ -2072,6 +2141,33 @@ dd_copy (void) } } + /* If the last write was converted to a seek, then for a regular file, + ftruncate to extend the size. 
*/ + if (last_seek) + { + struct stat stdout_stat; + if (fstat (STDOUT_FILENO, &stdout_stat) != 0) + { + error (0, errno, _("cannot fstat %s"), quote (output_file)); + return EXIT_FAILURE; + } + if (S_ISREG (stdout_stat.st_mode)) + { + off_t output_offset = lseek (STDOUT_FILENO, 0, SEEK_CUR); + if (output_offset > stdout_stat.st_size) + { + if (ftruncate (STDOUT_FILENO, output_offset) != 0) + { + error (0, errno, + _("failed to truncate to %"PRIuMAX" bytes" + " in output file %s"), + output_offset, quote (output_file)); + return EXIT_FAILURE; + } + } + } + } + if ((conversions_mask & C_FDATASYNC) && fdatasync (STDOUT_FILENO) != 0) { if (errno != ENOSYS && errno != EINVAL) diff --git a/tests/Makefile.am b/tests/Makefile.am index 74ff470..5e184ac 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -373,6 +373,7 @@ TESTS = \ dd/skip-seek2 \ dd/bytes \ dd/skip-seek-past-file \ + dd/sparse \ dd/stderr \ dd/unblock \ dd/unblock-sync \ diff --git a/tests/dd/sparse b/tests/dd/sparse new file mode 100755 index 0000000..254cf08 --- /dev/null +++ b/tests/dd/sparse @@ -0,0 +1,57 @@ +#!/bin/sh + +# Copyright (C) 2012 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +. 
"${srcdir=.}/init.sh"; path_prepend_ ../src +print_ver_ dd + +# Ensure basic sparse generation works +truncate -s1M sparse +dd bs=32K if=sparse of=sparse.dd conv=sparse +test $(stat -c %s sparse) = $(stat -c %s sparse.dd) || fail=1 + +# Ensure conv=sparse ignored with oflag=append, +# or otherwise seeks in output are not done +printf 'a\000\000b' > file.in +dd if=file.in bs=1 conv=sparse oflag=append > file.out +cmp file.in file.out || fail=1 + +# Ensure conv=sparse ignored with conv=notrunc, +# or otherwise non NUL data in file.out is not overwritten with NULs +printf 'a\001\001b' > file.out +dd if=file.in bs=1 conv=sparse,notrunc > file.out +cmp file.in file.out || fail=1 + +# Ensure we fall back to write if seek fails +dd if=file.in bs=1 conv=sparse | cat > file.out +cmp file.in file.out || fail=1 + +# Setup for block size tests +dd if=/dev/urandom of=file.in bs=1M count=1 +truncate -s+1M file.in +dd if=/dev/urandom of=file.in bs=1M count=1 conv=notrunc oflag=append + +# Ensure NUL blocks smaller than the block size are not made sparse +dd if=file.in of=file.out bs=2M conv=sparse +test $(stat -c %s file.in) = $(stat -c %s file.out) || fail=1 +test $(stat -c %b file.in) = $(stat -c %b file.out) && fail=1 + +# Ensure NUL blocks >= block size are made sparse +dd if=file.in of=file.out bs=1M conv=sparse +test $(stat -c %s file.in) = $(stat -c %s file.out) || fail=1 +test $(stat -c %b file.in) = $(stat -c %b file.out) || fail=1 + +Exit $fail -- 1.7.6.4