[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
bug#8061: Introduce SEEK_DATA/SEEK_HOLE to extent_scan module
From: |
Jeff liu |
Subject: |
bug#8061: Introduce SEEK_DATA/SEEK_HOLE to extent_scan module |
Date: |
Tue, 19 Apr 2011 16:51:38 +0800 |
> Hi All,
>
> Please ignore the current patch, I will submit another patch with a few fixes
> soon.
Now the new patch set coming,
In previous post, I have tried to change the extent_scan_init() interface by
adding a new argument to indicate the source file size,
this will reduce the overhead of call fstat(2) in extent_scan_read(), since
the file size is definitely needed for SEEK* stuff, however, the file size is
redundant for FIEMAP.
so I changed my idea to keep extent_scan_init() as before, instead, to
retrieve the file size in extent_scan_read() when launching the first scan, one
benefit is, there is nothing need to
be modified in extent_copy() for this patch.
Tests:
====
A new test sparse-lseek was introduced in this post, it make use of the sparse
file generation function in Perl, and do `cmp` against the target copied file.
I have also took a look at the `sdb` utility shipped with ZFS, but did not
found any interesting stuff can be used for this test.
Test run passed on my environment as below,
bash-3.00# make check TESTS=cp/sparse-lseek VERBOSE=yes
make check-TESTS
make[1]: Entering directory `/coreutils/tests'
make[2]: Entering directory `/coreutils/tests'
PASS: cp/sparse-lseek
=============
1 test passed
=============
make[2]: Leaving directory `/coreutils/tests'
make[1]: Leaving directory `/coreutils/tests'
GEN vc_exe_in_TESTS
No differences encountered
Manual tests:
===========
1. Ensure trailing blanks, test 0 size sparse file, non-sparse file, sparse
file with hole start and hole end.
2. make syntax-check failed, I have no idea of this issue at the moment, I
also tried to run make distcheck, looks the package building, install and
uninstall procedures all passed,
but it also failed at the final stage, am I missing something here?
The logs which were shown as following,
bash-3.00# make syntax-check
GFDL_version
awk: syntax error near line 1
awk: bailing out near line 1
make: *** [sc_GFDL_version.z] Error 2
make distcheck:
==============
......
make[1]: Entering directory `/coreutils'
GEN check-ls-dircolors
make my-distcheck
make[2]: Entering directory `/coreutils'
make syntax-check
make[3]: Entering directory `/coreutils'
GFDL_version
awk: syntax error near line 1
awk: bailing out near line 1
make[3]: *** [sc_GFDL_version.z] Error 2
make[3]: Leaving directory `/coreutils'
make[2]: *** [my-distcheck] Error 2
make[2]: Leaving directory `/coreutils'
make[1]: *** [distcheck-hook] Error 2
make[1]: Leaving directory `/coreutils'
make: *** [distcheck] Error 1
Below is the revised patch,
From 4f966c1fe6226f3f711faae120cd8bea78e722b8 Mon Sep 17 00:00:00 2001
From: Jie Liu <address@hidden>
Date: Tue, 19 Apr 2011 15:24:50 -0700
Subject: [PATCH 1/1] copy: add SEEK_DATA/SEEK_HOLE support to extent_scan module
* src/extent_scan.h: introduce src_total_size to struct extent_info, we
need it for lseek(2) iteration.
* src/extent_scan.c: implement a new extent_scan_read() through SEEK_DATA
and SEEK_HOLE if those stuff are supported.
* tests/cp/sparse-lseek: add a new test for lseek(2) extent copy.
Signed-off-by: Jie Liu <address@hidden>
---
src/extent-scan.c | 119 +++++++++++++++++++++++++++++++++++++++++++++++++
src/extent-scan.h | 5 ++
tests/Makefile.am | 1 +
tests/cp/sparse-lseek | 56 +++++++++++++++++++++++
4 files changed, 181 insertions(+), 0 deletions(-)
create mode 100755 tests/cp/sparse-lseek
diff --git a/src/extent-scan.c b/src/extent-scan.c
index da7eb9d..a54eca0 100644
--- a/src/extent-scan.c
+++ b/src/extent-scan.c
@@ -17,7 +17,9 @@
Written by Jie Liu (address@hidden). */
#include <config.h>
+#include <fcntl.h>
#include <sys/types.h>
+#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/utsname.h>
#include <assert.h>
@@ -71,6 +73,9 @@ extent_scan_init (int src_fd, struct extent_scan *scan)
scan->initial_scan_failed = false;
scan->hit_final_extent = false;
scan->fm_flags = extent_need_sync () ? FIEMAP_FLAG_SYNC : 0;
+#if defined (SEEK_DATA) && defined (SEEK_HOLE)
+ scan->src_total_size = 0;
+#endif
}
#ifdef __linux__
@@ -204,6 +209,120 @@ extent_scan_read (struct extent_scan *scan)
return true;
}
+#elif defined (SEEK_HOLE) && defined (SEEK_DATA)
+extern bool
+extent_scan_read (struct extent_scan *scan)
+{
+ off_t data_pos, hole_pos;
+ union { struct extent_info ei; char c[4096]; } extent_buf;
+ struct extent_info *ext_info = &extent_buf.ei;
+ enum { count = (sizeof extent_buf / sizeof *ext_info) };
+ verify (count != 0);
+
+ memset (&extent_buf, 0, sizeof extent_buf);
+
+ if (scan->scan_start == 0)
+ {
+# ifdef _PC_MIN_HOLE_SIZE
+ /* To determine if the underlaying file system support
+ SEEK_HOLE. If not, fall back to the standard copy. */
+ if (fpathconf (scan->fd, _PC_MIN_HOLE_SIZE) < 0)
+ {
+ scan->initial_scan_failed = true;
+ return false;
+ }
+# endif
+
+ /* If we have been compiled on an OS that supports SEEK_HOLE
+ but run on an OS that does not support SEEK_HOLE, we get
+ EINVAL. If the underlying file system does not support the
+ SEEK_HOLE call, we get ENOTSUP, setting initial_scan_failed
+ to true to fall back to the standard copy in either case. */
+ hole_pos = lseek (scan->fd, (off_t) 0, SEEK_HOLE);
+ if (hole_pos < 0)
+ {
+ if (errno == EINVAL || errno == ENOTSUP)
+ scan->initial_scan_failed = true;
+ return false;
+ }
+
+ /* Seek back to position 0 first. */
+ if (hole_pos > 0)
+ {
+ if (lseek (scan->fd, (off_t) 0, SEEK_SET) < 0)
+ return false;
+ }
+
+ struct stat sb;
+ if (fstat (scan->fd, &sb) < 0)
+ return false;
+
+ /* This is definitely not a sparse file, we treat it as a big extent. */
+ if (hole_pos >= sb.st_size)
+ {
+ scan->ei_count = 1;
+ scan->ext_info = xnmalloc (scan->ei_count, sizeof (struct
extent_info));
+ scan->ext_info[0].ext_logical = 0;
+ scan->ext_info[0].ext_length = sb.st_size;
+ scan->hit_final_extent = true;
+ return true;
+ }
+ scan->src_total_size = sb.st_size;
+ }
+
+ unsigned int i = 0;
+ /* If lseek(2) failed and the errno is set to ENXIO, for
+ SEEK_DATA there are no more data regions past the supplied
+ offset. For SEEK_HOLE, there are no more holes past the
+ supplied offset. Set scan->hit_final_extent to true in
+ either case. */
+ while (scan->scan_start < scan->src_total_size && i < count)
+ {
+ data_pos = lseek (scan->fd, scan->scan_start, SEEK_DATA);
+ if (data_pos < 0)
+ {
+ if (errno == ENXIO)
+ {
+ scan->hit_final_extent = true;
+ break;
+ }
+ return false;
+ }
+
+ hole_pos = lseek (scan->fd, data_pos, SEEK_HOLE);
+ if (hole_pos < 0)
+ {
+ if (errno == ENXIO)
+ {
+ scan->hit_final_extent = true;
+ hole_pos = scan->src_total_size;
+ if (data_pos < hole_pos)
+ goto preserve_ext_info;
+ break;
+ }
+ return false;
+ }
+
+preserve_ext_info:
+ ext_info[i].ext_logical = data_pos;
+ ext_info[i].ext_length = hole_pos - data_pos;
+ scan->scan_start = hole_pos;
+ ++i;
+ }
+
+ scan->ei_count = i;
+ scan->ext_info = xnmalloc (scan->ei_count, sizeof (struct extent_info));
+
+ for (i = 0; i < scan->ei_count; i++)
+ {
+ assert (ext_info[i].ext_logical <= OFF_T_MAX);
+
+ scan->ext_info[i].ext_logical = ext_info[i].ext_logical;
+ scan->ext_info[i].ext_length = ext_info[i].ext_length;
+ }
+
+ return (lseek (scan->fd, (off_t) 0, SEEK_SET) < 0) ? false : true;
+}
#else
extern bool
extent_scan_read (struct extent_scan *scan ATTRIBUTE_UNUSED)
diff --git a/src/extent-scan.h b/src/extent-scan.h
index 5b4ded5..4fc05c6 100644
--- a/src/extent-scan.h
+++ b/src/extent-scan.h
@@ -38,6 +38,11 @@ struct extent_scan
/* File descriptor of extent scan run against. */
int fd;
+# if defined (SEEK_DATA) && defined (SEEK_HOLE)
+ /* Source file size, i.e, (struct stat) &statbuf.st_size. */
+ size_t src_total_size;
+#endif
+
/* Next scan start offset. */
off_t scan_start;
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 685eb52..6c596b9 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -28,6 +28,7 @@ root_tests = \
cp/cp-mv-enotsup-xattr \
cp/capability \
cp/sparse-fiemap \
+ cp/sparse-lseek \
dd/skip-seek-past-dev \
install/install-C-root \
ls/capability \
diff --git a/tests/cp/sparse-lseek b/tests/cp/sparse-lseek
new file mode 100755
index 0000000..5b8f2c1
--- /dev/null
+++ b/tests/cp/sparse-lseek
@@ -0,0 +1,56 @@
+#!/bin/sh
+# Test cp --sparse=always through lseek(SEEK_DATA/SEEK_HOLE) copy
+
+# Copyright (C) 2010-2011 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+print_ver_ cp
+$PERL -e 1 || skip_test_ 'you lack perl'
+
+zfsdisk=diskX
+zfspool=seektest
+
+require_root_
+
+cwd=$PWD
+cleanup_() { zpool destroy $zfspool; }
+
+skip=0
+mkfile 128m "$cwd/$zfsdisk" || skip=1
+
+# Check if the seektest pool is already exists
+zpool list $zfspool 2>/dev/null &&
+ skip_test_ "$zfspool already exists"
+
+# Create pool and verify if it is mounted automatically
+zpool create $zfspool "$cwd/$zfsdisk" || skip=1
+zpool list $zfspool >/dev/null || skip=1
+
+test $skip = 1 && skip_test_ "insufficient ZFS support"
+
+for i in $(seq 1 2 21); do
+ for j in 1 2 31 100; do
+ $PERL -e 'BEGIN { $n = '$i' * 1024; *F = *STDOUT }' \
+ -e 'for (1..'$j') { sysseek (*F, $n, 1)' \
+ -e '&& syswrite (*F, chr($_)x$n) or die "$!"}' > /$zfspool/j1 ||
fail=1
+
+ cp --sparse=always /$zfspool/j1 /$zfspool/j2 || fail=1
+ cmp /$zfspool/j1 /$zfspool/j2 || fail=1
+ test $fail = 1 && break 2
+ done
+done
+
+Exit $fail
--
1.7.4
Any comments are appreciated!
Thanks,
-Jeff
>
>
> Thanks,
> -Jeff
>
>
>> Hello All,
>>
>> This is the first try to introduce the SEEK_DATA/SEEK_HOLE support to
>> extent_scan module for efficient sparse file copy on ZFS, I have delayed it
>> for a long time, sorry for that!
>>
>> Below is the code change lists:
>> src/extent_scan.h: add a new structure item 'src_total_size' to "struct
>> extent_info", since I have to make use of this value to determine
>> a file is sparse of not for the initial scan. If the returns of lseek(fd,
>> 0, SEEK_HOLE) is equal to the src_total_size or large than it, it means the
>> source file
>> is definitely not a sparse file or maybe it is a sparse file but it does not
>> make sense for proceeding scan read.
>> another change in this file is the signature of extent_scan_init(), just as
>> I mentioned above, it need to accept the src_total_size variable.
>> src/extent_scan.c: implement the new exent_scan_read() through
>> SEEK_DATA/SEEK_HOLE, it will be called if those two values are defined at
>> <unistd.h>.
>> src/copy.c: pass src_total_size to extent_scan_init().
>>
>> On my test environment, Solaris10, SunOS 5.10 Generic_142910-17, I have
>> tried a few simple cases, they are works to me.
>>
>> For now, I am using diff(1) to verify the copy result, does anyone know
>> some utilities can be used to write the test script?
>> I have sent an email to ZFS DEV mail-list to ask this question yesterday, a
>> nice guy suggest me to use ZDB(http://cuddletech.com/blog/?p=407) for that,
>> I'm
>> still study this utility now, I also noticed there is patch to add
>> SEEK_HOLE/SEEK_DATA support to os module in Python community, please refer
>> to:
>> http://bugs.python.org/file19566/z.patch
>> but it require very latest python build I think, so could anyone give some
>> other advices in this point?
>>
>> The patch is shown as following, any help testing and comments are
>> appreciated!!
>>
>>
>> Thanks,
>> -Jeff
>>
>>
>> From: Jie Liu <address@hidden>
>> Date: Thu, 17 Feb 2011 21:14:23 +0800
>> Subject: [PATCH 1/1] copy: add SEEK_DATA/SEEK_HOLE support to extent_scan
>> module
>>
>> * src/extent_scan.h: add src_total_size to struct extent_info, we need
>> to check the SEEK_HOLE result against it for initial extent scan.
>> modify the extent_scan_init() signature, to add size_t src_total_size.
>> * src/extent_scan.c: implement a new extent_scan_read() through SEEK_DATA
>> and SEEK_HOLE.
>> * src/copy.c: pass src_total_size to extent_scan_init().
>>
>> Signed-off-by: Jie Liu <address@hidden>
>> ---
>> src/copy.c | 2 +-
>> src/extent-scan.c | 113
>> ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>> src/extent-scan.h | 9 +++-
>> 3 files changed, 120 insertions(+), 4 deletions(-)
>>
>> diff --git a/src/copy.c b/src/copy.c
>> index 104652d..22b9911 100644
>> --- a/src/copy.c
>> +++ b/src/copy.c
>> @@ -306,7 +306,7 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t
>> buf_size,
>> We may need this at the end, for a final ftruncate. */
>> off_t dest_pos = 0;
>>
>> - extent_scan_init (src_fd, &scan);
>> + extent_scan_init (src_fd, src_total_size, &scan);
>>
>> *require_normal_copy = false;
>> bool wrote_hole_at_eof = true;
>> diff --git a/src/extent-scan.c b/src/extent-scan.c
>> index 1ba59db..ffeab7a 100644
>> --- a/src/extent-scan.c
>> +++ b/src/extent-scan.c
>> @@ -32,13 +32,17 @@
>> /* Allocate space for struct extent_scan, initialize the entries if
>> necessary and return it as the input argument of extent_scan_read(). */
>> extern void
>> -extent_scan_init (int src_fd, struct extent_scan *scan)
>> +extent_scan_init (int src_fd, size_t src_total_size,
>> + struct extent_scan *scan)
>> {
>> scan->fd = src_fd;
>> scan->ei_count = 0;
>> scan->scan_start = 0;
>> scan->initial_scan_failed = false;
>> scan->hit_final_extent = false;
>> +#if defined(SEEK_HOLE) && defined(SEEK_DATA)
>> + scan->src_total_size = src_total_size;
>> +#endif
>> }
>>
>> #ifdef __linux__
>> @@ -106,6 +110,113 @@ extent_scan_read (struct extent_scan *scan)
>>
>> return true;
>> }
>> +#elif defined(SEEK_HOLE) && defined(SEEK_DATA)
>> +extern bool
>> +extent_scan_read (struct extent_scan *scan)
>> +{
>> + off_t data_pos, hole_pos;
>> + union { struct extent_info ei; char c[4096]; } extent_buf;
>> + struct extent_info *ext_info = &extent_buf.ei;
>> + enum { count = (sizeof extent_buf / sizeof *ext_info) };
>> + verify (count != 0);
>> +
>> + memset (&extent_buf, 0, sizeof extent_buf);
>> +
>> + if (scan->scan_start == 0)
>> + {
>> +# ifdef _PC_MIN_HOLE_SIZE
>> + /* To determine if the underlaying file system support
>> + SEEK_HOLE, if not, fall back to the standard copy. */
>> + if (fpathconf (scan->fd, _PC_MIN_HOLE_SIZE) < 0)
>> + {
>> + scan->initial_scan_failed = true;
>> + return false;
>> + }
>> +# endif
>> +
>> + /* If we have been compiled on an OS that supports SEEK_HOLE
>> + but run on an OS that does not support SEEK_HOLE, we get
>> + EINVAL. If the underlying filesystem does not support the
>> + SEEK_HOLE call, we get ENOTSUP, fall back to standard copy
>> + in either case. */
>> + hole_pos = lseek (scan->fd, (off_t) 0, SEEK_HOLE);
>> + if (hole_pos < 0)
>> + {
>> + if (errno == EINVAL || errno == ENOTSUP)
>> + scan->initial_scan_failed = true;
>> + return false;
>> + }
>> +
>> + /* Seek back to position 0 first if we detected a real hole. */
>> + if (hole_pos > 0)
>> + {
>> + off_t tmp_pos;
>> + tmp_pos = lseek (scan->fd, (off_t) 0, SEEK_SET);
>> + if (tmp_pos != (off_t) 0)
>> + return false;
>> +
>> + /* The source file is definitely not a sparse file, or it
>> + maybe a sparse file but SEEK_HOLE returns the source file's
>> + total size, fall back to the standard copy too. */
>> + if (hole_pos >= scan->src_total_size)
>> + {
>> + scan->initial_scan_failed = true;
>> + return false;
>> + }
>> + }
>> + }
>> +
>> + unsigned int i = 0;
>> + /* If lseek(2) failed and the errno is set to ENXIO, for
>> + SEEK_DATA there are no more data regions past the supplied
>> + offset. For SEEK_HOLE, there are no more holes past the
>> + supplied offset. Set scan->hit_final_extent to true for
>> + either case. */
>> + do {
>> + data_pos = lseek (scan->fd, scan->scan_start, SEEK_DATA);
>> + if (data_pos < 0)
>> + {
>> + if (errno != ENXIO)
>> + return false;
>> + else
>> + {
>> + scan->hit_final_extent = true;
>> + return true;
>> + }
>> + }
>> +
>> + hole_pos = lseek (scan->fd, data_pos, SEEK_HOLE);
>> + if (hole_pos < 0)
>> + {
>> + if (errno != ENXIO)
>> + return false;
>> + else
>> + {
>> + scan->hit_final_extent = true;
>> + return true;
>> + }
>> + }
>> +
>> + ext_info[i].ext_logical = data_pos;
>> + ext_info[i].ext_length = hole_pos - data_pos;
>> + scan->scan_start = hole_pos;
>> + ++i;
>> + } while (scan->scan_start < scan->src_total_size && i < count);
>> +
>> + i--;
>> + scan->ei_count = i;
>> + scan->ext_info = xnmalloc (scan->ei_count, sizeof (struct extent_info));
>> +
>> + for (i = 0; i < scan->ei_count; i++)
>> + {
>> + assert (ext_info[i].ext_logical <= OFF_T_MAX);
>> +
>> + scan->ext_info[i].ext_logical = ext_info[i].ext_logical;
>> + scan->ext_info[i].ext_length = ext_info[i].ext_length;
>> + }
>> +
>> + return true;
>> +}
>> #else
>> extern bool
>> extent_scan_read (struct extent_scan *scan ATTRIBUTE_UNUSED)
>> diff --git a/src/extent-scan.h b/src/extent-scan.h
>> index 4724b25..a271b95 100644
>> --- a/src/extent-scan.h
>> +++ b/src/extent-scan.h
>> @@ -18,7 +18,6 @@
>>
>> #ifndef EXTENT_SCAN_H
>> # define EXTENT_SCAN_H
>> -
>> /* Structure used to store information of each extent. */
>> struct extent_info
>> {
>> @@ -38,6 +37,11 @@ struct extent_scan
>> /* File descriptor of extent scan run against. */
>> int fd;
>>
>> +#if defined(SEEK_DATA) && defined(SEEK_HOLE)
>> + /* Source file size, i.e, (struct stat) &statbuf.st_size. */
>> + size_t src_total_size;
>> +#endif
>> +
>> /* Next scan start offset. */
>> off_t scan_start;
>>
>> @@ -55,7 +59,8 @@ struct extent_scan
>> struct extent_info *ext_info;
>> };
>>
>> -void extent_scan_init (int src_fd, struct extent_scan *scan);
>> +void extent_scan_init (int src_fd, size_t src_total_size,
>> + struct extent_scan *scan);
>>
>> bool extent_scan_read (struct extent_scan *scan);
>>
>> --
>> 1.7.4
>