[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
bug#8061: Introduce SEEK_DATA/SEEK_HOLE to extent_scan module
From: |
Jeff liu |
Subject: |
bug#8061: Introduce SEEK_DATA/SEEK_HOLE to extent_scan module |
Date: |
Thu, 17 Feb 2011 21:57:14 +0800 |
Hello All,
This is the first try to introduce the SEEK_DATA/SEEK_HOLE support to
extent_scan module for efficient sparse file copy on ZFS, I have delayed it
for a long time, sorry for that!
Below is the code change lists:
src/extent_scan.h: add a new structure item 'src_total_size' to "struct
extent_info", since I have to make use of this value to determine
a file is sparse of not for the initial scan. If the returns of lseek(fd, 0,
SEEK_HOLE) is equal to the src_total_size or large than it, it means the source
file
is definitely not a sparse file or maybe it is a sparse file but it does not
make sense for proceeding scan read.
another change in this file is the signature of extent_scan_init(), just as I
mentioned above, it need to accept the src_total_size variable.
src/extent_scan.c: implement the new exent_scan_read() through
SEEK_DATA/SEEK_HOLE, it will be called if those two values are defined at
<unistd.h>.
src/copy.c: pass src_total_size to extent_scan_init().
On my test environment, Solaris10, SunOS 5.10 Generic_142910-17, I have tried
a few simple cases, they are works to me.
For now, I am using diff(1) to verify the copy result, does anyone know some
utilities can be used to write the test script?
I have sent an email to ZFS DEV mail-list to ask this question yesterday, a
nice guy suggest me to use ZDB(http://cuddletech.com/blog/?p=407) for that, I'm
still study this utility now, I also noticed there is patch to add
SEEK_HOLE/SEEK_DATA support to os module in Python community, please refer to:
http://bugs.python.org/file19566/z.patch
but it require very latest python build I think, so could anyone give some
other advices in this point?
The patch is shown as following, any help testing and comments are appreciated!!
Thanks,
-Jeff
From: Jie Liu <address@hidden>
Date: Thu, 17 Feb 2011 21:14:23 +0800
Subject: [PATCH 1/1] copy: add SEEK_DATA/SEEK_HOLE support to extent_scan module
* src/extent_scan.h: add src_total_size to struct extent_info, we need
to check the SEEK_HOLE result against it for initial extent scan.
modify the extent_scan_init() signature, to add size_t src_total_size.
* src/extent_scan.c: implement a new extent_scan_read() through SEEK_DATA
and SEEK_HOLE.
* src/copy.c: pass src_total_size to extent_scan_init().
Signed-off-by: Jie Liu <address@hidden>
---
src/copy.c | 2 +-
src/extent-scan.c | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
src/extent-scan.h | 9 +++-
3 files changed, 120 insertions(+), 4 deletions(-)
diff --git a/src/copy.c b/src/copy.c
index 104652d..22b9911 100644
--- a/src/copy.c
+++ b/src/copy.c
@@ -306,7 +306,7 @@ extent_copy (int src_fd, int dest_fd, char *buf, size_t
buf_size,
We may need this at the end, for a final ftruncate. */
off_t dest_pos = 0;
- extent_scan_init (src_fd, &scan);
+ extent_scan_init (src_fd, src_total_size, &scan);
*require_normal_copy = false;
bool wrote_hole_at_eof = true;
diff --git a/src/extent-scan.c b/src/extent-scan.c
index 1ba59db..ffeab7a 100644
--- a/src/extent-scan.c
+++ b/src/extent-scan.c
@@ -32,13 +32,17 @@
/* Allocate space for struct extent_scan, initialize the entries if
necessary and return it as the input argument of extent_scan_read(). */
extern void
-extent_scan_init (int src_fd, struct extent_scan *scan)
+extent_scan_init (int src_fd, size_t src_total_size,
+ struct extent_scan *scan)
{
scan->fd = src_fd;
scan->ei_count = 0;
scan->scan_start = 0;
scan->initial_scan_failed = false;
scan->hit_final_extent = false;
+#if defined(SEEK_HOLE) && defined(SEEK_DATA)
+ scan->src_total_size = src_total_size;
+#endif
}
#ifdef __linux__
@@ -106,6 +110,113 @@ extent_scan_read (struct extent_scan *scan)
return true;
}
+#elif defined(SEEK_HOLE) && defined(SEEK_DATA)
+extern bool
+extent_scan_read (struct extent_scan *scan)
+{
+ off_t data_pos, hole_pos;
+ union { struct extent_info ei; char c[4096]; } extent_buf;
+ struct extent_info *ext_info = &extent_buf.ei;
+ enum { count = (sizeof extent_buf / sizeof *ext_info) };
+ verify (count != 0);
+
+ memset (&extent_buf, 0, sizeof extent_buf);
+
+ if (scan->scan_start == 0)
+ {
+# ifdef _PC_MIN_HOLE_SIZE
+ /* To determine if the underlaying file system support
+ SEEK_HOLE, if not, fall back to the standard copy. */
+ if (fpathconf (scan->fd, _PC_MIN_HOLE_SIZE) < 0)
+ {
+ scan->initial_scan_failed = true;
+ return false;
+ }
+# endif
+
+ /* If we have been compiled on an OS that supports SEEK_HOLE
+ but run on an OS that does not support SEEK_HOLE, we get
+ EINVAL. If the underlying filesystem does not support the
+ SEEK_HOLE call, we get ENOTSUP, fall back to standard copy
+ in either case. */
+ hole_pos = lseek (scan->fd, (off_t) 0, SEEK_HOLE);
+ if (hole_pos < 0)
+ {
+ if (errno == EINVAL || errno == ENOTSUP)
+ scan->initial_scan_failed = true;
+ return false;
+ }
+
+ /* Seek back to position 0 first if we detected a real hole. */
+ if (hole_pos > 0)
+ {
+ off_t tmp_pos;
+ tmp_pos = lseek (scan->fd, (off_t) 0, SEEK_SET);
+ if (tmp_pos != (off_t) 0)
+ return false;
+
+ /* The source file is definitely not a sparse file, or it
+ maybe a sparse file but SEEK_HOLE returns the source file's
+ total size, fall back to the standard copy too. */
+ if (hole_pos >= scan->src_total_size)
+ {
+ scan->initial_scan_failed = true;
+ return false;
+ }
+ }
+ }
+
+ unsigned int i = 0;
+ /* If lseek(2) failed and the errno is set to ENXIO, for
+ SEEK_DATA there are no more data regions past the supplied
+ offset. For SEEK_HOLE, there are no more holes past the
+ supplied offset. Set scan->hit_final_extent to true for
+ either case. */
+ do {
+ data_pos = lseek (scan->fd, scan->scan_start, SEEK_DATA);
+ if (data_pos < 0)
+ {
+ if (errno != ENXIO)
+ return false;
+ else
+ {
+ scan->hit_final_extent = true;
+ return true;
+ }
+ }
+
+ hole_pos = lseek (scan->fd, data_pos, SEEK_HOLE);
+ if (hole_pos < 0)
+ {
+ if (errno != ENXIO)
+ return false;
+ else
+ {
+ scan->hit_final_extent = true;
+ return true;
+ }
+ }
+
+ ext_info[i].ext_logical = data_pos;
+ ext_info[i].ext_length = hole_pos - data_pos;
+ scan->scan_start = hole_pos;
+ ++i;
+ } while (scan->scan_start < scan->src_total_size && i < count);
+
+ i--;
+ scan->ei_count = i;
+ scan->ext_info = xnmalloc (scan->ei_count, sizeof (struct extent_info));
+
+ for (i = 0; i < scan->ei_count; i++)
+ {
+ assert (ext_info[i].ext_logical <= OFF_T_MAX);
+
+ scan->ext_info[i].ext_logical = ext_info[i].ext_logical;
+ scan->ext_info[i].ext_length = ext_info[i].ext_length;
+ }
+
+ return true;
+}
#else
extern bool
extent_scan_read (struct extent_scan *scan ATTRIBUTE_UNUSED)
diff --git a/src/extent-scan.h b/src/extent-scan.h
index 4724b25..a271b95 100644
--- a/src/extent-scan.h
+++ b/src/extent-scan.h
@@ -18,7 +18,6 @@
#ifndef EXTENT_SCAN_H
# define EXTENT_SCAN_H
-
/* Structure used to store information of each extent. */
struct extent_info
{
@@ -38,6 +37,11 @@ struct extent_scan
/* File descriptor of extent scan run against. */
int fd;
+#if defined(SEEK_DATA) && defined(SEEK_HOLE)
+ /* Source file size, i.e, (struct stat) &statbuf.st_size. */
+ size_t src_total_size;
+#endif
+
/* Next scan start offset. */
off_t scan_start;
@@ -55,7 +59,8 @@ struct extent_scan
struct extent_info *ext_info;
};
-void extent_scan_init (int src_fd, struct extent_scan *scan);
+void extent_scan_init (int src_fd, size_t src_total_size,
+ struct extent_scan *scan);
bool extent_scan_read (struct extent_scan *scan);
--
1.7.4
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- bug#8061: Introduce SEEK_DATA/SEEK_HOLE to extent_scan module,
Jeff liu <=