[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH] savedir: optionally produce ordered directory list
From: |
Sergey Poznyakoff |
Subject: |
[PATCH] savedir: optionally produce ordered directory list |
Date: |
Tue, 11 Feb 2014 11:45:48 +0200 |
Hello,
According to [1], using directory lists ordered by inode can speed up
tar archivation by up to 40% on ext[34] filesystems (confirmed by my own
measurements). Please take a look at the attached patch. Is it OK
to apply?
Regards,
Sergey
[1] https://savannah.gnu.org/patch/?7892
>From 92ddbfb74f05e5cded933f760cddd0ce007001ae Mon Sep 17 00:00:00 2001
From: Sergey Poznyakoff <address@hidden>
Date: Tue, 11 Feb 2014 10:52:44 +0200
Subject: [PATCH] savedir: optionally produce ordered directory list
New function streamsavedir_ordered returns directory
entries ordered by names or inode numbers. This can
be used by archivers to store archive members in reproducible
order (when sorted by name) or to speed-up archivation on
some filesystems (when sorted by inode).
Patch based on an idea by Dick Streefland.
* lib/savedir.h (SAVEDIR_SORT_NONE, SAVEDIR_SORT_NAME)
(SAVEDIR_SORT_INODE): New constants.
(streamsavedir_ordered)
(set_savedir_sort_order): New prototypes.
* lib/savedir.c (streamsavedir_ordered): New function.
(set_savedir_sort_order): New function.
(streamsavedir): Rewrite as an entry point to
streamsavedir_ordered.
---
lib/savedir.c | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
lib/savedir.h | 10 +++++
2 files changed, 140 insertions(+), 3 deletions(-)
diff --git a/lib/savedir.c b/lib/savedir.c
index 6d5ed7f..5fc2ff2 100644
--- a/lib/savedir.c
+++ b/lib/savedir.c
@@ -41,25 +41,92 @@
# define NAME_SIZE_DEFAULT 512
#endif
+typedef struct
+{
+ union
+ {
+ char *ptr;
+ size_t off;
+ } name;
+#if D_INO_IN_DIRENT
+ ino_t ino;
+#endif
+} direntry_t;
+
+/* Compare the names of two directory entries */
+
+static int
+direntry_cmp_name (const void *a, const void *b)
+{
+ const direntry_t *dea = (const direntry_t *) a;
+ const direntry_t *deb = (const direntry_t *) b;
+
+ return strcmp(dea->name.ptr, deb->name.ptr);
+}
+
+#if D_INO_IN_DIRENT
+/* Compare the inode numbers of two directory entries */
+
+static int
+direntry_cmp_inode (const void *a, const void *b)
+{
+ const direntry_t *dea = (const direntry_t *) a;
+ const direntry_t *deb = (const direntry_t *) b;
+
+ return (int) (dea->ino - deb->ino);
+}
+#endif
+
/* Return a freshly allocated string containing the file names
in directory DIRP, separated by '\0' characters;
the end is marked by two '\0' characters in a row.
- Return NULL (setting errno) if DIRP cannot be read.
- If DIRP is NULL, return NULL without affecting errno. */
+ Returned values are sorted according to ORDER.
+
+ Return NULL (setting errno) if DIRP cannot be read or ORDER
+ is invalid.
+
+ If DIRP is NULL, return NULL without affecting errno.
+ */
char *
-streamsavedir (DIR *dirp)
+streamsavedir_ordered (DIR *dirp, int order)
{
char *name_space;
+ char *sorted;
size_t allocated = NAME_SIZE_DEFAULT;
+ direntry_t *entries = NULL;
+ size_t entries_allocated = 0;
+ size_t entries_used = 0;
+ size_t i;
size_t used = 0;
int save_errno;
+ int (*cmp) (const void *a, const void *b) = NULL;
if (dirp == NULL)
return NULL;
name_space = xmalloc (allocated);
+ switch (order)
+ {
+ case SAVEDIR_SORT_NONE:
+ break;
+
+ case SAVEDIR_SORT_NAME:
+ cmp = direntry_cmp_name;
+ break;
+
+#if D_INO_IN_DIRENT
+ case SAVEDIR_SORT_INODE:
+ cmp = direntry_cmp_inode;
+ break;
+#endif
+
+ default:
+ errno = EINVAL;
+ return NULL;
+ }
+
for (;;)
{
struct dirent const *dp;
@@ -91,9 +158,41 @@ streamsavedir (DIR *dirp)
name_space = xrealloc (name_space, allocated);
}
memcpy (name_space + used, entry, entry_size);
+ if (cmp)
+ {
+ if (entries_allocated == entries_used)
+ {
+ if (entries_allocated == 0)
+ entries_allocated = NAME_SIZE_DEFAULT / sizeof (direntry_t);
+ entries = x2nrealloc (entries, &entries_allocated,
+ sizeof (entries[0]));
+ }
+ entries[entries_used].name.off = used;
+#if D_INO_IN_DIRENT
+ entries[entries_used].ino = dp->d_ino;
+#endif
+ entries_used++;
+ }
used += entry_size;
}
}
+
+ if (cmp)
+ {
+ for (i = 0; i < entries_used; i++)
+ entries[i].name.ptr = name_space + entries[i].name.off;
+ qsort (entries, entries_used, sizeof (direntry_t), cmp);
+ sorted = xmalloc (used + 1);
+ used = 0;
+ for (i = 0; i < entries_used; i++)
+ {
+ strcpy (sorted + used, entries[i].name.ptr);
+ used += strlen (sorted + used) + 1;
+ }
+ free (entries);
+ free (name_space);
+ name_space = sorted;
+ }
name_space[used] = '\0';
save_errno = errno;
if (save_errno != 0)
@@ -105,6 +204,34 @@ streamsavedir (DIR *dirp)
return name_space;
}
+static int default_sort_order;
+
+int
+set_savedir_sort_order (int order)
+{
+ switch (order)
+ {
+ case SAVEDIR_SORT_NONE:
+ case SAVEDIR_SORT_NAME:
+#if D_INO_IN_DIRENT
+ case SAVEDIR_SORT_INODE:
+#endif
+ default_sort_order = order;
+ break;
+
+ default:
+ errno = EINVAL;
+ return 1;
+ }
+ return 0;
+}
+
+char *
+streamsavedir (DIR *dirp)
+{
+ return streamsavedir_ordered (dirp, default_sort_order);
+}
+
/* Like streamsavedir (DIRP), except also close DIRP. */
static char *
diff --git a/lib/savedir.h b/lib/savedir.h
index eedb0c4..fb59211 100644
--- a/lib/savedir.h
+++ b/lib/savedir.h
@@ -22,6 +22,16 @@
#define _GL_SAVEDIR_H
#include <dirent.h>
+
+enum
+ {
+ SAVEDIR_SORT_NONE,
+ SAVEDIR_SORT_NAME,
+ SAVEDIR_SORT_INODE
+ };
+int set_savedir_sort_order (int order);
+
+char *streamsavedir_ordered (DIR *dirp, int sortorder);
char *streamsavedir (DIR *dirp);
char *savedir (char const *dir);
char *fdsavedir (int fd); /* deprecated */
--
1.7.12.1