bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] savedir: optionally produce ordered directory list


From: Sergey Poznyakoff
Subject: [PATCH] savedir: optionally produce ordered directory list
Date: Tue, 11 Feb 2014 11:45:48 +0200

Hello,

According to [1], using directory lists ordered by inode can speed up
tar archivation by up to 40% on ext[34] filesystems (confirmed by my own
measurements).  Please take a look at the attached patch.  Is it OK
to apply?

Regards,
Sergey

[1] https://savannah.gnu.org/patch/?7892

>From 92ddbfb74f05e5cded933f760cddd0ce007001ae Mon Sep 17 00:00:00 2001
From: Sergey Poznyakoff <address@hidden>
Date: Tue, 11 Feb 2014 10:52:44 +0200
Subject: [PATCH] savedir: optionally produce ordered directory list

New function streamsavedir_ordered returns directory
entries ordered by names or inode numbers.  This can
be used by archivers to store archive members in reproducible
order (when sorted by name) or to speed-up archivation on
some filesystems (when sorted by inode).

Patch based on an idea by Dick Streefland.

* lib/savedir.h (SAVEDIR_SORT_NONE, SAVEDIR_SORT_NAME)
(SAVEDIR_SORT_INODE): New constants.
(streamsavedir_ordered)
(set_savedir_sort_order): New prototypes.
* lib/savedir.c (streamsavedir_ordered): New function.
(set_savedir_sort_order): New function.
(streamsavedir): Rewrite as an entry point to
streamsavedir_ordered.
---
 lib/savedir.c | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 lib/savedir.h |  10 +++++
 2 files changed, 140 insertions(+), 3 deletions(-)

diff --git a/lib/savedir.c b/lib/savedir.c
index 6d5ed7f..5fc2ff2 100644
--- a/lib/savedir.c
+++ b/lib/savedir.c
@@ -41,25 +41,92 @@
 # define NAME_SIZE_DEFAULT 512
 #endif

+typedef struct
+{
+  union
+  {
+    char *ptr;
+    size_t off;
+  } name;
+#if D_INO_IN_DIRENT
+  ino_t ino;
+#endif
+} direntry_t;
+
+/* Compare the names of two directory entries */
+
+static int
+direntry_cmp_name (const void *a, const void *b)
+{
+  const direntry_t *dea = (const direntry_t *) a;
+  const direntry_t *deb = (const direntry_t *) b;
+
+  return strcmp(dea->name.ptr, deb->name.ptr);
+}
+
+#if D_INO_IN_DIRENT
+/* Compare the inode numbers of two directory entries */
+
+static int
+direntry_cmp_inode (const void *a, const void *b)
+{
+  const direntry_t *dea = (const direntry_t *) a;
+  const direntry_t *deb = (const direntry_t *) b;
+
+  return (int) (dea->ino - deb->ino);
+}
+#endif
+
 /* Return a freshly allocated string containing the file names
    in directory DIRP, separated by '\0' characters;
    the end is marked by two '\0' characters in a row.
-   Return NULL (setting errno) if DIRP cannot be read.
-   If DIRP is NULL, return NULL without affecting errno.  */
+   Returned values are sorted according to ORDER.
+
+   Return NULL (setting errno) if DIRP cannot be read or ORDER
+   is invalid.
+
+   If DIRP is NULL, return NULL without affecting errno.
+ */

 char *
-streamsavedir (DIR *dirp)
+streamsavedir_ordered (DIR *dirp, int order)
 {
   char *name_space;
+  char *sorted;
   size_t allocated = NAME_SIZE_DEFAULT;
+  direntry_t *entries = NULL;
+  size_t entries_allocated = 0;
+  size_t entries_used = 0;
+  size_t i;
   size_t used = 0;
   int save_errno;
+  int (*cmp) (const void *a, const void *b) = NULL;

   if (dirp == NULL)
     return NULL;

   name_space = xmalloc (allocated);

+  switch (order)
+    {
+    case SAVEDIR_SORT_NONE:
+      break;
+
+    case SAVEDIR_SORT_NAME:
+      cmp = direntry_cmp_name;
+      break;
+
+#if D_INO_IN_DIRENT
+    case SAVEDIR_SORT_INODE:
+      cmp = direntry_cmp_inode;
+      break;
+#endif
+
+    default:
+      errno = EINVAL;
+      return NULL;
+    }
+
   for (;;)
     {
       struct dirent const *dp;
@@ -91,9 +158,41 @@ streamsavedir (DIR *dirp)
               name_space = xrealloc (name_space, allocated);
             }
           memcpy (name_space + used, entry, entry_size);
+          if (cmp)
+            {
+              if (entries_allocated == entries_used)
+                {
+                 if (entries_allocated == 0)
+                   entries_allocated = NAME_SIZE_DEFAULT / sizeof (direntry_t);
+                 entries = x2nrealloc (entries, &entries_allocated,
+                                       sizeof (entries[0]));
+                }
+              entries[entries_used].name.off = used;
+#if D_INO_IN_DIRENT
+              entries[entries_used].ino = dp->d_ino;
+#endif
+              entries_used++;
+            }
           used += entry_size;
         }
     }
+
+  if (cmp)
+    {
+      for (i = 0; i < entries_used; i++)
+       entries[i].name.ptr = name_space + entries[i].name.off;
+      qsort (entries, entries_used, sizeof (direntry_t), cmp);
+      sorted = xmalloc (used + 1);
+      used = 0;
+      for (i = 0; i < entries_used; i++)
+        {
+          strcpy (sorted + used, entries[i].name.ptr);
+          used += strlen (sorted + used) + 1;
+        }
+      free (entries);
+      free (name_space);
+      name_space = sorted;
+    }
   name_space[used] = '\0';
   save_errno = errno;
   if (save_errno != 0)
@@ -105,6 +204,34 @@ streamsavedir (DIR *dirp)
   return name_space;
 }

+static int default_sort_order;
+
+int
+set_savedir_sort_order (int order)
+{
+  switch (order)
+    {
+    case SAVEDIR_SORT_NONE:
+    case SAVEDIR_SORT_NAME:
+#if D_INO_IN_DIRENT
+    case SAVEDIR_SORT_INODE:
+#endif
+      default_sort_order = order;
+      break;
+
+    default:
+      errno = EINVAL;
+      return 1;
+   }
+  return 0;
+}
+
+char *
+streamsavedir (DIR *dirp)
+{
+  return streamsavedir_ordered (dirp, default_sort_order);
+}
+
 /* Like streamsavedir (DIRP), except also close DIRP.  */

 static char *
diff --git a/lib/savedir.h b/lib/savedir.h
index eedb0c4..fb59211 100644
--- a/lib/savedir.h
+++ b/lib/savedir.h
@@ -22,6 +22,16 @@
 #define _GL_SAVEDIR_H

 #include <dirent.h>
+
+enum
+  {
+    SAVEDIR_SORT_NONE,
+    SAVEDIR_SORT_NAME,
+    SAVEDIR_SORT_INODE
+  };
+int set_savedir_sort_order (int order);
+
+char *streamsavedir_ordered (DIR *dirp, int sortorder);
 char *streamsavedir (DIR *dirp);
 char *savedir (char const *dir);
 char *fdsavedir (int fd); /* deprecated */
--
1.7.12.1


reply via email to

[Prev in Thread] Current Thread [Next in Thread]