bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

bug#6524: du now uses less than half as much memory, sometimes


From: Paul Eggert
Subject: bug#6524: du now uses less than half as much memory, sometimes
Date: Fri, 02 Jul 2010 15:07:54 -0700
User-agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.1.9) Gecko/20100423 Thunderbird/3.0.4

(A circa-2008 patch!  You're almost as backlogged as I am!)

Anyway, here's the revised proposal for that, which I privately
promised a day or two ago.  It avoids using unions and bitfields,
which are a bit hard to follow and apparently run into compiler bugs
with GCC 4.5.0 on x86.  In my measurements, this patch uses a bit less
memory than the earlier patch, when the earlier patch worked for me;
but this is highly test-dependent and it would help to try this out on
other test cases.

This patch continues to treat inode 0 specially, to work around a
problem with hash.c.  Recently hash.c was changed to no longer reject
NULL keys, but its internals still depend on not allowing NULL keys so
this patch is careful to never create a key by casting 0 to (void *).
The assumption is that 0 is the only size_t value i such that ((void
*) i == NULL); this assumption is true for all practical architectures
that I know about, though it is not guaranteed by any standard.

One other thing: the gnulib (or prototype gnulib) stuff I converted
to C90; I assume that's still a constraint for gnulib (though not
for coreutils proper).  This was mostly just moving decls to the
starts of blocks.

Change 'du' to use much less memory to track hard links.
* NEWS: Mention this.
* bootstrap.conf (gnulib_modules): Add di-set.
* gl/lib/di-set.c, gl/lib/di-set.h: New files.
* gl/lib/ino-map.c, gl/lib/ino-map.h: New files.
* gl/modules/di-set, gl/modules/di-set-tests: New files.
* gl/modules/ino-map, gl/modules/ino-map-tests: New files.
* gl/tests/test-di-set.c, gl/tests/test-ino-map.c: New files.
* src/du.c: Include di-set.h rather than hash.h.
(INITIAL_TABLE_SIZE, struct entry, htab, entry_hash, entry_compare):
(hash_init): Remove.
(di_set): New static var.
(hash_ins): Just use di_set_insert.
(main): Use di_set_alloc and di_set_free to allocate and free tables,
rather than the old hash_init and hash_free.
diff --git a/NEWS b/NEWS
index 3a24925..2493ef8 100644
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,10 @@ GNU coreutils NEWS                                    -*- 
outline -*-
   du recognizes -d N as equivalent to --max-depth=N, for compatibility
   with FreeBSD.
 
+  du now uses less than half as much memory when operating on trees
+  with many hard-linked files.  With --count-links (-l), or when
+  operating on trees with no hard-linked files, there is no change.
+
   sort now accepts the --debug option, to highlight the part of the
   line significant in the sort, and warn about questionable options.
 
diff --git a/bootstrap.conf b/bootstrap.conf
index 6e85c9a..644c18b 100644
--- a/bootstrap.conf
+++ b/bootstrap.conf
@@ -69,6 +69,7 @@ gnulib_modules="
   cycle-check
   d-ino
   d-type
+  di-set
   diacrit
   dirfd
   dirname
diff --git a/gl/lib/di-set.c b/gl/lib/di-set.c
new file mode 100644
index 0000000..76baf54
--- /dev/null
+++ b/gl/lib/di-set.c
@@ -0,0 +1,232 @@
+/* Set operations for device-inode pairs stored in a space-efficient manner.
+
+   Copyright 2009-2010 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* written by Paul Eggert and Jim Meyering */
+
+#include <config.h>
+#include "di-set.h"
+
+#include "hash.h"
+#include "ino-map.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+/* The hash package hashes "void *", but this package wants to hash
+   integers.  Use integers that are as large as possible, but no
+   larger than void *, so that they can be cast to void * and back
+   without losing information.  */
+typedef size_t hashint;
+#define HASHINT_MAX ((hashint) -1)
+
+/* Integers represent inode numbers.  Integers in the range
+   1..(LARGE_INO_MIN-1) represent inode numbers directly.  (The hash
+   package does not work with null pointers, so inode 0 cannot be used
+   as a key.)  To find the representations of other inode numbers, map
+   them through INO_MAP.  */
+#define LARGE_INO_MIN (HASHINT_MAX / 2)
+
+/* Set operations for device-inode pairs stored in a space-efficient
+   manner.  Use a two-level hash table.  The top level hashes by
+   device number, as there are typically a small number of devices.
+   The lower level hashes by mapped inode numbers.  In the typical
+   case where the inode number is positive and small, the inode number
+   maps to itself, masquerading as a void * value; otherwise, its
+   value is the result of hashing the inode value through INO_MAP.  */
+
+/* A pair that maps a device number to a set of inode numbers.  */
+struct di_ent
+{
+  dev_t dev;
+  struct hash_table *ino_set;
+};
+
+/* A two-level hash table that manages and indexes these pairs.  */
+struct di_set
+{
+  /* Map device numbers to sets of inode number representatives.  */
+  struct hash_table *dev_map;
+
+  /* If nonnull, map large inode numbers to their small
+     representatives.  If null, there are no large inode numbers in
+     this set.  */
+  struct ino_map *ino_map;
+
+  /* Cache of the most recently allocated and otherwise-unused storage
+     for probing this table.  */
+  struct di_ent *probe;
+};
+
+/* Hash a directory set entry.  */
+static size_t
+di_ent_hash (void const *x, size_t table_size)
+{
+  struct di_ent const *p = x;
+
+  /* Use ordinary % if it's known to return a nonnegative value.
+     Otherwise, fall back on uintmax_t %, which could be much slower.  */
+  if ((dev_t) -1 % (size_t) 2 == 1)
+    return p->dev % table_size;
+  else
+    {
+      uintmax_t dev = p->dev;
+      return dev % table_size;
+    }
+}
+
+/* Return true if two directory set entries are the same.  */
+static bool
+di_ent_compare (void const *x, void const *y)
+{
+  struct di_ent const *a = x;
+  struct di_ent const *b = y;
+  return a->dev == b->dev;
+}
+
+/* Free a directory set entry.  */
+static void
+di_ent_free (void *v)
+{
+  struct di_ent *a = v;
+  hash_free (a->ino_set);
+  free (a);
+}
+
+/* Create a set of device-inode pairs.  Return NULL on allocation failure.  */
+struct di_set *
+di_set_alloc (void)
+{
+  struct di_set *dis = malloc (sizeof *dis);
+  if (dis)
+    {
+      enum { INITIAL_DEV_MAP_SIZE = 11 };
+      dis->dev_map = hash_initialize (INITIAL_DEV_MAP_SIZE, NULL,
+                                      di_ent_hash, di_ent_compare,
+                                      di_ent_free);
+      if (! dis->dev_map)
+        {
+          free (dis);
+          return NULL;
+        }
+      dis->ino_map = NULL;
+      dis->probe = NULL;
+    }
+
+  return dis;
+}
+
+/* Free a set of device-inode pairs.  */
+void
+di_set_free (struct di_set *dis)
+{
+  hash_free (dis->dev_map);
+  free (dis->ino_map);
+  free (dis->probe);
+  free (dis);
+}
+
+/* Hash an encoded inode number I.  */
+static size_t
+di_ino_hash (void const *i, size_t table_size)
+{
+  return (hashint) i % table_size;
+}
+
+/* Using the DIS table, map a device to a hash table that represents
+   a set of inode numbers.  Return NULL on error.  */
+static struct hash_table *
+map_device (struct di_set *dis, dev_t dev)
+{
+  /* Find space for the probe, reusing the cache if available.  */
+  struct di_ent *ent;
+  struct di_ent *probe = dis->probe;
+  if (probe)
+    {
+      /* If repeating a recent query, return the cached result.   */
+      if (probe->dev == dev)
+        return probe->ino_set;
+    }
+  else
+    {
+      dis->probe = probe = malloc (sizeof *probe);
+      if (! probe)
+        return NULL;
+    }
+
+  /* Probe for the device.  */
+  probe->dev = dev;
+  ent = hash_insert (dis->dev_map, probe);
+  if (! ent)
+    return NULL;
+
+  if (ent == probe)
+    {
+      enum { INITIAL_INO_SET_SIZE = 1021 };
+
+      /* Prepare to allocate a new probe next time; this one is in use.  */
+      dis->probe = NULL;
+
+      /* DEV is new; allocate an inode set for it.  */
+      probe->ino_set = hash_initialize (INITIAL_INO_SET_SIZE, NULL,
+                                        di_ino_hash, NULL, NULL);
+    }
+  else
+    probe->ino_set = ent->ino_set;
+
+  return probe->ino_set;
+}
+
+/* Using the DIS table, map an inode number to a mapped value.
+   Return INO_MAP_INSERT_FAILURE on error.  */
+static hashint
+map_inode_number (struct di_set *dis, ino_t ino)
+{
+  if (0 < ino && ino < LARGE_INO_MIN)
+    return ino;
+
+  if (! dis->ino_map)
+    {
+      dis->ino_map = ino_map_alloc (LARGE_INO_MIN);
+      if (! dis->ino_map)
+        return INO_MAP_INSERT_FAILURE;
+    }
+
+  return ino_map_insert (dis->ino_map, ino);
+}
+
+/* Attempt to insert the DEV,INO pair into the set DIS.
+   If it matches a pair already in DIS, keep that pair and return 0.
+   Otherwise, if insertion is successful, return 1.
+   Upon any failure return -1.  */
+int
+di_set_insert (struct di_set *dis, dev_t dev, ino_t ino)
+{
+  hashint i;
+
+  /* Map the device number to a set of inodes.  */
+  struct hash_table *ino_set = map_device (dis, dev);
+  if (! ino_set)
+    return -1;
+
+  /* Map the inode number to a small representative I.  */
+  i = map_inode_number (dis, ino);
+  if (i == INO_MAP_INSERT_FAILURE)
+    return -1;
+
+  /* Put I into the inode set.  */
+  return hash_insert0 (ino_set, (void *) i, NULL);
+}
diff --git a/gl/lib/di-set.h b/gl/lib/di-set.h
new file mode 100644
index 0000000..d30a31e
--- /dev/null
+++ b/gl/lib/di-set.h
@@ -0,0 +1,12 @@
+#include <sys/types.h>
+
+#undef _ATTRIBUTE_NONNULL_
+#if __GNUC__ == 3 && __GNUC_MINOR__ >= 3 || 3 < __GNUC__
+# define _ATTRIBUTE_NONNULL_(m) __attribute__ ((__nonnull__ (m)))
+#else
+# define _ATTRIBUTE_NONNULL_(m)
+#endif
+
+struct di_set *di_set_alloc (void);
+int di_set_insert (struct di_set *, dev_t, ino_t) _ATTRIBUTE_NONNULL_ (1);
+void di_set_free (struct di_set *) _ATTRIBUTE_NONNULL_ (1);
diff --git a/gl/lib/ino-map.c b/gl/lib/ino-map.c
new file mode 100644
index 0000000..083b49d
--- /dev/null
+++ b/gl/lib/ino-map.c
@@ -0,0 +1,159 @@
+/* Map an ino_t inode number to a small integer.
+
+   Copyright 2009, 2010 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* written by Paul Eggert and Jim Meyering */
+
+#include <config.h>
+#include "ino-map.h"
+
+#include "hash.h"
+#include "verify.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+/* A pair that maps an inode number to a mapped inode number; the
+   latter is a small unique ID for the former.  */
+struct ino_map_ent
+{
+  ino_t ino;
+  size_t mapped_ino;
+};
+
+/* A table that manages and indexes these pairs.  */
+struct ino_map
+{
+  /* A table of KEY,VAL pairs, where KEY is the raw ino_t value and
+     VAL is the small number that it maps to.  */
+  struct hash_table *map;
+
+  /* The next mapped inode number to hand out.  */
+  size_t next_mapped_ino;
+
+  /* Cache of the most recently allocated and otherwise-unused storage
+     for probing the table.  */
+  struct ino_map_ent *probe;
+};
+
+/* Hash an inode map entry.  */
+static size_t
+ino_hash (void const *x, size_t table_size)
+{
+  struct ino_map_ent const *p = x;
+
+  /* Use ordinary % if it's known to return a nonnegative value.
+     Otherwise, fall back on uintmax_t %, which could be much slower.  */
+  if ((ino_t) -1 % (size_t) 2 == 1)
+    return p->ino % table_size;
+  else
+    {
+      uintmax_t ino = p->ino;
+      return ino % table_size;
+    }
+}
+
+/* Return true if two inode map entries are the same.  */
+static bool
+ino_compare (void const *x, void const *y)
+{
+  struct ino_map_ent const *a = x;
+  struct ino_map_ent const *b = y;
+  return a->ino == b->ino;
+}
+
+/* Allocate an inode map that will hand out integers starting with
+   NEXT_MAPPED_INO.  Return NULL if memory is exhausted.  */
+struct ino_map *
+ino_map_alloc (size_t next_mapped_ino)
+{
+  struct ino_map *im = malloc (sizeof *im);
+
+  if (im)
+    {
+      enum { INITIAL_INO_MAP_TABLE_SIZE = 1021 };
+      im->map = hash_initialize (INITIAL_INO_MAP_TABLE_SIZE, NULL,
+                                 ino_hash, ino_compare, free);
+      if (! im->map)
+        {
+          free (im);
+          return NULL;
+        }
+      im->next_mapped_ino = next_mapped_ino;
+      im->probe = NULL;
+    }
+
+  return im;
+}
+
+/* Free an inode map.  */
+void
+ino_map_free (struct ino_map *map)
+{
+  hash_free (map->map);
+  free (map->probe);
+  free (map);
+}
+
+
+/* Insert into MAP the inode number INO if it's not there already,
+   and return its nonnegative mapped inode number.
+   If INO is already in MAP, return the existing mapped inode number.
+   Return INO_MAP_INSERT_FAILURE on memory or counter exhaustion.  */
+size_t
+ino_map_insert (struct ino_map *im, ino_t ino)
+{
+  struct ino_map_ent *ent;
+
+  /* Find space for the probe, reusing the cache if available.  */
+  struct ino_map_ent *probe = im->probe;
+  if (probe)
+    {
+      /* If repeating a recent query, return the cached result.   */
+      if (probe->ino == ino)
+        return probe->mapped_ino;
+    }
+  else
+    {
+      im->probe = probe = malloc (sizeof *probe);
+      if (! probe)
+        return INO_MAP_INSERT_FAILURE;
+    }
+
+  probe->ino = ino;
+  ent = hash_insert (im->map, probe);
+  if (! ent)
+    return INO_MAP_INSERT_FAILURE;
+
+  if (ent == probe)
+    {
+      /* If adding 1 to map->next_mapped_ino would cause it to
+         overflow to zero, then it must equal INO_MAP_INSERT_FAILURE,
+         which is value that should be returned in that case.  Verify
+         that this works.  */
+      verify (INO_MAP_INSERT_FAILURE + 1 == 0);
+
+      /* Prepare to allocate a new probe next time; this one is in use.  */
+      im->probe = NULL;
+
+      /* INO is new; allocate a mapped inode number for it.  */
+      probe->mapped_ino = im->next_mapped_ino++;
+    }
+  else
+    probe->mapped_ino = ent->mapped_ino;
+
+  return probe->mapped_ino;
+}
diff --git a/gl/lib/ino-map.h b/gl/lib/ino-map.h
new file mode 100644
index 0000000..661b78a
--- /dev/null
+++ b/gl/lib/ino-map.h
@@ -0,0 +1,14 @@
+#include <sys/types.h>
+
+#undef _ATTRIBUTE_NONNULL_
+#if __GNUC__ == 3 && __GNUC_MINOR__ >= 3 || 3 < __GNUC__
+# define _ATTRIBUTE_NONNULL_(m) __attribute__ ((__nonnull__ (m)))
+#else
+# define _ATTRIBUTE_NONNULL_(m)
+#endif
+
+#define INO_MAP_INSERT_FAILURE ((size_t) -1)
+
+struct ino_map *ino_map_alloc (size_t);
+void ino_map_free (struct ino_map *) _ATTRIBUTE_NONNULL_ (1);
+size_t ino_map_insert (struct ino_map *, ino_t) _ATTRIBUTE_NONNULL_ (1);
diff --git a/gl/modules/di-set b/gl/modules/di-set
new file mode 100644
index 0000000..562db14
--- /dev/null
+++ b/gl/modules/di-set
@@ -0,0 +1,24 @@
+Description:
+manipulate sets of device-inode pairs efficiently
+
+Files:
+lib/di-set.c
+lib/di-set.h
+
+Depends-on:
+ino-map
+hash
+
+configure.ac:
+
+Makefile.am:
+lib_SOURCES += di-set.c di-set.h
+
+Include:
+"di-set.h"
+
+License
+GPL
+
+Maintainer:
+Jim Meyering
diff --git a/gl/modules/di-set-tests b/gl/modules/di-set-tests
new file mode 100644
index 0000000..d60f7fd
--- /dev/null
+++ b/gl/modules/di-set-tests
@@ -0,0 +1,10 @@
+Files:
+tests/test-di-set.c
+
+Depends-on:
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-di-set
+check_PROGRAMS += test-di-set
diff --git a/gl/modules/ino-map b/gl/modules/ino-map
new file mode 100644
index 0000000..06ee51d
--- /dev/null
+++ b/gl/modules/ino-map
@@ -0,0 +1,24 @@
+Description:
+maintain a mapping of ino_t numbers to small integers
+
+Files:
+lib/ino-map.c
+lib/ino-map.h
+
+Depends-on:
+hash
+verify
+
+configure.ac:
+
+Makefile.am:
+lib_SOURCES += ino-map.c ino-map.h
+
+Include:
+"ino-map.h"
+
+License
+GPL
+
+Maintainer:
+Jim Meyering
diff --git a/gl/modules/ino-map-tests b/gl/modules/ino-map-tests
new file mode 100644
index 0000000..fa10b4f
--- /dev/null
+++ b/gl/modules/ino-map-tests
@@ -0,0 +1,10 @@
+Files:
+tests/test-ino-map.c
+
+Depends-on:
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-ino-map
+check_PROGRAMS += test-ino-map
diff --git a/gl/tests/test-di-set.c b/gl/tests/test-di-set.c
new file mode 100644
index 0000000..e5fb6cb
--- /dev/null
+++ b/gl/tests/test-di-set.c
@@ -0,0 +1,64 @@
+/* Test the di-set module.
+   Copyright (C) 2010 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* Written by Jim Meyering.  */
+
+#include <config.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+
+#define ASSERT(expr) \
+  do                                                                         \
+    {                                                                        \
+      if (!(expr))                                                           \
+        {                                                                    \
+          fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
+          fflush (stderr);                                                   \
+          abort ();                                                          \
+        }                                                                    \
+    }                                                                        \
+  while (0)
+
+#include "di-set.h"
+
+int
+main (void)
+{
+  /* set_program_name (argv[0]); placate overzealous "syntax-check" test.  */
+  struct di_set *dis = di_set_alloc ();
+  ASSERT (dis);
+
+  ASSERT (di_set_insert (dis, 2, 5) == 1); /* first insertion succeeds */
+  ASSERT (di_set_insert (dis, 2, 5) == 0); /* duplicate fails */
+  ASSERT (di_set_insert (dis, 3, 5) == 1); /* diff dev, duplicate inode is ok 
*/
+  ASSERT (di_set_insert (dis, 2, 8) == 1); /* same dev, different inode is ok 
*/
+
+  /* very large (or negative) inode number */
+  ASSERT (di_set_insert (dis, 5, (ino_t) -1) == 1);
+  ASSERT (di_set_insert (dis, 5, (ino_t) -1) == 0); /* dup */
+
+  unsigned int i;
+  for (i = 0; i < 3000; i++)
+    ASSERT (di_set_insert (dis, 9, i) == 1);
+  for (i = 0; i < 3000; i++)
+    ASSERT (di_set_insert (dis, 9, i) == 0); /* duplicate fails */
+
+  di_set_free (dis);
+
+  return 0;
+}
diff --git a/gl/tests/test-ino-map.c b/gl/tests/test-ino-map.c
new file mode 100644
index 0000000..2b44602
--- /dev/null
+++ b/gl/tests/test-ino-map.c
@@ -0,0 +1,63 @@
+/* Test the ino-map module.
+   Copyright (C) 2010 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+/* Written by Jim Meyering.  */
+
+#include <config.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+/* FIXME: once/if in gnulib, use #include "macros.h" in place of this */
+#define ASSERT(expr) \
+  do                                                                         \
+    {                                                                        \
+      if (!(expr))                                                           \
+        {                                                                    \
+          fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
+          fflush (stderr);                                                   \
+          abort ();                                                          \
+        }                                                                    \
+    }                                                                        \
+  while (0)
+
+#include "ino-map.h"
+
+int
+main ()
+{
+  /* set_program_name (argv[0]); placate overzealous "syntax-check" test.  */
+  enum { INO_MAP_INIT = 123 };
+  struct ino_map *ino_map = ino_map_alloc (INO_MAP_INIT);
+  ASSERT (ino_map != NULL);
+
+  ASSERT (ino_map_insert (ino_map, 42) == INO_MAP_INIT);
+  ASSERT (ino_map_insert (ino_map, 42) == INO_MAP_INIT);
+  ASSERT (ino_map_insert (ino_map, 398) == INO_MAP_INIT + 1);
+  ASSERT (ino_map_insert (ino_map, 398) == INO_MAP_INIT + 1);
+  ASSERT (ino_map_insert (ino_map, 0) == INO_MAP_INIT + 2);
+  ASSERT (ino_map_insert (ino_map, 0) == INO_MAP_INIT + 2);
+
+  int i;
+  for (i = 0; i < 100; i++)
+    {
+      ASSERT (ino_map_insert (ino_map, 10000 + i) == INO_MAP_INIT + 3 + i);
+    }
+
+  ino_map_free (ino_map);
+
+  return 0;
+}
diff --git a/src/du.c b/src/du.c
index a90568e..bc24861 100644
--- a/src/du.c
+++ b/src/du.c
@@ -30,10 +30,10 @@
 #include "system.h"
 #include "argmatch.h"
 #include "argv-iter.h"
+#include "di-set.h"
 #include "error.h"
 #include "exclude.h"
 #include "fprintftime.h"
-#include "hash.h"
 #include "human.h"
 #include "quote.h"
 #include "quotearg.h"
@@ -60,19 +60,8 @@ extern bool fts_debug;
 # define FTS_CROSS_CHECK(Fts)
 #endif
 
-/* Initial size of the hash table.  */
-#define INITIAL_TABLE_SIZE 103
-
-/* Hash structure for inode and device numbers.  The separate entry
-   structure makes it easier to rehash "in place".  */
-struct entry
-{
-  ino_t st_ino;
-  dev_t st_dev;
-};
-
 /* A set of dev/ino pairs.  */
-static Hash_table *htab;
+static struct di_set *di_set;
 
 /* Define a class for collecting directory information. */
 
@@ -336,66 +325,16 @@ Mandatory arguments to long options are mandatory for 
short options too.\n\
   exit (status);
 }
 
-static size_t
-entry_hash (void const *x, size_t table_size)
-{
-  struct entry const *p = x;
-
-  /* Ignoring the device number here should be fine.  */
-  /* The cast to uintmax_t prevents negative remainders
-     if st_ino is negative.  */
-  return (uintmax_t) p->st_ino % table_size;
-}
-
-/* Compare two dev/ino pairs.  Return true if they are the same.  */
-static bool
-entry_compare (void const *x, void const *y)
-{
-  struct entry const *a = x;
-  struct entry const *b = y;
-  return SAME_INODE (*a, *b) ? true : false;
-}
-
 /* Try to insert the INO/DEV pair into the global table, HTAB.
    Return true if the pair is successfully inserted,
    false if the pair is already in the table.  */
 static bool
 hash_ins (ino_t ino, dev_t dev)
 {
-  struct entry *ent;
-  struct entry *ent_from_table;
-
-  ent = xmalloc (sizeof *ent);
-  ent->st_ino = ino;
-  ent->st_dev = dev;
-
-  ent_from_table = hash_insert (htab, ent);
-  if (ent_from_table == NULL)
-    {
-      /* Insertion failed due to lack of memory.  */
-      xalloc_die ();
-    }
-
-  if (ent_from_table == ent)
-    {
-      /* Insertion succeeded.  */
-      return true;
-    }
-
-  /* That pair is already in the table, so ENT was not inserted.  Free it.  */
-  free (ent);
-
-  return false;
-}
-
-/* Initialize the hash table.  */
-static void
-hash_init (void)
-{
-  htab = hash_initialize (INITIAL_TABLE_SIZE, NULL,
-                          entry_hash, entry_compare, free);
-  if (htab == NULL)
+  int inserted = di_set_insert (di_set, dev, ino);
+  if (inserted < 0)
     xalloc_die ();
+  return inserted;
 }
 
 /* FIXME: this code is nearly identical to code in date.c  */
@@ -947,8 +886,10 @@ main (int argc, char **argv)
   if (!ai)
     xalloc_die ();
 
-  /* Initialize the hash structure for inode numbers.  */
-  hash_init ();
+  /* Initialize the set of dev,inode pairs.  */
+  di_set = di_set_alloc ();
+  if (!di_set)
+    xalloc_die ();
 
   bit_flags |= symlink_deref_bits;
   static char *temp_argv[] = { NULL, NULL };
@@ -1019,6 +960,7 @@ main (int argc, char **argv)
     }
 
   argv_iter_free (ai);
+  di_set_free (di_set);
 
   if (files_from && (ferror (stdin) || fclose (stdin) != 0))
     error (EXIT_FAILURE, 0, _("error reading %s"), quote (files_from));
@@ -1026,7 +968,5 @@ main (int argc, char **argv)
   if (print_grand_total)
     print_size (&tot_dui, _("total"));
 
-  hash_free (htab);
-
   exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
 }






reply via email to

[Prev in Thread] Current Thread [Next in Thread]