emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

master e66870400d4 1/2: Change hash range reduction from remainder to mu


From: Mattias Engdegård
Subject: master e66870400d4 1/2: Change hash range reduction from remainder to multiplication
Date: Tue, 6 Feb 2024 09:50:47 -0500 (EST)

branch: master
commit e66870400d45e3d08265df9f6acd4631a5712139
Author: Mattias Engdegård <mattiase@acm.org>
Commit: Mattias Engdegård <mattiase@acm.org>

    Change hash range reduction from remainder to multiplication
    
    This makes both lookups and rehashing cheaper.  The index vector size
    is now always a power of 2.  The first table size is reduced to
    6 (from 8), because index vectors would become excessively big
    otherwise.
    
    * src/lisp.h (struct Lisp_Hash_Table): Replace index_size with
    index_bits.  All references adapted.
    (hash_table_index_size): New accessor; use it where applicable.
    * src/fns.c (hash_index_size): Replace with...
    (compute_hash_index_bits): ...this new function, returning the log2 of the
    index size.  All callers adapted.
    (hash_index_index): Knuth multiplicative hashing instead of remainder.
    (maybe_resize_hash_table): Reduce first table size from 8 to 6.
---
 src/alloc.c   |  7 +++---
 src/fns.c     | 78 +++++++++++++++++++++++++++++------------------------------
 src/lisp.h    | 13 +++++++---
 src/pdumper.c |  2 +-
 4 files changed, 54 insertions(+), 46 deletions(-)

diff --git a/src/alloc.c b/src/alloc.c
index 15bb65cf74f..6abe9e28650 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -3443,7 +3443,7 @@ cleanup_vector (struct Lisp_Vector *vector)
        struct Lisp_Hash_Table *h = PSEUDOVEC_STRUCT (vector, Lisp_Hash_Table);
        if (h->table_size > 0)
          {
-           eassert (h->index_size > 1);
+           eassert (h->index_bits > 0);
            xfree (h->index);
            xfree (h->key_and_value);
            xfree (h->next);
@@ -3451,7 +3451,7 @@ cleanup_vector (struct Lisp_Vector *vector)
            ptrdiff_t bytes = (h->table_size * (2 * sizeof *h->key_and_value
                                                + sizeof *h->hash
                                                + sizeof *h->next)
-                              + h->index_size * sizeof *h->index);
+                              + hash_table_index_size (h) * sizeof *h->index);
            hash_table_allocated_bytes -= bytes;
          }
       }
@@ -5959,7 +5959,8 @@ purecopy_hash_table (struct Lisp_Hash_Table *table)
       for (ptrdiff_t i = 0; i < nvalues; i++)
        pure->key_and_value[i] = purecopy (table->key_and_value[i]);
 
-      ptrdiff_t index_bytes = table->index_size * sizeof *table->index;
+      ptrdiff_t index_bytes = hash_table_index_size (table)
+                             * sizeof *table->index;
       pure->index = pure_alloc (index_bytes, -(int)sizeof *table->index);
       memcpy (pure->index, table->index, index_bytes);
     }
diff --git a/src/fns.c b/src/fns.c
index 08908d481a3..7de2616b359 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -4291,7 +4291,7 @@ set_hash_hash_slot (struct Lisp_Hash_Table *h, ptrdiff_t 
idx, hash_hash_t val)
 static void
 set_hash_index_slot (struct Lisp_Hash_Table *h, ptrdiff_t idx, ptrdiff_t val)
 {
-  eassert (idx >= 0 && idx < h->index_size);
+  eassert (idx >= 0 && idx < hash_table_index_size (h));
   h->index[idx] = val;
 }
 
@@ -4392,7 +4392,7 @@ HASH_NEXT (struct Lisp_Hash_Table *h, ptrdiff_t idx)
 static ptrdiff_t
 HASH_INDEX (struct Lisp_Hash_Table *h, ptrdiff_t idx)
 {
-  eassert (idx >= 0 && idx < h->index_size);
+  eassert (idx >= 0 && idx < hash_table_index_size (h));
   return h->index[idx];
 }
 
@@ -4527,26 +4527,19 @@ allocate_hash_table (void)
   return ALLOCATE_PLAIN_PSEUDOVECTOR (struct Lisp_Hash_Table, PVEC_HASH_TABLE);
 }
 
-/* Compute the size of the index from the table capacity.  */
-static ptrdiff_t
-hash_index_size (ptrdiff_t size)
-{
-  /* An upper bound on the size of a hash table index.  It must fit in
-     ptrdiff_t and be a valid Emacs fixnum.  */
-  ptrdiff_t upper_bound = min (MOST_POSITIVE_FIXNUM,
-                              min (TYPE_MAXIMUM (hash_idx_t),
-                                   PTRDIFF_MAX / sizeof (ptrdiff_t)));
-  /* Single-element index vectors are used iff size=0.  */
-  eassert (size > 0);
-  ptrdiff_t lower_bound = 2;
-  ptrdiff_t index_size = size + max (size >> 2, 1);  /* 1.25x larger */
-  if (index_size < upper_bound)
-    index_size = (index_size < lower_bound
-                 ? lower_bound
-                 : next_almost_prime (index_size));
-  if (index_size > upper_bound)
+/* Compute the size of the index (as log2) from the table capacity.  */
+static int
+compute_hash_index_bits (hash_idx_t size)
+{
+  /* An upper bound on the size of a hash table index index.  */
+  hash_idx_t upper_bound = min (MOST_POSITIVE_FIXNUM,
+                               min (TYPE_MAXIMUM (hash_idx_t),
+                                    PTRDIFF_MAX / sizeof (hash_idx_t)));
+  /* Use next higher power of 2.  This works even for size=0.  */
+  int bits = elogb (size) + 1;
+  if (bits >= TYPE_WIDTH (uintmax_t) || ((uintmax_t)1 << bits) > upper_bound)
     error ("Hash table too large");
-  return index_size;
+  return bits;
 }
 
 /* Constant hash index vector used when the table size is zero.
@@ -4587,7 +4580,7 @@ make_hash_table (const struct hash_table_test *test, 
EMACS_INT size,
       h->key_and_value = NULL;
       h->hash = NULL;
       h->next = NULL;
-      h->index_size = 1;
+      h->index_bits = 0;
       h->index = (hash_idx_t *)empty_hash_index_vector;
       h->next_free = -1;
     }
@@ -4605,8 +4598,9 @@ make_hash_table (const struct hash_table_test *test, 
EMACS_INT size,
        h->next[i] = i + 1;
       h->next[size - 1] = -1;
 
-      int index_size = hash_index_size (size);
-      h->index_size = index_size;
+      int index_bits = compute_hash_index_bits (size);
+      h->index_bits = index_bits;
+      ptrdiff_t index_size = hash_table_index_size (h);
       h->index = hash_table_alloc_bytes (index_size * sizeof *h->index);
       for (ptrdiff_t i = 0; i < index_size; i++)
        h->index[i] = -1;
@@ -4654,7 +4648,7 @@ copy_hash_table (struct Lisp_Hash_Table *h1)
       h2->next = hash_table_alloc_bytes (next_bytes);
       memcpy (h2->next, h1->next, next_bytes);
 
-      ptrdiff_t index_bytes = h1->index_size * sizeof *h1->index;
+      ptrdiff_t index_bytes = hash_table_index_size (h1) * sizeof *h1->index;
       h2->index = hash_table_alloc_bytes (index_bytes);
       memcpy (h2->index, h1->index, index_bytes);
     }
@@ -4668,8 +4662,11 @@ copy_hash_table (struct Lisp_Hash_Table *h1)
 static inline ptrdiff_t
 hash_index_index (struct Lisp_Hash_Table *h, hash_hash_t hash)
 {
-  eassert (h->index_size > 0);
-  return hash % h->index_size;
+  /* Knuth multiplicative hashing, tailored for 32-bit indices
+     (avoiding a 64-bit multiply).  */
+  uint32_t alpha = 2654435769; /* 2**32/phi */
+  /* Note the cast to uint64_t, to make it work for index_bits=0.  */
+  return (uint64_t)((uint32_t)hash * alpha) >> (32 - h->index_bits);
 }
 
 /* Resize hash table H if it's too full.  If H cannot be resized
@@ -4681,7 +4678,7 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h)
   if (h->next_free < 0)
     {
       ptrdiff_t old_size = HASH_TABLE_SIZE (h);
-      ptrdiff_t min_size = 8;
+      ptrdiff_t min_size = 6;
       ptrdiff_t base_size = min (max (old_size, min_size), PTRDIFF_MAX / 2);
       /* Grow aggressively at small sizes, then just double.  */
       ptrdiff_t new_size =
@@ -4706,13 +4703,14 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h)
       hash_hash_t *hash = hash_table_alloc_bytes (new_size * sizeof *hash);
       memcpy (hash, h->hash, old_size * sizeof *hash);
 
-      ptrdiff_t old_index_size = h->index_size;
-      ptrdiff_t index_size = hash_index_size (new_size);
+      ptrdiff_t old_index_size = hash_table_index_size (h);
+      ptrdiff_t index_bits = compute_hash_index_bits (new_size);
+      ptrdiff_t index_size = (ptrdiff_t)1 << index_bits;
       hash_idx_t *index = hash_table_alloc_bytes (index_size * sizeof *index);
       for (ptrdiff_t i = 0; i < index_size; i++)
        index[i] = -1;
 
-      h->index_size = index_size;
+      h->index_bits = index_bits;
       h->table_size = new_size;
       h->next_free = old_size;
 
@@ -4778,18 +4776,19 @@ hash_table_thaw (Lisp_Object hash_table)
       h->key_and_value = NULL;
       h->hash = NULL;
       h->next = NULL;
-      h->index_size = 1;
+      h->index_bits = 0;
       h->index = (hash_idx_t *)empty_hash_index_vector;
     }
   else
     {
-      ptrdiff_t index_size = hash_index_size (size);
-      h->index_size = index_size;
+      ptrdiff_t index_bits = compute_hash_index_bits (size);
+      h->index_bits = index_bits;
 
       h->hash = hash_table_alloc_bytes (size * sizeof *h->hash);
 
       h->next = hash_table_alloc_bytes (size * sizeof *h->next);
 
+      ptrdiff_t index_size = hash_table_index_size (h);
       h->index = hash_table_alloc_bytes (index_size * sizeof *h->index);
       for (ptrdiff_t i = 0; i < index_size; i++)
        h->index[i] = -1;
@@ -4937,7 +4936,8 @@ hash_clear (struct Lisp_Hash_Table *h)
          set_hash_value_slot (h, i, Qnil);
        }
 
-      for (ptrdiff_t i = 0; i < h->index_size; i++)
+      ptrdiff_t index_size = hash_table_index_size (h);
+      for (ptrdiff_t i = 0; i < index_size; i++)
        h->index[i] = -1;
 
       h->next_free = 0;
@@ -4976,7 +4976,7 @@ keep_entry_p (hash_table_weakness_t weakness,
 bool
 sweep_weak_table (struct Lisp_Hash_Table *h, bool remove_entries_p)
 {
-  ptrdiff_t n = h->index_size;
+  ptrdiff_t n = hash_table_index_size (h);
   bool marked = false;
 
   for (ptrdiff_t bucket = 0; bucket < n; ++bucket)
@@ -5701,7 +5701,7 @@ DEFUN ("internal--hash-table-histogram",
   struct Lisp_Hash_Table *h = check_hash_table (hash_table);
   ptrdiff_t size = HASH_TABLE_SIZE (h);
   ptrdiff_t *freq = xzalloc (size * sizeof *freq);
-  ptrdiff_t index_size = h->index_size;
+  ptrdiff_t index_size = hash_table_index_size (h);
   for (ptrdiff_t i = 0; i < index_size; i++)
     {
       ptrdiff_t n = 0;
@@ -5729,7 +5729,7 @@ Internal use only. */)
 {
   struct Lisp_Hash_Table *h = check_hash_table (hash_table);
   Lisp_Object ret = Qnil;
-  ptrdiff_t index_size = h->index_size;
+  ptrdiff_t index_size = hash_table_index_size (h);
   for (ptrdiff_t i = 0; i < index_size; i++)
     {
       Lisp_Object bucket = Qnil;
@@ -5750,7 +5750,7 @@ DEFUN ("internal--hash-table-index-size",
   (Lisp_Object hash_table)
 {
   struct Lisp_Hash_Table *h = check_hash_table (hash_table);
-  return make_int (h->index_size);
+  return make_int (hash_table_index_size (h));
 }
 
 
diff --git a/src/lisp.h b/src/lisp.h
index e6fd8cacb1b..d6bbf15d83b 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -2475,14 +2475,14 @@ struct Lisp_Hash_Table
      The table is physically split into three vectors (hash, next,
      key_and_value) which may or may not be beneficial.  */
 
-  hash_idx_t index_size;   /* Size of the index vector.  */
+  int index_bits;         /* log2 (size of the index vector).  */
   hash_idx_t table_size;   /* Size of the next and hash vectors.  */
 
   /* Bucket vector.  An entry of -1 indicates no item is present,
      and a nonnegative entry is the index of the first item in
      a collision chain.
-     This vector is index_size entries long.
-     If index_size is 1 (and table_size is 0), then this is the
+     This vector is 2**index_bits entries long.
+     If index_bits is 0 (and table_size is 0), then this is the
      constant read-only vector {-1}, shared between all instances.
      Otherwise it is heap-allocated.  */
   hash_idx_t *index;
@@ -2597,6 +2597,13 @@ HASH_TABLE_SIZE (const struct Lisp_Hash_Table *h)
   return h->table_size;
 }
 
+/* Size of the index vector in hash table H.  */
+INLINE ptrdiff_t
+hash_table_index_size (const struct Lisp_Hash_Table *h)
+{
+  return (ptrdiff_t)1 << h->index_bits;
+}
+
 /* Hash value for KEY in hash table H.  */
 INLINE hash_hash_t
 hash_from_key (struct Lisp_Hash_Table *h, Lisp_Object key)
diff --git a/src/pdumper.c b/src/pdumper.c
index ee554cda55a..b8006b035ea 100644
--- a/src/pdumper.c
+++ b/src/pdumper.c
@@ -2688,7 +2688,7 @@ hash_table_freeze (struct Lisp_Hash_Table *h)
   h->hash = NULL;
   h->index = NULL;
   h->table_size = 0;
-  h->index_size = 0;
+  h->index_bits = 0;
   h->frozen_test = hash_table_std_test (h->test);
   h->test = NULL;
 }



reply via email to

[Prev in Thread] Current Thread [Next in Thread]