[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
master d3cefd3e983 06/20: Leaner hash table dumping and thawing
From: |
Mattias Engdegård |
Subject: |
master d3cefd3e983 06/20: Leaner hash table dumping and thawing |
Date: |
Sat, 13 Jan 2024 14:54:00 -0500 (EST) |
branch: master
commit d3cefd3e98354929d96c9396e5920e8a123784dc
Author: Mattias Engdegård <mattiase@acm.org>
Commit: Mattias Engdegård <mattiase@acm.org>
Leaner hash table dumping and thawing
Only dump the actual data, and the test encoded as an enum. This
simplifies dumping, makes dump files smaller and saves space at run
time.
* src/lisp.h (hash_table_std_test_t): New enum.
(struct Lisp_Hash_Table): Add frozen_test member, consuming no extra space.
* src/fns.c (hashfn_user_defined): Now static.
(hash_table_test_from_std): New.
(hash_table_rehash): Rename to...
(hash_table_thaw): ...this and rewrite.
* src/pdumper.c (hash_table_contents): Only include actual data, not
unused space.
(hash_table_std_test): New.
(hash_table_freeze): Set frozen_test from test.
(dump_hash_table): Dump frozen_test, not the whole test struct.
Don't bother other dumping fields that can be derived.
---
src/fns.c | 53 ++++++++++++++++++++++++++++++-------------------
src/lisp.h | 12 ++++++++++--
src/pdumper.c | 63 +++++++++++++++++++++++++----------------------------------
3 files changed, 70 insertions(+), 58 deletions(-)
diff --git a/src/fns.c b/src/fns.c
index efec74d4959..74fdf29417e 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -4474,7 +4474,7 @@ hashfn_eql (Lisp_Object key, struct Lisp_Hash_Table *h)
/* Given H, return a hash code for KEY which uses a user-defined
function to compare keys. */
-Lisp_Object
+static Lisp_Object
hashfn_user_defined (Lisp_Object key, struct Lisp_Hash_Table *h)
{
Lisp_Object args[] = { h->test.user_hash_function, key };
@@ -4638,11 +4638,10 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h)
if (h->next_free < 0)
{
ptrdiff_t old_size = HASH_TABLE_SIZE (h);
- EMACS_INT new_size;
-
- double float_new_size = old_size * std_rehash_size;
- if (float_new_size < EMACS_INT_MAX)
- new_size = float_new_size;
+ /* FIXME: better growth management, ditch std_rehash_size */
+ EMACS_INT new_size = old_size * std_rehash_size;
+ if (new_size < EMACS_INT_MAX)
+ new_size = max (new_size, 32); /* avoid slow initial growth */
else
new_size = EMACS_INT_MAX;
if (PTRDIFF_MAX < new_size)
@@ -4691,20 +4690,39 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h)
}
}
-/* Recompute the hashes (and hence also the "next" pointers).
- Normally there's never a need to recompute hashes.
- This is done only on first access to a hash-table loaded from
- the "pdump", because the objects' addresses may have changed, thus
- affecting their hashes. */
+static const struct hash_table_test *
+hash_table_test_from_std (hash_table_std_test_t test)
+{
+ switch (test)
+ {
+ case Test_eq: return &hashtest_eq;
+ case Test_eql: return &hashtest_eql;
+ case Test_equal: return &hashtest_equal;
+ }
+ emacs_abort();
+}
+
+/* Rebuild a hash table from its frozen (dumped) form. */
void
-hash_table_rehash (Lisp_Object hash)
+hash_table_thaw (Lisp_Object hash_table)
{
- struct Lisp_Hash_Table *h = XHASH_TABLE (hash);
- ptrdiff_t i, count = h->count;
+ struct Lisp_Hash_Table *h = XHASH_TABLE (hash_table);
+
+ /* Freezing discarded most non-essential information; recompute it.
+ The allocation is minimal with no room for growth. */
+ h->test = *hash_table_test_from_std (h->frozen_test);
+ ptrdiff_t size = ASIZE (h->key_and_value) / 2;
+ h->count = size;
+ ptrdiff_t index_size = hash_index_size (size);
+ h->next_free = -1;
+
+ h->hash = make_nil_vector (size);
+ h->next = make_vector (size, make_fixnum (-1));
+ h->index = make_vector (index_size, make_fixnum (-1));
/* Recompute the actual hash codes for each entry in the table.
Order is still invalid. */
- for (i = 0; i < count; i++)
+ for (ptrdiff_t i = 0; i < size; i++)
{
Lisp_Object key = HASH_KEY (h, i);
Lisp_Object hash_code = hash_from_key (h, key);
@@ -4712,12 +4730,7 @@ hash_table_rehash (Lisp_Object hash)
set_hash_hash_slot (h, i, hash_code);
set_hash_next_slot (h, i, HASH_INDEX (h, start_of_bucket));
set_hash_index_slot (h, start_of_bucket, i);
- eassert (HASH_NEXT (h, i) != i); /* Stop loops. */
}
-
- ptrdiff_t size = ASIZE (h->next);
- for (; i + 1 < size; i++)
- set_hash_next_slot (h, i, i + 1);
}
/* Lookup KEY in hash table H. If HASH is non-null, return in *HASH
diff --git a/src/lisp.h b/src/lisp.h
index 48e1f943ed8..d9b828b0328 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -2385,6 +2385,12 @@ INLINE int
struct Lisp_Hash_Table;
+typedef enum {
+ Test_eql,
+ Test_eq,
+ Test_equal,
+} hash_table_std_test_t;
+
struct hash_table_test
{
/* Function used to compare keys; always a bare symbol. */
@@ -2473,6 +2479,9 @@ struct Lisp_Hash_Table
/* Weakness of the table. */
hash_table_weakness_t weakness : 8;
+ /* Hash table test (only used when frozen in dump) */
+ hash_table_std_test_t frozen_test : 8;
+
/* True if the table can be purecopied. The table cannot be
changed afterwards. */
bool purecopy;
@@ -2563,7 +2572,7 @@ hash_from_key (struct Lisp_Hash_Table *h, Lisp_Object key)
return h->test.hashfn (key, h);
}
-void hash_table_rehash (Lisp_Object);
+void hash_table_thaw (Lisp_Object hash_table);
/* Default size for hash tables if not specified. */
@@ -4038,7 +4047,6 @@ extern void hexbuf_digest (char *, void const *, int);
extern char *extract_data_from_object (Lisp_Object, ptrdiff_t *, ptrdiff_t *);
EMACS_UINT hash_string (char const *, ptrdiff_t);
EMACS_UINT sxhash (Lisp_Object);
-Lisp_Object hashfn_user_defined (Lisp_Object, struct Lisp_Hash_Table *);
Lisp_Object make_hash_table (struct hash_table_test, EMACS_INT,
hash_table_weakness_t, bool);
Lisp_Object hash_table_weakness_symbol (hash_table_weakness_t weak);
diff --git a/src/pdumper.c b/src/pdumper.c
index 8072148c542..e4349f0cb17 100644
--- a/src/pdumper.c
+++ b/src/pdumper.c
@@ -2646,34 +2646,26 @@ dump_vectorlike_generic (struct dump_context *ctx,
return offset;
}
-/* Return a vector of KEY, VALUE pairs in the given hash table H. The
- first H->count pairs are valid, and the rest are unbound. */
+/* Return a vector of KEY, VALUE pairs in the given hash table H.
+ No room for growth is included. */
static Lisp_Object
hash_table_contents (struct Lisp_Hash_Table *h)
{
- if (h->test.hashfn == hashfn_user_defined)
- error ("cannot dump hash tables with user-defined tests"); /* Bug#36769 */
-
- ptrdiff_t size = HASH_TABLE_SIZE (h);
+ ptrdiff_t old_size = HASH_TABLE_SIZE (h);
+ ptrdiff_t size = h->count;
Lisp_Object key_and_value = make_uninit_vector (2 * size);
ptrdiff_t n = 0;
/* Make sure key_and_value ends up in the same order; charset.c
relies on it by expecting hash table indices to stay constant
across the dump. */
- for (ptrdiff_t i = 0; i < size; i++)
+ for (ptrdiff_t i = 0; i < old_size; i++)
if (!NILP (HASH_HASH (h, i)))
{
ASET (key_and_value, n++, HASH_KEY (h, i));
ASET (key_and_value, n++, HASH_VALUE (h, i));
}
- while (n < 2 * size)
- {
- ASET (key_and_value, n++, Qunbound);
- ASET (key_and_value, n++, Qnil);
- }
-
return key_and_value;
}
@@ -2686,25 +2678,32 @@ dump_hash_table_list (struct dump_context *ctx)
return 0;
}
-static void
-hash_table_freeze (struct Lisp_Hash_Table *h)
+static hash_table_std_test_t
+hash_table_std_test (const struct hash_table_test *t)
{
- ptrdiff_t npairs = ASIZE (h->key_and_value) / 2;
- h->key_and_value = hash_table_contents (h);
- h->next = h->hash = make_fixnum (npairs);
- h->index = make_fixnum (ASIZE (h->index));
- h->next_free = (npairs == h->count ? -1 : h->count);
+ if (BASE_EQ (t->name, Qeq))
+ return Test_eq;
+ if (BASE_EQ (t->name, Qeql))
+ return Test_eql;
+ if (BASE_EQ (t->name, Qequal))
+ return Test_equal;
+ error ("cannot dump hash tables with user-defined tests"); /* Bug#36769 */
}
+/* Compact contents and discard inessential information from a hash table,
+ preparing it for dumping.
+ See `hash_table_thaw' for the code that restores the object to a usable
+ state. */
static void
-hash_table_thaw (Lisp_Object hash)
+hash_table_freeze (struct Lisp_Hash_Table *h)
{
- struct Lisp_Hash_Table *h = XHASH_TABLE (hash);
- h->hash = make_nil_vector (XFIXNUM (h->hash));
- h->next = Fmake_vector (h->next, make_fixnum (-1));
- h->index = Fmake_vector (h->index, make_fixnum (-1));
-
- hash_table_rehash (hash);
+ h->key_and_value = hash_table_contents (h);
+ eassert (ASIZE (h->key_and_value) == h->count * 2);
+ h->next = Qnil;
+ h->hash = Qnil;
+ h->index = Qnil;
+ h->count = 0;
+ h->frozen_test = hash_table_std_test (&h->test);
}
static dump_off
@@ -2724,19 +2723,11 @@ dump_hash_table (struct dump_context *ctx, Lisp_Object
object)
dump_pseudovector_lisp_fields (ctx, &out->header, &hash->header);
/* TODO: dump the hash bucket vectors synchronously here to keep
them as close to the hash table as possible. */
- DUMP_FIELD_COPY (out, hash, count);
- DUMP_FIELD_COPY (out, hash, next_free);
DUMP_FIELD_COPY (out, hash, weakness);
DUMP_FIELD_COPY (out, hash, purecopy);
DUMP_FIELD_COPY (out, hash, mutable);
+ DUMP_FIELD_COPY (out, hash, frozen_test);
dump_field_lv (ctx, out, hash, &hash->key_and_value, WEIGHT_STRONG);
- dump_field_lv (ctx, out, hash, &hash->test.name, WEIGHT_STRONG);
- dump_field_lv (ctx, out, hash, &hash->test.user_hash_function,
- WEIGHT_STRONG);
- dump_field_lv (ctx, out, hash, &hash->test.user_cmp_function,
- WEIGHT_STRONG);
- dump_field_emacs_ptr (ctx, out, hash, &hash->test.cmpfn);
- dump_field_emacs_ptr (ctx, out, hash, &hash->test.hashfn);
eassert (hash->next_weak == NULL);
return finish_dump_pvec (ctx, &out->header);
}
- master 4ba6954e695 01/20: * src/print.c (print_object): Don't print hash table test if `eql`., (continued)
- master 4ba6954e695 01/20: * src/print.c (print_object): Don't print hash table test if `eql`., Mattias Engdegård, 2024/01/13
- master 49fd4d120de 07/20: Allow zero hash table size, Mattias Engdegård, 2024/01/13
- master c3d0cc50faf 05/20: Remove rehash-threshold and rehash-size struct members, Mattias Engdegård, 2024/01/13
- master a3ae5653cfe 09/20: Store hash values as integers instead of Lisp_Object, Mattias Engdegård, 2024/01/13
- master ed06de52a53 14/20: Faster hash table growth, starting at zero size, Mattias Engdegård, 2024/01/13
- master fa5c07fc87d 08/20: Use non-Lisp allocation for internal hash-table vectors, Mattias Engdegård, 2024/01/13
- master 7ad5d427730 16/20: Don't dump Qunbound, Mattias Engdegård, 2024/01/13
- master d2c3a983146 20/20: Hash-table documentation updates (bug#68244), Mattias Engdegård, 2024/01/13
- master a09619f2598 02/20: * src/print.c (print_object): Don't print empty hash-table data, Mattias Engdegård, 2024/01/13
- master 3f9c81a87f7 03/20: Don't print or read the hash table size parameter, Mattias Engdegård, 2024/01/13
- master d3cefd3e983 06/20: Leaner hash table dumping and thawing,
Mattias Engdegård <=
- master 519c7ca7356 19/20: Don't pretend that hash-table-size is useful, Mattias Engdegård, 2024/01/13
- master 3b00255a4c7 10/20: Inlined and specialised hash table look-up, Mattias Engdegård, 2024/01/13
- master 1998039f7a8 18/20: Change hash_hash_t to uint32_t, Mattias Engdegård, 2024/01/13
- master 7d93a0147a1 12/20: Share hash table test structs, Mattias Engdegård, 2024/01/13
- master 47502c55b0c 13/20: ; Reorder struct Lisp_Hash_Table and struct hash_table_test, Mattias Engdegård, 2024/01/13
- master c6bdc1ea1dc 04/20: Represent hash table weakness as an enum internally, Mattias Engdegård, 2024/01/13
- master 68f8bc31114 15/20: Change hash_idx_t to int32_t on all platforms, Mattias Engdegård, 2024/01/13
- master 0a998938ca1 11/20: Use hash_idx_t for storing hash indices, Mattias Engdegård, 2024/01/13
- master 11e467eb600 17/20: Use key Qunbound instead of hash value hash_unused for free entries, Mattias Engdegård, 2024/01/13