>From 8a33944ee20f57a8d2efe6fe68aa0f4745aa2d03 Mon Sep 17 00:00:00 2001 From: Pip Cet Date: Tue, 11 Aug 2020 02:16:53 -0700 Subject: [PATCH 1/7] Rehash hash tables eagerly after loading a dump This simplifies code, and helps performance in some cases (Bug#36597). * src/lisp.h (hash_rehash_needed_p): Remove. All uses removed. (hash_rehash_if_needed): Remove. All uses removed. (struct Lisp_Hash_Table): Remove comment about rehashing hash tables. * src/pdumper.c (thaw_hash_tables): New function. (hash_table_thaw): New function. (hash_table_freeze): New function. (dump_hash_table): Simplify. (dump_hash_table_list): New function. (hash_table_contents): New function. (Fdump_emacs_portable): Handle hash tables by eager rehashing. (pdumper_load): Restore hash tables. (init_pdumper_once): New function. --- src/bytecode.c | 1 - src/composite.c | 1 - src/emacs.c | 1 + src/fns.c | 65 ++++------------ src/lisp.h | 21 +---- src/minibuf.c | 3 - src/pdumper.c | 201 +++++++++++++++++++++--------------------------- src/pdumper.h | 1 + 8 files changed, 109 insertions(+), 185 deletions(-) diff --git a/src/bytecode.c b/src/bytecode.c index 1913a4812a..1c3b6eac0d 100644 --- a/src/bytecode.c +++ b/src/bytecode.c @@ -1401,7 +1401,6 @@ #define DEFINE(name, value) LABEL (name) , Lisp_Object v1 = POP; ptrdiff_t i; struct Lisp_Hash_Table *h = XHASH_TABLE (jmp_table); - hash_rehash_if_needed (h); /* h->count is a faster approximation for HASH_TABLE_SIZE (h) here. */ diff --git a/src/composite.c b/src/composite.c index f96f0b7772..ec2b8328f7 100644 --- a/src/composite.c +++ b/src/composite.c @@ -652,7 +652,6 @@ gstring_lookup_cache (Lisp_Object header) composition_gstring_put_cache (Lisp_Object gstring, ptrdiff_t len) { struct Lisp_Hash_Table *h = XHASH_TABLE (gstring_hash_table); - hash_rehash_if_needed (h); Lisp_Object header = LGSTRING_HEADER (gstring); Lisp_Object hash = h->test.hashfn (header, h); if (len < 0) diff --git a/src/emacs.c b/src/emacs.c index 8e5eaf5e43..d31fa2cb28 100644 --- a/src/emacs.c +++ b/src/emacs.c @@ -1536,6 +1536,7 @@ main (int argc, char **argv) if (!initialized) { init_alloc_once (); + init_pdumper_once (); init_obarray_once (); init_eval_once (); init_charset_once (); diff --git a/src/fns.c b/src/fns.c index 811d6e8200..41e26104f3 100644 --- a/src/fns.c +++ b/src/fns.c @@ -4248,50 +4248,28 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h) /* Recompute the hashes (and hence also the "next" pointers). Normally there's never a need to recompute hashes. - This is done only on first-access to a hash-table loaded from - the "pdump", because the object's addresses may have changed, thus + This is done only on first access to a hash-table loaded from + the "pdump", because the objects' addresses may have changed, thus affecting their hash. */ void -hash_table_rehash (struct Lisp_Hash_Table *h) +hash_table_rehash (Lisp_Object hash) { - ptrdiff_t size = HASH_TABLE_SIZE (h); - - /* These structures may have been purecopied and shared - (bug#36447). */ - Lisp_Object hash = make_nil_vector (size); - h->next = Fcopy_sequence (h->next); - h->index = Fcopy_sequence (h->index); - + struct Lisp_Hash_Table *h = XHASH_TABLE (hash); /* Recompute the actual hash codes for each entry in the table. Order is still invalid. */ - for (ptrdiff_t i = 0; i < size; ++i) + for (ptrdiff_t i = 0; i < h->count; ++i) { Lisp_Object key = HASH_KEY (h, i); - if (!EQ (key, Qunbound)) - ASET (hash, i, h->test.hashfn (key, h)); + Lisp_Object hash_code = h->test.hashfn (key, h); + ptrdiff_t start_of_bucket = XUFIXNUM (hash_code) % ASIZE (h->index); + set_hash_hash_slot (h, i, hash_code); + set_hash_next_slot (h, i, HASH_INDEX (h, start_of_bucket)); + set_hash_index_slot (h, start_of_bucket, i); + eassert (HASH_NEXT (h, i) != i); /* Stop loops. */ } - /* Reset the index so that any slot we don't fill below is marked - invalid. */ - Ffillarray (h->index, make_fixnum (-1)); - - /* Rebuild the collision chains. */ - for (ptrdiff_t i = 0; i < size; ++i) - if (!NILP (AREF (hash, i))) - { - EMACS_UINT hash_code = XUFIXNUM (AREF (hash, i)); - ptrdiff_t start_of_bucket = hash_code % ASIZE (h->index); - set_hash_next_slot (h, i, HASH_INDEX (h, start_of_bucket)); - set_hash_index_slot (h, start_of_bucket, i); - eassert (HASH_NEXT (h, i) != i); /* Stop loops. */ - } - - /* Finally, mark the hash table as having a valid hash order. - Do this last so that if we're interrupted, we retry on next - access. */ - eassert (hash_rehash_needed_p (h)); - h->hash = hash; - eassert (!hash_rehash_needed_p (h)); + for (ptrdiff_t i = h->count; i < ASIZE (h->next) - 1; i++) + set_hash_next_slot (h, i, i + 1); } /* Lookup KEY in hash table H. If HASH is non-null, return in *HASH @@ -4303,8 +4281,6 @@ hash_lookup (struct Lisp_Hash_Table *h, Lisp_Object key, Lisp_Object *hash) { ptrdiff_t start_of_bucket, i; - hash_rehash_if_needed (h); - Lisp_Object hash_code = h->test.hashfn (key, h); if (hash) *hash = hash_code; @@ -4339,8 +4315,6 @@ hash_put (struct Lisp_Hash_Table *h, Lisp_Object key, Lisp_Object value, { ptrdiff_t start_of_bucket, i; - hash_rehash_if_needed (h); - /* Increment count after resizing because resizing may fail. */ maybe_resize_hash_table (h); h->count++; @@ -4373,8 +4347,6 @@ hash_remove_from_table (struct Lisp_Hash_Table *h, Lisp_Object key) ptrdiff_t start_of_bucket = XUFIXNUM (hash_code) % ASIZE (h->index); ptrdiff_t prev = -1; - hash_rehash_if_needed (h); - for (ptrdiff_t i = HASH_INDEX (h, start_of_bucket); 0 <= i; i = HASH_NEXT (h, i)) @@ -4415,8 +4387,7 @@ hash_clear (struct Lisp_Hash_Table *h) if (h->count > 0) { ptrdiff_t size = HASH_TABLE_SIZE (h); - if (!hash_rehash_needed_p (h)) - memclear (xvector_contents (h->hash), size * word_size); + memclear (xvector_contents (h->hash), size * word_size); for (ptrdiff_t i = 0; i < size; i++) { set_hash_next_slot (h, i, i < size - 1 ? i + 1 : -1); @@ -4452,9 +4423,7 @@ sweep_weak_table (struct Lisp_Hash_Table *h, bool remove_entries_p) for (ptrdiff_t bucket = 0; bucket < n; ++bucket) { /* Follow collision chain, removing entries that don't survive - this garbage collection. It's okay if hash_rehash_needed_p - (h) is true, since we're operating entirely on the cached - hash values. */ + this garbage collection. */ ptrdiff_t prev = -1; ptrdiff_t next; for (ptrdiff_t i = HASH_INDEX (h, bucket); 0 <= i; i = next) @@ -4499,7 +4468,7 @@ sweep_weak_table (struct Lisp_Hash_Table *h, bool remove_entries_p) set_hash_hash_slot (h, i, Qnil); eassert (h->count != 0); - h->count += h->count > 0 ? -1 : 1; + h->count--; } else { @@ -4923,7 +4892,7 @@ DEFUN ("hash-table-count", Fhash_table_count, Shash_table_count, 1, 1, 0, (Lisp_Object table) { struct Lisp_Hash_Table *h = check_hash_table (table); - eassert (h->count >= 0); + return make_fixnum (h->count); } diff --git a/src/lisp.h b/src/lisp.h index 17b92a0414..d88038d91b 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -2275,11 +2275,7 @@ #define DEFSYM(sym, name) /* empty */ struct Lisp_Hash_Table { - /* Change pdumper.c if you change the fields here. - - IMPORTANT!!!!!!! - - Call hash_rehash_if_needed() before accessing. */ + /* Change pdumper.c if you change the fields here. */ /* This is for Lisp; the hash table code does not refer to it. */ union vectorlike_header header; @@ -2398,20 +2394,7 @@ HASH_TABLE_SIZE (const struct Lisp_Hash_Table *h) return size; } -void hash_table_rehash (struct Lisp_Hash_Table *h); - -INLINE bool -hash_rehash_needed_p (const struct Lisp_Hash_Table *h) -{ - return NILP (h->hash); -} - -INLINE void -hash_rehash_if_needed (struct Lisp_Hash_Table *h) -{ - if (hash_rehash_needed_p (h)) - hash_table_rehash (h); -} +void hash_table_rehash (Lisp_Object); /* Default size for hash tables if not specified. */ diff --git a/src/minibuf.c b/src/minibuf.c index 9d870ce364..cb302c5a60 100644 --- a/src/minibuf.c +++ b/src/minibuf.c @@ -1212,9 +1212,6 @@ DEFUN ("try-completion", Ftry_completion, Stry_completion, 2, 3, 0, bucket = AREF (collection, idx); } - if (HASH_TABLE_P (collection)) - hash_rehash_if_needed (XHASH_TABLE (collection)); - while (1) { /* Get the next element of the alist, obarray, or hash-table. */ diff --git a/src/pdumper.c b/src/pdumper.c index 63ee0fcb7f..10dfa8737f 100644 --- a/src/pdumper.c +++ b/src/pdumper.c @@ -105,17 +105,6 @@ #define VM_MS_WINDOWS 2 # define VM_SUPPORTED 0 #endif -/* PDUMPER_CHECK_REHASHING being true causes the portable dumper to - check, for each hash table it dumps, that the hash table means the - same thing after rehashing. */ -#ifndef PDUMPER_CHECK_REHASHING -# if ENABLE_CHECKING -# define PDUMPER_CHECK_REHASHING 1 -# else -# define PDUMPER_CHECK_REHASHING 0 -# endif -#endif - /* Require an architecture in which pointers, ptrdiff_t and intptr_t are the same size and have the same layout, and where bytes have eight bits --- that is, a general-purpose computer made after 1990. @@ -401,6 +390,8 @@ dump_fingerprint (char const *label, The start of the cold region is always aligned on a page boundary. */ dump_off cold_start; + + dump_off hash_list; }; /* Double-ended singly linked list. */ @@ -558,6 +549,8 @@ dump_fingerprint (char const *label, heap objects. */ Lisp_Object bignum_data; + Lisp_Object hash_tables; + unsigned number_hot_relocations; unsigned number_discardable_relocations; }; @@ -2616,78 +2609,62 @@ dump_vectorlike_generic (struct dump_context *ctx, return offset; } -/* Determine whether the hash table's hash order is stable - across dump and load. If it is, we don't have to trigger - a rehash on access. */ -static bool -dump_hash_table_stable_p (const struct Lisp_Hash_Table *hash) +/* Return a vector of KEY, VALUE pairs in the given hash table H. The + first H->count pairs are valid, the rest is left as nil. */ +static Lisp_Object +hash_table_contents (struct Lisp_Hash_Table *h) { - if (hash->test.hashfn == hashfn_user_defined) + if (h->test.hashfn == hashfn_user_defined) error ("cannot dump hash tables with user-defined tests"); /* Bug#36769 */ - bool is_eql = hash->test.hashfn == hashfn_eql; - bool is_equal = hash->test.hashfn == hashfn_equal; - ptrdiff_t size = HASH_TABLE_SIZE (hash); - for (ptrdiff_t i = 0; i < size; ++i) + Lisp_Object contents = Qnil; + + /* Make sure key_and_value ends up in the same order; charset.c + relies on it by expecting hash table indices to stay constant + across the dump. */ + for (ptrdiff_t i = 0; i < HASH_TABLE_SIZE (h) - h->count; i++) { - Lisp_Object key = HASH_KEY (hash, i); - if (!EQ (key, Qunbound)) - { - bool key_stable = (dump_builtin_symbol_p (key) - || FIXNUMP (key) - || (is_equal - && (STRINGP (key) || BOOL_VECTOR_P (key))) - || ((is_equal || is_eql) - && (FLOATP (key) || BIGNUMP (key)))); - if (!key_stable) - return false; - } + dump_push (&contents, Qnil); + dump_push (&contents, Qunbound); } - return true; + for (ptrdiff_t i = HASH_TABLE_SIZE (h) - 1; i >= 0; --i) + if (!NILP (HASH_HASH (h, i))) + { + dump_push (&contents, HASH_VALUE (h, i)); + dump_push (&contents, HASH_KEY (h, i)); + } + + return CALLN (Fapply, Qvector, contents); } -/* Return a list of (KEY . VALUE) pairs in the given hash table. */ -static Lisp_Object -hash_table_contents (Lisp_Object table) +static dump_off +dump_hash_table_list (struct dump_context *ctx) { - Lisp_Object contents = Qnil; - struct Lisp_Hash_Table *h = XHASH_TABLE (table); - for (ptrdiff_t i = 0; i < HASH_TABLE_SIZE (h); ++i) - { - Lisp_Object key = HASH_KEY (h, i); - if (!EQ (key, Qunbound)) - dump_push (&contents, Fcons (key, HASH_VALUE (h, i))); - } - return Fnreverse (contents); + if (CONSP (ctx->hash_tables)) + return dump_object (ctx, CALLN (Fapply, Qvector, ctx->hash_tables)); + else + return 0; } -/* Copy the given hash table, rehash it, and make sure that we can - look up all the values in the original. */ static void -check_hash_table_rehash (Lisp_Object table_orig) -{ - ptrdiff_t count = XHASH_TABLE (table_orig)->count; - hash_rehash_if_needed (XHASH_TABLE (table_orig)); - Lisp_Object table_rehashed = Fcopy_hash_table (table_orig); - eassert (!hash_rehash_needed_p (XHASH_TABLE (table_rehashed))); - XHASH_TABLE (table_rehashed)->hash = Qnil; - eassert (count == 0 || hash_rehash_needed_p (XHASH_TABLE (table_rehashed))); - hash_rehash_if_needed (XHASH_TABLE (table_rehashed)); - eassert (!hash_rehash_needed_p (XHASH_TABLE (table_rehashed))); - Lisp_Object expected_contents = hash_table_contents (table_orig); - while (!NILP (expected_contents)) - { - Lisp_Object key_value_pair = dump_pop (&expected_contents); - Lisp_Object key = XCAR (key_value_pair); - Lisp_Object expected_value = XCDR (key_value_pair); - Lisp_Object arbitrary = Qdump_emacs_portable__sort_predicate_copied; - Lisp_Object found_value = Fgethash (key, table_rehashed, arbitrary); - eassert (EQ (expected_value, found_value)); - Fremhash (key, table_rehashed); - } +hash_table_freeze (struct Lisp_Hash_Table *h) +{ + ptrdiff_t nkeys = XFIXNAT (Flength (h->key_and_value)) / 2; + h->key_and_value = hash_table_contents (h); + h->next_free = (nkeys == h->count ? -1 : h->count); + h->index = Flength (h->index); + h->next = h->hash = make_fixnum (nkeys); +} - eassert (EQ (Fhash_table_count (table_rehashed), - make_fixnum (0))); +static void +hash_table_thaw (Lisp_Object hash) +{ + struct Lisp_Hash_Table *h = XHASH_TABLE (hash); + h->index = Fmake_vector (h->index, make_fixnum (-1)); + h->hash = Fmake_vector (h->hash, Qnil); + h->next = Fmake_vector (h->next, make_fixnum (-1)); + + hash_table_rehash (hash); } static dump_off @@ -2699,51 +2676,11 @@ dump_hash_table (struct dump_context *ctx, # error "Lisp_Hash_Table changed. See CHECK_STRUCTS comment in config.h." #endif const struct Lisp_Hash_Table *hash_in = XHASH_TABLE (object); - bool is_stable = dump_hash_table_stable_p (hash_in); - /* If the hash table is likely to be modified in memory (either - because we need to rehash, and thus toggle hash->count, or - because we need to assemble a list of weak tables) punt the hash - table to the end of the dump, where we can lump all such hash - tables together. */ - if (!(is_stable || !NILP (hash_in->weak)) - && ctx->flags.defer_hash_tables) - { - if (offset != DUMP_OBJECT_ON_HASH_TABLE_QUEUE) - { - eassert (offset == DUMP_OBJECT_ON_NORMAL_QUEUE - || offset == DUMP_OBJECT_NOT_SEEN); - /* We still want to dump the actual keys and values now. */ - dump_enqueue_object (ctx, hash_in->key_and_value, WEIGHT_NONE); - /* We'll get to the rest later. */ - offset = DUMP_OBJECT_ON_HASH_TABLE_QUEUE; - dump_remember_object (ctx, object, offset); - dump_push (&ctx->deferred_hash_tables, object); - } - return offset; - } - - if (PDUMPER_CHECK_REHASHING) - check_hash_table_rehash (make_lisp_ptr ((void *) hash_in, Lisp_Vectorlike)); - struct Lisp_Hash_Table hash_munged = *hash_in; struct Lisp_Hash_Table *hash = &hash_munged; - /* Remember to rehash this hash table on first access. After a - dump reload, the hash table values will have changed, so we'll - need to rebuild the index. - - TODO: for EQ and EQL hash tables, it should be possible to rehash - here using the preferred load address of the dump, eliminating - the need to rehash-on-access if we can load the dump where we - want. */ - if (hash->count > 0 && !is_stable) - /* Hash codes will have to be recomputed anyway, so let's not dump them. - Also set `hash` to nil for hash_rehash_needed_p. - We could also refrain from dumping the `next' and `index' vectors, - except that `next' is currently used for HASH_TABLE_SIZE and - we'd have to rebuild the next_free list as well as adjust - sweep_weak_hash_table for the case where there's no `index'. */ - hash->hash = Qnil; + hash_table_freeze (hash); + dump_push (&ctx->hash_tables, object); START_DUMP_PVEC (ctx, &hash->header, struct Lisp_Hash_Table, out); dump_pseudovector_lisp_fields (ctx, &out->header, &hash->header); @@ -4151,6 +4088,19 @@ DEFUN ("dump-emacs-portable", || !NILP (ctx->deferred_hash_tables) || !NILP (ctx->deferred_symbols)); + ctx->header.hash_list = ctx->offset; + dump_hash_table_list (ctx); + + do + { + dump_drain_deferred_hash_tables (ctx); + dump_drain_deferred_symbols (ctx); + dump_drain_normal_queue (ctx); + } + while (!dump_queue_empty_p (&ctx->dump_queue) + || !NILP (ctx->deferred_hash_tables) + || !NILP (ctx->deferred_symbols)); + dump_sort_copied_objects (ctx); /* While we copy built-in symbols into the Emacs image, these @@ -5302,6 +5252,9 @@ dump_do_all_emacs_relocations (const struct dump_header *const header, NUMBER_DUMP_SECTIONS, }; +/* Pointer to a stack variable to avoid having to staticpro it. */ +static Lisp_Object *pdumper_hashes = &zero_vector; + /* Load a dump from DUMP_FILENAME. Return an error code. N.B. We run very early in initialization, so we can't use lisp, @@ -5448,6 +5401,15 @@ pdumper_load (const char *dump_filename) for (int i = 0; i < ARRAYELTS (sections); ++i) dump_mmap_reset (§ions[i]); + Lisp_Object hashes = zero_vector; + if (header->hash_list) + { + struct Lisp_Vector *hash_tables = + ((struct Lisp_Vector *)(dump_base + header->hash_list)); + XSETVECTOR (hashes, hash_tables); + } + + pdumper_hashes = &hashes; /* Run the functions Emacs registered for doing post-dump-load initialization. */ for (int i = 0; i < nr_dump_hooks; ++i) @@ -5518,6 +5480,19 @@ DEFUN ("pdumper-stats", Fpdumper_stats, Spdumper_stats, 0, 0, 0, #endif /* HAVE_PDUMPER */ +static void +thaw_hash_tables (void) +{ + Lisp_Object hash_tables = *pdumper_hashes; + for (ptrdiff_t i = 0; i < ASIZE (hash_tables); i++) + hash_table_thaw (AREF (hash_tables, i)); +} + +void +init_pdumper_once (void) +{ + pdumper_do_now_and_after_load (thaw_hash_tables); +} void syms_of_pdumper (void) diff --git a/src/pdumper.h b/src/pdumper.h index 6a99b511f2..c793fb4058 100644 --- a/src/pdumper.h +++ b/src/pdumper.h @@ -256,6 +256,7 @@ pdumper_clear_marks (void) file was loaded. */ extern void pdumper_record_wd (const char *); +void init_pdumper_once (void); void syms_of_pdumper (void); INLINE_HEADER_END -- 2.17.1