=== modified file 'src/alloc.c' --- src/alloc.c 2012-05-25 18:19:24 +0000 +++ src/alloc.c 2012-05-29 05:51:10 +0000 @@ -147,20 +147,14 @@ /* Mark, unmark, query mark bit of a Lisp string. S must be a pointer to a struct Lisp_String. */ -#define MARK_STRING(S) ((S)->size |= ARRAY_MARK_FLAG) -#define UNMARK_STRING(S) ((S)->size &= ~ARRAY_MARK_FLAG) -#define STRING_MARKED_P(S) (((S)->size & ARRAY_MARK_FLAG) != 0) +#define MARK_STRING(S) ((S)->u.imm.gcmarkbit = 1) +#define UNMARK_STRING(S) ((S)->u.imm.gcmarkbit = 0) +#define STRING_MARKED_P(S) ((S)->u.imm.gcmarkbit) #define VECTOR_MARK(V) ((V)->header.size |= ARRAY_MARK_FLAG) #define VECTOR_UNMARK(V) ((V)->header.size &= ~ARRAY_MARK_FLAG) #define VECTOR_MARKED_P(V) (((V)->header.size & ARRAY_MARK_FLAG) != 0) -/* Value is the number of bytes of S, a pointer to a struct Lisp_String. - Be careful during GC, because S->size contains the mark bit for - strings. */ - -#define GC_STRING_BYTES(S) (STRING_BYTES (S)) - /* Global variables. */ struct emacs_globals globals; @@ -392,6 +386,7 @@ static void mark_stack (void); static int live_vector_p (struct mem_node *, void *); static int live_buffer_p (struct mem_node *, void *); +static int live_string_data_p (struct Lisp_String *); static int live_string_p (struct mem_node *, void *); static int live_cons_p (struct mem_node *, void *); static int live_symbol_p (struct mem_node *, void *); @@ -1761,7 +1756,8 @@ a pointer to the `u.data' member of its sdata structure; the structure starts at a constant offset in front of that. */ -#define SDATA_OF_STRING(S) ((struct sdata *) ((S)->data - SDATA_DATA_OFFSET)) +#define SDATA_OF_STRING(S) ((S)->u.imm.immbit ? (struct sdata *) NULL \ + : ((struct sdata *) ((S)->u.dat.data - SDATA_DATA_OFFSET))) #ifdef GC_CHECK_STRING_OVERRUN @@ -1843,21 +1839,28 @@ static int check_string_bytes_count; -#define CHECK_STRING_BYTES(S) STRING_BYTES (S) - - -/* Like GC_STRING_BYTES, but with debugging check. */ +#define CHECK_STRING_BYTES(S) string_bytes (S) ptrdiff_t string_bytes (struct Lisp_String *s) { - ptrdiff_t nbytes = - (s->size_byte < 0 ? s->size & ~ARRAY_MARK_FLAG : s->size_byte); - - if (!PURE_POINTER_P (s) - && s->data - && nbytes != SDATA_NBYTES (SDATA_OF_STRING (s))) - abort (); + ptrdiff_t nbytes; + + if (s->u.imm.immbit) + { + nbytes = s->u.imm.size_byte < 0 ? s->u.imm.size : s->u.imm.size_byte; + eassert (nbytes < STRING_IMM_MAX); + } + else + { + nbytes = s->u.dat.size_byte < 0 ? s->u.dat.size : s->u.dat.size_byte; + eassert (nbytes >= STRING_IMM_MAX); + if (!PURE_POINTER_P (s) && s->u.dat.data && + nbytes != SDATA_NBYTES (SDATA_OF_STRING (s))) + /* Normal non-pure string with size mismatch. */ + abort (); + } + return nbytes; } @@ -1882,7 +1885,7 @@ CHECK_STRING_BYTES (from->string); if (from->string) - nbytes = GC_STRING_BYTES (from->string); + nbytes = string_bytes (from->string); else nbytes = SDATA_NBYTES (from); @@ -2028,8 +2031,8 @@ /* Determine the number of bytes needed to store NBYTES bytes of string data. */ needed = SDATA_SIZE (nbytes); - old_data = s->data ? SDATA_OF_STRING (s) : NULL; - old_nbytes = GC_STRING_BYTES (s); + old_data = s->u.dat.data ? SDATA_OF_STRING (s) : NULL; + old_nbytes = string_bytes (s); MALLOC_BLOCK_INPUT; @@ -2088,13 +2091,11 @@ MALLOC_UNBLOCK_INPUT; data->string = s; - s->data = SDATA_DATA (data); + s->u.dat.data = SDATA_DATA (data); #ifdef GC_CHECK_STRING_BYTES SDATA_NBYTES (data) = nbytes; #endif - s->size = nchars; - s->size_byte = nbytes; - s->data[nbytes] = '\0'; + s->u.dat.data[nbytes] = '\0'; #ifdef GC_CHECK_STRING_OVERRUN memcpy ((char *) data + needed, string_overrun_cookie, GC_STRING_OVERRUN_COOKIE_SIZE); @@ -2112,6 +2113,12 @@ consing_since_gc += needed; } +#ifdef GC_STRING_STATS + +static EMACS_INT total_imm_strings, total_dat_strings, total_imm_intervals; +static EMACS_INT total_imm_bytes, total_dat_bytes, total_dat_intervals; + +#endif /* Sweep and compact strings. */ @@ -2125,6 +2132,12 @@ total_strings = total_free_strings = 0; total_string_size = 0; +#ifdef GC_STRING_STATS + total_imm_strings = total_dat_strings = 0; + total_imm_bytes = total_dat_bytes = 0; + total_imm_intervals = total_dat_intervals = 0; +#endif + /* Scan strings_blocks, free Lisp_Strings that aren't marked. */ for (b = string_blocks; b; b = next) { @@ -2137,49 +2150,64 @@ { struct Lisp_String *s = b->strings + i; - if (s->data) + if (STRING_MARKED_P (s)) + { + /* String is live; unmark it and its intervals. */ + UNMARK_STRING (s); + + if (!NULL_INTERVAL_P (s->intervals)) + UNMARK_BALANCE_INTERVALS (s->intervals); + + ++total_strings; + total_string_size += string_bytes (s); +#ifdef GC_STRING_STATS + if (s->u.imm.immbit) + { + total_imm_strings++; + total_imm_bytes += string_bytes (s); + if (!NULL_INTERVAL_P (s->intervals)) + total_imm_intervals++; + } + else + { + total_dat_strings++; + total_dat_bytes += string_bytes (s); + if (!NULL_INTERVAL_P (s->intervals)) + total_dat_intervals++; + } +#endif /* GC_STRING_STATS */ + } + else { - /* String was not on free-list before. */ - if (STRING_MARKED_P (s)) - { - /* String is live; unmark it and its intervals. */ - UNMARK_STRING (s); - - if (!NULL_INTERVAL_P (s->intervals)) - UNMARK_BALANCE_INTERVALS (s->intervals); - - ++total_strings; - total_string_size += STRING_BYTES (s); - } + if (s->u.imm.immbit) + /* Fill data with special pattern. Used by + GC to find dead immediate strings. */ + memset (s->u.imm.data, 0xff, STRING_IMM_MAX); else { - /* String is dead. Put it on the free-list. */ - struct sdata *data = SDATA_OF_STRING (s); + if (s->u.dat.data) + { + /* String is dead. Put it on the free-list. */ + struct sdata *data = SDATA_OF_STRING (s); - /* Save the size of S in its sdata so that we know - how large that is. Reset the sdata's string - back-pointer so that we know it's free. */ + /* Save the size of S in its sdata so that we know + how large that is. Reset the sdata's string + back-pointer so that we know it's free. */ #ifdef GC_CHECK_STRING_BYTES - if (GC_STRING_BYTES (s) != SDATA_NBYTES (data)) - abort (); + if (string_bytes (s) != SDATA_NBYTES (data)) + abort (); #else - data->u.nbytes = GC_STRING_BYTES (s); + data->u.nbytes = string_bytes (s); #endif - data->string = NULL; - - /* Reset the strings's `data' member so that we - know it's free. */ - s->data = NULL; - - /* Put the string on the free-list. */ - NEXT_FREE_LISP_STRING (s) = string_free_list; - string_free_list = s; - ++nfree; + data->string = NULL; + + /* Reset the strings's `data' member so that we + know it's free. */ + s->u.dat.data = NULL; + } } - } - else - { - /* S was on the free-list before. Put it there again. */ + + /* Put the string on the free-list. */ NEXT_FREE_LISP_STRING (s) = string_free_list; string_free_list = s; ++nfree; @@ -2271,12 +2299,12 @@ /* Check that the string size recorded in the string is the same as the one recorded in the sdata structure. */ if (from->string - && GC_STRING_BYTES (from->string) != SDATA_NBYTES (from)) + && string_bytes (from->string) != SDATA_NBYTES (from)) abort (); #endif /* GC_CHECK_STRING_BYTES */ if (from->string) - nbytes = GC_STRING_BYTES (from->string); + nbytes = string_bytes (from->string); else nbytes = SDATA_NBYTES (from); @@ -2312,7 +2340,7 @@ { xassert (tb != b || to < from); memmove (to, from, nbytes + GC_STRING_EXTRA); - to->string->data = SDATA_DATA (to); + to->string->u.dat.data = SDATA_DATA (to); } /* Advance past the sdata we copied to. */ @@ -2562,13 +2590,24 @@ return empty_multibyte_string; s = allocate_string (); - allocate_string_data (s, nchars, nbytes); + if (nbytes < STRING_IMM_MAX) + { + s->u.imm.immbit = 1; + s->u.imm.size = nchars; + s->u.imm.size_byte = nbytes; + } + else + { + s->u.imm.immbit = 0; + s->u.dat.size = nchars; + s->u.dat.size_byte = nbytes; + allocate_string_data (s, nchars, nbytes); + } XSETSTRING (string, s); string_chars_consed += nbytes; return string; } - /*********************************************************************** Float Allocation @@ -3937,6 +3976,22 @@ x->color = MEM_BLACK; } +/* Non-zero if data of S is valid. */ + +static inline int +live_string_data_p (struct Lisp_String *s) +{ + if (s->u.imm.immbit) + { + unsigned char *p; + + for (p = s->u.imm.data; p < s->u.imm.data + STRING_IMM_MAX; p++) + if (*p != 0xff) + return 1; + return 0; + } + return s->u.dat.data != NULL; +} /* Value is non-zero if P is a pointer to a live Lisp string on the heap. M is a pointer to the mem_block for P. */ @@ -3954,7 +4009,7 @@ return (offset >= 0 && offset % sizeof b->strings[0] == 0 && offset < (STRING_BLOCK_SIZE * sizeof b->strings[0]) - && ((struct Lisp_String *) p)->data != NULL); + && live_string_data_p ((struct Lisp_String *) p)); } else return 0; @@ -4869,15 +4924,29 @@ struct Lisp_String *s; s = (struct Lisp_String *) pure_alloc (sizeof *s, Lisp_String); - s->data = (unsigned char *) find_string_data_in_pure (data, nbytes); - if (s->data == NULL) - { - s->data = (unsigned char *) pure_alloc (nbytes + 1, -1); - memcpy (s->data, data, nbytes); - s->data[nbytes] = '\0'; - } - s->size = nchars; - s->size_byte = multibyte ? nbytes : -1; + + if (nbytes < STRING_IMM_MAX) + { + memcpy (s->u.imm.data, data, nbytes); + s->u.imm.data[nbytes] = '\0'; + s->u.imm.immbit = 1; + s->u.imm.size = nchars; + s->u.imm.size_byte = multibyte ? nbytes : -1; + } + else + { + s->u.dat.data = (unsigned char *) find_string_data_in_pure (data, nbytes); + if (s->u.dat.data == NULL) + { + s->u.dat.data = (unsigned char *) pure_alloc (nbytes + 1, -1); + memcpy (s->u.dat.data, data, nbytes); + s->u.dat.data[nbytes] = '\0'; + } + s->u.imm.immbit = 0; + s->u.dat.size = nchars; + s->u.dat.size_byte = multibyte ? nbytes : -1; + } + s->intervals = NULL_INTERVAL; XSETSTRING (string, s); return string; @@ -4894,9 +4963,23 @@ ptrdiff_t nchars = strlen (data); s = (struct Lisp_String *) pure_alloc (sizeof *s, Lisp_String); - s->size = nchars; - s->size_byte = -1; - s->data = (unsigned char *) data; + + if (nchars < STRING_IMM_MAX) + { + memcpy (s->u.imm.data, data, nchars); + s->u.imm.data[nchars] = '\0'; + s->u.imm.immbit = 1; + s->u.imm.size = nchars; + s->u.imm.size_byte = -1; + } + else + { + s->u.dat.data = (unsigned char *) data; + s->u.imm.immbit = 0; + s->u.dat.size = nchars; + s->u.dat.size_byte = -1; + } + s->intervals = NULL_INTERVAL; XSETSTRING (string, s); return string; @@ -6319,6 +6402,34 @@ return Flist (8, consed); } +#ifdef GC_STRING_STATS + +DEFUN ("string-stats", Fstring_stats, Sstring_stats, 0, 0, 0, + doc: /* Return a list of counters that measures how much +strings of a particular internal structure are alive after last +garbage collection, and how many bytes are in them. +The elements of the value are are as follows: + (IMM-STRINGS IMM-BYTES IMM-INTERVALS DAT-STRINGS DAT-BYTES DAT-INTERVALS) +where IMM-STRINGS is the number of immediate strings, IMM-BYTES is the total +number of bytes in them, and IMM-INTERVALS is the number of immediate string +with non-nil text properties. The rest three numbers has the same meaning +for normal strings, respectively. */) + (void) +{ + Lisp_Object data[6]; + + data[0] = make_number (min (MOST_POSITIVE_FIXNUM, total_imm_strings)); + data[1] = make_number (min (MOST_POSITIVE_FIXNUM, total_imm_bytes)); + data[2] = make_number (min (MOST_POSITIVE_FIXNUM, total_imm_intervals)); + data[3] = make_number (min (MOST_POSITIVE_FIXNUM, total_dat_strings)); + data[4] = make_number (min (MOST_POSITIVE_FIXNUM, total_dat_bytes)); + data[5] = make_number (min (MOST_POSITIVE_FIXNUM, total_dat_intervals)); + + return Flist (6, data); +} + +#endif /* GC_STRING_STATS */ + /* Find at most FIND_MAX symbols which have OBJ as their value or function. This is used in gdbinit's `xwhichsymbols' command. */ @@ -6547,7 +6658,9 @@ defsubr (&Sgarbage_collect); defsubr (&Smemory_limit); defsubr (&Smemory_use_counts); - +#ifdef GC_STRING_STATS + defsubr (&Sstring_stats); +#endif #if GC_MARK_STACK == GC_USE_GCPROS_CHECK_ZOMBIES defsubr (&Sgc_status); #endif === modified file 'src/fns.c' --- src/fns.c 2012-05-25 18:19:24 +0000 +++ src/fns.c 2012-05-28 15:03:58 +0000 @@ -2166,8 +2166,8 @@ int len = CHAR_STRING (charval, str); ptrdiff_t size_byte = SBYTES (array); - if (INT_MULTIPLY_OVERFLOW (SCHARS (array), len) - || SCHARS (array) * len != size_byte) + if (INT_MULTIPLY_OVERFLOW (size, len) + || size * len != size_byte) error ("Attempt to change byte length of a string"); for (idx = 0; idx < size_byte; idx++) *p++ = str[idx % len]; === modified file 'src/lisp.h' --- src/lisp.h 2012-05-27 07:51:09 +0000 +++ src/lisp.h 2012-05-29 05:54:03 +0000 @@ -69,7 +69,8 @@ BITS_PER_SHORT = CHAR_BIT * sizeof (short), BITS_PER_INT = CHAR_BIT * sizeof (int), BITS_PER_LONG = CHAR_BIT * sizeof (long int), - BITS_PER_EMACS_INT = CHAR_BIT * sizeof (EMACS_INT) + BITS_PER_EMACS_INT = CHAR_BIT * sizeof (EMACS_INT), + BITS_PER_PTRDIFF_T = CHAR_BIT * sizeof (ptrdiff_t) }; /* printmax_t and uprintmax_t are types for printing large integers. @@ -743,17 +744,23 @@ /* Convenience macros for dealing with Lisp strings. */ -#define SDATA(string) (XSTRING (string)->data + 0) +#define SDATA(string) (XSTRING (string)->u.imm.immbit ? \ + (XSTRING (string)->u.imm.data) : \ + (XSTRING (string)->u.dat.data)) #define SREF(string, index) (SDATA (string)[index] + 0) #define SSET(string, index, new) (SDATA (string)[index] = (new)) -#define SCHARS(string) (XSTRING (string)->size + 0) -#define SBYTES(string) (STRING_BYTES (XSTRING (string)) + 0) +#define SCHARS(string) (XSTRING (string)->u.imm.immbit ? \ + (XSTRING (string)->u.imm.size) : \ + (XSTRING (string)->u.dat.size)) +#define SBYTES(string) (string_bytes (XSTRING (string))) /* Avoid "differ in sign" warnings. */ #define SSDATA(x) ((char *) SDATA (x)) -#define STRING_SET_CHARS(string, newsize) \ - (XSTRING (string)->size = (newsize)) +#define STRING_SET_CHARS(string, newsize) \ + (XSTRING (string)->u.imm.immbit ? \ + (XSTRING (string)->u.imm.size = (newsize)) : \ + (XSTRING (string)->u.dat.size = (newsize))) #define STRING_COPYIN(string, index, new, count) \ memcpy (SDATA (string) + index, new, count) @@ -843,24 +850,12 @@ #define CDR_SAFE(c) \ (CONSP ((c)) ? XCDR ((c)) : Qnil) +#define STRING_SIZE_BYTE(string) (XSTRING (string)->u.imm.immbit ? \ + XSTRING (string)->u.imm.size_byte : \ + XSTRING (string)->u.dat.size_byte) + /* Nonzero if STR is a multibyte string. */ -#define STRING_MULTIBYTE(STR) \ - (XSTRING (STR)->size_byte >= 0) - -/* Return the length in bytes of STR. */ - -#ifdef GC_CHECK_STRING_BYTES - -struct Lisp_String; -extern ptrdiff_t string_bytes (struct Lisp_String *); -#define STRING_BYTES(S) string_bytes ((S)) - -#else /* not GC_CHECK_STRING_BYTES */ - -#define STRING_BYTES(STR) \ - ((STR)->size_byte < 0 ? (STR)->size : (STR)->size_byte) - -#endif /* not GC_CHECK_STRING_BYTES */ +#define STRING_MULTIBYTE(string) (STRING_SIZE_BYTE (string) > 0) /* An upper bound on the number of bytes in a Lisp string, not counting the terminating null. This a tight enough bound to @@ -876,18 +871,28 @@ #define STRING_BYTES_BOUND \ min (MOST_POSITIVE_FIXNUM, (ptrdiff_t) min (SIZE_MAX, PTRDIFF_MAX) - 1) +/* Maximum amount of bytes, including '\0', in an immediate string. */ +#define STRING_IMM_MAX (3 * sizeof (ptrdiff_t) - 2) + /* Mark STR as a unibyte string. */ #define STRING_SET_UNIBYTE(STR) \ - do { if (EQ (STR, empty_multibyte_string)) \ - (STR) = empty_unibyte_string; \ - else XSTRING (STR)->size_byte = -1; } while (0) + do { if (EQ (STR, empty_multibyte_string)) \ + (STR) = empty_unibyte_string; \ + else if (XSTRING (STR)->u.imm.immbit) \ + XSTRING (STR)->u.imm.size_byte = -1; \ + else \ + XSTRING (STR)->u.dat.size_byte = -1; } while (0) /* Mark STR as a multibyte string. Assure that STR contains only ASCII characters in advance. */ -#define STRING_SET_MULTIBYTE(STR) \ - do { if (EQ (STR, empty_unibyte_string)) \ - (STR) = empty_multibyte_string; \ - else XSTRING (STR)->size_byte = XSTRING (STR)->size; } while (0) +#define STRING_SET_MULTIBYTE(STR) \ + do { if (EQ (STR, empty_unibyte_string)) \ + (STR) = empty_multibyte_string; \ + else if (XSTRING (STR)->u.imm.immbit) \ + XSTRING (STR)->u.imm.size_byte = XSTRING (STR)->u.imm.size; \ + else \ + XSTRING (STR)->u.dat.size_byte = XSTRING (STR)->u.dat.size; \ + } while (0) /* Get text properties. */ #define STRING_INTERVALS(STR) (XSTRING (STR)->intervals + 0) @@ -898,12 +903,55 @@ /* In a string or vector, the sign bit of the `size' is the gc mark bit */ struct Lisp_String - { - ptrdiff_t size; - ptrdiff_t size_byte; - INTERVAL intervals; /* text properties in this string */ - unsigned char *data; - }; +{ + /* Text properties in this string. Should be the first + member since NEXT_FREE_LISP_STRING from alloc.c uses it. */ + INTERVAL intervals; + + union { + /* GC mark bit and subtype bit are in IMM just by convention - when + IMMBIT is 0, the DAT field is used except it's UNUSED field. */ + struct { + unsigned gcmarkbit : 1; + unsigned immbit : 1; + ptrdiff_t size : 7; + ptrdiff_t size_byte : 7; + unsigned char data[STRING_IMM_MAX]; + } imm; + + struct { + unsigned unused : 2; /* Do not access this placeholder. */ + ptrdiff_t size : BITS_PER_PTRDIFF_T - 2; + ptrdiff_t size_byte : BITS_PER_PTRDIFF_T - 2; + unsigned char *data; + } dat; + } u; +}; + +/* Return the length in bytes of STR. */ + +#ifdef GC_CHECK_STRING_BYTES + +struct Lisp_String; +extern ptrdiff_t string_bytes (struct Lisp_String *); +#define STRING_BYTES(S) string_bytes ((S)) + +#else /* not GC_CHECK_STRING_BYTES */ + +static inline +ptrdiff_t string_bytes (struct Lisp_String *s) +{ + ptrdiff_t size, size_byte; + + if (s->u.imm.immbit) + size = s->u.imm.size, size_byte = s->u.imm.size_byte; + else + size = s->u.dat.size, size_byte = s->u.dat.size_byte; + + return size_byte < 0 ? size : size_byte; +} + +#endif /* not GC_CHECK_STRING_BYTES */ /* Header of vector-like objects. This documents the layout constraints on vectors and pseudovectors other than struct Lisp_Subr. It also prevents