From 65caf5b205d22f76bb4ec85cfe597b621a83afb3 Mon Sep 17 00:00:00 2001 From: Mattias EngdegÄrd Date: Fri, 31 Dec 2021 17:24:31 +0100 Subject: Pin bytecode strings to avoid copy at call time Avoid making a copy (in the interpreter C stack frame) of the bytecode string by making sure it won't be moved by the GC. This is done by reallocating it to the heap normally only used for large strings, which isn't compacted. This requires that we retain an explicit reference to the bytecode string object (`bytestr`) lest it be GCed away should all other references vanish during execution. We allocate an extra stack slot for that, as we already do for the constant vector object. * src/alloc.c (allocate_string_data): Add `immovable` argument. (resize_string_data, make_clear_multibyte_string): Use it. (pin_string): New. * src/pdumper.c (dump_string): Fix incorrect comment. Update hash for Lisp_String (only comments changed, not contents). * src/lread.c (read1): * src/alloc.c (Fmake_byte_code, purecopy): * src/bytecode.c (Fbyte_code): Pin bytecode on object creation. (exec_byte_code): Don't copy bytecode. Retain `bytestr` explicitly. * src/lisp.h (Lisp_String): Explain special size_byte values. (string_immovable_p): New. --- src/alloc.c | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) (limited to 'src/alloc.c') diff --git a/src/alloc.c b/src/alloc.c index e0b2c220231..e01ea36e642 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -1853,7 +1853,8 @@ allocate_string (void) static void allocate_string_data (struct Lisp_String *s, - EMACS_INT nchars, EMACS_INT nbytes, bool clearit) + EMACS_INT nchars, EMACS_INT nbytes, bool clearit, + bool immovable) { sdata *data; struct sblock *b; @@ -1867,7 +1868,7 @@ allocate_string_data (struct Lisp_String *s, MALLOC_BLOCK_INPUT; - if (nbytes > LARGE_STRING_BYTES) + if (nbytes > LARGE_STRING_BYTES || immovable) { size_t size = FLEXSIZEOF (struct sblock, data, needed); @@ -1967,7 +1968,7 @@ resize_string_data (Lisp_Object string, ptrdiff_t cidx_byte, } else { - allocate_string_data (XSTRING (string), nchars, new_nbytes, false); + allocate_string_data (XSTRING (string), nchars, new_nbytes, false, false); unsigned char *new_data = SDATA (string); new_charaddr = new_data + cidx_byte; memcpy (new_charaddr + new_clen, data + cidx_byte + clen, @@ -2483,7 +2484,7 @@ make_clear_multibyte_string (EMACS_INT nchars, EMACS_INT nbytes, bool clearit) s = allocate_string (); s->u.s.intervals = NULL; - allocate_string_data (s, nchars, nbytes, clearit); + allocate_string_data (s, nchars, nbytes, clearit, false); XSETSTRING (string, s); string_chars_consed += nbytes; return string; @@ -2513,6 +2514,29 @@ make_formatted_string (char *buf, const char *format, ...) return make_string (buf, length); } +/* Pin a unibyte string in place so that it won't move during GC. */ +void +pin_string (Lisp_Object string) +{ + eassert (STRINGP (string) && !STRING_MULTIBYTE (string)); + struct Lisp_String *s = XSTRING (string); + ptrdiff_t size = STRING_BYTES (s); + unsigned char *data = s->u.s.data; + + if (!(size > LARGE_STRING_BYTES + || PURE_P (data) || pdumper_object_p (data) + || s->u.s.size_byte == -3)) + { + eassert (s->u.s.size_byte == -1); + sdata *old_sdata = SDATA_OF_STRING (s); + allocate_string_data (s, size, size, false, true); + memcpy (s->u.s.data, data, size); + old_sdata->string = NULL; + SDATA_NBYTES (old_sdata) = size; + } + s->u.s.size_byte = -3; +} + /*********************************************************************** Float Allocation @@ -3515,6 +3539,8 @@ usage: (make-byte-code ARGLIST BYTE-CODE CONSTANTS DEPTH &optional DOCSTRING INT && FIXNATP (args[COMPILED_STACK_DEPTH]))) error ("Invalid byte-code object"); + pin_string (args[COMPILED_BYTECODE]); // Bytecode must be immovable. + /* We used to purecopy everything here, if purify-flag was set. This worked OK for Emacs-23, but with Emacs-24's lexical binding code, it can be dangerous, since make-byte-code is used during execution to build @@ -5653,6 +5679,10 @@ purecopy (Lisp_Object obj) memcpy (vec, objp, nbytes); for (i = 0; i < size; i++) vec->contents[i] = purecopy (vec->contents[i]); + // Byte code strings must be pinned. + if (COMPILEDP (obj) && size >= 2 && STRINGP (vec->contents[1]) + && !STRING_MULTIBYTE (vec->contents[1])) + pin_string (vec->contents[1]); XSETVECTOR (obj, vec); } else if (BARE_SYMBOL_P (obj)) -- cgit v1.2.3