diff --git a/ChangeLog b/ChangeLog index 6e94266e..2e9d6c7d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,27 @@ +2025-02-05 Arnold D. Robbins + + * configure.ac: Add checks for spawn.h and _NSGetExecutablePath + function. + * awk.h (os_disable_aslr): Add function declaration. + * main.c (enable_pma): Move OS specific code out of this function + and into posix/gawkmisc.c. Instead, call os_disable_aslr(). + +2025-02-05 Arnold D. Robbins + + * main.c (enable_pma): Remove unused argc parameter and adjust + call. Add call to unsetenv() for magic env var if it was + there, so that the environment is as it was before the exec. + +2025-02-05 Arnold D. Robbins + + * main.c (enable_pma): New function. Has the entire init flow + for PMA. Also has new linux-specific code to deal with being + a PIE executable. + (main): Call enable_pma(). + * configure.ac: Add checks for <sys/personality.h> and the + personality() system call. + * NEWS: Updated. + 2025-02-05 Arnold D. Robbins Bug fixes for indirect calls of match and patsplit. diff --git a/NEWS b/NEWS index a02a9904..cf05f011 100644 --- a/NEWS +++ b/NEWS @@ -15,6 +15,10 @@ Changes from 5.3.1 to 5.3.x 2. OpenVMS 9.2-2 x86_64 is now supported. +3. On Linux and macOS systems, the -no-pie linker flag is no longer required. + PMA now works on macOS systems with Apple silicon, and not just + Intel systems. + XX. As usual, there have been several minor code cleanups and bug fixes. See the ChangeLog for details. 
diff --git a/awk.h b/awk.h index 8741dbe9..761f3af8 100644 --- a/awk.h +++ b/awk.h @@ -1640,6 +1640,7 @@ extern int os_is_setuid(void); extern int os_setbinmode(int fd, int mode); extern void os_restore_mode(int fd); extern void os_maybe_set_errno(void); +extern void os_disable_aslr(const char *persist_file, char **argv); extern size_t optimal_bufsize(int fd, struct stat *sbuf); extern int ispath(const char *file); extern int isdirpunct(int c); diff --git a/configh.in b/configh.in index c3465c6e..885c4b91 100644 --- a/configh.in +++ b/configh.in @@ -177,6 +177,9 @@ /* Define to 1 if you have the <netinet/in.h> header file. */ #undef HAVE_NETINET_IN_H +/* Define to 1 if you have the `personality' function. */ +#undef HAVE_PERSONALITY + /* Define to 1 if you have the `posix_openpt' function. */ #undef HAVE_POSIX_OPENPT @@ -201,6 +204,9 @@ /* we have sockets on this system */ #undef HAVE_SOCKETS +/* Define to 1 if you have the <spawn.h> header file. */ +#undef HAVE_SPAWN_H + /* Define to 1 if you have the <stdbool.h> header file. */ #undef HAVE_STDBOOL_H @@ -276,6 +282,9 @@ /* Define to 1 if you have the <sys/param.h> header file. */ #undef HAVE_SYS_PARAM_H +/* Define to 1 if you have the <sys/personality.h> header file. */ +#undef HAVE_SYS_PERSONALITY_H + /* Define to 1 if you have the <sys/select.h> header file. */ #undef HAVE_SYS_SELECT_H @@ -356,6 +365,9 @@ /* systems should define this type here */ #undef HAVE_WINT_T +/* Define to 1 if you have the `_NSGetExecutablePath' function. */ +#undef HAVE__NSGETEXECUTABLEPATH + /* Define to 1 if you have the `__etoa_l' function. 
*/ #undef HAVE___ETOA_L diff --git a/configure.ac b/configure.ac index 679504de..015e18ff 100644 --- a/configure.ac +++ b/configure.ac @@ -187,8 +187,9 @@ gt_LC_MESSAGES dnl checks for header files AC_CHECK_HEADERS(arpa/inet.h fcntl.h locale.h libintl.h mcheck.h \ netdb.h netinet/in.h stddef.h string.h \ - sys/ioctl.h sys/param.h sys/select.h sys/socket.h sys/time.h unistd.h \ - termios.h stropts.h wchar.h wctype.h) + spawn.h \ + sys/ioctl.h sys/param.h sys/personality.h sys/select.h sys/socket.h sys/time.h \ + unistd.h termios.h stropts.h wchar.h wctype.h) gl_C_BOOL AC_HEADER_SYS_WAIT @@ -317,7 +318,8 @@ AC_CHECK_FUNCS(__etoa_l atexit btowc fmod fwrite_unlocked gai_strerror \ gettimeofday clock_gettime lstat \ getdtablesize \ mbrlen memcmp memcpy memmove memset \ - mkstemp mtrace posix_openpt setenv setlocale setsid sigprocmask \ + _NSGetExecutablePath \ + mkstemp mtrace personality posix_openpt setenv setlocale setsid sigprocmask \ snprintf strcasecmp strchr strcoll strerror strftime strncasecmp \ strsignal strtod strtoul system timegm tmpfile towlower towupper \ tzset usleep waitpid wcrtomb wcscoll wctype) diff --git a/doc/ChangeLog b/doc/ChangeLog index 595d3c9d..30ee9cd2 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,9 @@ +2025-02-01 Arnold D. Robbins + + * gawk.1: Remove note that persistent memory is experimental. + * gawk.texi (Persistent Memory): Remove stuff related to PIE + and ASLR. + 2025-01-22 Arnold D. Robbins * gawk.texi (Persistent Memory): Note that if a Linux system diff --git a/doc/gawk.1 b/doc/gawk.1 index 4656940f..28092569 100644 --- a/doc/gawk.1 +++ b/doc/gawk.1 @@ -13,7 +13,7 @@ . if \w'\(rq' .ds rq "\(rq . 
\} .\} -.TH GAWK 1 "Apr 24 2024" "Free Software Foundation" "Utility Commands" +.TH GAWK 1 "Feb 2 2025" "Free Software Foundation" "Utility Commands" .SH NAME gawk \- pattern scanning and processing language .SH SYNOPSIS @@ -2291,7 +2291,6 @@ The .B GAWK_PERSIST_FILE environment variable, if present, specifies a file to use as the backing store for persistent memory. -.IR "This is an experimental feature" . See \*(EP for the details. .PP The diff --git a/doc/gawk.texi b/doc/gawk.texi index dc045cdf..34c7a837 100644 --- a/doc/gawk.texi +++ b/doc/gawk.texi @@ -30690,12 +30690,11 @@ in a persistent heap, which resides in a file in the filesystem. When persistent memory is not in use (the normal case), @command{gawk}'s data resides in ephemeral system memory. -Persistent memory is enabled on certain 64-bit systems supporting the @code{mmap()} -and @code{munmap()} system calls. @command{gawk} must be compiled as a -non-PIE (Position Independent Executable) binary, since the persistent +Persistent memory is enabled on certain 64-bit systems supporting the +@code{mmap()} and @code{munmap()} system calls. Since the persistent store ends up holding pointers to functions held within the @command{gawk} -executable. This also means that to use the persistent memory, you must -use the same @command{gawk} executable from run to run. +executable, in order to use persistent memory, you must use the same +@command{gawk} executable from run to run. You can see if your version of @command{gawk} supports persistent memory like so: @@ -30713,7 +30712,7 @@ If you see the @samp{PMA} with a version indicator, then it's supported. @cindex @env{REALLY_USE_PERSIST_MALLOC} environment variable @cindex environment variables @subentry @env{REALLY_USE_PERSIST_MALLOC} As of this writing, persistent memory has only been tested on GNU/Linux, -Cygwin, Solaris 2.11, Intel architecture macOS systems, +Cygwin, Solaris 2.11, macOS, FreeBSD 13.1 and OpenBSD 7.1. 
On all others, persistent memory is disabled by default. You can force it to be enabled by exporting the shell variable @@ -30768,13 +30767,12 @@ As shown, in subsequent runs using the same data file, the values of @command{gawk}'s variables are preserved. However, @command{gawk}'s special variables, such as @code{NR}, are reset upon each run. Only the variables defined by the program are preserved across runs. - @end enumerate -Interestingly, the program that you execute need not be the same from +Interestingly, the @command{awk} program that you execute need not be the same from run to run; the persistent store only maintains the values of variables, arrays, and user-defined functions, not the totality of @command{gawk}'s -internal state. This lets you share data between unrelated programs, +internal state. This lets you share data between unrelated @command{awk} programs, eliminating the need for scripts to communicate via text files. @cindex Kelly, Terence @@ -30834,16 +30832,6 @@ The GNU/Linux CIFS filesystem is known to not work well with the PMA allocator. Don't use a backing file on a CIFS filesystem. -@item -Some GNU/Linux distributions enable Address Space Layout Randomization -(ASLR) by default. This breaks @command{gawk}'s use of the PMA allocator. -You can work around this using @samp{setarch -R gawk @dots{}} to run -@command{gawk}; this disables ASLR for the particular execution of -@command{gawk}. If ASLR is the default on your system, and you want to use -persistent memory, you may wish to set up a shell alias, shell function, -or shell script to run @command{gawk} by way of the @command{setarch} -command. - @item If @command{gawk} is run by the @command{root} user, then persistent memory is not allowed. This is to avoid the possibility diff --git a/m4/ChangeLog b/m4/ChangeLog index f81f8504..8498a9ac 100644 --- a/m4/ChangeLog +++ b/m4/ChangeLog @@ -1,3 +1,13 @@ +2025-02-01 Arnold D. 
Robbins + + * pma.m4: On macos, no longer need to do anything special, + but we do have to have a case for it so that PMA is enabled. + +2025-01-29 Arnold D. Robbins + + * pma.m4: On Linux, no longer need to do anything special, + but we do have to have a case for it so that PMA is enabled. + 2024-09-17 Arnold D. Robbins * 5.3.1: Release tar made. diff --git a/m4/pma.m4 b/m4/pma.m4 index 6b7c6ed8..f270f4f3 100644 --- a/m4/pma.m4 +++ b/m4/pma.m4 @@ -1,6 +1,6 @@ dnl Decide whether or not to use the persistent memory allocator dnl -dnl Copyright (C) 2022, 2023 Free Software Foundation, Inc. +dnl Copyright (C) 2022, 2023, 2025 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, dnl with or without modifications, as long as this notice is preserved. @@ -19,25 +19,13 @@ then use_persistent_malloc=yes case $host_os in linux-*) - AX_CHECK_COMPILE_FLAG([-no-pie], - [LDFLAGS="${LDFLAGS} -no-pie" - export LDFLAGS]) + true # On Linux we no longer need -no-pie ;; *darwin*) - # 27 November 2022: PMA only works on Intel. - case $host in - x86_64-*) - LDFLAGS="${LDFLAGS} -Xlinker -no_pie" - export LDFLAGS - ;; - *) - # disable on all other macOS systems - use_persistent_malloc=no - ;; - esac + true # On macos we no longer need -no-pie ;; *cygwin* | *CYGWIN* | *solaris2.11* | freebsd13.* | openbsd7.* ) - true # nothing do, exes on these systems are not PIE + true # nothing to do, exes on these systems are not PIE ;; # Other OS's go here... *) diff --git a/main.c b/main.c index e50b1890..0e79bec0 100644 --- a/main.c +++ b/main.c @@ -3,7 +3,7 @@ */ /* - * Copyright (C) 1986, 1988, 1989, 1991-2024, + * Copyright (C) 1986, 1988, 1989, 1991-2025, * the Free Software Foundation, Inc. 
* * This file is part of GAWK, the GNU implementation of the
@@ -143,6 +143,7 @@ static const char *locale_dir = LOCALEDIR; /* default locale dir */
 #ifdef USE_PERSISTENT_MALLOC
 const char *get_pma_version(void);
 #endif
+static bool enable_pma(char **argv);
 
 int use_lc_numeric = false;	/* obey locale for decimal point */
 
@@ -213,28 +214,13 @@ main(int argc, char **argv)
 	bool have_srcfile = false;
 	SRCFILE *s;
 	char *cp;
-	const char *persist_file = getenv("GAWK_PERSIST_FILE");	/* backing file for PMA */
 #if defined(LOCALEDEBUG)
 	const char *initial_locale;
 #endif
 
 	myname = gawk_name(argv[0]);
 
-	check_pma_security(persist_file);
-
-	int pma_result = pma_init(1, persist_file);
-	if (pma_result != 0) {
-		// don't use 'fatal' routine, memory can't be allocated
-		fprintf(stderr, _("%s: fatal: persistent memory allocator failed to initialize: return value %d, pma.c line: %d.\n"),
-			myname, pma_result, pma_errno);
-		exit(EXIT_FATAL);
-	}
-
-	using_persistent_malloc = (persist_file != NULL);
-#ifndef USE_PERSISTENT_MALLOC
-	if (using_persistent_malloc)
-		warning(_("persistent memory is not supported"));
-#endif
+	using_persistent_malloc = enable_pma(argv);
 #ifdef HAVE_MPFR
 	mp_set_memory_functions(mpfr_mem_alloc, mpfr_mem_realloc, mpfr_mem_free);
 #endif
@@ -1925,3 +1911,33 @@ check_pma_security(const char *pma_file)
 	}
 #endif /* USE_PERSISTENT_MALLOC */
 }
+
+/* enable_pma --- do the PMA flow, handle ASLR; return true if a backing file is in use */
+
+static bool
+enable_pma(char **argv)
+{
+	const char *persist_file = getenv("GAWK_PERSIST_FILE");	/* backing file for PMA */
+
+#ifndef USE_PERSISTENT_MALLOC
+	if (persist_file != NULL)
+		warning(_("persistent memory is not supported"));
+
+	// every path must return a value, also when no backing file was requested
+	return false;
+#else
+	// called first: it may re-exec (Linux) or re-spawn (macOS) gawk with ASLR disabled
+	os_disable_aslr(persist_file, argv);
+
+	check_pma_security(persist_file);
+	int pma_result = pma_init(1, persist_file);
+	if (pma_result != 0) {
+		// don't use 'fatal' routine, memory can't be allocated
+		fprintf(stderr, _("%s: fatal: persistent memory allocator failed to initialize: return value %d, pma.c line: %d.\n"),
+			myname, pma_result, pma_errno);
+		exit(EXIT_FATAL);
+	}
+
+	return (persist_file != NULL);
+#endif
+}
diff --git a/posix/ChangeLog b/posix/ChangeLog
index 28f74e96..09247343 100644
--- a/posix/ChangeLog
+++ b/posix/ChangeLog
@@ -1,3 +1,7 @@
+2025-02-01         Arnold D. Robbins
+
+	* gawkmisc.c (os_disable_aslr): New function for *nix.
+
 2024-09-17         Arnold D. Robbins
 
 	* 5.3.1: Release tar made.
diff --git a/posix/gawkmisc.c b/posix/gawkmisc.c
index 0b30d746..1656ae96 100644
--- a/posix/gawkmisc.c
+++ b/posix/gawkmisc.c
@@ -1,6 +1,7 @@
 /* gawkmisc.c --- miscellaneous gawk routines that are OS specific.
 
-   Copyright (C) 1986, 1988, 1989, 1991 - 1998, 2001 - 2004, 2011, 2021, 2022, 2023,
+   Copyright (C) 1986, 1988, 1989, 1991 - 1998, 2001 - 2004, 2011,
+   2021, 2022, 2023, 2025,
    the Free Software Foundation, Inc.
 
    This program is free software; you can redistribute it and/or modify
@@ -26,6 +27,18 @@
 #include <io.h>		/* for declaration of setmode(). */
 #endif
 
+#ifdef HAVE_SYS_PERSONALITY_H	// for linux
+#include <sys/personality.h>
+#endif
+
+#ifdef HAVE_SPAWN_H
+#include <spawn.h>		// for macos
+#endif
+
+#ifdef HAVE__NSGETEXECUTABLEPATH
+#include <mach-o/dyld.h>	// for _NSGetExecutablePath()
+#endif
+
 const char quote = '\'';
 const char *defpath = DEFPATH;
 const char *deflibpath = DEFLIBPATH;
@@ -297,6 +310,103 @@ os_maybe_set_errno(void)
 {
 }
 
+/* os_disable_aslr --- disable Address Space Layout Randomization */
+
+// This is for Linux and macOS. It's not needed on other *nix systems.
+//
+// Does nothing when persist_file is NULL. Otherwise gawk is started again
+// with ASLR turned off: exec'ed in place on Linux, spawned as a child and
+// waited for on macOS. GAWK_PMA_REINCARNATION in the environment marks the
+// second incarnation, which removes it again and returns to the caller.
+
+void
+os_disable_aslr(const char *persist_file, char **argv)
+{
+#ifdef HAVE_PERSONALITY
+	// This code is Linux specific, both the reliance on /proc/self/exe
+	// and the personality system call.
+	if (persist_file != NULL) {
+		const char *cp = getenv("GAWK_PMA_REINCARNATION");
+
+		if (cp == NULL) {
+			char fullpath[BUFSIZ];
+			ssize_t n;	// readlink returns ssize_t
+
+			if ((n = readlink("/proc/self/exe", fullpath, sizeof(fullpath)-1)) < 0) {
+				fprintf(stderr, _("warning: /proc/self/exe: readlink: %s\n"),
+						strerror(errno));
+				return;
+			}
+			fullpath[n] = '\0';
+			putenv("GAWK_PMA_REINCARNATION=true");
+			if (personality(PER_LINUX | ADDR_NO_RANDOMIZE) < 0) {
+				fprintf(stderr, _("warning: personality: %s\n"),
+						strerror(errno));
+				fflush(stderr);
+				// do the exec anyway...
+			}
+			execv(fullpath, argv);
+
+			// execv returns only on failure: report it, drop the marker
+			// variable and carry on in this process
+			fprintf(stderr, _("warning: execv: %s\n"), strerror(errno));
+			(void) unsetenv("GAWK_PMA_REINCARNATION");
+		} else
+			(void) unsetenv("GAWK_PMA_REINCARNATION");
+	}
+#endif
+#ifdef HAVE__NSGETEXECUTABLEPATH
+	// This code is for macOS
+	if (persist_file != NULL) {
+		const char *cp = getenv("GAWK_PMA_REINCARNATION");
+
+		if (cp == NULL) {
+			char fullpath[BUFSIZ];
+			posix_spawnattr_t p_attr;
+			int status;
+			pid_t pid;
+			extern char **environ;
+			uint32_t size = sizeof(fullpath);	// API takes a uint32_t *, not size_t *
+
+			memset(fullpath, 0, sizeof(fullpath));
+			if (_NSGetExecutablePath(fullpath, &size) != 0) {
+				// nonzero means the buffer was too small
+				fprintf(stderr, _("warning: _NSGetExecutablePath: path too long\n"));
+				return;
+			}
+
+			putenv("GAWK_PMA_REINCARNATION=true");
+
+			posix_spawnattr_init(&p_attr);
+			// 0x100 is Apple's _POSIX_SPAWN_DISABLE_ASLR flag
+			posix_spawnattr_setflags(&p_attr, 0x100);
+			status = posix_spawnp(&pid, fullpath, NULL, &p_attr, argv, environ);
+			posix_spawnattr_destroy(&p_attr);
+			if (status != 0) {
+				// posix_spawnp returns the error number; errno is not set
+				fprintf(stderr, _("fatal: posix_spawn: %s\n"), strerror(status));
+				exit(EXIT_FATAL);
+			}
+
+			// The child does all the work; never fall through and run
+			// the program a second time in the parent.
+			while (waitpid(pid, &status, 0) == -1) {
+				if (errno != EINTR) {
+					fprintf(stderr, _("fatal: waitpid: %s\n"), strerror(errno));
+					exit(EXIT_FATAL);
+				}
+			}
+			if (WIFEXITED(status))
+				exit(WEXITSTATUS(status));	// use original exit code
+			if (WIFSIGNALED(status))
+				exit(128 + WTERMSIG(status));	// same convention as the shell
+			exit(EXIT_FATAL);
+		} else
+			(void) unsetenv("GAWK_PMA_REINCARNATION");
+	}
+#endif
+}
+
 // For MSYS, restore behavior of working in text mode.
 #ifdef __MSYS__
 void