=== modified file 'src/ChangeLog' --- src/ChangeLog 2012-11-25 07:50:55 +0000 +++ src/ChangeLog 2012-11-26 22:34:58 +0000 @@ -1,3 +1,55 @@ +2012-11-26 Paul Eggert + + Don't let call-process be a zombie factory (Bug#12980). + Fixing this bug required some cleanup of the signal-handling code. + As a side effect, this change also fixes a longstanding rare race + condition whereby Emacs could mistakenly kill unrelated processes, + and it fixes a bug where a second C-g does not kill a recalcitrant + synchronous process in GNU/Linux and similar platforms. + The patch should also fix the last vestiges of Bug#9488, + a bug which has mostly been fixed on the trunk by other changes. + * callproc.c, process.h (synch_process_alive, synch_process_death) + (synch_process_termsig, sync_process_retcode): + Remove. All uses removed, to simplify analysis and so that + less consing is done inside critical sections. + * callproc.c (reapable_child, block_child_signal, unblock_child_signal): + New functions, to avoid races that could kill innocent-victim processes. + (call_process_kill, call_process_cleanup, Fcall_process): Use them. + (call_process_kill): Record killed processes as deleted, so that + zombies do not clutter up the system. Do this inside a critical + section, to avoid a race that would allow the clutter. Do not + assume that sending SIGKILL to a process makes it reapable. + (call_process_cleanup): Fix code so that the second C-g works again + on common platforms such as GNU/Linux. + (Fcall_process): Create the child process in a critical section, + to fix a race condition. If creating an asynchronous process, + record it as deleted so that zombies do not clutter up the system. + Do not store process state in static variables, as that's less safe. + * callproc.c (call_process_cleanup): + * w32proc.c (waitpid): Simplify now that synch_process_alive is gone. + * process.c (record_deleted_pid): New function, containing + code refactored out of Fdelete_process. + (Fdelete_process): Use it. + (process_status_retrieved): Change args to look more like those of + waitpid; this is clearer. Return true if the process has changed, + false otherwise. All uses changed. Define only if SIGCHLD. + (record_child_status_change): Remove, folding its contents into ... + (handle_child_signal): ... this signal handler. Now, this + function is purely a handler for SIGCHLD, and is not called after + a synchronous waitpid returns; the synchronous code is moved to + wait_for_termination. There is no need to worry about reaping + more than one child now. + * sysdep.c (wait_for_termination): Now takes int * status and bool + interruptible arguments, too. Do not record child status change; + that's now the caller's responsibility. All callers changed. + (wait_for_termination_1, interruptible_wait_for_termination): + Remove. All callers changed to use wait_for_termination. + * syswait.h: Include , for bool. + (record_child_status_change, interruptible_wait_for_termination): + Remove decls. + (record_deleted_pid): New decl. + (wait_for_termination): Adjust to API changes noted above. + 2012-11-25 Paul Eggert * sysdep.c (sys_subshell): Don't assume pid_t fits in int. === modified file 'src/callproc.c' --- src/callproc.c 2012-11-17 22:12:47 +0000 +++ src/callproc.c 2012-11-26 22:15:42 +0000 @@ -67,21 +67,57 @@ /* Pattern used by call-process-region to make temp files. */ static Lisp_Object Vtemp_file_name_pattern; -/* True if we are about to fork off a synchronous process or if we - are waiting for it. */ -bool synch_process_alive; - -/* Nonzero => this is a string explaining death of synchronous subprocess. */ -const char *synch_process_death; - -/* Nonzero => this is the signal number that terminated the subprocess. */ -int synch_process_termsig; - -/* If synch_process_death is zero, - this is exit code of synchronous subprocess. */ -int synch_process_retcode; - +/* Return true if CHILD is reapable. CHILD must be the process ID of + a child process. As a side effect this function may reap CHILD, + discarding its status, and therefore report that CHILD is not + reapable. + + This function is intended for use after the caller was interrupted + while trying to reap CHILD, and where the caller later tests + whether CHILD was in fact reaped. The caller should not create + another child process (via vfork, say) between the earlier attempt + to reap CHILD and now, as that would cause a race if the newly + created child process happened to have CHILD as its process-ID. */ + +static bool +reapable_child (pid_t child) +{ + /* Do not use kill (CHILD, ...) to test whether CHILD is reapable, + as that might kill an innocent victim if CHILD has already been reaped + and CHILD's PID has been reused. */ + int status; + pid_t pid; + do + pid = waitpid (child, &status, WNOHANG); + while (pid < 0 && errno == EINTR); + + return !pid; +} + +/* Block SIGCHLD. */ + +static void +block_child_signal (void) +{ +#ifdef SIGCHLD + sigset_t blocked; + sigemptyset (&blocked); + sigaddset (&blocked, SIGCHLD); + pthread_sigmask (SIG_BLOCK, &blocked, 0); +#endif +} + +/* Unblock SIGCHLD. */ + +static void +unblock_child_signal (void) +{ +#ifdef SIGCHLD + pthread_sigmask (SIG_SETMASK, &empty_mask, 0); +#endif +} + /* Clean up when exiting Fcall_process. On MSDOS, delete the temporary file on any kind of termination. On Unix, kill the process and any children on termination by signal. */ @@ -97,8 +133,18 @@ CONS_TO_INTEGER (Fcar (fdpid), int, fd); CONS_TO_INTEGER (Fcdr (fdpid), pid_t, pid); emacs_close (fd); - EMACS_KILLPG (pid, SIGKILL); - synch_process_alive = 0; + + if (reapable_child (pid) && EMACS_KILLPG (pid, SIGKILL) == 0) + { + /* Record PID as a deleted process. Do this in a critical + section, as record_deleted_pid is not async-signal-safe. + PID will be reaped on receipt of the first SIGCHLD after the + critical section. */ + block_child_signal (); + record_deleted_pid (pid); + unblock_child_signal (); + } + return Qnil; } @@ -107,46 +153,43 @@ { Lisp_Object fdpid = Fcdr (arg); int fd; -#if defined (MSDOS) - Lisp_Object file; -#else - pid_t pid; -#endif Fset_buffer (Fcar (arg)); CONS_TO_INTEGER (Fcar (fdpid), int, fd); #if defined (MSDOS) - /* for MSDOS fdpid is really (fd . tempfile) */ - file = Fcdr (fdpid); - /* FD is -1 and FILE is "" when we didn't actually create a - temporary file in call-process. */ - if (fd >= 0) - emacs_close (fd); - if (!(strcmp (SDATA (file), NULL_DEVICE) == 0 || SREF (file, 0) == '\0')) - unlink (SDATA (file)); + { + /* for MSDOS fdpid is really (fd . tempfile) */ + Lisp_Object file = Fcdr (fdpid); + /* FD is -1 and FILE is "" when we didn't actually create a + temporary file in call-process. */ + if (fd >= 0) + emacs_close (fd); + if (!(strcmp (SDATA (file), NULL_DEVICE) == 0 || SREF (file, 0) == '\0')) + unlink (SDATA (file)); + } #else /* not MSDOS */ - CONS_TO_INTEGER (Fcdr (fdpid), pid_t, pid); - - if (call_process_exited) - { - emacs_close (fd); - return Qnil; - } - - if (EMACS_KILLPG (pid, SIGINT) == 0) - { - ptrdiff_t count = SPECPDL_INDEX (); - record_unwind_protect (call_process_kill, fdpid); - message1 ("Waiting for process to die...(type C-g again to kill it instantly)"); - immediate_quit = 1; - QUIT; - wait_for_termination (pid); - immediate_quit = 0; - specpdl_ptr = specpdl + count; /* Discard the unwind protect. */ - message1 ("Waiting for process to die...done"); - } - synch_process_alive = 0; + + /* If the process still exists, kill its process group. */ + if (!call_process_exited) + { + pid_t pid; + CONS_TO_INTEGER (Fcdr (fdpid), pid_t, pid); + + if (reapable_child (pid) && EMACS_KILLPG (pid, SIGINT) == 0) + { + ptrdiff_t count = SPECPDL_INDEX (); + record_unwind_protect (call_process_kill, fdpid); + message1 ("Waiting for process to die..." + "(type C-g again to kill it instantly)"); + immediate_quit = 1; + QUIT; + wait_for_termination (pid, 0, 1); + immediate_quit = 0; + specpdl_ptr = specpdl + count; /* Discard the unwind protect. */ + message1 ("Waiting for process to die...done"); + } + } emacs_close (fd); #endif /* not MSDOS */ return Qnil; @@ -189,6 +232,7 @@ #define CALLPROC_BUFFER_SIZE_MAX (4 * CALLPROC_BUFFER_SIZE_MIN) char buf[CALLPROC_BUFFER_SIZE_MAX]; int bufsize = CALLPROC_BUFFER_SIZE_MIN; + int status; ptrdiff_t count = SPECPDL_INDEX (); USE_SAFE_ALLOCA; @@ -494,16 +538,6 @@ if (fd_output >= 0) fd1 = fd_output; - /* Record that we're about to create a synchronous process. */ - synch_process_alive = 1; - - /* These vars record information from process termination. - Clear them now before process can possibly terminate, - to avoid timing error if process terminates soon. */ - synch_process_death = 0; - synch_process_retcode = 0; - synch_process_termsig = 0; - if (NILP (error_file)) fd_error = emacs_open (NULL_DEVICE, O_WRONLY, 0); else if (STRINGP (error_file)) @@ -536,20 +570,10 @@ #ifdef MSDOS /* MW, July 1993 */ /* Note that on MSDOS `child_setup' actually returns the child process - exit status, not its PID, so we assign it to `synch_process_retcode' - below. */ + exit status, not its PID, so assign it to status below. */ pid = child_setup (filefd, outfilefd, fd_error, (char **) new_argv, 0, current_dir); - - /* Record that the synchronous process exited and note its - termination status. */ - synch_process_alive = 0; - synch_process_retcode = pid; - if (synch_process_retcode < 0) /* means it couldn't be exec'ed */ - { - synchronize_system_messages_locale (); - synch_process_death = strerror (errno); - } + status = pid; emacs_close (outfilefd); if (fd_error != outfilefd) @@ -577,6 +601,7 @@ #else /* not WINDOWSNT */ block_input (); + block_child_signal (); /* vfork, and prevent local vars from being clobbered by the vfork. */ { @@ -609,6 +634,8 @@ if (pid == 0) { + unblock_child_signal (); + if (fd[0] >= 0) emacs_close (fd[0]); @@ -621,6 +648,26 @@ 0, current_dir); } + if (0 < pid) + { + if (INTEGERP (buffer)) + record_deleted_pid (pid); + else + { +#ifdef MSDOS + /* MSDOS needs different cleanup information. */ + cleanup_info_tail = build_string (tempfile ? tempfile : ""); +#else + cleanup_info_tail = INTEGER_TO_CONS (pid); +#endif + record_unwind_protect (call_process_cleanup, + Fcons (Fcurrent_buffer (), + Fcons (INTEGER_TO_CONS (fd[0]), + cleanup_info_tail))); + } + } + + unblock_child_signal (); unblock_input (); #endif /* not WINDOWSNT */ @@ -658,17 +705,6 @@ /* Enable sending signal if user quits below. */ call_process_exited = 0; -#if defined (MSDOS) - /* MSDOS needs different cleanup information. */ - cleanup_info_tail = build_string (tempfile ? tempfile : ""); -#else - cleanup_info_tail = INTEGER_TO_CONS (pid); -#endif /* not MSDOS */ - record_unwind_protect (call_process_cleanup, - Fcons (Fcurrent_buffer (), - Fcons (INTEGER_TO_CONS (fd[0]), - cleanup_info_tail))); - if (BUFFERP (buffer)) Fset_buffer (buffer); @@ -850,10 +886,7 @@ #ifndef MSDOS /* Wait for it to terminate, unless it already has. */ - if (output_to_buffer) - wait_for_termination (pid); - else - interruptible_wait_for_termination (pid); + wait_for_termination (pid, &status, !output_to_buffer); #endif immediate_quit = 0; @@ -865,23 +898,22 @@ SAFE_FREE (); unbind_to (count, Qnil); - if (synch_process_termsig) + if (WIFSIGNALED (status)) { const char *signame; synchronize_system_messages_locale (); - signame = strsignal (synch_process_termsig); + signame = strsignal (WTERMSIG (status)); if (signame == 0) signame = "unknown"; - synch_process_death = signame; + return code_convert_string_norecord (build_string (signame), + Vlocale_coding_system, 0); } - if (synch_process_death) - return code_convert_string_norecord (build_string (synch_process_death), - Vlocale_coding_system, 0); - return make_number (synch_process_retcode); + eassert (WIFEXITED (status)); + return make_number (WEXITSTATUS (status)); } static Lisp_Object === modified file 'src/process.c' --- src/process.c 2012-11-24 01:57:09 +0000 +++ src/process.c 2012-11-26 22:06:24 +0000 @@ -777,10 +777,23 @@ /* Fdelete_process promises to immediately forget about the process, but in reality, Emacs needs to remember those processes until they have been treated by the SIGCHLD handler and waitpid has been invoked on them; - otherwise they might fill up the kernel's process table. */ + otherwise they might fill up the kernel's process table. + + Some processes created by call-process are also put onto this list. */ static Lisp_Object deleted_pid_list; #endif +void +record_deleted_pid (pid_t pid) +{ +#ifdef SIGCHLD + deleted_pid_list = Fcons (make_fixnum_or_float (pid), + /* GC treated elements set to nil. */ + Fdelq (Qnil, deleted_pid_list)); + +#endif +} + DEFUN ("delete-process", Fdelete_process, Sdelete_process, 1, 1, 0, doc: /* Delete PROCESS: kill it and forget about it immediately. PROCESS may be a process, a buffer, the name of a process or buffer, or @@ -807,9 +820,7 @@ pid_t pid = p->pid; /* No problem storing the pid here, as it is still in Vprocess_alist. */ - deleted_pid_list = Fcons (make_fixnum_or_float (pid), - /* GC treated elements set to nil. */ - Fdelq (Qnil, deleted_pid_list)); + record_deleted_pid (pid); /* If the process has already signaled, remove it from the list. */ if (p->raw_status_new) update_status (p); @@ -6169,35 +6180,37 @@ return process; } +#ifdef SIGCHLD + /* If the status of the process DESIRED has changed, return true and - set *STATUS to its exit status; otherwise, return false. - If HAVE is nonnegative, assume that HAVE = waitpid (HAVE, STATUS, ...) - has already been invoked, and do not invoke waitpid again. */ + set *STATUS to its exit status. Otherwise, return false. Use + OPTIONS as extra options when invoking waitpid. DESIRED must be a + child process that has not been reaped. */ static bool -process_status_retrieved (pid_t desired, pid_t have, int *status) +process_status_retrieved (pid_t desired, int *status, int options) { - if (have < 0) + /* Invoke waitpid only with a known process ID; do not invoke + waitpid with a nonpositive argument. Otherwise, Emacs might + reap an unwanted process by mistake. For example, invoking + waitpid (-1, ...) can mess up glib by reaping glib's subprocesses, + so that another thread running glib won't find them. */ + eassert (0 < desired); + + while (1) { - /* Invoke waitpid only with a known process ID; do not invoke - waitpid with a nonpositive argument. Otherwise, Emacs might - reap an unwanted process by mistake. For example, invoking - waitpid (-1, ...) can mess up glib by reaping glib's subprocesses, - so that another thread running glib won't find them. */ - do - have = waitpid (desired, status, WNOHANG | WUNTRACED); - while (have < 0 && errno == EINTR); + pid_t pid = waitpid (desired, status, WNOHANG | options); + if (0 <= pid) + return 0 < pid; + if (errno == ECHILD) + return 0; + eassert (errno == EINTR); } - - return have == desired; } -/* If PID is nonnegative, the child process PID with wait status W has - changed its status; record this and return true. - - If PID is negative, ignore W, and look for known child processes - of Emacs whose status have changed. For each one found, record its new - status. +/* Handle a SIGCHLD signal by looking for known child processes of + Emacs whose status have changed. For each one found, record its + new status. All we do is change the status; we do not run sentinels or print notifications. That is saved for the next time keyboard input is @@ -6220,20 +6233,16 @@ ** Malloc WARNING: This should never call malloc either directly or indirectly; if it does, that is a bug */ -void -record_child_status_change (pid_t pid, int w) +static void +handle_child_signal (int sig) { -#ifdef SIGCHLD - - /* Record at most one child only if we already know one child that - has exited. */ - bool record_at_most_one_child = 0 <= pid; - Lisp_Object tail; + int status; /* Find the process that signaled us, and record its status. */ - /* The process can have been deleted by Fdelete_process. */ + /* The process can have been deleted by Fdelete_process, or have + been started asynchronously by Fcall_process. */ for (tail = deleted_pid_list; CONSP (tail); tail = XCDR (tail)) { bool all_pids_are_fixnums @@ -6247,12 +6256,8 @@ deleted_pid = XINT (xpid); else deleted_pid = XFLOAT_DATA (xpid); - if (process_status_retrieved (deleted_pid, pid, &w)) - { - XSETCAR (tail, Qnil); - if (record_at_most_one_child) - return; - } + if (process_status_retrieved (deleted_pid, &status, 0)) + XSETCAR (tail, Qnil); } } @@ -6261,15 +6266,16 @@ { Lisp_Object proc = XCDR (XCAR (tail)); struct Lisp_Process *p = XPROCESS (proc); - if (p->alive && process_status_retrieved (p->pid, pid, &w)) + + if (p->alive && process_status_retrieved (p->pid, &status, WUNTRACED)) { /* Change the status of the process that was found. */ p->tick = ++process_tick; - p->raw_status = w; + p->raw_status = status; p->raw_status_new = 1; /* If process has terminated, stop waiting for its output. */ - if (WIFSIGNALED (w) || WIFEXITED (w)) + if (WIFSIGNALED (status) || WIFEXITED (status)) { int clear_desc_flag = 0; p->alive = 0; @@ -6288,39 +6294,8 @@ look around. */ if (input_available_clear_time) *input_available_clear_time = make_emacs_time (0, 0); - - if (record_at_most_one_child) - return; } } - - if (0 <= pid) - { - /* The caller successfully waited for a pid but no asynchronous - process was found for it, so this is a synchronous process. */ - - synch_process_alive = 0; - - /* Report the status of the synchronous process. */ - if (WIFEXITED (w)) - synch_process_retcode = WEXITSTATUS (w); - else if (WIFSIGNALED (w)) - synch_process_termsig = WTERMSIG (w); - - /* Tell wait_reading_process_output that it needs to wake up and - look around. */ - if (input_available_clear_time) - *input_available_clear_time = make_emacs_time (0, 0); - } -#endif -} - -#ifdef SIGCHLD - -static void -handle_child_signal (int sig) -{ - record_child_status_change (-1, 0); } static void === modified file 'src/process.h' --- src/process.h 2012-11-24 01:57:09 +0000 +++ src/process.h 2012-11-26 22:06:24 +0000 @@ -185,23 +185,6 @@ } #endif -/* True if we are about to fork off a synchronous process or if we - are waiting for it. */ -extern bool synch_process_alive; - -/* Communicate exit status of sync process to from sigchld_handler - to Fcall_process. */ - -/* Nonzero => this is a string explaining death of synchronous subprocess. */ -extern const char *synch_process_death; - -/* Nonzero => this is the signal number that terminated the subprocess. */ -extern int synch_process_termsig; - -/* If synch_process_death is zero, - this is exit code of synchronous subprocess. */ -extern int synch_process_retcode; - /* Nonzero means don't run process sentinels. This is used when exiting. */ extern int inhibit_sentinels; === modified file 'src/sysdep.c' --- src/sysdep.c 2012-11-25 07:50:55 +0000 +++ src/sysdep.c 2012-11-26 22:06:24 +0000 @@ -266,21 +266,27 @@ #ifndef MSDOS -static void -wait_for_termination_1 (pid_t pid, int interruptible) +/* Wait for subprocess with process id PID to terminate. + If STATUS is non-null, store the waitpid-style exit status into *STATUS. + If INTERRUPTIBLE, this function is interruptible by a signal. + + This function can be called twice for the same subprocess, so long + as no other subprocess is created (via fork etc.) between the two + calls (as the other subprocess might be assigned the same pid). + Double calls can occur due to race conditions involving signals + arriving when waiting for a synchronous process to exit; when this + happens, the later (cleanup) call should use a null STATUS to + indicate that the status is not needed. */ +void +wait_for_termination (pid_t pid, int *status, bool interruptible) { - while (1) + int dummy; + + while (waitpid (pid, status ? status : &dummy, 0) < 0) { - int status; - int wait_result = waitpid (pid, &status, 0); - if (wait_result < 0) - { - if (errno != EINTR) - break; - } - else - { - record_child_status_change (wait_result, status); + if (errno != EINTR) + { + eassert (! status); break; } @@ -289,22 +295,11 @@ if (interruptible) QUIT; } -} - -/* Wait for subprocess with process id `pid' to terminate and - make sure it will get eliminated (not remain forever as a zombie) */ - -void -wait_for_termination (pid_t pid) -{ - wait_for_termination_1 (pid, 0); -} - -/* Like the above, but allow keyboard interruption. */ -void -interruptible_wait_for_termination (pid_t pid) -{ - wait_for_termination_1 (pid, 1); + + /* Tell wait_reading_process_output that it needs to wake up and + look around. */ + if (input_available_clear_time) + *input_available_clear_time = make_emacs_time (0, 0); } /* @@ -454,6 +449,7 @@ char oldwd[MAXPATHLEN+1]; /* Fixed length is safe on MSDOS. */ #endif pid_t pid; + int status; struct save_signal saved_handlers[5]; Lisp_Object dir; unsigned char *volatile str_volatile = 0; @@ -491,7 +487,6 @@ #ifdef DOS_NT pid = 0; save_signal_handlers (saved_handlers); - synch_process_alive = 1; #else pid = vfork (); if (pid == -1) @@ -560,14 +555,12 @@ /* Do this now if we did not do it before. */ #ifndef MSDOS save_signal_handlers (saved_handlers); - synch_process_alive = 1; #endif #ifndef DOS_NT - wait_for_termination (pid); + wait_for_termination (pid, &status, 0); #endif restore_signal_handlers (saved_handlers); - synch_process_alive = 0; } static void === modified file 'src/syswait.h' --- src/syswait.h 2012-09-23 22:25:22 +0000 +++ src/syswait.h 2012-11-26 22:06:24 +0000 @@ -23,6 +23,7 @@ #ifndef EMACS_SYSWAIT_H #define EMACS_SYSWAIT_H +#include #include #ifdef HAVE_SYS_WAIT_H /* We have sys/wait.h with POSIXoid definitions. */ @@ -52,10 +53,9 @@ #endif /* Defined in process.c. */ -extern void record_child_status_change (pid_t, int); +extern void record_deleted_pid (pid_t); /* Defined in sysdep.c. */ -extern void wait_for_termination (pid_t); -extern void interruptible_wait_for_termination (pid_t); +extern void wait_for_termination (pid_t, int *, bool); #endif /* EMACS_SYSWAIT_H */ === modified file 'src/w32proc.c' --- src/w32proc.c 2012-11-24 01:57:09 +0000 +++ src/w32proc.c 2012-11-26 22:06:24 +0000 @@ -1273,34 +1273,7 @@ DebPrint (("Wait signaled with process pid %d\n", cp->pid)); #endif - if (status) - { - *status = retval; - } - else if (synch_process_alive) - { - synch_process_alive = 0; - - /* Report the status of the synchronous process. */ - if (WIFEXITED (retval)) - synch_process_retcode = WEXITSTATUS (retval); - else if (WIFSIGNALED (retval)) - { - int code = WTERMSIG (retval); - const char *signame; - - synchronize_system_messages_locale (); - signame = strsignal (code); - - if (signame == 0) - signame = "unknown"; - - synch_process_death = signame; - } - - reap_subprocess (cp); - } - + *status = retval; reap_subprocess (cp); return pid;