[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[shepherd] 03/12: service: 'start' blocks when a service is already bein
From: |
Ludovic Courtès |
Subject: |
[shepherd] 03/12: service: 'start' blocks when a service is already being started. |
Date: |
Sun, 19 Feb 2023 16:58:36 -0500 (EST) |
civodul pushed a commit to branch wip-service-monitor
in repository shepherd.
commit 0f75fb49bb84a1154355152421f62f2a22b8deab
Author: Ludovic Courtès <ludo@gnu.org>
AuthorDate: Sat Jun 25 23:24:05 2022 +0200
service: 'start' blocks when a service is already being started.
This change allows the monitor to keep track of services that are being
started, in addition to running/stopped. The 'start' method now blocks
when called on a service already being started, until the service has
actually been started. This was the case before 0.9.0; since 0.9.0
though, invoking 'start' on a service already being started would
lead to a second invocation of its 'start' method.
* modules/shepherd/service.scm (start): Use the new 'start' protocol
with the monitor and adjust accordingly.
(service-monitor)[*service-started*, started-message?]: New variables.
Add 'starting' variable to 'loop'. Remove ('set-running ...) clause;
add ('start ...) clause.
* tests/starting-status.sh: New file.
* Makefile.am (TESTS): Add it.
---
Makefile.am | 1 +
modules/shepherd/service.scm | 138 ++++++++++++++++++++++++++++++++-----------
tests/starting-status.sh | 110 ++++++++++++++++++++++++++++++++++
3 files changed, 213 insertions(+), 36 deletions(-)
diff --git a/Makefile.am b/Makefile.am
index fe071fb..b50f535 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -230,6 +230,7 @@ SUFFIXES = .go
TESTS = \
tests/basic.sh \
+ tests/starting-status.sh \
tests/replacement.sh \
tests/respawn.sh \
tests/respawn-throttling.sh \
diff --git a/modules/shepherd/service.scm b/modules/shepherd/service.scm
index 9214de0..63e00e9 100644
--- a/modules/shepherd/service.scm
+++ b/modules/shepherd/service.scm
@@ -28,6 +28,7 @@
#:hide (sleep))
#:use-module (fibers channels)
#:use-module (fibers operations)
+ #:use-module (fibers conditions)
#:use-module (fibers scheduler)
#:use-module (fibers timers)
#:use-module (oop goops)
@@ -35,7 +36,7 @@
#:use-module (srfi srfi-9)
#:use-module (srfi srfi-26)
#:use-module (srfi srfi-34)
- #:use-module (srfi srfi-35)
+ #:use-module ((srfi srfi-35) #:hide (make-condition))
#:use-module (rnrs io ports)
#:use-module ((ice-9 control) #:select (call/ec))
#:use-module (ice-9 match)
@@ -397,30 +398,40 @@ wire."
(let ((problem
;; Resolve all dependencies.
(find (negate start) (required-by obj))))
- (if problem
- (local-output (l10n "Service ~a depends on ~a.")
- (canonical-name obj)
- problem)
- ;; Start the service itself.
- (let ((running (catch #t
- (lambda ()
- (apply (slot-ref obj 'start) args))
- (lambda (key . args)
- (report-exception 'start obj key args)))))
- (put-message (current-monitor-channel)
- `(set-running ,obj ,running))))
+ (define running
+ (if problem
+ (local-output (l10n "Service ~a depends on ~a.")
+ (canonical-name obj)
+ problem)
+ ;; Start the service itself.
+ (let ((reply (make-channel)))
+ (put-message (current-monitor-channel) `(start ,obj ,reply))
+ (match (get-message reply)
+ (#f
+ ;; We lost the race: OBJ is already running.
+ (service-running-value obj))
+ ((? channel? notification)
+ ;; We won the race: we're responsible for starting OBJ
+ ;; and sending its running value on NOTIFICATION.
+ (let ((running (catch #t
+ (lambda ()
+ (apply (slot-ref obj 'start) args))
+ (lambda (key . args)
+ (report-exception 'start obj key
args)))))
+ (put-message notification running)
+ running))))))
;; Status message.
- (let ((running (service-running-value obj)))
- (when (one-shot? obj)
- (put-message (current-monitor-channel)
- `(notify-termination ,obj)))
- (local-output (if running
- (l10n "Service ~a has been started.")
- (l10n "Service ~a could not be started."))
- (canonical-name obj))
+ (when (one-shot? obj)
+ (put-message (current-monitor-channel)
+ `(notify-termination ,obj)))
+
+ (local-output (if running
+ (l10n "Service ~a has been started.")
+ (l10n "Service ~a could not be started."))
+ (canonical-name obj))
- running)))))
+ running))))
(define (replace-service old-service new-service)
"Replace OLD-SERVICE with NEW-SERVICE in the services registry. This
@@ -666,8 +677,12 @@ clients."
(define (service-monitor channel)
"Encapsulate shepherd state (registered and running services) and serve
requests arriving on @var{channel}."
+ (define *service-started* (list 'service 'started!))
+ (define (started-message? obj) (eq? *service-started* obj))
+
(let loop ((registered vlist-null)
- (running vlist-null))
+ (running vlist-null)
+ (starting vlist-null))
(define (unregister services)
;; Return REGISTERED minus SERVICE.
(vhash-fold (lambda (name service result)
@@ -690,35 +705,36 @@ requests arriving on @var{channel}."
(let ((name (canonical-name service)))
(match (vhash-assq name registered)
(#f
- (loop (register service) running))
+ (loop (register service) running starting))
((_ . old)
(if (vhash-assq old running)
(begin
(slot-set! old 'replacement service)
- (loop registered running))
+ (loop registered running starting))
(loop (register service (unregister (list old)))
- running))))))
+ running starting))))))
(('unregister services) ;no reply
(match (filter (cut vhash-assq <> running) services)
(()
- (loop (unregister services) running))
+ (loop (unregister services) running starting))
(lst ;
(local-output
(l10n "Cannot unregister service ~a, which is still running"
"Cannot unregister services~{ ~a,~} which are still running"
(length lst))
(map canonical-name lst))
- (loop registered running))))
+ (loop registered running starting))))
(('unregister-all) ;no reply
(let ((root (cdr (vhash-assq 'root registered))))
(loop (fold (cut vhash-consq <> root <>)
vlist-null
(provided-by root))
- (vhash-consq root #t running))))
+ (vhash-consq root #t running)
+ starting)))
(('lookup name reply)
(put-message reply
(vhash-foldq* cons '() name registered))
- (loop registered running))
+ (loop registered running starting))
(('service-list reply)
(let ((names (delete-duplicates
(vhash-fold (lambda (key _ result)
@@ -734,19 +750,69 @@ requests arriving on @var{channel}."
result))
'()
names))
- (loop registered running)))
+ (loop registered running starting)))
(('running service reply)
(put-message reply
(match (vhash-assq service running)
(#f #f)
((_ . value) value)))
- (loop registered running))
- (('set-running service value) ;no reply
- (loop registered
- (vhash-consq service value running)))
+ (loop registered running starting))
+ (('start service reply)
+ ;; Attempt to start SERVICE, blocking if it is already being started.
+ ;; Send #f on REPLY if SERVICE was already running or being started;
+ ;; otherwise send a channel on which to send SERVICE's value one it has
+ ;; been started.
+ (cond ((vhash-assq service running)
+ =>
+ ;; SERVICE is already running: send #f on REPLY.
+ (lambda (pair)
+ (match pair
+ ((_ . value)
+ (put-message reply #f)
+ (loop registered running starting)))))
+ ((vhash-assq service starting)
+ =>
+ ;; SERVICE is being started: wait until it has started and then
+ ;; send #f on REPLY.
+ (lambda (pair)
+ (match pair
+ ((_ . condition)
+ (spawn-fiber
+ (lambda ()
+ (wait condition)
+ (put-message reply #f)))
+ (loop registered running starting)))))
+ (else
+ ;; Become the one who starts SERVICE.
+ (let ((condition (make-condition))
+ (notification (make-channel)))
+ (spawn-fiber
+ (lambda ()
+ (let ((running (get-message notification)))
+ (local-output (l10n "Service ~a started.")
+ (canonical-name service))
+ (put-message channel
+ (list *service-started* service running)))))
+ (local-output (l10n "Starting service ~a...")
+ (canonical-name service))
+ (put-message reply notification)
+ (loop registered running
+ (vhash-consq service condition starting))))))
+ (((? started-message?) service value) ;no reply
+ (local-output (l10n "Service ~a running with value ~s.")
+ (canonical-name service) value)
+ (match (vhash-assq service starting)
+ ((_ . condition)
+ (signal-condition! condition)
+ (loop registered
+ (if (or (one-shot? service) (not value))
+ running
+ (vhash-consq service value running))
+ (vhash-delq service starting)))))
(('notify-termination service) ;no reply
(loop registered
- (vhash-delq service running)))))) ;XXX: complexity
+ (vhash-delq service running) ;XXX: complexity
+ starting)))))
(define (spawn-service-monitor)
"Spawn a new service monitor fiber and return a channel to send it requests."
diff --git a/tests/starting-status.sh b/tests/starting-status.sh
new file mode 100644
index 0000000..e0a2c45
--- /dev/null
+++ b/tests/starting-status.sh
@@ -0,0 +1,110 @@
+# GNU Shepherd --- Test the "starting" status.
+# Copyright © 2022 Ludovic Courtès <ludo@gnu.org>
+#
+# This file is part of the GNU Shepherd.
+#
+# The GNU Shepherd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at
+# your option) any later version.
+#
+# The GNU Shepherd is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with the GNU Shepherd. If not, see <http://www.gnu.org/licenses/>.
+
+shepherd --version
+herd --version
+
+socket="t-socket-$$"
+conf="t-conf-$$"
+confdir="t-confdir-$$"
+datadir="t-datadir-$$"
+log="t-log-$$"
+stamp="t-stamp-$$"
+pid="t-pid-$$"
+
+herd="herd -s $socket"
+
+trap "cat $log || true; rm -f $socket $conf $stamp $log;
+ test -f $pid && kill \`cat $pid\` || true; rm -f $pid" EXIT
+
+cat > "$conf" <<EOF
+(register-services
+ (make <service>
+ #:provides '(test)
+ #:start (lambda _
+ (let loop ((n 30))
+ (if (or (file-exists? "$stamp") (zero? n))
+ (> n 0)
+ (begin
+ ((@ (fibers) sleep) 1)
+ (loop (- n 1))))))
+ #:stop (lambda _
+ (delete-file "$stamp"))
+ #:respawn? #f))
+EOF
+
+rm -f "$pid" "$stamp"
+shepherd -I -s "$socket" -c "$conf" -l "$log" --pid="$pid" &
+
+# Wait till it's ready.
+while ! test -f "$pid" ; do sleep 0.3 ; done
+
+shepherd_pid="`cat $pid`"
+
+$herd start test &
+herd_pid=$!
+
+# Currently, 'test' is considered as "stopped" while starting.
+$herd status
+$herd status test
+$herd status test | grep stopped
+
+$herd start test &
+herd_pid2=$!
+sleep 1
+kill -0 "$herd_pid"
+kill -0 "$herd_pid2"
+
+# Trigger actual service start.
+touch "$stamp"
+
+# Make sure the service is marked as "started" soon shortly after.
+n=0
+while : ; do
+ if $herd status test | grep started
+ then break
+ else n=$(expr $n + 1)
+ fi
+
+ test $n -le 10
+ sleep 1
+done
+
+# Make sure the 'herd' processes terminated.
+n=0
+while : ; do
+ if kill -0 "$herd_pid" || kill -0 "$herd_pid2"
+ then
+ n=$(expr $n + 1)
+ test $n -le 10
+ sleep 1
+ else
+ break
+ fi
+done
+
+$herd stop test
+! test -f "$stamp"
+
+$herd stop root
+! kill -0 $shepherd_pid
+
+test $(grep "Starting service test" "$log" | wc -l) = 1
+
+rm -rf "$confdir"
+rm -rf "$datadir"
- [shepherd] branch wip-service-monitor created (now da158c5), Ludovic Courtès, 2023/02/19
- [shepherd] 01/12: service: Remove 'stop-delay?' and 'waiting-for-termination?'., Ludovic Courtès, 2023/02/19
- [shepherd] 02/12: service: Handle service state in a monitoring agent., Ludovic Courtès, 2023/02/19
- [shepherd] 06/12: service: 'stop' blocks when a service is already being stopped., Ludovic Courtès, 2023/02/19
- [shepherd] 09/12: service: Rename "service monitor" to "service registry"., Ludovic Courtès, 2023/02/19
- [shepherd] 10/12: service: Clarify expected statuses., Ludovic Courtès, 2023/02/19
- [shepherd] 05/12: service: 'start-in-the-background' starts services in parallel., Ludovic Courtès, 2023/02/19
- [shepherd] 03/12: service: 'start' blocks when a service is already being started.,
Ludovic Courtès <=
- [shepherd] 04/12: service: Start dependent services in parallel., Ludovic Courtès, 2023/02/19
- [shepherd] 07/12: service: 'service-running-value' uses a fresh channel for each reply., Ludovic Courtès, 2023/02/19
- [shepherd] 11/12: service: Communicate the service status symbol to clients., Ludovic Courtès, 2023/02/19
- [shepherd] 12/12: herd: Report 'starting' and 'stopping' service statuses., Ludovic Courtès, 2023/02/19
- [shepherd] 08/12: service: Associate a control fiber with each service., Ludovic Courtès, 2023/02/19