qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v8 17/20] multi-process: heartbeat messages to remote


From: Jagannathan Raman
Subject: [PATCH v8 17/20] multi-process: heartbeat messages to remote
Date: Fri, 31 Jul 2020 14:20:24 -0400

From: Elena Ufimtseva <elena.ufimtseva@oracle.com>

In order to detect remote processes which are hung, the
proxy periodically sends heartbeat messages to confirm if
the remote process is alive. The remote process responds
to this heartbeat message to confirm it is alive.

Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
---
 hw/i386/remote-msg.c     | 19 ++++++++++++++++++
 hw/pci/proxy.c           | 52 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/hw/pci/proxy.h   |  2 ++
 include/io/mpqemu-link.h |  1 +
 io/mpqemu-link.c         |  8 ++++++++
 5 files changed, 82 insertions(+)

diff --git a/hw/i386/remote-msg.c b/hw/i386/remote-msg.c
index 756b710..2a4d7f1 100644
--- a/hw/i386/remote-msg.c
+++ b/hw/i386/remote-msg.c
@@ -26,6 +26,7 @@ static void process_config_read(QIOChannel *ioc, PCIDevice 
*dev,
                                 MPQemuMsg *msg);
 static void process_bar_write(QIOChannel *ioc, MPQemuMsg *msg, Error **errp);
 static void process_bar_read(QIOChannel *ioc, MPQemuMsg *msg, Error **errp);
+static void process_proxy_ping_msg(QIOChannel *ioc, Error **errp);
 
 gboolean mpqemu_process_msg(QIOChannel *ioc, GIOCondition cond,
                             gpointer opaque)
@@ -75,6 +76,9 @@ gboolean mpqemu_process_msg(QIOChannel *ioc, GIOCondition 
cond,
     case SET_IRQFD:
         process_set_irqfd_msg(pci_dev, &msg);
         break;
+    case PROXY_PING:
+        process_proxy_ping_msg(ioc, &local_err);
+        break;
     default:
         error_setg(&local_err,
                    "Unknown command (%d) received for device %s (pid=%d)",
@@ -223,3 +227,18 @@ fail:
                    "in remote process pid=%d", getpid());
     }
 }
+
+static void process_proxy_ping_msg(QIOChannel *ioc, Error **errp)
+{
+    MPQemuMsg ret = { 0 };
+    Error *local_err = NULL;
+
+    ret.cmd = RET_MSG;
+    ret.size = sizeof(ret.data1);
+
+    mpqemu_msg_send(&ret, ioc, &local_err);
+    if (local_err) {
+        error_setg(errp, "Error while sending message to proxy "
+                   "in remote process pid=%d", getpid());
+    }
+}
diff --git a/hw/pci/proxy.c b/hw/pci/proxy.c
index 50a806c..490093c 100644
--- a/hw/pci/proxy.c
+++ b/hw/pci/proxy.c
@@ -24,6 +24,8 @@
 #include "util/event_notifier-posix.c"
 
 static void probe_pci_info(PCIDevice *dev, Error **errp);
+static void start_hb_timer(PCIProxyDev *dev);
+static void stop_hb_timer(PCIProxyDev *dev);
 
 static void proxy_set_socket(PCIProxyDev *pdev, int fd, Error **errp)
 {
@@ -111,6 +113,8 @@ static void pci_proxy_dev_realize(PCIDevice *device, Error 
**errp)
     setup_irqfd(dev);
 
     probe_pci_info(PCI_DEVICE(dev), errp);
+
+    start_hb_timer(dev);
 }
 
 static void pci_proxy_dev_exit(PCIDevice *pdev)
@@ -123,6 +127,8 @@ static void pci_proxy_dev_exit(PCIDevice *pdev)
 
     event_notifier_cleanup(&dev->intr);
     event_notifier_cleanup(&dev->resample);
+
+    stop_hb_timer(dev);
 }
 
 static int config_op_send(PCIProxyDev *pdev, uint32_t addr, uint32_t *val,
@@ -343,3 +349,49 @@ static void probe_pci_info(PCIDevice *dev, Error **errp)
         }
     }
 }
+
+static void hb_msg(PCIProxyDev *dev)
+{
+    DeviceState *ds = DEVICE(dev);
+    Error *local_err = NULL;
+    MPQemuMsg msg = { 0 };
+
+    msg.cmd = PROXY_PING;
+    msg.bytestream = 0;
+    msg.size = 0;
+
+    (void)mpqemu_msg_send_and_await_reply(&msg, dev->ioc, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        qio_channel_close(dev->ioc, &local_err);
+        error_setg(&error_fatal, "Lost contact with device %s", ds->id);
+    }
+}
+
+#define NOP_INTERVAL 1000
+
+static void remote_ping(void *opaque)
+{
+    PCIProxyDev *dev = opaque;
+
+    hb_msg(dev);
+
+    timer_mod(dev->hb_timer,
+              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + NOP_INTERVAL);
+}
+
+static void start_hb_timer(PCIProxyDev *dev)
+{
+    dev->hb_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
+                                 remote_ping,
+                                 dev);
+
+    timer_mod(dev->hb_timer,
+              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + NOP_INTERVAL);
+}
+
+static void stop_hb_timer(PCIProxyDev *dev)
+{
+    timer_del(dev->hb_timer);
+    timer_free(dev->hb_timer);
+}
diff --git a/include/hw/pci/proxy.h b/include/hw/pci/proxy.h
index 15cc381..d784328 100644
--- a/include/hw/pci/proxy.h
+++ b/include/hw/pci/proxy.h
@@ -40,6 +40,8 @@ struct PCIProxyDev {
     EventNotifier intr;
     EventNotifier resample;
 
+    QEMUTimer *hb_timer;
+
     ProxyMemoryRegion region[PCI_NUM_REGIONS];
 };
 
diff --git a/include/io/mpqemu-link.h b/include/io/mpqemu-link.h
index a3844e1..5f2913f 100644
--- a/include/io/mpqemu-link.h
+++ b/include/io/mpqemu-link.h
@@ -40,6 +40,7 @@ typedef enum {
     BAR_READ,
     SET_IRQFD,
     GET_PCI_INFO,
+    PROXY_PING,
     MAX = INT_MAX,
 } MPQemuCmd;
 
diff --git a/io/mpqemu-link.c b/io/mpqemu-link.c
index 6fa4665..0222e81 100644
--- a/io/mpqemu-link.c
+++ b/io/mpqemu-link.c
@@ -280,6 +280,14 @@ bool mpqemu_msg_valid(MPQemuMsg *msg)
             return false;
         }
         break;
+    case PROXY_PING:
+        if (msg->bytestream || msg->num_fds) {
+            return false;
+        }
+        if (msg->size) {
+            return false;
+        }
+        break;
     default:
         break;
     }
-- 
1.8.3.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]