qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] target/i386: log guest name and memory error type AO, AR for MCE


From: Mario Smarduch
Subject: [PATCH] target/i386: log guest name and memory error type AO, AR for MCEs
Date: Fri, 4 Oct 2019 16:53:38 -0700
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Thunderbird/52.2.1

In a large VPC environment we want to log memory error occurrences
together with the guest name and error type. There are a few use cases:


- if VM crashes on AR mce inform the user about the reason and
  resolve the case
- if VM hangs notify the user to reboot and resume processing
- if VM continues to run let the user know, he/she may be able to
  correlate it to a VM-internal outage
- Rowhammer attacks - isolate/determine the attacker, possibly
  migrating it off the hypervisor
- In general track memory errors on a hypervisor over time to determine
  trends

Monitoring our fleet we come across quite a few of these and have been
able to take action where before there were no clues to the causes.

When a memory error occurs we get a log entry in the qemu log:

Guest [Droplet-12345678] 2019-08-02T05:00:11.940270Z qemu-system-x86_64:
Guest MCE Memory Error at qemu addr 0x7f3c7622f000 and guest 78e42f000
addr of type BUS_MCEERR_AR injected

With an enterprise logging environment we can take further actions.

Signed-off-by: Mario Smarduch <address@hidden>
---
 target/i386/kvm.c | 27 ++++++++++++++++++++++-----
 util/qemu-error.c | 24 ++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 5 deletions(-)

diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 92069099ab..79ebccc684 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -555,9 +555,9 @@ static void kvm_mce_inject(X86CPU *cpu, hwaddr
paddr, int code)
                        (MCM_ADDR_PHYS << 6) | 0xc, flags);
 }

-static void hardware_memory_error(void)
+static void hardware_memory_error(void *addr)
 {
-    fprintf(stderr, "Hardware memory error!\n");
+    error_report("QEMU got Hardware memory error at addr %p", addr);
     exit(1);
 }

@@ -581,15 +581,32 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int
code, void *addr)
             kvm_physical_memory_addr_from_host(c->kvm_state, addr,
&paddr)) {
             kvm_hwpoison_page_add(ram_addr);
             kvm_mce_inject(cpu, paddr, code);
+            /*
+             * Use different logging severity based on error type.
+             * If mcelog is running qemu va addr will help debug via
mcelog.
+             */
+            if (code == BUS_MCEERR_AR) {
+                error_report("Guest MCE Memory Error at qemu addr %p and "
+                    "guest %lx addr of type %s injected", addr, paddr,
+                     "BUS_MCEERR_AR");
+            } else {
+                 warn_report("Guest MCE Memory Error at qemu addr %p and "
+                     "guest %lx addr of type %s injected", addr,
+                     paddr, "BUS_MCEERR_AO");
+            }
+
             return;
         }

-        fprintf(stderr, "Hardware memory error for memory used by "
-                "QEMU itself instead of guest system!\n");
+        if (code == BUS_MCEERR_AO) {
+            warn_report("Hardware memory error at addr %p of type %s "
+                "for memory used by QEMU itself instead of guest system!",
+                addr, "BUS_MCEERR_AO");
+        }
     }

     if (code == BUS_MCEERR_AR) {
-        hardware_memory_error();
+        hardware_memory_error(addr);
     }

     /* Hope we are lucky for AO MCE */
diff --git a/util/qemu-error.c b/util/qemu-error.c
index f373f3b3b0..2ebafd4405 100644
--- a/util/qemu-error.c
+++ b/util/qemu-error.c
@@ -11,6 +11,8 @@
  */

 #include "qemu/osdep.h"
+#include "qemu/option.h"
+#include "qemu/config-file.h"
 #include "monitor/monitor.h"
 #include "qemu/error-report.h"

@@ -35,11 +37,31 @@ int error_printf(const char *fmt, ...)
     return ret;
 }

+static const char *error_get_guestname(void)
+{
+    QemuOpts *opts = qemu_opts_find(qemu_find_opts("name"), NULL);
+    return qemu_opt_get(opts, "guest");
+}
+
+/*
+ * Print guest name associated with error, to aid debugging errors from
+ * multiple guests in centralized logging environment.
+ */
+static void error_print_guestname(void)
+{
+    const char *name;
+    name = error_get_guestname();
+    if (name != NULL && !cur_mon) {
+        error_printf("Guest [%s] ", name);
+    }
+}
+
 int error_printf_unless_qmp(const char *fmt, ...)
 {
     va_list ap;
     int ret;

+    error_print_guestname();
     va_start(ap, fmt);
     ret = error_vprintf_unless_qmp(fmt, ap);
     va_end(ap);
@@ -274,6 +296,7 @@ void error_report(const char *fmt, ...)
 {
     va_list ap;

+    error_print_guestname();
     va_start(ap, fmt);
     vreport(REPORT_TYPE_ERROR, fmt, ap);
     va_end(ap);
@@ -289,6 +312,7 @@ void warn_report(const char *fmt, ...)
 {
     va_list ap;

+    error_print_guestname();
     va_start(ap, fmt);
     vreport(REPORT_TYPE_WARNING, fmt, ap);
     va_end(ap);
--
2.17.1



reply via email to

[Prev in Thread] Current Thread [Next in Thread]