qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH] RFC: vmcoreinfo device


From: Marc-André Lureau
Subject: [Qemu-devel] [PATCH] RFC: vmcoreinfo device
Date: Mon, 24 Apr 2017 17:03:55 +0400

The VM coreinfo (vmcoreinfo) device is an emulated device which
exposes a 4k memory range to the guest to store various information
useful to debug the guest OS. (it is greatly inspired by the VMGENID
device implementation)

This is an early-boot alternative to the qemu-ga VMDUMP_INFO event
proposed in "[PATCH 00/21] WIP: dump: add kaslr support".

If deemed more appropriate, we can consider writing to fw_cfg directly
instead of guest memory, now that qemu 2.9 supports it again.

The proof-of-concept kernel module:
https://github.com/elmarco/vmgenid-test/blob/master/qemuvmci-test.c

Signed-off-by: Marc-André Lureau <address@hidden>
---
 include/hw/acpi/aml-build.h        |   1 +
 include/hw/acpi/vmcoreinfo.h       |  36 +++++++
 include/hw/compat.h                |   4 +
 hw/acpi/aml-build.c                |   2 +
 hw/acpi/vmcoreinfo.c               | 207 +++++++++++++++++++++++++++++++++++++
 hw/i386/acpi-build.c               |  14 +++
 default-configs/i386-softmmu.mak   |   1 +
 default-configs/x86_64-softmmu.mak |   1 +
 docs/specs/vmcoreinfo.txt          | 138 +++++++++++++++++++++++++
 hw/acpi/Makefile.objs              |   1 +
 10 files changed, 405 insertions(+)
 create mode 100644 include/hw/acpi/vmcoreinfo.h
 create mode 100644 hw/acpi/vmcoreinfo.c
 create mode 100644 docs/specs/vmcoreinfo.txt

diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
index 00c21f160c..fd479115e1 100644
--- a/include/hw/acpi/aml-build.h
+++ b/include/hw/acpi/aml-build.h
@@ -211,6 +211,7 @@ struct AcpiBuildTables {
     GArray *rsdp;
     GArray *tcpalog;
     GArray *vmgenid;
+    GArray *vmcoreinfo;
     BIOSLinker *linker;
 } AcpiBuildTables;
 
diff --git a/include/hw/acpi/vmcoreinfo.h b/include/hw/acpi/vmcoreinfo.h
new file mode 100644
index 0000000000..63196aeee0
--- /dev/null
+++ b/include/hw/acpi/vmcoreinfo.h
@@ -0,0 +1,36 @@
+#ifndef ACPI_VMCOREINFO_H
+#define ACPI_VMCOREINFO_H
+
+#include "hw/acpi/bios-linker-loader.h"
+#include "hw/qdev.h"
+
+#define VMCOREINFO_DEVICE           "vmcoreinfo"
+#define VMCOREINFO_FW_CFG_FILE      "etc/vmcoreinfo"
+#define VMCOREINFO_ADDR_FW_CFG_FILE "etc/vmcoreinfo-addr"
+
+#define VMCOREINFO_FW_CFG_SIZE      4096 /* Occupy a page of memory */
+#define VMCOREINFO_OFFSET           40   /* allow space for
+                                          * OVMF SDT Header Probe Suppressor
+                                          */
+
+#define VMCOREINFO(obj) OBJECT_CHECK(VmcoreinfoState, (obj), VMCOREINFO_DEVICE)
+
+typedef struct VmcoreinfoState {
+    DeviceState parent_obj;          /* instance base of a TYPE_DEVICE child */
+    uint8_t vmcoreinfo_addr_le[8];   /* LE guest address, exposed via fw_cfg */
+    bool write_pointer_available;    /* "x-write-pointer-available" property */
+} VmcoreinfoState;
+
+/* returns NULL unless there is exactly one device */
+static inline Object *find_vmcoreinfo_dev(void)
+{
+    return object_resolve_path_type("", VMCOREINFO_DEVICE, NULL);
+}
+
+void vmcoreinfo_build_acpi(VmcoreinfoState *vis, GArray *table_data,
+                           GArray *vmci, BIOSLinker *linker);
+void vmcoreinfo_add_fw_cfg(VmcoreinfoState *vis, FWCfgState *s, GArray *vmci);
+bool vmcoreinfo_get(VmcoreinfoState *vis, uint64_t *paddr, uint32_t *size,
+                    Error **errp);
+
+#endif
diff --git a/include/hw/compat.h b/include/hw/compat.h
index 5d5be91daf..d0c9b71902 100644
--- a/include/hw/compat.h
+++ b/include/hw/compat.h
@@ -135,6 +135,10 @@
         .driver   = "vmgenid",\
         .property = "x-write-pointer-available",\
         .value    = "off",\
+    },{\
+        .driver   = "vmcoreinfo",\
+        .property = "x-write-pointer-available",\
+        .value    = "off",\
     },
 
 #define HW_COMPAT_2_3 \
diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index c6f2032dec..cd639586a4 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -1560,6 +1560,7 @@ void acpi_build_tables_init(AcpiBuildTables *tables)
     tables->table_data = g_array_new(false, true /* clear */, 1);
     tables->tcpalog = g_array_new(false, true /* clear */, 1);
     tables->vmgenid = g_array_new(false, true /* clear */, 1);
+    tables->vmcoreinfo = g_array_new(false, true /* clear */, 1);
     tables->linker = bios_linker_loader_init();
 }
 
@@ -1570,6 +1571,7 @@ void acpi_build_tables_cleanup(AcpiBuildTables *tables, bool mfre)
     g_array_free(tables->table_data, true);
     g_array_free(tables->tcpalog, mfre);
     g_array_free(tables->vmgenid, mfre);
+    g_array_free(tables->vmcoreinfo, mfre);
 }
 
 /* Build rsdt table */
diff --git a/hw/acpi/vmcoreinfo.c b/hw/acpi/vmcoreinfo.c
new file mode 100644
index 0000000000..dec4feac1e
--- /dev/null
+++ b/hw/acpi/vmcoreinfo.c
@@ -0,0 +1,207 @@
+/*
+ *  Virtual Machine coreinfo device
+ *  (based on Virtual Machine Generation ID Device)
+ *
+ *  Copyright (C) 2017 Red Hat, Inc.
+ *  Copyright (C) 2017 Skyport Systems.
+ *
+ *  Authors: Marc-André Lureau <address@hidden>
+ *           Ben Warren <address@hidden>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#include "qemu/osdep.h"
+#include "hw/acpi/acpi.h"
+#include "hw/acpi/aml-build.h"
+#include "hw/acpi/vmcoreinfo.h"
+#include "hw/nvram/fw_cfg.h"
+#include "sysemu/sysemu.h"
+#include "qapi/error.h"
+
+void vmcoreinfo_build_acpi(VmcoreinfoState *vis, GArray *table_data,
+                           GArray *vmci, BIOSLinker *linker)
+{
+    Aml *ssdt, *dev, *scope, *method, *addr, *if_ctx;
+    uint32_t vgia_offset;
+
+    g_array_set_size(vmci, VMCOREINFO_FW_CFG_SIZE);
+
+    /* Put this in a separate SSDT table */
+    ssdt = init_aml_allocator();
+
+    /* Reserve space for header */
+    acpi_data_push(ssdt->buf, sizeof(AcpiTableHeader));
+
+    /* Storage address */
+    vgia_offset = table_data->len +
+        build_append_named_dword(ssdt->buf, "VCIA");
+    scope = aml_scope("\\_SB");
+    dev = aml_device("VMCI");
+    aml_append(dev, aml_name_decl("_HID", aml_string("QEMUVMCI")));
+
+    /* Simple status method to check that address is linked and non-zero */
+    method = aml_method("_STA", 0, AML_NOTSERIALIZED);
+    addr = aml_local(0);
+    aml_append(method, aml_store(aml_int(0xf), addr));
+    if_ctx = aml_if(aml_equal(aml_name("VCIA"), aml_int(0)));
+    aml_append(if_ctx, aml_store(aml_int(0), addr));
+    aml_append(method, if_ctx);
+    aml_append(method, aml_return(addr));
+    aml_append(dev, method);
+
+    /* the ADDR method returns two 32-bit words representing the lower and
+     * upper halves of the physical address of the vmcoreinfo area
+     */
+    method = aml_method("ADDR", 0, AML_NOTSERIALIZED);
+
+    addr = aml_local(0);
+    aml_append(method, aml_store(aml_package(2), addr));
+
+    aml_append(method, aml_store(aml_add(aml_name("VCIA"),
+                                         aml_int(VMCOREINFO_OFFSET), NULL),
+                                 aml_index(addr, aml_int(0))));
+    aml_append(method, aml_store(aml_int(0), aml_index(addr, aml_int(1))));
+    aml_append(method, aml_return(addr));
+
+    aml_append(dev, method);
+    aml_append(scope, dev);
+    aml_append(ssdt, scope);
+
+    g_array_append_vals(table_data, ssdt->buf->data, ssdt->buf->len);
+
+    /* Allocate guest memory */
+    bios_linker_loader_alloc(linker, VMCOREINFO_FW_CFG_FILE, vmci, 4096,
+                             false /* page boundary, high memory */);
+
+    /* Patch address of vmcoreinfo fw_cfg blob into the ADDR fw_cfg
+     * blob so QEMU can read the info from there.  The address is
+     * expected to be < 4GB, but write 64 bits anyway.
+     * The address that is patched in is offset in order to implement
+     * the "OVMF SDT Header probe suppressor"
+     * see docs/specs/vmcoreinfo.txt for more details.
+     */
+    bios_linker_loader_write_pointer(linker,
+        VMCOREINFO_ADDR_FW_CFG_FILE, 0, sizeof(uint64_t),
+        VMCOREINFO_FW_CFG_FILE, VMCOREINFO_OFFSET);
+
+    /* Patch address of vmcoreinfo into the AML so OSPM can retrieve
+     * and read it.  Note that while we provide storage for 64 bits, only
+     * the least-significant 32 get patched into AML.
+     */
+    bios_linker_loader_add_pointer(linker,
+        ACPI_BUILD_TABLE_FILE, vgia_offset, sizeof(uint32_t),
+        VMCOREINFO_FW_CFG_FILE, 0);
+
+    build_header(linker, table_data,
+        (void *)(table_data->data + table_data->len - ssdt->buf->len),
+        "SSDT", ssdt->buf->len, 1, NULL, "VMCOREIN");
+    free_aml_allocator();
+}
+
+void vmcoreinfo_add_fw_cfg(VmcoreinfoState *vis, FWCfgState *s, GArray *vmci)
+{
+    /* Create a read-only fw_cfg file for vmcoreinfo allocation */
+    /* XXX: linker could learn to allocate without backing fw_cfg? */
+    fw_cfg_add_file(s, VMCOREINFO_FW_CFG_FILE, vmci->data,
+                    VMCOREINFO_FW_CFG_SIZE);
+    /* Create a read-write fw_cfg file for Address */
+    fw_cfg_add_file_callback(s, VMCOREINFO_ADDR_FW_CFG_FILE, NULL, NULL,
+                             vis->vmcoreinfo_addr_le,
+                             ARRAY_SIZE(vis->vmcoreinfo_addr_le), false);
+}
+
+bool vmcoreinfo_get(VmcoreinfoState *vis,
+                    uint64_t *paddr, uint32_t *size,
+                    Error **errp)
+{
+    uint32_t vmcoreinfo_addr;
+    uint32_t version;
+
+    assert(vis);
+    assert(paddr);
+    assert(size);
+
+    memcpy(&vmcoreinfo_addr, vis->vmcoreinfo_addr_le, sizeof(vmcoreinfo_addr));
+    vmcoreinfo_addr = le32_to_cpu(vmcoreinfo_addr);
+    if (!vmcoreinfo_addr) {
+        error_setg(errp, "BIOS has not yet written the address of %s",
+                   VMCOREINFO_DEVICE);
+        return false;
+    }
+
+    cpu_physical_memory_read(vmcoreinfo_addr, &version, sizeof(version));
+    if (version != 0) {
+        error_setg(errp, "Unknown %s memory version", VMCOREINFO_DEVICE);
+        return false;
+    }
+
+    cpu_physical_memory_read(vmcoreinfo_addr + 4, paddr, sizeof(paddr));
+    *paddr = le64_to_cpu(*paddr);
+    cpu_physical_memory_read(vmcoreinfo_addr + 12, size, sizeof(size));
+    *size = le32_to_cpu(*size);
+
+    return true;
+}
+
+static const VMStateDescription vmstate_vmcoreinfo = {
+    .name = "vmcoreinfo",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8_ARRAY(vmcoreinfo_addr_le, VmcoreinfoState, sizeof(uint64_t)),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static Property vmcoreinfo_properties[] = {
+    DEFINE_PROP_BOOL("x-write-pointer-available", VmcoreinfoState,
+                     write_pointer_available, true),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void vmcoreinfo_realize(DeviceState *dev, Error **errp)
+{
+    VmcoreinfoState *vms = VMCOREINFO(dev);
+
+    if (!vms->write_pointer_available) {
+        error_setg(errp, "%s requires DMA write support in fw_cfg, "
+                   "which this machine type does not provide",
+                   VMCOREINFO_DEVICE);
+        return;
+    }
+
+    /* find_vmcoreinfo_dev() returns NULL unless exactly one device exists.
+     * This one is being realized, so NULL means there are several.
+     */
+    if (!find_vmcoreinfo_dev()) {
+        error_setg(errp, "at most one %s device is permitted",
+                   VMCOREINFO_DEVICE);
+        return;
+    }
+}
+
+static void vmcoreinfo_device_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->vmsd = &vmstate_vmcoreinfo;
+    dc->realize = vmcoreinfo_realize;
+    dc->hotpluggable = false;
+    dc->props = vmcoreinfo_properties;
+}
+
+static const TypeInfo vmcoreinfo_device_info = {
+    .name          = VMCOREINFO_DEVICE,
+    .parent        = TYPE_DEVICE,
+    .instance_size = sizeof(VmcoreinfoState),
+    .class_init    = vmcoreinfo_device_class_init,
+};
+
+static void vmcoreinfo_register_types(void)
+{
+    type_register_static(&vmcoreinfo_device_info);
+}
+
+type_init(vmcoreinfo_register_types)
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 2073108577..97f04401c4 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -43,6 +43,7 @@
 #include "sysemu/tpm.h"
 #include "hw/acpi/tpm.h"
 #include "hw/acpi/vmgenid.h"
+#include "hw/acpi/vmcoreinfo.h"
 #include "sysemu/tpm_backend.h"
 #include "hw/timer/mc146818rtc_regs.h"
 #include "sysemu/numa.h"
@@ -2612,6 +2613,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
     GArray *tables_blob = tables->table_data;
     AcpiSlicOem slic_oem = { .id = NULL, .table_id = NULL };
     Object *vmgenid_dev;
+    Object *vmcoreinfo_dev;
 
     acpi_get_pm_info(&pm);
     acpi_get_misc_info(&misc);
@@ -2661,6 +2663,12 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine)
         vmgenid_build_acpi(VMGENID(vmgenid_dev), tables_blob,
                            tables->vmgenid, tables->linker);
     }
+    vmcoreinfo_dev = find_vmcoreinfo_dev();
+    if (vmcoreinfo_dev) {
+        acpi_add_table(table_offsets, tables_blob);
+        vmcoreinfo_build_acpi(VMCOREINFO(vmcoreinfo_dev), tables_blob,
+                              tables->vmcoreinfo, tables->linker);
+    }
 
     if (misc.has_hpet) {
         acpi_add_table(table_offsets, tables_blob);
@@ -2833,6 +2841,7 @@ void acpi_setup(void)
     AcpiBuildTables tables;
     AcpiBuildState *build_state;
     Object *vmgenid_dev;
+    Object *vmcoreinfo_dev;
 
     if (!pcms->fw_cfg) {
         ACPI_BUILD_DPRINTF("No fw cfg. Bailing out.\n");
@@ -2874,6 +2883,11 @@ void acpi_setup(void)
         vmgenid_add_fw_cfg(VMGENID(vmgenid_dev), pcms->fw_cfg,
                            tables.vmgenid);
     }
+    vmcoreinfo_dev = find_vmcoreinfo_dev();
+    if (vmcoreinfo_dev) {
+        vmcoreinfo_add_fw_cfg(VMCOREINFO(vmcoreinfo_dev), pcms->fw_cfg,
+                              tables.vmcoreinfo);
+    }
 
     if (!pcmc->rsdp_in_ram) {
         /*
diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak
index 029e95202a..5b0acdb715 100644
--- a/default-configs/i386-softmmu.mak
+++ b/default-configs/i386-softmmu.mak
@@ -60,3 +60,4 @@ CONFIG_SMBIOS=y
 CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM)
 CONFIG_PXB=y
 CONFIG_ACPI_VMGENID=y
+CONFIG_ACPI_VMCOREINFO=y
diff --git a/default-configs/x86_64-softmmu.mak b/default-configs/x86_64-softmmu.mak
index d1d7432f74..71033e26fa 100644
--- a/default-configs/x86_64-softmmu.mak
+++ b/default-configs/x86_64-softmmu.mak
@@ -60,3 +60,4 @@ CONFIG_SMBIOS=y
 CONFIG_HYPERV_TESTDEV=$(CONFIG_KVM)
 CONFIG_PXB=y
 CONFIG_ACPI_VMGENID=y
+CONFIG_ACPI_VMCOREINFO=y
diff --git a/docs/specs/vmcoreinfo.txt b/docs/specs/vmcoreinfo.txt
new file mode 100644
index 0000000000..70d9716fe0
--- /dev/null
+++ b/docs/specs/vmcoreinfo.txt
@@ -0,0 +1,138 @@
+VIRTUAL MACHINE COREINFO DEVICE
+===============================
+
+Copyright (C) 2017 Red Hat, Inc.
+
+This work is licensed under the terms of the GNU GPL, version 2 or later.
+See the COPYING file in the top-level directory.
+
+===
+
+The VM coreinfo (vmcoreinfo) device is an emulated device which
+exposes a 4k memory range to the guest to store various information
+useful to debug the guest OS.
+
+QEMU Implementation
+-------------------
+
+The vmcoreinfo device is put in its own ACPI descriptor table, in a
+Secondary System Description Table, or SSDT.
+
+The following is a dump of the contents from a running system:
+
+# iasl -p ./SSDT -d /sys/firmware/acpi/tables/SSDT
+/*
+ * Intel ACPI Component Architecture
+ * AML/ASL+ Disassembler version 20160831-64
+ * Copyright (c) 2000 - 2016 Intel Corporation
+ *
+ * Disassembling to symbolic ASL+ operators
+ *
+ * Disassembly of /sys/firmware/acpi/tables/SSDT, Mon Apr 24 15:59:53 2017
+ *
+ * Original Table Header:
+ *     Signature        "SSDT"
+ *     Length           0x00000086 (134)
+ *     Revision         0x01
+ *     Checksum         0x5C
+ *     OEM ID           "BOCHS "
+ *     OEM Table ID     "VMCOREIN"
+ *     OEM Revision     0x00000001 (1)
+ *     Compiler ID      "BXPC"
+ *     Compiler Version 0x00000001 (1)
+ */
+DefinitionBlock ("", "SSDT", 1, "BOCHS ", "VMCOREIN", 0x00000001)
+{
+    Name (VCIA, 0x3FFFF000)
+    Scope (\_SB)
+    {
+        Device (VMCI)
+        {
+            Name (_HID, "QEMUVMCI")  // _HID: Hardware ID
+            Method (_STA, 0, NotSerialized)  // _STA: Status
+            {
+                Local0 = 0x0F
+                If (VCIA == Zero)
+                {
+                    Local0 = Zero
+                }
+
+                Return (Local0)
+            }
+
+            Method (ADDR, 0, NotSerialized)
+            {
+                Local0 = Package (0x02) {}
+                Local0 [Zero] = (VCIA + 0x28)
+                Local0 [One] = Zero
+                Return (Local0)
+            }
+        }
+    }
+}
+
+
+Design Details:
+---------------
+
+QEMU must be able to read the contents of the device memory,
+specifically when starting a memory dump.  In order to do this, QEMU
+must know the address that has been allocated.
+
+The mechanism chosen for this memory sharing is writeable fw_cfg blobs.
+These are data objects that are visible to both QEMU and guests, and are
+addressable as sequential files.
+
+More information about fw_cfg can be found in "docs/specs/fw_cfg.txt"
+
+Two fw_cfg blobs are used in this case:
+
+/etc/vmcoreinfo      - used to allocate memory range, read-only to the guest
+/etc/vmcoreinfo-addr - contains the address of the allocated range
+                     - writeable by the guest
+
+
+QEMU sends the following commands to the guest at startup:
+
+1. Allocate memory for vmcoreinfo fw_cfg blob.
+2. Write the address of vmcoreinfo into the SSDT (VCIA ACPI variable as
+   shown above in the iasl dump).  Note that this change is not propagated
+   back to QEMU.
+3. Write the address of vmcoreinfo back to QEMU's copy of vmcoreinfo-addr
+   via the fw_cfg DMA interface.
+
+After step 3, QEMU is able to read the contents of vmcoreinfo.
+
+The value of vmcoreinfo-addr is persisted via the VMState mechanism.
+
+
+Storage Format:
+---------------
+
+The content is expected to use little-endian format.
+
+In order to implement an OVMF "SDT Header Probe Suppressor", the vmcoreinfo
+blob begins with 40 bytes of padding:
+
++-----------------------------------+
+| SSDT with OEM Table ID = VMCOREIN |
++-----------------------------------+
+| ...                               |       TOP OF PAGE
+| VCIA dword object ----------------|-----> +---------------------------+
+| ...                               |       | fw-allocated array for    |
+| _STA method referring to VCIA     |       | "etc/vmcoreinfo"          |
+| ...                               |       +---------------------------+
+| ADDR method referring to VCIA     |       |  0: OVMF SDT Header probe |
+| ...                               |       |     suppressor            |
++-----------------------------------+       | 40: uint32 version field  |
+                                            | 44: info contents         |
+                                            |     ....                  |
+                                            +---------------------------+
+                                            END OF PAGE
+
+Version 0 content:
+
+ uint64 paddr:
+  Physical address of the Linux vmcoreinfo ELF note.
+ uint32 size:
+  Size of the vmcoreinfo ELF note.
diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs
index 11c35bcb44..9623078f95 100644
--- a/hw/acpi/Makefile.objs
+++ b/hw/acpi/Makefile.objs
@@ -6,6 +6,7 @@ common-obj-$(CONFIG_ACPI_MEMORY_HOTPLUG) += memory_hotplug.o
 common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu.o
 common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o
 common-obj-$(CONFIG_ACPI_VMGENID) += vmgenid.o
+common-obj-$(CONFIG_ACPI_VMCOREINFO) += vmcoreinfo.o
 common-obj-$(call lnot,$(CONFIG_ACPI_X86)) += acpi-stub.o
 
 common-obj-y += acpi_interface.o
-- 
2.12.0.191.gc5d8de91d




reply via email to

[Prev in Thread] Current Thread [Next in Thread]