qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [RFC PATCH v5 14/52] hw/xen: Add xen_overlay device for emulating shared xenheap pages


From: Dr. David Alan Gilbert
Subject: Re: [RFC PATCH v5 14/52] hw/xen: Add xen_overlay device for emulating shared xenheap pages
Date: Tue, 3 Jan 2023 17:54:26 +0000
User-agent: Mutt/2.2.9 (2022-11-12)

* David Woodhouse (dwmw2@infradead.org) wrote:
> From: David Woodhouse <dwmw@amazon.co.uk>
> 
> For the shared info page and for grant tables, Xen shares its own pages
> from the "Xen heap" to the guest. The guest requests that a given page
> from a certain address space (XENMAPSPACE_shared_info, etc.) be mapped
> to a given GPA using the XENMEM_add_to_physmap hypercall.
> 
> To support that in qemu when *emulating* Xen, create a memory region
> (migratable) and allow it to be mapped as an overlay when requested.
> 
> Xen theoretically allows the same page to be mapped multiple times
> into the guest, but that's hard to track and reinstate over migration,
> so we automatically *unmap* any previous mapping when creating a new
> one. This approach has been used in production with... a non-trivial
> number of guests expecting true Xen, without any problems yet being
> noticed.
> 
> This adds just the shared info page for now. The grant tables will be
> a larger region, and will need to be overlaid one page at a time. I
> think that means I need to create separate aliases for each page of
> the overall grant_frames region, so that they can be mapped individually.
> 
> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> ---
>  hw/i386/kvm/meson.build   |   1 +
>  hw/i386/kvm/xen_overlay.c | 200 ++++++++++++++++++++++++++++++++++++++
>  hw/i386/kvm/xen_overlay.h |  20 ++++
>  include/sysemu/kvm_xen.h  |   4 +
>  4 files changed, 225 insertions(+)
>  create mode 100644 hw/i386/kvm/xen_overlay.c
>  create mode 100644 hw/i386/kvm/xen_overlay.h
> 
> diff --git a/hw/i386/kvm/meson.build b/hw/i386/kvm/meson.build
> index 95467f1ded..6165cbf019 100644
> --- a/hw/i386/kvm/meson.build
> +++ b/hw/i386/kvm/meson.build
> @@ -4,5 +4,6 @@ i386_kvm_ss.add(when: 'CONFIG_APIC', if_true: files('apic.c'))
>  i386_kvm_ss.add(when: 'CONFIG_I8254', if_true: files('i8254.c'))
>  i386_kvm_ss.add(when: 'CONFIG_I8259', if_true: files('i8259.c'))
>  i386_kvm_ss.add(when: 'CONFIG_IOAPIC', if_true: files('ioapic.c'))
> +i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen_overlay.c'))
>  
>  i386_ss.add_all(when: 'CONFIG_KVM', if_true: i386_kvm_ss)
> diff --git a/hw/i386/kvm/xen_overlay.c b/hw/i386/kvm/xen_overlay.c
> new file mode 100644
> index 0000000000..331dea6b8b
> --- /dev/null
> +++ b/hw/i386/kvm/xen_overlay.c
> @@ -0,0 +1,200 @@
> +/*
> + * QEMU Xen emulation: Shared/overlay pages support
> + *
> + * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
> + *
> + * Authors: David Woodhouse <dwmw2@infradead.org>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qemu/host-utils.h"
> +#include "qemu/module.h"
> +#include "qemu/main-loop.h"
> +#include "qapi/error.h"
> +#include "qom/object.h"
> +#include "exec/target_page.h"
> +#include "exec/address-spaces.h"
> +#include "migration/vmstate.h"
> +
> +#include "hw/sysbus.h"
> +#include "hw/xen/xen.h"
> +#include "xen_overlay.h"
> +
> +#include "sysemu/kvm.h"
> +#include "sysemu/kvm_xen.h"
> +#include <linux/kvm.h>
> +
> +#include "standard-headers/xen/memory.h"
> +
> +
> +#define TYPE_XEN_OVERLAY "xen-overlay"
> +OBJECT_DECLARE_SIMPLE_TYPE(XenOverlayState, XEN_OVERLAY)
> +
> +#define XEN_PAGE_SHIFT 12
> +#define XEN_PAGE_SIZE (1ULL << XEN_PAGE_SHIFT)
> +
> +struct XenOverlayState {
> +    /*< private >*/
> +    SysBusDevice busdev;
> +    /*< public >*/
> +
> +    MemoryRegion shinfo_mem;
> +    void *shinfo_ptr;
> +    uint64_t shinfo_gpa;
> +};
> +
> +struct XenOverlayState *xen_overlay_singleton;
> +
> +static void xen_overlay_map_page_locked(MemoryRegion *page, uint64_t gpa)
> +{
> +    /*
> +     * Xen allows guests to map the same page as many times as they like
> +     * into guest physical frames. We don't, because it would be hard
> +     * to track and restore them all. One mapping of each page is
> +     * perfectly sufficient for all known guests... and we've tested
> +     * that theory on a few now in other implementations. dwmw2.
> +     */
> +    if (memory_region_is_mapped(page)) {
> +        if (gpa == INVALID_GPA) {
> +            memory_region_del_subregion(get_system_memory(), page);
> +        } else {
> +            /* Just move it */
> +            memory_region_set_address(page, gpa);
> +        }
> +    } else if (gpa != INVALID_GPA) {
> +        memory_region_add_subregion_overlap(get_system_memory(), gpa, page, 
> 0);
> +    }
> +}
> +
> +/* KVM is the only existing back end for now. Let's not overengineer it yet. */
> +static int xen_overlay_set_be_shinfo(uint64_t gfn)
> +{
> +    struct kvm_xen_hvm_attr xa = {
> +        .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
> +        .u.shared_info.gfn = gfn,
> +    };
> +
> +    return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &xa);
> +}
> +
> +
> +static void xen_overlay_realize(DeviceState *dev, Error **errp)
> +{
> +    XenOverlayState *s = XEN_OVERLAY(dev);
> +
> +    if (xen_mode != XEN_EMULATE) {
> +        error_setg(errp, "Xen overlay page support is for Xen emulation");
> +        return;
> +    }
> +
> +    memory_region_init_ram(&s->shinfo_mem, OBJECT(dev), "xen:shared_info",
> +                           XEN_PAGE_SIZE, &error_abort);
> +    memory_region_set_enabled(&s->shinfo_mem, true);
> +
> +    s->shinfo_ptr = memory_region_get_ram_ptr(&s->shinfo_mem);
> +    s->shinfo_gpa = INVALID_GPA;
> +    memset(s->shinfo_ptr, 0, XEN_PAGE_SIZE);
> +}
> +
> +static int xen_overlay_post_load(void *opaque, int version_id)
> +{
> +    XenOverlayState *s = opaque;
> +
> +    if (s->shinfo_gpa != INVALID_GPA) {
> +        xen_overlay_map_page_locked(&s->shinfo_mem, s->shinfo_gpa);
> +        xen_overlay_set_be_shinfo(s->shinfo_gpa >> XEN_PAGE_SHIFT);
> +    }
> +
> +    return 0;
> +}
> +
> +static bool xen_overlay_is_needed(void *opaque)
> +{
> +    return xen_mode == XEN_EMULATE;
> +}
> +
> +static const VMStateDescription xen_overlay_vmstate = {
> +    .name = "xen_overlay",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .needed = xen_overlay_is_needed,
> +    .post_load = xen_overlay_post_load,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_UINT64(shinfo_gpa, XenOverlayState),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
> +static void xen_overlay_class_init(ObjectClass *klass, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(klass);
> +
> +    dc->realize = xen_overlay_realize;
> +    dc->vmsd = &xen_overlay_vmstate;

That looks OK from a migration point of view

> +}
> +
> +static const TypeInfo xen_overlay_info = {
> +    .name          = TYPE_XEN_OVERLAY,
> +    .parent        = TYPE_SYS_BUS_DEVICE,
> +    .instance_size = sizeof(XenOverlayState),
> +    .class_init    = xen_overlay_class_init,
> +};
> +
> +void xen_overlay_create(void)
> +{
> +    xen_overlay_singleton = 
> XEN_OVERLAY(sysbus_create_simple(TYPE_XEN_OVERLAY,
> +                                                             -1, NULL));
> +}
> +
> +static void xen_overlay_register_types(void)
> +{
> +    type_register_static(&xen_overlay_info);
> +}
> +
> +type_init(xen_overlay_register_types)
> +
> +int xen_overlay_map_shinfo_page(uint64_t gpa)
> +{
> +    XenOverlayState *s = xen_overlay_singleton;
> +    int ret;
> +
> +    if (!s) {
> +        return -ENOENT;
> +    }
> +
> +    qemu_mutex_lock_iothread();
> +    if (s->shinfo_gpa) {
> +            /* If removing shinfo page, turn the kernel magic off first */

Odd indent?

Dave

> +        ret = xen_overlay_set_be_shinfo(INVALID_GFN);
> +        if (ret) {
> +            goto out;
> +        }
> +    }
> +
> +    xen_overlay_map_page_locked(&s->shinfo_mem, gpa);
> +    if (gpa != INVALID_GPA) {
> +        ret = xen_overlay_set_be_shinfo(gpa >> XEN_PAGE_SHIFT);
> +        if (ret) {
> +            goto out;
> +        }
> +    }
> +    s->shinfo_gpa = gpa;
> + out:
> +    qemu_mutex_unlock_iothread();
> +
> +    return ret;
> +}
> +
> +void *xen_overlay_get_shinfo_ptr(void)
> +{
> +    XenOverlayState *s = xen_overlay_singleton;
> +
> +    if (!s) {
> +        return NULL;
> +    }
> +
> +    return s->shinfo_ptr;
> +}
> diff --git a/hw/i386/kvm/xen_overlay.h b/hw/i386/kvm/xen_overlay.h
> new file mode 100644
> index 0000000000..00cff05bb0
> --- /dev/null
> +++ b/hw/i386/kvm/xen_overlay.h
> @@ -0,0 +1,20 @@
> +/*
> + * QEMU Xen emulation: Shared/overlay pages support
> + *
> + * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
> + *
> + * Authors: David Woodhouse <dwmw2@infradead.org>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#ifndef QEMU_XEN_OVERLAY_H
> +#define QEMU_XEN_OVERLAY_H
> +
> +void xen_overlay_create(void);
> +
> +int xen_overlay_map_shinfo_page(uint64_t gpa);
> +void *xen_overlay_get_shinfo_ptr(void);
> +
> +#endif /* QEMU_XEN_OVERLAY_H */
> diff --git a/include/sysemu/kvm_xen.h b/include/sysemu/kvm_xen.h
> index 296533f2d5..3e43cd7843 100644
> --- a/include/sysemu/kvm_xen.h
> +++ b/include/sysemu/kvm_xen.h
> @@ -12,6 +12,10 @@
>  #ifndef QEMU_SYSEMU_KVM_XEN_H
>  #define QEMU_SYSEMU_KVM_XEN_H
>  
> +/* The KVM API uses these to indicate "no GPA" or "no GFN" */
> +#define INVALID_GPA UINT64_MAX
> +#define INVALID_GFN UINT64_MAX
> +
>  uint32_t kvm_xen_get_caps(void);
>  
>  #define kvm_xen_has_cap(cap) (!!(kvm_xen_get_caps() &           \
> -- 
> 2.35.3
> 
> 
-- 
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK




reply via email to

[Prev in Thread] Current Thread [Next in Thread]