qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v5 16/50] multi-process: Synchronize remote memory


From: Dr. David Alan Gilbert
Subject: Re: [PATCH v5 16/50] multi-process: Synchronize remote memory
Date: Wed, 4 Mar 2020 11:53:23 +0000
User-agent: Mutt/1.13.3 (2020-01-12)

* Jagannathan Raman (address@hidden) wrote:
> Add memory-listener object which is used to keep the view of the RAM
> in sync between QEMU and remote process.
> A MemoryListener is registered for system-memory AddressSpace. The
> listener sends SYNC_SYSMEM message to the remote process when memory
> listener commits the changes to memory, the remote process receives
> the message and processes it in the handler for SYNC_SYSMEM message.
> 
> TODO: No need to create object for remote memory listener.
> 
> Signed-off-by: Jagannathan Raman <address@hidden>
> Signed-off-by: John G Johnson <address@hidden>
> Signed-off-by: Elena Ufimtseva <address@hidden>
> ---
>  Makefile.target                |   3 +
>  hw/proxy/memory-sync.c         | 212 
> +++++++++++++++++++++++++++++++++++++++++
>  hw/proxy/qemu-proxy.c          |   5 +
>  include/hw/proxy/memory-sync.h |  37 +++++++
>  include/hw/proxy/qemu-proxy.h  |   5 +
>  remote/remote-main.c           |  11 +++
>  6 files changed, 273 insertions(+)
>  create mode 100644 hw/proxy/memory-sync.c
>  create mode 100644 include/hw/proxy/memory-sync.h
> 
> diff --git a/Makefile.target b/Makefile.target
> index cfd36c1..271d883 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -127,6 +127,9 @@ obj-$(CONFIG_TCG) += fpu/softfloat.o
>  obj-y += target/$(TARGET_BASE_ARCH)/
>  obj-y += disas.o
>  obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o
> +ifeq ($(TARGET_NAME)-$(CONFIG_MPQEMU)-$(CONFIG_USER_ONLY), x86_64-y-)
> +obj-$(CONFIG_MPQEMU) += hw/proxy/memory-sync.o
> +endif
>  LIBS := $(libs_cpu) $(LIBS)
>  
>  obj-$(CONFIG_PLUGIN) += plugins/
> diff --git a/hw/proxy/memory-sync.c b/hw/proxy/memory-sync.c
> new file mode 100644
> index 0000000..3edbb19
> --- /dev/null
> +++ b/hw/proxy/memory-sync.c
> @@ -0,0 +1,212 @@
> +/*
> + * Copyright © 2018, 2020 Oracle and/or its affiliates.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include <sys/types.h>
> +#include <stdio.h>
> +#include <string.h>
> +
> +#include "qemu/osdep.h"
> +#include "qemu/compiler.h"
> +#include "qemu/int128.h"
> +#include "qemu/range.h"
> +#include "exec/memory.h"
> +#include "exec/cpu-common.h"
> +#include "cpu.h"
> +#include "exec/ram_addr.h"
> +#include "exec/address-spaces.h"
> +#include "io/mpqemu-link.h"
> +#include "hw/proxy/memory-sync.h"
> +
> +static const TypeInfo remote_mem_sync_type_info = {
> +    .name          = TYPE_MEMORY_LISTENER,
> +    .parent        = TYPE_OBJECT,
> +    .instance_size = sizeof(RemoteMemSync),
> +};
> +
> +static void remote_mem_sync_register_types(void)
> +{
> +    type_register_static(&remote_mem_sync_type_info);
> +}
> +
> +type_init(remote_mem_sync_register_types)
> +
> +static void proxy_ml_begin(MemoryListener *listener)
> +{
> +    RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
> +    int mrs;
> +
> +    for (mrs = 0; mrs < sync->n_mr_sections; mrs++) {
> +        memory_region_unref(sync->mr_sections[mrs].mr);
> +    }
> +
> +    g_free(sync->mr_sections);
> +    sync->mr_sections = NULL;
> +    sync->n_mr_sections = 0;
> +}
> +
> +static int get_fd_from_hostaddr(uint64_t host, ram_addr_t *offset)
> +{
> +    MemoryRegion *mr;
> +    ram_addr_t off;
> +
> +    mr = memory_region_from_host((void *)(uintptr_t)host, &off);

Do you need to just check we found an 'mr' ?

> +    if (offset) {
> +        *offset = off;
> +    }
> +
> +    return memory_region_get_fd(mr);
> +}
> +
> +static bool proxy_mrs_can_merge(uint64_t host, uint64_t prev_host, size_t 
> size)
> +{
> +    bool merge;
> +    int fd1, fd2;
> +
> +    fd1 = get_fd_from_hostaddr(host, NULL);
> +
> +    fd2 = get_fd_from_hostaddr(prev_host, NULL);
> +
> +    merge = (fd1 == fd2);
> +
> +    merge &= ((prev_host + size) == host);

It's interesting; I think the vhost code checks that the two mr's are
the same where you are checking for the same underlying fd - but I think
that's OK.
(I wonder if we need to check offset's within the fd's match up when
they're merged - since you added that offset feature in an earlier
patch?
That would also need checking in vhost_region_add_section)

> +    return merge;
> +}
> +
> +static void proxy_ml_region_addnop(MemoryListener *listener,
> +                                   MemoryRegionSection *section)
> +{
> +    RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
> +    bool need_add = true;
> +    uint64_t mrs_size, mrs_gpa, mrs_page;
> +    uintptr_t mrs_host;
> +    RAMBlock *mrs_rb;
> +    MemoryRegionSection *prev_sec;
> +
> +    if (!(memory_region_is_ram(section->mr) &&
> +          !memory_region_is_rom(section->mr))) {
> +        return;
> +    }
> +
> +    mrs_rb = section->mr->ram_block;
> +    mrs_page = (uint64_t)qemu_ram_pagesize(mrs_rb);
> +    mrs_size = int128_get64(section->size);
> +    mrs_gpa = section->offset_within_address_space;
> +    mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) +
> +               section->offset_within_region;
> +
> +    if (get_fd_from_hostaddr(mrs_host, NULL) <= 0) {
> +        return;
> +    }
> +
> +    mrs_host = mrs_host & ~(mrs_page - 1);
> +    mrs_gpa = mrs_gpa & ~(mrs_page - 1);
> +    mrs_size = ROUND_UP(mrs_size, mrs_page);

OK, just note the more complex code in vhost_region_add_section for page
aligning regions that are needed for postcopy; I think that would be the
same if you wanted to do postcopy with remote processes.

> +    if (sync->n_mr_sections) {
> +        prev_sec = sync->mr_sections + (sync->n_mr_sections - 1);
> +        uint64_t prev_gpa_start = prev_sec->offset_within_address_space;
> +        uint64_t prev_size = int128_get64(prev_sec->size);
> +        uint64_t prev_gpa_end   = range_get_last(prev_gpa_start, prev_size);
> +        uint64_t prev_host_start =
> +            (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr) +
> +            prev_sec->offset_within_region;
> +        uint64_t prev_host_end = range_get_last(prev_host_start, prev_size);
> +
> +        if (mrs_gpa <= (prev_gpa_end + 1)) {
> +            if (mrs_gpa < prev_gpa_start) {
> +                assert(0);
> +            }

g_assert(mrs_gpa < prev_gpa_start);


> +            if ((section->mr == prev_sec->mr) &&
> +                proxy_mrs_can_merge(mrs_host, prev_host_start,
> +                                    (mrs_gpa - prev_gpa_start))) {
> +                uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size);
> +                need_add = false;
> +                prev_sec->offset_within_address_space =
> +                    MIN(prev_gpa_start, mrs_gpa);
> +                prev_sec->offset_within_region =
> +                    MIN(prev_host_start, mrs_host) -
> +                    (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr);
> +                prev_sec->size = int128_make64(max_end - MIN(prev_host_start,
> +                                                             mrs_host));
> +            }
> +        }
> +    }
> +
> +    if (need_add) {
> +        ++sync->n_mr_sections;
> +        sync->mr_sections = g_renew(MemoryRegionSection, sync->mr_sections,
> +                                    sync->n_mr_sections);
> +        sync->mr_sections[sync->n_mr_sections - 1] = *section;
> +        sync->mr_sections[sync->n_mr_sections - 1].fv = NULL;
> +        memory_region_ref(section->mr);
> +    }

I'd add some tracing in this function; it's a nightmare to debug when it
does something unexpected.

> +}
> +
> +static void proxy_ml_commit(MemoryListener *listener)
> +{
> +    RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
> +    MPQemuMsg msg;
> +    MemoryRegionSection section;
> +    ram_addr_t offset;
> +    uintptr_t host_addr;
> +    int region;
> +
> +    memset(&msg, 0, sizeof(MPQemuMsg));
> +
> +    msg.cmd = SYNC_SYSMEM;
> +    msg.bytestream = 0;
> +    msg.num_fds = sync->n_mr_sections;
> +    msg.size = sizeof(msg.data1);
> +    assert(msg.num_fds <= REMOTE_MAX_FDS);
> +
> +    for (region = 0; region < sync->n_mr_sections; region++) {
> +        section = sync->mr_sections[region];
> +        msg.data1.sync_sysmem.gpas[region] =
> +            section.offset_within_address_space;
> +        msg.data1.sync_sysmem.sizes[region] = int128_get64(section.size);
> +        host_addr = (uintptr_t)memory_region_get_ram_ptr(section.mr) +
> +                    section.offset_within_region;
> +        msg.fds[region] = get_fd_from_hostaddr(host_addr, &offset);

Since you already have section.mr you cna use memory_region_get_fd.

> +        msg.data1.sync_sysmem.offsets[region] = offset;
> +    }
> +    mpqemu_msg_send(&msg, sync->mpqemu_link->com);
> +}
> +
> +void deconfigure_memory_sync(RemoteMemSync *sync)
> +{
> +    memory_listener_unregister(&sync->listener);
> +}
> +
> +/*
> + * TODO: Memory Sync need not be instantianted once per every proxy device.
> + *       All remote devices are going to get the exact same updates at the
> + *       same time. It therefore makes sense to have a broadcast model.
> + *
> + *       Broadcast model would involve running the MemorySync object in a
> + *       thread. MemorySync would contain a list of mpqemu-link objects
> + *       that need notification. proxy_ml_commit() could send the same
> + *       message to all the links at the same time.
> + */
> +void configure_memory_sync(RemoteMemSync *sync, MPQemuLinkState *mpqemu_link)
> +{
> +    sync->n_mr_sections = 0;
> +    sync->mr_sections = NULL;
> +
> +    sync->mpqemu_link = mpqemu_link;
> +
> +    sync->listener.begin = proxy_ml_begin;
> +    sync->listener.commit = proxy_ml_commit;
> +    sync->listener.region_add = proxy_ml_region_addnop;
> +    sync->listener.region_nop = proxy_ml_region_addnop;
> +    sync->listener.priority = 10;
> +
> +    memory_listener_register(&sync->listener, &address_space_memory);
> +}
> diff --git a/hw/proxy/qemu-proxy.c b/hw/proxy/qemu-proxy.c
> index b17d9bb..d3a9d38 100644
> --- a/hw/proxy/qemu-proxy.c
> +++ b/hw/proxy/qemu-proxy.c
> @@ -16,6 +16,8 @@
>  #include "qapi/qmp/qjson.h"
>  #include "qapi/qmp/qstring.h"
>  #include "hw/proxy/qemu-proxy.h"
> +#include "hw/proxy/memory-sync.h"
> +#include "qom/object.h"
>  
>  static void pci_proxy_dev_realize(PCIDevice *dev, Error **errp);
>  
> @@ -243,6 +245,8 @@ static void init_proxy(PCIDevice *dev, char *command, 
> char *exec_name,
>  
>      mpqemu_init_channel(pdev->mpqemu_link, &pdev->mpqemu_link->com,
>                          pdev->socket);
> +
> +    configure_memory_sync(pdev->sync, pdev->mpqemu_link);
>  }
>  
>  static void pci_proxy_dev_realize(PCIDevice *device, Error **errp)
> @@ -261,6 +265,7 @@ static void pci_proxy_dev_realize(PCIDevice *device, 
> Error **errp)
>      dev->set_proxy_sock = set_proxy_sock;
>      dev->get_proxy_sock = get_proxy_sock;
>      dev->init_proxy = init_proxy;
> +    dev->sync = REMOTE_MEM_SYNC(object_new(TYPE_MEMORY_LISTENER));
>  }
>  
>  static void send_bar_access_msg(PCIProxyDev *dev, MemoryRegion *mr,
> diff --git a/include/hw/proxy/memory-sync.h b/include/hw/proxy/memory-sync.h
> new file mode 100644
> index 0000000..d8329c9
> --- /dev/null
> +++ b/include/hw/proxy/memory-sync.h
> @@ -0,0 +1,37 @@
> +/*
> + * Copyright © 2018, 2020 Oracle and/or its affiliates.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#ifndef MEMORY_SYNC_H
> +#define MEMORY_SYNC_H
> +
> +#include <sys/types.h>
> +
> +#include "qemu/osdep.h"
> +#include "qom/object.h"
> +#include "exec/memory.h"
> +#include "io/mpqemu-link.h"
> +
> +#define TYPE_MEMORY_LISTENER "memory-listener"
> +#define REMOTE_MEM_SYNC(obj) \
> +            OBJECT_CHECK(RemoteMemSync, (obj), TYPE_MEMORY_LISTENER)
> +
> +typedef struct RemoteMemSync {
> +    Object obj;
> +
> +    MemoryListener listener;
> +
> +    int n_mr_sections;
> +    MemoryRegionSection *mr_sections;
> +
> +    MPQemuLinkState *mpqemu_link;
> +} RemoteMemSync;
> +
> +void configure_memory_sync(RemoteMemSync *sync, MPQemuLinkState 
> *mpqemu_link);
> +void deconfigure_memory_sync(RemoteMemSync *sync);
> +
> +#endif
> diff --git a/include/hw/proxy/qemu-proxy.h b/include/hw/proxy/qemu-proxy.h
> index 44e370e..c93ffe3 100644
> --- a/include/hw/proxy/qemu-proxy.h
> +++ b/include/hw/proxy/qemu-proxy.h
> @@ -10,6 +10,7 @@
>  #define QEMU_PROXY_H
>  
>  #include "io/mpqemu-link.h"
> +#include "hw/proxy/memory-sync.h"
>  
>  #define TYPE_PCI_PROXY_DEV "pci-proxy-dev"
>  
> @@ -37,8 +38,12 @@ extern const MemoryRegionOps proxy_default_ops;
>  struct PCIProxyDev {
>      PCIDevice parent_dev;
>  
> +    int n_mr_sections;
> +    MemoryRegionSection *mr_sections;
> +
>      MPQemuLinkState *mpqemu_link;
>  
> +    RemoteMemSync *sync;
>      pid_t remote_pid;
>      int socket;
>  
> diff --git a/remote/remote-main.c b/remote/remote-main.c
> index acd8daf..9512a3b 100644
> --- a/remote/remote-main.c
> +++ b/remote/remote-main.c
> @@ -34,6 +34,7 @@
>  #include "block/block.h"
>  #include "exec/ramlist.h"
>  #include "exec/memattrs.h"
> +#include "exec/address-spaces.h"
>  
>  static MPQemuLinkState *mpqemu_link;
>  PCIDevice *remote_pci_dev;
> @@ -161,6 +162,16 @@ static void process_msg(GIOCondition cond, MPQemuChannel 
> *chan)
>              goto finalize_loop;
>          }
>          break;
> +    case SYNC_SYSMEM:
> +        /*
> +         * TODO: ensure no active DMA is happening when
> +         * sysmem is being updated

In practice this turns out to be hard!

Dave

> +         */
> +        remote_sysmem_reconfig(msg, &err);
> +        if (err) {
> +            goto finalize_loop;
> +        }
> +        break;
>      default:
>          error_setg(&err, "Unknown command");
>          goto finalize_loop;
> -- 
> 1.8.3.1
> 
--
Dr. David Alan Gilbert / address@hidden / Manchester, UK




reply via email to

[Prev in Thread] Current Thread [Next in Thread]