[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v1 2/2] virtio-mem: Handle preallocation with migration
From: |
Dr. David Alan Gilbert |
Subject: |
Re: [PATCH v1 2/2] virtio-mem: Handle preallocation with migration |
Date: |
Tue, 25 Jan 2022 11:43:10 +0000 |
User-agent: |
Mutt/2.1.5 (2021-12-30) |
* David Hildenbrand (david@redhat.com) wrote:
> During precopy we usually write all plugged ares and essentially
> allocate them. However, there are two corner cases:
>
> 1) Migrating the zeropage
>
> When the zeropage gets migrated, we first check if the destination range is
> already zero and avoid performing a write in that case:
> ram_handle_compressed(). If the memory backend, like anonymous RAM or
> most filesystems, populate the shared zeropage when reading a (file) hole,
> we don't preallocate backend memory. In that case, we have to explicitly
> trigger the allocation to allocate actual backend memory.
>
> 2) Excluding memory ranges during migration
>
> For example, virtio-balloon free page hinting will exclude some pages
> from getting migrated. In that case, we don't allocate memory for
> plugged ranges when migrating.
>
> So trigger allocation of all plugged ranges when restoring the device state
> and fail gracefully if allocation fails.
>
> Handling postcopy is a bit more tricky, as postcopy and preallocation
> are problematic in general. To at least mimic what ordinary
> preallocation does, temporarily try allocating the requested amount
> of memory and fail postcopy in case the requested size on source and
> destination doesn't match. This way, we at least checked that there isn't
> a fundamental configuration issue and that we were able to preallocate
> the required amount of memory at least once, instead of failing
> unrecoverably during postcopy later. However, just as ordinary
> preallocation with postcopy, it's racy.
>
> Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> ---
> hw/virtio/virtio-mem.c | 136 +++++++++++++++++++++++++++++++++
> include/hw/virtio/virtio-mem.h | 6 ++
> 2 files changed, 142 insertions(+)
>
> diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
> index 6c337db0a7..f48e684aa9 100644
> --- a/hw/virtio/virtio-mem.c
> +++ b/hw/virtio/virtio-mem.c
> @@ -27,6 +27,7 @@
> #include "qapi/visitor.h"
> #include "exec/ram_addr.h"
> #include "migration/misc.h"
> +#include "migration/postcopy-ram.h"
> #include "hw/boards.h"
> #include "hw/qdev-properties.h"
> #include CONFIG_DEVICES
> @@ -193,6 +194,30 @@ static int virtio_mem_for_each_unplugged_range(const
> VirtIOMEM *vmem, void *arg,
> return ret;
> }
>
> +static int virtio_mem_for_each_plugged_range(const VirtIOMEM *vmem, void
> *arg,
> + virtio_mem_range_cb cb)
> +{
> + unsigned long first_bit, last_bit;
> + uint64_t offset, size;
> + int ret = 0;
> +
> + first_bit = find_first_bit(vmem->bitmap, vmem->bitmap_size);
> + while (first_bit < vmem->bitmap_size) {
> + offset = first_bit * vmem->block_size;
> + last_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
> + first_bit + 1) - 1;
> + size = (last_bit - first_bit + 1) * vmem->block_size;
> +
> + ret = cb(vmem, arg, offset, size);
> + if (ret) {
> + break;
> + }
> + first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
> + last_bit + 2);
> + }
> + return ret;
> +}
> +
> /*
> * Adjust the memory section to cover the intersection with the given range.
> *
> @@ -819,6 +844,7 @@ static void virtio_mem_device_realize(DeviceState *dev,
> Error **errp)
> if (!vmem->block_size) {
> vmem->block_size = virtio_mem_default_block_size(rb);
> }
> + vmem->initial_requested_size = vmem->requested_size;
>
> if (vmem->block_size < page_size) {
> error_setg(errp, "'%s' property has to be at least the page size
> (0x%"
> @@ -879,6 +905,7 @@ static void virtio_mem_device_realize(DeviceState *dev,
> Error **errp)
> */
> memory_region_set_ram_discard_manager(&vmem->memdev->mr,
> RAM_DISCARD_MANAGER(vmem));
> + postcopy_add_notifier(&vmem->postcopy_notifier);
> }
>
> static void virtio_mem_device_unrealize(DeviceState *dev)
> @@ -886,6 +913,7 @@ static void virtio_mem_device_unrealize(DeviceState *dev)
> VirtIODevice *vdev = VIRTIO_DEVICE(dev);
> VirtIOMEM *vmem = VIRTIO_MEM(dev);
>
> + postcopy_remove_notifier(&vmem->postcopy_notifier);
> /*
> * The unplug handler unmapped the memory region, it cannot be
> * found via an address space anymore. Unset ourselves.
> @@ -915,12 +943,119 @@ static int virtio_mem_restore_unplugged(VirtIOMEM
> *vmem)
> virtio_mem_discard_range_cb);
> }
>
> +static int virtio_mem_prealloc_range(const VirtIOMEM *vmem, uint64_t offset,
> + uint64_t size)
> +{
> + void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset;
> + int fd = memory_region_get_fd(&vmem->memdev->mr);
> + Error *local_err = NULL;
> +
> + os_mem_prealloc(fd, area, size, 1, &local_err);
> + if (local_err) {
> + error_report_err(local_err);
> + return -ENOMEM;
> + }
> + return 0;
> +}
> +
> +static int virtio_mem_prealloc_range_cb(const VirtIOMEM *vmem, void *arg,
> + uint64_t offset, uint64_t size)
> +{
> + return virtio_mem_prealloc_range(vmem, offset, size);
> +}
> +
> +static int virtio_mem_restore_prealloc(VirtIOMEM *vmem)
> +{
> + /*
> + * Make sure any preallocated memory is really preallocated. Migration
> + * might have skipped some pages or optimized for the zeropage.
> + */
> + return virtio_mem_for_each_plugged_range(vmem, NULL,
> + virtio_mem_prealloc_range_cb);
> +}
> +
> +static int virtio_mem_postcopy_notify(NotifierWithReturn *notifier,
> + void *opaque)
> +{
> + struct PostcopyNotifyData *pnd = opaque;
> + VirtIOMEM *vmem = container_of(notifier, VirtIOMEM, postcopy_notifier);
> + RAMBlock *rb = vmem->memdev->mr.ram_block;
> + int ret;
> +
> + if (pnd->reason != POSTCOPY_NOTIFY_INBOUND_ADVISE || !vmem->prealloc ||
> + !vmem->initial_requested_size) {
> + return 0;
> + }
> + assert(!vmem->size);
> +
> + /*
> + * When creating the device we discard all memory and we don't know
> + * which blocks the source has plugged (and should be preallocated)
> until we
> + * restore the device state. However, we cannot allocate when restoring
> the
> + * device state either if postcopy is already active.
> + *
> + * If we reach this point, postcopy is possible and we have preallocation
> + * enabled.
> + *
> + * Temporarily allocate the requested size to see if there is a
> fundamental
> + * configuration issue that would make postcopy fail because the memory
> + * backend is out of memory. While this increases reliability,
> + * prealloc+postcopy cannot be fully reliable: see the comment in
> + * virtio_mem_post_load().
> + */
> + ret = virtio_mem_prealloc_range(vmem, 0, vmem->initial_requested_size);
> + if (ram_block_discard_range(rb, 0, vmem->initial_requested_size)) {
> + ret = ret ? ret : -EINVAL;
> + return ret;
> + }
> + return 0;
> +}
> +
> static int virtio_mem_post_load(void *opaque, int version_id)
> {
> VirtIOMEM *vmem = VIRTIO_MEM(opaque);
> RamDiscardListener *rdl;
> int ret;
>
> + if (vmem->prealloc) {
> + if (migration_in_incoming_postcopy()) {
> + /*
> + * Prealloc with postcopy cannot possibly work fully reliable in
> + * general: preallocation has to populate all memory immediately
> and
> + * fail gracefully before the guest started running on the
> + * destination while postcopy wants to discard memory and
> populate
> + * on demand after the guest started running on the destination.
> + *
> + * For ordinary memory backends, "prealloc=on" is essentially
> + * overridden by postcopy, which will simply discard preallocated
> + * pages and might fail later when running out of backend memory
> + * when trying to place a page: the earlier preallocation only
> makes
> + * it less likely to fail, but nothing (not even huge page
> + * reservation) will guarantee that postcopy will find a free
> page
> + * to place once the guest is running on the destination.
> + *
> + * We temporarily allocate "requested-size" during
> + * POSTCOPY_NOTIFY_INBOUND_ADVISE, before migrating any memory.
> This
> + * resembles what is done with ordinary memory backends.
> + *
> + * We need to have a matching requested size on source and
> + * destination that we actually temporarily allocated the right
> + * amount of memory. As requested-size changed when restoring the
> + * state, check against the initial value.
> + */
> + if (vmem->requested_size != vmem->initial_requested_size) {
> + error_report("postcopy with 'prealloc=on' needs matching"
> + " 'requested-size' on source and destination");
> + return -EINVAL;
> + }
> + } else {
> + ret = virtio_mem_restore_prealloc(vmem);
> + if (ret) {
> + return ret;
> + }
> + }
> + }
> +
> /*
> * We started out with all memory discarded and our memory region is
> mapped
> * into an address space. Replay, now that we updated the bitmap.
> @@ -1189,6 +1324,7 @@ static void virtio_mem_instance_init(Object *obj)
>
> notifier_list_init(&vmem->size_change_notifiers);
> QLIST_INIT(&vmem->rdl_list);
> + vmem->postcopy_notifier.notify = virtio_mem_postcopy_notify;
>
> object_property_add(obj, VIRTIO_MEM_SIZE_PROP, "size",
> virtio_mem_get_size,
> NULL, NULL, NULL);
> diff --git a/include/hw/virtio/virtio-mem.h b/include/hw/virtio/virtio-mem.h
> index 7745cfc1a3..45395152d2 100644
> --- a/include/hw/virtio/virtio-mem.h
> +++ b/include/hw/virtio/virtio-mem.h
> @@ -61,6 +61,9 @@ struct VirtIOMEM {
> /* requested size */
> uint64_t requested_size;
>
> + /* initial requested size on startup */
> + uint64_t initial_requested_size;
> +
> /* block size and alignment */
> uint64_t block_size;
>
> @@ -77,6 +80,9 @@ struct VirtIOMEM {
> /* notifiers to notify when "size" changes */
> NotifierList size_change_notifiers;
>
> + /* notifier for postcopy events */
> + NotifierWithReturn postcopy_notifier;
> +
> /* listeners to notify on plug/unplug activity. */
> QLIST_HEAD(, RamDiscardListener) rdl_list;
> };
> --
> 2.34.1
>
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK