Re: [PATCH v1 2/2] virtio-mem: Handle preallocation with migration


From: Dr. David Alan Gilbert
Subject: Re: [PATCH v1 2/2] virtio-mem: Handle preallocation with migration
Date: Tue, 25 Jan 2022 11:43:10 +0000
User-agent: Mutt/2.1.5 (2021-12-30)

* David Hildenbrand (david@redhat.com) wrote:
> During precopy we usually write all plugged areas and essentially
> allocate them. However, there are two corner cases:
> 
> 1) Migrating the zeropage
> 
> When the zeropage gets migrated, we first check whether the destination
> range is already zero and avoid performing a write in that case:
> ram_handle_compressed(). If the memory backend, like anonymous RAM or
> most filesystems, populates the shared zeropage when reading a (file)
> hole, we don't preallocate backend memory. In that case, we have to
> explicitly trigger the allocation to get actual backend memory.
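
For reference, the zeropage path mentioned above boils down to a
zero-check before writing, so an all-zero page never forces the backend
to allocate memory. A minimal sketch of the idea (not necessarily the
exact upstream code; buffer_is_zero() is QEMU's zero-detection helper):

  /* Sketch: only write the page if it isn't already zero, so the
   * backend never has to allocate memory for an all-zero page. */
  static void handle_zero_page(void *host, uint8_t ch, uint64_t size)
  {
      if (ch != 0 || !buffer_is_zero(host, size)) {
          memset(host, ch, size);
      }
  }

This is roughly the shape of ram_handle_compressed(); the point for
preallocation is that the skipped write also skips the allocation.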
> 
> 2) Excluding memory ranges during migration
> 
> For example, virtio-balloon free page hinting will exclude some pages
> from getting migrated. In that case, memory for the affected plugged
> ranges never gets allocated during migration.
> 
> So trigger allocation of all plugged ranges when restoring the device state
> and fail gracefully if allocation fails.
> 
> Handling postcopy is a bit trickier, as postcopy and preallocation
> are problematic in general. To at least mimic what ordinary
> preallocation does, temporarily try allocating the requested amount
> of memory and fail postcopy in case the requested size on source and
> destination doesn't match. This way, we have at least checked that
> there isn't a fundamental configuration issue and that we were able to
> preallocate the required amount of memory at least once, instead of
> failing unrecoverably during postcopy later. However, just like
> ordinary preallocation with postcopy, it remains racy.
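
To make the size check concrete: if the source was started with, say,
requested-size=16G and prealloc=on (16G being just an illustrative
value), the destination has to be started with requested-size=16G as
well. At POSTCOPY_NOTIFY_INBOUND_ADVISE the destination then
temporarily preallocates those 16G and discards them again; a mismatch
is reported as an error when restoring the device state, instead of
failing unrecoverably later while the guest is already running on the
destination.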
> 
> Signed-off-by: David Hildenbrand <david@redhat.com>

Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

> ---
>  hw/virtio/virtio-mem.c         | 136 +++++++++++++++++++++++++++++++++
>  include/hw/virtio/virtio-mem.h |   6 ++
>  2 files changed, 142 insertions(+)
> 
> diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
> index 6c337db0a7..f48e684aa9 100644
> --- a/hw/virtio/virtio-mem.c
> +++ b/hw/virtio/virtio-mem.c
> @@ -27,6 +27,7 @@
>  #include "qapi/visitor.h"
>  #include "exec/ram_addr.h"
>  #include "migration/misc.h"
> +#include "migration/postcopy-ram.h"
>  #include "hw/boards.h"
>  #include "hw/qdev-properties.h"
>  #include CONFIG_DEVICES
> @@ -193,6 +194,30 @@ static int virtio_mem_for_each_unplugged_range(const VirtIOMEM *vmem, void *arg,
>      return ret;
>  }
>  
> +static int virtio_mem_for_each_plugged_range(const VirtIOMEM *vmem, void *arg,
> +                                             virtio_mem_range_cb cb)
> +{
> +    unsigned long first_bit, last_bit;
> +    uint64_t offset, size;
> +    int ret = 0;
> +
> +    first_bit = find_first_bit(vmem->bitmap, vmem->bitmap_size);
> +    while (first_bit < vmem->bitmap_size) {
> +        offset = first_bit * vmem->block_size;
> +        last_bit = find_next_zero_bit(vmem->bitmap, vmem->bitmap_size,
> +                                      first_bit + 1) - 1;
> +        size = (last_bit - first_bit + 1) * vmem->block_size;
> +
> +        ret = cb(vmem, arg, offset, size);
> +        if (ret) {
> +            break;
> +        }
> +        first_bit = find_next_bit(vmem->bitmap, vmem->bitmap_size,
> +                                  last_bit + 2);
> +    }
> +    return ret;
> +}
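
A quick worked example of the walk above: with block_size = 2 MiB and a
bitmap in which bits 2..5 are set, find_first_bit() returns 2,
find_next_zero_bit() starting at bit 3 returns 6, so last_bit = 5 and
the callback runs once with offset = 2 * 2 MiB = 4 MiB and
size = (5 - 2 + 1) * 2 MiB = 8 MiB. The next search may start at
last_bit + 2 because bit last_bit + 1 is already known to be zero.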
> +
>  /*
>   * Adjust the memory section to cover the intersection with the given range.
>   *
> @@ -819,6 +844,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
>      if (!vmem->block_size) {
>          vmem->block_size = virtio_mem_default_block_size(rb);
>      }
> +    vmem->initial_requested_size = vmem->requested_size;
>  
>      if (vmem->block_size < page_size) {
>          error_setg(errp, "'%s' property has to be at least the page size (0x%"
> @@ -879,6 +905,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp)
>       */
>      memory_region_set_ram_discard_manager(&vmem->memdev->mr,
>                                            RAM_DISCARD_MANAGER(vmem));
> +    postcopy_add_notifier(&vmem->postcopy_notifier);
>  }
>  
>  static void virtio_mem_device_unrealize(DeviceState *dev)
> @@ -886,6 +913,7 @@ static void virtio_mem_device_unrealize(DeviceState *dev)
>      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>      VirtIOMEM *vmem = VIRTIO_MEM(dev);
>  
> +    postcopy_remove_notifier(&vmem->postcopy_notifier);
>      /*
>       * The unplug handler unmapped the memory region, it cannot be
>       * found via an address space anymore. Unset ourselves.
> @@ -915,12 +943,119 @@ static int virtio_mem_restore_unplugged(VirtIOMEM *vmem)
>                                                 virtio_mem_discard_range_cb);
>  }
>  
> +static int virtio_mem_prealloc_range(const VirtIOMEM *vmem, uint64_t offset,
> +                                     uint64_t size)
> +{
> +    void *area = memory_region_get_ram_ptr(&vmem->memdev->mr) + offset;
> +    int fd = memory_region_get_fd(&vmem->memdev->mr);
> +    Error *local_err = NULL;
> +
> +    os_mem_prealloc(fd, area, size, 1, &local_err);
> +    if (local_err) {
> +        error_report_err(local_err);
> +        return -ENOMEM;
> +    }
> +    return 0;
> +}
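
For context, os_mem_prealloc() makes sure every page in
[area, area + size) is actually populated, reporting failure via errp;
the 1 here is the number of preallocation threads. Conceptually it
boils down to the sketch below (illustrative only: the real helper can
use MADV_POPULATE_WRITE where available and may parallelize the work;
touch_pages is a made-up name):

  /* Illustrative sketch: touch every page with a read-modify-write so
   * the backend allocates it without changing the page's content. */
  static void touch_pages(char *area, size_t size, size_t page_size)
  {
      for (size_t off = 0; off < size; off += page_size) {
          volatile char *p = (volatile char *)(area + off);
          *p = *p;
      }
  }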
> +
> +static int virtio_mem_prealloc_range_cb(const VirtIOMEM *vmem, void *arg,
> +                                        uint64_t offset, uint64_t size)
> +{
> +    return virtio_mem_prealloc_range(vmem, offset, size);
> +}
> +
> +static int virtio_mem_restore_prealloc(VirtIOMEM *vmem)
> +{
> +    /*
> +     * Make sure any preallocated memory is really preallocated. Migration
> +     * might have skipped some pages or optimized for the zeropage.
> +     */
> +    return virtio_mem_for_each_plugged_range(vmem, NULL,
> +                                             virtio_mem_prealloc_range_cb);
> +}
> +
> +static int virtio_mem_postcopy_notify(NotifierWithReturn *notifier,
> +                                      void *opaque)
> +{
> +    struct PostcopyNotifyData *pnd = opaque;
> +    VirtIOMEM *vmem = container_of(notifier, VirtIOMEM, postcopy_notifier);
> +    RAMBlock *rb = vmem->memdev->mr.ram_block;
> +    int ret;
> +
> +    if (pnd->reason != POSTCOPY_NOTIFY_INBOUND_ADVISE || !vmem->prealloc ||
> +        !vmem->initial_requested_size) {
> +        return 0;
> +    }
> +    assert(!vmem->size);
> +
> +    /*
> +     * When creating the device we discard all memory and we don't know
> +     * which blocks the source has plugged (and should be preallocated) until we
> +     * restore the device state. However, we cannot allocate when restoring the
> +     * device state either if postcopy is already active.
> +     *
> +     * If we reach this point, postcopy is possible and we have preallocation
> +     * enabled.
> +     *
> +     * Temporarily allocate the requested size to see if there is a fundamental
> +     * configuration issue that would make postcopy fail because the memory
> +     * backend is out of memory. While this increases reliability,
> +     * prealloc+postcopy cannot be fully reliable: see the comment in
> +     * virtio_mem_post_load().
> +     */
> +    ret = virtio_mem_prealloc_range(vmem, 0, vmem->initial_requested_size);
> +    if (ram_block_discard_range(rb, 0, vmem->initial_requested_size)) {
> +        ret = ret ? ret : -EINVAL;
> +        return ret;
> +    }
> +    return 0;
> +}
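
Net effect of the notifier: at POSTCOPY_NOTIFY_INBOUND_ADVISE, i.e.
before any RAM has been received and before the device state is
restored, the destination preallocates initial_requested_size once to
probe whether the backend can actually provide that much memory, and
ram_block_discard_range() then releases it again so incoming postcopy
still starts from discarded memory. Returning a non-zero value should
make the incoming side fail the postcopy advise phase rather than
failing later while the guest is already running on the destination.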
> +
>  static int virtio_mem_post_load(void *opaque, int version_id)
>  {
>      VirtIOMEM *vmem = VIRTIO_MEM(opaque);
>      RamDiscardListener *rdl;
>      int ret;
>  
> +    if (vmem->prealloc) {
> +        if (migration_in_incoming_postcopy()) {
> +            /*
> +             * Prealloc with postcopy cannot possibly work fully reliably in
> +             * general: preallocation has to populate all memory immediately and
> +             * fail gracefully before the guest starts running on the
> +             * destination, while postcopy wants to discard memory and populate
> +             * on demand after the guest has started running on the destination.
> +             *
> +             * For ordinary memory backends, "prealloc=on" is essentially
> +             * overridden by postcopy, which will simply discard preallocated
> +             * pages and might fail later when running out of backend memory
> +             * when trying to place a page: the earlier preallocation only makes
> +             * it less likely to fail, but nothing (not even huge page
> +             * reservation) will guarantee that postcopy will find a free page
> +             * to place once the guest is running on the destination.
> +             *
> +             * We temporarily allocate "requested-size" during
> +             * POSTCOPY_NOTIFY_INBOUND_ADVISE, before migrating any memory. This
> +             * resembles what is done with ordinary memory backends.
> +             *
> +             * We need a matching requested size on source and destination to
> +             * ensure that we actually temporarily allocated the right amount
> +             * of memory. As requested-size is changed when restoring the
> +             * state, check against the initial value.
> +             */
> +            if (vmem->requested_size != vmem->initial_requested_size) {
> +                error_report("postcopy with 'prealloc=on' needs matching"
> +                             " 'requested-size' on source and destination");
> +                return -EINVAL;
> +            }
> +        } else {
> +            ret = virtio_mem_restore_prealloc(vmem);
> +            if (ret) {
> +                return ret;
> +            }
> +        }
> +    }
> +
>      /*
>      * We started out with all memory discarded and our memory region is mapped
>       * into an address space. Replay, now that we updated the bitmap.
> @@ -1189,6 +1324,7 @@ static void virtio_mem_instance_init(Object *obj)
>  
>      notifier_list_init(&vmem->size_change_notifiers);
>      QLIST_INIT(&vmem->rdl_list);
> +    vmem->postcopy_notifier.notify = virtio_mem_postcopy_notify;
>  
>      object_property_add(obj, VIRTIO_MEM_SIZE_PROP, "size", virtio_mem_get_size,
>                          NULL, NULL, NULL);
> diff --git a/include/hw/virtio/virtio-mem.h b/include/hw/virtio/virtio-mem.h
> index 7745cfc1a3..45395152d2 100644
> --- a/include/hw/virtio/virtio-mem.h
> +++ b/include/hw/virtio/virtio-mem.h
> @@ -61,6 +61,9 @@ struct VirtIOMEM {
>      /* requested size */
>      uint64_t requested_size;
>  
> +    /* initial requested size on startup */
> +    uint64_t initial_requested_size;
> +
>      /* block size and alignment */
>      uint64_t block_size;
>  
> @@ -77,6 +80,9 @@ struct VirtIOMEM {
>      /* notifiers to notify when "size" changes */
>      NotifierList size_change_notifiers;
>  
> +    /* notifier for postcopy events */
> +    NotifierWithReturn postcopy_notifier;
> +
>      /* listeners to notify on plug/unplug activity. */
>      QLIST_HEAD(, RamDiscardListener) rdl_list;
>  };
> -- 
> 2.34.1
> 
-- 
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
