[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v4 12/15] util: vfio-helpers: Implement ram_block_resized()
From: Murilo Opsfelder Araújo
Subject: Re: [PATCH v4 12/15] util: vfio-helpers: Implement ram_block_resized()
Date: Wed, 25 Mar 2020 12:17:47 -0300
On Thursday, March 5, 2020 11:29:42 AM -03 David Hildenbrand wrote:
> Let's implement ram_block_resized(), allowing resizeable mappings.
>
> For resizeable mappings, we reserve $max_size IOVA address space, but only
> map $size of it. When resizing, unmap the old part and remap the new
> part. We'll need e.g., new ioctl to do this atomically (e.g., to resize
> while the guest is running).
>
> Right now, we only resize RAM blocks during incoming migration (when
> syncing RAM block sizes during the precopy phase) or after guest
> resets when building acpi tables. Any future user of resizeable RAM has to
> be aware that vfio has to be treated with care.
>
> Reviewed-by: Peter Xu <address@hidden>
> Cc: Richard Henderson <address@hidden>
> Cc: Paolo Bonzini <address@hidden>
> Cc: Eduardo Habkost <address@hidden>
> Cc: Marcel Apfelbaum <address@hidden>
> Cc: Alex Williamson <address@hidden>
> Cc: Stefan Hajnoczi <address@hidden>
> Cc: "Dr. David Alan Gilbert" <address@hidden>
> Signed-off-by: David Hildenbrand <address@hidden>
> ---
Acked-by: Murilo Opsfelder Araujo <address@hidden>
> util/trace-events | 7 ++--
> util/vfio-helpers.c | 95 +++++++++++++++++++++++++++++++++++++++------
> 2 files changed, 88 insertions(+), 14 deletions(-)
>
> diff --git a/util/trace-events b/util/trace-events
> index 83b6639018..a4d39eca5e 100644
> --- a/util/trace-events
> +++ b/util/trace-events
> @@ -74,10 +74,11 @@ qemu_mutex_unlock(void *mutex, const char *file, const
> int line) "released mutex
>
> # vfio-helpers.c
> qemu_vfio_dma_reset_temporary(void *s) "s %p"
> -qemu_vfio_ram_block_added(void *s, void *p, size_t size) "s %p host %p size
> 0x%zx" -qemu_vfio_ram_block_removed(void *s, void *p, size_t size) "s %p
> host %p size 0x%zx" +qemu_vfio_ram_block_added(void *s, void *p, size_t
> size, size_t max_size) "s %p host %p size 0x%zx max_size 0x%zx"
> +qemu_vfio_ram_block_removed(void *s, void *p, size_t size, size_t
> max_size) "s %p host %p size 0x%zx max_size 0x%zx"
> +qemu_vfio_ram_block_resized(void *s, void *p, size_t old_size, size_t
> new_size) "s %p host %p old_size 0x%zx new_size 0x%zx"
> qemu_vfio_find_mapping(void *s, void *p) "s %p host %p"
> -qemu_vfio_new_mapping(void *s, void *host, size_t size, int index, uint64_t
> iova) "s %p host %p size %zu index %d iova 0x%"PRIx64
> +qemu_vfio_new_mapping(void *s, void *host, size_t size, size_t max_size,
> int index, uint64_t iova) "s %p host %p size %zu max_size %zu index %d iova
> 0x%"PRIx64 qemu_vfio_do_mapping(void *s, void *host, size_t size, uint64_t
> iova) "s %p host %p size %zu iova 0x%"PRIx64 qemu_vfio_dma_map(void *s,
> void *host, size_t size, bool temporary, uint64_t *iova) "s %p host %p size
> %zu temporary %d iova %p" qemu_vfio_dma_unmap(void *s, void *host) "s %p
> host %p"
> diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
> index f0c77f0d69..789faf38bd 100644
> --- a/util/vfio-helpers.c
> +++ b/util/vfio-helpers.c
> @@ -36,6 +36,7 @@ typedef struct {
> /* Page aligned addr. */
> void *host;
> size_t size;
> + size_t max_size;
> uint64_t iova;
> } IOVAMapping;
>
> @@ -372,14 +373,20 @@ fail_container:
> return ret;
> }
>
> +static int qemu_vfio_dma_map_resizeable(QEMUVFIOState *s, void *host,
> + size_t size, size_t max_size,
> + bool temporary, uint64_t *iova);
> +static void qemu_vfio_dma_map_resize(QEMUVFIOState *s, void *host,
> + size_t old_size, size_t new_size);
> +
> static void qemu_vfio_ram_block_added(RAMBlockNotifier *n, void *host,
> size_t size, size_t max_size)
> {
> QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier);
> int ret;
>
> - trace_qemu_vfio_ram_block_added(s, host, max_size);
> - ret = qemu_vfio_dma_map(s, host, max_size, false, NULL);
> + trace_qemu_vfio_ram_block_added(s, host, size, max_size);
> + ret = qemu_vfio_dma_map_resizeable(s, host, size, max_size, false,
> NULL); if (ret) {
> error_report("qemu_vfio_dma_map(%p, %zu) failed: %s", host,
> max_size, strerror(-ret));
> @@ -391,16 +398,28 @@ static void
> qemu_vfio_ram_block_removed(RAMBlockNotifier *n, void *host, {
> QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier);
> if (host) {
> - trace_qemu_vfio_ram_block_removed(s, host, max_size);
> + trace_qemu_vfio_ram_block_removed(s, host, size, max_size);
> qemu_vfio_dma_unmap(s, host);
> }
> }
>
> +static void qemu_vfio_ram_block_resized(RAMBlockNotifier *n, void *host,
> + size_t old_size, size_t new_size)
> +{
> + QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier);
> +
> + if (host) {
> + trace_qemu_vfio_ram_block_resized(s, host, old_size, new_size);
> + qemu_vfio_dma_map_resize(s, host, old_size, new_size);
> + }
> +}
> +
> static void qemu_vfio_open_common(QEMUVFIOState *s)
> {
> qemu_mutex_init(&s->lock);
> s->ram_notifier.ram_block_added = qemu_vfio_ram_block_added;
> s->ram_notifier.ram_block_removed = qemu_vfio_ram_block_removed;
> + s->ram_notifier.ram_block_resized = qemu_vfio_ram_block_resized;
> s->low_water_mark = QEMU_VFIO_IOVA_MIN;
> s->high_water_mark = QEMU_VFIO_IOVA_MAX;
> ram_block_notifier_add(&s->ram_notifier);
> @@ -495,16 +514,23 @@ static IOVAMapping
> *qemu_vfio_find_mapping(QEMUVFIOState *s, void *host, */
> static IOVAMapping *qemu_vfio_add_mapping(QEMUVFIOState *s,
> void *host, size_t size,
> - int index, uint64_t iova)
> + size_t max_size, int index,
> + uint64_t iova)
> {
> + const IOVAMapping m = {
> + .host = host,
> + .size = size,
> + .max_size = max_size,
> + .iova = iova,
> + };
> int shift;
> - IOVAMapping m = {.host = host, .size = size, .iova = iova};
> IOVAMapping *insert;
>
> assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size));
> + assert(size <= max_size);
> assert(QEMU_IS_ALIGNED(s->low_water_mark, qemu_real_host_page_size));
> assert(QEMU_IS_ALIGNED(s->high_water_mark, qemu_real_host_page_size));
> - trace_qemu_vfio_new_mapping(s, host, size, index, iova);
> + trace_qemu_vfio_new_mapping(s, host, size, max_size, index, iova);
>
> assert(index >= 0);
> s->nr_mappings++;
> @@ -597,9 +623,14 @@ static bool qemu_vfio_verify_mappings(QEMUVFIOState *s)
> * the result in @iova if not NULL. The caller need to make sure the area is
> * aligned to page size, and mustn't overlap with existing mapping areas
> (split * mapping status within this area is not allowed).
> + *
> + * If size < max_size, a region of max_size in IOVA address is reserved,
> such + * that the mapping can later be resized. Resizeable mappings are
> only allowed + * for !temporary mappings.
> */
> -int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
> - bool temporary, uint64_t *iova)
> +static int qemu_vfio_dma_map_resizeable(QEMUVFIOState *s, void *host,
> + size_t size, size_t max_size,
> + bool temporary, uint64_t *iova)
> {
> int ret = 0;
> int index;
> @@ -608,19 +639,24 @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host,
> size_t size,
>
> assert(QEMU_PTR_IS_ALIGNED(host, qemu_real_host_page_size));
> assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size));
> + assert(QEMU_IS_ALIGNED(max_size, qemu_real_host_page_size));
> + assert(size == max_size || !temporary);
> + assert(size <= max_size);
> +
> trace_qemu_vfio_dma_map(s, host, size, temporary, iova);
> qemu_mutex_lock(&s->lock);
> mapping = qemu_vfio_find_mapping(s, host, &index);
> if (mapping) {
> iova0 = mapping->iova + ((uint8_t *)host - (uint8_t
> *)mapping->host); } else {
> - if (s->high_water_mark - s->low_water_mark + 1 < size) {
> + if (s->high_water_mark - s->low_water_mark + 1 < max_size) {
> ret = -ENOMEM;
> goto out;
> }
> if (!temporary) {
> iova0 = s->low_water_mark;
> - mapping = qemu_vfio_add_mapping(s, host, size, index + 1,
> iova0); + mapping = qemu_vfio_add_mapping(s, host, size,
> max_size, index + 1, + iova0);
> if (!mapping) {
> ret = -ENOMEM;
> goto out;
> @@ -631,7 +667,7 @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host,
> size_t size, qemu_vfio_remove_mapping(s, mapping);
> goto out;
> }
> - s->low_water_mark += size;
> + s->low_water_mark += max_size;
> qemu_vfio_dump_mappings(s);
> } else {
> iova0 = s->high_water_mark - size;
> @@ -650,6 +686,12 @@ out:
> return ret;
> }
>
> +int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
> + bool temporary, uint64_t *iova)
> +{
> + return qemu_vfio_dma_map_resizeable(s, host, size, size, temporary,
> iova); +}
> +
> /* Reset the high watermark and free all "temporary" mappings. */
> int qemu_vfio_dma_reset_temporary(QEMUVFIOState *s)
> {
> @@ -694,6 +736,37 @@ out:
> qemu_mutex_unlock(&s->lock);
> }
>
> +static void qemu_vfio_dma_map_resize(QEMUVFIOState *s, void *host,
> + size_t old_size, size_t new_size)
> +{
> + IOVAMapping *m;
> + int index = 0;
> +
> + qemu_mutex_lock(&s->lock);
> + m = qemu_vfio_find_mapping(s, host, &index);
> + if (!m) {
> + return;
> + }
> + assert(m->size == old_size);
> + assert(new_size <= m->max_size);
> +
> + /*
> + * For now, we must unmap the whole mapped range first and remap with
> + * the new size. The reason is that VFIO_IOMMU_UNMAP_DMA might fail
> + * when partially unmapping previous mappings. Although we could add
> new mappings to extend the old range, we won't be able to always
> + * shrink. The side effect is that it's never safe to resize during VM
> + * execution and we'll e.g., need a new IOCTL to make this work.
> + */
> + qemu_vfio_undo_mapping(s, m->iova, m->size);
> + qemu_vfio_do_mapping(s, host, m->iova, new_size);
> +
> + m->size = new_size;
> + assert(qemu_vfio_verify_mappings(s));
> +
> + qemu_mutex_unlock(&s->lock);
> +}
> +
> static void qemu_vfio_reset(QEMUVFIOState *s)
> {
> ioctl(s->device, VFIO_DEVICE_RESET);
--
Murilo
- [PATCH v4 09/15] util/mmap-alloc: Factor out activating of memory to mmap_activate(), (continued)
- [PATCH v4 09/15] util/mmap-alloc: Factor out activating of memory to mmap_activate(), David Hildenbrand, 2020/03/05
- [PATCH v4 10/15] util/mmap-alloc: Prepare for resizeable mmaps, David Hildenbrand, 2020/03/05
- [PATCH v4 11/15] util/mmap-alloc: Implement resizeable mmaps, David Hildenbrand, 2020/03/05
- [PATCH v4 07/15] util/mmap-alloc: Factor out calculation of the pagesize for the guard page, David Hildenbrand, 2020/03/05
- [PATCH v4 13/15] util: oslib: Resizeable anonymous allocations under POSIX, David Hildenbrand, 2020/03/05
- [PATCH v4 12/15] util: vfio-helpers: Implement ram_block_resized(), David Hildenbrand, 2020/03/05
- Re: [PATCH v4 12/15] util: vfio-helpers: Implement ram_block_resized(),
Murilo Opsfelder Araújo <=
- [PATCH v4 14/15] numa: Introduce ram_block_notifiers_support_resize(), David Hildenbrand, 2020/03/05
- [PATCH v4 15/15] exec: Ram blocks with resizeable anonymous allocations under POSIX, David Hildenbrand, 2020/03/05