qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v4 12/15] util: vfio-helpers: Implement ram_block_resized()


From: Murilo Opsfelder Araújo
Subject: Re: [PATCH v4 12/15] util: vfio-helpers: Implement ram_block_resized()
Date: Wed, 25 Mar 2020 12:17:47 -0300

On Thursday, March 5, 2020 11:29:42 AM -03 David Hildenbrand wrote:
> Let's implement ram_block_resized(), allowing resizeable mappings.
>
> For resizeable mappings, we reserve $max_size IOVA address space, but only
> map $size of it. When resizing, unmap the old part and remap the new
> part. We'll need e.g., new ioctl to do this atomically (e.g., to resize
> while the guest is running).
>
> Right now, we only resize RAM blocks during incoming migration (when
> syncing RAM block sizes during the precopy phase) or after guest
> resets when building acpi tables. Any future user of resizeable RAM has to
> be aware that vfio has to be treated with care.
>
> Reviewed-by: Peter Xu <address@hidden>
> Cc: Richard Henderson <address@hidden>
> Cc: Paolo Bonzini <address@hidden>
> Cc: Eduardo Habkost <address@hidden>
> Cc: Marcel Apfelbaum <address@hidden>
> Cc: Alex Williamson <address@hidden>
> Cc: Stefan Hajnoczi <address@hidden>
> Cc: "Dr. David Alan Gilbert" <address@hidden>
> Signed-off-by: David Hildenbrand <address@hidden>
> ---

Acked-by: Murilo Opsfelder Araujo <address@hidden>

>  util/trace-events   |  7 ++--
>  util/vfio-helpers.c | 95 +++++++++++++++++++++++++++++++++++++++------
>  2 files changed, 88 insertions(+), 14 deletions(-)
>
> diff --git a/util/trace-events b/util/trace-events
> index 83b6639018..a4d39eca5e 100644
> --- a/util/trace-events
> +++ b/util/trace-events
> @@ -74,10 +74,11 @@ qemu_mutex_unlock(void *mutex, const char *file, const
> int line) "released mutex
>
>  # vfio-helpers.c
>  qemu_vfio_dma_reset_temporary(void *s) "s %p"
> -qemu_vfio_ram_block_added(void *s, void *p, size_t size) "s %p host %p size
> 0x%zx" -qemu_vfio_ram_block_removed(void *s, void *p, size_t size) "s %p
> host %p size 0x%zx" +qemu_vfio_ram_block_added(void *s, void *p, size_t
> size, size_t max_size) "s %p host %p size 0x%zx max_size 0x%zx"
> +qemu_vfio_ram_block_removed(void *s, void *p, size_t size, size_t
> max_size) "s %p host %p size 0x%zx max_size 0x%zx"
> +qemu_vfio_ram_block_resized(void *s, void *p, size_t old_size, size_t
> new_sizze) "s %p host %p old_size 0x%zx new_size 0x%zx"
> qemu_vfio_find_mapping(void *s, void *p) "s %p host %p"
> -qemu_vfio_new_mapping(void *s, void *host, size_t size, int index, uint64_t
> iova) "s %p host %p size %zu index %d iova 0x%"PRIx64
> +qemu_vfio_new_mapping(void *s, void *host, size_t size, size_t max_size,
> int index, uint64_t iova) "s %p host %p size %zu max_size %zu index %d iova
> 0x%"PRIx64 qemu_vfio_do_mapping(void *s, void *host, size_t size, uint64_t
> iova) "s %p host %p size %zu iova 0x%"PRIx64 qemu_vfio_dma_map(void *s,
> void *host, size_t size, bool temporary, uint64_t *iova) "s %p host %p size
> %zu temporary %d iova %p" qemu_vfio_dma_unmap(void *s, void *host) "s %p
> host %p"
> diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
> index f0c77f0d69..789faf38bd 100644
> --- a/util/vfio-helpers.c
> +++ b/util/vfio-helpers.c
> @@ -36,6 +36,7 @@ typedef struct {
>      /* Page aligned addr. */
>      void *host;
>      size_t size;
> +    size_t max_size;
>      uint64_t iova;
>  } IOVAMapping;
>
> @@ -372,14 +373,20 @@ fail_container:
>      return ret;
>  }
>
> +static int qemu_vfio_dma_map_resizeable(QEMUVFIOState *s, void *host,
> +                                        size_t size, size_t max_size,
> +                                        bool temporary, uint64_t *iova);
> +static void qemu_vfio_dma_map_resize(QEMUVFIOState *s, void *host,
> +                                     size_t old_size, size_t new_size);
> +
>  static void qemu_vfio_ram_block_added(RAMBlockNotifier *n, void *host,
>                                        size_t size, size_t max_size)
>  {
>      QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier);
>      int ret;
>
> -    trace_qemu_vfio_ram_block_added(s, host, max_size);
> -    ret = qemu_vfio_dma_map(s, host, max_size, false, NULL);
> +    trace_qemu_vfio_ram_block_added(s, host, size, max_size);
> +    ret = qemu_vfio_dma_map_resizeable(s, host, size, max_size, false,
> NULL); if (ret) {
>          error_report("qemu_vfio_dma_map(%p, %zu) failed: %s", host,
> max_size, strerror(-ret));
> @@ -391,16 +398,28 @@ static void
> qemu_vfio_ram_block_removed(RAMBlockNotifier *n, void *host, {
>      QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier);
>      if (host) {
> -        trace_qemu_vfio_ram_block_removed(s, host, max_size);
> +        trace_qemu_vfio_ram_block_removed(s, host, size, max_size);
>          qemu_vfio_dma_unmap(s, host);
>      }
>  }
>
> +static void qemu_vfio_ram_block_resized(RAMBlockNotifier *n, void *host,
> +                                        size_t old_size, size_t new_size)
> +{
> +    QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier);
> +
> +    if (host) {
> +        trace_qemu_vfio_ram_block_resized(s, host, old_size, new_size);
> +        qemu_vfio_dma_map_resize(s, host, old_size, new_size);
> +    }
> +}
> +
>  static void qemu_vfio_open_common(QEMUVFIOState *s)
>  {
>      qemu_mutex_init(&s->lock);
>      s->ram_notifier.ram_block_added = qemu_vfio_ram_block_added;
>      s->ram_notifier.ram_block_removed = qemu_vfio_ram_block_removed;
> +    s->ram_notifier.ram_block_resized = qemu_vfio_ram_block_resized;
>      s->low_water_mark = QEMU_VFIO_IOVA_MIN;
>      s->high_water_mark = QEMU_VFIO_IOVA_MAX;
>      ram_block_notifier_add(&s->ram_notifier);
> @@ -495,16 +514,23 @@ static IOVAMapping
> *qemu_vfio_find_mapping(QEMUVFIOState *s, void *host, */
>  static IOVAMapping *qemu_vfio_add_mapping(QEMUVFIOState *s,
>                                            void *host, size_t size,
> -                                          int index, uint64_t iova)
> +                                          size_t max_size, int index,
> +                                          uint64_t iova)
>  {
> +    const IOVAMapping m = {
> +        .host = host,
> +        .size = size,
> +        .max_size = max_size,
> +        .iova = iova,
> +    };
>      int shift;
> -    IOVAMapping m = {.host = host, .size = size, .iova = iova};
>      IOVAMapping *insert;
>
>      assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size));
> +    assert(size <= max_size);
>      assert(QEMU_IS_ALIGNED(s->low_water_mark, qemu_real_host_page_size));
>      assert(QEMU_IS_ALIGNED(s->high_water_mark, qemu_real_host_page_size));
> -    trace_qemu_vfio_new_mapping(s, host, size, index, iova);
> +    trace_qemu_vfio_new_mapping(s, host, size, max_size, index, iova);
>
>      assert(index >= 0);
>      s->nr_mappings++;
> @@ -597,9 +623,14 @@ static bool qemu_vfio_verify_mappings(QEMUVFIOState *s)
> * the result in @iova if not NULL. The caller need to make sure the area is
> * aligned to page size, and mustn't overlap with existing mapping areas
> (split * mapping status within this area is not allowed).
> + *
> + * If size < max_size, a region of max_size in IOVA address is reserved,
> such + * that the mapping can later be resized. Resizeable mappings are
> only allowed + * for !temporary mappings.
>   */
> -int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
> -                      bool temporary, uint64_t *iova)
> +static int qemu_vfio_dma_map_resizeable(QEMUVFIOState *s, void *host,
> +                                        size_t size, size_t max_size,
> +                                        bool temporary, uint64_t *iova)
>  {
>      int ret = 0;
>      int index;
> @@ -608,19 +639,24 @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host,
> size_t size,
>
>      assert(QEMU_PTR_IS_ALIGNED(host, qemu_real_host_page_size));
>      assert(QEMU_IS_ALIGNED(size, qemu_real_host_page_size));
> +    assert(QEMU_IS_ALIGNED(max_size, qemu_real_host_page_size));
> +    assert(size == max_size || !temporary);
> +    assert(size <= max_size);
> +
>      trace_qemu_vfio_dma_map(s, host, size, temporary, iova);
>      qemu_mutex_lock(&s->lock);
>      mapping = qemu_vfio_find_mapping(s, host, &index);
>      if (mapping) {
>          iova0 = mapping->iova + ((uint8_t *)host - (uint8_t
> *)mapping->host); } else {
> -        if (s->high_water_mark - s->low_water_mark + 1 < size) {
> +        if (s->high_water_mark - s->low_water_mark + 1 < max_size) {
>              ret = -ENOMEM;
>              goto out;
>          }
>          if (!temporary) {
>              iova0 = s->low_water_mark;
> -            mapping = qemu_vfio_add_mapping(s, host, size, index + 1,
> iova0); +            mapping = qemu_vfio_add_mapping(s, host, size,
> max_size, index + 1, +                                            iova0);
>              if (!mapping) {
>                  ret = -ENOMEM;
>                  goto out;
> @@ -631,7 +667,7 @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host,
> size_t size, qemu_vfio_remove_mapping(s, mapping);
>                  goto out;
>              }
> -            s->low_water_mark += size;
> +            s->low_water_mark += max_size;
>              qemu_vfio_dump_mappings(s);
>          } else {
>              iova0 = s->high_water_mark - size;
> @@ -650,6 +686,12 @@ out:
>      return ret;
>  }
>
> +int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
> +                      bool temporary, uint64_t *iova)
> +{
> +    return qemu_vfio_dma_map_resizeable(s, host, size, size, temporary,
> iova); +}
> +
>  /* Reset the high watermark and free all "temporary" mappings. */
>  int qemu_vfio_dma_reset_temporary(QEMUVFIOState *s)
>  {
> @@ -694,6 +736,37 @@ out:
>      qemu_mutex_unlock(&s->lock);
>  }
>
> +static void qemu_vfio_dma_map_resize(QEMUVFIOState *s, void *host,
> +                                     size_t old_size, size_t new_size)
> +{
> +    IOVAMapping *m;
> +    int index = 0;
> +
> +    qemu_mutex_lock(&s->lock);
> +    m = qemu_vfio_find_mapping(s, host, &index);
> +    if (!m) {
> +        return;
> +    }
> +    assert(m->size == old_size);
> +    assert(new_size <= m->max_size);
> +
> +    /*
> +     * For now, we must unmap the whole mapped range first and remap with
> +     * the new size. The reason is that VFIO_IOMMU_UNMAP_DMA might fail
> +     * when partially unmapping previous mappings. Although we could add
> +     * new mappings to extend the old range, we won't able to always
> +     * shrink. The side effect is that it's never safe to resize during VM
> +     * execution and we'll e.g., need a new IOCTL to make this work.
> +     */
> +    qemu_vfio_undo_mapping(s, m->iova, m->size);
> +    qemu_vfio_do_mapping(s, host, m->iova, new_size);
> +
> +    m->size = new_size;
> +    assert(qemu_vfio_verify_mappings(s));
> +
> +    qemu_mutex_unlock(&s->lock);
> +}
> +
>  static void qemu_vfio_reset(QEMUVFIOState *s)
>  {
>      ioctl(s->device, VFIO_DEVICE_RESET);


--
Murilo



reply via email to

[Prev in Thread] Current Thread [Next in Thread]