qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

RE: [RFC v4 20/21] vfio-user: migration support


From: Thanos Makatos
Subject: RE: [RFC v4 20/21] vfio-user: migration support
Date: Fri, 11 Feb 2022 13:31:09 +0000


> -----Original Message-----
> From: Qemu-devel <qemu-devel-
> bounces+thanos.makatos=nutanix.com@nongnu.org> On Behalf Of John
> Johnson
> Sent: 12 January 2022 00:44
> To: qemu-devel@nongnu.org
> Subject: [RFC v4 20/21] vfio-user: migration support
> 
> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> ---
>  hw/vfio/user-protocol.h | 18 +++++++++++++++++
>  hw/vfio/migration.c     | 30 +++++++++++++--------------
>  hw/vfio/pci.c           |  7 +++++++
>  hw/vfio/user.c          | 54
> +++++++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 93 insertions(+), 16 deletions(-)
> 
> diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h
> index 8932311..abe7002 100644
> --- a/hw/vfio/user-protocol.h
> +++ b/hw/vfio/user-protocol.h
> @@ -193,6 +193,10 @@ typedef struct {
>      char data[];
>  } VFIOUserDMARW;
> 
> +/*
> + * VFIO_USER_DIRTY_PAGES
> + */
> +
>  /*imported from struct vfio_bitmap */
>  typedef struct {
>      uint64_t pgsize;
> @@ -200,4 +204,18 @@ typedef struct {
>      char data[];
>  } VFIOUserBitmap;
> 
> +/* imported from struct vfio_iommu_type1_dirty_bitmap_get */
> +typedef struct {
> +    uint64_t iova;
> +    uint64_t size;
> +    VFIOUserBitmap bitmap;
> +} VFIOUserBitmapRange;
> +
> +/* imported from struct vfio_iommu_type1_dirty_bitmap */
> +typedef struct {
> +    VFIOUserHdr hdr;
> +    uint32_t argsz;
> +    uint32_t flags;
> +} VFIOUserDirtyPages;
> +
>  #endif /* VFIO_USER_PROTOCOL_H */
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index ff6b45d..df63f5c 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -27,6 +27,7 @@
>  #include "pci.h"
>  #include "trace.h"
>  #include "hw/hw.h"
> +#include "user.h"
> 
>  /*
>   * Flags to be used as unique delimiters for VFIO devices in the migration
> @@ -49,11 +50,13 @@ static int64_t bytes_transferred;
>  static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count,
>                                    off_t off, bool iswrite)
>  {
> +    VFIORegion *region = &vbasedev->migration->region;
>      int ret;
> 
> -    ret = iswrite ? pwrite(vbasedev->fd, val, count, off) :
> -                    pread(vbasedev->fd, val, count, off);
> -    if (ret < count) {
> +    ret = iswrite ?
> +        VDEV_REGION_WRITE(vbasedev, region->nr, off, count, val, false) :
> +        VDEV_REGION_READ(vbasedev, region->nr, off, count, val);
> +     if (ret < count) {
>          error_report("vfio_mig_%s %d byte %s: failed at offset 0x%"
>                       HWADDR_PRIx", err: %s", iswrite ? "write" : "read", 
> count,
>                       vbasedev->name, off, strerror(errno));
> @@ -111,9 +114,7 @@ static int vfio_migration_set_state(VFIODevice
> *vbasedev, uint32_t mask,
>                                      uint32_t value)
>  {
>      VFIOMigration *migration = vbasedev->migration;
> -    VFIORegion *region = &migration->region;
> -    off_t dev_state_off = region->fd_offset +
> -                          VFIO_MIG_STRUCT_OFFSET(device_state);
> +    off_t dev_state_off = VFIO_MIG_STRUCT_OFFSET(device_state);
>      uint32_t device_state;
>      int ret;
> 
> @@ -201,13 +202,13 @@ static int vfio_save_buffer(QEMUFile *f, VFIODevice
> *vbasedev, uint64_t *size)
>      int ret;
> 
>      ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
> -                      region->fd_offset + 
> VFIO_MIG_STRUCT_OFFSET(data_offset));
> +                        VFIO_MIG_STRUCT_OFFSET(data_offset));
>      if (ret < 0) {
>          return ret;
>      }
> 
>      ret = vfio_mig_read(vbasedev, &data_size, sizeof(data_size),
> -                        region->fd_offset + 
> VFIO_MIG_STRUCT_OFFSET(data_size));
> +                        VFIO_MIG_STRUCT_OFFSET(data_size));
>      if (ret < 0) {
>          return ret;
>      }
> @@ -233,8 +234,7 @@ static int vfio_save_buffer(QEMUFile *f, VFIODevice
> *vbasedev, uint64_t *size)
>              }
>              buf_allocated = true;
> 
> -            ret = vfio_mig_read(vbasedev, buf, sec_size,
> -                                region->fd_offset + data_offset);
> +            ret = vfio_mig_read(vbasedev, buf, sec_size, data_offset);
>              if (ret < 0) {
>                  g_free(buf);
>                  return ret;
> @@ -269,7 +269,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice
> *vbasedev,
> 
>      do {
>          ret = vfio_mig_read(vbasedev, &data_offset, sizeof(data_offset),
> -                      region->fd_offset + 
> VFIO_MIG_STRUCT_OFFSET(data_offset));
> +                            VFIO_MIG_STRUCT_OFFSET(data_offset));
>          if (ret < 0) {
>              return ret;
>          }
> @@ -309,8 +309,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice
> *vbasedev,
>              qemu_get_buffer(f, buf, sec_size);
> 
>              if (buf_alloc) {
> -                ret = vfio_mig_write(vbasedev, buf, sec_size,
> -                        region->fd_offset + data_offset);
> +                ret = vfio_mig_write(vbasedev, buf, sec_size, data_offset);
>                  g_free(buf);
> 
>                  if (ret < 0) {
> @@ -322,7 +321,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice
> *vbasedev,
>          }
> 
>          ret = vfio_mig_write(vbasedev, &report_size, sizeof(report_size),
> -                        region->fd_offset + 
> VFIO_MIG_STRUCT_OFFSET(data_size));
> +                             VFIO_MIG_STRUCT_OFFSET(data_size));
>          if (ret < 0) {
>              return ret;
>          }
> @@ -334,12 +333,11 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice
> *vbasedev,
>  static int vfio_update_pending(VFIODevice *vbasedev)
>  {
>      VFIOMigration *migration = vbasedev->migration;
> -    VFIORegion *region = &migration->region;
>      uint64_t pending_bytes = 0;
>      int ret;
> 
>      ret = vfio_mig_read(vbasedev, &pending_bytes, sizeof(pending_bytes),
> -                    region->fd_offset + 
> VFIO_MIG_STRUCT_OFFSET(pending_bytes));
> +                        VFIO_MIG_STRUCT_OFFSET(pending_bytes));
>      if (ret < 0) {
>          migration->pending_bytes = 0;
>          return ret;
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index d47b98e..598e9ed 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -3677,6 +3677,13 @@ static void vfio_user_pci_realize(PCIDevice *pdev,
> Error **errp)
>          goto out_teardown;
>      }
> 
> +    if (!pdev->failover_pair_id) {
> +        ret = vfio_migration_probe(&vdev->vbasedev, errp);
> +        if (ret) {
> +            error_report("%s: Migration disabled", vdev->vbasedev.name);
> +        }
> +    }
> +
>      vfio_register_err_notifier(vdev);
>      vfio_register_req_notifier(vdev);
> 
> diff --git a/hw/vfio/user.c b/hw/vfio/user.c
> index 33d8f06..2eac62a 100644
> --- a/hw/vfio/user.c
> +++ b/hw/vfio/user.c
> @@ -1410,6 +1410,52 @@ void vfio_user_reset(VFIOProxy *proxy)
>      }
>  }
> 
> +static int vfio_user_dirty_bitmap(VFIOProxy *proxy,
> +                                  struct vfio_iommu_type1_dirty_bitmap *cmd,
> +                                  struct vfio_iommu_type1_dirty_bitmap_get
> +                                  *dbitmap)
> +{
> +    g_autofree struct {
> +        VFIOUserDirtyPages msg;
> +        VFIOUserBitmapRange range;
> +    } *msgp = NULL;
> +    int msize, rsize;
> +
> +    /*
> +     * If just the command is sent, the returned bitmap isn't needed.
> +     * The bitmap structs are different from the ioctl() versions,
> +     * ioctl() returns the bitmap in a local VA
> +     */
> +    if (dbitmap != NULL) {
> +        msize = sizeof(*msgp);
> +        rsize = msize + dbitmap->bitmap.size;
> +        msgp = g_malloc0(rsize);
> +        msgp->range.iova = dbitmap->iova;
> +        msgp->range.size = dbitmap->size;
> +        msgp->range.bitmap.pgsize = dbitmap->bitmap.pgsize;
> +        msgp->range.bitmap.size = dbitmap->bitmap.size;
> +    } else {
> +        msize = rsize = sizeof(VFIOUserDirtyPages);
> +        msgp = g_malloc0(rsize);
> +    }
> +
> +    vfio_user_request_msg(&msgp->msg.hdr, VFIO_USER_DIRTY_PAGES, msize,
> 0);
> +    msgp->msg.argsz = rsize - sizeof(VFIOUserHdr);
> +    msgp->msg.flags = cmd->flags;
> +
> +    vfio_user_send_wait(proxy, &msgp->msg.hdr, NULL, rsize, false);
> +    if (msgp->msg.hdr.flags & VFIO_USER_ERROR) {
> +        return -msgp->msg.hdr.error_reply;
> +    }

We need to check argsz in the response, in which case the client needs to retry 
with a larger argsz.

> +
> +    if (dbitmap != NULL) {
> +        memcpy(dbitmap->bitmap.data, &msgp->range.bitmap.data,
> +               dbitmap->bitmap.size);
> +    }
> +
> +    return 0;
> +}
> +
> 
>  /*
>   * Socket-based io_ops
> @@ -1530,6 +1576,13 @@ static int vfio_user_io_dma_unmap(VFIOContainer
> *container,
>                                 container->async_ops);
>  }
> 
> +static int vfio_user_io_dirty_bitmap(VFIOContainer *container,
> +                        struct vfio_iommu_type1_dirty_bitmap *bitmap,
> +                        struct vfio_iommu_type1_dirty_bitmap_get *range)
> +{
> +    return vfio_user_dirty_bitmap(container->proxy, bitmap, range);
> +}
> +
>  static void vfio_user_io_wait_commit(VFIOContainer *container)
>  {
>      vfio_user_wait_reqs(container->proxy);
> @@ -1538,5 +1591,6 @@ static void vfio_user_io_wait_commit(VFIOContainer
> *container)
>  VFIOContIO vfio_cont_io_sock = {
>      .dma_map = vfio_user_io_dma_map,
>      .dma_unmap = vfio_user_io_dma_unmap,
> +    .dirty_bitmap = vfio_user_io_dirty_bitmap,
>      .wait_commit = vfio_user_io_wait_commit,
>  };
> --
> 1.8.3.1
> 


reply via email to

[Prev in Thread] Current Thread [Next in Thread]