
From: Kirti Wankhede
Subject: Re: [PATCH v26 13/17] vfio: create mapped iova list when vIOMMU is enabled
Date: Mon, 19 Oct 2020 11:31:03 +0530
User-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:68.0) Gecko/20100101 Thunderbird/68.12.0



On 9/26/2020 3:53 AM, Alex Williamson wrote:
> On Wed, 23 Sep 2020 04:54:15 +0530
> Kirti Wankhede <kwankhede@nvidia.com> wrote:

Create a mapped-iova list when vIOMMU is enabled. For each mapped iova,
save the translated address. Add a node to the list on MAP and remove
the node on UNMAP.
This list is used to track dirty pages during migration.

Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
---
  hw/vfio/common.c              | 58 ++++++++++++++++++++++++++++++++++++++-----
  include/hw/vfio/vfio-common.h |  8 ++++++
  2 files changed, 60 insertions(+), 6 deletions(-)
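
(For context on how this list gets consumed: during migration the tracker
would be walked roughly as sketched below. vfio_get_dirty_bitmap() is
assumed from elsewhere in this series, and the helper name here is
hypothetical, not part of this patch.)

/*
 * Hypothetical consumer, sketch only: walk the recorded ranges and
 * query the kernel's dirty bitmap for each iova -> ram_addr
 * translation recorded by this patch.
 */
static void vfio_giommu_sync_dirty_bitmap(VFIOContainer *container,
                                          VFIOGuestIOMMU *giommu)
{
    VFIOIovaRange *range;

    QLIST_FOREACH(range, &giommu->iova_list, next) {
        vfio_get_dirty_bitmap(container, range->iova, range->size,
                              range->ram_addr);
    }
}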

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index d4959c036dd1..dc56cded2d95 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -407,8 +407,8 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
  }
 /* Called with rcu_read_lock held. */
-static bool vfio_get_vaddr(IOMMUTLBEntry *iotlb, void **vaddr,
-                           bool *read_only)
+static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
+                               ram_addr_t *ram_addr, bool *read_only)
  {
      MemoryRegion *mr;
      hwaddr xlat;
@@ -439,8 +439,17 @@ static bool vfio_get_vaddr(IOMMUTLBEntry *iotlb, void **vaddr,
          return false;
      }
-    *vaddr = memory_region_get_ram_ptr(mr) + xlat;
-    *read_only = !writable || mr->readonly;
+    if (vaddr) {
+        *vaddr = memory_region_get_ram_ptr(mr) + xlat;
+    }
+
+    if (ram_addr) {
+        *ram_addr = memory_region_get_ram_addr(mr) + xlat;
+    }
+
+    if (read_only) {
+        *read_only = !writable || mr->readonly;
+    }
      return true;
  }
@@ -450,7 +459,6 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
      VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
      VFIOContainer *container = giommu->container;
      hwaddr iova = iotlb->iova + giommu->iommu_offset;
-    bool read_only;
      void *vaddr;
      int ret;
@@ -466,7 +474,10 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
      rcu_read_lock();
      if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
-        if (!vfio_get_vaddr(iotlb, &vaddr, &read_only)) {
+        ram_addr_t ram_addr;
+        bool read_only;
+
+        if (!vfio_get_xlat_addr(iotlb, &vaddr, &ram_addr, &read_only)) {
              goto out;
          }
          /*
@@ -484,8 +495,28 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
                           "0x%"HWADDR_PRIx", %p) = %d (%m)",
                           container, iova,
                           iotlb->addr_mask + 1, vaddr, ret);
+        } else {
+            VFIOIovaRange *iova_range;
+
+            iova_range = g_malloc0(sizeof(*iova_range));
+            iova_range->iova = iova;
+            iova_range->size = iotlb->addr_mask + 1;
+            iova_range->ram_addr = ram_addr;
+
+            QLIST_INSERT_HEAD(&giommu->iova_list, iova_range, next);
          }
      } else {
+        VFIOIovaRange *iova_range, *tmp;
+
+        QLIST_FOREACH_SAFE(iova_range, &giommu->iova_list, next, tmp) {
+            if (iova_range->iova >= iova &&
+                iova_range->iova + iova_range->size <= iova +
+                                                       iotlb->addr_mask + 1) {
+                QLIST_REMOVE(iova_range, next);
+                g_free(iova_range);
+            }
+        }
+


> This is some pretty serious overhead... can't we trigger a replay when
> migration is enabled to build this information then?

Are you suggesting that we call memory_region_iommu_replay() before
vfio_sync_dirty_bitmap(), which would invoke vfio_iommu_map_notify(),
where the iova list of mappings is maintained? Then, in the notifier,
check whether migration_is_running() and
container->dirty_pages_supported == true, and only then create the iova
mapping tree? In that case, how would we know that the notification was
triggered by
vfio_sync_dirty_bitmap()
 -> memory_region_iommu_replay()
so that we don't have to call vfio_dma_map()?
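
Something along these lines, as a rough sketch (migration_is_running()
and container->dirty_pages_supported as named above; the
giommu->replaying flag and vfio_record_iova_range() helper are purely
hypothetical, standing in for whatever would distinguish a replay walk
from a real guest MAP):

    if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
        ram_addr_t ram_addr;
        bool read_only;

        if (!vfio_get_xlat_addr(iotlb, &vaddr, &ram_addr, &read_only)) {
            goto out;
        }

        /* Record the translation only while dirty tracking is active. */
        if (migration_is_running() && container->dirty_pages_supported) {
            vfio_record_iova_range(giommu, iova, iotlb->addr_mask + 1,
                                   ram_addr);    /* hypothetical helper */
        }

        /* A replay walks mappings that already exist in the container;
         * calling vfio_dma_map() again for them would fail. */
        if (giommu->replaying) {                 /* hypothetical flag */
            goto out;
        }

        ret = vfio_dma_map(container, iova, iotlb->addr_mask + 1,
                           vaddr, read_only);
        ...
    }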

> We're looking at potentially thousands of entries, so a list is
> probably also not a good choice.

Changing it to a tree.
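
For reference, a minimal sketch of that direction using GLib's GTree
(QEMU already links against GLib); the comparator, helper names, and
ownership choices below are illustrative, not the final patch.
VFIOIovaRange is the struct from this patch, minus the QLIST_ENTRY,
which a tree no longer needs:

/* Ranges are keyed by start iova; MAP/UNMAP ranges never overlap, so
 * ordering on the start address alone is sufficient. */
static gint vfio_iova_range_cmp(gconstpointer a, gconstpointer b,
                                gpointer user_data)
{
    const VFIOIovaRange *ra = a, *rb = b;

    return ra->iova < rb->iova ? -1 : (ra->iova > rb->iova ? 1 : 0);
}

/* Created once per gIOMMU; the tree owns the nodes, so g_free() runs
 * automatically on removal and on g_tree_destroy():
 *     giommu->iova_tree = g_tree_new_full(vfio_iova_range_cmp, NULL,
 *                                         g_free, NULL);
 */

static void vfio_iova_tree_map(GTree *tree, hwaddr iova, size_t size,
                               ram_addr_t ram_addr)
{
    VFIOIovaRange *range = g_new0(VFIOIovaRange, 1);

    range->iova = iova;
    range->size = size;
    range->ram_addr = ram_addr;
    g_tree_insert(tree, range, range);
}

static void vfio_iova_tree_unmap(GTree *tree, hwaddr iova)
{
    VFIOIovaRange key = { .iova = iova };

    /* Exact-match removal; the overlap walk in this patch would instead
     * collect matches via g_tree_foreach() and remove them afterwards. */
    g_tree_remove(tree, &key);
}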

Thanks,
Kirti

> I don't think it's acceptable to incur this even when not
> migrating (ie. the vast majority of the time).  Thanks,
>
> Alex

          ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1);
          if (ret) {
              error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
@@ -642,6 +673,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
              g_free(giommu);
              goto fail;
          }
+        QLIST_INIT(&giommu->iova_list);
          QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
          memory_region_iommu_replay(giommu->iommu, &giommu->n);
@@ -740,6 +772,13 @@ static void vfio_listener_region_del(MemoryListener *listener,
          QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
              if (MEMORY_REGION(giommu->iommu) == section->mr &&
                  giommu->n.start == section->offset_within_region) {
+                VFIOIovaRange *iova_range, *tmp;
+
+                QLIST_FOREACH_SAFE(iova_range, &giommu->iova_list, next, tmp) {
+                    QLIST_REMOVE(iova_range, next);
+                    g_free(iova_range);
+                }
+
                  memory_region_unregister_iommu_notifier(section->mr,
                                                          &giommu->n);
                  QLIST_REMOVE(giommu, giommu_next);
@@ -1541,6 +1580,13 @@ static void vfio_disconnect_container(VFIOGroup *group)
          QLIST_REMOVE(container, next);
        QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) {
+            VFIOIovaRange *iova_range, *itmp;
+
+            QLIST_FOREACH_SAFE(iova_range, &giommu->iova_list, next, itmp) {
+                QLIST_REMOVE(iova_range, next);
+                g_free(iova_range);
+            }
+
              memory_region_unregister_iommu_notifier(
                      MEMORY_REGION(giommu->iommu), &giommu->n);
              QLIST_REMOVE(giommu, giommu_next);
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 0a1651eda2d0..aa7524fe2cc5 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -89,11 +89,19 @@ typedef struct VFIOContainer {
      QLIST_ENTRY(VFIOContainer) next;
  } VFIOContainer;
+typedef struct VFIOIovaRange {
+    hwaddr iova;
+    size_t size;
+    ram_addr_t ram_addr;
+    QLIST_ENTRY(VFIOIovaRange) next;
+} VFIOIovaRange;
+
  typedef struct VFIOGuestIOMMU {
      VFIOContainer *container;
      IOMMUMemoryRegion *iommu;
      hwaddr iommu_offset;
      IOMMUNotifier n;
+    QLIST_HEAD(, VFIOIovaRange) iova_list;
      QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
  } VFIOGuestIOMMU;



