[PATCH Kernel v24 8/8] vfio: Selective dirty page tracking if IOMMU backed device pins pages

qemu-devel

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH Kernel v24 8/8] vfio: Selective dirty page tracking if IOMMU backed device pins pages

From:	Kirti Wankhede
Subject:	[PATCH Kernel v24 8/8] vfio: Selective dirty page tracking if IOMMU backed device pins pages
Date:	Fri, 29 May 2020 02:00:54 +0530

Add a check so that only singleton IOMMU groups can pin pages.
From the point when the vendor driver pins any pages, consider the IOMMU
group's dirty page scope to be limited to pinned pages.

To avoid walking the group lists often, add a flag,
pinned_page_dirty_scope, that indicates whether the dirty page scope of
all of the vfio_groups for each vfio_domain in the domain_list is limited
to pinned pages. This flag is updated on the first page-pinning request
for an IOMMU group and on attaching/detaching a group.

Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Neo Jia <cjia@nvidia.com>
Reviewed-by: Yan Zhao <yan.y.zhao@intel.com>
---
 drivers/vfio/vfio.c             |  13 +++--
 drivers/vfio/vfio_iommu_type1.c | 103 +++++++++++++++++++++++++++++++++++++---
 include/linux/vfio.h            |   4 +-
 3 files changed, 109 insertions(+), 11 deletions(-)

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 765e0e5d83ed..580099afeaff 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -85,6 +85,7 @@ struct vfio_group {
        atomic_t                        opened;
        wait_queue_head_t               container_q;
        bool                            noiommu;
+       unsigned int                    dev_counter;
        struct kvm                      *kvm;
        struct blocking_notifier_head   notifier;
 };
@@ -555,6 +556,7 @@ struct vfio_device *vfio_group_create_device(struct vfio_group *group,
 
        mutex_lock(&group->device_lock);
        list_add(&device->group_next, &group->device_list);
+       group->dev_counter++;
        mutex_unlock(&group->device_lock);
 
        return device;
@@ -567,6 +569,7 @@ static void vfio_device_release(struct kref *kref)
        struct vfio_group *group = device->group;
 
        list_del(&device->group_next);
+       group->dev_counter--;
        mutex_unlock(&group->device_lock);
 
        dev_set_drvdata(device->dev, NULL);
@@ -1945,6 +1948,9 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
        if (!group)
                return -ENODEV;
 
+       if (group->dev_counter > 1)
+               return -EINVAL;
+
        ret = vfio_group_add_container_user(group);
        if (ret)
                goto err_pin_pages;
@@ -1952,7 +1958,8 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
        container = group->container;
        driver = container->iommu_driver;
        if (likely(driver && driver->ops->pin_pages))
-               ret = driver->ops->pin_pages(container->iommu_data, user_pfn,
+               ret = driver->ops->pin_pages(container->iommu_data,
+                                            group->iommu_group, user_pfn,
                                             npage, prot, phys_pfn);
        else
                ret = -ENOTTY;
@@ -2050,8 +2057,8 @@ int vfio_group_pin_pages(struct vfio_group *group,
        driver = container->iommu_driver;
        if (likely(driver && driver->ops->pin_pages))
                ret = driver->ops->pin_pages(container->iommu_data,
-                                            user_iova_pfn, npage,
-                                            prot, phys_pfn);
+                                            group->iommu_group, user_iova_pfn,
+                                            npage, prot, phys_pfn);
        else
                ret = -ENOTTY;
 
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index f27c29df6fc5..97a29bc04d5d 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -73,6 +73,7 @@ struct vfio_iommu {
        bool                    v2;
        bool                    nesting;
        bool                    dirty_page_tracking;
+       bool                    pinned_page_dirty_scope;
 };
 
 struct vfio_domain {
@@ -100,6 +101,7 @@ struct vfio_group {
        struct iommu_group      *iommu_group;
        struct list_head        next;
        bool                    mdev_group;     /* An mdev group */
+       bool                    pinned_page_dirty_scope;
 };
 
 struct vfio_iova {
@@ -143,6 +145,10 @@ struct vfio_regions {
 
 static int put_pfn(unsigned long pfn, int prot);
 
+static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu,
+                                              struct iommu_group *iommu_group);
+
+static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu);
 /*
  * This code handles mapping and unmapping of user data buffers
  * into DMA'ble space using the IOMMU
@@ -622,11 +628,13 @@ static int vfio_unpin_page_external(struct vfio_dma *dma, dma_addr_t iova,
 }
 
 static int vfio_iommu_type1_pin_pages(void *iommu_data,
+                                     struct iommu_group *iommu_group,
                                      unsigned long *user_pfn,
                                      int npage, int prot,
                                      unsigned long *phys_pfn)
 {
        struct vfio_iommu *iommu = iommu_data;
+       struct vfio_group *group;
        int i, j, ret;
        unsigned long remote_vaddr;
        struct vfio_dma *dma;
@@ -699,8 +707,14 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
                                   (iova - dma->iova) >> pgshift, 1);
                }
        }
-
        ret = i;
+
+       group = vfio_iommu_find_iommu_group(iommu, iommu_group);
+       if (!group->pinned_page_dirty_scope) {
+               group->pinned_page_dirty_scope = true;
+               update_pinned_page_dirty_scope(iommu);
+       }
+
        goto pin_done;
 
 pin_unwind:
@@ -960,8 +974,9 @@ static void vfio_update_pgsize_bitmap(struct vfio_iommu *iommu)
        }
 }
 
-static int update_user_bitmap(u64 __user *bitmap, struct vfio_dma *dma,
-                             dma_addr_t base_iova, size_t pgsize)
+static int update_user_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
+                             struct vfio_dma *dma, dma_addr_t base_iova,
+                             size_t pgsize)
 {
        unsigned long pgshift = __ffs(pgsize);
        unsigned long nbits = dma->size >> pgshift;
@@ -970,8 +985,11 @@ static int update_user_bitmap(u64 __user *bitmap, struct vfio_dma *dma,
        unsigned long shift = bit_offset % BITS_PER_LONG;
        unsigned long leftover;
 
-       /* mark all pages dirty if all pages are pinned and mapped. */
-       if (dma->iommu_mapped)
+       /*
+        * mark all pages dirty if any IOMMU capable device is not able
+        * to report dirty pages and all pages are pinned and mapped.
+        */
+       if (!iommu->pinned_page_dirty_scope && dma->iommu_mapped)
                bitmap_set(dma->bitmap, 0, nbits);
 
        if (shift) {
@@ -1024,7 +1042,7 @@ static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
                if (dma->iova > iova + size - 1)
                        break;
 
-               ret = update_user_bitmap(bitmap, dma, iova, pgsize);
+               ret = update_user_bitmap(bitmap, iommu, dma, iova, pgsize);
                if (ret)
                        return ret;
 
@@ -1169,7 +1187,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
                }
 
                if (unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) {
-                       ret = update_user_bitmap(bitmap->data, dma,
+                       ret = update_user_bitmap(bitmap->data, iommu, dma,
                                                 unmap->iova, pgsize);
                        if (ret)
                                break;
@@ -1521,6 +1539,51 @@ static struct vfio_group *find_iommu_group(struct vfio_domain *domain,
        return NULL;
 }
 
+static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu,
+                                              struct iommu_group *iommu_group)
+{
+       struct vfio_domain *domain;
+       struct vfio_group *group = NULL;
+
+       list_for_each_entry(domain, &iommu->domain_list, next) {
+               group = find_iommu_group(domain, iommu_group);
+               if (group)
+                       return group;
+       }
+
+       if (iommu->external_domain)
+               group = find_iommu_group(iommu->external_domain, iommu_group);
+
+       return group;
+}
+
+static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu)
+{
+       struct vfio_domain *domain;
+       struct vfio_group *group;
+
+       list_for_each_entry(domain, &iommu->domain_list, next) {
+               list_for_each_entry(group, &domain->group_list, next) {
+                       if (!group->pinned_page_dirty_scope) {
+                               iommu->pinned_page_dirty_scope = false;
+                               return;
+                       }
+               }
+       }
+
+       if (iommu->external_domain) {
+               domain = iommu->external_domain;
+               list_for_each_entry(group, &domain->group_list, next) {
+                       if (!group->pinned_page_dirty_scope) {
+                               iommu->pinned_page_dirty_scope = false;
+                               return;
+                       }
+               }
+       }
+
+       iommu->pinned_page_dirty_scope = true;
+}
+
 static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions,
                                  phys_addr_t *base)
 {
@@ -1928,6 +1991,16 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 
                        list_add(&group->next,
                                 &iommu->external_domain->group_list);
+                       /*
+                        * Non-iommu backed group cannot dirty memory directly,
+                        * it can only use interfaces that provide dirty
+                        * tracking.
+                        * The iommu scope can only be promoted with the
+                        * addition of a dirty tracking group.
+                        */
+                       group->pinned_page_dirty_scope = true;
+                       if (!iommu->pinned_page_dirty_scope)
+                               update_pinned_page_dirty_scope(iommu);
                        mutex_unlock(&iommu->lock);
 
                        return 0;
@@ -2051,6 +2124,13 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 done:
        /* Delete the old one and insert new iova list */
        vfio_iommu_iova_insert_copy(iommu, &iova_copy);
+
+       /*
+        * An iommu backed group can dirty memory directly and therefore
+        * demotes the iommu scope until it declares itself dirty tracking
+        * capable via the page pinning interface.
+        */
+       iommu->pinned_page_dirty_scope = false;
        mutex_unlock(&iommu->lock);
        vfio_iommu_resv_free(&group_resv_regions);
 
@@ -2203,6 +2283,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
        struct vfio_iommu *iommu = iommu_data;
        struct vfio_domain *domain;
        struct vfio_group *group;
+       bool update_dirty_scope = false;
        LIST_HEAD(iova_copy);
 
        mutex_lock(&iommu->lock);
@@ -2210,6 +2291,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
        if (iommu->external_domain) {
                group = find_iommu_group(iommu->external_domain, iommu_group);
                if (group) {
+                       update_dirty_scope = !group->pinned_page_dirty_scope;
                        list_del(&group->next);
                        kfree(group);
 
@@ -2239,6 +2321,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
                        continue;
 
                vfio_iommu_detach_group(domain, group);
+               update_dirty_scope = !group->pinned_page_dirty_scope;
                list_del(&group->next);
                kfree(group);
                /*
@@ -2270,6 +2353,12 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
                vfio_iommu_iova_free(&iova_copy);
 
 detach_group_done:
+       /*
+        * Removal of a group without dirty tracking may allow the iommu scope
+        * to be promoted.
+        */
+       if (update_dirty_scope)
+               update_pinned_page_dirty_scope(iommu);
        mutex_unlock(&iommu->lock);
 }
 
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 5d92ee15d098..38d3c6a8dc7e 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -76,7 +76,9 @@ struct vfio_iommu_driver_ops {
                                        struct iommu_group *group);
        void            (*detach_group)(void *iommu_data,
                                        struct iommu_group *group);
-       int             (*pin_pages)(void *iommu_data, unsigned long *user_pfn,
+       int             (*pin_pages)(void *iommu_data,
+                                    struct iommu_group *group,
+                                    unsigned long *user_pfn,
                                     int npage, int prot,
                                     unsigned long *phys_pfn);
        int             (*unpin_pages)(void *iommu_data,
-- 
2.7.0

[Prev in Thread]

Current Thread

[Next in Thread]

[PATCH Kernel v24 0/8] Add UAPIs to support migration for VFIO devices, Kirti Wankhede, 2020/05/28
- [PATCH Kernel v24 1/8] vfio: UAPI for migration interface for device state, Kirti Wankhede, 2020/05/28
- [PATCH Kernel v24 2/8] vfio iommu: Remove atomicity of ref_count of pinned pages, Kirti Wankhede, 2020/05/28
- [PATCH Kernel v24 3/8] vfio iommu: Cache pgsize_bitmap in struct vfio_iommu, Kirti Wankhede, 2020/05/28
- [PATCH Kernel v24 4/8] vfio iommu: Add ioctl definition for dirty pages tracking, Kirti Wankhede, 2020/05/28
- [PATCH Kernel v24 5/8] vfio iommu: Implementation of ioctl for dirty pages tracking, Kirti Wankhede, 2020/05/28
- [PATCH Kernel v24 6/8] vfio iommu: Update UNMAP_DMA ioctl to get dirty bitmap before unmap, Kirti Wankhede, 2020/05/28
- [PATCH Kernel v24 7/8] vfio iommu: Add migration capability to report supported features, Kirti Wankhede, 2020/05/28
- [PATCH Kernel v24 8/8] vfio: Selective dirty page tracking if IOMMU backed device pins pages, Kirti Wankhede <=
- Re: [PATCH Kernel v24 0/8] Add UAPIs to support migration for VFIO devices, Alex Williamson, 2020/05/29

Prev by Date: [PATCH Kernel v24 7/8] vfio iommu: Add migration capability to report supported features
Next by Date: [PATCH 1/2] sev: add sev-inject-launch-secret
Previous by thread: [PATCH Kernel v24 7/8] vfio iommu: Add migration capability to report supported features
Next by thread: Re: [PATCH Kernel v24 0/8] Add UAPIs to support migration for VFIO devices
Index(es):
- Date
- Thread