qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [RFC v3 16/19] vfio-user: dma map/unmap operations


From: John Johnson
Subject: Re: [RFC v3 16/19] vfio-user: dma map/unmap operations
Date: Tue, 7 Dec 2021 07:50:44 +0000


> On Nov 19, 2021, at 2:42 PM, Alex Williamson <alex.williamson@redhat.com> 
> wrote:
> 
> On Mon,  8 Nov 2021 16:46:44 -0800
> John Johnson <john.g.johnson@oracle.com> wrote:
> 
>> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
>> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
>> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
>> ---
>> hw/vfio/pci.h                 |   1 +
>> hw/vfio/user-protocol.h       |  32 +++++++
>> hw/vfio/user.h                |   1 +
>> include/hw/vfio/vfio-common.h |   4 +
>> hw/vfio/common.c              |  76 +++++++++++++---
>> hw/vfio/pci.c                 |   4 +
>> hw/vfio/user.c                | 206 
>> ++++++++++++++++++++++++++++++++++++++++++
>> 7 files changed, 309 insertions(+), 15 deletions(-)
>> 
>> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
>> index 643ff75..156fee2 100644
>> --- a/hw/vfio/pci.h
>> +++ b/hw/vfio/pci.h
>> @@ -193,6 +193,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserPCIDevice, 
>> VFIO_USER_PCI)
>> struct VFIOUserPCIDevice {
>>     VFIOPCIDevice device;
>>     char *sock_name;
>> +    bool secure_dma;    /* disable shared mem for DMA */
> 
> ????????????  It's there, it's gone, it's back.
> 

        This was a merge mistake.



>> 
>> 
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>> index c0e7632..dcfae2c 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -90,6 +90,8 @@ typedef struct VFIOContainer {
>>     VFIOContIO *io_ops;
>>     bool initialized;
>>     bool dirty_pages_supported;
>> +    bool will_commit;
> 
> The entire will_commit concept hidden in the map and unmap operations
> from many patches ago should be introduced here, or later.
> 

        ok


>> +    bool need_map_fd;
>>     uint64_t dirty_pgsizes;
>>     uint64_t max_dirty_bitmap_size;
>>     unsigned long pgsizes;
>> @@ -210,6 +212,7 @@ struct VFIOContIO {
>>     int (*dirty_bitmap)(VFIOContainer *container,
>>                         struct vfio_iommu_type1_dirty_bitmap *bitmap,
>>                         struct vfio_iommu_type1_dirty_bitmap_get *range);
>> +    void (*wait_commit)(VFIOContainer *container);
>> };
>> 
>> #define CONT_DMA_MAP(cont, map, fd, will_commit) \
>> @@ -218,6 +221,7 @@ struct VFIOContIO {
>>     ((cont)->io_ops->dma_unmap((cont), (unmap), (bitmap), (will_commit)))
>> #define CONT_DIRTY_BITMAP(cont, bitmap, range) \
>>     ((cont)->io_ops->dirty_bitmap((cont), (bitmap), (range)))
>> +#define CONT_WAIT_COMMIT(cont) ((cont)->io_ops->wait_commit(cont))
>> 
>> extern VFIODevIO vfio_dev_io_ioctl;
>> extern VFIOContIO vfio_cont_io_ioctl;
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index fdd2702..0840c8f 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -411,6 +411,7 @@ static int vfio_dma_unmap_bitmap(VFIOContainer 
>> *container,
>>     struct vfio_iommu_type1_dma_unmap *unmap;
>>     struct vfio_bitmap *bitmap;
>>     uint64_t pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size;
>> +    bool will_commit = container->will_commit;
>>     int ret;
>> 
>>     unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap));
>> @@ -444,7 +445,7 @@ static int vfio_dma_unmap_bitmap(VFIOContainer 
>> *container,
>>         goto unmap_exit;
>>     }
>> 
>> -    ret = CONT_DMA_UNMAP(container, unmap, bitmap, false);
>> +    ret = CONT_DMA_UNMAP(container, unmap, bitmap, will_commit);
>>     if (!ret) {
>>         cpu_physical_memory_set_dirty_lebitmap((unsigned long *)bitmap->data,
>>                 iotlb->translated_addr, pages);
>> @@ -471,16 +472,17 @@ static int vfio_dma_unmap(VFIOContainer *container,
>>         .iova = iova,
>>         .size = size,
>>     };
>> +    bool will_commit = container->will_commit;
>> 
>>     if (iotlb && container->dirty_pages_supported &&
>>         vfio_devices_all_running_and_saving(container)) {
>>         return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
>>     }
>> 
>> -    return CONT_DMA_UNMAP(container, &unmap, NULL, false);
>> +    return CONT_DMA_UNMAP(container, &unmap, NULL, will_commit);
> 
> We're passing the container, why do we need a separate will_commit arg
> for these?
> 

        ok

> 
>> }
>> 
>> -static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
>> +static int vfio_dma_map(VFIOContainer *container, MemoryRegion *mr, hwaddr 
>> iova,
>>                         ram_addr_t size, void *vaddr, bool readonly)
>> {
>>     struct vfio_iommu_type1_dma_map map = {
>> @@ -490,13 +492,23 @@ static int vfio_dma_map(VFIOContainer *container, 
>> hwaddr iova,
>>         .iova = iova,
>>         .size = size,
>>     };
>> -    int ret;
>> +    int fd, ret;
>> +    bool will_commit = container->will_commit;
>> 
>>     if (!readonly) {
>>         map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
>>     }
>> 
>> -    ret = CONT_DMA_MAP(container, &map, -1, false);
>> +    if (container->need_map_fd) {
>> +        fd = memory_region_get_fd(mr);
>> +        if (fd != -1) {
>> +            map.vaddr = qemu_ram_block_host_offset(mr->ram_block, vaddr);
>> +        }
>> +    } else {
>> +        fd = -1;
>> +    }
>> +
>> +    ret = CONT_DMA_MAP(container, &map, fd, will_commit);
> 
> Why were we even passing a -1 fd previously?  Would it make more sense
> to pass the mr and put this in the user variant .map_dma?  We're going
> to the trouble to pass the mr down this far here.  If the map callback
> handled the above fd and map.vaddr we could also avoid of the
> need_map_fd flag on the container.
> 

        ok

>> 
>> 
>> +static void vfio_listener_begin(MemoryListener *listener)
>> +{
>> +    VFIOContainer *container = container_of(listener, VFIOContainer, 
>> listener);
>> +
>> +    /* cannot drop BQL during the transaction, send maps/demaps async */
>> +    container->will_commit = true;
>> +}
>> +
>> +static void vfio_listener_commit(MemoryListener *listener)
>> +{
>> +    VFIOContainer *container = container_of(listener, VFIOContainer, 
>> listener);
>> +
>> +    /* wait for any async requests sent during the transaction */
>> +    CONT_WAIT_COMMIT(container);
>> +    container->will_commit = false;
>> +}
> 
> Not sure I follow the semantics, when would the map/unmap callbacks get
> called when will_commit is false?
> 

        If map/unmap is called outside a transaction, it can drop the BQL and
wait.  For unmap/map inside a transaction, they're sent async, and waited
for after the commit (when it's safe to drop the BQL while we wait).


> Does it really make sense to have macros for ops that we call in one
> place?
> 

        I did it this way so all container ops calls are done the same way.


>> \
>> static const MemoryListener vfio_memory_listener = {
>> +    .begin = vfio_listener_begin,
>> +    .commit = vfio_listener_commit,
>>     .region_add = vfio_listener_region_add,
>>     .region_del = vfio_listener_region_del,
>>     .log_global_start = vfio_listener_log_global_start,
>> @@ -1561,6 +1598,7 @@ int vfio_region_setup(Object *obj, VFIODevice 
>> *vbasedev, VFIORegion *region,
>>     region->size = info->size;
>>     region->fd_offset = info->offset;
>>     region->nr = index;
>> +    region->post_wr = false;
> 
> Should this be in a different patch?  It looks unrelated.
> 

        I think you said as much in the patch that introduced the region write
ops call.


>>     region->remfd = vfio_get_region_info_remfd(vbasedev, index);
>> 
>>     if (region->size) {
>> @@ -2047,6 +2085,7 @@ static int vfio_connect_container(VFIOGroup *group, 
>> AddressSpace *as,
>>     container->dirty_pages_supported = false;
>>     container->dma_max_mappings = 0;
>>     container->io_ops = &vfio_cont_io_ioctl;
>> +    container->need_map_fd = false;
>>     QLIST_INIT(&container->giommu_list);
>>     QLIST_INIT(&container->hostwin_list);
>>     QLIST_INIT(&container->vrdl_list);
>> @@ -2230,6 +2269,7 @@ void vfio_connect_proxy(VFIOProxy *proxy, VFIOGroup 
>> *group, AddressSpace *as)
>>     container->space = space;
>>     container->fd = -1;
>>     container->io_ops = &vfio_cont_io_sock;
>> +    container->need_map_fd = (proxy->flags & VFIO_PROXY_SECURE) == 0;
>>     QLIST_INIT(&container->giommu_list);
>>     QLIST_INIT(&container->hostwin_list);
>>     container->proxy = proxy;
>> @@ -2879,8 +2919,14 @@ static int vfio_io_dirty_bitmap(VFIOContainer 
>> *container,
>>     return ret;
>> }
>> 
>> +static void vfio_io_wait_commit(VFIOContainer *container)
>> +{
>> +    /* ioctl()s are synchronous */
>> +}
>> +
> 
> Maybe these should just be "dma_commit" rather than "wait_commit"?  I'd
> also tend to suggest "async" rather than "will_commit".
> 

        ok


>> VFIOContIO vfio_cont_io_ioctl = {
>>     .dma_map = vfio_io_dma_map,
>>     .dma_unmap = vfio_io_dma_unmap,
>>     .dirty_bitmap = vfio_io_dirty_bitmap,
>> +    .wait_commit = vfio_io_wait_commit,
>> };
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index d657b01..ca821da 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -3516,6 +3516,9 @@ static void vfio_user_pci_realize(PCIDevice *pdev, 
>> Error **errp)
>>     vbasedev->proxy = proxy;
>>     vfio_user_set_handler(vbasedev, vfio_user_pci_process_req, vdev);
>> 
>> +    if (udev->secure_dma) {
>> +        proxy->flags |= VFIO_PROXY_SECURE;
>> +    }
>>     if (udev->send_queued) {
>>         proxy->flags |= VFIO_PROXY_FORCE_QUEUED;
>>     }
>> @@ -3686,6 +3689,7 @@ static void vfio_user_instance_finalize(Object *obj)
>> 
>> static Property vfio_user_pci_dev_properties[] = {
>>     DEFINE_PROP_STRING("socket", VFIOUserPCIDevice, sock_name),
>> +    DEFINE_PROP_BOOL("secure-dma", VFIOUserPCIDevice, secure_dma, false),
> 
> "secure_dma" looks entirely compartmentalized that it could be a
> separate patch.  Thanks,
> 

        ok

                JJ




reply via email to

[Prev in Thread] Current Thread [Next in Thread]