qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [RFC PATCH] vfio: spapr: Move SPAPR-related code to a s


From: Alex Williamson
Subject: Re: [Qemu-devel] [RFC PATCH] vfio: spapr: Move SPAPR-related code to a separate file
Date: Tue, 03 Mar 2015 13:24:55 -0700

On Mon, 2015-02-23 at 19:22 +1100, Alexey Kardashevskiy wrote:
> This moves SPAPR bits to a separate file to avoid pollution of x86 code.
> 
> This is a mechanical patch.
> 
> Signed-off-by: Alexey Kardashevskiy <address@hidden>
> ---
> 
> There is another patch coming with DMA memory preregistration which will
> add another listener on RAM (not IOMMU as in previously posted patchset which is
> wrong); having these two in the same file with "type1" might be confusing.
> 
> Does this make sense? There is some code duplication, is there any sense
> to move them all to helpers?

I guess I don't have a significant objection to this.  It does sort of
beg the question whether type1 should also have its memory listener
functions pulled out to a separate file as well, in which case yes, we
may want to create helpers to avoid the code duplication.  I'm also a
little disappointed that this seems like it might be removing guest
IOMMU support from common paths, because we'll probably need to deal
with that eventually for x86/type1, but I expect it's currently broken
for anything other than spapr anyway.  Thanks,

Alex

> ---
>  hw/vfio/Makefile.objs         |   1 +
>  hw/vfio/common.c              | 134 ++-----------------------
>  hw/vfio/spapr.c               | 226 
> ++++++++++++++++++++++++++++++++++++++++++
>  include/hw/vfio/vfio-common.h |  13 +++
>  4 files changed, 246 insertions(+), 128 deletions(-)
>  create mode 100644 hw/vfio/spapr.c
> 
> diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs
> index e31f30e..92d32b6 100644
> --- a/hw/vfio/Makefile.objs
> +++ b/hw/vfio/Makefile.objs
> @@ -1,4 +1,5 @@
>  ifeq ($(CONFIG_LINUX), y)
>  obj-$(CONFIG_SOFTMMU) += common.o
>  obj-$(CONFIG_PCI) += pci.o
> +obj-$(CONFIG_PSERIES) += spapr.o
>  endif
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 15fbaed..4e48aed 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -190,8 +190,8 @@ const MemoryRegionOps vfio_region_ops = {
>  /*
>   * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
>   */
> -static int vfio_dma_unmap(VFIOContainer *container,
> -                          hwaddr iova, ram_addr_t size)
> +int vfio_dma_unmap(VFIOContainer *container,
> +                   hwaddr iova, ram_addr_t size)
>  {
>      struct vfio_iommu_type1_dma_unmap unmap = {
>          .argsz = sizeof(unmap),
> @@ -208,8 +208,8 @@ static int vfio_dma_unmap(VFIOContainer *container,
>      return 0;
>  }
>  
> -static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
> -                        ram_addr_t size, void *vaddr, bool readonly)
> +int vfio_dma_map(VFIOContainer *container, hwaddr iova,
> +                 ram_addr_t size, void *vaddr, bool readonly)
>  {
>      struct vfio_iommu_type1_dma_map map = {
>          .argsz = sizeof(map),
> @@ -238,7 +238,7 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr 
> iova,
>      return -errno;
>  }
>  
> -static bool vfio_listener_skipped_section(MemoryRegionSection *section)
> +bool vfio_listener_skipped_section(MemoryRegionSection *section)
>  {
>      return (!memory_region_is_ram(section->mr) &&
>              !memory_region_is_iommu(section->mr)) ||
> @@ -251,64 +251,6 @@ static bool 
> vfio_listener_skipped_section(MemoryRegionSection *section)
>             section->offset_within_address_space & (1ULL << 63);
>  }
>  
> -static void vfio_iommu_map_notify(Notifier *n, void *data)
> -{
> -    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
> -    VFIOContainer *container = giommu->container;
> -    IOMMUTLBEntry *iotlb = data;
> -    MemoryRegion *mr;
> -    hwaddr xlat;
> -    hwaddr len = iotlb->addr_mask + 1;
> -    void *vaddr;
> -    int ret;
> -
> -    trace_vfio_iommu_map_notify(iotlb->iova,
> -                                iotlb->iova + iotlb->addr_mask);
> -
> -    /*
> -     * The IOMMU TLB entry we have just covers translation through
> -     * this IOMMU to its immediate target.  We need to translate
> -     * it the rest of the way through to memory.
> -     */
> -    mr = address_space_translate(&address_space_memory,
> -                                 iotlb->translated_addr,
> -                                 &xlat, &len, iotlb->perm & IOMMU_WO);
> -    if (!memory_region_is_ram(mr)) {
> -        error_report("iommu map to non memory area %"HWADDR_PRIx"\n",
> -                     xlat);
> -        return;
> -    }
> -    /*
> -     * Translation truncates length to the IOMMU page size,
> -     * check that it did not truncate too much.
> -     */
> -    if (len & iotlb->addr_mask) {
> -        error_report("iommu has granularity incompatible with target AS\n");
> -        return;
> -    }
> -
> -    if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
> -        vaddr = memory_region_get_ram_ptr(mr) + xlat;
> -        ret = vfio_dma_map(container, iotlb->iova,
> -                           iotlb->addr_mask + 1, vaddr,
> -                           !(iotlb->perm & IOMMU_WO) || mr->readonly);
> -        if (ret) {
> -            error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
> -                         "0x%"HWADDR_PRIx", %p) = %d (%m)",
> -                         container, iotlb->iova,
> -                         iotlb->addr_mask + 1, vaddr, ret);
> -        }
> -    } else {
> -        ret = vfio_dma_unmap(container, iotlb->iova, iotlb->addr_mask + 1);
> -        if (ret) {
> -            error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
> -                         "0x%"HWADDR_PRIx") = %d (%m)",
> -                         container, iotlb->iova,
> -                         iotlb->addr_mask + 1, ret);
> -        }
> -    }
> -}
> -
>  static void vfio_listener_region_add(MemoryListener *listener,
>                                       MemoryRegionSection *section)
>  {
> @@ -344,45 +286,6 @@ static void vfio_listener_region_add(MemoryListener 
> *listener,
>  
>      memory_region_ref(section->mr);
>  
> -    if (memory_region_is_iommu(section->mr)) {
> -        VFIOGuestIOMMU *giommu;
> -
> -        trace_vfio_listener_region_add_iommu(iova,
> -                    int128_get64(int128_sub(llend, int128_one())));
> -        /*
> -         * FIXME: We should do some checking to see if the
> -         * capabilities of the host VFIO IOMMU are adequate to model
> -         * the guest IOMMU
> -         *
> -         * FIXME: For VFIO iommu types which have KVM acceleration to
> -         * avoid bouncing all map/unmaps through qemu this way, this
> -         * would be the right place to wire that up (tell the KVM
> -         * device emulation the VFIO iommu handles to use).
> -         */
> -        /*
> -         * This assumes that the guest IOMMU is empty of
> -         * mappings at this point.
> -         *
> -         * One way of doing this is:
> -         * 1. Avoid sharing IOMMUs between emulated devices or different
> -         * IOMMU groups.
> -         * 2. Implement VFIO_IOMMU_ENABLE in the host kernel to fail if
> -         * there are some mappings in IOMMU.
> -         *
> -         * VFIO on SPAPR does that. Other IOMMU models may do that different,
> -         * they must make sure there are no existing mappings or
> -         * loop through existing mappings to map them into VFIO.
> -         */
> -        giommu = g_malloc0(sizeof(*giommu));
> -        giommu->iommu = section->mr;
> -        giommu->container = container;
> -        giommu->n.notify = vfio_iommu_map_notify;
> -        QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
> -        memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
> -
> -        return;
> -    }
> -
>      /* Here we assume that memory_region_is_ram(section->mr)==true */
>  
>      end = int128_get64(llend);
> @@ -435,27 +338,6 @@ static void vfio_listener_region_del(MemoryListener 
> *listener,
>          return;
>      }
>  
> -    if (memory_region_is_iommu(section->mr)) {
> -        VFIOGuestIOMMU *giommu;
> -
> -        QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
> -            if (giommu->iommu == section->mr) {
> -                memory_region_unregister_iommu_notifier(&giommu->n);
> -                QLIST_REMOVE(giommu, giommu_next);
> -                g_free(giommu);
> -                break;
> -            }
> -        }
> -
> -        /*
> -         * FIXME: We assume the one big unmap below is adequate to
> -         * remove any individual page mappings in the IOMMU which
> -         * might have been copied into VFIO. This works for a page table
> -         * based IOMMU where a big unmap flattens a large range of IO-PTEs.
> -         * That may not be true for all IOMMU types.
> -         */
> -    }
> -
>      iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
>      end = (section->offset_within_address_space + 
> int128_get64(section->size)) &
>            TARGET_PAGE_MASK;
> @@ -721,11 +603,7 @@ static int vfio_connect_container(VFIOGroup *group, 
> AddressSpace *as)
>              goto free_container_exit;
>          }
>  
> -        container->iommu_data.type1.listener = vfio_memory_listener;
> -        container->iommu_data.release = vfio_listener_release;
> -
> -        memory_listener_register(&container->iommu_data.type1.listener,
> -                                 container->space->as);
> +        spapr_memory_listener_register(container);
>  
>      } else {
>          error_report("vfio: No available IOMMU models");
> diff --git a/hw/vfio/spapr.c b/hw/vfio/spapr.c
> new file mode 100644
> index 0000000..f743138
> --- /dev/null
> +++ b/hw/vfio/spapr.c
> @@ -0,0 +1,226 @@
> +/*
> + * QEMU sPAPR VFIO IOMMU
> + *
> + * Copyright (c) 2015 Alexey Kardashevskiy, IBM Corporation.
> + *
> + *  This program is free software; you can redistribute it and/or modify
> + *  it under the terms of the GNU General Public License as published by
> + *  the Free Software Foundation; either version 2 of the License,
> + *  or (at your option) any later version.
> + *
> + *  This program is distributed in the hope that it will be useful,
> + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
> + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + *  GNU General Public License for more details.
> + *
> + *  You should have received a copy of the GNU General Public License
> + *  along with this program; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include "hw/vfio/vfio-common.h"
> +#include "qemu/error-report.h"
> +#include "trace.h"
> +
> +static void vfio_iommu_map_notify(Notifier *n, void *data)
> +{
> +    VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
> +    VFIOContainer *container = giommu->container;
> +    IOMMUTLBEntry *iotlb = data;
> +    MemoryRegion *mr;
> +    hwaddr xlat;
> +    hwaddr len = iotlb->addr_mask + 1;
> +    void *vaddr;
> +    int ret;
> +
> +    trace_vfio_iommu_map_notify(iotlb->iova,
> +                                iotlb->iova + iotlb->addr_mask);
> +
> +    /*
> +     * The IOMMU TLB entry we have just covers translation through
> +     * this IOMMU to its immediate target.  We need to translate
> +     * it the rest of the way through to memory.
> +     */
> +    mr = address_space_translate(&address_space_memory,
> +                                 iotlb->translated_addr,
> +                                 &xlat, &len, iotlb->perm & IOMMU_WO);
> +    if (!memory_region_is_ram(mr)) {
> +        error_report("iommu map to non memory area %"HWADDR_PRIx"\n",
> +                     xlat);
> +        return;
> +    }
> +    /*
> +     * Translation truncates length to the IOMMU page size,
> +     * check that it did not truncate too much.
> +     */
> +    if (len & iotlb->addr_mask) {
> +        error_report("iommu has granularity incompatible with target AS\n");
> +        return;
> +    }
> +
> +    if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
> +        vaddr = memory_region_get_ram_ptr(mr) + xlat;
> +        ret = vfio_dma_map(container, iotlb->iova,
> +                           iotlb->addr_mask + 1, vaddr,
> +                           !(iotlb->perm & IOMMU_WO) || mr->readonly);
> +        if (ret) {
> +            error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
> +                         "0x%"HWADDR_PRIx", %p) = %d (%m)",
> +                         container, iotlb->iova,
> +                         iotlb->addr_mask + 1, vaddr, ret);
> +        }
> +    } else {
> +        ret = vfio_dma_unmap(container, iotlb->iova, iotlb->addr_mask + 1);
> +        if (ret) {
> +            error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
> +                         "0x%"HWADDR_PRIx") = %d (%m)",
> +                         container, iotlb->iova,
> +                         iotlb->addr_mask + 1, ret);
> +        }
> +    }
> +}
> +
> +static void vfio_spapr_listener_region_add(MemoryListener *listener,
> +                                     MemoryRegionSection *section)
> +{
> +    VFIOContainer *container = container_of(listener, VFIOContainer,
> +                                            iommu_data.spapr.listener);
> +    hwaddr iova;
> +    Int128 llend;
> +    VFIOGuestIOMMU *giommu;
> +
> +    if (vfio_listener_skipped_section(section)) {
> +        trace_vfio_listener_region_add_skip(
> +            section->offset_within_address_space,
> +            section->offset_within_address_space +
> +            int128_get64(int128_sub(section->size, int128_one())));
> +        return;
> +    }
> +
> +    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) 
> !=
> +                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
> +        error_report("%s received unaligned region", __func__);
> +        return;
> +    }
> +
> +    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
> +    llend = int128_make64(section->offset_within_address_space);
> +    llend = int128_add(llend, section->size);
> +    llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
> +
> +    if (int128_ge(int128_make64(iova), llend)) {
> +        return;
> +    }
> +
> +    memory_region_ref(section->mr);
> +
> +    trace_vfio_listener_region_add_iommu(iova,
> +         int128_get64(int128_sub(llend, int128_one())));
> +    /*
> +     * FIXME: We should do some checking to see if the
> +     * capabilities of the host VFIO IOMMU are adequate to model
> +     * the guest IOMMU
> +     *
> +     * FIXME: For VFIO iommu types which have KVM acceleration to
> +     * avoid bouncing all map/unmaps through qemu this way, this
> +     * would be the right place to wire that up (tell the KVM
> +     * device emulation the VFIO iommu handles to use).
> +     */
> +    /*
> +     * This assumes that the guest IOMMU is empty of
> +     * mappings at this point.
> +     *
> +     * One way of doing this is:
> +     * 1. Avoid sharing IOMMUs between emulated devices or different
> +     * IOMMU groups.
> +     * 2. Implement VFIO_IOMMU_ENABLE in the host kernel to fail if
> +     * there are some mappings in IOMMU.
> +     *
> +     * VFIO on SPAPR does that. Other IOMMU models may do that different,
> +     * they must make sure there are no existing mappings or
> +     * loop through existing mappings to map them into VFIO.
> +     */
> +    giommu = g_malloc0(sizeof(*giommu));
> +    giommu->iommu = section->mr;
> +    giommu->container = container;
> +    giommu->n.notify = vfio_iommu_map_notify;
> +    QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
> +    memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
> +}
> +
> +static void vfio_spapr_listener_region_del(MemoryListener *listener,
> +                                     MemoryRegionSection *section)
> +{
> +    VFIOContainer *container = container_of(listener, VFIOContainer,
> +                                            iommu_data.spapr.listener);
> +    hwaddr iova, end;
> +    int ret;
> +    VFIOGuestIOMMU *giommu;
> +
> +    if (vfio_listener_skipped_section(section)) {
> +        trace_vfio_listener_region_del_skip(
> +            section->offset_within_address_space,
> +            section->offset_within_address_space +
> +            int128_get64(int128_sub(section->size, int128_one())));
> +        return;
> +    }
> +
> +    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) 
> !=
> +                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
> +        error_report("%s received unaligned region", __func__);
> +        return;
> +    }
> +
> +    QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
> +        if (giommu->iommu == section->mr) {
> +            memory_region_unregister_iommu_notifier(&giommu->n);
> +            QLIST_REMOVE(giommu, giommu_next);
> +            g_free(giommu);
> +            break;
> +        }
> +    }
> +
> +    /*
> +     * FIXME: We assume the one big unmap below is adequate to
> +     * remove any individual page mappings in the IOMMU which
> +     * might have been copied into VFIO. This works for a page table
> +     * based IOMMU where a big unmap flattens a large range of IO-PTEs.
> +     * That may not be true for all IOMMU types.
> +     */
> +
> +    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
> +    end = (section->offset_within_address_space + 
> int128_get64(section->size)) &
> +        TARGET_PAGE_MASK;
> +
> +    if (iova >= end) {
> +        return;
> +    }
> +
> +    trace_vfio_listener_region_del(iova, end - 1);
> +
> +    ret = vfio_dma_unmap(container, iova, end - iova);
> +    memory_region_unref(section->mr);
> +    if (ret) {
> +        error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
> +                     "0x%"HWADDR_PRIx") = %d (%m)",
> +                     container, iova, end - iova, ret);
> +    }
> +}
> +
> +static const MemoryListener vfio_spapr_memory_listener = {
> +    .region_add = vfio_spapr_listener_region_add,
> +    .region_del = vfio_spapr_listener_region_del,
> +};
> +
> +static void vfio_spapr_listener_release(VFIOContainer *container)
> +{
> +    memory_listener_unregister(&container->iommu_data.spapr.listener);
> +}
> +
> +void spapr_memory_listener_register(VFIOContainer *container)
> +{
> +    container->iommu_data.spapr.listener = vfio_spapr_memory_listener;
> +    container->iommu_data.release = vfio_spapr_listener_release;
> +
> +    memory_listener_register(&container->iommu_data.spapr.listener,
> +                             container->space->as);
> +}
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index da85aa9..88d9e1f 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -70,6 +70,10 @@ typedef struct VFIOType1 {
>      bool initialized;
>  } VFIOType1;
>  
> +typedef struct VFIOSPAPR {
> +    MemoryListener listener;
> +} VFIOSPAPR;
> +
>  typedef struct VFIOContainer {
>      VFIOAddressSpace *space;
>      int fd; /* /dev/vfio/vfio, empowered by the attached groups */
> @@ -77,6 +81,7 @@ typedef struct VFIOContainer {
>          /* enable abstraction to support various iommu backends */
>          union {
>              VFIOType1 type1;
> +            VFIOSPAPR spapr;
>          };
>          void (*release)(struct VFIOContainer *);
>      } iommu_data;
> @@ -145,4 +150,12 @@ extern const MemoryRegionOps vfio_region_ops;
>  extern QLIST_HEAD(vfio_group_head, VFIOGroup) vfio_group_list;
>  extern QLIST_HEAD(vfio_as_head, VFIOAddressSpace) vfio_address_spaces;
>  
> +extern int vfio_dma_map(VFIOContainer *container, hwaddr iova,
> +                        ram_addr_t size, void *vaddr, bool readonly);
> +extern int vfio_dma_unmap(VFIOContainer *container,
> +                          hwaddr iova, ram_addr_t size);
> +bool vfio_listener_skipped_section(MemoryRegionSection *section);
> +
> +extern void spapr_memory_listener_register(VFIOContainer *container);
> +
>  #endif /* !HW_VFIO_VFIO_COMMON_H */






reply via email to

[Prev in Thread] Current Thread [Next in Thread]