[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-ppc] [PATCH 7/8] spapr vfio: add spapr-pci-vfio-host-bridge to support vfio
From: |
Alexander Graf |
Subject: |
Re: [Qemu-ppc] [PATCH 7/8] spapr vfio: add spapr-pci-vfio-host-bridge to support vfio |
Date: |
Tue, 27 Aug 2013 13:08:01 +0200 |
On 07.08.2013, at 10:21, Alexey Kardashevskiy wrote:
> The patch adds a spapr-pci-vfio-host-bridge device type
> which is a PCI Host Bridge with VFIO support. The new device
> inherits from the spapr-pci-host-bridge device and adds
> the following properties:
> iommu - IOMMU group ID which represents a Partitionable
> Endpoint. QEMU/ppc64 uses a separate PHB for an
> IOMMU group, so the guest kernel has to have
> PCI Domain support enabled.
> forceaddr (optional, 0 by default) - forces QEMU to copy
> device:function from the host address as
> certain guest drivers expect devices to appear in
> particular locations;
> mf (optional, 0 by default) - forces multifunction bit for
> the function #0 of a found device, only makes sense
> for multifunction devices and only with the forceaddr
> property set. It would not be required if there
> was a way to know in advance whether a device is
> multifunctional or not.
> scan (optional, 1 by default) - if non-zero, the new PHB walks
> through all non-bridge devices in the group and tries
> adding them to the PHB; if zero, all devices in the group
> have to be configured manually via the QEMU command line.
>
> The patch also adds a VFIO IOMMU type support to the existing
> sPAPR TCE list in spapr_iommu.c.
>
> The patch also uses the host kernel support of a new KVM_CAP_SPAPR_TCE_IOMMU
> capability and KVM_CREATE_SPAPR_TCE_IOMMU ioctl which let QEMU tell
> the host what LIOBN is used for an IOMMU group. This ioctl turns on real mode
> handling of TCE requests, which increases actual throughput by 2.5-5 times.
>
> Examples:
> 1) Scan and add all devices from IOMMU group with ID=1 to QEMU's PHB #6:
> -device spapr-pci-vfio-host-bridge,id=DEVICENAME,iommu=1,index=6
>
> 2) Configure and Add 3 functions of a multifunctional device to QEMU:
> (the NEC PCI USB card is used as an example here):
> -device spapr-pci-vfio-host-bridge,id=USB,iommu=4,scan=0,index=7 \
> -device vfio-pci,host=4:0:1.0,addr=1.0,bus=USB,multifunction=true \
> -device vfio-pci,host=4:0:1.1,addr=1.1,bus=USB \
> -device vfio-pci,host=4:0:1.2,addr=1.2,bus=USB
>
> Cc: David Gibson <address@hidden>
> Signed-off-by: Alexey Kardashevskiy <address@hidden>
> ---
> hw/ppc/spapr_iommu.c | 176 ++++++++++++++++++++++++++++++++-----
> hw/ppc/spapr_pci.c | 209 +++++++++++++++++++++++++++++++++++++++++---
> include/hw/pci-host/spapr.h | 12 +++
> include/hw/ppc/spapr.h | 19 ++++
> target-ppc/kvm.c | 33 +++++++
> target-ppc/kvm_ppc.h | 12 +++
> trace-events | 4 +
> 7 files changed, 429 insertions(+), 36 deletions(-)
>
> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
> index 22b09be..096b6a9 100644
> --- a/hw/ppc/spapr_iommu.c
> +++ b/hw/ppc/spapr_iommu.c
> @@ -16,12 +16,14 @@
> * You should have received a copy of the GNU Lesser General Public
> * License along with this library; if not, see
> <http://www.gnu.org/licenses/>.
> */
> +
> #include "hw/hw.h"
> #include "sysemu/kvm.h"
> #include "hw/qdev.h"
> #include "kvm_ppc.h"
> #include "sysemu/dma.h"
> #include "exec/address-spaces.h"
> +#include "trace.h"
>
> #include "hw/ppc/spapr.h"
>
> @@ -244,6 +246,74 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet,
> target_ulong ioba,
> return H_SUCCESS;
> }
>
> +static IOMMUTLBEntry spapr_vfio_translate_iommu(MemoryRegion *iommu, hwaddr
> addr)
> +{
> + IOMMUTLBEntry entry;
> + /* Must never be called */
> + assert(0);
> + return entry;
> +}
> +
> +static MemoryRegionIOMMUOps spapr_vfio_iommu_ops = {
> + .translate = spapr_vfio_translate_iommu,
> +};
> +
> +static int spapr_tce_table_vfio_realize(DeviceState *dev)
> +{
> + sPAPRTCETable *tcet = SPAPR_TCE_TABLE(dev);
> +
> + memory_region_init_iommu(&tcet->iommu, NULL, &spapr_vfio_iommu_ops,
> + "iommu-vfio-spapr", (uint64_t)INT64_MAX+1);
> +
> + QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list);
> +
> + return 0;
> +}
> +
> +sPAPRTCETable *spapr_vfio_new_table(DeviceState *owner, uint32_t liobn,
> + int group_fd)
> +{
> + sPAPRTCETable *tcet;
> + int fd;
> +
> + if (spapr_tce_find_by_liobn(liobn)) {
> + fprintf(stderr, "Attempted to create TCE table with duplicate"
> + " LIOBN 0x%x\n", liobn);
> + return NULL;
> + }
> +
> + fd = kvmppc_create_spapr_tce_iommu(liobn, group_fd);
> +
> + tcet = SPAPR_TCE_TABLE(object_new(TYPE_SPAPR_TCE_TABLE_VFIO));
> + tcet->liobn = liobn;
> + tcet->fd = fd;
> + object_property_add_child(OBJECT(owner), "tce-table", OBJECT(tcet),
> NULL);
> +
> + qdev_init_nofail(DEVICE(tcet));
> +
> + return tcet;
> +}
> +
> +static target_ulong put_tce_vfio(sPAPRTCETable *tcet, target_ulong ioba,
> + target_ulong tce)
> +{
> + IOMMUTLBEntry entry;
> +
> + entry.iova = ioba & ~SPAPR_TCE_PAGE_MASK;
> + entry.translated_addr = tce & ~SPAPR_TCE_PAGE_MASK;
> + entry.addr_mask = SPAPR_TCE_PAGE_MASK;
> + entry.perm = 0;
> + if ((tce & SPAPR_TCE_RO) == SPAPR_TCE_RO) {
> + entry.perm |= IOMMU_RO;
> + }
> + if ((tce & SPAPR_TCE_WO) == SPAPR_TCE_WO) {
> + entry.perm |= IOMMU_WO;
> + }
> + memory_region_notify_iommu(&tcet->iommu, entry);
> +
> + return H_SUCCESS;
> +}
> +
> static target_ulong h_put_tce_indirect(PowerPCCPU *cpu,
> sPAPREnvironment *spapr,
> target_ulong opcode, target_ulong
> *args)
> @@ -255,18 +325,36 @@ static target_ulong h_put_tce_indirect(PowerPCCPU *cpu,
> target_ulong npages = args[3];
> target_ulong ret = 0;
> sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
> + sPAPRTCETableClass *info;
>
> - if (tcet) {
> - for (i = 0; i < npages; ++i, ioba += SPAPR_TCE_PAGE_SIZE) {
> - target_ulong tce = ldq_phys((tce_list & ~SPAPR_TCE_PAGE_MASK) +
> - i * sizeof(target_ulong));
> - ret = put_tce_emu(tcet, ioba, tce);
> - if (ret) {
> - break;
> - }
> + if (!tcet) {
> + return H_PARAMETER;
> + }
> +
> + info = SPAPR_TCE_TABLE_GET_CLASS(tcet);
> + if (!info || !info->put_tce) {
> + return H_PARAMETER;
> + }
> +
> + if ((tce_list & SPAPR_TCE_PAGE_MASK) || (npages > 512)) {
> + return H_PARAMETER;
> + }
> +
> + if (liobn & 0xFFFFFFFF00000000ULL) {
> + hcall_dprintf("spapr_vio_put_tce on out-of-boundsw LIOBN "
> + TARGET_FMT_lx "\n", liobn);
> + return H_PARAMETER;
> + }
> +
> + for (i = 0; i < npages; ++i, ioba += SPAPR_TCE_PAGE_SIZE) {
> + target_ulong tce = ldq_phys((tce_list & ~SPAPR_TCE_PAGE_MASK) +
> + i * sizeof(target_ulong));
> + ret = info->put_tce(tcet, ioba, tce);
> + if (ret) {
> + break;
> }
> - return ret;
> }
> +
> #ifdef DEBUG_TCE
> fprintf(stderr, "%s on liobn=" TARGET_FMT_lx
> " ioba 0x" TARGET_FMT_lx " TCE 0x" TARGET_FMT_lx
> @@ -274,7 +362,7 @@ static target_ulong h_put_tce_indirect(PowerPCCPU *cpu,
> __func__, liobn, ioba, tce_list, ret);
> #endif
>
> - return H_PARAMETER;
> + return ret;
> }
>
> static target_ulong h_stuff_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
> @@ -287,17 +375,30 @@ static target_ulong h_stuff_tce(PowerPCCPU *cpu,
> sPAPREnvironment *spapr,
> target_ulong npages = args[3];
> target_ulong ret = 0;
> sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
> + sPAPRTCETableClass *info;
> +
> + if (!tcet) {
> + return H_PARAMETER;
> + }
> +
> + info = SPAPR_TCE_TABLE_GET_CLASS(tcet);
> + if (!info || !info->put_tce) {
> + return H_PARAMETER;
> + }
> +
> + if (liobn & 0xFFFFFFFF00000000ULL) {
> + hcall_dprintf("spapr_vio_put_tce on out-of-boundsw LIOBN "
> + TARGET_FMT_lx "\n", liobn);
> + return H_PARAMETER;
> + }
>
> ioba &= ~(SPAPR_TCE_PAGE_SIZE - 1);
>
> - if (tcet) {
> - for (i = 0; i < npages; ++i, ioba += SPAPR_TCE_PAGE_SIZE) {
> - ret = put_tce_emu(tcet, ioba, tce_value);
> - if (ret) {
> - break;
> - }
> + for (i = 0; i < npages; ++i, ioba += SPAPR_TCE_PAGE_SIZE) {
> + ret = info->put_tce(tcet, ioba, tce_value);
> + if (ret) {
> + break;
> }
> - return ret;
> }
> #ifdef DEBUG_TCE
> fprintf(stderr, "%s on liobn=" TARGET_FMT_lx
> @@ -306,7 +407,7 @@ static target_ulong h_stuff_tce(PowerPCCPU *cpu,
> sPAPREnvironment *spapr,
> __func__, liobn, ioba, tce_value, ret);
> #endif
>
> - return H_PARAMETER;
> + return ret;
> }
>
> static target_ulong h_put_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
> @@ -316,12 +417,21 @@ static target_ulong h_put_tce(PowerPCCPU *cpu,
> sPAPREnvironment *spapr,
> target_ulong ioba = args[1];
> target_ulong tce = args[2];
> sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
> + target_ulong ret;
> + sPAPRTCETableClass *info;
> +
> + if (!tcet) {
> + return H_PARAMETER;
> + }
> +
> + info = SPAPR_TCE_TABLE_GET_CLASS(tcet);
> + if (!info || !info->put_tce) {
> + return H_PARAMETER;
> + }
>
> ioba &= ~(SPAPR_TCE_PAGE_SIZE - 1);
>
> - if (tcet) {
> - return put_tce_emu(tcet, ioba, tce);
> - }
> + ret = info->put_tce(tcet, ioba, tce);
> #ifdef DEBUG_TCE
> fprintf(stderr, "%s on liobn=" TARGET_FMT_lx
> " ioba 0x" TARGET_FMT_lx " TCE 0x" TARGET_FMT_lx
> @@ -329,7 +439,7 @@ static target_ulong h_put_tce(PowerPCCPU *cpu,
> sPAPREnvironment *spapr,
> __func__, liobn, ioba, tce, ret);
> #endif
>
> - return H_PARAMETER;
> + return ret;
> }
>
> int spapr_dma_dt(void *fdt, int node_off, const char *propname,
> @@ -376,9 +486,12 @@ int spapr_tcet_dma_dt(void *fdt, int node_off, const
> char *propname,
> static void spapr_tce_table_class_init(ObjectClass *klass, void *data)
> {
> DeviceClass *dc = DEVICE_CLASS(klass);
> + sPAPRTCETableClass *k = SPAPR_TCE_TABLE_CLASS(klass);
> +
> dc->vmsd = &vmstate_spapr_tce_table;
> dc->init = spapr_tce_table_realize;
> dc->reset = spapr_tce_reset;
> + k->put_tce = put_tce_emu;
>
> QLIST_INIT(&spapr_tce_tables);
>
> @@ -393,12 +506,31 @@ static TypeInfo spapr_tce_table_info = {
> .parent = TYPE_DEVICE,
> .instance_size = sizeof(sPAPRTCETable),
> .class_init = spapr_tce_table_class_init,
> + .class_size = sizeof(sPAPRTCETableClass),
> .instance_finalize = spapr_tce_table_finalize,
> };
>
> +static void spapr_tce_table_vfio_class_init(ObjectClass *klass, void *data)
> +{
> + DeviceClass *dc = DEVICE_CLASS(klass);
> + sPAPRTCETableClass *k = SPAPR_TCE_TABLE_CLASS(klass);
> +
> + dc->init = spapr_tce_table_vfio_realize;
> + k->put_tce = put_tce_vfio;
> +}
> +
> +static TypeInfo spapr_tce_table_vfio_info = {
> + .name = TYPE_SPAPR_TCE_TABLE_VFIO,
> + .parent = TYPE_SPAPR_TCE_TABLE,
> + .instance_size = sizeof(sPAPRTCETable),
> + .class_init = spapr_tce_table_vfio_class_init,
> + .class_size = sizeof(sPAPRTCETableClass),
> +};
> +
> static void register_types(void)
> {
> type_register_static(&spapr_tce_table_info);
> + type_register_static(&spapr_tce_table_vfio_info);
> }
>
> type_init(register_types);
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index 869ca43..3f37cac 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
I think we should move the vfio phb into a separate file and make it a
proper subclass, without even the chance to randomly call normal spapr pci
functions ;).
Andreas, could you please check through this and see if you can spot a way to
isolate it out?
Alex
> @@ -22,6 +22,9 @@
> * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> * THE SOFTWARE.
> */
> +#include <sys/types.h>
> +#include <dirent.h>
> +
> #include "hw/hw.h"
> #include "hw/pci/pci.h"
> #include "hw/pci/msi.h"
> @@ -32,6 +35,7 @@
> #include "exec/address-spaces.h"
> #include <libfdt.h>
> #include "trace.h"
> +#include "hw/misc/vfio.h"
>
> #include "hw/pci/pci_bus.h"
>
> @@ -496,7 +500,11 @@ static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus,
> void *opaque, int devfn)
> return &phb->iommu_as;
> }
>
> -static int spapr_phb_init(SysBusDevice *s)
> +/*
> + * This is the common initialization part for both emulated and VFIO PHBs
> + * which includes everything but DMA and device scan (optional, VFIO only).
> + */
> +static int _spapr_phb_init(SysBusDevice *s)
> {
> DeviceState *dev = DEVICE(s);
> sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
> @@ -610,19 +618,6 @@ static int spapr_phb_init(SysBusDevice *s)
> PCI_DEVFN(0, 0), PCI_NUM_PINS, TYPE_PCI_BUS);
> phb->bus = bus;
>
> - sphb->dma_window_start = 0;
> - sphb->dma_window_size = 0x40000000;
> - sphb->tcet = spapr_tce_new_table(dev, sphb->dma_liobn,
> - sphb->dma_window_size);
> - if (!sphb->tcet) {
> - fprintf(stderr, "Unable to create TCE table for %s\n",
> sphb->dtbusname);
> - return -1;
> - }
> - address_space_init(&sphb->iommu_as, spapr_tce_get_iommu(sphb->tcet),
> - sphb->dtbusname);
> -
> - pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb);
> -
> QLIST_INSERT_HEAD(&spapr->phbs, sphb, list);
>
> /* Initialize the LSI table */
> @@ -641,6 +636,30 @@ static int spapr_phb_init(SysBusDevice *s)
> return 0;
> }
>
> +static int spapr_phb_init(SysBusDevice *s)
> +{
> + sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
> + int ret;
> +
> + ret = _spapr_phb_init(s);
> + if (ret)
> + return ret;
> +
> + sphb->dma_window_start = 0;
> + sphb->dma_window_size = 0x40000000;
> + sphb->tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
> + sphb->dma_window_size);
> + if (!sphb->tcet) {
> + fprintf(stderr, "Unable to create TCE table for %s\n",
> sphb->dtbusname);
> + return -1;
> + }
> + address_space_init(&sphb->iommu_as, spapr_tce_get_iommu(sphb->tcet),
> + sphb->dtbusname);
> + pci_setup_iommu(sphb->parent_obj.bus, spapr_pci_dma_iommu, sphb);
> +
> + return 0;
> +}
> +
> static void spapr_phb_reset(DeviceState *qdev)
> {
> SysBusDevice *s = SYS_BUS_DEVICE(qdev);
> @@ -749,6 +768,163 @@ PCIHostState *spapr_create_phb(sPAPREnvironment *spapr,
> int index)
> return PCI_HOST_BRIDGE(dev);
> }
>
> +/* sPAPR VFIO */
> +static Property spapr_phb_vfio_properties[] = {
> + DEFINE_PROP_INT32("iommu", sPAPRPHBVFIOState, iommugroupid, -1),
> + DEFINE_PROP_UINT8("scan", sPAPRPHBVFIOState, scan, 1),
> + DEFINE_PROP_UINT8("mf", sPAPRPHBVFIOState, enable_multifunction, 0),
> + DEFINE_PROP_UINT8("forceaddr", sPAPRPHBVFIOState, force_addr, 0),
> + DEFINE_PROP_END_OF_LIST(),
> +};
> +
> +static int spapr_pci_vfio_scan(sPAPRPHBVFIOState *svphb)
> +{
> + PCIHostState *phb = PCI_HOST_BRIDGE(svphb);
> + char *iommupath;
> + DIR *dirp;
> + struct dirent *entry;
> +
> + if (!svphb->scan) {
> + trace_spapr_pci("autoscan disabled for ", svphb->phb.dtbusname);
> + return 0;
> + }
> +
> + iommupath = g_strdup_printf("/sys/kernel/iommu_groups/%d/devices/",
> + svphb->iommugroupid);
> + if (!iommupath) {
> + return -ENOMEM;
> + }
> +
> + dirp = opendir(iommupath);
> + if (!dirp) {
> + fprintf(stderr, "failed to scan group=%d\n", svphb->iommugroupid);
> + g_free(iommupath);
> + return -1;
> + }
> +
> + while ((entry = readdir(dirp)) != NULL) {
> + Error *err = NULL;
> + char *tmp;
> + FILE *deviceclassfile;
> + unsigned deviceclass = 0, domainid, busid, devid, fnid;
> + char addr[32];
> + DeviceState *dev;
> +
> + if (sscanf(entry->d_name, "%X:%X:%X.%x",
> + &domainid, &busid, &devid, &fnid) != 4) {
> + continue;
> + }
> +
> + tmp = g_strdup_printf("%s%s/class", iommupath, entry->d_name);
> + trace_spapr_pci("Reading device class from ", tmp);
> +
> + deviceclassfile = fopen(tmp, "r");
> + if (deviceclassfile) {
> + int ret = fscanf(deviceclassfile, "%x", &deviceclass);
> + fclose(deviceclassfile);
> + if (ret != 1) {
> + continue;
> + }
> + }
> + g_free(tmp);
> +
> + if (!deviceclass) {
> + continue;
> + }
> + if ((deviceclass >> 16) == (PCI_CLASS_BRIDGE_OTHER >> 8)) {
> + /* Skip bridges */
> + continue;
> + }
> + trace_spapr_pci("Creating device from ", entry->d_name);
> +
> + dev = qdev_create(&phb->bus->qbus, "vfio-pci");
> + if (!dev) {
> + fprintf(stderr, "failed to create vfio-pci\n");
> + continue;
> + }
> + qdev_prop_parse(dev, "host", entry->d_name, &err);
> + if (err != NULL) {
> + continue;
> + }
> + if (svphb->force_addr) {
> + snprintf(addr, sizeof(addr), "%x.%x", devid, fnid);
> + err = NULL;
> + qdev_prop_parse(dev, "addr", addr, &err);
> + if (err != NULL) {
> + continue;
> + }
> + }
> + if (svphb->enable_multifunction) {
> + qdev_prop_set_bit(dev, "multifunction", 1);
> + }
> + qdev_init_nofail(dev);
> + }
> + closedir(dirp);
> + g_free(iommupath);
> +
> + return 0;
> +}
> +
> +static int spapr_phb_vfio_init(SysBusDevice *s)
> +{
> + sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(s);
> + sPAPRPHBState *sphb = &svphb->phb;
> + struct vfio_iommu_spapr_tce_info info = { .argsz = sizeof(info) };
> + int ret, group_fd;
> +
> + if (svphb->iommugroupid == -1) {
> + fprintf(stderr, "Wrong IOMMU group ID %d\n", svphb->iommugroupid);
> + return -1;
> + }
> +
> + ret = _spapr_phb_init(s);
> + if (ret) {
> + return ret;
> + }
> +
> + ret = vfio_container_spapr_get_info(&svphb->phb.iommu_as,
> + svphb->iommugroupid,
> + &info, &group_fd);
> + if (ret)
> + return ret;
> +
> + svphb->phb.dma_window_start = info.dma32_window_start;
> + svphb->phb.dma_window_size = info.dma32_window_size;
> + svphb->phb.tcet = spapr_vfio_new_table(DEVICE(sphb),
> svphb->phb.dma_liobn,
> + group_fd);
> +
> + address_space_init(&sphb->iommu_as, spapr_tce_get_iommu(sphb->tcet),
> + sphb->dtbusname);
> + pci_setup_iommu(sphb->parent_obj.bus, spapr_pci_dma_iommu, sphb);
> +
> + ret = spapr_pci_vfio_scan(svphb);
> +
> + return ret;
> +}
> +
> +static void spapr_phb_vfio_reset(DeviceState *qdev)
> +{
> + /* Do nothing */
> +}
> +
> +static void spapr_phb_vfio_class_init(ObjectClass *klass, void *data)
> +{
> + SysBusDeviceClass *sdc = SYS_BUS_DEVICE_CLASS(klass);
> + DeviceClass *dc = DEVICE_CLASS(klass);
> +
> + sdc->init = spapr_phb_vfio_init;
> + dc->props = spapr_phb_vfio_properties;
> + dc->reset = spapr_phb_vfio_reset;
> + dc->vmsd = &vmstate_spapr_pci;
> +}
> +
> +static const TypeInfo spapr_phb_vfio_info = {
> + .name = TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE,
> + .parent = TYPE_SPAPR_PCI_HOST_BRIDGE,
> + .instance_size = sizeof(sPAPRPHBVFIOState),
> + .class_init = spapr_phb_vfio_class_init,
> +};
> +
> /* Macros to operate with address in OF binding to PCI */
> #define b_x(x, p, l) (((x) & ((1<<(l))-1)) << (p))
> #define b_n(x) b_x((x), 31, 1) /* 0 if relocatable */
> @@ -839,6 +1015,10 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
> _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
> sizeof(interrupt_map)));
>
> + if (!phb->dma_window_size) {
> + fprintf(stderr, "Unexpected error: DMA window is zero, exiting\n");
> + exit(1);
> + }
> spapr_dma_dt(fdt, bus_off, "ibm,dma-window",
> phb->dma_liobn, phb->dma_window_start,
> phb->dma_window_size);
> @@ -862,6 +1042,7 @@ void spapr_pci_rtas_init(void)
> static void spapr_pci_register_types(void)
> {
> type_register_static(&spapr_phb_info);
> + type_register_static(&spapr_phb_vfio_info);
> }
>
> type_init(spapr_pci_register_types)
> diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
> index 970b4a9..fab18e5 100644
> --- a/include/hw/pci-host/spapr.h
> +++ b/include/hw/pci-host/spapr.h
> @@ -30,10 +30,14 @@
> #define SPAPR_MSIX_MAX_DEVS 32
>
> #define TYPE_SPAPR_PCI_HOST_BRIDGE "spapr-pci-host-bridge"
> +#define TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE "spapr-pci-vfio-host-bridge"
>
> #define SPAPR_PCI_HOST_BRIDGE(obj) \
> OBJECT_CHECK(sPAPRPHBState, (obj), TYPE_SPAPR_PCI_HOST_BRIDGE)
>
> +#define SPAPR_PCI_VFIO_HOST_BRIDGE(obj) \
> + OBJECT_CHECK(sPAPRPHBVFIOState, (obj), TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE)
> +
> typedef struct sPAPRPHBState {
> PCIHostState parent_obj;
>
> @@ -64,6 +68,14 @@ typedef struct sPAPRPHBState {
> QLIST_ENTRY(sPAPRPHBState) list;
> } sPAPRPHBState;
>
> +typedef struct sPAPRPHBVFIOState {
> + sPAPRPHBState phb;
> +
> + struct VFIOContainer *container;
> + int32_t iommugroupid;
> + uint8_t scan, enable_multifunction, force_addr;
> +} sPAPRPHBVFIOState;
> +
> #define SPAPR_PCI_BASE_BUID 0x800000020000000ULL
>
> #define SPAPR_PCI_WINDOW_BASE 0x10000000000ULL
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 2dc3d06..a64e58a 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -353,12 +353,29 @@ int spapr_rtas_device_tree_setup(void *fdt, hwaddr
> rtas_addr,
>
> #define RTAS_ERROR_LOG_MAX 2048
>
> +typedef struct sPAPRTCETableClass sPAPRTCETableClass;
> typedef struct sPAPRTCETable sPAPRTCETable;
>
> #define TYPE_SPAPR_TCE_TABLE "spapr-tce-table"
> #define SPAPR_TCE_TABLE(obj) \
> OBJECT_CHECK(sPAPRTCETable, (obj), TYPE_SPAPR_TCE_TABLE)
>
> +#define TYPE_SPAPR_TCE_TABLE_VFIO "spapr-tce-table-vfio"
> +#define SPAPR_TCE_TABLE_VFIO(obj) \
> + OBJECT_CHECK(sPAPRTCETable, (obj), TYPE_SPAPR_TCE_TABLE_VFIO)
> +
> +#define SPAPR_TCE_TABLE_CLASS(klass) \
> + OBJECT_CLASS_CHECK(sPAPRTCETableClass, (klass), TYPE_SPAPR_TCE_TABLE)
> +#define SPAPR_TCE_TABLE_GET_CLASS(obj) \
> + OBJECT_GET_CLASS(sPAPRTCETableClass, (obj), TYPE_SPAPR_TCE_TABLE)
> +
> +struct sPAPRTCETableClass {
> + DeviceClass parent_class;
> +
> + target_ulong (*put_tce)(sPAPRTCETable *tcet, target_ulong ioba,
> + target_ulong tce);
> +};
> +
> struct sPAPRTCETable {
> DeviceState parent;
> uint32_t liobn;
> @@ -375,6 +392,8 @@ void spapr_events_init(sPAPREnvironment *spapr);
> void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq);
> sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
> size_t window_size);
> +sPAPRTCETable *spapr_vfio_new_table(DeviceState *owner, uint32_t liobn,
> + int group_fd);
> MemoryRegion *spapr_tce_get_iommu(sPAPRTCETable *tcet);
> void spapr_tce_set_bypass(sPAPRTCETable *tcet, bool bypass);
> int spapr_dma_dt(void *fdt, int node_off, const char *propname,
> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
> index 3d0e398..eb59d7d 100644
> --- a/target-ppc/kvm.c
> +++ b/target-ppc/kvm.c
> @@ -61,6 +61,7 @@ static int cap_ppc_smt;
> static int cap_ppc_rma;
> static int cap_spapr_tce;
> static int cap_spapr_multitce;
> +static int cap_spapr_tce_iommu;
> static int cap_hior;
> static int cap_one_reg;
> static int cap_epr;
> @@ -98,6 +99,7 @@ int kvm_arch_init(KVMState *s)
> cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
> cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
> cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
> + cap_spapr_tce_iommu = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_IOMMU);
> cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
> cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
> cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
> @@ -1669,6 +1671,37 @@ int kvmppc_remove_spapr_tce(void *table, int fd,
> uint32_t window_size)
> return 0;
> }
>
> +int kvmppc_create_spapr_tce_iommu(uint32_t liobn, int group_fd)
> +{
> + int fd = 0;
> + struct kvm_create_spapr_tce_iommu args = {
> + .liobn = liobn,
> + .fd = group_fd
> + };
> +
> + if (!kvm_enabled() || !cap_spapr_tce_iommu) {
> + fprintf(stderr, "KVM VFIO: TCE IOMMU capability is not present, DMA
> may be slow\n");
> + return -1;
> + }
> +
> + fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_IOMMU, &args);
> + if (fd < 0) {
> + fprintf(stderr, "KVM VFIO: Failed to create TCE table for liobn
> 0x%x, ret = %d, DMA may be slow\n",
> + liobn, fd);
> + }
> +
> + return fd;
> +}
> +
> +int kvmppc_remove_spapr_tce_iommu(int fd)
> +{
> + if (fd < 0) {
> + return -1;
> + }
> +
> + return close(fd);
> +}
> +
> int kvmppc_reset_htab(int shift_hint)
> {
> uint32_t shift = shift_hint;
> diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
> index a2a903f..a223e63 100644
> --- a/target-ppc/kvm_ppc.h
> +++ b/target-ppc/kvm_ppc.h
> @@ -34,6 +34,8 @@ off_t kvmppc_alloc_rma(const char *name, MemoryRegion
> *sysmem);
> bool kvmppc_spapr_use_multitce(void);
> void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd);
> int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size);
> +int kvmppc_create_spapr_tce_iommu(uint32_t liobn, int group_fd);
> +int kvmppc_remove_spapr_tce_iommu(int fd);
> int kvmppc_reset_htab(int shift_hint);
> uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift);
> #endif /* !CONFIG_USER_ONLY */
> @@ -144,6 +146,16 @@ static inline int kvmppc_remove_spapr_tce(void *table,
> int pfd,
> return -1;
> }
>
> +static inline int kvmppc_create_spapr_tce_iommu(uint32_t liobn, uint32_t
> iommu_id)
> +{
> + return -1;
> +}
> +
> +static inline int kvmppc_remove_spapr_tce_iommu(int fd)
> +{
> + return -1;
> +}
> +
> static inline int kvmppc_reset_htab(int shift_hint)
> {
> return -1;
> diff --git a/trace-events b/trace-events
> index 3856b5c..d1e54ad 100644
> --- a/trace-events
> +++ b/trace-events
> @@ -1113,6 +1113,7 @@ qxl_render_guest_primary_resized(int32_t width, int32_t
> height, int32_t stride,
> qxl_render_update_area_done(void *cookie) "%p"
>
> # hw/ppc/spapr_pci.c
> +spapr_pci(const char *msg1, const char *msg2) "%s%s"
> spapr_pci_msi(const char *msg, uint32_t n, uint32_t ca) "%s (device#%d,
> cfg=%x)"
> spapr_pci_msi_setup(const char *name, unsigned vector, uint64_t addr)
> "dev\"%s\" vector %u, addr=%"PRIx64
> spapr_pci_rtas_ibm_change_msi(unsigned func, unsigned req) "func %u,
> requested %u"
> @@ -1133,6 +1134,9 @@ xics_ics_write_xive(int nr, int srcno, int server,
> uint8_t priority) "ics_write_
> xics_ics_reject(int nr, int srcno) "reject irq %#x [src %d]"
> xics_ics_eoi(int nr) "ics_eoi: irq %#x"
>
> +# hw/ppc/spapr_iommu.c
> +spapr_iommu(const char *op, uint32_t liobn, uint64_t ioba, uint64_t tce, int
> ret) "%s %x ioba=%"PRIx64" tce=%"PRIx64" ret=%d"
> +
> # util/hbitmap.c
> hbitmap_iter_skip_words(const void *hb, void *hbi, uint64_t pos, unsigned
> long cur) "hb %p hbi %p pos %"PRId64" cur 0x%lx"
> hbitmap_reset(void *hb, uint64_t start, uint64_t count, uint64_t sbit,
> uint64_t ebit) "hb %p items %"PRIu64",%"PRIu64" bits %"PRIu64"..%"PRIu64
> --
> 1.8.3.2
>
- [Qemu-ppc] [PATCH 4/8] vfio: Create VFIOAddressSpace objects as needed, (continued)
[Qemu-ppc] [PATCH 6/8] spapr vfio: add vfio_container_spapr_get_info(), Alexey Kardashevskiy, 2013/08/07
[Qemu-ppc] [PATCH 7/8] spapr vfio: add spapr-pci-vfio-host-bridge to support vfio, Alexey Kardashevskiy, 2013/08/07
- Re: [Qemu-ppc] [PATCH 7/8] spapr vfio: add spapr-pci-vfio-host-bridge to support vfio,
Alexander Graf <=
[Qemu-ppc] [PATCH 8/8] spapr vfio: enable for spapr, Alexey Kardashevskiy, 2013/08/07