qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v2 4/8] x86_iommu/amd: Prepare for interrupt rem


From: Eduardo Habkost
Subject: Re: [Qemu-devel] [PATCH v2 4/8] x86_iommu/amd: Prepare for interrupt remap support
Date: Mon, 17 Sep 2018 10:49:15 -0300
User-agent: Mutt/1.9.2 (2017-12-15)

Hi,

I couldn't review the whole patch yet, but I have some comments
below:

On Fri, Sep 14, 2018 at 01:26:59PM -0500, Brijesh Singh wrote:
> Register the interrupt remapping callback and read/write ops for the
> amd-iommu-ir memory region.
> 
> amd-iommu-ir is set to higher priority to ensure that this region won't
> be masked out by other memory regions.
> 
> While at it, add a overlapping amd-iommu region with higher priority
> and update address space name to include the devfn.
> 
> Cc: "Michael S. Tsirkin" <address@hidden>
> Cc: Paolo Bonzini <address@hidden>
> Cc: Richard Henderson <address@hidden>
> Cc: Eduardo Habkost <address@hidden>
> Cc: Marcel Apfelbaum <address@hidden>
> Cc: Tom Lendacky <address@hidden>
> Cc: Suravee Suthikulpanit <address@hidden>
> Signed-off-by: Brijesh Singh <address@hidden>
> ---
>  hw/i386/amd_iommu.c  | 140 
> ++++++++++++++++++++++++++++++++++++++++++++++++---
>  hw/i386/amd_iommu.h  |  17 ++++++-
>  hw/i386/trace-events |   5 ++
>  3 files changed, 154 insertions(+), 8 deletions(-)
> 
> diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
> index 225825e..b15962b 100644
> --- a/hw/i386/amd_iommu.c
> +++ b/hw/i386/amd_iommu.c
> @@ -26,6 +26,7 @@
>  #include "amd_iommu.h"
>  #include "qapi/error.h"
>  #include "qemu/error-report.h"
> +#include "hw/i386/apic_internal.h"
>  #include "trace.h"
>  
>  /* used AMD-Vi MMIO registers */
> @@ -56,6 +57,7 @@ struct AMDVIAddressSpace {
>      uint8_t devfn;              /* device function                      */
>      AMDVIState *iommu_state;    /* AMDVI - one per machine              */
>      IOMMUMemoryRegion iommu;    /* Device's address translation region  */
> +    MemoryRegion root;          /* AMDVI Root memory map region */
>      MemoryRegion iommu_ir;      /* Device's interrupt remapping region  */
>      AddressSpace as;            /* device's corresponding address space */
>  };
> @@ -1027,10 +1029,104 @@ static IOMMUTLBEntry 
> amdvi_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
>      return ret;
>  }
>  
> +/* Interrupt remapping for MSI/MSI-X entry */
> +static int amdvi_int_remap_msi(AMDVIState *iommu,
> +                               MSIMessage *origin,
> +                               MSIMessage *translated,
> +                               uint16_t sid)
> +{
> +    assert(origin && translated);
> +
> +    trace_amdvi_ir_remap_msi_req(origin->address, origin->data, sid);
> +
> +    if (!iommu || !iommu->intr_enabled) {
> +        memcpy(translated, origin, sizeof(*origin));
> +        goto out;
> +    }
> +
> +    if (origin->address & AMDVI_MSI_ADDR_HI_MASK) {
> +        trace_amdvi_err("MSI address high 32 bits non-zero when "
> +                        "Interrupt Remapping enabled.");
> +        return -AMDVI_IR_ERR;
> +    }
> +
> +    if ((origin->address & AMDVI_MSI_ADDR_LO_MASK) != APIC_DEFAULT_ADDRESS) {
> +        trace_amdvi_err("MSI is not from IOAPIC.");
> +        return -AMDVI_IR_ERR;
> +    }
> +
> +out:
> +    trace_amdvi_ir_remap_msi(origin->address, origin->data,
> +                             translated->address, translated->data);
> +    return 0;
> +}
> +
> +static int amdvi_int_remap(X86IOMMUState *iommu,
> +                           MSIMessage *origin,
> +                           MSIMessage *translated,
> +                           uint16_t sid)
> +{
> +    return amdvi_int_remap_msi(AMD_IOMMU_DEVICE(iommu), origin,
> +                               translated, sid);
> +}
> +
> +static MemTxResult amdvi_mem_ir_write(void *opaque, hwaddr addr,
> +                                      uint64_t value, unsigned size,
> +                                      MemTxAttrs attrs)
> +{
> +    int ret;
> +    MSIMessage from = { 0, 0 }, to = { 0, 0 };
> +    uint16_t sid = AMDVI_IOAPIC_SB_DEVID;
> +
> +    from.address = (uint64_t) addr + AMDVI_INT_ADDR_FIRST;
> +    from.data = (uint32_t) value;
> +
> +    trace_amdvi_mem_ir_write_req(addr, value, size);
> +
> +    if (!attrs.unspecified) {
> +        /* We have explicit Source ID */
> +        sid = attrs.requester_id;
> +    }
> +
> +    ret = amdvi_int_remap_msi(opaque, &from, &to, sid);
> +    if (ret < 0) {
> +        /* TODO: report error */

How do you plan to address this TODO item?

> +        /* Drop the interrupt */

What does this comment mean?  Is this also a TODO item?

> +        return MEMTX_ERROR;
> +    }
> +
> +    apic_get_class()->send_msi(&to);
> +
> +    trace_amdvi_mem_ir_write(to.address, to.data);
> +    return MEMTX_OK;
> +}
> +
> +static MemTxResult amdvi_mem_ir_read(void *opaque, hwaddr addr,
> +                                     uint64_t *data, unsigned size,
> +                                     MemTxAttrs attrs)
> +{
> +    return MEMTX_OK;
> +}
> +
> +static const MemoryRegionOps amdvi_ir_ops = {
> +    .read_with_attrs = amdvi_mem_ir_read,
> +    .write_with_attrs = amdvi_mem_ir_write,
> +    .endianness = DEVICE_LITTLE_ENDIAN,
> +    .impl = {
> +        .min_access_size = 4,
> +        .max_access_size = 4,
> +    },
> +    .valid = {
> +        .min_access_size = 4,
> +        .max_access_size = 4,
> +    }
> +};
> +
>  static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int 
> devfn)
>  {
> +    char name[128];
>      AMDVIState *s = opaque;
> -    AMDVIAddressSpace **iommu_as;
> +    AMDVIAddressSpace **iommu_as, *amdvi_dev_as;
>      int bus_num = pci_bus_num(bus);
>  
>      iommu_as = s->address_spaces[bus_num];
> @@ -1043,19 +1139,46 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus 
> *bus, void *opaque, int devfn)
>  
>      /* set up AMD-Vi region */
>      if (!iommu_as[devfn]) {
> +        snprintf(name, sizeof(name), "amd_iommu_devfn_%d", devfn);
> +
>          iommu_as[devfn] = g_malloc0(sizeof(AMDVIAddressSpace));
>          iommu_as[devfn]->bus_num = (uint8_t)bus_num;
>          iommu_as[devfn]->devfn = (uint8_t)devfn;
>          iommu_as[devfn]->iommu_state = s;
>  
> -        memory_region_init_iommu(&iommu_as[devfn]->iommu,
> -                                 sizeof(iommu_as[devfn]->iommu),
> +        amdvi_dev_as = iommu_as[devfn];
> +
> +        /*
> +         * Memory region relationships looks like (Address range shows
> +         * only lower 32 bits to make it short in length...):
> +         *
> +         * |-----------------+-------------------+----------|
> +         * | Name            | Address range     | Priority |
> +         * |-----------------+-------------------+----------+
> +         * | amdvi_root      | 00000000-ffffffff |        0 |
> +         * |  amdvi_iommu    | 00000000-ffffffff |        1 |
> +         * |  amdvi_iommu_ir | fee00000-feefffff |       64 |
> +         * |-----------------+-------------------+----------|
> +         */
> +        memory_region_init_iommu(&amdvi_dev_as->iommu,
> +                                 sizeof(amdvi_dev_as->iommu),

The change from iommu_as[devfn] to amdvi_dev_as makes this patch
harder to review.  Not a big deal, but if you introduce it in a
separate patch you'll help reviewers.


>                                   TYPE_AMD_IOMMU_MEMORY_REGION,
>                                   OBJECT(s),
>                                   "amd-iommu", UINT64_MAX);
> -        address_space_init(&iommu_as[devfn]->as,
> -                           MEMORY_REGION(&iommu_as[devfn]->iommu),
> -                           "amd-iommu");
> +        memory_region_init(&amdvi_dev_as->root, OBJECT(s),
> +                           "amdvi_root", UINT64_MAX);
> +        address_space_init(&amdvi_dev_as->as, &amdvi_dev_as->root, name);

The old code simply used "amd-iommu" as the address space name,
why did you decide to rename it?  The commit message says you
renamed it, but doesn't say why.

The new name follows a different style: the old one used "-"
and the new one uses "_".  Why?


> +        memory_region_init_io(&amdvi_dev_as->iommu_ir, OBJECT(s),
> +                              &amdvi_ir_ops, s, "amd-iommu-ir",
> +                              AMDVI_INT_ADDR_SIZE);
> +        memory_region_add_subregion_overlap(&amdvi_dev_as->root,
> +                                            AMDVI_INT_ADDR_FIRST,
> +                                            &amdvi_dev_as->iommu_ir,
> +                                            64);
> +        memory_region_add_subregion_overlap(&amdvi_dev_as->root, 0,
> +                                            
> MEMORY_REGION(&amdvi_dev_as->iommu),
> +                                            1);
> +
>      }
>      return &iommu_as[devfn]->as;
>  }
> @@ -1173,6 +1296,10 @@ static void amdvi_realize(DeviceState *dev, Error 
> **err)
>          return;
>      }
>  
> +    /* Pseudo address space under root PCI bus. */
> +    pcms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID);
> +    s->intr_enabled = x86_iommu->intr_supported;
> +
>      /* set up MMIO */
>      memory_region_init_io(&s->mmio, OBJECT(s), &mmio_mem_ops, s, 
> "amdvi-mmio",
>                            AMDVI_MMIO_SIZE);
> @@ -1206,6 +1333,7 @@ static void amdvi_class_init(ObjectClass *klass, void* 
> data)
>      dc->vmsd = &vmstate_amdvi;
>      dc->hotpluggable = false;
>      dc_class->realize = amdvi_realize;
> +    dc_class->int_remap = amdvi_int_remap;
>      /* Supported by the pc-q35-* machine types */
>      dc->user_creatable = true;
>  }
> diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h
> index 8740305..71ff3c1 100644
> --- a/hw/i386/amd_iommu.h
> +++ b/hw/i386/amd_iommu.h
> @@ -206,8 +206,18 @@
>  
>  #define AMDVI_COMMAND_SIZE   16
>  
> -#define AMDVI_INT_ADDR_FIRST 0xfee00000
> -#define AMDVI_INT_ADDR_LAST  0xfeefffff
> +#define AMDVI_INT_ADDR_FIRST    0xfee00000
> +#define AMDVI_INT_ADDR_LAST     0xfeefffff
> +#define AMDVI_INT_ADDR_SIZE     (AMDVI_INT_ADDR_LAST - AMDVI_INT_ADDR_FIRST 
> + 1)
> +#define AMDVI_MSI_ADDR_HI_MASK  (0xffffffff00000000ULL)
> +#define AMDVI_MSI_ADDR_LO_MASK  (0x00000000ffffffffULL)
> +
> +/* SB IOAPIC is always on this device in AMD systems */
> +#define AMDVI_IOAPIC_SB_DEVID   PCI_BUILD_BDF(0, PCI_DEVFN(0x14, 0))
> +
> +/* Interrupt remapping errors */
> +#define AMDVI_IR_ERR            0x1
> +
>  
>  #define TYPE_AMD_IOMMU_DEVICE "amd-iommu"
>  #define AMD_IOMMU_DEVICE(obj)\
> @@ -278,6 +288,9 @@ typedef struct AMDVIState {
>  
>      /* IOTLB */
>      GHashTable *iotlb;
> +
> +    /* Interrupt remapping */
> +    bool intr_enabled;

Why do you need this field if the same info is already available
at AMDVIState::iommu::intr_supported?

>  } AMDVIState;
>  
>  #endif
> diff --git a/hw/i386/trace-events b/hw/i386/trace-events
> index 9e6fc4d..41d533c 100644
> --- a/hw/i386/trace-events
> +++ b/hw/i386/trace-events
> @@ -101,6 +101,11 @@ amdvi_mode_invalid(uint8_t level, uint64_t addr)"error: 
> translation level 0x%"PR
>  amdvi_page_fault(uint64_t addr) "error: page fault accessing guest physical 
> address 0x%"PRIx64
>  amdvi_iotlb_hit(uint8_t bus, uint8_t slot, uint8_t func, uint64_t addr, 
> uint64_t txaddr) "hit iotlb devid %02x:%02x.%x gpa 0x%"PRIx64" hpa 0x%"PRIx64
>  amdvi_translation_result(uint8_t bus, uint8_t slot, uint8_t func, uint64_t 
> addr, uint64_t txaddr) "devid: %02x:%02x.%x gpa 0x%"PRIx64" hpa 0x%"PRIx64
> +amdvi_mem_ir_write_req(uint64_t addr, uint64_t val, uint32_t size) "addr 
> 0x%"PRIx64" data 0x%"PRIx64" size 0x%"PRIx32
> +amdvi_mem_ir_write(uint64_t addr, uint64_t val) "addr 0x%"PRIx64" data 
> 0x%"PRIx64
> +amdvi_ir_remap_msi_req(uint64_t addr, uint64_t data, uint8_t devid) "addr 
> 0x%"PRIx64" data 0x%"PRIx64" devid 0x%"PRIx8
> +amdvi_ir_remap_msi(uint64_t addr, uint64_t data, uint64_t addr2, uint64_t 
> data2) "(addr 0x%"PRIx64", data 0x%"PRIx64") -> (addr 0x%"PRIx64", data 
> 0x%"PRIx64")"
> +amdvi_err(const char *str) "%s"
>  
>  # hw/i386/vmport.c
>  vmport_register(unsigned char command, void *func, void *opaque) "command: 
> 0x%02x func: %p opaque: %p"
> -- 
> 2.7.4
> 
> 

-- 
Eduardo



reply via email to

[Prev in Thread] Current Thread [Next in Thread]