[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH v12 13/27] intel_iommu: Add support for PCI MSI
From: |
Michael S. Tsirkin |
Subject: |
Re: [Qemu-devel] [PATCH v12 13/27] intel_iommu: Add support for PCI MSI remap |
Date: |
Thu, 21 Jul 2016 20:45:30 +0300 |
On Thu, Jul 14, 2016 at 01:56:22PM +0800, Peter Xu wrote:
> This patch enables interrupt remapping for PCI devices.
>
> To play the trick, one memory region "iommu_ir" is added as child region
> of the original iommu memory region, covering range 0xfeeXXXXX (which is
> the address range for APIC). All the writes to this range will be taken
> as MSI, and translation is carried out only when IR is enabled.
>
> Idea suggested by Paolo Bonzini.
>
> Signed-off-by: Peter Xu <address@hidden>
> ---
> hw/i386/intel_iommu.c | 241
> +++++++++++++++++++++++++++++++++++++++++
> hw/i386/intel_iommu_internal.h | 2 +
> include/hw/i386/intel_iommu.h | 66 +++++++++++
> 3 files changed, 309 insertions(+)
>
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 6a6cb3b..3d1b15d 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -1982,6 +1982,242 @@ static Property vtd_properties[] = {
> DEFINE_PROP_END_OF_LIST(),
> };
>
> +/* Read IRTE entry with specific index */
> +static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
> + VTD_IRTE *entry)
> +{
> + dma_addr_t addr = 0x00;
> +
> + addr = iommu->intr_root + index * sizeof(*entry);
> + if (dma_memory_read(&address_space_memory, addr, entry,
> + sizeof(*entry))) {
> + VTD_DPRINTF(GENERAL, "error: fail to access IR root at 0x%"PRIx64
> + " + %"PRIu16, iommu->intr_root, index);
> + return -VTD_FR_IR_ROOT_INVAL;
> + }
> +
> + if (!entry->present) {
> + VTD_DPRINTF(GENERAL, "error: present flag not set in IRTE"
> + " entry index %u value 0x%"PRIx64 " 0x%"PRIx64,
> + index, le64_to_cpu(entry->data[1]),
> + le64_to_cpu(entry->data[0]));
> + return -VTD_FR_IR_ENTRY_P;
> + }
> +
> + if (entry->__reserved_0 || entry->__reserved_1 || \
> + entry->__reserved_2) {
> + VTD_DPRINTF(GENERAL, "error: IRTE entry index %"PRIu16
> + " reserved fields non-zero: 0x%"PRIx64 " 0x%"PRIx64,
> + index, le64_to_cpu(entry->data[1]),
> + le64_to_cpu(entry->data[0]));
> + return -VTD_FR_IR_IRTE_RSVD;
> + }
> +
> + /*
> + * TODO: Check Source-ID corresponds to SVT (Source Validation
> + * Type) bits
> + */
> +
> + return 0;
> +}
> +
> +/* Fetch IRQ information of specific IR index */
> +static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index, VTDIrq
> *irq)
> +{
> + VTD_IRTE irte;
> + int ret = 0;
> +
> + bzero(&irte, sizeof(irte));
> +
> + ret = vtd_irte_get(iommu, index, &irte);
> + if (ret) {
> + return ret;
> + }
> +
> + irq->trigger_mode = irte.trigger_mode;
> + irq->vector = irte.vector;
> + irq->delivery_mode = irte.delivery_mode;
> + /* Not support EIM yet: please refer to vt-d 9.10 DST bits */
> +#define VTD_IR_APIC_DEST_MASK (0xff00ULL)
> +#define VTD_IR_APIC_DEST_SHIFT (8)
> + irq->dest = (le32_to_cpu(irte.dest_id) & VTD_IR_APIC_DEST_MASK) >> \
> + VTD_IR_APIC_DEST_SHIFT;
> + irq->dest_mode = irte.dest_mode;
> + irq->redir_hint = irte.redir_hint;
> +
> + VTD_DPRINTF(IR, "remapping interrupt index %d: trig:%u,vec:%u,"
> + "deliver:%u,dest:%u,dest_mode:%u", index,
> + irq->trigger_mode, irq->vector, irq->delivery_mode,
> + irq->dest, irq->dest_mode);
> +
> + return 0;
> +}
> +
> +/* Generate one MSI message from VTDIrq info */
> +static void vtd_generate_msi_message(VTDIrq *irq, MSIMessage *msg_out)
> +{
> + VTD_MSIMessage msg = {};
> +
> + /* Generate address bits */
> + msg.dest_mode = irq->dest_mode;
> + msg.redir_hint = irq->redir_hint;
> + msg.dest = irq->dest;
> + msg.__addr_head = cpu_to_le32(0xfee);
> + /* Keep this from original MSI address bits */
> + msg.__not_used = irq->msi_addr_last_bits;
> +
> + /* Generate data bits */
> + msg.vector = irq->vector;
> + msg.delivery_mode = irq->delivery_mode;
> + msg.level = 1;
> + msg.trigger_mode = irq->trigger_mode;
> +
> + msg_out->address = msg.msi_addr;
> + msg_out->data = msg.msi_data;
> +}
> +
> +/* Interrupt remapping for MSI/MSI-X entry */
> +static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
> + MSIMessage *origin,
> + MSIMessage *translated)
> +{
> + int ret = 0;
> + VTD_IR_MSIAddress addr;
> + uint16_t index;
> + VTDIrq irq = {0};
> +
> + assert(origin && translated);
> +
> + if (!iommu || !iommu->intr_enabled) {
> + goto do_not_translate;
> + }
> +
> + if (origin->address & VTD_MSI_ADDR_HI_MASK) {
> + VTD_DPRINTF(GENERAL, "error: MSI addr high 32 bits nonzero"
> + " during interrupt remapping: 0x%"PRIx32,
> + (uint32_t)((origin->address & VTD_MSI_ADDR_HI_MASK) >> \
> + VTD_MSI_ADDR_HI_SHIFT));
> + return -VTD_FR_IR_REQ_RSVD;
> + }
> +
> + addr.data = origin->address & VTD_MSI_ADDR_LO_MASK;
Below you treat data as LE, but here you use VTD_MSI_ADDR_LO_MASK
which is native endian. This looks wrong to me.
> + if (le16_to_cpu(addr.__head) != 0xfee) {
> + VTD_DPRINTF(GENERAL, "error: MSI addr low 32 bits invalid: "
> + "0x%"PRIx32, addr.data);
> + return -VTD_FR_IR_REQ_RSVD;
> + }
> +
> + /* This is compatible mode. */
> + if (addr.int_mode != VTD_IR_INT_FORMAT_REMAP) {
> + goto do_not_translate;
> + }
> +
> + index = addr.index_h << 15 | le16_to_cpu(addr.index_l);
> +
> +#define VTD_IR_MSI_DATA_SUBHANDLE (0x0000ffff)
> +#define VTD_IR_MSI_DATA_RESERVED (0xffff0000)
> +
> + if (addr.sub_valid) {
> + /* See VT-d spec 5.1.2.2 and 5.1.3 on subhandle */
> + index += origin->data & VTD_IR_MSI_DATA_SUBHANDLE;
> + }
> +
> + ret = vtd_remap_irq_get(iommu, index, &irq);
> + if (ret) {
> + return ret;
> + }
> +
> + if (addr.sub_valid) {
> + VTD_DPRINTF(IR, "received MSI interrupt");
> + if (origin->data & VTD_IR_MSI_DATA_RESERVED) {
> + VTD_DPRINTF(GENERAL, "error: MSI data bits non-zero for "
> + "interrupt remappable entry: 0x%"PRIx32,
> + origin->data);
> + return -VTD_FR_IR_REQ_RSVD;
> + }
> + } else {
> + uint8_t vector = origin->data & 0xff;
> + VTD_DPRINTF(IR, "received IOAPIC interrupt");
> + /* IOAPIC entry vector should be aligned with IRTE vector
> + * (see vt-d spec 5.1.5.1). */
> + if (vector != irq.vector) {
> + VTD_DPRINTF(GENERAL, "IOAPIC vector inconsistent: "
> + "entry: %d, IRTE: %d, index: %d",
> + vector, irq.vector, index);
> + }
> + }
> +
> + /*
> + * We'd better keep the last two bits, assuming that guest OS
> + * might modify it. Keep it does not hurt after all.
> + */
> + irq.msi_addr_last_bits = addr.__not_care;
> +
> + /* Translate VTDIrq to MSI message */
> + vtd_generate_msi_message(&irq, translated);
> +
> + VTD_DPRINTF(IR, "mapping MSI 0x%"PRIx64":0x%"PRIx32 " -> "
> + "0x%"PRIx64":0x%"PRIx32, origin->address, origin->data,
> + translated->address, translated->data);
> + return 0;
> +
> +do_not_translate:
> + memcpy(translated, origin, sizeof(*origin));
> + return 0;
> +}
> +
> +static MemTxResult vtd_mem_ir_read(void *opaque, hwaddr addr,
> + uint64_t *data, unsigned size,
> + MemTxAttrs attrs)
> +{
> + return MEMTX_OK;
> +}
> +
> +static MemTxResult vtd_mem_ir_write(void *opaque, hwaddr addr,
> + uint64_t value, unsigned size,
> + MemTxAttrs attrs)
> +{
> + int ret = 0;
> + MSIMessage from = {0}, to = {0};
> +
> + from.address = (uint64_t) addr + VTD_INTERRUPT_ADDR_FIRST;
> + from.data = (uint32_t) value;
> +
> + ret = vtd_interrupt_remap_msi(opaque, &from, &to);
> + if (ret) {
> + /* TODO: report error */
> + VTD_DPRINTF(GENERAL, "int remap fail for addr 0x%"PRIx64
> + " data 0x%"PRIx32, from.address, from.data);
> + /* Drop this interrupt */
> + return MEMTX_ERROR;
> + }
> +
> + VTD_DPRINTF(IR, "delivering MSI 0x%"PRIx64":0x%"PRIx32
> + " for device sid 0x%04x",
> + to.address, to.data, sid);
> +
> + if (dma_memory_write(&address_space_memory, to.address,
> + &to.data, size)) {
> + VTD_DPRINTF(GENERAL, "error: fail to write 0x%"PRIx64
> + " value 0x%"PRIx32, to.address, to.data);
> + }
> +
> + return MEMTX_OK;
> +}
> +
> +static const MemoryRegionOps vtd_mem_ir_ops = {
> + .read_with_attrs = vtd_mem_ir_read,
> + .write_with_attrs = vtd_mem_ir_write,
> + .endianness = DEVICE_LITTLE_ENDIAN,
> + .impl = {
> + .min_access_size = 4,
> + .max_access_size = 4,
> + },
> + .valid = {
> + .min_access_size = 4,
> + .max_access_size = 4,
> + },
> +};
>
> VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
> {
> @@ -2009,6 +2245,11 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s,
> PCIBus *bus, int devfn)
> vtd_dev_as->context_cache_entry.context_cache_gen = 0;
> memory_region_init_iommu(&vtd_dev_as->iommu, OBJECT(s),
> &s->iommu_ops, "intel_iommu", UINT64_MAX);
> + memory_region_init_io(&vtd_dev_as->iommu_ir, OBJECT(s),
> + &vtd_mem_ir_ops, s, "intel_iommu_ir",
> + VTD_INTERRUPT_ADDR_SIZE);
> + memory_region_add_subregion(&vtd_dev_as->iommu,
> VTD_INTERRUPT_ADDR_FIRST,
> + &vtd_dev_as->iommu_ir);
> address_space_init(&vtd_dev_as->as,
> &vtd_dev_as->iommu, "intel_iommu");
> }
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index 2a9987f..e1a08cb 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -110,6 +110,8 @@
> /* Interrupt Address Range */
> #define VTD_INTERRUPT_ADDR_FIRST 0xfee00000ULL
> #define VTD_INTERRUPT_ADDR_LAST 0xfeefffffULL
> +#define VTD_INTERRUPT_ADDR_SIZE (VTD_INTERRUPT_ADDR_LAST - \
> + VTD_INTERRUPT_ADDR_FIRST + 1)
>
> /* The shift of source_id in the key of IOTLB hash table */
> #define VTD_IOTLB_SID_SHIFT 36
> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> index 260aa8e..cdbbddd 100644
> --- a/include/hw/i386/intel_iommu.h
> +++ b/include/hw/i386/intel_iommu.h
> @@ -24,6 +24,8 @@
> #include "hw/qdev.h"
> #include "sysemu/dma.h"
> #include "hw/i386/x86-iommu.h"
> +#include "hw/i386/ioapic.h"
> +#include "hw/pci/msi.h"
>
> #define TYPE_INTEL_IOMMU_DEVICE "intel-iommu"
> #define INTEL_IOMMU_DEVICE(obj) \
> @@ -46,6 +48,10 @@
>
> #define DMAR_REPORT_F_INTR (1)
>
> +#define VTD_MSI_ADDR_HI_MASK (0xffffffff00000000ULL)
> +#define VTD_MSI_ADDR_HI_SHIFT (32)
> +#define VTD_MSI_ADDR_LO_MASK (0x00000000ffffffffULL)
> +
> typedef struct VTDContextEntry VTDContextEntry;
> typedef struct VTDContextCacheEntry VTDContextCacheEntry;
> typedef struct IntelIOMMUState IntelIOMMUState;
> @@ -54,6 +60,8 @@ typedef struct VTDIOTLBEntry VTDIOTLBEntry;
> typedef struct VTDBus VTDBus;
> typedef union VTD_IRTE VTD_IRTE;
> typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress;
> +typedef struct VTDIrq VTDIrq;
> +typedef struct VTD_MSIMessage VTD_MSIMessage;
>
> /* Context-Entry */
> struct VTDContextEntry {
> @@ -74,6 +82,7 @@ struct VTDAddressSpace {
> uint8_t devfn;
> AddressSpace as;
> MemoryRegion iommu;
> + MemoryRegion iommu_ir; /* Interrupt region: 0xfeeXXXXX */
> IntelIOMMUState *iommu_state;
> VTDContextCacheEntry context_cache_entry;
> };
> @@ -161,6 +170,63 @@ union VTD_IR_MSIAddress {
> uint32_t data;
> };
>
> +/* Generic IRQ entry information */
> +struct VTDIrq {
> + /* Used by both IOAPIC/MSI interrupt remapping */
> + uint8_t trigger_mode;
> + uint8_t vector;
> + uint8_t delivery_mode;
> + uint32_t dest;
> + uint8_t dest_mode;
> +
> + /* only used by MSI interrupt remapping */
> + uint8_t redir_hint;
> + uint8_t msi_addr_last_bits;
> +};
> +
> +struct VTD_MSIMessage {
> + union {
> + struct {
> +#ifdef HOST_WORDS_BIGENDIAN
> + uint32_t __addr_head:12; /* 0xfee */
> + uint32_t dest:8;
> + uint32_t __reserved:8;
> + uint32_t redir_hint:1;
> + uint32_t dest_mode:1;
> + uint32_t __not_used:2;
> +#else
> + uint32_t __not_used:2;
> + uint32_t dest_mode:1;
> + uint32_t redir_hint:1;
> + uint32_t __reserved:8;
> + uint32_t dest:8;
> + uint32_t __addr_head:12; /* 0xfee */
> +#endif
> + uint32_t __addr_hi:32;
> + } QEMU_PACKED;
> + uint64_t msi_addr;
> + };
> + union {
> + struct {
> +#ifdef HOST_WORDS_BIGENDIAN
> + uint16_t trigger_mode:1;
> + uint16_t level:1;
> + uint16_t __resved:3;
> + uint16_t delivery_mode:3;
> + uint16_t vector:8;
> +#else
> + uint16_t vector:8;
> + uint16_t delivery_mode:3;
> + uint16_t __resved:3;
> + uint16_t level:1;
> + uint16_t trigger_mode:1;
> +#endif
> + uint16_t __resved1:16;
> + } QEMU_PACKED;
> + uint32_t msi_data;
> + };
> +};
> +
> /* When IR is enabled, all MSI/MSI-X data bits should be zero */
> #define VTD_IR_MSI_DATA (0)
>
> --
> 2.4.11
- [Qemu-devel] [PATCH v12 04/27] x86-iommu: introduce "intremap" property, (continued)
- [Qemu-devel] [PATCH v12 04/27] x86-iommu: introduce "intremap" property, Peter Xu, 2016/07/14
- [Qemu-devel] [PATCH v12 05/27] acpi: enable INTR for DMAR report structure, Peter Xu, 2016/07/14
- [Qemu-devel] [PATCH v12 06/27] intel_iommu: allow queued invalidation for IR, Peter Xu, 2016/07/14
- [Qemu-devel] [PATCH v12 07/27] intel_iommu: set IR bit for ECAP register, Peter Xu, 2016/07/14
- [Qemu-devel] [PATCH v12 08/27] acpi: add DMAR scope definition for root IOAPIC, Peter Xu, 2016/07/14
- [Qemu-devel] [PATCH v12 09/27] intel_iommu: define interrupt remap table addr register, Peter Xu, 2016/07/14
- [Qemu-devel] [PATCH v12 10/27] intel_iommu: handle interrupt remap enable, Peter Xu, 2016/07/14
- [Qemu-devel] [PATCH v12 11/27] intel_iommu: define several structs for IOMMU IR, Peter Xu, 2016/07/14
- [Qemu-devel] [PATCH v12 12/27] intel_iommu: add IR translation faults defines, Peter Xu, 2016/07/14
- [Qemu-devel] [PATCH v12 13/27] intel_iommu: Add support for PCI MSI remap, Peter Xu, 2016/07/14
- Re: [Qemu-devel] [PATCH v12 13/27] intel_iommu: Add support for PCI MSI remap,
Michael S. Tsirkin <=
- [Qemu-devel] [PATCH v12 14/27] q35: ioapic: add support for emulated IOAPIC IR, Peter Xu, 2016/07/14
- [Qemu-devel] [PATCH v12 15/27] ioapic: introduce ioapic_entry_parse() helper, Peter Xu, 2016/07/14
- [Qemu-devel] [PATCH v12 16/27] intel_iommu: add support for split irqchip, Peter Xu, 2016/07/14
- [Qemu-devel] [PATCH v12 17/27] x86-iommu: introduce IEC notifiers, Peter Xu, 2016/07/14
- [Qemu-devel] [PATCH v12 18/27] ioapic: register IOMMU IEC notifier for ioapic, Peter Xu, 2016/07/14
- [Qemu-devel] [PATCH v12 19/27] intel_iommu: Add support for Extended Interrupt Mode, Peter Xu, 2016/07/14
- [Qemu-devel] [PATCH v12 20/27] intel_iommu: add SID validation for IR, Peter Xu, 2016/07/14
- [Qemu-devel] [PATCH v12 21/27] kvm-irqchip: simplify kvm_irqchip_add_msi_route, Peter Xu, 2016/07/14
- [Qemu-devel] [PATCH v12 22/27] kvm-irqchip: i386: add hook for add/remove virq, Peter Xu, 2016/07/14