[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH 2/2] i386: Interrupt remapping support for VT-d
From: |
Rita Sinha |
Subject: |
[Qemu-devel] [PATCH 2/2] i386: Interrupt remapping support for VT-d |
Date: |
Wed, 9 Mar 2016 00:58:17 +0530 |
From: Jan Kiszka <address@hidden>
Still a bit hacky: unconditionally enabled (must become opt-in, as it is
not available with in-kernel irqchip) and not reporting faults properly -
but it works! It also revealed a Linux bug [1].
[1] http://thread.gmane.org/gmane.linux.kernel/1766261
Signed-off-by: Rita Sinha <address@hidden>
---
hw/i386/acpi-build.c | 28 ++++++-
hw/i386/intel_iommu.c | 162 ++++++++++++++++++++++++++++++++++++++++-
hw/i386/intel_iommu_internal.h | 27 +++++++
hw/intc/apic.c | 1 +
hw/pci-host/q35.c | 11 +++
include/hw/acpi/acpi-defs.h | 22 ++++++
include/hw/i386/intel_iommu.h | 7 ++
7 files changed, 252 insertions(+), 6 deletions(-)
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 52c9470..ef43122 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -68,6 +68,9 @@
#define ACPI_BUILD_TABLE_SIZE 0x20000
+#define ACPI_BUILD_IOAPIC_ID 0x0
+#define ACPI_BUILD_HPET_ID 0x0
+
/* #define DEBUG_ACPI_BUILD */
#ifdef DEBUG_ACPI_BUILD
#define ACPI_BUILD_DPRINTF(fmt, ...) \
@@ -392,7 +395,6 @@ build_madt(GArray *table_data, GArray *linker, AcpiCpuInfo
*cpu)
io_apic = acpi_data_push(table_data, sizeof *io_apic);
io_apic->type = ACPI_APIC_IO;
io_apic->length = sizeof(*io_apic);
-#define ACPI_BUILD_IOAPIC_ID 0x0
io_apic->io_apic_id = ACPI_BUILD_IOAPIC_ID;
io_apic->address = cpu_to_le32(IO_APIC_DEFAULT_ADDRESS);
io_apic->interrupt = cpu_to_le32(0);
@@ -2302,6 +2304,7 @@ build_hpet(GArray *table_data, GArray *linker)
*/
hpet->timer_block_id = cpu_to_le32(0x8086a201);
hpet->addr.address = cpu_to_le64(HPET_BASE);
+ hpet->hpet_number = ACPI_BUILD_HPET_ID;
build_header(linker, table_data,
(void *)hpet, "HPET", sizeof(*hpet), 1, NULL, NULL);
}
@@ -2496,19 +2499,38 @@ build_dmar_q35(GArray *table_data, GArray *linker)
AcpiTableDmar *dmar;
AcpiDmarHardwareUnit *drhd;
+ AcpiDmarDeviceScope *dev_scope;
dmar = acpi_data_push(table_data, sizeof(*dmar));
dmar->host_address_width = VTD_HOST_ADDRESS_WIDTH - 1;
- dmar->flags = 0; /* No intr_remap for now */
+ dmar->flags = ACPI_DMAR_INTR_REMAP;
/* DMAR Remapping Hardware Unit Definition structure */
drhd = acpi_data_push(table_data, sizeof(*drhd));
drhd->type = cpu_to_le16(ACPI_DMAR_TYPE_HARDWARE_UNIT);
- drhd->length = cpu_to_le16(sizeof(*drhd)); /* No device scope now */
+ drhd->length = cpu_to_le16(sizeof(*drhd) + (sizeof(*dev_scope) + 2) * 2);
drhd->flags = ACPI_DMAR_INCLUDE_PCI_ALL;
drhd->pci_segment = cpu_to_le16(0);
drhd->address = cpu_to_le64(Q35_HOST_BRIDGE_IOMMU_ADDR);
+ /* Device Scope structures for IOAPIC */
+ dev_scope = acpi_data_push(table_data, sizeof(*dev_scope) + 2);
+ dev_scope->type = ACPI_DMAR_SCOPE_TYPE_IOAPIC;
+ dev_scope->length = sizeof(*dev_scope) + 2;
+ dev_scope->enumeration_id = ACPI_BUILD_IOAPIC_ID;
+ dev_scope->start_bus_number = Q35_PSEUDO_BUS_PLATFORM;
+ dev_scope->path[0] = PCI_SLOT(Q35_PSEUDO_DEVFN_IOAPIC);
+ dev_scope->path[1] = PCI_FUNC(Q35_PSEUDO_DEVFN_IOAPIC);
+
+ /* Device Scope structures for HPET */
+ dev_scope = acpi_data_push(table_data, sizeof(*dev_scope) + 2);
+ dev_scope->type = ACPI_DMAR_SCOPE_TYPE_HPET;
+ dev_scope->length = sizeof(*dev_scope) + 2;
+ dev_scope->enumeration_id = ACPI_BUILD_HPET_ID;
+ dev_scope->start_bus_number = Q35_PSEUDO_BUS_PLATFORM;
+ dev_scope->path[0] = PCI_SLOT(Q35_PSEUDO_DEVFN_HPET);
+ dev_scope->path[1] = PCI_FUNC(Q35_PSEUDO_DEVFN_HPET);
+
build_header(linker, table_data, (void *)(table_data->data + dmar_start),
"DMAR", table_data->len - dmar_start, 1, NULL, NULL);
}
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index c371588..2ea642c 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -20,6 +20,7 @@
*/
#include "qemu/osdep.h"
+#include "hw/i386/apic-msidef.h"
#include "hw/sysbus.h"
#include "exec/address-spaces.h"
#include "intel_iommu_internal.h"
@@ -30,10 +31,11 @@
#ifdef DEBUG_INTEL_IOMMU
enum {
DEBUG_GENERAL, DEBUG_CSR, DEBUG_INV, DEBUG_MMU, DEBUG_FLOG,
- DEBUG_CACHE,
+ DEBUG_CACHE, DEBUG_IR
};
#define VTD_DBGBIT(x) (1 << DEBUG_##x)
-static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR);
+static int vtd_dbgflags = VTD_DBGBIT(GENERAL) | VTD_DBGBIT(CSR) |
+ VTD_DBGBIT(IR);
#define VTD_DPRINTF(what, fmt, ...) do { \
if (vtd_dbgflags & VTD_DBGBIT(what)) { \
@@ -1134,6 +1136,31 @@ static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool
en)
}
/* Set Root Table Pointer */
+static void vtd_handle_gcmd_sirtp(IntelIOMMUState *s) /* Set Interrupt Remapping Table Pointer (not the Root Table) */
+{
+ VTD_DPRINTF(CSR, "set Interrupt Remap Table Pointer");
+
+ s->irta = vtd_get_quad_raw(s, DMAR_IRTA_REG); /* size field is still in the low bits here */
+ s->irt_size = 2 << (s->irta & VTD_IRTA_SIZE_MASK); /* entry count = 2^(size field + 1) */
+ s->irta &= VTD_IRTA_ADDR_MASK; /* keep only the table base address */
+ /* Report completion to the driver by setting the IRTPS bit in GSTS */
+ vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRTPS);
+}
+
+static void vtd_handle_gcmd_ire(IntelIOMMUState *s, bool en) /* apply a change of the GCMD IRE bit; en is its new value */
+{
+ VTD_DPRINTF(IR, "Interrupt Remapping Enable %s", (en ? "on" : "off"));
+
+ if (en) {
+ s->ir_enabled = true;
+ vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRES); /* set the IRES status bit */
+ } else {
+ s->ir_enabled = false;
+ vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_IRES, 0); /* clear the IRES status bit */
+ }
+}
+
+/* Set Root Table Pointer */
static void vtd_handle_gcmd_srtp(IntelIOMMUState *s)
{
VTD_DPRINTF(CSR, "set Root Table Pointer");
@@ -1182,6 +1209,12 @@ static void vtd_handle_gcmd_write(IntelIOMMUState *s)
/* Queued Invalidation Enable */
vtd_handle_gcmd_qie(s, val & VTD_GCMD_QIE);
}
+ if (val & VTD_GCMD_SIRTP) {
+ vtd_handle_gcmd_sirtp(s);
+ }
+ if (changed & VTD_GCMD_IRE) {
+ vtd_handle_gcmd_ire(s, val & VTD_GCMD_IRE);
+ }
}
/* Handle write to Context Command Register */
@@ -1406,6 +1439,11 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
}
break;
+ case VTD_INV_DESC_INT:
+ VTD_DPRINTF(INV, "Interrupt Entry Invalidate Descriptor hi 0x%"PRIx64
+ " lo 0x%"PRIx64, inv_desc.hi, inv_desc.lo);
+ break;
+
default:
VTD_DPRINTF(GENERAL, "error: unkonw Invalidation Descriptor type "
"hi 0x%"PRIx64 " lo 0x%"PRIx64 " type %"PRIu8,
@@ -1762,6 +1800,24 @@ static void vtd_mem_write(void *opaque, hwaddr addr,
vtd_handle_ics_write(s);
break;
+ /* Interrupt Remapping Table Address Register, 64-bit */
+ case DMAR_IRTA_REG:
+ VTD_DPRINTF(IR, "DMAR_IRTA_REG write addr 0x%"PRIx64
+ ", size %d, val 0x%"PRIx64, addr, size, val);
+ if (size == 4) {
+ vtd_set_long(s, addr, val);
+ } else {
+ vtd_set_quad(s, addr, val);
+ }
+ break;
+
+ case DMAR_IRTA_REG_HI:
+ VTD_DPRINTF(IR, "DMAR_IRTA_REG_HI write addr 0x%"PRIx64
+ ", size %d, val 0x%"PRIx64, addr, size, val);
+ assert(size == 4);
+ vtd_set_long(s, addr, val);
+ break;
+
/* Invalidation Event Control Register, 32-bit */
case DMAR_IECTL_REG:
VTD_DPRINTF(INV, "DMAR_IECTL_REG write addr 0x%"PRIx64
@@ -1858,6 +1914,15 @@ static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion
*iommu, hwaddr addr,
.perm = IOMMU_NONE,
};
+ if (s->ir_enabled && vtd_is_interrupt_addr(addr)) {
+ ret.target_as = &vtd_as->int_remap_as;
+ ret.iova = addr;
+ ret.translated_addr = addr;
+ ret.addr_mask = ~(hwaddr)0x3;
+ ret.perm = IOMMU_WO;
+ return ret;
+ }
+
if (!s->dmar_enabled) {
/* DMAR disabled, passthrough, use 4k-page*/
ret.iova = addr & VTD_PAGE_MASK_4K;
@@ -1877,6 +1942,93 @@ static IOMMUTLBEntry vtd_iommu_translate(MemoryRegion
*iommu, hwaddr addr,
return ret;
}
+static int get_int_remap_entry(IntelIOMMUState *s, uint16_t index,
+ VTDIntRemapEntry *irte)
+{
+ dma_addr_t addr;
+
+ if (index >= s->irt_size) {
+ VTD_DPRINTF(IR, "error: IR table index %d out of range", index);
+ return -1;
+ }
+
+ addr = s->irta + index * sizeof(*irte);
+
+ if (dma_memory_read(get_dma_address_space(), addr, irte, sizeof(*irte))) {
+ VTD_DPRINTF(IR, "error: failed to access IR table at 0x%"PRIx64
+ " + %"PRIu32, s->irta, index);
+ return -1;/*-VTD_FR_CONTEXT_TABLE_INV*/;
+ }
+
+ irte->lo = le64_to_cpu(irte->lo);
+ irte->hi = le64_to_cpu(irte->hi);
+
+ return 0;
+}
+
+/*
+ * Write handler of the remap region: resolve a remappable-format MSI via
+ * its IRTE (handle = addr[19:5] plus addr[2] as bit 15) and re-issue it.
+ */
+static void vtd_int_remap_write(void *opaque, hwaddr addr, uint64_t val,
+                                unsigned size)
+{
+    uint16_t index = ((addr >> 5) & 0x7fff) | ((addr << 13) & 0x8000);
+    VTDAddressSpace *vtd_as = opaque;
+    IntelIOMMUState *s = vtd_as->iommu_state;
+    VTDIntRemapEntry irte;
+    uint8_t bus_num = pci_bus_num(vtd_as->bus);
+
+    if (!(addr & (1 << 4))) { /* format bit clear: not a remappable request */
+        VTD_DPRINTF(IR, "compat MSI, blocked");
+        return;
+    }
+
+    if (addr & (1 << 3)) { /* subhandle valid: offset the index by data[15:0] */
+        index += val & 0xffff;
+    }
+    if (get_int_remap_entry(s, index, &irte)) {
+        VTD_DPRINTF(IR, "error: failed to fetch IRTE %"PRIu16, index);
+        return;
+    }
+
+    /*
+     * TODO:
+     *  - proper error reporting, including FPD evaluation
+     *  - check for reserved bits
+     *  - SQ & SVT evaluation
+     */
+    if (!VTD_IRTE_LO_P(irte.lo)) {
+        VTD_DPRINTF(IR, "IRTE not present");
+        return;
+    }
+    if (vtd_make_source_id(bus_num, vtd_as->devfn) !=
+        VTD_IRTE_HI_SID(irte.hi)) {
+        VTD_DPRINTF(IR, "SID mismatch");
+        return;
+    }
+
+    addr = MSI_ADDR_BASE |
+           (VTD_IRTE_LO_DEST_MODE(irte.lo) << MSI_ADDR_DEST_MODE_SHIFT) |
+           (VTD_IRTE_LO_REDIR_HINT(irte.lo) << MSI_ADDR_REDIRECTION_SHIFT) |
+           (VTD_IRTE_LO_DEST(irte.lo) << MSI_ADDR_DEST_IDX_SHIFT);
+    val = VTD_IRTE_LO_VECTOR(irte.lo) |
+          (VTD_IRTE_LO_DELIVERY(irte.lo) << MSI_DATA_DELIVERY_MODE_SHIFT) |
+          (1 << MSI_DATA_LEVEL_SHIFT) |
+          (VTD_IRTE_LO_TRIGGER_MODE(irte.lo) << MSI_DATA_TRIGGER_SHIFT);
+    stl_le_phys(get_dma_address_space(), addr, val);
+}
+
+const MemoryRegionOps vtd_int_remap_ops = { /* ops for the interrupt remapping region; no read handler is provided */
+ .write = vtd_int_remap_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .valid = {
+ .min_access_size = 4, /* min == max: only 4-byte accesses are valid */
+ .max_access_size = 4,
+ },
+};
+
+
static const VMStateDescription vtd_vmstate = {
.name = "iommu-intel",
.unmigratable = 1,
@@ -1954,9 +2106,12 @@ static void vtd_init(IntelIOMMUState *s)
s->qi_enabled = false;
s->iq_last_desc_type = VTD_INV_DESC_NONE;
s->next_frcd_reg = 0;
+ s->irta = 0;
+ s->irt_size = 0;
+ s->ir_enabled = false;
s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MGAW |
VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS;
- s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
+ s->ecap = VTD_ECAP_QI | VTD_ECAP_IR | VTD_ECAP_IRO | VTD_ECAP_MHMV;
vtd_reset_context_cache(s);
vtd_reset_iotlb(s);
@@ -1993,6 +2148,7 @@ static void vtd_init(IntelIOMMUState *s)
vtd_define_quad(s, DMAR_IQT_REG, 0, 0x7fff0ULL, 0);
vtd_define_quad(s, DMAR_IQA_REG, 0, 0xfffffffffffff007ULL, 0);
vtd_define_long(s, DMAR_ICS_REG, 0, 0, 0x1UL);
+ vtd_define_quad(s, DMAR_IRTA_REG, 0, 0xfffffffffffff00fULL, 0);
vtd_define_long(s, DMAR_IECTL_REG, 0x80000000UL, 0x80000000UL, 0);
vtd_define_long(s, DMAR_IEDATA_REG, 0, 0xffffffffUL, 0);
vtd_define_long(s, DMAR_IEADDR_REG, 0, 0xfffffffcUL, 0);
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index e5f514c..e0671ee 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -173,8 +173,10 @@
#define VTD_RTADDR_ADDR_MASK (VTD_HAW_MASK ^ 0xfffULL)
/* ECAP_REG */
+#define VTD_ECAP_MHMV (0xf << 20)
/* (offset >> 4) << 8 */
#define VTD_ECAP_IRO (DMAR_IOTLB_REG_OFFSET << 4)
+#define VTD_ECAP_IR (1ULL << 3)
#define VTD_ECAP_QI (1ULL << 1)
/* CAP_REG */
@@ -214,6 +216,11 @@
/* ICS_REG */
#define VTD_ICS_IWC 1UL
+/* IRTA_REG */
+#define VTD_IRTA_SIZE_MASK (0xf)
+#define VTD_IRTA_EIME (1ULL << 11)
+#define VTD_IRTA_ADDR_MASK (VTD_HAW_MASK ^ 0xfffULL)
+
/* IECTL_REG */
#define VTD_IECTL_IM (1UL << 31)
#define VTD_IECTL_IP (1UL << 30)
@@ -286,6 +293,7 @@ typedef struct VTDInvDesc VTDInvDesc;
#define VTD_INV_DESC_TYPE 0xf
#define VTD_INV_DESC_CC 0x1 /* Context-cache Invalidate Desc */
#define VTD_INV_DESC_IOTLB 0x2
+#define VTD_INV_DESC_INT 0x4
#define VTD_INV_DESC_WAIT 0x5 /* Invalidation Wait Descriptor */
#define VTD_INV_DESC_NONE 0 /* Not an Invalidate Descriptor */
@@ -388,4 +396,23 @@ typedef struct VTDRootEntry VTDRootEntry;
#define VTD_SL_PT_BASE_ADDR_MASK (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK)
#define VTD_SL_IGN_COM 0xbff0000000000000ULL
+/* Interrupt Remapping Table Entry, held as two 64-bit words */
+struct VTDIntRemapEntry {
+ uint64_t lo; /* decoded by the VTD_IRTE_LO_* accessors (present, modes, vector, destination) */
+ uint64_t hi; /* decoded by the VTD_IRTE_HI_* accessors (source ID) */
+};
+typedef struct VTDIntRemapEntry VTDIntRemapEntry;
+
+/* IRTE low word: field accessors taking the 64-bit low half of an entry */
+#define VTD_IRTE_LO_P(val) (((val) >> 0) & 0x01) /* bit 0: entry present */
+#define VTD_IRTE_LO_DEST_MODE(val) (((val) >> 2) & 0x01) /* bit 2 */
+#define VTD_IRTE_LO_REDIR_HINT(val) (((val) >> 3) & 0x01) /* bit 3 */
+#define VTD_IRTE_LO_TRIGGER_MODE(val) (((val) >> 4) & 0x01) /* bit 4 */
+#define VTD_IRTE_LO_DELIVERY(val) (((val) >> 5) & 0x07) /* bits 7:5 */
+#define VTD_IRTE_LO_VECTOR(val) (((val) >> 16) & 0xff) /* bits 23:16 */
+#define VTD_IRTE_LO_DEST(val) (((val) >> 32) & 0xffffffff) /* bits 63:32 */
+
+/* IRTE high word: field accessors taking the 64-bit high half of an entry */
+#define VTD_IRTE_HI_SID(val) (((val) >> 0) & 0xffff) /* bits 15:0: source ID */
+
#endif
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index 2e99f75..1fb4faa 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -846,6 +846,7 @@ static void msi_region_write(void *opaque, hwaddr addr,
uint64_t data,
uint8_t delivery = (data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x7;
/* FIXME: Ignoring redirection hint. */
+ /*printf("APIC: %08lx:%04lx\n", addr, data);*/
apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode);
}
diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c
index 566e3d8..f7adc8e 100644
--- a/hw/pci-host/q35.c
+++ b/hw/pci-host/q35.c
@@ -431,6 +431,17 @@ static AddressSpace *q35_host_dma_iommu(PCIBus *bus, void
*opaque, int devfn)
assert(0 <= devfn && devfn <= VTD_PCI_DEVFN_MAX);
vtd_as = vtd_find_add_as(s, bus, devfn);
+
+ memory_region_init_iommu(&vtd_as->iommu, OBJECT(s),
+ &s->iommu_ops, "intel_iommu", UINT64_MAX);
+ address_space_init(&vtd_as->as,
+ &vtd_as->iommu, "intel_iommu");
+ memory_region_init_io(&vtd_as->int_remap_region, OBJECT(s),
+ &vtd_int_remap_ops, vtd_as,
+ "intel_int_remap", UINT64_MAX);
+ address_space_init(&vtd_as->int_remap_as,
+ &vtd_as->int_remap_region,
+ "intel_int_remap");
return &vtd_as->as;
}
diff --git a/include/hw/acpi/acpi-defs.h b/include/hw/acpi/acpi-defs.h
index c7a03d4..220d784 100644
--- a/include/hw/acpi/acpi-defs.h
+++ b/include/hw/acpi/acpi-defs.h
@@ -570,4 +570,26 @@ typedef struct AcpiDmarHardwareUnit AcpiDmarHardwareUnit;
/* Masks for Flags field above */
#define ACPI_DMAR_INCLUDE_PCI_ALL 1
+/* DMAR Device Scope structure: fixed header followed by path bytes */
+struct AcpiDmarDeviceScope {
+ uint8_t type; /* one of the ACPI_DMAR_SCOPE_TYPE_* values */
+ uint8_t length; /* size in bytes of the header plus the path bytes that follow */
+ uint16_t reserved;
+ uint8_t enumeration_id; /* build_dmar_q35 stores the IOAPIC or HPET ID here */
+ uint8_t start_bus_number;
+ uint8_t path[0]; /* trailing bytes; build_dmar_q35 stores one PCI slot/function pair */
+} QEMU_PACKED;
+typedef struct AcpiDmarDeviceScope AcpiDmarDeviceScope;
+
+/* Values for the type field of struct AcpiDmarDeviceScope */
+enum {
+ ACPI_DMAR_SCOPE_TYPE_NOT_USED = 0,
+ ACPI_DMAR_SCOPE_TYPE_ENDPOINT = 1,
+ ACPI_DMAR_SCOPE_TYPE_BRIDGE = 2,
+ ACPI_DMAR_SCOPE_TYPE_IOAPIC = 3, /* used by build_dmar_q35 for the IOAPIC scope */
+ ACPI_DMAR_SCOPE_TYPE_HPET = 4, /* used by build_dmar_q35 for the HPET scope */
+ ACPI_DMAR_SCOPE_TYPE_ACPI = 5,
+ ACPI_DMAR_SCOPE_TYPE_RESERVED = 6 /* Reserved for future use */
+};
+
#endif
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index b024ffa..75a3627 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -70,6 +70,8 @@ struct VTDAddressSpace {
uint8_t devfn;
AddressSpace as;
MemoryRegion iommu;
+ AddressSpace int_remap_as;
+ MemoryRegion int_remap_region;
IntelIOMMUState *iommu_state;
VTDContextCacheEntry context_cache_entry;
};
@@ -109,6 +111,10 @@ struct IntelIOMMUState {
bool qi_enabled; /* Set if the QI is enabled */
uint8_t iq_last_desc_type; /* The type of last completed descriptor */
+ dma_addr_t irta;
+ unsigned int irt_size;
+ bool ir_enabled;
+
/* The index of the Fault Recording Register to be used next.
* Wraps around from N-1 to 0, where N is the number of FRCD_REG.
*/
@@ -125,6 +131,7 @@ struct IntelIOMMUState {
VTDBus *vtd_as_by_bus_num[VTD_PCI_BUS_MAX]; /* VTDBus objects indexed by
bus number */
};
+extern const MemoryRegionOps vtd_int_remap_ops;
/* Find the VTD Address space associated with the given bus pointer,
* create a new one if none exists
*/
--
2.7.2
- [Qemu-devel] [PATCH 2/2] i386: Interrupt remapping support for VT-d,
Rita Sinha <=