[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 10/13] iommu: Introduce IOMMU emulation infrastr
From: |
Michael S. Tsirkin |
Subject: |
Re: [Qemu-devel] [PATCH 10/13] iommu: Introduce IOMMU emulation infrastructure |
Date: |
Thu, 1 Mar 2012 17:49:20 +0200 |
User-agent: |
Mutt/1.5.21 (2010-09-15) |
On Thu, Mar 01, 2012 at 04:36:06PM +1100, David Gibson wrote:
> This patch adds the basic infrastructure necessary to emulate an IOMMU
> visible to the guest. The DMAContext structure is extended with
> information and a callback describing the translation, and the various
> DMA functions used by devices will now perform IOMMU translation using
> this callback.
>
> Cc: Michael S. Tsirkin <address@hidden>
> Cc: Richard Henderson <address@hidden>
>
> Signed-off-by: Eduard - Gabriel Munteanu <address@hidden>
> Signed-off-by: David Gibson <address@hidden>
> ---
> configure | 12 ++++
> dma-helpers.c | 189
> +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Why not just dma.c?
> dma.h | 126 +++++++++++++++++++++++++++++++-------
> 3 files changed, 305 insertions(+), 22 deletions(-)
>
> diff --git a/configure b/configure
> index fb0e18e..3eff89c 100755
> --- a/configure
> +++ b/configure
> @@ -138,6 +138,7 @@ linux_aio=""
> cap_ng=""
> attr=""
> libattr=""
> +iommu="yes"
> xfs=""
>
> vhost_net="no"
> @@ -784,6 +785,10 @@ for opt do
> ;;
> --enable-vhost-net) vhost_net="yes"
> ;;
> + --enable-iommu) iommu="yes"
> + ;;
> + --disable-iommu) iommu="no"
> + ;;
> --disable-opengl) opengl="no"
> ;;
> --enable-opengl) opengl="yes"
> @@ -1085,6 +1090,8 @@ echo " --enable-docs enable documentation
> build"
> echo " --disable-docs disable documentation build"
> echo " --disable-vhost-net disable vhost-net acceleration support"
> echo " --enable-vhost-net enable vhost-net acceleration support"
> +echo " --disable-iommu disable IOMMU emulation support"
> +echo " --enable-iommu enable IOMMU emulation support"
> echo " --enable-trace-backend=B Set trace backend"
> echo " Available backends:"
> $("$source_path"/scripts/tracetool --list-backends)
> echo " --with-trace-file=NAME Full PATH,NAME of file to store traces"
> @@ -2935,6 +2942,7 @@ echo "posix_madvise $posix_madvise"
> echo "uuid support $uuid"
> echo "libcap-ng support $cap_ng"
> echo "vhost-net support $vhost_net"
> +echo "IOMMU support $iommu"
> echo "Trace backend $trace_backend"
> echo "Trace output file $trace_file-<pid>"
> echo "spice support $spice"
> @@ -3812,6 +3820,10 @@ if test "$target_softmmu" = "yes" -a \( \
> echo "CONFIG_NEED_MMU=y" >> $config_target_mak
> fi
>
> +if test "$iommu" = "yes" ; then
> + echo "CONFIG_IOMMU=y" >> $config_host_mak
> +fi
> +
> if test "$gprof" = "yes" ; then
> echo "TARGET_GPROF=yes" >> $config_target_mak
> if test "$target_linux_user" = "yes" ; then
> diff --git a/dma-helpers.c b/dma-helpers.c
> index 9dcfb2c..8269d60 100644
> --- a/dma-helpers.c
> +++ b/dma-helpers.c
> @@ -10,6 +10,9 @@
> #include "dma.h"
> #include "block_int.h"
> #include "trace.h"
> +#include "range.h"
> +
> +/*#define DEBUG_IOMMU*/
>
> void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint, DMAContext *dma)
> {
> @@ -255,3 +258,189 @@ void dma_acct_start(BlockDriverState *bs,
> BlockAcctCookie *cookie,
> {
> bdrv_acct_start(bs, cookie, sg->size, type);
> }
> +
> +#ifdef CONFIG_IOMMU
> +bool __dma_memory_valid(DMAContext *dma, dma_addr_t addr, dma_addr_t len,
> + DMADirection dir)
> +{
> + target_phys_addr_t paddr, plen;
> +
> +#ifdef DEBUG_DMA
> + fprintf(stderr, "dma_memory_check iommu=%p addr=0x%llx len=0x%llx
> dir=%d\n",
> + (unsigned long long)addr, (unsigned long long)len, dir);
> +#endif
> +
> + while (len) {
> + if (dma->translate(dma, addr, &paddr, &plen, dir) != 0) {
> + return false;
> + }
> +
> + /* The translation might be valid for larger regions. */
> + if (plen > len) {
> + plen = len;
> + }
> +
> + len -= plen;
> + addr += plen;
> + }
> +
> + return true;
> +}
> +
> +int __dma_memory_rw(DMAContext *dma, dma_addr_t addr,
> + void *buf, dma_addr_t len, DMADirection dir)
By the way, users still ignore the return code
from the pci routines.
I suspect the API of returning errors is just not
such a good one. What happens on read errors with
real devices? Master abort? What about write errors?
Maybe we need a callback
that will set error flags in the device instead.
> +{
> + target_phys_addr_t paddr, plen;
> + int err;
> +
> +#ifdef DEBUG_IOMMU
> + fprintf(stderr, "dma_memory_rw iommu=%p addr=0x%llx len=0x%llx dir=%d\n",
> + (unsigned long long)addr, (unsigned long long)len, dir);
> +#endif
> +
> + while (len) {
> + err = dma->translate(dma, addr, &paddr, &plen, dir);
> + if (err) {
> + return -1;
Why not return err?
> + }
> +
> + /* The translation might be valid for larger regions. */
> + if (plen > len) {
> + plen = len;
> + }
> +
> + cpu_physical_memory_rw(paddr, buf, plen,
> + dir == DMA_DIRECTION_FROM_DEVICE);
> +
> + len -= plen;
> + addr += plen;
> + buf += plen;
> + }
> +
> + return 0;
> +}
> +
> +int __dma_memory_zero(DMAContext *dma, dma_addr_t addr, dma_addr_t len)
> +{
> + target_phys_addr_t paddr, plen;
> + int err;
> +
> +#ifdef DEBUG_IOMMU
> + fprintf(stderr, "dma_memory_zero iommu=%p addr=0x%llx len=0x%llx\n",
> + (unsigned long long)addr, (unsigned long long)len);
> +#endif
> +
> + while (len) {
> + err = dma->translate(dma, addr, &paddr, &plen,
> + DMA_DIRECTION_FROM_DEVICE);
> + if (err) {
> + return -1;
> + }
> +
> + /* The translation might be valid for larger regions. */
> + if (plen > len) {
> + plen = len;
> + }
> +
> + cpu_physical_memory_zero(paddr, plen);
> +
> + len -= plen;
> + addr += plen;
> + }
> +
> + return 0;
> +}
> +
> +typedef struct DMAMemoryMap DMAMemoryMap;
> +struct DMAMemoryMap {
> + dma_addr_t addr;
> + size_t len;
> + void *buf;
> + DMAInvalidateMapFunc *invalidate;
> + void *invalidate_opaque;
Do we absolutely need the opaque value?
Can we let the user allocate the map objects?
Then user can use safe container_of to get user data.
> +
> + QLIST_ENTRY(DMAMemoryMap) list;
> +};
> +
> +void dma_context_init(DMAContext *dma, DMATranslateFunc fn, void *opaque)
> +{
> + dma->translate = fn;
> + dma->opaque = opaque;
> + QLIST_INIT(&dma->memory_maps);
> +}
> +
> +void dma_invalidate_memory_range(DMAContext *dma,
> + dma_addr_t addr, dma_addr_t len)
> +{
> + DMAMemoryMap *map;
> +
> + QLIST_FOREACH(map, &dma->memory_maps, list) {
> + if (ranges_overlap(addr, len, map->addr, map->len)) {
> + map->invalidate(map->invalidate_opaque);
> + QLIST_REMOVE(map, list);
> + free(map);
> + }
> + }
> +}
> +
> +void *__dma_memory_map(DMAContext *dma, DMAInvalidateMapFunc *cb,
> + void *cb_opaque, dma_addr_t addr, dma_addr_t *len,
> + DMADirection dir)
> +{
> + int err;
> + target_phys_addr_t paddr, plen;
> + void *buf;
> +
> + plen = *len;
> + err = dma->translate(dma, addr, &paddr, &plen, dir);
> + if (err) {
> + return NULL;
> + }
> +
> + /*
> + * If this is true, the virtual region is contiguous,
> + * but the translated physical region isn't. We just
> + * clamp *len, much like cpu_physical_memory_map() does.
> + */
> + if (plen < *len) {
> + *len = plen;
> + }
> +
> + buf = cpu_physical_memory_map(paddr, &plen,
> + dir == DMA_DIRECTION_FROM_DEVICE);
> + *len = plen;
> +
> + /* We treat maps as remote TLBs to cope with stuff like AIO. */
> + if (cb) {
> + DMAMemoryMap *map;
> +
> + map = g_malloc(sizeof(DMAMemoryMap));
> + map->addr = addr;
> + map->len = *len;
> + map->buf = buf;
> + map->invalidate = cb;
> + map->invalidate_opaque = cb_opaque;
> +
> + QLIST_INSERT_HEAD(&dma->memory_maps, map, list);
> + }
> +
> + return buf;
> +}
> +
> +void __dma_memory_unmap(DMAContext *dma, void *buffer, dma_addr_t len,
> + DMADirection dir, dma_addr_t access_len)
> +{
> + DMAMemoryMap *map;
> +
> + cpu_physical_memory_unmap(buffer, len,
> + dir == DMA_DIRECTION_FROM_DEVICE,
> + access_len);
> +
> + QLIST_FOREACH(map, &dma->memory_maps, list) {
> + if ((map->buf == buffer) && (map->len == len)) {
> + QLIST_REMOVE(map, list);
> + free(map);
> + }
> + }
> +}
> +#endif /* CONFIG_IOMMU */
> diff --git a/dma.h b/dma.h
> index a66e3d7..c27f8b3 100644
> --- a/dma.h
> +++ b/dma.h
> @@ -15,6 +15,7 @@
> #include "hw/hw.h"
> #include "block.h"
>
> +typedef struct DMAContext DMAContext;
> typedef struct ScatterGatherEntry ScatterGatherEntry;
>
> typedef enum {
> @@ -31,30 +32,83 @@ struct QEMUSGList {
> };
>
> #if defined(TARGET_PHYS_ADDR_BITS)
> +
> +#ifdef CONFIG_IOMMU
> +/*
> + * When an IOMMU is present, bus addresses become distinct from
> + * CPU/memory physical addresses and may be a different size. Because
> + * the IOVA size depends more on the bus than on the platform, we more
> + * or less have to treat these as 64-bit always to cover all (or at
> + * least most) cases.
> + */
> +typedef uint64_t dma_addr_t;
> +
> +#define DMA_ADDR_BITS 64
> +#define DMA_ADDR_FMT "%" PRIx64
> +#else
> typedef target_phys_addr_t dma_addr_t;
>
> #define DMA_ADDR_BITS TARGET_PHYS_ADDR_BITS
> #define DMA_ADDR_FMT TARGET_FMT_plx
> +#endif
> +
> +typedef int DMATranslateFunc(DMAContext *dma,
> + dma_addr_t addr,
> + target_phys_addr_t *paddr,
> + target_phys_addr_t *len,
> + DMADirection dir);
> +
> +typedef struct DMAContext {
> +#ifdef CONFIG_IOMMU
> + DMATranslateFunc *translate;
> + void *opaque;
> + QLIST_HEAD(memory_maps, DMAMemoryMap) memory_maps;
> +#endif
> +} DMAContext;
> +
Here I suspect opaque is not needed at all.
> +#ifdef CONFIG_IOMMU
> +static inline bool dma_has_iommu(DMACon.text *dma)
> +{
> + return (dma != NULL);
> +}
() not necessary here. Neither is != NULL.
> +#else
> +static inline bool dma_has_iommu(DMAContext *dma)
> +{
> + return false;
> +}
> +#endif
>
> typedef void DMAInvalidateMapFunc(void *);
>
> /* Checks that the given range of addresses is valid for DMA. This is
> * useful for certain cases, but usually you should just use
> * dma_memory_{read,write}() and check for errors */
> -static inline bool dma_memory_valid(DMAContext *dma, dma_addr_t addr,
> - dma_addr_t len, DMADirection dir)
> +bool __dma_memory_valid(DMAContext *dma, dma_addr_t addr, dma_addr_t len,
> + DMADirection dir);
> +static inline bool dma_memory_valid(DMAContext *dma,
> + dma_addr_t addr, dma_addr_t len,
> + DMADirection dir)
> {
> - /* Stub version, with no iommu we assume all bus addresses are valid */
> - return true;
> + if (!dma_has_iommu(dma)) {
> + return true;
> + } else {
> + return __dma_memory_valid(dma, addr, len, dir);
> + }
> }
>
> +int __dma_memory_rw(DMAContext *dma, dma_addr_t addr,
> + void *buf, dma_addr_t len, DMADirection dir);
> static inline int dma_memory_rw(DMAContext *dma, dma_addr_t addr,
> void *buf, dma_addr_t len, DMADirection dir)
> {
> - /* Stub version when we have no iommu support */
> - cpu_physical_memory_rw(addr, buf, (target_phys_addr_t)len,
> - dir == DMA_DIRECTION_FROM_DEVICE);
> - return 0;
> + if (!dma_has_iommu(dma)) {
> + /* Fast-path for no IOMMU */
> + cpu_physical_memory_rw(addr, buf, (target_phys_addr_t)len,
cast not needed, I think.
> + dir == DMA_DIRECTION_FROM_DEVICE);
> + return 0;
> + } else {
> + return __dma_memory_rw(dma, addr, buf, len, dir);
> + }
> }
>
> static inline int dma_memory_read(DMAContext *dma, dma_addr_t addr,
> @@ -70,35 +124,55 @@ static inline int dma_memory_write(DMAContext *dma,
> dma_addr_t addr,
> DMA_DIRECTION_FROM_DEVICE);
> }
>
> +int __dma_memory_zero(DMAContext *dma, dma_addr_t addr, dma_addr_t len);
> static inline int dma_memory_zero(DMAContext *dma, dma_addr_t addr,
> dma_addr_t len)
> {
> - /* Stub version when we have no iommu support */
> - cpu_physical_memory_zero(addr, len);
> - return 0;
> + if (!dma_has_iommu(dma)) {
> + /* Fast-path for no IOMMU */
> + cpu_physical_memory_zero(addr, len);
> + return 0;
> + } else {
> + return __dma_memory_zero(dma, addr, len);
> + }
> }
>
> +void *__dma_memory_map(DMAContext *dma,
> + DMAInvalidateMapFunc *cb, void *opaque,
> + dma_addr_t addr, dma_addr_t *len,
> + DMADirection dir);
> static inline void *dma_memory_map(DMAContext *dma,
> - DMAInvalidateMapFunc *cb, void *opaque,
> + DMAInvalidateMapFunc *cb, void *cb_opaque,
> dma_addr_t addr, dma_addr_t *len,
> DMADirection dir)
> {
> - target_phys_addr_t xlen = *len;
> - void *p;
> -
> - p = cpu_physical_memory_map(addr, &xlen,
> - dir == DMA_DIRECTION_FROM_DEVICE);
> - *len = xlen;
> - return p;
> + if (!dma_has_iommu(dma)) {
> + target_phys_addr_t xlen = *len;
> + void *p;
> +
> + p = cpu_physical_memory_map(addr, &xlen,
> + dir == DMA_DIRECTION_FROM_DEVICE);
> + *len = xlen;
> + return p;
> + } else {
> + return __dma_memory_map(dma, cb, cb_opaque, addr, len, dir);
> + }
> }
Are callbacks ever useful? I note that without an iommu, they are never
invoked. So how can a device use them?
>
> +void __dma_memory_unmap(DMAContext *dma,
> + void *buffer, dma_addr_t len,
> + DMADirection dir, dma_addr_t access_len);
> static inline void dma_memory_unmap(DMAContext *dma,
> void *buffer, dma_addr_t len,
> DMADirection dir, dma_addr_t access_len)
> {
> - return cpu_physical_memory_unmap(buffer, (target_phys_addr_t)len,
> - dir == DMA_DIRECTION_FROM_DEVICE,
> - access_len);
> + if (!dma_has_iommu(dma)) {
> + return cpu_physical_memory_unmap(buffer, (target_phys_addr_t)len,
> + dir == DMA_DIRECTION_FROM_DEVICE,
> + access_len);
> + } else {
> + __dma_memory_unmap(dma, buffer, len, dir, access_len);
> + }
> }
>
> #define DEFINE_LDST_DMA(_lname, _sname, _bits, _end) \
> @@ -139,6 +213,14 @@ DEFINE_LDST_DMA(q, q, 64, be);
>
> #undef DEFINE_LDST_DMA
>
> +#ifdef CONFIG_IOMMU
> +
> +void dma_context_init(DMAContext *dma, DMATranslateFunc, void *opaque);
> +void dma_invalidate_memory_range(DMAContext *dma,
> + dma_addr_t addr, dma_addr_t len);
> +
> +#endif /* CONFIG_IOMMU */
> +
> struct ScatterGatherEntry {
> dma_addr_t base;
> dma_addr_t len;
> --
> 1.7.9
- [Qemu-devel] [0/13] RFC: Support for guest-visible IOMMUs, David Gibson, 2012/03/01
- [Qemu-devel] [PATCH 10/13] iommu: Introduce IOMMU emulation infrastructure, David Gibson, 2012/03/01
- Re: [Qemu-devel] [PATCH 10/13] iommu: Introduce IOMMU emulation infrastructure,
Michael S. Tsirkin <=
- [Qemu-devel] [PATCH 05/13] iommu: Add universal DMA helper functions, David Gibson, 2012/03/01
- [Qemu-devel] [PATCH 04/13] Implement cpu_physical_memory_zero(), David Gibson, 2012/03/01
- [Qemu-devel] [PATCH 07/13] iommu: Make sglists and dma_bdrv helpers use new universal DMA helpers, David Gibson, 2012/03/01
- [Qemu-devel] [PATCH 01/13] Use DMADirection type for dma_bdrv_io, David Gibson, 2012/03/01
- [Qemu-devel] [PATCH 02/13] Better support for dma_addr_t variables, David Gibson, 2012/03/01
- [Qemu-devel] [PATCH 09/13] usb: Convert usb_packet_{map, unmap} to universal DMA helpers, David Gibson, 2012/03/01
- [Qemu-devel] [PATCH 13/13] pseries: Implement IOMMU and DMA for PAPR PCI devices, David Gibson, 2012/03/01
- [Qemu-devel] [PATCH 06/13] usb-ohci: Use universal DMA helper functions, David Gibson, 2012/03/01
- [Qemu-devel] [PATCH 08/13] ide/ahci: Use universal DMA helper functions, David Gibson, 2012/03/01