[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH 2/6] PCI DMA API (v2)
From: |
Anthony Liguori |
Subject: |
[Qemu-devel] [PATCH 2/6] PCI DMA API (v2) |
Date: |
Fri, 4 Apr 2008 23:02:51 -0500 |
This patch introduces a PCI DMA API and some generic code to support other DMA
APIs. It introduces an IOVector type that contains physical address/length
pairs. These vectors can be translated by the PCI layer and passed either to
generic copying functions or directly to the block or network subsystems.
This enables zero-copy IO to be performed without introducing assumptions of
phys_ram_base. This API is at the PCI device level to enable support of
per-device IOMMU remapping.
Since v1, I've renamed PhysIOVector to IOVector and removed the
concept of a mapped vector. I've added comments and provided an API for
using IOVectors with the network and block layers. It's not optimized at the
moment as enabling true zero-copy will require more patches at a later time.
Signed-off-by: Anthony Liguori <address@hidden>
diff --git a/Makefile.target b/Makefile.target
index 5ac29a7..94f3e58 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -173,7 +173,7 @@ all: $(PROGS)
#########################################################
# cpu emulator library
LIBOBJS=exec.o kqemu.o translate-all.o cpu-exec.o\
- translate.o host-utils.o
+ translate.o host-utils.o iovector.o
ifndef CONFIG_NO_DYNGEN_OP
LIBOBJS+=op.o
endif
diff --git a/block.c b/block.c
index 0730954..58cb6cc 100644
--- a/block.c
+++ b/block.c
@@ -570,6 +570,51 @@ int bdrv_write(BlockDriverState *bs, int64_t sector_num,
}
}
+#ifndef QEMU_IMG
+int bdrv_readv(BlockDriverState *bs, int64_t sector_num,
+ IOVector *iovec)
+{
+ char *buffer;
+ size_t size;
+ int ret;
+
+ size = iovector_size(iovec);
+ buffer = qemu_malloc(size);
+ if (buffer == NULL)
+ return -ENOMEM;
+
+ ret = bdrv_read(bs, sector_num, buffer, size / 512);
+
+ if (ret >= 0)
+ memcpy_to_iovector(iovec, 0, size, buffer);
+
+ qemu_free(buffer);
+
+ return ret;
+}
+
+int bdrv_writev(BlockDriverState *bs, int64_t sector_num,
+ const IOVector *iovec)
+{
+ char *buffer;
+ size_t size;
+ int ret;
+
+ size = iovector_size(iovec);
+ buffer = qemu_malloc(size);
+ if (buffer == NULL)
+ return -ENOMEM;
+
+ memcpy_from_iovector(buffer, 0, size, iovec);
+
+ ret = bdrv_write(bs, sector_num, buffer, size / 512);
+
+ qemu_free(buffer);
+
+ return ret;
+}
+#endif
+
static int bdrv_pread_em(BlockDriverState *bs, int64_t offset,
uint8_t *buf, int count1)
{
diff --git a/block.h b/block.h
index b730505..9d30db2 100644
--- a/block.h
+++ b/block.h
@@ -1,6 +1,8 @@
#ifndef BLOCK_H
#define BLOCK_H
+#include "iovector.h"
+
/* block.c */
typedef struct BlockDriver BlockDriver;
@@ -67,6 +69,9 @@ int bdrv_read(BlockDriverState *bs, int64_t sector_num,
uint8_t *buf, int nb_sectors);
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors);
+int bdrv_readv(BlockDriverState *bs, int64_t sector_num, IOVector *iovec);
+int bdrv_writev(BlockDriverState *bs, int64_t sector_num,
+ const IOVector *iovec);
int bdrv_pread(BlockDriverState *bs, int64_t offset,
void *buf, int count);
int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
diff --git a/cpu-all.h b/cpu-all.h
index 9e5d33b..3cbc718 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -835,6 +835,7 @@ void cpu_register_physical_memory(target_phys_addr_t
start_addr,
unsigned long size,
unsigned long phys_offset);
ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr);
+void *cpu_map_physical_page(target_phys_addr_t addr);
ram_addr_t qemu_ram_alloc(unsigned int size);
void qemu_ram_free(ram_addr_t addr);
int cpu_register_io_memory(int io_index,
diff --git a/exec.c b/exec.c
index c25872d..b2d2af4 100644
--- a/exec.c
+++ b/exec.c
@@ -2085,6 +2085,21 @@ ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t
addr)
return p->phys_offset;
}
/* Translate a guest physical address into a host pointer inside
 * phys_ram_base.  Returns NULL when the page is not ordinary RAM (e.g.
 * MMIO), in which case the caller is expected to skip the access.
 * NOTE(review): because the lookup is per-page, the returned pointer is only
 * guaranteed valid up to the end of the containing page. */
void *cpu_map_physical_page(target_phys_addr_t addr)
{
    ram_addr_t phys_offset;

    /* DMA'ing to MMIO, just skip */
    phys_offset = cpu_get_physical_page_desc(addr);
    if ((phys_offset & ~TARGET_PAGE_MASK) != IO_MEM_RAM)
        return NULL;

    /* Combine the page's ram offset with the intra-page offset of addr. */
    phys_offset &= TARGET_PAGE_MASK;
    phys_offset += addr & ~TARGET_PAGE_MASK;

    return phys_ram_base + phys_offset;
}
+
/* XXX: better than nothing */
ram_addr_t qemu_ram_alloc(unsigned int size)
{
diff --git a/hw/pci.c b/hw/pci.c
index bc55989..c09b5f8 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -145,6 +145,34 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
return 0;
}
/* Return a translated IOVector suitable for DMA. At the moment, we perform
 * no translation. */
IOVector *pci_device_dma_map(PCIDevice *s, const IOVector *iovec)
{
    /* Identity mapping: there is no per-device IOMMU yet, so the guest
     * vector is returned as-is.  const is cast away; the caller must not
     * free or modify the result independently of the original. */
    return (IOVector *)iovec;
}
+
+/* Unmap a translated IOVector and update dirty bits if necessary. */
+void pci_device_dma_unmap(PCIDevice *s, const IOVector *orig,
+ IOVector *mapped, int write)
+{
+ int i;
+
+ if (!write)
+ return;
+
+ /* mark memory as dirty if necessary */
+ for (i = 0; i < orig->num; i++) {
+ size_t offset;
+
+ for (offset = 0;
+ offset < orig->sg[i].len;
+ offset += TARGET_PAGE_SIZE) {
+ cpu_physical_memory_set_dirty(orig->sg[i].base + offset);
+ }
+ }
+}
+
/* -1 for devfn means auto assign */
PCIDevice *pci_register_device(PCIBus *bus, const char *name,
int instance_size, int devfn,
diff --git a/hw/pci.h b/hw/pci.h
index e870987..b86d8cb 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -4,6 +4,8 @@
/* PCI includes legacy ISA access. */
#include "isa.h"
+#include "iovector.h"
+
/* PCI bus */
extern target_phys_addr_t pci_mem_base;
@@ -81,6 +83,10 @@ void pci_default_write_config(PCIDevice *d,
void pci_device_save(PCIDevice *s, QEMUFile *f);
int pci_device_load(PCIDevice *s, QEMUFile *f);
+IOVector *pci_device_dma_map(PCIDevice *s, const IOVector *iovec);
+void pci_device_dma_unmap(PCIDevice *s, const IOVector *orig,
+ IOVector *mapped, int write);
+
typedef void (*pci_set_irq_fn)(qemu_irq *pic, int irq_num, int level);
typedef int (*pci_map_irq_fn)(PCIDevice *pci_dev, int irq_num);
PCIBus *pci_register_bus(pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
diff --git a/iovector.c b/iovector.c
new file mode 100644
index 0000000..7002656
--- /dev/null
+++ b/iovector.c
@@ -0,0 +1,137 @@
+/*
+ * IO Vectors
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ * Anthony Liguori <address@hidden>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include "iovector.h"
+
+static size_t iovector_rw(void *buffer, size_t size, IOVector *iov, int read)
+{
+ uint8_t *ptr = buffer;
+ size_t offset = 0;
+ int i;
+
+ for (i = 0; i < iov->num; i++) {
+ size_t len;
+ void *addr;
+
+ len = MIN(iov->sg[i].len, size - offset);
+
+ addr = cpu_map_physical_page(iov->sg[i].base);
+
+ if (read)
+ memcpy(ptr + offset, addr, len);
+ else
+ memcpy(addr, ptr + offset, len);
+
+ offset += len;
+ }
+
+ return offset;
+}
+
+size_t memcpy_from_iovector(void *buffer, size_t offset, size_t size,
+ const IOVector *iov)
+{
+ IOVector *sg;
+ size_t len;
+
+ if (offset)
+ sg = iovector_trim(iov, offset, size);
+ else
+ sg = (IOVector *)iov;
+
+ len = iovector_rw(buffer, size, sg, 1);
+
+ if (offset)
+ qemu_free(sg);
+
+ return len;
+}
+
+size_t memcpy_to_iovector(IOVector *iovec, size_t offset, size_t size,
+ const void *buffer)
+{
+ IOVector *sg;
+ size_t len;
+
+ if (offset)
+ sg = iovector_trim(iovec, offset, size);
+ else
+ sg = iovec;
+
+ len = iovector_rw((void *)buffer, size, sg, 0);
+
+ if (offset)
+ qemu_free(sg);
+
+ return len;
+}
+
+IOVector *iovector_new(int num)
+{
+ IOVector *ret;
+
+ ret = qemu_malloc(sizeof(IOVector) + sizeof(IOVectorElement) * num);
+ if (ret == NULL)
+ return NULL;
+
+ ret->num = num;
+
+ return ret;
+}
+
+IOVector *iovector_trim(const IOVector *iov, size_t offset, size_t size)
+{
+ IOVector *ret;
+ size_t off, total_size;
+ int i;
+
+ ret = iovector_new(iov->num);
+ if (ret == NULL)
+ return NULL;
+
+ total_size = 0;
+ ret->num = 0;
+ off = 0;
+ for (i = 0; i < iov->num; i++) {
+ if (off >= offset || offset < (off + iov->sg[i].len)) {
+ size_t fudge = 0;
+ if (off < offset)
+ fudge = offset - off;
+
+ ret->sg[ret->num].base = iov->sg[i].base + fudge;
+ ret->sg[ret->num].len = MIN(iov->sg[i].len - fudge,
+ size - total_size);
+ total_size += ret->sg[ret->num].len;
+ ret->num++;
+
+ if (total_size == size)
+ break;
+ }
+
+ off += iov->sg[i].len;
+ }
+
+ return ret;
+}
+
+size_t iovector_size(const IOVector *iov)
+{
+ size_t size = 0;
+ int i;
+
+ for (i = 0; i < iov->num; i++)
+ size += iov->sg[i].len;
+
+ return size;
+}
diff --git a/iovector.h b/iovector.h
new file mode 100644
index 0000000..fac7236
--- /dev/null
+++ b/iovector.h
@@ -0,0 +1,49 @@
+/*
+ * IO Vectors
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ * Anthony Liguori <address@hidden>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef _QEMU_IOVECTOR_H
+#define _QEMU_IOVECTOR_H
+
typedef struct IOVectorElement IOVectorElement;

/* A scatter/gather list of guest physical memory regions. */
typedef struct IOVector
{
    int num;                    /* number of elements in sg[] */
    struct IOVectorElement {
        uint64_t base;          /* guest physical address of the region */
        size_t len;             /* length of the region in bytes */
    } sg[0];                    /* variable-length tail (GNU zero-length array) */
} IOVector;
+
+/* Copy from an IOVector to a flat buffer. Be careful to pass in a fully
+ * translated IOVector here. */
+size_t memcpy_from_iovector(void *buffer, size_t offset, size_t size,
+ const IOVector *iov);
+
+/* Copy to an IOVector from a flat buffer. Be careful to pass in a fully
+ * translated IOVector here. */
+size_t memcpy_to_iovector(IOVector *iovec, size_t offset, size_t size,
+ const void *buffer);
+
+/* Return a new IOVector that's a subset of the passed in IOVector. It should
+ * be freed with qemu_free when you are done with it. */
+IOVector *iovector_trim(const IOVector *iov, size_t offset, size_t size);
+
+/* Returns the size of an IOVector in bytes */
+size_t iovector_size(const IOVector *iov);
+
+/* Returns a new IOVector with num elements. iov->num will be set to num on
+ * return */
+IOVector *iovector_new(int num);
+
+#endif
diff --git a/net.h b/net.h
index 2dfff8d..0b3a155 100644
--- a/net.h
+++ b/net.h
@@ -1,6 +1,8 @@
#ifndef QEMU_NET_H
#define QEMU_NET_H
+#include "iovector.h"
+
/* VLANs support */
typedef struct VLANClientState VLANClientState;
@@ -30,6 +32,7 @@ VLANClientState *qemu_new_vlan_client(VLANState *vlan,
void *opaque);
int qemu_can_send_packet(VLANClientState *vc);
void qemu_send_packet(VLANClientState *vc, const uint8_t *buf, int size);
+void qemu_sendv_packet(VLANClientState *vc, const IOVector *iovec);
void qemu_handler_true(void *opaque);
void do_info_network(void);
diff --git a/vl.c b/vl.c
index 61eb191..342ef79 100644
--- a/vl.c
+++ b/vl.c
@@ -3731,6 +3731,22 @@ void qemu_send_packet(VLANClientState *vc1, const
uint8_t *buf, int size)
}
}
+void qemu_sendv_packet(VLANClientState *vc, const IOVector *iovec)
+{
+ size_t size;
+ uint8_t *data;
+
+ size = iovector_size(iovec);
+ data = qemu_malloc(size);
+ if (data == NULL)
+ return;
+
+ memcpy_from_iovector(data, 0, size, iovec);
+ qemu_send_packet(vc, data, size);
+
+ qemu_free(data);
+}
+
#if defined(CONFIG_SLIRP)
/* slirp network adapter */
- [Qemu-devel] [PATCH 1/6] Use ram_addr_t for cpu_get_physical_page_desc (v2), Anthony Liguori, 2008/04/05
- [Qemu-devel] [PATCH 2/6] PCI DMA API (v2),
Anthony Liguori <=
- Re: [Qemu-devel] [PATCH 2/6] PCI DMA API (v2), Blue Swirl, 2008/04/06
- Re: [kvm-devel] [Qemu-devel] [PATCH 2/6] PCI DMA API (v2), Anthony Liguori, 2008/04/06
- Re: [kvm-devel] [Qemu-devel] [PATCH 2/6] PCI DMA API (v2), andrzej zaborowski, 2008/04/06
- Re: [kvm-devel] [Qemu-devel] [PATCH 2/6] PCI DMA API (v2), Anthony Liguori, 2008/04/06
- Re: [kvm-devel] [Qemu-devel] [PATCH 2/6] PCI DMA API (v2), Paul Brook, 2008/04/06
- Re: [kvm-devel] [Qemu-devel] [PATCH 2/6] PCI DMA API (v2), andrzej zaborowski, 2008/04/07