[Qemu-devel] [PATCH 2/6] PCI DMA API (v2)


From: Anthony Liguori
Subject: [Qemu-devel] [PATCH 2/6] PCI DMA API (v2)
Date: Fri, 4 Apr 2008 23:02:51 -0500

This patch introduces a PCI DMA API and some generic code to support other DMA
APIs.  It introduces an IOVector type that contains physical address/length
pairs.  These vectors can be translated by the PCI layer and passed either to
generic copying functions or directly to the block or network subsystems.
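
To make the data structure concrete, here is a sketch (not part of the
patch) of how a device model might describe a two-extent guest buffer;
guest_addr0 and guest_addr1 stand in for guest physical addresses the
device was given:

    IOVector *iov = iovector_new(2);

    if (iov) {
        /* two guest-physical extents forming a single 1KB transfer */
        iov->sg[0].base = guest_addr0;
        iov->sg[0].len  = 512;
        iov->sg[1].base = guest_addr1;
        iov->sg[1].len  = 512;
    }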

This enables zero-copy IO to be performed without introducing assumptions of
phys_ram_base.  This API is at the PCI device level to enable support of
per-device IOMMU remapping.
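
The intended calling convention for a device is then roughly the
following (again just a sketch: d, bs, and sector_num are a
hypothetical PCIDevice, BlockDriverState, and request sector):

    IOVector *mapped;

    mapped = pci_device_dma_map(d, iov);         /* identity map today */
    if (mapped) {
        bdrv_readv(bs, sector_num, mapped);      /* disk -> guest RAM */
        pci_device_dma_unmap(d, iov, mapped, 1); /* write: dirty pages */
    }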

Since v1, I've renamed PhysIOVector to IOVector and removed the concept
of a mapped vector.  I've added comments and provided an API for using
IOVectors with the network and block layers.  These paths are not yet
optimized, as enabling true zero-copy will require more patches at a
later time.
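
As an example of the new entry points, a network device transmits a
translated vector with qemu_sendv_packet(), and the copy helpers can
write completion status back into guest memory (vc and the byte offset
below are made up for illustration):

    uint8_t status = 0x01;

    /* bounce-buffered internally for now; zero-copy can come later */
    qemu_sendv_packet(vc, iov);

    /* deposit a one-byte status code at byte offset 4 of the vector */
    memcpy_to_iovector(iov, 4, sizeof(status), &status);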

Signed-off-by: Anthony Liguori <address@hidden>

diff --git a/Makefile.target b/Makefile.target
index 5ac29a7..94f3e58 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -173,7 +173,7 @@ all: $(PROGS)
 #########################################################
 # cpu emulator library
 LIBOBJS=exec.o kqemu.o translate-all.o cpu-exec.o\
-        translate.o host-utils.o
+        translate.o host-utils.o iovector.o
 ifndef CONFIG_NO_DYNGEN_OP
 LIBOBJS+=op.o
 endif
diff --git a/block.c b/block.c
index 0730954..58cb6cc 100644
--- a/block.c
+++ b/block.c
@@ -570,6 +570,51 @@ int bdrv_write(BlockDriverState *bs, int64_t sector_num,
     }
 }
 
+#ifndef QEMU_IMG
+int bdrv_readv(BlockDriverState *bs, int64_t sector_num,
+              IOVector *iovec)
+{
+    char *buffer;
+    size_t size;
+    int ret;
+
+    size = iovector_size(iovec);
+    buffer = qemu_malloc(size);
+    if (buffer == NULL)
+       return -ENOMEM;
+
+    ret = bdrv_read(bs, sector_num, buffer, size / 512); /* size must be sector-aligned */
+
+    if (ret >= 0)
+       memcpy_to_iovector(iovec, 0, size, buffer);
+
+    qemu_free(buffer);
+
+    return ret;
+}
+
+int bdrv_writev(BlockDriverState *bs, int64_t sector_num,
+               const IOVector *iovec)
+{
+    char *buffer;
+    size_t size;
+    int ret;
+
+    size = iovector_size(iovec);
+    buffer = qemu_malloc(size);
+    if (buffer == NULL)
+       return -ENOMEM;
+
+    memcpy_from_iovector(buffer, 0, size, iovec);
+
+    ret = bdrv_write(bs, sector_num, buffer, size / 512); /* size must be sector-aligned */
+
+    qemu_free(buffer);
+
+    return ret;
+}
+#endif
+
 static int bdrv_pread_em(BlockDriverState *bs, int64_t offset,
                          uint8_t *buf, int count1)
 {
diff --git a/block.h b/block.h
index b730505..9d30db2 100644
--- a/block.h
+++ b/block.h
@@ -1,6 +1,8 @@
 #ifndef BLOCK_H
 #define BLOCK_H
 
+#include "iovector.h"
+
 /* block.c */
 typedef struct BlockDriver BlockDriver;
 
@@ -67,6 +69,9 @@ int bdrv_read(BlockDriverState *bs, int64_t sector_num,
               uint8_t *buf, int nb_sectors);
 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
                const uint8_t *buf, int nb_sectors);
+int bdrv_readv(BlockDriverState *bs, int64_t sector_num, IOVector *iovec);
+int bdrv_writev(BlockDriverState *bs, int64_t sector_num,
+               const IOVector *iovec);
 int bdrv_pread(BlockDriverState *bs, int64_t offset,
                void *buf, int count);
 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
diff --git a/cpu-all.h b/cpu-all.h
index 9e5d33b..3cbc718 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -835,6 +835,7 @@ void cpu_register_physical_memory(target_phys_addr_t start_addr,
                                   unsigned long size,
                                   unsigned long phys_offset);
 ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr);
+void *cpu_map_physical_page(target_phys_addr_t addr);
 ram_addr_t qemu_ram_alloc(unsigned int size);
 void qemu_ram_free(ram_addr_t addr);
 int cpu_register_io_memory(int io_index,
diff --git a/exec.c b/exec.c
index c25872d..b2d2af4 100644
--- a/exec.c
+++ b/exec.c
@@ -2085,6 +2085,21 @@ ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr)
     return p->phys_offset;
 }
 
+void *cpu_map_physical_page(target_phys_addr_t addr)
+{
+    ram_addr_t phys_offset;
+
+    /* DMA'ing to MMIO, just skip */
+    phys_offset = cpu_get_physical_page_desc(addr);
+    if ((phys_offset & ~TARGET_PAGE_MASK) != IO_MEM_RAM)
+       return NULL;
+
+    phys_offset &= TARGET_PAGE_MASK;
+    phys_offset += addr & ~TARGET_PAGE_MASK;
+
+    return phys_ram_base + phys_offset;
+}
+
 /* XXX: better than nothing */
 ram_addr_t qemu_ram_alloc(unsigned int size)
 {
diff --git a/hw/pci.c b/hw/pci.c
index bc55989..c09b5f8 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -145,6 +145,34 @@ int pci_device_load(PCIDevice *s, QEMUFile *f)
     return 0;
 }
 
+/* Return a translated IOVector suitable for DMA.  At the moment, we perform
+ * no translation. */
+IOVector *pci_device_dma_map(PCIDevice *s, const IOVector *iovec)
+{
+    return (IOVector *)iovec;
+}
+
+/* Unmap a translated IOVector and update dirty bits if necessary. */
+void pci_device_dma_unmap(PCIDevice *s, const IOVector *orig,
+                         IOVector *mapped, int write)
+{
+    int i;
+
+    if (!write)
+       return;
+
+    /* mark memory as dirty if necessary */
+    for (i = 0; i < orig->num; i++) {
+       size_t offset;
+
+       for (offset = 0;
+            offset < orig->sg[i].len;
+            offset += TARGET_PAGE_SIZE) {
+           cpu_physical_memory_set_dirty(orig->sg[i].base + offset);
+       }
+    }
+}
+
 /* -1 for devfn means auto assign */
 PCIDevice *pci_register_device(PCIBus *bus, const char *name,
                                int instance_size, int devfn,
diff --git a/hw/pci.h b/hw/pci.h
index e870987..b86d8cb 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -4,6 +4,8 @@
 /* PCI includes legacy ISA access.  */
 #include "isa.h"
 
+#include "iovector.h"
+
 /* PCI bus */
 
 extern target_phys_addr_t pci_mem_base;
@@ -81,6 +83,10 @@ void pci_default_write_config(PCIDevice *d,
 void pci_device_save(PCIDevice *s, QEMUFile *f);
 int pci_device_load(PCIDevice *s, QEMUFile *f);
 
+IOVector *pci_device_dma_map(PCIDevice *s, const IOVector *iovec);
+void pci_device_dma_unmap(PCIDevice *s, const IOVector *orig,
+                         IOVector *mapped, int write);
+
 typedef void (*pci_set_irq_fn)(qemu_irq *pic, int irq_num, int level);
 typedef int (*pci_map_irq_fn)(PCIDevice *pci_dev, int irq_num);
 PCIBus *pci_register_bus(pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
diff --git a/iovector.c b/iovector.c
new file mode 100644
index 0000000..7002656
--- /dev/null
+++ b/iovector.c
@@ -0,0 +1,139 @@
+/*
+ * IO Vectors
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ *  Anthony Liguori   <address@hidden>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include "iovector.h"
+
+static size_t iovector_rw(void *buffer, size_t size, IOVector *iov, int read)
+{
+    uint8_t *ptr = buffer;
+    size_t offset = 0;
+    int i;
+
+    for (i = 0; i < iov->num; i++) {
+       size_t len;
+       void *addr;
+
+       len = MIN(iov->sg[i].len, size - offset);
+
+       addr = cpu_map_physical_page(iov->sg[i].base);
+       if (addr == NULL)       /* MMIO page: nothing to copy to or from */
+           break;
+
+       if (read)
+           memcpy(ptr + offset, addr, len);
+       else
+           memcpy(addr, ptr + offset, len);
+
+       offset += len;
+    }
+
+    return offset;
+}
+
+size_t memcpy_from_iovector(void *buffer, size_t offset, size_t size,
+                           const IOVector *iov)
+{
+    IOVector *sg;
+    size_t len;
+
+    if (offset)
+       sg = iovector_trim(iov, offset, size);
+    else
+       sg = (IOVector *)iov;
+
+    len = iovector_rw(buffer, size, sg, 1);
+
+    if (offset)
+       qemu_free(sg);
+
+    return len;
+}
+
+size_t memcpy_to_iovector(IOVector *iovec, size_t offset, size_t size,
+                         const void *buffer)
+{
+    IOVector *sg;
+    size_t len;
+
+    if (offset)
+       sg = iovector_trim(iovec, offset, size);
+    else
+       sg = iovec;
+
+    len = iovector_rw((void *)buffer, size, sg, 0);
+
+    if (offset)
+       qemu_free(sg);
+
+    return len;
+}
+
+IOVector *iovector_new(int num)
+{
+    IOVector *ret;
+
+    ret = qemu_malloc(sizeof(IOVector) + sizeof(IOVectorElement) * num);
+    if (ret == NULL)
+       return NULL;
+
+    ret->num = num;
+
+    return ret;
+}
+
+IOVector *iovector_trim(const IOVector *iov, size_t offset, size_t size)
+{
+    IOVector *ret;
+    size_t off, total_size;
+    int i;
+
+    ret = iovector_new(iov->num);
+    if (ret == NULL)
+       return NULL;
+
+    total_size = 0;
+    ret->num = 0;
+    off = 0;
+    for (i = 0; i < iov->num; i++) {
+       if (off >= offset || offset < (off + iov->sg[i].len)) { /* in range? */
+           size_t fudge = 0;
+           if (off < offset)
+               fudge = offset - off;
+
+           ret->sg[ret->num].base = iov->sg[i].base + fudge;
+           ret->sg[ret->num].len = MIN(iov->sg[i].len - fudge,
+                                       size - total_size);
+           total_size += ret->sg[ret->num].len;
+           ret->num++;
+
+           if (total_size == size)
+               break;
+       }
+
+       off += iov->sg[i].len;
+    }
+
+    return ret;
+}
+
+size_t iovector_size(const IOVector *iov)
+{
+    size_t size = 0;
+    int i;
+
+    for (i = 0; i < iov->num; i++)
+       size += iov->sg[i].len;
+
+    return size;
+}
diff --git a/iovector.h b/iovector.h
new file mode 100644
index 0000000..fac7236
--- /dev/null
+++ b/iovector.h
@@ -0,0 +1,49 @@
+/*
+ * IO Vectors
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ *  Anthony Liguori   <address@hidden>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef _QEMU_IOVECTOR_H
+#define _QEMU_IOVECTOR_H
+
+typedef struct IOVectorElement IOVectorElement;
+
+typedef struct IOVector
+{
+    int num;
+    struct IOVectorElement {
+       uint64_t base;
+       size_t len;
+    } sg[0];
+} IOVector;
+
+/* Copy from an IOVector to a flat buffer.  Be careful to pass in a fully
+ * translated IOVector here. */
+size_t memcpy_from_iovector(void *buffer, size_t offset, size_t size,
+                           const IOVector *iov);
+
+/* Copy to an IOVector from a flat buffer.  Be careful to pass in a fully
+ * translated IOVector here. */
+size_t memcpy_to_iovector(IOVector *iovec, size_t offset, size_t size,
+                         const void *buffer);
+
+/* Return a new IOVector that's a subset of the passed in IOVector.  It should
+ * be freed with qemu_free when you are done with it. */
+IOVector *iovector_trim(const IOVector *iov, size_t offset, size_t size);
+
+/* Returns the size of an IOVector in bytes */
+size_t iovector_size(const IOVector *iov);
+
+/* Returns a new IOVector with num elements.  iov->num will be set to num on
+ * return */
+IOVector *iovector_new(int num);
+
+#endif
diff --git a/net.h b/net.h
index 2dfff8d..0b3a155 100644
--- a/net.h
+++ b/net.h
@@ -1,6 +1,8 @@
 #ifndef QEMU_NET_H
 #define QEMU_NET_H
 
+#include "iovector.h"
+
 /* VLANs support */
 
 typedef struct VLANClientState VLANClientState;
@@ -30,6 +32,7 @@ VLANClientState *qemu_new_vlan_client(VLANState *vlan,
                                       void *opaque);
 int qemu_can_send_packet(VLANClientState *vc);
 void qemu_send_packet(VLANClientState *vc, const uint8_t *buf, int size);
+void qemu_sendv_packet(VLANClientState *vc, const IOVector *iovec);
 void qemu_handler_true(void *opaque);
 
 void do_info_network(void);
diff --git a/vl.c b/vl.c
index 61eb191..342ef79 100644
--- a/vl.c
+++ b/vl.c
@@ -3731,6 +3731,22 @@ void qemu_send_packet(VLANClientState *vc1, const uint8_t *buf, int size)
     }
 }
 
+void qemu_sendv_packet(VLANClientState *vc, const IOVector *iovec)
+{
+    size_t size;
+    uint8_t *data;
+
+    size = iovector_size(iovec);
+    data = qemu_malloc(size);
+    if (data == NULL)
+       return;
+
+    memcpy_from_iovector(data, 0, size, iovec);
+    qemu_send_packet(vc, data, size);
+
+    qemu_free(data);
+}
+
 #if defined(CONFIG_SLIRP)
 
 /* slirp network adapter */



