[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [RFC v3 04/19] Add device IO ops vector
From: |
Alex Williamson |
Subject: |
Re: [RFC v3 04/19] Add device IO ops vector |
Date: |
Fri, 19 Nov 2021 15:42:15 -0700 |
On Mon, 8 Nov 2021 16:46:32 -0800
John Johnson <john.g.johnson@oracle.com> wrote:
> Used for communication with VFIO driver
> (prep work for vfio-user, which will communicate over a socket)
>
> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> ---
> include/hw/vfio/vfio-common.h | 28 ++++++++
> hw/vfio/common.c | 159
> ++++++++++++++++++++++++++++++++++++++----
> hw/vfio/pci.c | 146 ++++++++++++++++++++++++--------------
> 3 files changed, 265 insertions(+), 68 deletions(-)
>
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 9b3c5e5..43fa948 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -124,6 +124,7 @@ typedef struct VFIOHostDMAWindow {
> } VFIOHostDMAWindow;
>
> typedef struct VFIODeviceOps VFIODeviceOps;
> +typedef struct VFIODevIO VFIODevIO;
>
> typedef struct VFIODevice {
> QLIST_ENTRY(VFIODevice) next;
> @@ -139,12 +140,14 @@ typedef struct VFIODevice {
> bool ram_block_discard_allowed;
> bool enable_migration;
> VFIODeviceOps *ops;
> + VFIODevIO *io_ops;
> unsigned int num_irqs;
> unsigned int num_regions;
> unsigned int flags;
> VFIOMigration *migration;
> Error *migration_blocker;
> OnOffAuto pre_copy_dirty_page_tracking;
> + struct vfio_region_info **regions;
> } VFIODevice;
>
> struct VFIODeviceOps {
> @@ -164,6 +167,30 @@ struct VFIODeviceOps {
> * through ioctl() to the kernel VFIO driver, but vfio-user
> * can use a socket to a remote process.
> */
> +struct VFIODevIO {
> + int (*get_info)(VFIODevice *vdev, struct vfio_device_info *info);
> + int (*get_region_info)(VFIODevice *vdev,
> + struct vfio_region_info *info, int *fd);
> + int (*get_irq_info)(VFIODevice *vdev, struct vfio_irq_info *irq);
> + int (*set_irqs)(VFIODevice *vdev, struct vfio_irq_set *irqs);
> + int (*region_read)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t
> size,
> + void *data);
> + int (*region_write)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t
> size,
> + void *data, bool post);
The @post arg is not really developed in this patch, it would make
review easier to add the arg when it becomes implemented and used.
> +};
> +
> +#define VDEV_GET_INFO(vdev, info) \
> + ((vdev)->io_ops->get_info((vdev), (info)))
> +#define VDEV_GET_REGION_INFO(vdev, info, fd) \
> + ((vdev)->io_ops->get_region_info((vdev), (info), (fd)))
> +#define VDEV_GET_IRQ_INFO(vdev, irq) \
> + ((vdev)->io_ops->get_irq_info((vdev), (irq)))
> +#define VDEV_SET_IRQS(vdev, irqs) \
> + ((vdev)->io_ops->set_irqs((vdev), (irqs)))
> +#define VDEV_REGION_READ(vdev, nr, off, size, data) \
> + ((vdev)->io_ops->region_read((vdev), (nr), (off), (size), (data)))
> +#define VDEV_REGION_WRITE(vdev, nr, off, size, data, post) \
> + ((vdev)->io_ops->region_write((vdev), (nr), (off), (size), (data),
> (post)))
>
> struct VFIOContIO {
> int (*dma_map)(VFIOContainer *container,
> @@ -184,6 +211,7 @@ struct VFIOContIO {
> #define CONT_DIRTY_BITMAP(cont, bitmap, range) \
> ((cont)->io_ops->dirty_bitmap((cont), (bitmap), (range)))
>
> +extern VFIODevIO vfio_dev_io_ioctl;
> extern VFIOContIO vfio_cont_io_ioctl;
>
> #endif /* CONFIG_LINUX */
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 50748be..41fdd78 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -70,7 +70,7 @@ void vfio_disable_irqindex(VFIODevice *vbasedev, int index)
> .count = 0,
> };
>
> - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
> + VDEV_SET_IRQS(vbasedev, &irq_set);
> }
>
> void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index)
> @@ -83,7 +83,7 @@ void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int
> index)
> .count = 1,
> };
>
> - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
> + VDEV_SET_IRQS(vbasedev, &irq_set);
> }
>
> void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index)
> @@ -96,7 +96,7 @@ void vfio_mask_single_irqindex(VFIODevice *vbasedev, int
> index)
> .count = 1,
> };
>
> - ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
> + VDEV_SET_IRQS(vbasedev, &irq_set);
> }
>
> static inline const char *action_to_str(int action)
> @@ -177,9 +177,7 @@ int vfio_set_irq_signaling(VFIODevice *vbasedev, int
> index, int subindex,
> pfd = (int32_t *)&irq_set->data;
> *pfd = fd;
>
> - if (ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) {
> - ret = -errno;
> - }
> + ret = VDEV_SET_IRQS(vbasedev, irq_set);
> g_free(irq_set);
>
> if (!ret) {
> @@ -214,6 +212,7 @@ void vfio_region_write(void *opaque, hwaddr addr,
> uint32_t dword;
> uint64_t qword;
> } buf;
> + int ret;
>
> switch (size) {
> case 1:
> @@ -233,13 +232,15 @@ void vfio_region_write(void *opaque, hwaddr addr,
> break;
> }
>
> - if (pwrite(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) {
> + ret = VDEV_REGION_WRITE(vbasedev, region->nr, addr, size, &buf, false);
> + if (ret != size) {
> + const char *err = ret < 0 ? strerror(-ret) : "short write";
> +
> error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64
> - ",%d) failed: %m",
> + ",%d) failed: %s",
> __func__, vbasedev->name, region->nr,
> - addr, data, size);
> + addr, data, size, err);
> }
> -
> trace_vfio_region_write(vbasedev->name, region->nr, addr, data, size);
>
> /*
> @@ -265,13 +266,18 @@ uint64_t vfio_region_read(void *opaque,
> uint64_t qword;
> } buf;
> uint64_t data = 0;
> + int ret;
>
> - if (pread(vbasedev->fd, &buf, size, region->fd_offset + addr) != size) {
> - error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %m",
> + ret = VDEV_REGION_READ(vbasedev, region->nr, addr, size, &buf);
> + if (ret != size) {
> + const char *err = ret < 0 ? strerror(-ret) : "short read";
> +
> + error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %s",
> __func__, vbasedev->name, region->nr,
> - addr, size);
> + addr, size, err);
> return (uint64_t)-1;
> }
> +
> switch (size) {
> case 1:
> data = buf.byte;
> @@ -2369,6 +2375,16 @@ int vfio_get_device(VFIOGroup *group, const char *name,
>
> void vfio_put_base_device(VFIODevice *vbasedev)
> {
> + if (vbasedev->regions != NULL) {
> + int i;
> +
> + for (i = 0; i < vbasedev->num_regions; i++) {
> + g_free(vbasedev->regions[i]);
> + }
> + g_free(vbasedev->regions);
> + vbasedev->regions = NULL;
> + }
> +
> if (!vbasedev->group) {
> return;
> }
> @@ -2382,6 +2398,21 @@ int vfio_get_region_info(VFIODevice *vbasedev, int
> index,
> struct vfio_region_info **info)
> {
> size_t argsz = sizeof(struct vfio_region_info);
> + int fd = -1;
> + int ret;
> +
> + /* create region cache */
> + if (vbasedev->regions == NULL) {
> + vbasedev->regions = g_new0(struct vfio_region_info *,
> + vbasedev->num_regions);
> + }
Seems like this should have been part of vfio_get_device() since that's
where we set num_regions.
> + /* check cache */
> + if (vbasedev->regions[index] != NULL) {
> + *info = g_malloc0(vbasedev->regions[index]->argsz);
> + memcpy(*info, vbasedev->regions[index],
> + vbasedev->regions[index]->argsz);
> + return 0;
> + }
Why are we calling vfio_get_region_info() enough that we need a cache?
This looks like an optimization for something that doesn't exist yet.
And if we have a cache, why aren't callers using it directly rather
than creating a copy for each caller?
>
> *info = g_malloc0(argsz);
>
> @@ -2389,19 +2420,28 @@ int vfio_get_region_info(VFIODevice *vbasedev, int
> index,
> retry:
> (*info)->argsz = argsz;
>
> - if (ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, *info)) {
> + ret = VDEV_GET_REGION_INFO(vbasedev, *info, &fd);
> + if (ret != 0) {
> g_free(*info);
> *info = NULL;
> - return -errno;
> + return ret;
> }
>
> if ((*info)->argsz > argsz) {
> argsz = (*info)->argsz;
> *info = g_realloc(*info, argsz);
> + if (fd != -1) {
> + close(fd);
> + fd = -1;
> + }
Use of fd here is hard to review, it's clearly for some future use case.
>
> goto retry;
> }
>
> + /* fill cache */
> + vbasedev->regions[index] = g_malloc0(argsz);
> + memcpy(vbasedev->regions[index], *info, argsz);
> +
> return 0;
> }
>
> @@ -2554,6 +2594,95 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
> * Traditional ioctl() based io_ops
> */
>
> +static int vfio_io_get_info(VFIODevice *vbasedev, struct vfio_device_info
> *info)
> +{
> + int ret;
> +
> + ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_INFO, info);
> + if (ret < 0) {
> + ret = -errno;
> + }
> +
> + return ret;
> +}
> +
> +static int vfio_io_get_region_info(VFIODevice *vbasedev,
> + struct vfio_region_info *info,
> + int *fd)
> +{
> + int ret;
> +
> + *fd = -1;
> + ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, info);
> + if (ret < 0) {
> + ret = -errno;
> + }
> +
> + return ret;
> +}
> +
> +static int vfio_io_get_irq_info(VFIODevice *vbasedev,
> + struct vfio_irq_info *info)
> +{
> + int ret;
> +
> + ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, info);
> + if (ret < 0) {
> + ret = -errno;
> + }
> +
> + return ret;
> +}
> +
> +static int vfio_io_set_irqs(VFIODevice *vbasedev, struct vfio_irq_set *irqs)
> +{
> + int ret;
> +
> + ret = ioctl(vbasedev->fd, VFIO_DEVICE_SET_IRQS, irqs);
> + if (ret < 0) {
> + ret = -errno;
> + }
> +
> + return ret;
> +}
> +
> +static int vfio_io_region_read(VFIODevice *vbasedev, uint8_t index, off_t
> off,
> + uint32_t size, void *data)
> +{
> + struct vfio_region_info *info = vbasedev->regions[index];
> + int ret;
> +
> + ret = pread(vbasedev->fd, data, size, info->offset + off);
> + if (ret < 0) {
> + ret = -errno;
> + }
> +
> + return ret;
> +}
> +
> +static int vfio_io_region_write(VFIODevice *vbasedev, uint8_t index, off_t
> off,
> + uint32_t size, void *data, bool post)
> +{
> + struct vfio_region_info *info = vbasedev->regions[index];
> + int ret;
> +
> + ret = pwrite(vbasedev->fd, data, size, info->offset + off);
> + if (ret < 0) {
> + ret = -errno;
> + }
> +
> + return ret;
> +}
Simplify all these with ternaries?
return ret < 0 ? -errno : ret;
> +
> +VFIODevIO vfio_dev_io_ioctl = {
> + .get_info = vfio_io_get_info,
> + .get_region_info = vfio_io_get_region_info,
> + .get_irq_info = vfio_io_get_irq_info,
> + .set_irqs = vfio_io_set_irqs,
> + .region_read = vfio_io_region_read,
> + .region_write = vfio_io_region_write,
> +};
> +
> static int vfio_io_dma_map(VFIOContainer *container,
> struct vfio_iommu_type1_dma_map *map,
> int fd, bool will_commit)
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 122edf8..28f21f8 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -402,7 +402,7 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool
> msix)
> fds[i] = fd;
> }
>
> - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set);
> + ret = VDEV_SET_IRQS(&vdev->vbasedev, irq_set);
>
> g_free(irq_set);
>
> @@ -772,14 +772,16 @@ static void vfio_update_msi(VFIOPCIDevice *vdev)
>
> static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
> {
> + VFIODevice *vbasedev = &vdev->vbasedev;
> struct vfio_region_info *reg_info;
> uint64_t size;
> off_t off = 0;
> ssize_t bytes;
> + int ret;
>
> - if (vfio_get_region_info(&vdev->vbasedev,
> - VFIO_PCI_ROM_REGION_INDEX, ®_info)) {
> - error_report("vfio: Error getting ROM info: %m");
> + ret = vfio_get_region_info(vbasedev, VFIO_PCI_ROM_REGION_INDEX,
> ®_info);
> + if (ret < 0) {
> + error_report("vfio: Error getting ROM info: %s", strerror(-ret));
> return;
> }
>
> @@ -806,18 +808,19 @@ static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
> memset(vdev->rom, 0xff, size);
>
> while (size) {
> - bytes = pread(vdev->vbasedev.fd, vdev->rom + off,
> - size, vdev->rom_offset + off);
> + bytes = VDEV_REGION_READ(vbasedev, VFIO_PCI_ROM_REGION_INDEX, off,
> + size, vdev->rom + off);
> if (bytes == 0) {
> break;
> } else if (bytes > 0) {
> off += bytes;
> size -= bytes;
> } else {
> - if (errno == EINTR || errno == EAGAIN) {
> + if (bytes == -EINTR || bytes == -EAGAIN) {
> continue;
> }
> - error_report("vfio: Error reading device ROM: %m");
> + error_report("vfio: Error reading device ROM: %s",
> + strerror(-bytes));
> break;
> }
> }
> @@ -905,11 +908,10 @@ static const MemoryRegionOps vfio_rom_ops = {
>
> static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
> {
> + VFIODevice *vbasedev = &vdev->vbasedev;
> uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK);
> - off_t offset = vdev->config_offset + PCI_ROM_ADDRESS;
> DeviceState *dev = DEVICE(vdev);
> char *name;
> - int fd = vdev->vbasedev.fd;
>
> if (vdev->pdev.romfile || !vdev->pdev.rom_bar) {
> /* Since pci handles romfile, just print a message and return */
> @@ -926,13 +928,23 @@ static void vfio_pci_size_rom(VFIOPCIDevice *vdev)
> * Use the same size ROM BAR as the physical device. The contents
> * will get filled in later when the guest tries to read it.
> */
> - if (pread(fd, &orig, 4, offset) != 4 ||
> - pwrite(fd, &size, 4, offset) != 4 ||
> - pread(fd, &size, 4, offset) != 4 ||
> - pwrite(fd, &orig, 4, offset) != 4) {
> - error_report("%s(%s) failed: %m", __func__, vdev->vbasedev.name);
> +#define rom_read(var) VDEV_REGION_READ(vbasedev, \
> + VFIO_PCI_CONFIG_REGION_INDEX, \
> + PCI_ROM_ADDRESS, 4, (var))
> +#define rom_write(var) VDEV_REGION_WRITE(vbasedev, \
> + VFIO_PCI_CONFIG_REGION_INDEX, \
> + PCI_ROM_ADDRESS, 4, (var), false)
> +
> + if (rom_read(&orig) != 4 ||
> + rom_write(&size) != 4 ||
> + rom_read(&size) != 4 ||
> + rom_write(&orig) != 4) {
> +
> + error_report("%s(%s) ROM access failed", __func__, vbasedev->name);
> return;
> }
> +#undef rom_read
> +#undef rom_write
Hmm, I think I'd rather see a generic function broken out for this,
maybe:
int vfio_pci_size_bar32(VFIODevice *vbasedev, int offset,
uint32_t mask, uint32_t *size)
{
uint32_t orig, val;
int index = VFIO_PCI_CONFIG_REGION_INDEX;
if (VDEV_REGION_READ(vbasedev, index, offset, 4, &orig) != 4 ||
VDEV_REGION_WRITE(vbasedev, index, offset, 4, &mask, false) != 4 ||
VDEV_REGION_READ(vbasedev, index, offset, 4, &val) != 4 ||
VDEV_REGION_WRITE(vbasedev, index, offset, 4, &orig, false) != 4) {
error_report(...
return -1;
}
*size = ~(le32_to_cpu(val) & PCI_ROM_ADDRESS_MASK) + 1;
return 0;
}
Really I think it's just trying to hard code
VFIO_PCI_CONFIG_REGION_INDEX and PCI_ROM_ADDRESS into the macro calls
that demand some simplification, if we use variables it looks fine
inline.
We could also re-wrap VDEV_REGION_READ/WRITE in vfio/pci.h to simplify
config space access, presumably we'd never have posted writes to config
space. Thanks,
Alex
>
> size = ~(le32_to_cpu(size) & PCI_ROM_ADDRESS_MASK) + 1;
>
> @@ -1110,6 +1122,7 @@ static void vfio_sub_page_bar_update_mapping(PCIDevice
> *pdev, int bar)
> uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len)
> {
> VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
> + VFIODevice *vbasedev = &vdev->vbasedev;
> uint32_t emu_bits = 0, emu_val = 0, phys_val = 0, val;
>
> memcpy(&emu_bits, vdev->emulated_config_bits + addr, len);
> @@ -1122,12 +1135,14 @@ uint32_t vfio_pci_read_config(PCIDevice *pdev,
> uint32_t addr, int len)
> if (~emu_bits & (0xffffffffU >> (32 - len * 8))) {
> ssize_t ret;
>
> - ret = pread(vdev->vbasedev.fd, &phys_val, len,
> - vdev->config_offset + addr);
> + ret = VDEV_REGION_READ(vbasedev, VFIO_PCI_CONFIG_REGION_INDEX, addr,
> + len, &phys_val);
> if (ret != len) {
> - error_report("%s(%s, 0x%x, 0x%x) failed: %m",
> - __func__, vdev->vbasedev.name, addr, len);
> - return -errno;
> + const char *err = ret < 0 ? strerror(-ret) : "short read";
> +
> + error_report("%s(%s, 0x%x, 0x%x) failed: %s",
> + __func__, vbasedev->name, addr, len, err);
> + return -1;
> }
> phys_val = le32_to_cpu(phys_val);
> }
> @@ -1143,15 +1158,20 @@ void vfio_pci_write_config(PCIDevice *pdev,
> uint32_t addr, uint32_t val, int len)
> {
> VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
> + VFIODevice *vbasedev = &vdev->vbasedev;
> uint32_t val_le = cpu_to_le32(val);
> + int ret;
>
> trace_vfio_pci_write_config(vdev->vbasedev.name, addr, val, len);
>
> /* Write everything to VFIO, let it filter out what we can't write */
> - if (pwrite(vdev->vbasedev.fd, &val_le, len, vdev->config_offset + addr)
> - != len) {
> - error_report("%s(%s, 0x%x, 0x%x, 0x%x) failed: %m",
> - __func__, vdev->vbasedev.name, addr, val, len);
> + ret = VDEV_REGION_WRITE(vbasedev, VFIO_PCI_CONFIG_REGION_INDEX, addr,
> + len, &val_le, false);
> + if (ret != len) {
> + const char *err = ret < 0 ? strerror(-ret) : "short write";
> +
> + error_report("%s(%s, 0x%x, 0x%x, 0x%x) failed: %s",
> + __func__, vbasedev->name, addr, val, len, err);
> }
>
> /* MSI/MSI-X Enabling/Disabling */
> @@ -1239,10 +1259,13 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int
> pos, Error **errp)
> int ret, entries;
> Error *err = NULL;
>
> - if (pread(vdev->vbasedev.fd, &ctrl, sizeof(ctrl),
> - vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) {
> - error_setg_errno(errp, errno, "failed reading MSI PCI_CAP_FLAGS");
> - return -errno;
> + ret = VDEV_REGION_READ(&vdev->vbasedev, VFIO_PCI_CONFIG_REGION_INDEX,
> + pos + PCI_CAP_FLAGS, sizeof(ctrl), &ctrl);
> + if (ret != sizeof(ctrl)) {
> + const char *err = ret < 0 ? strerror(-ret) : "short read";
> +
> + error_setg(errp, "failed reading MSI PCI_CAP_FLAGS %s", err);
> + return ret;
> }
> ctrl = le16_to_cpu(ctrl);
>
> @@ -1444,33 +1467,40 @@ static void vfio_pci_relocate_msix(VFIOPCIDevice
> *vdev, Error **errp)
> */
> static void vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp)
> {
> + VFIODevice *vbasedev = &vdev->vbasedev;
> uint8_t pos;
> uint16_t ctrl;
> uint32_t table, pba;
> - int fd = vdev->vbasedev.fd;
> VFIOMSIXInfo *msix;
> + int ret;
>
> pos = pci_find_capability(&vdev->pdev, PCI_CAP_ID_MSIX);
> if (!pos) {
> return;
> }
>
> - if (pread(fd, &ctrl, sizeof(ctrl),
> - vdev->config_offset + pos + PCI_MSIX_FLAGS) != sizeof(ctrl)) {
> - error_setg_errno(errp, errno, "failed to read PCI MSIX FLAGS");
> - return;
> + ret = VDEV_REGION_READ(vbasedev, VFIO_PCI_CONFIG_REGION_INDEX,
> + pos + PCI_MSIX_FLAGS, sizeof(ctrl), &ctrl);
> + if (ret != sizeof(ctrl)) {
> + const char *err = ret < 0 ? strerror(-ret) : "short read";
> +
> + error_setg(errp, "failed to read PCI MSIX FLAGS %s", err);
> }
>
> - if (pread(fd, &table, sizeof(table),
> - vdev->config_offset + pos + PCI_MSIX_TABLE) != sizeof(table)) {
> - error_setg_errno(errp, errno, "failed to read PCI MSIX TABLE");
> - return;
> + ret = VDEV_REGION_READ(vbasedev, VFIO_PCI_CONFIG_REGION_INDEX,
> + pos + PCI_MSIX_TABLE, sizeof(table), &table);
> + if (ret != sizeof(table)) {
> + const char *err = ret < 0 ? strerror(-ret) : "short read";
> +
> + error_setg(errp, "failed to read PCI MSIX TABLE %s", err);
> }
>
> - if (pread(fd, &pba, sizeof(pba),
> - vdev->config_offset + pos + PCI_MSIX_PBA) != sizeof(pba)) {
> - error_setg_errno(errp, errno, "failed to read PCI MSIX PBA");
> - return;
> + ret = VDEV_REGION_READ(vbasedev, VFIO_PCI_CONFIG_REGION_INDEX,
> + pos + PCI_MSIX_PBA, sizeof(pba), &pba);
> + if (ret != sizeof(pba)) {
> + const char *err = ret < 0 ? strerror(-ret) : "short read";
> +
> + error_setg(errp, "failed to read PCI MSIX PBA %s", err);
> }
>
> ctrl = le16_to_cpu(ctrl);
> @@ -1608,7 +1638,6 @@ static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev,
> bool enabled)
> static void vfio_bar_prepare(VFIOPCIDevice *vdev, int nr)
> {
> VFIOBAR *bar = &vdev->bars[nr];
> -
> uint32_t pci_bar;
> int ret;
>
> @@ -1618,10 +1647,13 @@ static void vfio_bar_prepare(VFIOPCIDevice *vdev, int
> nr)
> }
>
> /* Determine what type of BAR this is for registration */
> - ret = pread(vdev->vbasedev.fd, &pci_bar, sizeof(pci_bar),
> - vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr));
> + ret = VDEV_REGION_READ(&vdev->vbasedev, VFIO_PCI_CONFIG_REGION_INDEX,
> + PCI_BASE_ADDRESS_0 + (4 * nr),
> + sizeof(pci_bar), &pci_bar);
> if (ret != sizeof(pci_bar)) {
> - error_report("vfio: Failed to read BAR %d (%m)", nr);
> + const char *err = ret < 0 ? strerror(-ret) : "short read";
> +
> + error_report("vfio: Failed to read BAR %d (%s)", nr, err);
> return;
> }
>
> @@ -2169,8 +2201,9 @@ static void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
>
> static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
> {
> + VFIODevice *vbasedev = &vdev->vbasedev;
> Error *err = NULL;
> - int nr;
> + int ret, nr;
>
> vfio_intx_enable(vdev, &err);
> if (err) {
> @@ -2178,13 +2211,17 @@ static void vfio_pci_post_reset(VFIOPCIDevice *vdev)
> }
>
> for (nr = 0; nr < PCI_NUM_REGIONS - 1; ++nr) {
> - off_t addr = vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr);
> + off_t addr = PCI_BASE_ADDRESS_0 + (4 * nr);
> uint32_t val = 0;
> uint32_t len = sizeof(val);
>
> - if (pwrite(vdev->vbasedev.fd, &val, len, addr) != len) {
> - error_report("%s(%s) reset bar %d failed: %m", __func__,
> - vdev->vbasedev.name, nr);
> + ret = VDEV_REGION_WRITE(vbasedev, VFIO_PCI_CONFIG_REGION_INDEX, addr,
> + len, &val, false);
> + if (ret != len) {
> + const char *err = ret < 0 ? strerror(-ret) : "short write";
> +
> + error_report("%s(%s) reset bar %d failed: %s", __func__,
> + vbasedev->name, nr, err);
> }
> }
>
> @@ -2619,7 +2656,7 @@ static void vfio_populate_device(VFIOPCIDevice *vdev,
> Error **errp)
>
> irq_info.index = VFIO_PCI_ERR_IRQ_INDEX;
>
> - ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info);
> + ret = VDEV_GET_IRQ_INFO(vbasedev, &irq_info);
> if (ret) {
> /* This can fail for an old kernel or legacy PCI dev */
> trace_vfio_populate_device_get_irq_info_failure(strerror(errno));
> @@ -2738,8 +2775,10 @@ static void vfio_register_req_notifier(VFIOPCIDevice
> *vdev)
> return;
> }
>
> - if (ioctl(vdev->vbasedev.fd,
> - VFIO_DEVICE_GET_IRQ_INFO, &irq_info) < 0 || irq_info.count <
> 1) {
> + if (VDEV_GET_IRQ_INFO(&vdev->vbasedev, &irq_info) < 0) {
> + return;
> + }
> + if (irq_info.count < 1) {
> return;
> }
>
> @@ -2817,6 +2856,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
> vdev->vbasedev.ops = &vfio_pci_ops;
> vdev->vbasedev.type = VFIO_DEVICE_TYPE_PCI;
> vdev->vbasedev.dev = DEVICE(vdev);
> + vdev->vbasedev.io_ops = &vfio_dev_io_ioctl;
>
> tmp = g_strdup_printf("%s/iommu_group", vdev->vbasedev.sysfsdev);
> len = readlink(tmp, group_path, sizeof(group_path));
- [RFC v3 00/19] vfio-user client, John Johnson, 2021/11/08
- [RFC v3 13/19] vfio-user: pci_user_realize PCI setup, John Johnson, 2021/11/08
- [RFC v3 11/19] vfio-user: get region info, John Johnson, 2021/11/08
- [RFC v3 06/19] vfio-user: Define type vfio_user_pci_dev_info, John Johnson, 2021/11/08
- [RFC v3 04/19] Add device IO ops vector, John Johnson, 2021/11/08
- Re: [RFC v3 04/19] Add device IO ops vector,
Alex Williamson <=
- [RFC v3 09/19] vfio-user: define socket send functions, John Johnson, 2021/11/08
- [RFC v3 03/19] Add container IO ops vector, John Johnson, 2021/11/08
- [RFC v3 02/19] vfio-user: add VFIO base abstract class, John Johnson, 2021/11/08
- [RFC v3 07/19] vfio-user: connect vfio proxy to remote server, John Johnson, 2021/11/08
- [RFC v3 10/19] vfio-user: get device info, John Johnson, 2021/11/08
- [RFC v3 08/19] vfio-user: define socket receive functions, John Johnson, 2021/11/08
- [RFC v3 05/19] Add validation ops vector, John Johnson, 2021/11/08