[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 3/3] pseries: Add partial support for PCI
From: |
Alexander Graf |
Subject: |
Re: [Qemu-devel] [PATCH 3/3] pseries: Add partial support for PCI |
Date: |
Sun, 30 Oct 2011 18:06:51 +0100 |
On 28.10.2011, at 03:56, David Gibson wrote:
> From: Alexey Kardashevskiy <address@hidden>
>
> This patch adds a PCI bus to the pseries machine. This instantiates
> the qemu generic PCI bus code, advertises a PCI host bridge in the
> guest's device tree and implements the RTAS methods specified by PAPR
> to access PCI config space. It also sets up the memory regions we
> need to provide windows into the PCI memory and IO space, and
> advertises those to the guest.
>
> However, because qemu can't yet emulate an IOMMU, which is mandatory on
> pseries, PCI devices which use DMA (i.e. most of them) will not work with
> this code alone. Still, this is enough to support the virtio_pci device
> (which probably _should_ use emulated PCI DMA, but is specced to use
> direct hypervisor access to guest physical memory instead).
>
> Signed-off-by: Alexey Kardashevskiy <address@hidden>
> Signed-off-by: David Gibson <address@hidden>
> ---
> Makefile.target | 3 +
> hw/spapr.c | 36 ++++-
> hw/spapr.h | 2 +
> hw/spapr_pci.c | 515 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
> hw/spapr_pci.h | 59 +++++++
> 5 files changed, 611 insertions(+), 4 deletions(-)
> create mode 100644 hw/spapr_pci.c
> create mode 100644 hw/spapr_pci.h
>
> diff --git a/Makefile.target b/Makefile.target
> index fe5f6f7..f3eb842 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -248,6 +248,9 @@ obj-ppc-y += ppc_newworld.o
> # IBM pSeries (sPAPR)
> obj-ppc-$(CONFIG_PSERIES) += spapr.o spapr_hcall.o spapr_rtas.o spapr_vio.o
> obj-ppc-$(CONFIG_PSERIES) += xics.o spapr_vty.o spapr_llan.o spapr_vscsi.o
> +ifeq ($(CONFIG_PCI),y)
> +obj-ppc-$(CONFIG_PSERIES) += spapr_pci.o device-hotplug.o pci-hotplug.o
> +endif
You make it conditional here ...
> # PowerPC 4xx boards
> obj-ppc-y += ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o
> obj-ppc-y += ppc440.o ppc440_bamboo.o
> diff --git a/hw/spapr.c b/hw/spapr.c
> index 933af32..bdaa938 100644
> --- a/hw/spapr.c
> +++ b/hw/spapr.c
> @@ -39,10 +39,12 @@
>
> #include "hw/spapr.h"
> #include "hw/spapr_vio.h"
> +#include "hw/spapr_pci.h"
> #include "hw/xics.h"
>
> #include "kvm.h"
> #include "kvm_ppc.h"
> +#include "pci.h"
... but not here. Just throw away the condition above. We don't need to support
-M pseries without PCI.
>
> #include "exec-memory.h"
>
> @@ -62,6 +64,11 @@
> #define MAX_CPUS 256
> #define XICS_IRQS 1024
>
> +#define SPAPR_PCI_BUID 0x800000020000001ULL
> +#define SPAPR_PCI_MEM_WIN_ADDR (0x10000000000ULL + 0xA0000000)
> +#define SPAPR_PCI_MEM_WIN_SIZE 0x20000000
> +#define SPAPR_PCI_IO_WIN_ADDR (0x10000000000ULL + 0x80000000)
> +
> #define PHANDLE_XICP 0x00001111
>
> sPAPREnvironment *spapr;
> @@ -146,6 +153,14 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
> &end_prop, sizeof(end_prop))));
> _FDT((fdt_property_string(fdt, "qemu,boot-device", boot_device)));
>
> + /*
> + * Because we don't always invoke any firmware, we can't rely on
> + * that to do BAR allocation. Long term, we should probably do
> + * that ourselves, but for now, this setting (plus advertising the
> + * current BARs as 0) causes sufficiently recent kernels to do the
> + * BAR assignment themselves */
> + _FDT((fdt_property_cell(fdt, "linux,pci-probe-only", 0)));
> +
> _FDT((fdt_end_node(fdt)));
>
> /* memory node(s) */
> @@ -308,6 +323,7 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr,
> {
> int ret;
> void *fdt;
> + sPAPRPHBState *phb;
>
> fdt = g_malloc(FDT_MAX_SIZE);
>
> @@ -320,6 +336,15 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr,
> exit(1);
> }
>
> + QLIST_FOREACH(phb, &spapr->phbs, list) {
> + ret = spapr_populate_pci_devices(phb, PHANDLE_XICP, fdt);
> + }
> +
> + if (ret < 0) {
> + fprintf(stderr, "couldn't setup PCI devices in fdt\n");
> + exit(1);
> + }
> +
> /* RTAS */
> ret = spapr_rtas_device_tree_setup(fdt, rtas_addr, rtas_size);
> if (ret < 0) {
> @@ -478,6 +503,12 @@ static void ppc_spapr_init(ram_addr_t ram_size,
> }
> }
>
> + /* Set up PCI */
> + spapr_create_phb(spapr, "pci", SPAPR_PCI_BUID,
> + SPAPR_PCI_MEM_WIN_ADDR,
> + SPAPR_PCI_MEM_WIN_SIZE,
> + SPAPR_PCI_IO_WIN_ADDR);
> +
> for (i = 0; i < nb_nics; i++) {
> NICInfo *nd = &nd_table[i];
>
> @@ -488,10 +519,7 @@ static void ppc_spapr_init(ram_addr_t ram_size,
> if (strcmp(nd->model, "ibmveth") == 0) {
> spapr_vlan_create(spapr->vio_bus, 0x1000 + i, nd);
> } else {
> - fprintf(stderr, "pSeries (sPAPR) platform does not support "
> - "NIC model '%s' (only ibmveth is supported)\n",
> - nd->model);
> - exit(1);
> + pci_nic_init_nofail(&nd_table[i], nd->model, NULL);
> }
> }
>
> diff --git a/hw/spapr.h b/hw/spapr.h
> index 6657c33..5689797 100644
> --- a/hw/spapr.h
> +++ b/hw/spapr.h
> @@ -2,12 +2,14 @@
> #define __HW_SPAPR_H__
>
> #include "hw/xics.h"
> +#include "spapr_pci.h"
>
> struct VIOsPAPRBus;
> struct icp_state;
>
> typedef struct sPAPREnvironment {
> struct VIOsPAPRBus *vio_bus;
> + QLIST_HEAD(, sPAPRPHBState) phbs;
> struct icp_state *icp;
>
> target_phys_addr_t ram_limit;
> diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c
> new file mode 100644
> index 0000000..a907747
> --- /dev/null
> +++ b/hw/spapr_pci.c
> @@ -0,0 +1,515 @@
> +/*
> + * QEMU sPAPR PCI host originated from Uninorth PCI host
> + *
> + * Copyright (c) 2011 Alexey Kardashevskiy, IBM Corporation.
> + * Copyright (C) 2011 David Gibson, IBM Corporation.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +#include "hw.h"
> +#include "pci.h"
> +#include "pci_host.h"
> +#include "hw/spapr.h"
> +#include "hw/spapr_pci.h"
> +#include "exec-memory.h"
> +#include <libfdt.h>
> +
> +#include "hw/pci_internals.h"
> +
> +static const uint32_t bars[] = {
> + PCI_BASE_ADDRESS_0, PCI_BASE_ADDRESS_1,
> + PCI_BASE_ADDRESS_2, PCI_BASE_ADDRESS_3,
> + PCI_BASE_ADDRESS_4, PCI_BASE_ADDRESS_5
> + /*, PCI_ROM_ADDRESS*/
> +};
> +
> +static PCIDevice *find_dev(sPAPREnvironment *spapr,
> + uint64_t buid, uint32_t config_addr)
> +{
> + DeviceState *qdev;
> + int devfn = (config_addr >> 8) & 0xFF;
> + sPAPRPHBState *phb;
> +
> + QLIST_FOREACH(phb, &spapr->phbs, list) {
> + if (phb->buid != buid) {
> + continue;
> + }
> +
> + QLIST_FOREACH(qdev, &phb->host_state.bus->qbus.children, sibling) {
> + PCIDevice *dev = (PCIDevice *)qdev;
> + if (dev->devfn == devfn) {
> + return dev;
> + }
> + }
> + }
> +
> + return NULL;
> +}
> +
> +static void rtas_ibm_read_pci_config(sPAPREnvironment *spapr,
> + uint32_t token, uint32_t nargs,
> + target_ulong args,
> + uint32_t nret, target_ulong rets)
> +{
> + uint32_t val, size, addr;
> + uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
> + PCIDevice *dev = find_dev(spapr, buid, rtas_ld(args, 0));
> +
> + if (!dev) {
> + rtas_st(rets, 0, -1);
> + return;
> + }
> + size = rtas_ld(args, 3);
> + addr = rtas_ld(args, 0) & 0xFF;
> + val = pci_default_read_config(dev, addr, size);
> + rtas_st(rets, 0, 0);
> + rtas_st(rets, 1, val);
> +}
> +
> +static void rtas_read_pci_config(sPAPREnvironment *spapr,
> + uint32_t token, uint32_t nargs,
> + target_ulong args,
> + uint32_t nret, target_ulong rets)
> +{
> + uint32_t val, size, addr;
> + PCIDevice *dev = find_dev(spapr, 0, rtas_ld(args, 0));
> +
> + if (!dev) {
> + rtas_st(rets, 0, -1);
> + return;
> + }
> + size = rtas_ld(args, 1);
> + addr = rtas_ld(args, 0) & 0xFF;
> + val = pci_default_read_config(dev, addr, size);
> + rtas_st(rets, 0, 0);
> + rtas_st(rets, 1, val);
> +}
> +
> +static void rtas_ibm_write_pci_config(sPAPREnvironment *spapr,
> + uint32_t token, uint32_t nargs,
> + target_ulong args,
> + uint32_t nret, target_ulong rets)
> +{
> + uint32_t val, size, addr;
> + uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
> + PCIDevice *dev = find_dev(spapr, buid, rtas_ld(args, 0));
> +
> + if (!dev) {
> + rtas_st(rets, 0, -1);
> + return;
> + }
> + val = rtas_ld(args, 4);
> + size = rtas_ld(args, 3);
> + addr = rtas_ld(args, 0) & 0xFF;
> + pci_default_write_config(dev, addr, val, size);
> + rtas_st(rets, 0, 0);
> +}
> +
> +static void rtas_write_pci_config(sPAPREnvironment *spapr,
> + uint32_t token, uint32_t nargs,
> + target_ulong args,
> + uint32_t nret, target_ulong rets)
> +{
> + uint32_t val, size, addr;
> + PCIDevice *dev = find_dev(spapr, 0, rtas_ld(args, 0));
> +
> + if (!dev) {
> + rtas_st(rets, 0, -1);
> + return;
> + }
> + val = rtas_ld(args, 2);
> + size = rtas_ld(args, 1);
> + addr = rtas_ld(args, 0) & 0xFF;
> + pci_default_write_config(dev, addr, val, size);
> + rtas_st(rets, 0, 0);
> +}
> +
> +static int pci_spapr_map_irq(PCIDevice *pci_dev, int irq_num)
> +{
> + /*
> + * Here we need to convert pci_dev + irq_num to some unique value
> + * which is less than number of IRQs on the specific bus (now it
> + * is 16). At the moment irq_num == device_id (number of the
> + * slot?)
> + * FIXME: we should swizzle in fn and irq_num
> + */
> + return (pci_dev->devfn >> 3) % SPAPR_PCI_NUM_LSI;
> +}
> +
> +static void pci_spapr_set_irq(void *opaque, int irq_num, int level)
> +{
> + /*
> + * Here we use the number returned by pci_spapr_map_irq to find a
> + * corresponding qemu_irq.
> + */
> + sPAPRPHBState *phb = opaque;
> +
> + qemu_set_irq(phb->lsi_table[irq_num].qirq, level);
> +}
> +
> +static int spapr_phb_init(SysBusDevice *s)
> +{
> + sPAPRPHBState *phb = FROM_SYSBUS(sPAPRPHBState, s);
> + int i;
> +
> + /* Initialize the LSI table */
> + for (i = 0; i < SPAPR_PCI_NUM_LSI; i++) {
> + qemu_irq qirq;
> + uint32_t num;
> +
> + qirq = spapr_allocate_irq(0, &num);
> + if (!qirq) {
> + return -1;
> + }
> +
> + phb->lsi_table[i].dt_irq = num;
> + phb->lsi_table[i].qirq = qirq;
> + }
> +
> + return 0;
> +}
> +
> +static int spapr_main_pci_host_init(PCIDevice *d)
> +{
> + return 0;
> +}
> +
> +static PCIDeviceInfo spapr_main_pci_host_info = {
> + .qdev.name = "spapr-pci-host-bridge",
> + .qdev.size = sizeof(PCIDevice),
> + .init = spapr_main_pci_host_init,
> +};
> +
> +static void spapr_register_devices(void)
> +{
> + sysbus_register_dev("spapr-pci-host-bridge", sizeof(sPAPRPHBState),
> + spapr_phb_init);
> + pci_qdev_register(&spapr_main_pci_host_info);
> +}
> +
> +device_init(spapr_register_devices)
> +
> +static uint64_t spapr_io_read(void *opaque, target_phys_addr_t addr,
> + unsigned size)
> +{
> + switch (size) {
> + case 1:
> + return cpu_inb(addr);
> + case 2:
> + return cpu_inw(addr);
> + case 4:
> + return cpu_inl(addr);
> + }
> + assert(0);
> +}
> +
> +static void spapr_io_write(void *opaque, target_phys_addr_t addr,
> + uint64_t data, unsigned size)
> +{
> + switch (size) {
> + case 1:
> + cpu_outb(addr, data);
> + return;
> + case 2:
> + cpu_outw(addr, data);
> + return;
> + case 4:
> + cpu_outl(addr, data);
> + return;
> + }
> + assert(0);
> +}
> +
> +static MemoryRegionOps spapr_io_ops = {
> + .endianness = DEVICE_LITTLE_ENDIAN,
> + .read = spapr_io_read,
> + .write = spapr_io_write
> +};
> +
> +void spapr_create_phb(sPAPREnvironment *spapr,
> + const char *busname, uint64_t buid,
> + uint64_t mem_win_addr, uint64_t mem_win_size,
> + uint64_t io_win_addr)
> +{
> + DeviceState *dev;
> + SysBusDevice *s;
> + sPAPRPHBState *phb;
> + PCIBus *bus;
> + char namebuf[strlen(busname)+11];
> +
> + dev = qdev_create(NULL, "spapr-pci-host-bridge");
> + qdev_init_nofail(dev);
> + s = sysbus_from_qdev(dev);
> + phb = FROM_SYSBUS(sPAPRPHBState, s);
> +
> + phb->mem_win_addr = mem_win_addr;
> +
> + sprintf(namebuf, "%s-mem", busname);
> + memory_region_init(&phb->memspace, namebuf, INT64_MAX);
> +
> + sprintf(namebuf, "%s-memwindow", busname);
> + memory_region_init_alias(&phb->memwindow, namebuf, &phb->memspace,
> + SPAPR_PCI_MEM_WIN_BUS_OFFSET, mem_win_size);
> + memory_region_add_subregion(get_system_memory(), mem_win_addr,
> + &phb->memwindow);
> +
> + phb->io_win_addr = io_win_addr;
> +
> + /* On ppc, we only have MMIO, no specific IO space from the CPU
> + * perspective. In theory we ought to be able to embed the PCI IO
> + * memory region directly in the system memory space. However,
> + * if any of the IO BAR subregions use the old_portio mechanism,
> + * that won't be processed properly unless accessed from the
> + * system io address space. This hack to bounce things via
> + * system_io works around the problem until all the users of
> + * old_portio are updated */
> + sprintf(namebuf, "%s-io", busname);
> + memory_region_init(&phb->iospace, namebuf, SPAPR_PCI_IO_WIN_SIZE);
> + /* FIXME: fix to support multiple PHBs */
> + memory_region_add_subregion(get_system_io(), 0, &phb->iospace);
> +
> + sprintf(namebuf, "%s-iowindow", busname);
> + memory_region_init_io(&phb->iowindow, &spapr_io_ops, phb,
> + namebuf, SPAPR_PCI_IO_WIN_SIZE);
> + memory_region_add_subregion(get_system_memory(), io_win_addr,
> + &phb->iowindow);
> +
> + phb->host_state.bus = bus = pci_register_bus(&phb->busdev.qdev, busname,
> + pci_spapr_set_irq,
> + pci_spapr_map_irq,
> + phb,
> + &phb->memspace,
> &phb->iospace,
> + PCI_DEVFN(0, 0),
> + SPAPR_PCI_NUM_LSI);
> +
> + spapr_rtas_register("read-pci-config", rtas_read_pci_config);
> + spapr_rtas_register("write-pci-config", rtas_write_pci_config);
> + spapr_rtas_register("ibm,read-pci-config", rtas_ibm_read_pci_config);
> + spapr_rtas_register("ibm,write-pci-config", rtas_ibm_write_pci_config);
> +
> + /*
> + * This is a workaround to disable PCI devices resetting as we do
> + * BAR allocation on the QEMU side and reset destroys this
> + * configuration.
> + */
> + bus->qbus.info->reset = NULL;
Eh. What? So you're breaking reset for all PCI devices? How do you reboot?
Alex