[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH v5 08/16] spapr_events: re-use EPOW event infras
From: |
Michael Roth |
Subject: |
Re: [Qemu-devel] [PATCH v5 08/16] spapr_events: re-use EPOW event infrastructure for hotplug events |
Date: |
Tue, 24 Feb 2015 14:04:29 -0600 |
User-agent: |
alot/0.3.4 |
Quoting David Gibson (2015-02-24 00:49:42)
> On Mon, Feb 16, 2015 at 08:27:44AM -0600, Michael Roth wrote:
> > From: Nathan Fontenot <address@hidden>
> >
> > This extends the data structures currently used to report EPOW events to
> > guests via the check-exception RTAS interfaces to also include event types
> > for hotplug/unplug events.
> >
> > This is currently undocumented and being finalized for inclusion in PAPR
> > specification, but we implement this here as an extension for guest
> > userspace tools to implement (existing guest kernels simply log these
> > events via a sysfs interface that's read by rtas_errd, and current
> > versions of rtas_errd/powerpc-utils already support the use of this
> > mechanism for initiating hotplug operations).
> >
> > We also add support for queues of pending RTAS events, since in the
> > case of hotplug there's chance for multiple events being in-flight
> > at any point in time.
> >
> > Signed-off-by: Nathan Fontenot <address@hidden>
> > Signed-off-by: Michael Roth <address@hidden>
> > ---
> > hw/ppc/spapr.c | 2 +-
> > hw/ppc/spapr_events.c | 271
> > ++++++++++++++++++++++++++++++++++++++++---------
> > include/hw/ppc/spapr.h | 5 +-
> > 3 files changed, 226 insertions(+), 52 deletions(-)
> >
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index bfacc9d..861107e 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -1640,7 +1640,7 @@ static void ppc_spapr_init(MachineState *machine)
> > spapr->fdt_skel = spapr_create_fdt_skel(initrd_base, initrd_size,
> > kernel_size, kernel_le,
> > boot_device, kernel_cmdline,
> > - spapr->epow_irq);
> > + spapr->check_exception_irq);
> > assert(spapr->fdt_skel != NULL);
> > }
> >
> > diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> > index 283e96b..0eeb1d8 100644
> > --- a/hw/ppc/spapr_events.c
> > +++ b/hw/ppc/spapr_events.c
> > @@ -32,6 +32,9 @@
> >
> > #include "hw/ppc/spapr.h"
> > #include "hw/ppc/spapr_vio.h"
> > +#include "hw/pci/pci.h"
> > +#include "hw/pci-host/spapr.h"
> > +#include "hw/ppc/spapr_drc.h"
> >
> > #include <libfdt.h>
> >
> > @@ -77,6 +80,7 @@ struct rtas_error_log {
> > #define RTAS_LOG_TYPE_ECC_UNCORR 0x00000009
> > #define RTAS_LOG_TYPE_ECC_CORR 0x0000000a
> > #define RTAS_LOG_TYPE_EPOW 0x00000040
> > +#define RTAS_LOG_TYPE_HOTPLUG 0x000000e5
> > uint32_t extended_length;
> > } QEMU_PACKED;
> >
> > @@ -166,6 +170,38 @@ struct epow_log_full {
> > struct rtas_event_log_v6_epow epow;
> > } QEMU_PACKED;
> >
> > +struct rtas_event_log_v6_hp {
> > +#define RTAS_LOG_V6_SECTION_ID_HOTPLUG 0x4850 /* HP */
> > + struct rtas_event_log_v6_section_header hdr;
> > + uint8_t hotplug_type;
> > +#define RTAS_LOG_V6_HP_TYPE_CPU 1
> > +#define RTAS_LOG_V6_HP_TYPE_MEMORY 2
> > +#define RTAS_LOG_V6_HP_TYPE_SLOT 3
> > +#define RTAS_LOG_V6_HP_TYPE_PHB 4
> > +#define RTAS_LOG_V6_HP_TYPE_PCI 5
> > + uint8_t hotplug_action;
> > +#define RTAS_LOG_V6_HP_ACTION_ADD 1
> > +#define RTAS_LOG_V6_HP_ACTION_REMOVE 2
> > + uint8_t hotplug_identifier;
> > +#define RTAS_LOG_V6_HP_ID_DRC_NAME 1
> > +#define RTAS_LOG_V6_HP_ID_DRC_INDEX 2
> > +#define RTAS_LOG_V6_HP_ID_DRC_COUNT 3
> > + uint8_t reserved;
> > + union {
> > + uint32_t index;
> > + uint32_t count;
> > + char name[1];
> > + } drc;
> > +} QEMU_PACKED;
> > +
> > +struct hp_log_full {
> > + struct rtas_error_log hdr;
> > + struct rtas_event_log_v6 v6hdr;
> > + struct rtas_event_log_v6_maina maina;
> > + struct rtas_event_log_v6_mainb mainb;
> > + struct rtas_event_log_v6_hp hp;
> > +} QEMU_PACKED;
> > +
> > #define EVENT_MASK_INTERNAL_ERRORS 0x80000000
> > #define EVENT_MASK_EPOW 0x40000000
> > #define EVENT_MASK_HOTPLUG 0x10000000
> > @@ -181,67 +217,84 @@ struct epow_log_full {
> > } \
> > } while (0)
> >
> > -void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq)
> > +void spapr_events_fdt_skel(void *fdt, uint32_t check_exception_irq)
> > {
> > - uint32_t epow_irq_ranges[] = {cpu_to_be32(epow_irq), cpu_to_be32(1)};
> > - uint32_t epow_interrupts[] = {cpu_to_be32(epow_irq), 0};
> > + uint32_t irq_ranges[] = {cpu_to_be32(check_exception_irq),
> > cpu_to_be32(1)};
> > + uint32_t interrupts[] = {cpu_to_be32(check_exception_irq), 0};
> >
> > _FDT((fdt_begin_node(fdt, "event-sources")));
> >
> > _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
> > _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2)));
> > _FDT((fdt_property(fdt, "interrupt-ranges",
> > - epow_irq_ranges, sizeof(epow_irq_ranges))));
> > + irq_ranges, sizeof(irq_ranges))));
> >
> > _FDT((fdt_begin_node(fdt, "epow-events")));
> > - _FDT((fdt_property(fdt, "interrupts",
> > - epow_interrupts, sizeof(epow_interrupts))));
> > + _FDT((fdt_property(fdt, "interrupts", interrupts,
> > sizeof(interrupts))));
> > _FDT((fdt_end_node(fdt)));
> >
> > _FDT((fdt_end_node(fdt)));
> > }
> >
> > -static struct epow_log_full *pending_epow;
> > -static uint32_t next_plid;
> > +typedef struct EventLogEntry {
> > + int log_type;
> > + void *data;
> > + QTAILQ_ENTRY(EventLogEntry) next;
> > +} EventLogEntry;
> >
> > -static void spapr_powerdown_req(Notifier *n, void *opaque)
> > +static struct {
> > + QTAILQ_HEAD(, EventLogEntry) pending_events;
> > +} rtas_event_log;
>
> Adding new globals is never nice. Couldn't this go into
> sPAPREnvironment, or better yet the SPAPR_MACHINE structure? (I'd
> like to merge those two when we get the chance).
Makes sense, will roll into sPAPREnvironment initially
>
> > +static void rtas_event_log_queue(int log_type, void *data)
> > {
> > - sPAPREnvironment *spapr = container_of(n, sPAPREnvironment,
> > epow_notifier);
> > - struct rtas_error_log *hdr;
> > - struct rtas_event_log_v6 *v6hdr;
> > - struct rtas_event_log_v6_maina *maina;
> > - struct rtas_event_log_v6_mainb *mainb;
> > - struct rtas_event_log_v6_epow *epow;
> > - struct tm tm;
> > - int year;
> > + EventLogEntry *entry = g_new(EventLogEntry, 1);
> > +
> > + entry->log_type = log_type;
> > + entry->data = data;
> > + QTAILQ_INSERT_TAIL(&rtas_event_log.pending_events, entry, next);
> > +}
> >
> > - if (pending_epow) {
> > - /* For now, we just throw away earlier events if two come
> > - * along before any are consumed. This is sufficient for our
> > - * powerdown messages, but we'll need more if we do more
> > - * general error/event logging */
> > - g_free(pending_epow);
> > +static EventLogEntry *rtas_event_log_dequeue(uint32_t event_mask)
> > +{
> > + EventLogEntry *entry = NULL;
> > +
> > + /* we only queue EPOW events atm. */
> > + if ((event_mask & EVENT_MASK_EPOW) == 0) {
> > + return NULL;
> > }
> > - pending_epow = g_malloc0(sizeof(*pending_epow));
> > - hdr = &pending_epow->hdr;
> > - v6hdr = &pending_epow->v6hdr;
> > - maina = &pending_epow->maina;
> > - mainb = &pending_epow->mainb;
> > - epow = &pending_epow->epow;
> >
> > - hdr->summary = cpu_to_be32(RTAS_LOG_VERSION_6
> > - | RTAS_LOG_SEVERITY_EVENT
> > - | RTAS_LOG_DISPOSITION_NOT_RECOVERED
> > - | RTAS_LOG_OPTIONAL_PART_PRESENT
> > - | RTAS_LOG_TYPE_EPOW);
> > - hdr->extended_length = cpu_to_be32(sizeof(*pending_epow)
> > - - sizeof(pending_epow->hdr));
> > + QTAILQ_FOREACH(entry, &rtas_event_log.pending_events, next) {
> > + /* EPOW and hotplug events are surfaced in the same manner */
> > + if (entry->log_type == RTAS_LOG_TYPE_EPOW ||
> > + entry->log_type == RTAS_LOG_TYPE_HOTPLUG) {
> > + break;
> > + }
> > + }
> > +
> > + if (entry) {
> > + QTAILQ_REMOVE(&rtas_event_log.pending_events, entry, next);
> > + }
> >
> > + return entry;
> > +}
> > +
> > +static uint32_t next_plid;
> > +
> > +static void spapr_init_v6hdr(struct rtas_event_log_v6 *v6hdr)
> > +{
> > v6hdr->b0 = RTAS_LOG_V6_B0_VALID | RTAS_LOG_V6_B0_NEW_LOG
> > | RTAS_LOG_V6_B0_BIGENDIAN;
> > v6hdr->b2 = RTAS_LOG_V6_B2_POWERPC_FORMAT
> > | RTAS_LOG_V6_B2_LOG_FORMAT_PLATFORM_EVENT;
> > v6hdr->company = cpu_to_be32(RTAS_LOG_V6_COMPANY_IBM);
> > +}
> > +
> > +static void spapr_init_maina(struct rtas_event_log_v6_maina *maina,
> > + int section_count)
> > +{
> > + struct tm tm;
> > + int year;
> >
> > maina->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MAINA);
> > maina->hdr.section_length = cpu_to_be16(sizeof(*maina));
> > @@ -256,8 +309,37 @@ static void spapr_powerdown_req(Notifier *n, void
> > *opaque)
> > | (to_bcd(tm.tm_min) << 16)
> > | (to_bcd(tm.tm_sec) << 8));
> > maina->creator_id = 'H'; /* Hypervisor */
> > - maina->section_count = 3; /* Main-A, Main-B and EPOW */
> > + maina->section_count = section_count;
> > maina->plid = next_plid++;
> > +}
> > +
> > +static void spapr_powerdown_req(Notifier *n, void *opaque)
> > +{
> > + sPAPREnvironment *spapr = container_of(n, sPAPREnvironment,
> > epow_notifier);
> > + struct rtas_error_log *hdr;
> > + struct rtas_event_log_v6 *v6hdr;
> > + struct rtas_event_log_v6_maina *maina;
> > + struct rtas_event_log_v6_mainb *mainb;
> > + struct rtas_event_log_v6_epow *epow;
> > + struct epow_log_full *pending_epow;
> > +
> > + pending_epow = g_malloc0(sizeof(*pending_epow));
>
> The name's no longer really right as a local.
>
> > + hdr = &pending_epow->hdr;
> > + v6hdr = &pending_epow->v6hdr;
> > + maina = &pending_epow->maina;
> > + mainb = &pending_epow->mainb;
> > + epow = &pending_epow->epow;
> > +
> > + hdr->summary = cpu_to_be32(RTAS_LOG_VERSION_6
> > + | RTAS_LOG_SEVERITY_EVENT
> > + | RTAS_LOG_DISPOSITION_NOT_RECOVERED
> > + | RTAS_LOG_OPTIONAL_PART_PRESENT
> > + | RTAS_LOG_TYPE_EPOW);
> > + hdr->extended_length = cpu_to_be32(sizeof(*pending_epow)
> > + - sizeof(pending_epow->hdr));
> > +
> > + spapr_init_v6hdr(v6hdr);
> > + spapr_init_maina(maina, 3 /* Main-A, Main-B and EPOW */);
> >
> > mainb->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MAINB);
> > mainb->hdr.section_length = cpu_to_be16(sizeof(*mainb));
> > @@ -274,7 +356,78 @@ static void spapr_powerdown_req(Notifier *n, void
> > *opaque)
> > epow->event_modifier = RTAS_LOG_V6_EPOW_MODIFIER_NORMAL;
> > epow->extended_modifier =
> > RTAS_LOG_V6_EPOW_XMODIFIER_PARTITION_SPECIFIC;
> >
> > - qemu_irq_pulse(xics_get_qirq(spapr->icp, spapr->epow_irq));
> > + rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, pending_epow);
> > +
> > + qemu_irq_pulse(xics_get_qirq(spapr->icp, spapr->check_exception_irq));
>
> Shouldn't the irq pulse go into rtas_event_log_queue()?
It seems maybe it should...
"If a platform chooses to report multiple event or error sources through a
single interrupt, it must ensure that the interrupt remains asserted
or is re-asserted until check-exception has been used to process all
out-standing errors or events for that interrupt."
But another alternative would be to have rtas-check-exception re-pulse if there
are still events present for that particular IRQ. I think I'd prefer this
approach, since it makes it easier to share the queueing functionality with
rtas-event-scan (which relies on polling instead of interrupts)
>
> > +}
> > +
> > +static void spapr_hotplug_req_event(sPAPRDRConnector *drc, uint8_t
> > hp_action)
> > +{
> > + struct hp_log_full *new_hp;
> > + struct rtas_error_log *hdr;
> > + struct rtas_event_log_v6 *v6hdr;
> > + struct rtas_event_log_v6_maina *maina;
> > + struct rtas_event_log_v6_mainb *mainb;
> > + struct rtas_event_log_v6_hp *hp;
> > + sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> > + sPAPRDRConnectorType drc_type = drck->get_type(drc);
> > +
> > + new_hp = g_malloc0(sizeof(struct hp_log_full));
> > + hdr = &new_hp->hdr;
> > + v6hdr = &new_hp->v6hdr;
> > + maina = &new_hp->maina;
> > + mainb = &new_hp->mainb;
> > + hp = &new_hp->hp;
> > +
> > + hdr->summary = cpu_to_be32(RTAS_LOG_VERSION_6
> > + | RTAS_LOG_SEVERITY_EVENT
> > + | RTAS_LOG_DISPOSITION_NOT_RECOVERED
> > + | RTAS_LOG_OPTIONAL_PART_PRESENT
> > + | RTAS_LOG_INITIATOR_HOTPLUG
> > + | RTAS_LOG_TYPE_HOTPLUG);
> > + hdr->extended_length = cpu_to_be32(sizeof(*new_hp)
> > + - sizeof(new_hp->hdr));
> > +
> > + spapr_init_v6hdr(v6hdr);
> > + spapr_init_maina(maina, 3 /* Main-A, Main-B, HP */);
> > +
> > + mainb->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MAINB);
> > + mainb->hdr.section_length = cpu_to_be16(sizeof(*mainb));
> > + mainb->subsystem_id = 0x80; /* External environment */
> > + mainb->event_severity = 0x00; /* Informational / non-error */
> > + mainb->event_subtype = 0x00; /* Normal shutdown */
> > +
> > + hp->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_HOTPLUG);
> > + hp->hdr.section_length = cpu_to_be16(sizeof(*hp));
> > + hp->hdr.section_version = 1; /* includes extended modifier */
> > + hp->hotplug_action = hp_action;
> > +
> > +
> > + switch (drc_type) {
> > + case SPAPR_DR_CONNECTOR_TYPE_PCI:
> > + hp->drc.index = cpu_to_be32(drck->get_index(drc));
> > + hp->hotplug_identifier = RTAS_LOG_V6_HP_ID_DRC_INDEX;
> > + hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_PCI;
> > + break;
> > + default:
> > + /* skip notification for unknown connector types */
>
> This should be an assert, shouldn't it? If code is added which can
> fire this for non-PCI types, without the corresponding non-PCI option
> here, you want to catch that.
Agreed, if hotplug events are supported for that resource type, we should be
calling this.
>
> > + g_free(new_hp);
> > + return;
> > + }
> > +
> > + rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp);
> > +
> > + qemu_irq_pulse(xics_get_qirq(spapr->icp, spapr->check_exception_irq));
> > +}
> > +
> > +void spapr_hotplug_req_add_event(sPAPRDRConnector *drc)
> > +{
> > + spapr_hotplug_req_event(drc, RTAS_LOG_V6_HP_ACTION_ADD);
> > +}
> > +
> > +void spapr_hotplug_req_remove_event(sPAPRDRConnector *drc)
> > +{
> > + spapr_hotplug_req_event(drc, RTAS_LOG_V6_HP_ACTION_REMOVE);
> > }
> >
> > static void check_exception(PowerPCCPU *cpu, sPAPREnvironment *spapr,
> > @@ -282,8 +435,9 @@ static void check_exception(PowerPCCPU *cpu,
> > sPAPREnvironment *spapr,
> > target_ulong args,
> > uint32_t nret, target_ulong rets)
> > {
> > - uint32_t mask, buf, len;
> > + uint32_t mask, buf, len, event_len;
> > uint64_t xinfo;
> > + EventLogEntry *event;
> >
> > if ((nargs < 6) || (nargs > 7) || nret != 1) {
> > rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> > @@ -298,23 +452,40 @@ static void check_exception(PowerPCCPU *cpu,
> > sPAPREnvironment *spapr,
> > xinfo |= (uint64_t)rtas_ld(args, 6) << 32;
> > }
> >
> > - if ((mask & EVENT_MASK_EPOW) && pending_epow) {
> > - if (sizeof(*pending_epow) < len) {
> > - len = sizeof(*pending_epow);
> > - }
> > + event = rtas_event_log_dequeue(mask);
> > + if (!event) {
> > + goto out_no_events;
> > + }
> >
> > - cpu_physical_memory_write(buf, pending_epow, len);
> > - g_free(pending_epow);
> > - pending_epow = NULL;
> > - rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> > - } else {
> > - rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND);
> > + switch (event->log_type) {
> > + case RTAS_LOG_TYPE_EPOW:
> > + event_len = sizeof(struct epow_log_full);
> > + break;
> > + case RTAS_LOG_TYPE_HOTPLUG:
> > + event_len = sizeof(struct hp_log_full);
> > + break;
> > + default:
> > + goto out_no_events;
>
> Doesn't one of the headers include size information you could use
> here, avoiding the switch?
Looks like hdr->extended_length + sizeof(hdr) should do the trick. Nice!
>
> > }
> > +
> > + if (event_len < len) {
> > + len = event_len;
> > + }
> > +
> > + cpu_physical_memory_write(buf, event->data, len);
> > + rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> > + g_free(event->data);
> > + g_free(event);
> > + return;
> > +
> > +out_no_events:
> > + rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND);
> > }
> >
> > void spapr_events_init(sPAPREnvironment *spapr)
> > {
> > - spapr->epow_irq = xics_alloc(spapr->icp, 0, 0, false);
> > + QTAILQ_INIT(&rtas_event_log.pending_events);
> > + spapr->check_exception_irq = xics_alloc(spapr->icp, 0, 0, false);
> > spapr->epow_notifier.notify = spapr_powerdown_req;
> > qemu_register_powerdown_notifier(&spapr->epow_notifier);
> > spapr_rtas_register(RTAS_CHECK_EXCEPTION, "check-exception",
> > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> > index 3cc4490..1d27708 100644
> > --- a/include/hw/ppc/spapr.h
> > +++ b/include/hw/ppc/spapr.h
> > @@ -3,6 +3,7 @@
> >
> > #include "sysemu/dma.h"
> > #include "hw/ppc/xics.h"
> > +#include "hw/ppc/spapr_drc.h"
> >
> > struct VIOsPAPRBus;
> > struct sPAPRPHBState;
> > @@ -31,7 +32,7 @@ typedef struct sPAPREnvironment {
> > struct PPCTimebase tb;
> > bool has_graphics;
> >
> > - uint32_t epow_irq;
> > + uint32_t check_exception_irq;
> > Notifier epow_notifier;
> >
> > /* Migration state */
> > @@ -498,6 +499,8 @@ int spapr_dma_dt(void *fdt, int node_off, const char
> > *propname,
> > uint32_t liobn, uint64_t window, uint32_t size);
> > int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
> > sPAPRTCETable *tcet);
> > +void spapr_hotplug_req_add_event(sPAPRDRConnector *drc);
> > +void spapr_hotplug_req_remove_event(sPAPRDRConnector *drc);
> >
> > #define TYPE_SPAPR_RTC "spapr-rtc"
> >
>
> --
> David Gibson | I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
> | _way_ _around_!
> http://www.ozlabs.org/~dgibson