qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH] spapr: Improve handling of memory unplug with old guests


From: Greg Kurz
Subject: Re: [PATCH] spapr: Improve handling of memory unplug with old guests
Date: Wed, 13 Jan 2021 18:01:27 +0100

On Wed, 13 Jan 2021 12:20:58 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> On Fri, Jan 08, 2021 at 06:31:27PM +0100, Greg Kurz wrote:
> > Since commit 1e8b5b1aa16b ("spapr: Allow memory unplug to always succeed")
> > trying to unplug memory from a guest that doesn't support it (eg. rhel6)
> > no longer generates an error like it used to. Instead, it leaves the
> > memory around: only a subsequent reboot or manual use of drmgr within
> > the guest can complete the hot-unplug sequence. A flag was added to
> > SpaprMachineClass so that this new behavior only applies to the default
> > machine type.
> > 
> > We can do better. CAS processes all pending hot-unplug requests. This
> > means that we don't really care about what the guest supports if
> > the hot-unplug request happens before CAS.
> > 
> > All guests that we care for, even old ones, set enough bits in OV5
> > that lead to a non-empty bitmap in spapr->ov5_cas. Use that as a
> > heuristic to decide if CAS has already occurred or not.
> > 
> > Always accept unplug requests that happen before CAS since CAS will
> > process them. Restore the previous behavior of rejecting them after
> > CAS when we know that the guest doesn't support memory hot-unplug.
> > 
> > This behavior is suitable for all machine types: this allows us to
> > drop the pre_6_0_memory_unplug flag.
> > 
> > Fixes: 1e8b5b1aa16b ("spapr: Allow memory unplug to always succeed")
> > Signed-off-by: Greg Kurz <groug@kaod.org>
> 
> Applied, sorry it took me so long.
> 

No problem. Any estimate for your next PR?

> > ---
> >  hw/ppc/spapr.c              |   24 +++++++++++++-----------
> >  hw/ppc/spapr_events.c       |    3 +--
> >  hw/ppc/spapr_ovec.c         |    7 +++++++
> >  include/hw/ppc/spapr.h      |    2 +-
> >  include/hw/ppc/spapr_ovec.h |    1 +
> >  5 files changed, 23 insertions(+), 14 deletions(-)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index 2c403b574e37..6c47466fc2f1 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -4048,6 +4048,18 @@ static void 
> > spapr_machine_device_unplug(HotplugHandler *hotplug_dev,
> >      }
> >  }
> >  
> > +bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr)
> > +{
> > +    return spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT) ||
> > +        /*
> > +         * CAS will process all pending unplug requests.
> > +         *
> > +         * HACK: a guest could theoretically have cleared all bits in OV5,
> > +         * but none of the guests we care for do.
> > +         */
> 
> Hrm.  This is pretty ugly - I thought we had a better canonical way of
> determining if CAS had already happened this boot, but it appears
> not.  I don't want to delay this patch, since it is an important fix,
> but it would be nice if you could do a later cleanup to have a nicer
> way of detecting CAS-hasn't-happened.
> 

Yeah, I fully agree this is ugly. I'll try to find something nicer later.

Thanks for taking it anyway !

Cheers,

--
Greg

> > +        spapr_ovec_empty(spapr->ov5_cas);
> > +}
> > +
> >  static void spapr_machine_device_unplug_request(HotplugHandler 
> > *hotplug_dev,
> >                                                  DeviceState *dev, Error 
> > **errp)
> >  {
> > @@ -4056,16 +4068,9 @@ static void 
> > spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev,
> >      SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
> >  
> >      if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
> > -        if (!smc->pre_6_0_memory_unplug ||
> > -            spapr_ovec_test(sms->ov5_cas, OV5_HP_EVT)) {
> > +        if (spapr_memory_hot_unplug_supported(sms)) {
> >              spapr_memory_unplug_request(hotplug_dev, dev, errp);
> >          } else {
> > -            /* NOTE: this means there is a window after guest reset, prior 
> > to
> > -             * CAS negotiation, where unplug requests will fail due to the
> > -             * capability not being detected yet. This is a bit different 
> > than
> > -             * the case with PCI unplug, where the events will be queued 
> > and
> > -             * eventually handled by the guest after boot
> > -             */
> >              error_setg(errp, "Memory hot unplug not supported for this 
> > guest");
> >          }
> >      } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
> > @@ -4543,11 +4548,8 @@ DEFINE_SPAPR_MACHINE(6_0, "6.0", true);
> >   */
> >  static void spapr_machine_5_2_class_options(MachineClass *mc)
> >  {
> > -    SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
> > -
> >      spapr_machine_6_0_class_options(mc);
> >      compat_props_add(mc->compat_props, hw_compat_5_2, hw_compat_5_2_len);
> > -    smc->pre_6_0_memory_unplug = true;
> >  }
> >  
> >  DEFINE_SPAPR_MACHINE(5_2, "5.2", false);
> > diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> > index 6aedd988b3d0..d51daedfa6e0 100644
> > --- a/hw/ppc/spapr_events.c
> > +++ b/hw/ppc/spapr_events.c
> > @@ -658,8 +658,7 @@ static void spapr_hotplug_req_event(uint8_t hp_id, 
> > uint8_t hp_action,
> >          /* we should not be using count_indexed value unless the guest
> >           * supports dedicated hotplug event source
> >           */
> > -        g_assert(!SPAPR_MACHINE_GET_CLASS(spapr)->pre_6_0_memory_unplug ||
> > -                 spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT));
> > +        g_assert(spapr_memory_hot_unplug_supported(spapr));
> >          hp->drc_id.count_indexed.count =
> >              cpu_to_be32(drc_id->count_indexed.count);
> >          hp->drc_id.count_indexed.index =
> > diff --git a/hw/ppc/spapr_ovec.c b/hw/ppc/spapr_ovec.c
> > index dd003f1763fd..b2567caa5cf4 100644
> > --- a/hw/ppc/spapr_ovec.c
> > +++ b/hw/ppc/spapr_ovec.c
> > @@ -125,6 +125,13 @@ bool spapr_ovec_test(SpaprOptionVector *ov, long bitnr)
> >      return test_bit(bitnr, ov->bitmap) ? true : false;
> >  }
> >  
> > +bool spapr_ovec_empty(SpaprOptionVector *ov)
> > +{
> > +    g_assert(ov);
> > +
> > +    return bitmap_empty(ov->bitmap, OV_MAXBITS);
> > +}
> > +
> >  static void guest_byte_to_bitmap(uint8_t entry, unsigned long *bitmap,
> >                                   long bitmap_offset)
> >  {
> > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> > index 1cc19575f548..3ad2ff713279 100644
> > --- a/include/hw/ppc/spapr.h
> > +++ b/include/hw/ppc/spapr.h
> > @@ -142,7 +142,6 @@ struct SpaprMachineClass {
> >      hwaddr rma_limit;          /* clamp the RMA to this size */
> >      bool pre_5_1_assoc_refpoints;
> >      bool pre_5_2_numa_associativity;
> > -    bool pre_6_0_memory_unplug;
> >  
> >      bool (*phb_placement)(SpaprMachineState *spapr, uint32_t index,
> >                            uint64_t *buid, hwaddr *pio, 
> > @@ -950,4 +949,5 @@ bool spapr_check_pagesize(SpaprMachineState *spapr, 
> > hwaddr pagesize,
> >  
> >  void spapr_set_all_lpcrs(target_ulong value, target_ulong mask);
> >  hwaddr spapr_get_rtas_addr(void);
> > +bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr);
> >  #endif /* HW_SPAPR_H */
> > diff --git a/include/hw/ppc/spapr_ovec.h b/include/hw/ppc/spapr_ovec.h
> > index d4dee9e06a01..48b716a060c2 100644
> > --- a/include/hw/ppc/spapr_ovec.h
> > +++ b/include/hw/ppc/spapr_ovec.h
> > @@ -71,6 +71,7 @@ void spapr_ovec_cleanup(SpaprOptionVector *ov);
> >  void spapr_ovec_set(SpaprOptionVector *ov, long bitnr);
> >  void spapr_ovec_clear(SpaprOptionVector *ov, long bitnr);
> >  bool spapr_ovec_test(SpaprOptionVector *ov, long bitnr);
> > +bool spapr_ovec_empty(SpaprOptionVector *ov);
> >  SpaprOptionVector *spapr_ovec_parse_vector(target_ulong table_addr, int 
> > vector);
> >  int spapr_dt_ovec(void *fdt, int fdt_offset,
> >                    SpaprOptionVector *ov, const char *name);
> > 
> > 
> 

Attachment: pgpuWMD7YpzJy.pgp
Description: OpenPGP digital signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]