qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 03/13] spapr/xive: add state synchronization with KVM


From: David Gibson
Subject: Re: [Qemu-devel] [PATCH 03/13] spapr/xive: add state synchronization with KVM
Date: Wed, 6 Feb 2019 13:42:51 +1100
User-agent: Mutt/1.10.1 (2018-07-13)

On Mon, Jan 07, 2019 at 07:39:36PM +0100, Cédric Le Goater wrote:
> This extends the KVM XIVE device backend with 'synchronize_state'
> methods used to retrieve the state from KVM. The HW state of the
> sources, the KVM device and the thread interrupt contexts are
> collected for the monitor usage and also migration.
> 
> These get operations rely on their KVM counterpart in the host kernel
> which acts as a proxy for OPAL, the host firmware. The set operations
> will be added for migration support later.
> 
> Signed-off-by: Cédric Le Goater <address@hidden>

Reviewed-by: David Gibson <address@hidden>

> ---
>  include/hw/ppc/spapr_xive.h |   9 ++
>  include/hw/ppc/xive.h       |   1 +
>  hw/intc/spapr_xive.c        |  24 ++--
>  hw/intc/spapr_xive_kvm.c    | 223 ++++++++++++++++++++++++++++++++++++
>  hw/intc/xive.c              |  10 ++
>  5 files changed, 260 insertions(+), 7 deletions(-)
> 
> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
> index 24a0be478039..02f2de20111c 100644
> --- a/include/hw/ppc/spapr_xive.h
> +++ b/include/hw/ppc/spapr_xive.h
> @@ -44,6 +44,14 @@ typedef struct sPAPRXive {
>  bool spapr_xive_irq_claim(sPAPRXive *xive, uint32_t lisn, bool lsi);
>  bool spapr_xive_irq_free(sPAPRXive *xive, uint32_t lisn);
>  void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon);
> +bool spapr_xive_priority_is_reserved(uint8_t priority);
> +
> +void spapr_xive_cpu_to_nvt(PowerPCCPU *cpu,
> +                           uint8_t *out_nvt_blk, uint32_t *out_nvt_idx);
> +void spapr_xive_cpu_to_end(PowerPCCPU *cpu, uint8_t prio,
> +                           uint8_t *out_end_blk, uint32_t *out_end_idx);
> +int spapr_xive_target_to_end(uint32_t target, uint8_t prio,
> +                             uint8_t *out_end_blk, uint32_t *out_end_idx);
>  
>  typedef struct sPAPRMachineState sPAPRMachineState;
>  
> @@ -58,5 +66,6 @@ void spapr_xive_map_mmio(sPAPRXive *xive);
>   * KVM XIVE device helpers
>   */
>  void kvmppc_xive_connect(sPAPRXive *xive, Error **errp);
> +void kvmppc_xive_synchronize_state(sPAPRXive *xive, Error **errp);
>  
>  #endif /* PPC_SPAPR_XIVE_H */
> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> index 4bbba8d39a65..2e48d75a22e0 100644
> --- a/include/hw/ppc/xive.h
> +++ b/include/hw/ppc/xive.h
> @@ -442,5 +442,6 @@ static inline bool kvmppc_xive_enabled(void)
>  void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp);
>  void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val);
>  void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp);
> +void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp);
>  
>  #endif /* PPC_XIVE_H */
> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
> index cf6d3a5f12e1..50dd66707968 100644
> --- a/hw/intc/spapr_xive.c
> +++ b/hw/intc/spapr_xive.c
> @@ -54,8 +54,8 @@ static uint32_t spapr_xive_nvt_to_target(uint8_t nvt_blk, uint32_t nvt_idx)
>      return nvt_idx - SPAPR_XIVE_NVT_BASE;
>  }
>  
> -static void spapr_xive_cpu_to_nvt(PowerPCCPU *cpu,
> -                                  uint8_t *out_nvt_blk, uint32_t *out_nvt_idx)
> +void spapr_xive_cpu_to_nvt(PowerPCCPU *cpu,
> +                           uint8_t *out_nvt_blk, uint32_t *out_nvt_idx)
>  {
>      assert(cpu);
>  
> @@ -85,8 +85,8 @@ static int spapr_xive_target_to_nvt(uint32_t target,
>   * sPAPR END indexing uses a simple mapping of the CPU vcpu_id, 8
>   * priorities per CPU
>   */
> -static void spapr_xive_cpu_to_end(PowerPCCPU *cpu, uint8_t prio,
> -                                  uint8_t *out_end_blk, uint32_t *out_end_idx)
> +void spapr_xive_cpu_to_end(PowerPCCPU *cpu, uint8_t prio,
> +                           uint8_t *out_end_blk, uint32_t *out_end_idx)
>  {
>      assert(cpu);
>  
> @@ -99,8 +99,8 @@ static void spapr_xive_cpu_to_end(PowerPCCPU *cpu, uint8_t prio,
>      }
>  }
>  
> -static int spapr_xive_target_to_end(uint32_t target, uint8_t prio,
> -                                    uint8_t *out_end_blk, uint32_t 
> *out_end_idx)
> +int spapr_xive_target_to_end(uint32_t target, uint8_t prio,
> +                             uint8_t *out_end_blk, uint32_t *out_end_idx)
>  {
>      PowerPCCPU *cpu = spapr_find_cpu(target);
>  
> @@ -139,6 +139,16 @@ void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon)
>      XiveSource *xsrc = &xive->source;
>      int i;
>  
> +    if (kvmppc_xive_enabled()) {
> +        Error *local_err = NULL;
> +
> +        kvmppc_xive_synchronize_state(xive, &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +            return;
> +        }
> +    }
> +
>      monitor_printf(mon, "  LSIN         PQ    EISN     CPU/PRIO EQ\n");
>  
>      for (i = 0; i < xive->nr_irqs; i++) {
> @@ -529,7 +539,7 @@ bool spapr_xive_irq_free(sPAPRXive *xive, uint32_t lisn)
>   * interrupts (DD2.X POWER9). So we only allow the guest to use
>   * priorities [0..6].
>   */
> -static bool spapr_xive_priority_is_reserved(uint8_t priority)
> +bool spapr_xive_priority_is_reserved(uint8_t priority)
>  {
>      switch (priority) {
>      case 0 ... 6:
> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
> index f96c66fa419d..f52bddc92a2a 100644
> --- a/hw/intc/spapr_xive_kvm.c
> +++ b/hw/intc/spapr_xive_kvm.c
> @@ -60,6 +60,57 @@ static void kvm_cpu_enable(CPUState *cs)
>  /*
>   * XIVE Thread Interrupt Management context (KVM)
>   */
> +static void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
> +{
> +    uint64_t state[4] = { 0 };
> +    int ret;
> +
> +    ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_NVT_STATE, state);
> +    if (ret != 0) {
> +        error_setg_errno(errp, errno,
> +                         "XIVE: could not capture KVM state of CPU %ld",
> +                         kvm_arch_vcpu_id(tctx->cs));
> +        return;
> +    }
> +
> +    /* word0 and word1 of the OS ring. */
> +    *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];
> +
> +    /*
> +     * KVM also returns word2 containing the OS CAM line which is
> +     * interesting to print out in the QEMU monitor.
> +     */
> +    *((uint64_t *) &tctx->regs[TM_QW1_OS + TM_WORD2]) = state[1];
> +}
> +
> +typedef struct {
> +    XiveTCTX *tctx;
> +    Error *err;
> +} XiveCpuGetState;
> +
> +static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu,
> +                                                 run_on_cpu_data arg)
> +{
> +    XiveCpuGetState *s = arg.host_ptr;
> +
> +    kvmppc_xive_cpu_get_state(s->tctx, &s->err);
> +}
> +
> +void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
> +{
> +    XiveCpuGetState s = {
> +        .tctx = tctx,
> +        .err = NULL,
> +    };
> +
> +    run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state,
> +               RUN_ON_CPU_HOST_PTR(&s));
> +
> +    if (s.err) {
> +        error_propagate(errp, s.err);
> +        return;
> +    }
> +}
>  
>  void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
>  {
> @@ -119,6 +170,34 @@ void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp)
>      }
>  }
>  
> +/*
> + * This is used to perform the magic loads on the ESB pages, described
> + * in xive.h.
> + */
> +static uint8_t xive_esb_read(XiveSource *xsrc, int srcno, uint32_t offset)
> +{
> +    unsigned long addr = (unsigned long) xsrc->esb_mmap +
> +        xive_source_esb_mgmt(xsrc, srcno) + offset;
> +
> +    /* Prevent the compiler from optimizing away the load */
> +    volatile uint64_t value = *((uint64_t *) addr);
> +
> +    return be64_to_cpu(value) & 0x3;
> +}
> +
> +static void kvmppc_xive_source_get_state(XiveSource *xsrc)
> +{
> +    int i;
> +
> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> +        /* Perform a load without side effect to retrieve the PQ bits */
> +        uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);
> +
> +        /* and save PQ locally */
> +        xive_source_esb_set(xsrc, i, pq);
> +    }
> +}
> +
>  void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
>  {
>      XiveSource *xsrc = opaque;
> @@ -149,6 +228,150 @@ void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
>  /*
>   * sPAPR XIVE interrupt controller (KVM)
>   */
> +static int kvmppc_xive_get_eq_state(sPAPRXive *xive, CPUState *cs, Error **errp)
> +{
> +    unsigned long vcpu_id = kvm_arch_vcpu_id(cs);
> +    int ret;
> +    int i;
> +
> +    for (i = 0; i < XIVE_PRIORITY_MAX + 1; i++) {
> +        Error *local_err = NULL;
> +        struct kvm_ppc_xive_eq kvm_eq = { 0 };
> +        uint64_t kvm_eq_idx;
> +        XiveEND end = { 0 };
> +        uint8_t end_blk, nvt_blk;
> +        uint32_t end_idx, nvt_idx;
> +
> +        /* Skip priorities reserved for the hypervisor */
> +        if (spapr_xive_priority_is_reserved(i)) {
> +            continue;
> +        }
> +
> +        /* Encode the tuple (server, prio) as a KVM EQ index */
> +        kvm_eq_idx = i << KVM_XIVE_EQ_PRIORITY_SHIFT &
> +            KVM_XIVE_EQ_PRIORITY_MASK;
> +        kvm_eq_idx |= vcpu_id << KVM_XIVE_EQ_SERVER_SHIFT &
> +            KVM_XIVE_EQ_SERVER_MASK;
> +
> +        ret = kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EQ, kvm_eq_idx,
> +                                &kvm_eq, false, &local_err);
> +        if (local_err) {
> +            error_propagate(errp, local_err);
> +            return ret;
> +        }
> +
> +        if (!(kvm_eq.flags & KVM_XIVE_EQ_FLAG_ENABLED)) {
> +            continue;
> +        }
> +
> +        /* Update the local END structure with the KVM input */
> +        if (kvm_eq.flags & KVM_XIVE_EQ_FLAG_ENABLED) {
> +            end.w0 |= cpu_to_be32(END_W0_VALID | END_W0_ENQUEUE);
> +        }
> +        if (kvm_eq.flags & KVM_XIVE_EQ_FLAG_ALWAYS_NOTIFY) {
> +            end.w0 |= cpu_to_be32(END_W0_UCOND_NOTIFY);
> +        }
> +        if (kvm_eq.flags & KVM_XIVE_EQ_FLAG_ESCALATE) {
> +            end.w0 |= cpu_to_be32(END_W0_ESCALATE_CTL);
> +        }
> +        end.w0 |= xive_set_field32(END_W0_QSIZE, 0ul, kvm_eq.qsize - 12);
> +
> +        end.w1 = xive_set_field32(END_W1_GENERATION, 0ul, kvm_eq.qtoggle) |
> +            xive_set_field32(END_W1_PAGE_OFF, 0ul, kvm_eq.qindex);
> +        end.w2 = cpu_to_be32((kvm_eq.qpage >> 32) & 0x0fffffff);
> +        end.w3 = cpu_to_be32(kvm_eq.qpage & 0xffffffff);
> +        end.w4 = 0;
> +        end.w5 = 0;
> +
> +        spapr_xive_cpu_to_nvt(POWERPC_CPU(cs), &nvt_blk, &nvt_idx);
> +
> +        end.w6 = xive_set_field32(END_W6_NVT_BLOCK, 0ul, nvt_blk) |
> +            xive_set_field32(END_W6_NVT_INDEX, 0ul, nvt_idx);
> +        end.w7 = xive_set_field32(END_W7_F0_PRIORITY, 0ul, i);
> +
> +        spapr_xive_cpu_to_end(POWERPC_CPU(cs), i, &end_blk, &end_idx);
> +
> +        assert(end_idx < xive->nr_ends);
> +        memcpy(&xive->endt[end_idx], &end, sizeof(XiveEND));
> +    }
> +
> +    return 0;
> +}
> +
> +static void kvmppc_xive_get_eas_state(sPAPRXive *xive, Error **errp)
> +{
> +    XiveSource *xsrc = &xive->source;
> +    int i;
> +
> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> +        XiveEAS *eas = &xive->eat[i];
> +        XiveEAS new_eas;
> +        uint64_t kvm_eas;
> +        uint8_t priority;
> +        uint32_t server;
> +        uint32_t end_idx;
> +        uint8_t end_blk;
> +        uint32_t eisn;
> +        Error *local_err = NULL;
> +
> +        if (!xive_eas_is_valid(eas)) {
> +            continue;
> +        }
> +
> +        kvm_device_access(xive->fd, KVM_DEV_XIVE_GRP_EAS, i, &kvm_eas, false,
> +                          &local_err);
> +        if (local_err) {
> +            error_propagate(errp, local_err);
> +            return;
> +        }
> +
> +        priority = (kvm_eas & KVM_XIVE_EAS_PRIORITY_MASK) >>
> +            KVM_XIVE_EAS_PRIORITY_SHIFT;
> +        server = (kvm_eas & KVM_XIVE_EAS_SERVER_MASK) >>
> +            KVM_XIVE_EAS_SERVER_SHIFT;
> +        eisn = (kvm_eas & KVM_XIVE_EAS_EISN_MASK) >> KVM_XIVE_EAS_EISN_SHIFT;
> +
> +        if (spapr_xive_target_to_end(server, priority, &end_blk, &end_idx)) {
> +            error_setg(errp, "XIVE: invalid tuple CPU %d priority %d", server,
> +                       priority);
> +            return;
> +        }
> +
> +        new_eas.w = cpu_to_be64(EAS_VALID);
> +        if (kvm_eas & KVM_XIVE_EAS_MASK_MASK) {
> +            new_eas.w |= cpu_to_be64(EAS_MASKED);
> +        }
> +
> +        new_eas.w = xive_set_field64(EAS_END_INDEX, new_eas.w, end_idx);
> +        new_eas.w = xive_set_field64(EAS_END_BLOCK, new_eas.w, end_blk);
> +        new_eas.w = xive_set_field64(EAS_END_DATA, new_eas.w, eisn);
> +
> +        *eas = new_eas;
> +    }
> +}
> +
> +void kvmppc_xive_synchronize_state(sPAPRXive *xive, Error **errp)
> +{
> +    XiveSource *xsrc = &xive->source;
> +    CPUState *cs;
> +    Error *local_err = NULL;
> +
> +    kvmppc_xive_source_get_state(xsrc);
> +
> +    kvmppc_xive_get_eas_state(xive, &local_err);
> +    if (local_err) {
> +        error_propagate(errp, local_err);
> +        return;
> +    }
> +
> +    CPU_FOREACH(cs) {
> +        kvmppc_xive_get_eq_state(xive, cs, &local_err);
> +        if (local_err) {
> +            error_propagate(errp, local_err);
> +            return;
> +        }
> +    }
> +}
>  
>  static void *kvmppc_xive_mmap(sPAPRXive *xive, int ctrl, size_t len,
>                                   Error **errp)
> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> index 9a2d7be283f8..596c29d8c826 100644
> --- a/hw/intc/xive.c
> +++ b/hw/intc/xive.c
> @@ -434,6 +434,16 @@ void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon)
>      int cpu_index = tctx->cs ? tctx->cs->cpu_index : -1;
>      int i;
>  
> +    if (kvmppc_xive_enabled()) {
> +        Error *local_err = NULL;
> +
> +        kvmppc_xive_cpu_synchronize_state(tctx, &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +            return;
> +        }
> +    }
> +
> +    monitor_printf(mon, "CPU[%04x]:   QW   NSR CPPR IPB LSMFB ACK# INC AGE PIPR"
> +                   "  W2\n", cpu_index);
>                     "  W2\n", cpu_index);
>  

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson

Attachment: signature.asc
Description: PGP signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]