[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [Qemu-ppc] [PATCH v2 3/4] target-ppc: Build error log
From: |
Alexander Graf |
Subject: |
Re: [Qemu-devel] [Qemu-ppc] [PATCH v2 3/4] target-ppc: Build error log |
Date: |
Fri, 05 Sep 2014 10:04:56 +0200 |
User-agent: |
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:31.0) Gecko/20100101 Thunderbird/31.1.0 |
On 04.09.14 13:13, Aravinda Prasad wrote:
> Whenever there is a physical memory error due to bit
> flips, which cannot be corrected by hardware, the error
> is passed on to the kernel. If the memory address in
> error belongs to guest address space then guest kernel
> is responsible to take action. Hence the error is passed
> on to guest via KVM by invoking 0x200 NMI vector.
>
> However, guest OS, as per PAPR, expects an error log
> upon such error. This patch registers a new hcall
> which is issued from 0x200 interrupt vector and builds
> the error log, copies the error log to rtas space and
> passes the address of the error log to guest
>
> Enhancement to KVM to perform above functionality is
> already in upstream kernel.
>
> Signed-off-by: Aravinda Prasad <address@hidden>
> ---
> hw/ppc/spapr_hcall.c | 154
> ++++++++++++++++++++++++++++++++++++++++++++++++
> include/hw/ppc/spapr.h | 4 +
> 2 files changed, 157 insertions(+), 1 deletion(-)
>
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> index 01650ba..c3aa448 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -14,6 +14,88 @@ struct SPRSyncState {
> target_ulong mask;
> };
>
> +/* Offset from rtas-base where error log is placed */
> +#define RTAS_ERROR_OFFSET (TARGET_PAGE_SIZE)
You can't assume this. Please compute the value at the same place you
compute the rtas-size.
> +
> +#define RTAS_ELOG_SEVERITY_SHIFT 0x5
> +#define RTAS_ELOG_DISPOSITION_SHIFT 0x3
> +#define RTAS_ELOG_INITIATOR_SHIFT 0x4
> +
> +/*
> + * Only required RTAS event severity, disposition, initiator
> + * target and type are copied from arch/powerpc/include/asm/rtas.h
> + */
> +
> +/* RTAS event severity */
> +#define RTAS_SEVERITY_ERROR_SYNC 0x3
> +
> +/* RTAS event disposition */
> +#define RTAS_DISP_NOT_RECOVERED 0x2
> +
> +/* RTAS event initiator */
> +#define RTAS_INITIATOR_MEMORY 0x4
> +
> +/* RTAS event target */
> +#define RTAS_TARGET_MEMORY 0x4
> +
> +/* RTAS event type */
> +#define RTAS_TYPE_ECC_UNCORR 0x09
> +
> +/*
> + * Currently KVM only passes on the uncorrected machine
> + * check memory error to guest. Other machine check errors
> + * such as SLB multi-hit and TLB multi-hit are recovered
> + * in KVM and are not passed on to guest.
> + *
> + * DSISR Bit for uncorrected machine check error. Based
> + * on arch/powerpc/include/asm/mce.h
Please don't include Linux code. This file is GPLv2+ licensed and I
don't want to taint it as GPLv2 only just for the sake of mce.
> + */
> +#define PPC_BIT(bit) (0x8000000000000000ULL >> bit)
> +#define P7_DSISR_MC_UE (PPC_BIT(48)) /* P8 too */
> +
> +/* Adopted from kernel source arch/powerpc/include/asm/rtas.h */
> +struct rtas_error_log {
> + /* Byte 0 */
> + uint8_t byte0; /* Architectural version */
> +
> + /* Byte 1 */
> + uint8_t byte1;
> + /* XXXXXXXX
> + * XXX 3: Severity level of error
> + * XX 2: Degree of recovery
> + * X 1: Extended log present?
> + * XX 2: Reserved
> + */
> +
> + /* Byte 2 */
> + uint8_t byte2;
> + /* XXXXXXXX
> + * XXXX 4: Initiator of event
> + * XXXX 4: Target of failed operation
> + */
> + uint8_t byte3; /* General event or error*/
> +};
> +
> +/*
> + * Data format in RTAS-Blob
> + *
> + * This structure contains error information related to Machine
> + * Check exception. This is filled up and copied to rtas-blob
> + * upon machine check exception.
> + */
> +struct rtas_mc_log {
> + target_ulong srr0;
> + target_ulong srr1;
> + /*
> + * Beginning of error log address. This is properly
> + * populated and passed on to OS registered machine
> + * check notification routine upon machine check
> + * exception
> + */
> + target_ulong r3;
> + struct rtas_error_log err_log;
> +};
> +
> static void do_spr_sync(void *arg)
> {
> struct SPRSyncState *s = arg;
> @@ -586,6 +668,77 @@ static target_ulong h_rtas_update(PowerPCCPU *cpu,
> sPAPREnvironment *spapr,
> return 0;
> }
>
> +static target_ulong h_report_mc_err(PowerPCCPU *cpu, sPAPREnvironment *spapr,
> + target_ulong opcode, target_ulong *args)
> +{
> + struct rtas_mc_log mc_log;
> + CPUPPCState *env = &cpu->env;
> + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
> +
> + /*
> + * We save the original r3 register in SPRG2 in 0x200 vector,
> + * which is patched during call to ibm.nmi-register. Original
> + * r3 is required to be included in error log
> + */
> + mc_log.r3 = env->spr[SPR_SPRG2];
Don't you have to call cpu_synchronize_registers() before you access
SPRG2? Otherwise the value may be stale.
> +
> + /*
> + * SRR0 and SRR1, containing nip and msr at the time of exception,
> + * are clobbered when we return from this hcall. Hence they
> + * need to be properly saved and restored. We save srr0
> + * and srr1 in rtas blob and restore it in 0x200 vector
> + * before branching to OS registered machine check handler
> + */
> + mc_log.srr0 = env->spr[SPR_SRR0];
> + mc_log.srr1 = env->spr[SPR_SRR1];
> +
> + /* Set error log fields */
> + mc_log.err_log.byte0 = 0x00;
> + mc_log.err_log.byte1 =
> + (RTAS_SEVERITY_ERROR_SYNC << RTAS_ELOG_SEVERITY_SHIFT);
> + mc_log.err_log.byte1 |=
> + (RTAS_DISP_NOT_RECOVERED << RTAS_ELOG_DISPOSITION_SHIFT);
> + mc_log.err_log.byte2 =
> + (RTAS_INITIATOR_MEMORY << RTAS_ELOG_INITIATOR_SHIFT);
> + mc_log.err_log.byte2 |= RTAS_TARGET_MEMORY;
> +
> + if (env->spr[SPR_DSISR] & P7_DSISR_MC_UE) {
> + mc_log.err_log.byte3 = RTAS_TYPE_ECC_UNCORR;
> + } else {
> + mc_log.err_log.byte3 = 0x0;
> + }
> +
> + /* Handle all Host/Guest LE/BE combinations */
> + if ((*pcc->interrupts_big_endian)(cpu)) {
This should check MSR.LE, not ILE.
Alex