[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH for-4.0 v2 11/37] tcg/aarch64: Parameterize the
From: |
Alex Bennée |
Subject: |
Re: [Qemu-devel] [PATCH for-4.0 v2 11/37] tcg/aarch64: Parameterize the temps for tcg_out_tlb_read |
Date: |
Fri, 30 Nov 2018 17:50:08 +0000 |
User-agent: |
mu4e 1.1.0; emacs 26.1.90 |
Richard Henderson <address@hidden> writes:
> When moving the qemu_ld/st arguments to the right place for
> a function call, we'll need to move the temps out of the way.
>
> Signed-off-by: Richard Henderson <address@hidden>
Reviewed-by: Alex Bennée <address@hidden>
> ---
> tcg/aarch64/tcg-target.inc.c | 74 +++++++++++++++++++-----------------
> 1 file changed, 40 insertions(+), 34 deletions(-)
>
> diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
> index 148de0b7f2..c0ba9a6d50 100644
> --- a/tcg/aarch64/tcg-target.inc.c
> +++ b/tcg/aarch64/tcg-target.inc.c
> @@ -1467,13 +1467,15 @@ static void add_qemu_ldst_label(TCGContext *s, bool
> is_ld, TCGMemOpIdx oi,
> label->label_ptr[0] = label_ptr;
> }
>
> -/* Load and compare a TLB entry, emitting the conditional jump to the
> - slow path for the failure case, which will be patched later when
> finalizing
> - the slow path. Generated code returns the host addend in X1,
> - clobbers X0,X2,X3,TMP. */
> -static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
> - tcg_insn_unit **label_ptr, int mem_index,
> - bool is_read)
> +/*
> + * Load and compare a TLB entry, emitting the conditional jump to the
> + * slow path on failure. Returns the register for the host addend.
> + * Clobbers t0, t1, t2, t3.
> + */
> +static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
> + tcg_insn_unit **label_ptr, int mem_index,
> + bool is_read, TCGReg t0, TCGReg t1,
> + TCGReg t2, TCGReg t3)
> {
> int tlb_offset = is_read ?
> offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
> @@ -1491,55 +1493,56 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg
> addr_reg, TCGMemOp opc,
> if (a_bits >= s_bits) {
> x3 = addr_reg;
> } else {
> + x3 = t3;
> tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
> - TCG_REG_X3, addr_reg, s_mask - a_mask);
> - x3 = TCG_REG_X3;
> + x3, addr_reg, s_mask - a_mask);
> }
> tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
>
> - /* Extract the TLB index from the address into X0.
> - X0<CPU_TLB_BITS:0> =
> + /* Extract the TLB index from the address into T0.
> + T0<CPU_TLB_BITS:0> =
> addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
> - tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg,
> + tcg_out_ubfm(s, TARGET_LONG_BITS == 64, t0, addr_reg,
> TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
>
> - /* Store the page mask part of the address into X3. */
> + /* Store the page mask part of the address into T3. */
> tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
> - TCG_REG_X3, x3, tlb_mask);
> + t3, x3, tlb_mask);
>
> - /* Add any "high bits" from the tlb offset to the env address into X2,
> + /* Add any "high bits" from the tlb offset to the env address into T2,
> to take advantage of the LSL12 form of the ADDI instruction.
> - X2 = env + (tlb_offset & 0xfff000) */
> + T2 = env + (tlb_offset & 0xfff000) */
> if (tlb_offset & 0xfff000) {
> - tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
> + tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, t2, base,
> tlb_offset & 0xfff000);
> - base = TCG_REG_X2;
> + base = t2;
> }
>
> - /* Merge the tlb index contribution into X2.
> - X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
> - tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base,
> - TCG_REG_X0, CPU_TLB_ENTRY_BITS);
> + /* Merge the tlb index contribution into T2.
> + T2 = T2 + (T0 << CPU_TLB_ENTRY_BITS) */
> + tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64,
> + t2, base, t0, CPU_TLB_ENTRY_BITS);
>
> - /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
> - X0 = load [X2 + (tlb_offset & 0x000fff)] */
> + /* Merge "low bits" from tlb offset, load the tlb comparator into T0.
> + T0 = load [T2 + (tlb_offset & 0x000fff)] */
> tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX,
> - TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff,
> - TARGET_LONG_BITS == 32 ? 2 : 3);
> + t0, t2, tlb_offset & 0xfff, TARGET_LONG_BITS == 32 ? 2 : 3);
>
> /* Load the tlb addend. Do that early to avoid stalling.
> - X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
> - tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2,
> + T1 = load [T2 + (tlb_offset & 0xfff) + offsetof(addend)] */
> + tcg_out_ldst(s, I3312_LDRX, t1, t2,
> (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
> (is_read ? offsetof(CPUTLBEntry, addr_read)
> : offsetof(CPUTLBEntry, addr_write)), 3);
>
> /* Perform the address comparison. */
> - tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
> + tcg_out_cmp(s, (TARGET_LONG_BITS == 64), t0, t3, 0);
>
> /* If not equal, we jump to the slow path. */
> *label_ptr = s->code_ptr;
> tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
> +
> + return t1;
> }
>
> #endif /* CONFIG_SOFTMMU */
> @@ -1644,10 +1647,12 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg
> data_reg, TCGReg addr_reg,
> #ifdef CONFIG_SOFTMMU
> unsigned mem_index = get_mmuidx(oi);
> tcg_insn_unit *label_ptr;
> + TCGReg base;
>
> - tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
> + base = tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1,
> + TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3);
> tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
> - TCG_REG_X1, otype, addr_reg);
> + base, otype, addr_reg);
> add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
> s->code_ptr, label_ptr);
> #else /* !CONFIG_SOFTMMU */
> @@ -1669,10 +1674,11 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg
> data_reg, TCGReg addr_reg,
> #ifdef CONFIG_SOFTMMU
> unsigned mem_index = get_mmuidx(oi);
> tcg_insn_unit *label_ptr;
> + TCGReg base;
>
> - tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
> - tcg_out_qemu_st_direct(s, memop, data_reg,
> - TCG_REG_X1, otype, addr_reg);
> + base = tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0,
> + TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3);
> + tcg_out_qemu_st_direct(s, memop, data_reg, base, otype, addr_reg);
> add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
> data_reg, addr_reg, s->code_ptr, label_ptr);
> #else /* !CONFIG_SOFTMMU */
--
Alex Bennée
- [Qemu-devel] [PATCH for-4.0 v2 03/37] tcg: Return success from patch_reloc, (continued)
- [Qemu-devel] [PATCH for-4.0 v2 03/37] tcg: Return success from patch_reloc, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 05/37] tcg/i386: Add constraints for r8 and r9, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 04/37] tcg: Add TCG_TARGET_NEED_LDST_OOL_LABELS, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 10/37] tcg/aarch64: Add constraints for x0, x1, x2, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 11/37] tcg/aarch64: Parameterize the temps for tcg_out_tlb_read, Richard Henderson, 2018/11/23
- Re: [Qemu-devel] [PATCH for-4.0 v2 11/37] tcg/aarch64: Parameterize the temps for tcg_out_tlb_read,
Alex Bennée <=
- [Qemu-devel] [PATCH for-4.0 v2 12/37] tcg/aarch64: Parameterize the temp for tcg_out_goto_long, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 13/37] tcg/aarch64: Use B not BL for tcg_out_goto_long, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 09/37] tcg/i386: Use TCG_TARGET_NEED_LDST_OOL_LABELS, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 08/37] tcg/i386: Force qemu_ld/st arguments into fixed registers, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 14/37] tcg/aarch64: Use TCG_TARGET_NEED_LDST_OOL_LABELS, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 16/37] tcg/arm: Add constraints for R0-R5, Richard Henderson, 2018/11/23