[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 12/21] target-mips: add ALIGN, DALIGN, BITSWAP a
From: |
Aurelien Jarno |
Subject: |
Re: [Qemu-devel] [PATCH 12/21] target-mips: add ALIGN, DALIGN, BITSWAP and DBITSWAP instructions |
Date: |
Sat, 31 May 2014 00:41:03 +0200 |
User-agent: |
Mutt/1.5.21 (2010-09-15) |
On Fri, May 30, 2014 at 03:47:50PM +0100, Leon Alrae wrote:
> From: Yongbok Kim <address@hidden>
>
> Signed-off-by: Yongbok Kim <address@hidden>
> Signed-off-by: Leon Alrae <address@hidden>
> ---
> disas/mips.c | 4 ++
> target-mips/helper.h | 2 +
> target-mips/op_helper.c | 16 +++++++
> target-mips/translate.c | 103 +++++++++++++++++++++++++++++++++++++++++-----
> 4 files changed, 113 insertions(+), 12 deletions(-)
>
> diff --git a/disas/mips.c b/disas/mips.c
> index 8cb9032..127a7d5 100644
> --- a/disas/mips.c
> +++ b/disas/mips.c
> @@ -1246,6 +1246,10 @@ const struct mips_opcode mips_builtin_opcodes[] =
> {"cache", "k,o(b)", 0x7c000025, 0xfc00003f, RD_b, 0,
> I32R6},
> {"seleqz", "d,v,t", 0x00000035, 0xfc0007ff, WR_d|RD_s|RD_t, 0,
> I32R6},
> {"selnez", "d,v,t", 0x00000037, 0xfc0007ff, WR_d|RD_s|RD_t, 0,
> I32R6},
> +{"align", "d,v,t", 0x7c000220, 0xfc00073f, WR_d|RD_s|RD_t, 0,
> I32R6},
> +{"dalign", "d,v,t", 0x7c000224, 0xfc00063f, WR_d|RD_s|RD_t, 0,
> I64R6},
> +{"bitswap", "d,w", 0x7c000020, 0xffe007ff, WR_d|RD_t, 0,
> I32R6},
> +{"dbitswap","d,w", 0x7c000024, 0xffe007ff, WR_d|RD_t, 0,
> I64R6},
> {"pref", "k,o(b)", 0xcc000000, 0xfc000000, RD_b, 0,
> I4|I32|G3 },
> {"prefx", "h,t(b)", 0x4c00000f, 0xfc0007ff, RD_b|RD_t,
> 0, I4|I33 },
> {"nop", "", 0x00000000, 0xffffffff, 0,
> INSN2_ALIAS, I1 }, /* sll */
> diff --git a/target-mips/helper.h b/target-mips/helper.h
> index 8c7921a..cd82633 100644
> --- a/target-mips/helper.h
> +++ b/target-mips/helper.h
> @@ -41,6 +41,8 @@ DEF_HELPER_3(macchiu, tl, env, tl, tl)
> DEF_HELPER_3(msachi, tl, env, tl, tl)
> DEF_HELPER_3(msachiu, tl, env, tl, tl)
>
> +DEF_HELPER_1(bitswap, tl, tl)
> +
This function doesn't access env and can't generate exception, so it can
probably be declared TCG_CALL_NO_RWG_SE.
> #ifndef CONFIG_USER_ONLY
> /* CP0 helpers */
> DEF_HELPER_1(mfc0_mvpcontrol, tl, env)
> diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
> index 4edec6c..ac8a430 100644
> --- a/target-mips/op_helper.c
> +++ b/target-mips/op_helper.c
> @@ -269,6 +269,22 @@ target_ulong helper_mulshiu(CPUMIPSState *env,
> target_ulong arg1,
> (uint64_t)(uint32_t)arg2);
> }
>
> +target_ulong helper_bitswap(target_ulong rt)
> +{
> + target_ulong v = rt;
> +#ifdef TARGET_MIPS64
> + v = ((v >> 1) & 0x5555555555555555) | ((v & 0x5555555555555555) << 1);
> + v = ((v >> 2) & 0x3333333333333333) | ((v & 0x3333333333333333) << 2);
> + v = ((v >> 4) & 0x0F0F0F0F0F0F0F0F) | ((v & 0x0F0F0F0F0F0F0F0F) << 4);
> + return v;
> +#else
> + v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
> + v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
> + v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);
> + return v;
> +#endif
Alternatively you can use the same code for both 32 and 64-bit, by
casting the constants to target_ulong. I am not sure it is better or
worse, so I leave you to do as you prefer.
Also this kind of code is at the limit between helper vs TCG code. It
can be implemented here with 15 TCG instructions, but I am not really
sure it worth it. As you prefer.
> +}
> +
> #ifndef CONFIG_USER_ONLY
>
> static inline hwaddr do_translate_address(CPUMIPSState *env,
> diff --git a/target-mips/translate.c b/target-mips/translate.c
> index 6d294e1..cce17cd 100644
> --- a/target-mips/translate.c
> +++ b/target-mips/translate.c
> @@ -390,17 +390,23 @@ enum {
> #define MASK_BSHFL(op) MASK_SPECIAL3(op) | (op & (0x1F << 6))
>
> enum {
> - OPC_WSBH = (0x02 << 6) | OPC_BSHFL,
> - OPC_SEB = (0x10 << 6) | OPC_BSHFL,
> - OPC_SEH = (0x18 << 6) | OPC_BSHFL,
> + OPC_WSBH = (0x02 << 6) | OPC_BSHFL,
> + OPC_SEB = (0x10 << 6) | OPC_BSHFL,
> + OPC_SEH = (0x18 << 6) | OPC_BSHFL,
> + OPC_ALIGN = (0x08 << 6) | OPC_BSHFL, /* 010.bp */
> + OPC_ALIGN_END = (0x0B << 6) | OPC_BSHFL, /* 010.00 to 010.11 */
> + OPC_BITSWAP = (0x00 << 6) | OPC_BSHFL /* 00000 */
> };
>
> /* DBSHFL opcodes */
> #define MASK_DBSHFL(op) MASK_SPECIAL3(op) | (op & (0x1F << 6))
>
> enum {
> - OPC_DSBH = (0x02 << 6) | OPC_DBSHFL,
> - OPC_DSHD = (0x05 << 6) | OPC_DBSHFL,
> + OPC_DSBH = (0x02 << 6) | OPC_DBSHFL,
> + OPC_DSHD = (0x05 << 6) | OPC_DBSHFL,
> + OPC_DALIGN = (0x08 << 6) | OPC_DBSHFL, /* 01.bp */
> + OPC_DALIGN_END = (0x0F << 6) | OPC_DBSHFL, /* 01.000 to 01.111 */
> + OPC_DBITSWAP = (0x00 << 6) | OPC_DBSHFL, /* 00000 */
> };
>
> /* MIPS DSP REGIMM opcodes */
> @@ -15131,12 +15137,14 @@ static void decode_opc_special2_legacy(CPUMIPSState
> *env, DisasContext *ctx)
>
> static void decode_opc_special3_r6(CPUMIPSState *env, DisasContext *ctx)
> {
> - int rs, rt;
> - uint32_t op1;
> + int rs, rt, rd, sa;
> + uint32_t op1, op2;
> int16_t imm;
>
> rs = (ctx->opcode >> 21) & 0x1f;
> rt = (ctx->opcode >> 16) & 0x1f;
> + rd = (ctx->opcode >> 11) & 0x1f;
> + sa = (ctx->opcode >> 6) & 0x1f;
> imm = (int16_t)ctx->opcode;
>
> op1 = MASK_SPECIAL3(ctx->opcode);
> @@ -15157,6 +15165,34 @@ static void decode_opc_special3_r6(CPUMIPSState
> *env, DisasContext *ctx)
> case R6_OPC_LL:
> gen_ld(ctx, op1, rt, rs, imm >> 7);
> break;
> + case OPC_BSHFL:
> + op2 = MASK_BSHFL(ctx->opcode);
> + switch (op2) {
> + case OPC_ALIGN ... OPC_ALIGN_END:
> + {
> + TCGv t0, t1;
> + sa &= 3;
> + t0 = tcg_temp_new();
> + t1 = tcg_temp_new();
> + tcg_gen_shli_tl(t0, cpu_gpr[rt], 8 * sa);
> + if (sa == 0) { /* avoid undefined */
> + tcg_gen_mov_tl(cpu_gpr[rd], t0);
In that case, the shift above is also useless, it can be moved in the
block below, and the sa == 0 case replaced by
tcg_gen_mov_tl(cpu_gpr[rd], cpu_gpr[rt]);
> + } else {
> + tcg_gen_ext32u_tl(t1, cpu_gpr[rs]);
> + tcg_gen_shri_tl(t1, t1, 8 * (4 - sa));
> + tcg_gen_or_tl(cpu_gpr[rd], t0, t1);
> + }
> + tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
> + tcg_temp_free(t0);
> + tcg_temp_free(t1);
For this case, have you considered merging both rt and rs in a 64 bits
register and shift the whole? It might be slightly faster on a 64-bit
host, though I haven't tested.
> + }
> + break;
> + case OPC_BITSWAP:
> + gen_helper_bitswap(cpu_gpr[rd], cpu_gpr[rt]);
> + tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
Another alternative is to have two helpers, one for bitswap, and the
other for dbitswap (this one defined only on MIPS64), so that the ext32s
could be folded in the helper. It's just an idea, I think all options
are fine.
> + break;
> + }
> + break;
> #if defined(TARGET_MIPS64)
> case R6_OPC_SCD:
> gen_st_cond(ctx, op1, rt, rs, imm >> 7);
> @@ -15164,6 +15200,30 @@ static void decode_opc_special3_r6(CPUMIPSState
> *env, DisasContext *ctx)
> case R6_OPC_LLD:
> gen_ld(ctx, op1, rt, rs, imm >> 7);
> break;
> + case OPC_DBSHFL:
> + op2 = MASK_DBSHFL(ctx->opcode);
> + switch (op2) {
> + case OPC_DALIGN ... OPC_DALIGN_END:
> + check_mips_64(ctx);
> + sa &= 7;
> + TCGv t0, t1;
> + t0 = tcg_temp_new();
> + t1 = tcg_temp_new();
> + tcg_gen_shli_tl(t0, cpu_gpr[rt], 8 * sa);
> + if (sa == 0) { /* avoid undefined */
> + tcg_gen_mov_tl(cpu_gpr[rd], t0);
> + } else {
Same comment here.
> + tcg_gen_shri_tl(t1, cpu_gpr[rs], 8 * (8 - sa));
> + tcg_gen_or_tl(cpu_gpr[rd], t1, t0);
> + }
> + tcg_temp_free(t0);
> + tcg_temp_free(t1);
> + break;
> + case OPC_DBITSWAP:
> + gen_helper_bitswap(cpu_gpr[rd], cpu_gpr[rt]);
> + break;
> + }
> + break;
> #endif
> default: /* Invalid */
> MIPS_INVAL("special3_r6");
> @@ -15709,9 +15769,18 @@ static void decode_opc_special3(CPUMIPSState *env,
> DisasContext *ctx)
> gen_bitops(ctx, op1, rt, rs, sa, rd);
> break;
> case OPC_BSHFL:
> - check_insn(ctx, ISA_MIPS32R2);
> op2 = MASK_BSHFL(ctx->opcode);
> - gen_bshfl(ctx, op2, rt, rd);
> + switch (op2) {
> + case OPC_ALIGN ... OPC_ALIGN_END:
> + case OPC_BITSWAP:
> + check_insn(ctx, ISA_MIPS32R6);
> + decode_opc_special3_r6(env, ctx);
> + break;
> + default:
> + check_insn(ctx, ISA_MIPS32R2);
> + gen_bshfl(ctx, op2, rt, rd);
> + break;
> + }
> break;
> #if defined(TARGET_MIPS64)
> case OPC_DEXTM ... OPC_DEXT:
> @@ -15721,10 +15790,20 @@ static void decode_opc_special3(CPUMIPSState *env,
> DisasContext *ctx)
> gen_bitops(ctx, op1, rt, rs, sa, rd);
> break;
> case OPC_DBSHFL:
> - check_insn(ctx, ISA_MIPS64R2);
> - check_mips_64(ctx);
> op2 = MASK_DBSHFL(ctx->opcode);
> - gen_bshfl(ctx, op2, rt, rd);
> + switch (op2) {
> + case OPC_DALIGN ... OPC_DALIGN_END:
> + case OPC_DBITSWAP:
> + check_insn(ctx, ISA_MIPS32R6);
> + decode_opc_special3_r6(env, ctx);
> + break;
> + default:
> + check_insn(ctx, ISA_MIPS64R2);
> + check_mips_64(ctx);
> + op2 = MASK_DBSHFL(ctx->opcode);
> + gen_bshfl(ctx, op2, rt, rd);
> + break;
> + }
> break;
> #endif
> case OPC_RDHWR:
> --
> 1.7.5.4
>
>
--
Aurelien Jarno GPG: 1024D/F1BCDB73
address@hidden http://www.aurel32.net
- Re: [Qemu-devel] [PATCH 07/21] target-mips: signal RI Exception on DSP and Loongson instructions, (continued)
- [Qemu-devel] [PATCH 11/21] target-mips: Status.UX/SX/KX enable 32-bit address wrapping, Leon Alrae, 2014/05/30
- [Qemu-devel] [PATCH 10/21] target-mips: move CLO, DCLO, CLZ, DCLZ, SDBBP and free special2 in R6, Leon Alrae, 2014/05/30
- [Qemu-devel] [PATCH 09/21] target-mips: redefine Integer Multiply and Divide instructions, Leon Alrae, 2014/05/30
- [Qemu-devel] [PATCH 12/21] target-mips: add ALIGN, DALIGN, BITSWAP and DBITSWAP instructions, Leon Alrae, 2014/05/30
- Re: [Qemu-devel] [PATCH 12/21] target-mips: add ALIGN, DALIGN, BITSWAP and DBITSWAP instructions,
Aurelien Jarno <=
- [Qemu-devel] [PATCH 18/21] target-mips: do not allow Status.FR=0 mode in 64-bit FPU, Leon Alrae, 2014/05/30
- [Qemu-devel] [PATCH 13/21] target-mips: add Compact Branches, Leon Alrae, 2014/05/30
- [Qemu-devel] [PATCH 14/21] target-mips: add Addressing and PC-relative instructions, Leon Alrae, 2014/05/30
- [Qemu-devel] [PATCH 16/21] target-mips: add new Floating Point instructions, Leon Alrae, 2014/05/30
- [Qemu-devel] [PATCH 15/21] softfloat: add functions corresponding to IEEE-2008 min/maxNumMag, Leon Alrae, 2014/05/30
- [Qemu-devel] [PATCH 19/21] target-mips: remove JR, BLTZAL, BGEZAL and add NAL, BAL instructions, Leon Alrae, 2014/05/30
- [Qemu-devel] [PATCH 17/21] target-mips: add new Floating Point Comparison instructions, Leon Alrae, 2014/05/30
- [Qemu-devel] [PATCH 21/21] target-mips: use pointers referring to appropriate decoding function, Leon Alrae, 2014/05/30
- [Qemu-devel] [PATCH 20/21] mips_malta: update malta's pseudo-bootloader - replace JR with JALR, Leon Alrae, 2014/05/30