[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v4 67/71] tcg/tci: Implement mulu2, muls2
From: Richard Henderson
Subject: [PATCH v4 67/71] tcg/tci: Implement mulu2, muls2
Date: Wed, 17 Feb 2021 12:20:32 -0800
We already had mulu2_i32 for a 32-bit host; expand this to 64-bit
hosts as well. The muls2_i32 and the 64-bit opcodes are new.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/tci/tcg-target.h | 8 ++++----
tcg/tci.c | 35 +++++++++++++++++++++++++++++------
tcg/tci/tcg-target.c.inc | 16 ++++++++++------
3 files changed, 43 insertions(+), 16 deletions(-)
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index 59859bd8a6..71a44bbfb0 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -83,7 +83,7 @@
#define TCG_TARGET_HAS_orc_i32 1
#define TCG_TARGET_HAS_rot_i32 1
#define TCG_TARGET_HAS_movcond_i32 1
-#define TCG_TARGET_HAS_muls2_i32 0
+#define TCG_TARGET_HAS_muls2_i32 1
#define TCG_TARGET_HAS_muluh_i32 0
#define TCG_TARGET_HAS_mulsh_i32 0
#define TCG_TARGET_HAS_goto_ptr 1
@@ -120,13 +120,13 @@
#define TCG_TARGET_HAS_orc_i64 1
#define TCG_TARGET_HAS_rot_i64 1
#define TCG_TARGET_HAS_movcond_i64 1
-#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_muls2_i64 1
#define TCG_TARGET_HAS_add2_i32 0
#define TCG_TARGET_HAS_sub2_i32 0
-#define TCG_TARGET_HAS_mulu2_i32 0
+#define TCG_TARGET_HAS_mulu2_i32 1
#define TCG_TARGET_HAS_add2_i64 0
#define TCG_TARGET_HAS_sub2_i64 0
-#define TCG_TARGET_HAS_mulu2_i64 0
+#define TCG_TARGET_HAS_mulu2_i64 1
#define TCG_TARGET_HAS_muluh_i64 0
#define TCG_TARGET_HAS_mulsh_i64 0
#else
diff --git a/tcg/tci.c b/tcg/tci.c
index 068d742a80..d76b9f5798 100644
--- a/tcg/tci.c
+++ b/tcg/tci.c
@@ -39,7 +39,7 @@ __thread uintptr_t tci_tb_ptr;
static void tci_write_reg64(tcg_target_ulong *regs, uint32_t high_index,
uint32_t low_index, uint64_t value)
{
- regs[low_index] = value;
+ regs[low_index] = (uint32_t)value;
regs[high_index] = value >> 32;
}
@@ -169,7 +169,6 @@ static void tci_args_rrrrr(uint32_t insn, TCGReg *r0,
TCGReg *r1,
*r4 = extract32(insn, 24, 4);
}
-#if TCG_TARGET_REG_BITS == 32
static void tci_args_rrrr(uint32_t insn,
TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGReg *r3)
{
@@ -178,7 +177,6 @@ static void tci_args_rrrr(uint32_t insn,
*r2 = extract32(insn, 16, 4);
*r3 = extract32(insn, 20, 4);
}
-#endif
static void tci_args_rrrrrc(uint32_t insn, TCGReg *r0, TCGReg *r1,
TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGCond *c5)
@@ -670,11 +668,21 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState
*env,
T2 = tci_uint64(regs[r5], regs[r4]);
tci_write_reg64(regs, r1, r0, T1 - T2);
break;
+#endif /* TCG_TARGET_REG_BITS == 32 */
+#if TCG_TARGET_HAS_mulu2_i32
case INDEX_op_mulu2_i32:
tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
- tci_write_reg64(regs, r1, r0, (uint64_t)regs[r2] * regs[r3]);
+ tmp64 = (uint64_t)(uint32_t)regs[r2] * (uint32_t)regs[r3];
+ tci_write_reg64(regs, r1, r0, tmp64);
break;
-#endif /* TCG_TARGET_REG_BITS == 32 */
+#endif
+#if TCG_TARGET_HAS_muls2_i32
+ case INDEX_op_muls2_i32:
+ tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
+ tmp64 = (int64_t)(int32_t)regs[r2] * (int32_t)regs[r3];
+ tci_write_reg64(regs, r1, r0, tmp64);
+ break;
+#endif
#if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64
CASE_32_64(ext8s)
tci_args_rr(insn, &r0, &r1);
@@ -778,6 +786,18 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState
*env,
regs[r0] = ctpop64(regs[r1]);
break;
#endif
+#if TCG_TARGET_HAS_mulu2_i64
+ case INDEX_op_mulu2_i64:
+ tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
+ mulu64(&regs[r0], &regs[r1], regs[r2], regs[r3]);
+ break;
+#endif
+#if TCG_TARGET_HAS_muls2_i64
+ case INDEX_op_muls2_i64:
+ tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
+ muls64(&regs[r0], &regs[r1], regs[r2], regs[r3]);
+ break;
+#endif
/* Shift/rotate operations (64 bit). */
@@ -1285,14 +1305,17 @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
str_r(r3), str_r(r4), str_c(c));
break;
-#if TCG_TARGET_REG_BITS == 32
case INDEX_op_mulu2_i32:
+ case INDEX_op_mulu2_i64:
+ case INDEX_op_muls2_i32:
+ case INDEX_op_muls2_i64:
tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
info->fprintf_func(info->stream, "%-12s %s,%s,%s,%s",
op_name, str_r(r0), str_r(r1),
str_r(r2), str_r(r3));
break;
+#if TCG_TARGET_REG_BITS == 32
case INDEX_op_add2_i32:
case INDEX_op_sub2_i32:
tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index 664d715440..eb48633fba 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -141,10 +141,14 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode
op)
return C_O2_I4(r, r, r, r, r, r);
case INDEX_op_brcond2_i32:
return C_O0_I4(r, r, r, r);
- case INDEX_op_mulu2_i32:
- return C_O2_I2(r, r, r, r);
#endif
+ case INDEX_op_mulu2_i32:
+ case INDEX_op_mulu2_i64:
+ case INDEX_op_muls2_i32:
+ case INDEX_op_muls2_i64:
+ return C_O2_I2(r, r, r, r);
+
case INDEX_op_movcond_i32:
case INDEX_op_movcond_i64:
case INDEX_op_setcond2_i32:
@@ -434,7 +438,6 @@ static void tcg_out_op_rrrrr(TCGContext *s, TCGOpcode op,
TCGReg r0,
tcg_out32(s, insn);
}
-#if TCG_TARGET_REG_BITS == 32
static void tcg_out_op_rrrr(TCGContext *s, TCGOpcode op,
TCGReg r0, TCGReg r1, TCGReg r2, TCGReg r3)
{
@@ -447,7 +450,6 @@ static void tcg_out_op_rrrr(TCGContext *s, TCGOpcode op,
insn = deposit32(insn, 20, 4, r3);
tcg_out32(s, insn);
}
-#endif
static void tcg_out_op_rrrrrc(TCGContext *s, TCGOpcode op,
TCGReg r0, TCGReg r1, TCGReg r2,
@@ -728,10 +730,12 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
const TCGArg *args,
args[0], args[1], args[2], args[3], args[4]);
tcg_out_op_rl(s, INDEX_op_brcond_i32, TCG_REG_TMP, arg_label(args[5]));
break;
- case INDEX_op_mulu2_i32:
+#endif
+
+ CASE_32_64(mulu2)
+ CASE_32_64(muls2)
tcg_out_op_rrrr(s, opc, args[0], args[1], args[2], args[3]);
break;
-#endif
case INDEX_op_qemu_ld_i32:
case INDEX_op_qemu_st_i32:
--
2.25.1
- [PATCH v4 58/71] tcg/tci: Reserve r13 for a temporary, (continued)
- [PATCH v4 58/71] tcg/tci: Reserve r13 for a temporary, Richard Henderson, 2021/02/17
- [PATCH v4 60/71] tcg/tci: Remove tci_write_reg, Richard Henderson, 2021/02/17
- [PATCH v4 59/71] tcg/tci: Emit setcond before brcond, Richard Henderson, 2021/02/17
- [PATCH v4 62/71] tcg/tci: Implement goto_ptr, Richard Henderson, 2021/02/17
- [PATCH v4 64/71] tcg/tci: Implement andc, orc, eqv, nand, nor, Richard Henderson, 2021/02/17
- [PATCH v4 63/71] tcg/tci: Implement movcond, Richard Henderson, 2021/02/17
- [PATCH v4 68/71] tcg/tci: Implement add2, sub2, Richard Henderson, 2021/02/17
- [PATCH v4 66/71] tcg/tci: Implement clz, ctz, ctpop, Richard Henderson, 2021/02/17
- [PATCH v4 61/71] tcg/tci: Change encoding to uint32_t units, Richard Henderson, 2021/02/17
- [PATCH v4 65/71] tcg/tci: Implement extract, sextract, Richard Henderson, 2021/02/17
- [PATCH v4 67/71] tcg/tci: Implement mulu2, muls2,
Richard Henderson <=
- [PATCH v4 70/71] tests/tcg: Increase timeout for TCI, Richard Henderson, 2021/02/17
- [PATCH v4 69/71] tcg/tci: Split out tci_qemu_ld, tci_qemu_st, Richard Henderson, 2021/02/17
- [PATCH v4 71/71] gitlab: Enable cross-i386 builds of TCI, Richard Henderson, 2021/02/17
- Re: [PATCH v4 00/71] TCI fixes and cleanups, no-reply, 2021/02/17