[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PULL 47/80] tcg/i386: Honor 64-bit atomicity in 32-bit mode
|
From: |
Richard Henderson |
|
Subject: |
[PULL 47/80] tcg/i386: Honor 64-bit atomicity in 32-bit mode |
|
Date: |
Tue, 16 May 2023 12:41:12 -0700 |
Use the fpu to perform 64-bit loads and stores.
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/i386/tcg-target.c.inc | 44 +++++++++++++++++++++++++++++++++------
1 file changed, 38 insertions(+), 6 deletions(-)
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 3b8528e332..0415ca2a4c 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -468,6 +468,10 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
#define OPC_GRP5 (0xff)
#define OPC_GRP14 (0x73 | P_EXT | P_DATA16)
+#define OPC_ESCDF (0xdf)
+#define ESCDF_FILD_m64 5
+#define ESCDF_FISTP_m64 7
+
/* Group 1 opcode extensions for 0x80-0x83.
These are also used as modifiers for OPC_ARITH. */
#define ARITH_ADD 0
@@ -2086,7 +2090,20 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
datalo = datahi;
datahi = t;
}
- if (h.base == datalo || h.index == datalo) {
+ if (h.aa.atom == MO_64) {
+ /*
+ * Atomicity requires that we use a single 8-byte load.
+ * For simplicity and code size, always use the FPU for this.
+ * Similar insns using SSE/AVX are merely larger.
+ * Load from memory in one go, then store back to the stack,
+ * from whence we can load into the correct integer regs.
+ */
+ tcg_out_modrm_sib_offset(s, OPC_ESCDF + h.seg, ESCDF_FILD_m64,
+ h.base, h.index, 0, h.ofs);
+ tcg_out_modrm_offset(s, OPC_ESCDF, ESCDF_FISTP_m64, TCG_REG_ESP, 0);
+ tcg_out_modrm_offset(s, movop, datalo, TCG_REG_ESP, 0);
+ tcg_out_modrm_offset(s, movop, datahi, TCG_REG_ESP, 4);
+ } else if (h.base == datalo || h.index == datalo) {
tcg_out_modrm_sib_offset(s, OPC_LEA, datahi,
h.base, h.index, 0, h.ofs);
tcg_out_modrm_offset(s, movop + h.seg, datalo, datahi, 0);
@@ -2156,12 +2173,27 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
if (TCG_TARGET_REG_BITS == 64) {
tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo,
h.base, h.index, 0, h.ofs);
+ break;
+ }
+ if (use_movbe) {
+ TCGReg t = datalo;
+ datalo = datahi;
+ datahi = t;
+ }
+ if (h.aa.atom == MO_64) {
+ /*
+ * Atomicity requires that we use one 8-byte store.
+ * For simplicity, and code size, always use the FPU for this.
+ * Similar insns using SSE/AVX are merely larger.
+ * Assemble the 8-byte quantity in required endianness
+ * on the stack, load to coproc unit, and store.
+ */
+ tcg_out_modrm_offset(s, movop, datalo, TCG_REG_ESP, 0);
+ tcg_out_modrm_offset(s, movop, datahi, TCG_REG_ESP, 4);
+ tcg_out_modrm_offset(s, OPC_ESCDF, ESCDF_FILD_m64, TCG_REG_ESP, 0);
+ tcg_out_modrm_sib_offset(s, OPC_ESCDF + h.seg, ESCDF_FISTP_m64,
+ h.base, h.index, 0, h.ofs);
} else {
- if (use_movbe) {
- TCGReg t = datalo;
- datalo = datahi;
- datahi = t;
- }
tcg_out_modrm_sib_offset(s, movop + h.seg, datalo,
h.base, h.index, 0, h.ofs);
tcg_out_modrm_sib_offset(s, movop + h.seg, datahi,
--
2.34.1
- [PULL 33/80] tcg: Add INDEX_op_qemu_{ld,st}_i128, (continued)
- [PULL 33/80] tcg: Add INDEX_op_qemu_{ld,st}_i128, Richard Henderson, 2023/05/16
- [PULL 36/80] tcg: Support TCG_TYPE_I128 in tcg_out_{ld, st}_helper_{args, ret}, Richard Henderson, 2023/05/16
- [PULL 39/80] tcg/aarch64: Use atom_and_align_for_opc, Richard Henderson, 2023/05/16
- [PULL 38/80] tcg/i386: Use atom_and_align_for_opc, Richard Henderson, 2023/05/16
- [PULL 40/80] tcg/arm: Use atom_and_align_for_opc, Richard Henderson, 2023/05/16
- [PULL 41/80] tcg/loongarch64: Use atom_and_align_for_opc, Richard Henderson, 2023/05/16
- [PULL 42/80] tcg/mips: Use atom_and_align_for_opc, Richard Henderson, 2023/05/16
- [PULL 44/80] tcg/riscv: Use atom_and_align_for_opc, Richard Henderson, 2023/05/16
- [PULL 46/80] tcg/sparc64: Use atom_and_align_for_opc, Richard Henderson, 2023/05/16
- [PULL 45/80] tcg/s390x: Use atom_and_align_for_opc, Richard Henderson, 2023/05/16
- [PULL 47/80] tcg/i386: Honor 64-bit atomicity in 32-bit mode,
Richard Henderson <=
- [PULL 52/80] tcg/s390x: Support 128-bit load/store, Richard Henderson, 2023/05/16
- [PULL 65/80] tcg: Remove TCGv from tcg_gen_atomic_*, Richard Henderson, 2023/05/16
- [PULL 64/80] tcg: Remove TCGv from tcg_gen_qemu_{ld,st}_*, Richard Henderson, 2023/05/16
- [PULL 68/80] tcg/i386: Always enable TCG_TARGET_HAS_extr[lh]_i64_i32, Richard Henderson, 2023/05/16
- [PULL 74/80] tcg/aarch64: Remove TARGET_LONG_BITS, TCG_TYPE_TL, Richard Henderson, 2023/05/16
- [PULL 56/80] tcg: Widen helper_{ld,st}_i128 addresses to uint64_t, Richard Henderson, 2023/05/16
- [PULL 61/80] tcg: Reduce copies for plugin_gen_mem_callbacks, Richard Henderson, 2023/05/16
- [PULL 43/80] tcg/ppc: Use atom_and_align_for_opc, Richard Henderson, 2023/05/16
- [PULL 48/80] tcg/i386: Support 128-bit load/store with have_atomic16, Richard Henderson, 2023/05/16
- [PULL 50/80] tcg/aarch64: Support 128-bit load/store, Richard Henderson, 2023/05/16