[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH v2 11/27] target-i386: emulate LOCK'ed cmpxchg using cmpxchg helpers
From: Richard Henderson
Subject: [Qemu-devel] [PATCH v2 11/27] target-i386: emulate LOCK'ed cmpxchg using cmpxchg helpers
Date: Fri, 1 Jul 2016 10:04:37 -0700
From: "Emilio G. Cota" <address@hidden>
The diff here is uglier than necessary. All this patch does is turn

    FOO

into:

    if (s->prefix & PREFIX_LOCK) {
        BAR
    } else {
        FOO
    }

where FOO is the original implementation of an unlocked cmpxchg and BAR
is the new LOCK-prefixed path that emits an atomic cmpxchg operation.
[rth: Adjust unlocked cmpxchg to use movcond instead of branches.
Adjust helpers to use atomic helpers.]
Signed-off-by: Emilio G. Cota <address@hidden>
Message-Id: <address@hidden>
Signed-off-by: Richard Henderson <address@hidden>
---
target-i386/mem_helper.c | 96 ++++++++++++++++++++++++++++++++++++++----------
target-i386/translate.c | 87 +++++++++++++++++++++----------------------
2 files changed, 120 insertions(+), 63 deletions(-)
diff --git a/target-i386/mem_helper.c b/target-i386/mem_helper.c
index c2f4769..5c0558f 100644
--- a/target-i386/mem_helper.c
+++ b/target-i386/mem_helper.c
@@ -22,6 +22,8 @@
#include "exec/helper-proto.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
+#include "qemu/int128.h"
+#include "tcg.h"
/* broken thread support */
@@ -58,20 +60,39 @@ void helper_lock_init(void)
void helper_cmpxchg8b(CPUX86State *env, target_ulong a0)
{
- uint64_t d;
+ uintptr_t ra = GETPC();
+ uint64_t oldv, cmpv, newv;
int eflags;
eflags = cpu_cc_compute_all(env, CC_OP);
- d = cpu_ldq_data_ra(env, a0, GETPC());
- if (d == (((uint64_t)env->regs[R_EDX] << 32) |
(uint32_t)env->regs[R_EAX])) {
- cpu_stq_data_ra(env, a0, ((uint64_t)env->regs[R_ECX] << 32)
- | (uint32_t)env->regs[R_EBX], GETPC());
- eflags |= CC_Z;
+
+ cmpv = deposit64(env->regs[R_EAX], 32, 32, env->regs[R_EDX]);
+ newv = deposit64(env->regs[R_EBX], 32, 32, env->regs[R_ECX]);
+
+ if (parallel_cpus) {
+#ifdef CONFIG_USER_ONLY
+ uint64_t *haddr = g2h(a0);
+ cmpv = cpu_to_le64(cmpv);
+ newv = cpu_to_le64(newv);
+ oldv = atomic_cmpxchg(haddr, cmpv, newv);
+ oldv = le64_to_cpu(oldv);
+#else
+ int mem_idx = cpu_mmu_index(env, false);
+ TCGMemOpIdx oi = make_memop_idx(MO_TEQ, mem_idx);
+ oldv = helper_atomic_cmpxchgq_le_mmu(env, a0, cmpv, newv, oi, ra);
+#endif
} else {
+ oldv = cpu_ldq_data_ra(env, a0, ra);
+ newv = (cmpv == oldv ? newv : oldv);
/* always do the store */
- cpu_stq_data_ra(env, a0, d, GETPC());
- env->regs[R_EDX] = (uint32_t)(d >> 32);
- env->regs[R_EAX] = (uint32_t)d;
+ cpu_stq_data_ra(env, a0, newv, ra);
+ }
+
+ if (oldv == cmpv) {
+ eflags |= CC_Z;
+ } else {
+ env->regs[R_EAX] = (uint32_t)oldv;
+ env->regs[R_EDX] = (uint32_t)(oldv >> 32);
eflags &= ~CC_Z;
}
CC_SRC = eflags;
@@ -80,25 +101,60 @@ void helper_cmpxchg8b(CPUX86State *env, target_ulong a0)
#ifdef TARGET_X86_64
void helper_cmpxchg16b(CPUX86State *env, target_ulong a0)
{
- uint64_t d0, d1;
+ uintptr_t ra = GETPC();
+ Int128 oldv, cmpv, newv;
int eflags;
+ bool success;
if ((a0 & 0xf) != 0) {
raise_exception_ra(env, EXCP0D_GPF, GETPC());
}
eflags = cpu_cc_compute_all(env, CC_OP);
- d0 = cpu_ldq_data_ra(env, a0, GETPC());
- d1 = cpu_ldq_data_ra(env, a0 + 8, GETPC());
- if (d0 == env->regs[R_EAX] && d1 == env->regs[R_EDX]) {
- cpu_stq_data_ra(env, a0, env->regs[R_EBX], GETPC());
- cpu_stq_data_ra(env, a0 + 8, env->regs[R_ECX], GETPC());
+
+ cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]);
+ newv = int128_make128(env->regs[R_EBX], env->regs[R_ECX]);
+
+ if (parallel_cpus) {
+#ifndef CONFIG_ATOMIC128
+ cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
+#elif defined(CONFIG_USER_ONLY)
+ Int128 *haddr = g2h(a0);
+ oldv = cmpv;
+#ifdef HOST_WORDS_BIGENDIAN
+ oldv = bswap128(oldv);
+ newv = bswap128(newv);
+#endif
+ success = __atomic_compare_exchange_16(haddr, &oldv, newv, false,
+ __ATOMIC_SEQ_CST,
+ __ATOMIC_SEQ_CST);
+#ifdef HOST_WORDS_BIGENDIAN
+ oldv = bswap128(oldv);
+#endif
+#else
+ int mem_idx = cpu_mmu_index(env, false);
+ TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
+ oldv = helper_atomic_cmpxchgo_le_mmu(env, a0, cmpv, newv, oi, ra);
+ success = int128_eq(oldv, cmpv);
+#endif
+ } else {
+ uint64_t o0 = cpu_ldq_data_ra(env, a0 + 0, ra);
+ uint64_t o1 = cpu_ldq_data_ra(env, a0 + 8, ra);
+
+ oldv = int128_make128(o0, o1);
+ success = int128_eq(oldv, cmpv);
+ if (!success) {
+ newv = oldv;
+ }
+
+ cpu_stq_data_ra(env, a0 + 0, int128_getlo(newv), ra);
+ cpu_stq_data_ra(env, a0 + 8, int128_gethi(newv), ra);
+ }
+
+ if (success) {
eflags |= CC_Z;
} else {
- /* always do the store */
- cpu_stq_data_ra(env, a0, d0, GETPC());
- cpu_stq_data_ra(env, a0 + 8, d1, GETPC());
- env->regs[R_EDX] = d1;
- env->regs[R_EAX] = d0;
+ env->regs[R_EAX] = int128_getlo(oldv);
+ env->regs[R_EDX] = int128_gethi(oldv);
eflags &= ~CC_Z;
}
CC_SRC = eflags;
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 7dea18b..2244f38 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -5070,57 +5070,58 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
case 0x1b0:
case 0x1b1: /* cmpxchg Ev, Gv */
{
- TCGLabel *label1, *label2;
- TCGv t0, t1, t2, a0;
+ TCGv oldv, newv, cmpv;
ot = mo_b_d(b, dflag);
modrm = cpu_ldub_code(env, s->pc++);
reg = ((modrm >> 3) & 7) | rex_r;
mod = (modrm >> 6) & 3;
- t0 = tcg_temp_local_new();
- t1 = tcg_temp_local_new();
- t2 = tcg_temp_local_new();
- a0 = tcg_temp_local_new();
- gen_op_mov_v_reg(ot, t1, reg);
- if (mod == 3) {
- rm = (modrm & 7) | REX_B(s);
- gen_op_mov_v_reg(ot, t0, rm);
- } else {
+ oldv = tcg_temp_new();
+ newv = tcg_temp_new();
+ cmpv = tcg_temp_new();
+ gen_op_mov_v_reg(ot, newv, reg);
+ tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
+
+ if (s->prefix & PREFIX_LOCK) {
+ if (mod == 3) {
+ goto illegal_op;
+ }
gen_lea_modrm(env, s, modrm);
- tcg_gen_mov_tl(a0, cpu_A0);
- gen_op_ld_v(s, ot, t0, a0);
- rm = 0; /* avoid warning */
- }
- label1 = gen_new_label();
- tcg_gen_mov_tl(t2, cpu_regs[R_EAX]);
- gen_extu(ot, t0);
- gen_extu(ot, t2);
- tcg_gen_brcond_tl(TCG_COND_EQ, t2, t0, label1);
- label2 = gen_new_label();
- if (mod == 3) {
- gen_op_mov_reg_v(ot, R_EAX, t0);
- tcg_gen_br(label2);
- gen_set_label(label1);
- gen_op_mov_reg_v(ot, rm, t1);
+ tcg_gen_atomic_cmpxchg_tl(oldv, cpu_A0, cmpv, newv,
+ s->mem_index, ot | MO_LE);
+ gen_op_mov_reg_v(ot, R_EAX, oldv);
} else {
- /* perform no-op store cycle like physical cpu; must be
- before changing accumulator to ensure idempotency if
- the store faults and the instruction is restarted */
- gen_op_st_v(s, ot, t0, a0);
- gen_op_mov_reg_v(ot, R_EAX, t0);
- tcg_gen_br(label2);
- gen_set_label(label1);
- gen_op_st_v(s, ot, t1, a0);
- }
- gen_set_label(label2);
- tcg_gen_mov_tl(cpu_cc_src, t0);
- tcg_gen_mov_tl(cpu_cc_srcT, t2);
- tcg_gen_sub_tl(cpu_cc_dst, t2, t0);
+ if (mod == 3) {
+ rm = (modrm & 7) | REX_B(s);
+ gen_op_mov_v_reg(ot, oldv, rm);
+ } else {
+ gen_lea_modrm(env, s, modrm);
+ gen_op_ld_v(s, ot, oldv, cpu_A0);
+ rm = 0; /* avoid warning */
+ }
+ gen_extu(ot, oldv);
+ gen_extu(ot, cmpv);
+ /* store value = (old == cmp ? new : old); */
+ tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
+ if (mod == 3) {
+ gen_op_mov_reg_v(ot, R_EAX, oldv);
+ gen_op_mov_reg_v(ot, rm, newv);
+ } else {
+ /* Perform an unconditional store cycle like physical cpu;
+ must be before changing accumulator to ensure
+ idempotency if the store faults and the instruction
+ is restarted */
+ gen_op_st_v(s, ot, newv, cpu_A0);
+ gen_op_mov_reg_v(ot, R_EAX, oldv);
+ }
+ }
+ tcg_gen_mov_tl(cpu_cc_src, oldv);
+ tcg_gen_mov_tl(cpu_cc_srcT, cmpv);
+ tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
set_cc_op(s, CC_OP_SUBB + ot);
- tcg_temp_free(t0);
- tcg_temp_free(t1);
- tcg_temp_free(t2);
- tcg_temp_free(a0);
+ tcg_temp_free(oldv);
+ tcg_temp_free(newv);
+ tcg_temp_free(cmpv);
}
break;
case 0x1c7: /* cmpxchg8b */
--
2.5.5
- [Qemu-devel] [PATCH v2 00/27] cmpxchg-based emulation of atomics, Richard Henderson, 2016/07/01
- [Qemu-devel] [PATCH v2 04/27] int128: Use __int128 if available, Richard Henderson, 2016/07/01
- [Qemu-devel] [PATCH v2 03/27] exec: Avoid direct references to Int128 parts, Richard Henderson, 2016/07/01
- [Qemu-devel] [PATCH v2 02/27] atomics: add atomic_op_fetch variants, Richard Henderson, 2016/07/01
- [Qemu-devel] [PATCH v2 08/27] HACK: Always enable parallel_cpus, Richard Henderson, 2016/07/01
- [Qemu-devel] [PATCH v2 01/27] atomics: add atomic_xor, Richard Henderson, 2016/07/01
- [Qemu-devel] [PATCH v2 11/27] target-i386: emulate LOCK'ed cmpxchg using cmpxchg helpers,
Richard Henderson <=
- [Qemu-devel] [PATCH v2 06/27] int128: Use complex numbers if advisable, Richard Henderson, 2016/07/01
- [Qemu-devel] [PATCH v2 10/27] tcg: Add atomic128 helpers, Richard Henderson, 2016/07/01
- [Qemu-devel] [PATCH v2 07/27] tcg: Add EXCP_ATOMIC, Richard Henderson, 2016/07/01
- [Qemu-devel] [PATCH v2 13/27] target-i386: emulate LOCK'ed INC using atomic helper, Richard Henderson, 2016/07/01