[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [RFC 7/8] target-arm: emulate atomic instructions using AIE
From: Emilio G. Cota
Subject: [Qemu-devel] [RFC 7/8] target-arm: emulate atomic instructions using AIE
Date: Fri, 8 May 2015 17:02:13 -0400
Signed-off-by: Emilio G. Cota <address@hidden>
---
linux-user/main.c | 89 -------------------------
target-arm/helper.c | 2 +
target-arm/helper.h | 2 +
target-arm/op_helper.c | 5 ++
target-arm/translate.c | 172 ++++++++++++++++++++++---------------------------
5 files changed, 86 insertions(+), 184 deletions(-)
diff --git a/linux-user/main.c b/linux-user/main.c
index 3f32db0..b6f21b4 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -584,90 +584,6 @@ do_kernel_trap(CPUARMState *env)
return 0;
}
-/* Store exclusive handling for AArch32 */
-static int do_strex(CPUARMState *env)
-{
- uint64_t val;
- int size;
- int rc = 1;
- int segv = 0;
- uint32_t addr;
- start_exclusive();
- if (env->exclusive_addr != env->exclusive_test) {
- goto fail;
- }
- /* We know we're always AArch32 so the address is in uint32_t range
- * unless it was the -1 exclusive-monitor-lost value (which won't
- * match exclusive_test above).
- */
- assert(extract64(env->exclusive_addr, 32, 32) == 0);
- addr = env->exclusive_addr;
- size = env->exclusive_info & 0xf;
- switch (size) {
- case 0:
- segv = get_user_u8(val, addr);
- break;
- case 1:
- segv = get_user_u16(val, addr);
- break;
- case 2:
- case 3:
- segv = get_user_u32(val, addr);
- break;
- default:
- abort();
- }
- if (segv) {
- env->exception.vaddress = addr;
- goto done;
- }
- if (size == 3) {
- uint32_t valhi;
- segv = get_user_u32(valhi, addr + 4);
- if (segv) {
- env->exception.vaddress = addr + 4;
- goto done;
- }
- val = deposit64(val, 32, 32, valhi);
- }
- if (val != env->exclusive_val) {
- goto fail;
- }
-
- val = env->regs[(env->exclusive_info >> 8) & 0xf];
- switch (size) {
- case 0:
- segv = put_user_u8(val, addr);
- break;
- case 1:
- segv = put_user_u16(val, addr);
- break;
- case 2:
- case 3:
- segv = put_user_u32(val, addr);
- break;
- }
- if (segv) {
- env->exception.vaddress = addr;
- goto done;
- }
- if (size == 3) {
- val = env->regs[(env->exclusive_info >> 12) & 0xf];
- segv = put_user_u32(val, addr + 4);
- if (segv) {
- env->exception.vaddress = addr + 4;
- goto done;
- }
- }
- rc = 0;
-fail:
- env->regs[15] += 4;
- env->regs[(env->exclusive_info >> 4) & 0xf] = rc;
-done:
- end_exclusive();
- return segv;
-}
-
void cpu_loop(CPUARMState *env)
{
CPUState *cs = CPU(arm_env_get_cpu(env));
@@ -833,11 +749,6 @@ void cpu_loop(CPUARMState *env)
case EXCP_INTERRUPT:
/* just indicate that signals should be handled asap */
break;
- case EXCP_STREX:
- if (!do_strex(env)) {
- break;
- }
- /* fall through for segv */
case EXCP_PREFETCH_ABORT:
case EXCP_DATA_ABORT:
addr = env->exception.vaddress;
diff --git a/target-arm/helper.c b/target-arm/helper.c
index f8f8d76..742e5be 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -11,6 +11,8 @@
#include "arm_ldst.h"
#include <zlib.h> /* For crc32 */
+#include "aie-helper.c"
+
#ifndef CONFIG_USER_ONLY
static inline int get_phys_addr(CPUARMState *env, target_ulong address,
int access_type, ARMMMUIdx mmu_idx,
diff --git a/target-arm/helper.h b/target-arm/helper.h
index dec3728..3c797d1 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -529,6 +529,8 @@ DEF_HELPER_2(dc_zva, void, env, i64)
DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+#include "qemu/aie-helper.h"
+
#ifdef TARGET_AARCH64
#include "helper-a64.h"
#endif
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index 3df9c57..ef48180 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -29,6 +29,11 @@ static void raise_exception(CPUARMState *env, int tt)
ARMCPU *cpu = arm_env_get_cpu(env);
CPUState *cs = CPU(cpu);
+ if (unlikely(env->aie_locked)) {
+ assert(env->aie_entry);
+ qemu_mutex_unlock(&env->aie_entry->lock);
+ env->aie_locked = false;
+ }
cs->exception_index = tt;
cpu_loop_exit(cs);
}
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 9116529..935011c 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -65,12 +65,6 @@ TCGv_ptr cpu_env;
static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
static TCGv_i32 cpu_R[16];
static TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
-static TCGv_i64 cpu_exclusive_addr;
-static TCGv_i64 cpu_exclusive_val;
-#ifdef CONFIG_USER_ONLY
-static TCGv_i64 cpu_exclusive_test;
-static TCGv_i32 cpu_exclusive_info;
-#endif
/* FIXME: These should be removed. */
static TCGv_i32 cpu_F0s, cpu_F1s;
@@ -99,17 +93,6 @@ void arm_translate_init(void)
cpu_VF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, VF),
"VF");
cpu_ZF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, ZF),
"ZF");
- cpu_exclusive_addr = tcg_global_mem_new_i64(TCG_AREG0,
- offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
- cpu_exclusive_val = tcg_global_mem_new_i64(TCG_AREG0,
- offsetof(CPUARMState, exclusive_val), "exclusive_val");
-#ifdef CONFIG_USER_ONLY
- cpu_exclusive_test = tcg_global_mem_new_i64(TCG_AREG0,
- offsetof(CPUARMState, exclusive_test), "exclusive_test");
- cpu_exclusive_info = tcg_global_mem_new_i32(TCG_AREG0,
- offsetof(CPUARMState, exclusive_info), "exclusive_info");
-#endif
-
a64_translate_init();
}
@@ -896,6 +879,15 @@ static inline void gen_aa32_ld##SUFF(TCGv_i32 val, TCGv_i32 addr, int index) \
#define DO_GEN_ST(SUFF, OPC) \
static inline void gen_aa32_st##SUFF(TCGv_i32 val, TCGv_i32 addr, int index) \
{ \
+ gen_helper_aie_llsc_st_pre(cpu_env, addr); \
+ tcg_gen_qemu_st_i32(val, addr, index, OPC); \
+ gen_helper_aie_llsc_st_post(cpu_env); \
+}
+
+#define DO_GEN_ST_LOCKED(SUFF, OPC) \
+static inline \
+void gen_aa32_st##SUFF##_locked(TCGv_i32 val, TCGv_i32 addr, int index) \
+{ \
tcg_gen_qemu_st_i32(val, addr, index, OPC); \
}
@@ -906,7 +898,19 @@ static inline void gen_aa32_ld64(TCGv_i64 val, TCGv_i32 addr, int index)
static inline void gen_aa32_st64(TCGv_i64 val, TCGv_i32 addr, int index)
{
+ gen_helper_aie_llsc_st_pre(cpu_env, addr);
tcg_gen_qemu_st_i64(val, addr, index, MO_TEQ);
+ gen_helper_aie_llsc_st_post(cpu_env);
+}
+
+static inline void gen_aa32_aie_insert_lock(TCGv_i32 addr)
+{
+ gen_helper_aie_insert_lock(cpu_env, addr);
+}
+
+static inline void gen_aa32_aie_ld_lock(TCGv_i32 addr)
+{
+ gen_helper_aie_ld_lock(cpu_env, addr);
}
#else
@@ -920,11 +924,23 @@ static inline void gen_aa32_ld##SUFF(TCGv_i32 val, TCGv_i32 addr, int index) \
tcg_temp_free(addr64); \
}
-#define DO_GEN_ST(SUFF, OPC) \
+#define DO_GEN_ST(SUFF, OPC) \
static inline void gen_aa32_st##SUFF(TCGv_i32 val, TCGv_i32 addr, int index) \
{ \
TCGv addr64 = tcg_temp_new(); \
tcg_gen_extu_i32_i64(addr64, addr); \
+ gen_helper_aie_llsc_st_pre(cpu_env, addr64); \
+ tcg_gen_qemu_st_i32(val, addr64, index, OPC); \
+ gen_helper_aie_llsc_st_post(cpu_env); \
+ tcg_temp_free(addr64); \
+}
+
+#define DO_GEN_ST_LOCKED(SUFF, OPC) \
+static inline \
+void gen_aa32_st##SUFF##_locked(TCGv_i32 val, TCGv_i32 addr, int index) \
+{ \
+ TCGv addr64 = tcg_temp_new(); \
+ tcg_gen_extu_i32_i64(addr64, addr); \
tcg_gen_qemu_st_i32(val, addr64, index, OPC); \
tcg_temp_free(addr64); \
}
@@ -941,7 +957,29 @@ static inline void gen_aa32_st64(TCGv_i64 val, TCGv_i32 addr, int index)
{
TCGv addr64 = tcg_temp_new();
tcg_gen_extu_i32_i64(addr64, addr);
+ gen_helper_aie_llsc_st_pre(cpu_env, addr64);
tcg_gen_qemu_st_i64(val, addr64, index, MO_TEQ);
+ gen_helper_aie_llsc_st_post(cpu_env);
+ tcg_temp_free(addr64);
+}
+
+static inline void gen_aa32_aie_insert_lock(TCGv_i32 addr)
+{
+ TCGv addr64 = tcg_temp_new();
+
+ addr64 = tcg_temp_new();
+ tcg_gen_extu_i32_i64(addr64, addr);
+ gen_helper_aie_insert_lock(cpu_env, addr64);
+ tcg_temp_free(addr64);
+}
+
+static inline void gen_aa32_aie_ld_lock(TCGv_i32 addr)
+{
+ TCGv addr64 = tcg_temp_new();
+
+ addr64 = tcg_temp_new();
+ tcg_gen_extu_i32_i64(addr64, addr);
+ gen_helper_aie_ld_lock(cpu_env, addr64);
tcg_temp_free(addr64);
}
@@ -955,6 +993,9 @@ DO_GEN_LD(32u, MO_TEUL)
DO_GEN_ST(8, MO_UB)
DO_GEN_ST(16, MO_TEUW)
DO_GEN_ST(32, MO_TEUL)
+DO_GEN_ST_LOCKED(8, MO_UB)
+DO_GEN_ST_LOCKED(16, MO_TEUW)
+DO_GEN_ST_LOCKED(32, MO_TEUL)
static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
{
@@ -7372,15 +7413,6 @@ static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
tcg_gen_or_i32(cpu_ZF, lo, hi);
}
-/* Load/Store exclusive instructions are implemented by remembering
- the value/address loaded, and seeing if these are the same
- when the store is performed. This should be sufficient to implement
- the architecturally mandated semantics, and avoids having to monitor
- regular stores.
-
- In system emulation mode only one CPU will be running at once, so
- this sequence is effectively atomic. In user emulation mode we
- throw an exception and handle the atomic operation elsewhere. */
static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
TCGv_i32 addr, int size)
{
@@ -7388,6 +7420,7 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
s->is_ldex = true;
+ gen_aa32_aie_insert_lock(addr);
switch (size) {
case 0:
gen_aa32_ld8u(tmp, addr, get_mem_index(s));
@@ -7410,96 +7443,44 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
tcg_gen_addi_i32(tmp2, addr, 4);
gen_aa32_ld32u(tmp3, tmp2, get_mem_index(s));
tcg_temp_free_i32(tmp2);
- tcg_gen_concat_i32_i64(cpu_exclusive_val, tmp, tmp3);
store_reg(s, rt2, tmp3);
- } else {
- tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
}
store_reg(s, rt, tmp);
- tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
+ gen_helper_aie_unlock(cpu_env);
}
static void gen_clrex(DisasContext *s)
{
- tcg_gen_movi_i64(cpu_exclusive_addr, -1);
+ gen_helper_aie_clear(cpu_env);
}
-#ifdef CONFIG_USER_ONLY
-static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
- TCGv_i32 addr, int size)
-{
- tcg_gen_extu_i32_i64(cpu_exclusive_test, addr);
- tcg_gen_movi_i32(cpu_exclusive_info,
- size | (rd << 4) | (rt << 8) | (rt2 << 12));
- gen_exception_internal_insn(s, 4, EXCP_STREX);
-}
-#else
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
TCGv_i32 addr, int size)
{
TCGv_i32 tmp;
- TCGv_i64 val64, extaddr;
TCGLabel *done_label;
TCGLabel *fail_label;
- /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
- [addr] = {Rt};
- {Rd} = 0;
- } else {
- {Rd} = 1;
- } */
fail_label = gen_new_label();
done_label = gen_new_label();
- extaddr = tcg_temp_new_i64();
- tcg_gen_extu_i32_i64(extaddr, addr);
- tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
- tcg_temp_free_i64(extaddr);
tmp = tcg_temp_new_i32();
- switch (size) {
- case 0:
- gen_aa32_ld8u(tmp, addr, get_mem_index(s));
- break;
- case 1:
- gen_aa32_ld16u(tmp, addr, get_mem_index(s));
- break;
- case 2:
- case 3:
- gen_aa32_ld32u(tmp, addr, get_mem_index(s));
- break;
- default:
- abort();
- }
-
- val64 = tcg_temp_new_i64();
- if (size == 3) {
- TCGv_i32 tmp2 = tcg_temp_new_i32();
- TCGv_i32 tmp3 = tcg_temp_new_i32();
- tcg_gen_addi_i32(tmp2, addr, 4);
- gen_aa32_ld32u(tmp3, tmp2, get_mem_index(s));
- tcg_temp_free_i32(tmp2);
- tcg_gen_concat_i32_i64(val64, tmp, tmp3);
- tcg_temp_free_i32(tmp3);
- } else {
- tcg_gen_extu_i32_i64(val64, tmp);
- }
+ gen_helper_aie_contains_lock(tmp, cpu_env);
+ tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, fail_label);
tcg_temp_free_i32(tmp);
- tcg_gen_brcond_i64(TCG_COND_NE, val64, cpu_exclusive_val, fail_label);
- tcg_temp_free_i64(val64);
-
tmp = load_reg(s, rt);
switch (size) {
case 0:
- gen_aa32_st8(tmp, addr, get_mem_index(s));
+ gen_aa32_st8_locked(tmp, addr, get_mem_index(s));
break;
case 1:
- gen_aa32_st16(tmp, addr, get_mem_index(s));
+ gen_aa32_st16_locked(tmp, addr, get_mem_index(s));
break;
case 2:
case 3:
- gen_aa32_st32(tmp, addr, get_mem_index(s));
+ gen_aa32_st32_locked(tmp, addr, get_mem_index(s));
break;
default:
abort();
@@ -7508,17 +7489,16 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
if (size == 3) {
tcg_gen_addi_i32(addr, addr, 4);
tmp = load_reg(s, rt2);
- gen_aa32_st32(tmp, addr, get_mem_index(s));
+ gen_aa32_st32_locked(tmp, addr, get_mem_index(s));
tcg_temp_free_i32(tmp);
}
tcg_gen_movi_i32(cpu_R[rd], 0);
+ gen_helper_aie_unlock__done(cpu_env);
tcg_gen_br(done_label);
gen_set_label(fail_label);
tcg_gen_movi_i32(cpu_R[rd], 1);
gen_set_label(done_label);
- tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
-#endif
/* gen_srs:
* @env: CPUARMState
@@ -8401,21 +8381,23 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn)
tcg_temp_free_i32(addr);
} else {
/* SWP instruction */
+ int size8 = insn & (1 << 22);
+
rm = (insn) & 0xf;
- /* ??? This is not really atomic. However we know
- we never have multiple CPUs running in parallel,
- so it is good enough. */
addr = load_reg(s, rn);
tmp = load_reg(s, rm);
tmp2 = tcg_temp_new_i32();
- if (insn & (1 << 22)) {
+ gen_helper_aie_llsc_st_tracking_enable(cpu_env);
+ gen_aa32_aie_ld_lock(addr);
+ if (size8) {
gen_aa32_ld8u(tmp2, addr, get_mem_index(s));
- gen_aa32_st8(tmp, addr, get_mem_index(s));
+ gen_aa32_st8_locked(tmp, addr, get_mem_index(s));
} else {
gen_aa32_ld32u(tmp2, addr, get_mem_index(s));
- gen_aa32_st32(tmp, addr, get_mem_index(s));
+ gen_aa32_st32_locked(tmp, addr, get_mem_index(s));
}
+ gen_helper_aie_unlock__done(cpu_env);
tcg_temp_free_i32(tmp);
tcg_temp_free_i32(addr);
store_reg(s, rd, tmp2);
--
1.8.3
- [Qemu-devel] [RFC 0/8] Helper-based Atomic Instruction Emulation (AIE), Emilio G. Cota, 2015/05/08
- [Qemu-devel] [RFC 1/8] cputlb: add physical address to CPUTLBEntry, Emilio G. Cota, 2015/05/08
- [Qemu-devel] [RFC 3/8] tiny_set: add module to test for membership in a tiny set of pointers, Emilio G. Cota, 2015/05/08
- [Qemu-devel] [RFC 2/8] softmmu: add helpers to get ld/st physical addresses, Emilio G. Cota, 2015/05/08
- [Qemu-devel] [RFC 4/8] radix-tree: add generic lockless radix tree module, Emilio G. Cota, 2015/05/08
- [Qemu-devel] [RFC 5/8] aie: add module for Atomic Instruction Emulation, Emilio G. Cota, 2015/05/08
- [Qemu-devel] [RFC 6/8] aie: add target helpers, Emilio G. Cota, 2015/05/08
- [Qemu-devel] [RFC 7/8] target-arm: emulate atomic instructions using AIE, Emilio G. Cota <=
- [Qemu-devel] [RFC 8/8] target-i386: emulate atomic instructions using AIE, Emilio G. Cota, 2015/05/08
- Re: [Qemu-devel] [RFC 0/8] Helper-based Atomic Instruction Emulation (AIE), Frederic Konrad, 2015/05/11