qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH v3 01/30] target/sh4: Use cmpxchg for movco


From: Richard Henderson
Subject: [Qemu-devel] [PATCH v3 01/30] target/sh4: Use cmpxchg for movco
Date: Tue, 18 Jul 2017 10:02:26 -1000

As for other targets, cmpxchg isn't quite right for ll/sc,
suffering from an ABA race, but is sufficient to implement
portable atomic operations.

Signed-off-by: Richard Henderson <address@hidden>

---
V2: Clear lock_addr in rte, do_interrupt, syscall entry, & signal delivery.
    Fix movli to tollerate overlap between R0 and REG(B11_8).
---
 target/sh4/cpu.h       |  3 ++-
 linux-user/main.c      |  1 +
 linux-user/signal.c    |  2 ++
 target/sh4/helper.c    |  2 +-
 target/sh4/translate.c | 72 +++++++++++++++++++++++++++++---------------------
 5 files changed, 48 insertions(+), 32 deletions(-)

diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
index ffb9168..b15116e 100644
--- a/target/sh4/cpu.h
+++ b/target/sh4/cpu.h
@@ -169,7 +169,8 @@ typedef struct CPUSH4State {
     tlb_t itlb[ITLB_SIZE];     /* instruction translation table */
     tlb_t utlb[UTLB_SIZE];     /* unified translation table */
 
-    uint32_t ldst;
+    uint32_t lock_addr;
+    uint32_t lock_value;
 
     /* Fields up to this point are cleared by a CPU reset */
     struct {} end_reset_fields;
diff --git a/linux-user/main.c b/linux-user/main.c
index ad03c9e..30f0ae1 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -2673,6 +2673,7 @@ void cpu_loop(CPUSH4State *env)
         switch (trapnr) {
         case 0x160:
             env->pc += 2;
+            env->lock_addr = -1;
             ret = do_syscall(env,
                              env->gregs[3],
                              env->gregs[4],
diff --git a/linux-user/signal.c b/linux-user/signal.c
index 3d18d1b..ddfd75c 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -3566,6 +3566,7 @@ static void setup_frame(int sig, struct target_sigaction 
*ka,
     regs->gregs[5] = 0;
     regs->gregs[6] = frame_addr += offsetof(typeof(*frame), sc);
     regs->pc = (unsigned long) ka->_sa_handler;
+    regs->lock_addr = -1;
 
     unlock_user_struct(frame, frame_addr, 1);
     return;
@@ -3626,6 +3627,7 @@ static void setup_rt_frame(int sig, struct 
target_sigaction *ka,
     regs->gregs[5] = frame_addr + offsetof(typeof(*frame), info);
     regs->gregs[6] = frame_addr + offsetof(typeof(*frame), uc);
     regs->pc = (unsigned long) ka->_sa_handler;
+    regs->lock_addr = -1;
 
     unlock_user_struct(frame, frame_addr, 1);
     return;
diff --git a/target/sh4/helper.c b/target/sh4/helper.c
index 28d93c2..df7c000 100644
--- a/target/sh4/helper.c
+++ b/target/sh4/helper.c
@@ -87,7 +87,6 @@ void superh_cpu_do_interrupt(CPUState *cs)
     int do_exp, irq_vector = cs->exception_index;
 
     /* prioritize exceptions over interrupts */
-
     do_exp = cs->exception_index != -1;
     do_irq = do_irq && (cs->exception_index == -1);
 
@@ -171,6 +170,7 @@ void superh_cpu_do_interrupt(CPUState *cs)
     env->spc = env->pc;
     env->sgr = env->gregs[15];
     env->sr |= (1u << SR_BL) | (1u << SR_MD) | (1u << SR_RB);
+    env->lock_addr = -1;
 
     if (env->flags & DELAY_SLOT_MASK) {
         /* Branch instruction should be executed again before delay slot. */
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 4c3512f..45f7661 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -68,7 +68,8 @@ static TCGv cpu_gregs[24];
 static TCGv cpu_sr, cpu_sr_m, cpu_sr_q, cpu_sr_t;
 static TCGv cpu_pc, cpu_ssr, cpu_spc, cpu_gbr;
 static TCGv cpu_vbr, cpu_sgr, cpu_dbr, cpu_mach, cpu_macl;
-static TCGv cpu_pr, cpu_fpscr, cpu_fpul, cpu_ldst;
+static TCGv cpu_pr, cpu_fpscr, cpu_fpul;
+static TCGv cpu_lock_addr, cpu_lock_value;
 static TCGv cpu_fregs[32];
 
 /* internal register indexes */
@@ -151,8 +152,12 @@ void sh4_translate_init(void)
                                               offsetof(CPUSH4State,
                                                        delayed_cond),
                                               "_delayed_cond_");
-    cpu_ldst = tcg_global_mem_new_i32(cpu_env,
-                                     offsetof(CPUSH4State, ldst), "_ldst_");
+    cpu_lock_addr = tcg_global_mem_new_i32(cpu_env,
+                                          offsetof(CPUSH4State, lock_addr),
+                                           "_lock_addr_");
+    cpu_lock_value = tcg_global_mem_new_i32(cpu_env,
+                                           offsetof(CPUSH4State, lock_value),
+                                            "_lock_value_");
 
     for (i = 0; i < 32; i++)
         cpu_fregs[i] = tcg_global_mem_new_i32(cpu_env,
@@ -430,6 +435,7 @@ static void _decode_opc(DisasContext * ctx)
        CHECK_NOT_DELAY_SLOT
         gen_write_sr(cpu_ssr);
        tcg_gen_mov_i32(cpu_delayed_pc, cpu_spc);
+        tcg_gen_movi_i32(cpu_lock_addr, -1);
         ctx->envflags |= DELAY_SLOT_RTE;
        ctx->delayed_pc = (uint32_t) - 1;
         ctx->bstate = BS_STOP;
@@ -1527,35 +1533,41 @@ static void _decode_opc(DisasContext * ctx)
         tcg_gen_mov_i32(REG(B11_8), cpu_sr_t);
        return;
     case 0x0073:
-        /* MOVCO.L
-              LDST -> T
-               If (T == 1) R0 -> (Rn)
-               0 -> LDST
-        */
+        /* MOVCO.L: if (lock still held) R0 -> (Rn), T=1; else T=0.
+           Approximate "lock still held" with a comparison of address
+           from the MOVLI insn and a cmpxchg with the value read.  */
         if (ctx->features & SH_FEATURE_SH4A) {
-            TCGLabel *label = gen_new_label();
-            tcg_gen_mov_i32(cpu_sr_t, cpu_ldst);
-           tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ldst, 0, label);
-            tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
-           gen_set_label(label);
-           tcg_gen_movi_i32(cpu_ldst, 0);
-           return;
-       } else
-           break;
+            TCGLabel *fail = gen_new_label();
+            TCGLabel *done = gen_new_label();
+
+            tcg_gen_brcond_i32(TCG_COND_NE, REG(B11_8), cpu_lock_addr, fail);
+
+            tcg_gen_atomic_cmpxchg_i32(cpu_sr_t, REG(B11_8), cpu_lock_value,
+                                       REG(0), ctx->memidx, MO_TEUL);
+            tcg_gen_setcond_i32(TCG_COND_EQ, cpu_sr_t,
+                                cpu_sr_t, cpu_lock_value);
+            tcg_gen_br(done);
+
+            gen_set_label(fail);
+            tcg_gen_movi_i32(cpu_sr_t, 0);
+
+            gen_set_label(done);
+            tcg_gen_movi_i32(cpu_lock_addr, -1);
+            return;
+        } else {
+            break;
+        }
     case 0x0063:
-        /* MOVLI.L @Rm,R0
-               1 -> LDST
-               (Rm) -> R0
-               When interrupt/exception
-               occurred 0 -> LDST
-        */
-       if (ctx->features & SH_FEATURE_SH4A) {
-           tcg_gen_movi_i32(cpu_ldst, 0);
-            tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TESL);
-           tcg_gen_movi_i32(cpu_ldst, 1);
-           return;
-       } else
-           break;
+        /* MOVLI.L @Rm -> R0, and remember the address and value loaded.  */
+        if (ctx->features & SH_FEATURE_SH4A) {
+            tcg_gen_qemu_ld_i32(cpu_lock_value, REG(B11_8),
+                                ctx->memidx, MO_TESL);
+            tcg_gen_mov_i32(cpu_lock_addr, REG(B11_8));
+            tcg_gen_mov_i32(REG(0), cpu_lock_value);
+            return;
+        } else {
+            break;
+        }
     case 0x0093:               /* ocbi @Rn */
        {
             gen_helper_ocbi(cpu_env, REG(B11_8));
-- 
2.9.4




reply via email to

[Prev in Thread] Current Thread [Next in Thread]