[Qemu-devel] [PATCH 12/16] target-sparc: Allow 4-byte alignment on fp mem ops

qemu-devel

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH 12/16] target-sparc: Allow 4-byte alignment on fp mem ops

From:	Richard Henderson
Subject:	[Qemu-devel] [PATCH 12/16] target-sparc: Allow 4-byte alignment on fp mem ops
Date:	Mon, 10 Oct 2016 10:17:04 -0500

The cpu is allowed to require stricter alignment on these 8- and 16-byte
operations, and the OS is required to fix up the accesses as necessary,
so the previous code was not wrong.

However, we can easily handle this misalignment for all direct 8-byte
operations and for direct 16-byte loads.

We must retain 16-byte alignment for 16-byte stores, so that we don't have
to probe for writability of a second page before performing the first of
two 8-byte stores.  We also retain 8-byte alignment for no-fault loads,
since they are rare and it's not worth extending the helpers for this.

Signed-off-by: Richard Henderson <address@hidden>
---
 target-sparc/translate.c | 86 +++++++++++++++++++++++++-----------------------
 1 file changed, 44 insertions(+), 42 deletions(-)

diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 4df5115..e55ca1d 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -2459,6 +2459,7 @@ static void gen_ldf_asi(DisasContext *dc, TCGv addr,
 {
     DisasASI da = get_asi(dc, insn, (size == 4 ? MO_TEUL : MO_TEQ));
     TCGv_i32 d32;
+    TCGv_i64 d64;
 
     switch (da.type) {
     case GET_ASI_EXCP:
@@ -2473,12 +2474,17 @@ static void gen_ldf_asi(DisasContext *dc, TCGv addr,
             gen_store_fpr_F(dc, rd, d32);
             break;
         case 8:
-            tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2], addr, da.mem_idx, da.memop);
+            tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2], addr, da.mem_idx,
+                                da.memop | MO_ALIGN_4);
             break;
         case 16:
-            tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2], addr, da.mem_idx, da.memop);
+            d64 = tcg_temp_new_i64();
+            tcg_gen_qemu_ld_i64(d64, addr, da.mem_idx, da.memop | MO_ALIGN_4);
             tcg_gen_addi_tl(addr, addr, 8);
-            tcg_gen_qemu_ld_i64(cpu_fpr[rd/2+1], addr, da.mem_idx, da.memop);
+            tcg_gen_qemu_ld_i64(cpu_fpr[rd/2+1], addr, da.mem_idx,
+                                da.memop | MO_ALIGN_4);
+            tcg_gen_mov_i64(cpu_fpr[rd / 2], d64);
+            tcg_temp_free_i64(d64);
             break;
         default:
             g_assert_not_reached();
@@ -2534,22 +2540,23 @@ static void gen_ldf_asi(DisasContext *dc, TCGv addr,
                but we can just use the integer asi helper for them.  */
             switch (size) {
             case 4:
-                {
-                    TCGv d64 = tcg_temp_new_i64();
-                    gen_helper_ld_asi(d64, cpu_env, addr, r_asi, r_mop);
-                    d32 = gen_dest_fpr_F(dc);
-                    tcg_gen_extrl_i64_i32(d32, d64);
-                    tcg_temp_free_i64(d64);
-                    gen_store_fpr_F(dc, rd, d32);
-                }
+                d64 = tcg_temp_new_i64();
+                gen_helper_ld_asi(d64, cpu_env, addr, r_asi, r_mop);
+                d32 = gen_dest_fpr_F(dc);
+                tcg_gen_extrl_i64_i32(d32, d64);
+                tcg_temp_free_i64(d64);
+                gen_store_fpr_F(dc, rd, d32);
                 break;
             case 8:
                 gen_helper_ld_asi(cpu_fpr[rd / 2], cpu_env, addr, r_asi, r_mop);
                 break;
             case 16:
-                gen_helper_ld_asi(cpu_fpr[rd / 2], cpu_env, addr, r_asi, r_mop);
+                d64 = tcg_temp_new_i64();
+                gen_helper_ld_asi(d64, cpu_env, addr, r_asi, r_mop);
                 tcg_gen_addi_tl(addr, addr, 8);
                 gen_helper_ld_asi(cpu_fpr[rd/2+1], cpu_env, addr, r_asi, r_mop);
+                tcg_gen_mov_i64(cpu_fpr[rd / 2], d64);
+                tcg_temp_free_i64(d64);
                 break;
             default:
                 g_assert_not_reached();
@@ -2579,15 +2586,15 @@ static void gen_stf_asi(DisasContext *dc, TCGv addr,
             tcg_gen_qemu_st_i32(d32, addr, da.mem_idx, da.memop);
             break;
         case 8:
-            /* ??? Only 4-byte alignment required.  However, it is legal
-               for the cpu to signal the alignment fault, and the OS trap
-               handler is required to fix it up.  */
-            tcg_gen_qemu_st_i64(cpu_fpr[rd / 2], addr, da.mem_idx, da.memop);
+            tcg_gen_qemu_st_i64(cpu_fpr[rd / 2], addr, da.mem_idx,
+                                da.memop | MO_ALIGN_4);
             break;
         case 16:
-            /* Only 4-byte alignment required.  See above.  Requiring
-               16-byte alignment here avoids having to probe the second
-               page before performing the first write.  */
+            /* Only 4-byte alignment required.  However, it is legal for the
+               cpu to signal the alignment fault, and the OS trap handler is
+               required to fix it up.  Requiring 16-byte alignment here avoids
+               having to probe the second page before performing the first
+               write.  */
             tcg_gen_qemu_st_i64(cpu_fpr[rd / 2], addr, da.mem_idx,
                                 da.memop | MO_ALIGN_16);
             tcg_gen_addi_tl(addr, addr, 8);
@@ -5385,18 +5392,15 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
             skip_move: ;
 #endif
             } else if (xop >= 0x20 && xop < 0x24) {
-                TCGv t0;
-
                 if (gen_trap_ifnofpu(dc)) {
                     goto jmp_insn;
                 }
                 switch (xop) {
                 case 0x20:      /* ldf, load fpreg */
                     gen_address_mask(dc, cpu_addr);
-                    t0 = get_temp_tl(dc);
-                    tcg_gen_qemu_ld32u(t0, cpu_addr, dc->mem_idx);
                     cpu_dst_32 = gen_dest_fpr_F(dc);
-                    tcg_gen_trunc_tl_i32(cpu_dst_32, t0);
+                    tcg_gen_qemu_ld_i32(cpu_dst_32, cpu_addr,
+                                        dc->mem_idx, MO_TEUL);
                     gen_store_fpr_F(dc, rd, cpu_dst_32);
                     break;
                 case 0x21:      /* ldfsr, V9 ldxfsr */
@@ -5404,26 +5408,28 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                     gen_address_mask(dc, cpu_addr);
                     if (rd == 1) {
                         TCGv_i64 t64 = tcg_temp_new_i64();
-                        tcg_gen_qemu_ld64(t64, cpu_addr, dc->mem_idx);
+                        tcg_gen_qemu_ld_i64(t64, cpu_addr,
+                                            dc->mem_idx, MO_TEQ);
                         gen_helper_ldxfsr(cpu_fsr, cpu_env, cpu_fsr, t64);
                         tcg_temp_free_i64(t64);
                         break;
                     }
 #endif
                     cpu_dst_32 = get_temp_i32(dc);
-                    t0 = get_temp_tl(dc);
-                    tcg_gen_qemu_ld32u(t0, cpu_addr, dc->mem_idx);
-                    tcg_gen_trunc_tl_i32(cpu_dst_32, t0);
+                    tcg_gen_qemu_ld_i32(cpu_dst_32, cpu_addr,
+                                        dc->mem_idx, MO_TEUL);
                     gen_helper_ldfsr(cpu_fsr, cpu_env, cpu_fsr, cpu_dst_32);
                     break;
                 case 0x22:      /* ldqf, load quad fpreg */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
                     gen_address_mask(dc, cpu_addr);
                     cpu_src1_64 = tcg_temp_new_i64();
-                    tcg_gen_qemu_ld64(cpu_src1_64, cpu_addr, dc->mem_idx);
+                    tcg_gen_qemu_ld_i64(cpu_src1_64, cpu_addr, dc->mem_idx,
+                                        MO_TEQ | MO_ALIGN_4);
                     tcg_gen_addi_tl(cpu_addr, cpu_addr, 8);
                     cpu_src2_64 = tcg_temp_new_i64();
-                    tcg_gen_qemu_ld64(cpu_src2_64, cpu_addr, dc->mem_idx);
+                    tcg_gen_qemu_ld_i64(cpu_src2_64, cpu_addr, dc->mem_idx,
+                                        MO_TEQ | MO_ALIGN_4);
                     gen_store_fpr_Q(dc, rd, cpu_src1_64, cpu_src2_64);
                     tcg_temp_free_i64(cpu_src1_64);
                     tcg_temp_free_i64(cpu_src2_64);
@@ -5431,7 +5437,8 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                 case 0x23:      /* lddf, load double fpreg */
                     gen_address_mask(dc, cpu_addr);
                     cpu_dst_64 = gen_dest_fpr_D(dc, rd);
-                    tcg_gen_qemu_ld64(cpu_dst_64, cpu_addr, dc->mem_idx);
+                    tcg_gen_qemu_ld_i64(cpu_dst_64, cpu_addr, dc->mem_idx,
+                                        MO_TEQ | MO_ALIGN_4);
                     gen_store_fpr_D(dc, rd, cpu_dst_64);
                     break;
                 default:
@@ -5504,13 +5511,10 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
                 }
                 switch (xop) {
                 case 0x24: /* stf, store fpreg */
-                    {
-                        TCGv t = get_temp_tl(dc);
-                        gen_address_mask(dc, cpu_addr);
-                        cpu_src1_32 = gen_load_fpr_F(dc, rd);
-                        tcg_gen_ext_i32_tl(t, cpu_src1_32);
-                        tcg_gen_qemu_st32(t, cpu_addr, dc->mem_idx);
-                    }
+                    gen_address_mask(dc, cpu_addr);
+                    cpu_src1_32 = gen_load_fpr_F(dc, rd);
+                    tcg_gen_qemu_st_i32(cpu_src1_32, cpu_addr,
+                                        dc->mem_idx, MO_TEUL);
                     break;
                 case 0x25: /* stfsr, V9 stxfsr */
                     {
@@ -5556,12 +5560,10 @@ static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
 #endif
 #endif
                 case 0x27: /* stdf, store double fpreg */
-                    /* ??? Only 4-byte alignment required.  However, it is
-                       legal for the cpu to signal the alignment fault, and
-                       the OS trap handler is required to fix it up.  */
                     gen_address_mask(dc, cpu_addr);
                     cpu_src1_64 = gen_load_fpr_D(dc, rd);
-                    tcg_gen_qemu_st64(cpu_src1_64, cpu_addr, dc->mem_idx);
+                    tcg_gen_qemu_st_i64(cpu_src1_64, cpu_addr, dc->mem_idx,
+                                        MO_TEQ | MO_ALIGN_4);
                     break;
                 default:
                     goto illegal_insn;
-- 
2.7.4

[Prev in Thread]

Current Thread

[Next in Thread]

[Qemu-devel] [PATCH 04/16] target-sparc: Use MMU_PHYS_IDX for bypass asis, (continued)
- [Qemu-devel] [PATCH 04/16] target-sparc: Use MMU_PHYS_IDX for bypass asis, Richard Henderson, 2016/10/10
- [Qemu-devel] [PATCH 03/16] target-sparc: Add MMU_PHYS_IDX, Richard Henderson, 2016/10/10
- [Qemu-devel] [PATCH 05/16] target-sparc: Handle more twinx asis, Richard Henderson, 2016/10/10
- [Qemu-devel] [PATCH 02/16] target-sparc: Introduce cpu_raise_exception_ra, Richard Henderson, 2016/10/10
- [Qemu-devel] [PATCH 06/16] target-sparc: Implement swap_asi inline, Richard Henderson, 2016/10/10
- [Qemu-devel] [PATCH 07/16] target-sparc: Implement ldstub_asi inline, Richard Henderson, 2016/10/10
- [Qemu-devel] [PATCH 11/16] target-sparc: Implement ldqf and stqf inline, Richard Henderson, 2016/10/10
- [Qemu-devel] [PATCH 13/16] target-sparc: Remove MMU_MODE*_SUFFIX, Richard Henderson, 2016/10/10
- [Qemu-devel] [PATCH 08/16] target-sparc: Implement cas_asi/casx_asi inline, Richard Henderson, 2016/10/10
- [Qemu-devel] [PATCH 09/16] target-sparc: Implement BCOPY/BFILL inline, Richard Henderson, 2016/10/10
- [Qemu-devel] [PATCH 12/16] target-sparc: Allow 4-byte alignment on fp mem ops, Richard Henderson <=
- [Qemu-devel] [PATCH 14/16] target-sparc: Optmize writeback of cpu_cond, Richard Henderson, 2016/10/10
- [Qemu-devel] [PATCH 10/16] target-sparc: Remove asi helper code handled inline, Richard Henderson, 2016/10/10
- [Qemu-devel] [PATCH 15/16] target-sparc: Use tcg_gen_atomic_xchg_tl, Richard Henderson, 2016/10/10
- [Qemu-devel] [PATCH 16/16] target-sparc: Use tcg_gen_atomic_cmpxchg_tl, Richard Henderson, 2016/10/10
- Re: [Qemu-devel] [PATCH 00/16] target-sparc improvements, no-reply, 2016/10/10
- Re: [Qemu-devel] [PATCH 00/16] target-sparc improvements, Mark Cave-Ayland, 2016/10/11
  - Re: [Qemu-devel] [PATCH 00/16] target-sparc improvements, Richard Henderson, 2016/10/11
    - Re: [Qemu-devel] [PATCH 00/16] target-sparc improvements, Mark Cave-Ayland, 2016/10/15

Prev by Date: [Qemu-devel] [PATCH 09/16] target-sparc: Implement BCOPY/BFILL inline
Next by Date: [Qemu-devel] [PATCH 14/16] target-sparc: Optmize writeback of cpu_cond
Previous by thread: [Qemu-devel] [PATCH 09/16] target-sparc: Implement BCOPY/BFILL inline
Next by thread: [Qemu-devel] [PATCH 14/16] target-sparc: Optmize writeback of cpu_cond
Index(es):
- Date
- Thread