[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH for-4.0 v2 30/37] tcg/i386: Adjust TCG_TARGET_HAS_MEMORY_BSWAP
From: |
Richard Henderson |
Subject: |
[Qemu-devel] [PATCH for-4.0 v2 30/37] tcg/i386: Adjust TCG_TARGET_HAS_MEMORY_BSWAP |
Date: |
Fri, 23 Nov 2018 15:45:51 +0100 |
TCG_TARGET_HAS_MEMORY_BSWAP is now always true for softmmu, and true for
user-only when movbe is available. In the softmmu case we always have
call-clobbered scratch registers available, and having the bswap in the
softmmu thunk maximizes code sharing.
For user-only without movbe, leave the byte swapping to generic code.
Signed-off-by: Richard Henderson <address@hidden>
---
tcg/i386/tcg-target.h | 5 ++
tcg/i386/tcg-target.inc.c | 122 ++++++++++++++++++++++++--------------
2 files changed, 82 insertions(+), 45 deletions(-)
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 212ba554e9..2d7cbb5dd6 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -101,6 +101,7 @@ extern bool have_bmi1;
extern bool have_popcnt;
extern bool have_avx1;
extern bool have_avx2;
+extern bool have_movbe;
/* optional instructions */
#define TCG_TARGET_HAS_div2_i32 1
@@ -219,7 +220,11 @@ static inline void tb_target_set_jmp_target(uintptr_t
tc_ptr,
#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
+#ifdef CONFIG_SOFTMMU
#define TCG_TARGET_HAS_MEMORY_BSWAP 1
+#else
+#define TCG_TARGET_HAS_MEMORY_BSWAP have_movbe
+#endif
#ifdef CONFIG_SOFTMMU
#define TCG_TARGET_NEED_LDST_OOL_LABELS
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index 5c68cbd43d..76235e90c9 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -158,13 +158,12 @@ bool have_bmi1;
bool have_popcnt;
bool have_avx1;
bool have_avx2;
+bool have_movbe;
#ifdef CONFIG_CPUID_H
-static bool have_movbe;
static bool have_bmi2;
static bool have_lzcnt;
#else
-# define have_movbe 0
# define have_bmi2 0
# define have_lzcnt 0
#endif
@@ -1818,13 +1817,24 @@ static void tcg_out_qemu_ld_direct(TCGContext *s,
TCGReg datalo, TCGReg datahi,
TCGReg base, int index, intptr_t ofs,
int seg, TCGMemOp memop)
{
- const TCGMemOp real_bswap = memop & MO_BSWAP;
- TCGMemOp bswap = real_bswap;
+ bool use_bswap = memop & MO_BSWAP;
+ bool use_movbe = false;
int movop = OPC_MOVL_GvEv;
- if (have_movbe && real_bswap) {
- bswap = 0;
- movop = OPC_MOVBE_GyMy;
+ /*
+ * Do big-endian loads with movbe or softmmu.
+ * User-only without movbe will have its swapping done generically.
+ */
+ if (use_bswap) {
+ if (have_movbe) {
+ use_bswap = false;
+ use_movbe = true;
+ movop = OPC_MOVBE_GyMy;
+ } else {
+#ifndef CONFIG_SOFTMMU
+ g_assert_not_reached();
+#endif
+ }
}
switch (memop & MO_SSIZE) {
@@ -1837,40 +1847,52 @@ static void tcg_out_qemu_ld_direct(TCGContext *s,
TCGReg datalo, TCGReg datahi,
base, index, 0, ofs);
break;
case MO_UW:
- tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
- base, index, 0, ofs);
- if (real_bswap) {
- tcg_out_rolw_8(s, datalo);
- }
- break;
- case MO_SW:
- if (real_bswap) {
- if (have_movbe) {
+ if (use_movbe) {
+ /* There is no extending movbe; only low 16-bits are modified. */
+ if (datalo != base && datalo != index) {
+ /* XOR zeroes the register, and also breaks dependency chains. */
+ tgen_arithr(s, ARITH_XOR, datalo, datalo);
tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
datalo, base, index, 0, ofs);
} else {
- tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
- base, index, 0, ofs);
+ tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
+ datalo, base, index, 0, ofs);
+ tcg_out_ext16u(s, datalo, datalo);
+ }
+ } else {
+ tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
+ base, index, 0, ofs);
+ if (use_bswap) {
tcg_out_rolw_8(s, datalo);
}
- tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
+ }
+ break;
+ case MO_SW:
+ if (use_movbe) {
+ tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
+ datalo, base, index, 0, ofs);
+ tcg_out_ext16s(s, datalo, datalo, P_REXW);
} else {
tcg_out_modrm_sib_offset(s, OPC_MOVSWL + P_REXW + seg,
datalo, base, index, 0, ofs);
+ if (use_bswap) {
+ tcg_out_rolw_8(s, datalo);
+ tcg_out_ext16s(s, datalo, datalo, P_REXW);
+ }
}
break;
case MO_UL:
tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
- if (bswap) {
+ if (use_bswap) {
tcg_out_bswap32(s, datalo);
}
break;
#if TCG_TARGET_REG_BITS == 64
case MO_SL:
- if (real_bswap) {
+ if (use_bswap || use_movbe) {
tcg_out_modrm_sib_offset(s, movop + seg, datalo,
base, index, 0, ofs);
- if (bswap) {
+ if (use_bswap) {
tcg_out_bswap32(s, datalo);
}
tcg_out_ext32s(s, datalo, datalo);
@@ -1884,12 +1906,12 @@ static void tcg_out_qemu_ld_direct(TCGContext *s,
TCGReg datalo, TCGReg datahi,
if (TCG_TARGET_REG_BITS == 64) {
tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
base, index, 0, ofs);
- if (bswap) {
+ if (use_bswap) {
tcg_out_bswap64(s, datalo);
}
} else {
- if (real_bswap) {
- int t = datalo;
+ if (use_bswap || use_movbe) {
+ TCGReg t = datalo;
datalo = datahi;
datahi = t;
}
@@ -1904,14 +1926,14 @@ static void tcg_out_qemu_ld_direct(TCGContext *s,
TCGReg datalo, TCGReg datahi,
tcg_out_modrm_sib_offset(s, movop + seg, datalo,
base, index, 0, ofs);
}
- if (bswap) {
+ if (use_bswap) {
tcg_out_bswap32(s, datalo);
tcg_out_bswap32(s, datahi);
}
}
break;
default:
- tcg_abort();
+ g_assert_not_reached();
}
}
@@ -1991,24 +2013,34 @@ static void tcg_out_qemu_st_direct(TCGContext *s,
TCGReg datalo, TCGReg datahi,
TCGReg base, intptr_t ofs, int seg,
TCGMemOp memop)
{
- /* ??? Ideally we wouldn't need a scratch register. For user-only,
- we could perform the bswap twice to restore the original value
- instead of moving to the scratch. But as it is, the L constraint
- means that TCG_REG_L0 is definitely free here. */
const TCGReg scratch = TCG_REG_L0;
- const TCGMemOp real_bswap = memop & MO_BSWAP;
- TCGMemOp bswap = real_bswap;
+ bool use_bswap = memop & MO_BSWAP;
+ bool use_movbe = false;
int movop = OPC_MOVL_EvGv;
- if (have_movbe && real_bswap) {
- bswap = 0;
- movop = OPC_MOVBE_MyGy;
+ /*
+ * Do big-endian stores with movbe or softmmu.
+ * User-only without movbe will have its swapping done generically.
+ */
+ if (use_bswap) {
+ if (have_movbe) {
+ use_bswap = false;
+ use_movbe = true;
+ movop = OPC_MOVBE_MyGy;
+ } else {
+#ifndef CONFIG_SOFTMMU
+ g_assert_not_reached();
+#endif
+ }
}
switch (memop & MO_SIZE) {
case MO_8:
- /* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
- Use the scratch register if necessary. */
+ /*
+ * In 32-bit mode, 8-bit stores can only happen from [abcd]x.
+ * ??? Adjust constraints such that this is forced, then
+ * we won't need a scratch at all for user-only.
+ */
if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
datalo = scratch;
@@ -2017,7 +2049,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg
datalo, TCGReg datahi,
datalo, base, ofs);
break;
case MO_16:
- if (bswap) {
+ if (use_bswap) {
tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
tcg_out_rolw_8(s, scratch);
datalo = scratch;
@@ -2025,7 +2057,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg
datalo, TCGReg datahi,
tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs);
break;
case MO_32:
- if (bswap) {
+ if (use_bswap) {
tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
tcg_out_bswap32(s, scratch);
datalo = scratch;
@@ -2034,13 +2066,13 @@ static void tcg_out_qemu_st_direct(TCGContext *s,
TCGReg datalo, TCGReg datahi,
break;
case MO_64:
if (TCG_TARGET_REG_BITS == 64) {
- if (bswap) {
+ if (use_bswap) {
tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
tcg_out_bswap64(s, scratch);
datalo = scratch;
}
tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs);
- } else if (bswap) {
+ } else if (use_bswap) {
tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
tcg_out_bswap32(s, scratch);
tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
@@ -2048,8 +2080,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg
datalo, TCGReg datahi,
tcg_out_bswap32(s, scratch);
tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
} else {
- if (real_bswap) {
- int t = datalo;
+ if (use_movbe) {
+ TCGReg t = datalo;
datalo = datahi;
datahi = t;
}
@@ -2058,7 +2090,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg
datalo, TCGReg datahi,
}
break;
default:
- tcg_abort();
+ g_assert_not_reached();
}
}
--
2.17.2
- [Qemu-devel] [PATCH for-4.0 v2 20/37] tcg/ppc: Parameterize the temps for tcg_out_tlb_read, (continued)
- [Qemu-devel] [PATCH for-4.0 v2 20/37] tcg/ppc: Parameterize the temps for tcg_out_tlb_read, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 34/37] tcg/i386: Restrict user-only qemu_st_i32 values to q-regs, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 36/37] tcg/i386: Require segment syscalls to succeed, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 31/37] tcg/aarch64: Set TCG_TARGET_HAS_MEMORY_BSWAP to false, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 23/37] tcg/ppc: Change TCG_TARGET_CALL_ALIGN_ARGS to bool, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 25/37] tcg/ppc: Use TCG_TARGET_NEED_LDST_OOL_LABELS, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 28/37] tcg/optimize: Optimize bswap, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 21/37] tcg/ppc: Split out tcg_out_call_int, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 26/37] tcg: Clean up generic bswap32, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 19/37] tcg/arm: Use TCG_TARGET_NEED_LDST_OOL_LABELS, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 30/37] tcg/i386: Adjust TCG_TARGET_HAS_MEMORY_BSWAP,
Richard Henderson <=
- [Qemu-devel] [PATCH for-4.0 v2 33/37] tcg/i386: Propagate is64 to tcg_out_qemu_ld_direct, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 35/37] tcg/i386: Add setup_guest_base_seg for FreeBSD, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 37/37] tcg/i386: Remove L constraint, Richard Henderson, 2018/11/23
- [Qemu-devel] [PATCH for-4.0 v2 32/37] tcg/arm: Set TCG_TARGET_HAS_MEMORY_BSWAP to false for user-only, Richard Henderson, 2018/11/23
- Re: [Qemu-devel] [PATCH for-4.0 v2 00/37] tcg: Assorted cleanups, no-reply, 2018/11/23
- Re: [Qemu-devel] [PATCH for-4.0 v2 00/37] tcg: Assorted cleanups, Emilio G. Cota, 2018/11/25