[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH 54/57] target-i386: Implement ADX extension
From: Richard Henderson
Subject: [Qemu-devel] [PATCH 54/57] target-i386: Implement ADX extension
Date: Wed, 23 Jan 2013 20:03:38 -0800
Signed-off-by: Richard Henderson <address@hidden>
---
target-i386/cc_helper.c | 25 +++++++++++
target-i386/cpu.c | 4 +-
target-i386/cpu.h | 4 ++
target-i386/helper.c | 4 ++
target-i386/translate.c | 107 ++++++++++++++++++++++++++++++++++++++++++++++--
5 files changed, 139 insertions(+), 5 deletions(-)
diff --git a/target-i386/cc_helper.c b/target-i386/cc_helper.c
index b5eb11a..d093673 100644
--- a/target-i386/cc_helper.c
+++ b/target-i386/cc_helper.c
@@ -75,6 +75,24 @@ const uint8_t parity_table[256] = {
#endif
+static target_ulong compute_all_adcx(target_ulong dst, target_ulong src1,
+ target_ulong src2)
+{
+ return (src1 & ~CC_C) | (dst * CC_C); /* C from dst, all else from src1; src2 unused */
+}
+
+static target_ulong compute_all_adox(target_ulong dst, target_ulong src1,
+ target_ulong src2)
+{
+ return (src1 & ~CC_O) | (src2 * CC_O); /* O from src2, all else from src1; dst unused */
+}
+
+static target_ulong compute_all_adcox(target_ulong dst, target_ulong src1,
+ target_ulong src2)
+{
+ return (src1 & ~(CC_C | CC_O)) | (dst * CC_C) | (src2 * CC_O); /* C from dst, O from src2, rest from src1 */
+}
+
target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1,
target_ulong src2, int op)
{
@@ -162,6 +180,13 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1,
case CC_OP_BMILGL:
return compute_all_bmilgl(dst, src1, src2);
+ case CC_OP_ADCX:
+ return compute_all_adcx(dst, src1, src2);
+ case CC_OP_ADOX:
+ return compute_all_adox(dst, src1, src2);
+ case CC_OP_ADCOX:
+ return compute_all_adcox(dst, src1, src2);
+
#ifdef TARGET_X86_64
case CC_OP_MULQ:
return compute_all_mulq(dst, src1, src2);
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 30893b6..0433228 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -400,11 +400,11 @@ typedef struct x86_def_t {
CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A)
#define TCG_SVM_FEATURES 0
#define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_SMAP \
- CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_BMI2)
+ CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ADX)
/* missing:
CPUID_7_0_EBX_FSGSBASE, CPUID_7_0_EBX_HLE, CPUID_7_0_EBX_AVX2,
CPUID_7_0_EBX_ERMS, CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM,
- CPUID_7_0_EBX_RDSEED, CPUID_7_0_EBX_ADX */
+ CPUID_7_0_EBX_RDSEED */
/* maintains list of cpu model definitions
*/
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 6153189..5c7ae4d 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -641,6 +641,10 @@ typedef enum {
CC_OP_BMILGL,
CC_OP_BMILGQ,
+ CC_OP_ADCX, /* CC_DST = C, CC_SRC = rest. */
+ CC_OP_ADOX, /* CC_DST = O, CC_SRC = rest. */
+ CC_OP_ADCOX, /* CC_DST = C, CC_SRC2 = O, CC_SRC = rest. */
+
CC_OP_NB,
} CCOp;
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 5163472..8df0b9d 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -113,6 +113,10 @@ static const char *cc_op_str[CC_OP_NB] = {
"BMILGW",
"BMILGL",
"BMILGQ",
+
+ "ADCX",
+ "ADOX",
+ "ADCOX",
};
static void
diff --git a/target-i386/translate.c b/target-i386/translate.c
index ec8234d..fdb06a1 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -208,6 +208,9 @@ static const uint8_t cc_op_live[CC_OP_NB] = {
[CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
[CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
[CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
+ [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
+ [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
+ [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
};
/* Bit set if the global variable is live, but merely an optimization
@@ -994,6 +997,11 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
return (CCPrepare) { .cond = TCG_COND_NE,
.reg = cpu_cc_src, .mask = -1 };
+ case CC_OP_ADCX:
+ case CC_OP_ADCOX:
+ return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
+ .mask = -1, .no_setcond = true };
+
default:
gen_compute_eflags(s);
/* FALLTHRU */
@@ -1021,6 +1029,9 @@ static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
gen_compute_eflags(s);
/* FALLTHRU */
case CC_OP_EFLAGS:
+ case CC_OP_ADCX:
+ case CC_OP_ADOX:
+ case CC_OP_ADCOX:
return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
.mask = CC_S };
default:
@@ -1035,9 +1046,17 @@ static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg)
/* compute eflags.O to reg */
static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg)
{
- gen_compute_eflags(s);
- return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
- .mask = CC_O };
+ switch (s->cc_op) {
+ case CC_OP_ADOX:
+ case CC_OP_ADCOX: /* these ops keep O by itself in CC_SRC2; test it directly */
+ return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
+ .mask = -1, .no_setcond = true };
+
+ default: /* otherwise compute the flags and test the CC_O bit of CC_SRC */
+ gen_compute_eflags(s);
+ return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
+ .mask = CC_O };
+ }
}
/* compute eflags.Z to reg */
@@ -1048,6 +1067,9 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg)
gen_compute_eflags(s);
/* FALLTHRU */
case CC_OP_EFLAGS:
+ case CC_OP_ADCX:
+ case CC_OP_ADOX:
+ case CC_OP_ADCOX:
return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
.mask = CC_Z };
default:
@@ -4171,6 +4193,85 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
gen_helper_pext(cpu_regs[reg], cpu_T[0], cpu_T[1]);
break;
+        case 0x1f6: /* adcx Gy, Ey */
+        case 0x2f6: /* adox Gy, Ey */
+            if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
+                goto illegal_op;
+            } else {
+                TCGv carry_in, carry_out;
+                int end_op;
+
+                ot = (s->dflag == 2 ? OT_QUAD : OT_LONG);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+
+                /* Re-use the carry-out from a previous round.  */
+                TCGV_UNUSED(carry_in);
+                carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2);
+                switch (s->cc_op) {
+                case CC_OP_ADCX: /* C is in CC_DST; O is still in CC_SRC.  */
+                    if (b == 0x1f6) {
+                        carry_in = cpu_cc_dst;
+                        end_op = CC_OP_ADCX;
+                    } else {
+                        end_op = CC_OP_ADCOX;
+                    }
+                    break;
+                case CC_OP_ADOX: /* O is in CC_SRC2; C is still in CC_SRC.  */
+                    if (b == 0x1f6) {
+                        end_op = CC_OP_ADCOX;
+                    } else {
+                        carry_in = cpu_cc_src2;
+                        end_op = CC_OP_ADOX;
+                    }
+                    break;
+                case CC_OP_ADCOX: /* Both split out; our output is our input.  */
+                    end_op = CC_OP_ADCOX;
+                    carry_in = carry_out;
+                    break;
+                default: /* Only the flag we write gets split out of CC_SRC.  */
+                    end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
+                    break;
+                }
+                /* If we can't reuse carry-out, get it out of EFLAGS.  */
+                if (TCGV_IS_UNUSED(carry_in)) {
+                    if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
+                        gen_compute_eflags(s);
+                    }
+                    carry_in = cpu_tmp0;
+                    tcg_gen_shri_tl(carry_in, cpu_cc_src,
+                                    ctz32(b == 0x1f6 ? CC_C : CC_O));
+                    tcg_gen_andi_tl(carry_in, carry_in, 1);
+                }
+
+#ifdef TARGET_X86_64
+                if (ot == OT_LONG) {
+                    /* If we know TL is 64-bit, and we want a 32-bit
+                       result, just do everything in 64-bit arithmetic.  */
+                    tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]);
+                    tcg_gen_ext32u_i64(cpu_T[0], cpu_T[0]);
+                    tcg_gen_add_i64(cpu_T[0], cpu_T[0], cpu_regs[reg]);
+                    tcg_gen_add_i64(cpu_T[0], cpu_T[0], carry_in);
+                    tcg_gen_ext32u_i64(cpu_regs[reg], cpu_T[0]);
+                    tcg_gen_shri_i64(carry_out, cpu_T[0], 32);
+                } else
+#endif
+                {
+                    /* Otherwise add in two steps; either one may carry.  */
+                    tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_regs[reg]);
+                    tcg_gen_setcond_tl(TCG_COND_LTU, cpu_tmp4,
+                                       cpu_T[0], cpu_regs[reg]);
+                    tcg_gen_add_tl(cpu_regs[reg], cpu_T[0], carry_in);
+                    tcg_gen_setcond_tl(TCG_COND_LTU, carry_out,
+                                       cpu_regs[reg], carry_in); /* final sum */
+                    tcg_gen_or_tl(carry_out, carry_out, cpu_tmp4);
+                }
+                /* All other flags are still held in CC_SRC, and the
+                   carry-out is now in CC_DST (adcx) or CC_SRC2 (adox).
+                   All that is left is to record the CC_OP.  */
+                set_cc_op(s, end_op);
+            }
+            break;
+
case 0x1f7: /* shlx Gy, Ey, By */
case 0x2f7: /* sarx Gy, Ey, By */
case 0x3f7: /* shrx Gy, Ey, By */
--
1.7.11.7
- [Qemu-devel] [PATCH 17/57] target-i386: add helper functions to get other flags, (continued)
- [Qemu-devel] [PATCH 17/57] target-i386: add helper functions to get other flags, Richard Henderson, 2013/01/23
- [Qemu-devel] [PATCH 16/57] target-i386: Use gen_update_cc_op everywhere, Richard Henderson, 2013/01/23
- [Qemu-devel] [PATCH 25/57] target-i386: optimize setbe, Richard Henderson, 2013/01/23
- [Qemu-devel] [PATCH 23/57] target-i386: convert gen_compute_eflags_c to TCG, Richard Henderson, 2013/01/23
- [Qemu-devel] [PATCH 28/57] target-i386: introduce CCPrepare, Richard Henderson, 2013/01/23
- [Qemu-devel] [PATCH 30/57] target-i386: use CCPrepare to generate conditional jumps, Richard Henderson, 2013/01/23
- [Qemu-devel] [PATCH 43/57] target-i386: Tidy prefix parsing, Richard Henderson, 2013/01/23
- [Qemu-devel] [PATCH 42/57] target-i386: Make helper_cc_compute_all const, Richard Henderson, 2013/01/23
- [Qemu-devel] [PATCH 53/57] target-i386: Implement RORX, Richard Henderson, 2013/01/23
- [Qemu-devel] [PATCH 55/57] target-i386: Use clz/ctz for bsf/bsr helpers, Richard Henderson, 2013/01/23
- [Qemu-devel] [PATCH 54/57] target-i386: Implement ADX extension,
Richard Henderson <=
- [Qemu-devel] [PATCH 56/57] target-i386: Simplify bsf/bsr flags computation, Richard Henderson, 2013/01/23
- [Qemu-devel] [PATCH 57/57] target-i386: Implement tzcnt and fix lzcnt, Richard Henderson, 2013/01/23
- [Qemu-devel] [PATCH 33/57] target-i386: introduce gen_cmovcc1, Richard Henderson, 2013/01/23
- [Qemu-devel] [PATCH 31/57] target-i386: inline gen_prepare_cc_slow, Richard Henderson, 2013/01/23
- [Qemu-devel] [PATCH 40/57] target-i386: Use CC_SRC2 for ADC and SBB, Richard Henderson, 2013/01/23
- [Qemu-devel] [PATCH 35/57] target-i386: kill cpu_T3, Richard Henderson, 2013/01/23
- [Qemu-devel] [PATCH 49/57] target-i386: Implement BZHI, Richard Henderson, 2013/01/23
- [Qemu-devel] [PATCH 39/57] target-i386: optimize flags checking after sub using CC_SRC2, Richard Henderson, 2013/01/23
- [Qemu-devel] [PATCH 36/57] target-i386: use gen_op for cmps/scas, Richard Henderson, 2013/01/23
- [Qemu-devel] [PATCH 48/57] target-i386: Implement BLSR, BLSMSK, BLSI, Richard Henderson, 2013/01/23