qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] PATCH for bugs 661696 and 1248376: target-i386: x87 exception pointers using TCG.


From: Jaume Martí
Subject: Re: [Qemu-devel] PATCH for bugs 661696 and 1248376: target-i386: x87 exception pointers using TCG.
Date: Sun, 22 Jun 2014 16:55:34 +0200

Hello,

The patch I previously provided no longer applies cleanly to the current HEAD,
so I have attached an updated patch. The code can also be pulled from
https://github.com/jmartif/qemu.git
As stated in my previous email, the patch fixes bugs 661696 and 1248376.
Please review and apply.

Best regards,
Jaume


Signed-off-by: Jaume Marti Farriol (address@hidden)
diff --git a/linux-user/signal.c b/linux-user/signal.c
index f3b4378..1392207 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -865,9 +865,9 @@ static void setup_sigcontext(struct target_sigcontext *sc,
     __put_user(env->regs[R_ESP], &sc->esp_at_signal);
     __put_user(env->segs[R_SS].selector, (unsigned int *)&sc->ss);

-        cpu_x86_fsave(env, fpstate_addr, 1);
-        fpstate->status = fpstate->sw;
-        magic = 0xffff;
+    cpu_x86_fsave(env, fpstate_addr);
+    fpstate->status = fpstate->sw;
+    magic = 0xffff;
     __put_user(magic, &fpstate->magic);
     __put_user(fpstate_addr, &sc->fpstate);

@@ -1068,7 +1068,7 @@ restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc, int *peax)
                 if (!access_ok(VERIFY_READ, fpstate_addr,
                                sizeof(struct target_fpstate)))
                         goto badframe;
-                cpu_x86_frstor(env, fpstate_addr, 1);
+                cpu_x86_frstor(env, fpstate_addr);
  }

         *peax = tswapl(sc->eax);
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index b5e1b41..8e00cd5 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -815,10 +815,14 @@ typedef struct CPUX86State {
     uint16_t fpuc;
     uint8_t fptags[8];   /* 0 = valid, 1 = empty */
     FPReg fpregs[8];
-    /* KVM-only so far */
-    uint16_t fpop;
+    union {
+        uint32_t tcg;
+        uint16_t kvm;
+    } fpop;
     uint64_t fpip;
     uint64_t fpdp;
+    uint32_t fpcs;
+    uint32_t fpds;

     /* emulator internal variables */
     float_status fp_status;
@@ -1063,8 +1067,8 @@ floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper);
 /* the following helpers are only usable in user mode simulation as
    they can trigger unexpected exceptions */
 void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector);
-void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32);
-void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32);
+void cpu_x86_fsave(CPUX86State *s, target_ulong ptr);
+void cpu_x86_frstor(CPUX86State *s, target_ulong ptr);

 /* you can call this signal handler from your SIGBUS and SIGSEGV
    signal handlers to inform the virtual CPU of exceptions. non zero
diff --git a/target-i386/fpu_helper.c b/target-i386/fpu_helper.c
index 1b2900d..ce0860e 100644
--- a/target-i386/fpu_helper.c
+++ b/target-i386/fpu_helper.c
@@ -56,6 +56,8 @@
 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)

+#define FPUS(env) ((env->fpus & ~0x3800) | ((env->fpstt & 0x7) << 11))
+
 static inline void fpush(CPUX86State *env)
 {
     env->fpstt = (env->fpstt - 1) & 7;
@@ -604,6 +606,10 @@ void helper_fninit(CPUX86State *env)
     env->fptags[5] = 1;
     env->fptags[6] = 1;
     env->fptags[7] = 1;
+    env->fpip = 0;
+    env->fpcs = 0;
+    env->fpdp = 0;
+    env->fpds = 0;
 }

 /* BCD ops */
@@ -961,13 +967,13 @@ void helper_fxam_ST0(CPUX86State *env)
     }
 }

-void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
+void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32,
+                   int protected_mode)
 {
-    int fpus, fptag, exp, i;
+    int fptag, exp, i;
     uint64_t mant;
     CPU_LDoubleU tmp;

-    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
     fptag = 0;
     for (i = 7; i >= 0; i--) {
         fptag <<= 2;
@@ -987,83 +993,150 @@ void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
             }
         }
     }
+
     if (data32) {
         /* 32 bit */
-        cpu_stl_data(env, ptr, env->fpuc);
-        cpu_stl_data(env, ptr + 4, fpus);
-        cpu_stl_data(env, ptr + 8, fptag);
-        cpu_stl_data(env, ptr + 12, 0); /* fpip */
-        cpu_stl_data(env, ptr + 16, 0); /* fpcs */
-        cpu_stl_data(env, ptr + 20, 0); /* fpoo */
-        cpu_stl_data(env, ptr + 24, 0); /* fpos */
+        cpu_stw_data(env, ptr, env->fpuc);
+        cpu_stw_data(env, ptr + 4, FPUS(env));
+        cpu_stw_data(env, ptr + 8, fptag);
+        if (protected_mode) {
+            cpu_stl_data(env, ptr + 12, env->fpip);
+            cpu_stl_data(env, ptr + 16,
+                        ((env->fpop.tcg & 0x7ff) << 16) | (env->fpcs & 0xffff));
+            cpu_stl_data(env, ptr + 20, env->fpdp);
+            cpu_stl_data(env, ptr + 24, env->fpds);
+        } else {
+            /* Real mode  */
+            cpu_stl_data(env, ptr + 12, env->fpip); /* fpip[15..00] */
+            cpu_stl_data(env, ptr + 16, ((((env->fpip >> 16) & 0xffff) << 12) |
+                        (env->fpop.tcg & 0x7ff))); /* fpip[31..16], fpop */
+            cpu_stl_data(env, ptr + 20, env->fpdp); /* fpdp[15..00] */
+            cpu_stl_data(env, ptr + 24,
+                        (env->fpdp >> 4) & 0xffff000); /* fpdp[31..16] */
+        }
     } else {
         /* 16 bit */
         cpu_stw_data(env, ptr, env->fpuc);
-        cpu_stw_data(env, ptr + 2, fpus);
+        cpu_stw_data(env, ptr + 2, FPUS(env));
         cpu_stw_data(env, ptr + 4, fptag);
-        cpu_stw_data(env, ptr + 6, 0);
-        cpu_stw_data(env, ptr + 8, 0);
-        cpu_stw_data(env, ptr + 10, 0);
-        cpu_stw_data(env, ptr + 12, 0);
+        if (protected_mode) {
+            cpu_stw_data(env, ptr + 6, env->fpip);
+            cpu_stw_data(env, ptr + 8, env->fpcs);
+            cpu_stw_data(env, ptr + 10, env->fpdp);
+            cpu_stw_data(env, ptr + 12, env->fpds);
+        } else {
+            /* Real mode  */
+            cpu_stw_data(env, ptr + 6, env->fpip); /* fpip[15..0] */
+            cpu_stw_data(env, ptr + 8, ((env->fpip >> 4) & 0xf000) |
+                        (env->fpop.tcg & 0x7ff)); /* fpip[19..16], fpop */
+            cpu_stw_data(env, ptr + 10, env->fpdp); /* fpdp[15..0] */
+            cpu_stw_data(env, ptr + 12,
+                        (env->fpdp >> 4) & 0xf000); /* fpdp[19..16] */
+        }
     }
+
+    env->fpip = 0;
+    env->fpcs = 0;
+    env->fpdp = 0;
+    env->fpds = 0;
 }

-void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
+void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32,
+                   int protected_mode)
 {
-    int i, fpus, fptag;
+    int tmp, i, fpus, fptag;

     if (data32) {
+        /* 32 bit */
         env->fpuc = cpu_lduw_data(env, ptr);
         fpus = cpu_lduw_data(env, ptr + 4);
         fptag = cpu_lduw_data(env, ptr + 8);
+        if (protected_mode) {
+            env->fpip = cpu_ldl_data(env, ptr + 12);
+            tmp = cpu_ldl_data(env, ptr + 16);
+            env->fpcs = tmp & 0xffff;
+            env->fpop.tcg = tmp >> 16;
+            env->fpdp = cpu_ldl_data(env, ptr + 20);
+            env->fpds = cpu_lduw_data(env, ptr + 24);
+        } else {
+            /* Real mode */
+            tmp = cpu_ldl_data(env, ptr + 16);
+            env->fpip = ((tmp & 0xffff000) << 4) |
+                        cpu_lduw_data(env, ptr + 12);
+            env->fpop.tcg = tmp & 0x7ff;
+            env->fpdp = (cpu_ldl_data(env, ptr + 24) << 4) |
+                        cpu_lduw_data(env, ptr + 20);
+        }
     } else {
+        /* 16 bit */
         env->fpuc = cpu_lduw_data(env, ptr);
         fpus = cpu_lduw_data(env, ptr + 2);
         fptag = cpu_lduw_data(env, ptr + 4);
+        if (protected_mode) {
+            /* Protected mode  */
+            env->fpip = cpu_lduw_data(env, ptr + 6);
+            env->fpcs = cpu_lduw_data(env, ptr + 8);
+            env->fpdp = cpu_lduw_data(env, ptr + 10);
+            env->fpds = cpu_lduw_data(env, ptr + 12);
+        } else {
+            /* Real mode  */
+            tmp = cpu_lduw_data(env, ptr + 8);
+            env->fpip = ((tmp & 0xf000) << 4) | cpu_lduw_data(env, ptr + 6);
+            env->fpop.tcg = tmp & 0x7ff;
+            env->fpdp = cpu_lduw_data(env, ptr + 12) << 4 |
+                        cpu_lduw_data(env, ptr + 10);
+        }
     }
+
     env->fpstt = (fpus >> 11) & 7;
     env->fpus = fpus & ~0x3800;
     for (i = 0; i < 8; i++) {
         env->fptags[i] = ((fptag & 3) == 3);
         fptag >>= 2;
     }
+
+    env->fpip &= 0xffffffff;
+    env->fpdp &= 0xffffffff;
+    if (!protected_mode) {
+        env->fpcs = 0;
+        env->fpds = 0;
+    }
 }

-void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
+void helper_fsave(CPUX86State *env, target_ulong ptr, int data32,
+                  int protected_mode)
 {
     floatx80 tmp;
     int i;

-    helper_fstenv(env, ptr, data32);
+    helper_fstenv(env, ptr, data32, protected_mode);

-    ptr += (14 << data32);
+    if (data32) {
+        ptr += 28;
+    } else {
+        ptr += 14;
+    }
     for (i = 0; i < 8; i++) {
         tmp = ST(i);
         helper_fstt(env, tmp, ptr);
         ptr += 10;
     }

-    /* fninit */
-    env->fpus = 0;
-    env->fpstt = 0;
-    env->fpuc = 0x37f;
-    env->fptags[0] = 1;
-    env->fptags[1] = 1;
-    env->fptags[2] = 1;
-    env->fptags[3] = 1;
-    env->fptags[4] = 1;
-    env->fptags[5] = 1;
-    env->fptags[6] = 1;
-    env->fptags[7] = 1;
+    helper_fninit(env);
 }

-void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
+void helper_frstor(CPUX86State *env, target_ulong ptr, int data32,
+                   int protected_mode)
 {
     floatx80 tmp;
     int i;

-    helper_fldenv(env, ptr, data32);
-    ptr += (14 << data32);
+    helper_fldenv(env, ptr, data32, protected_mode);
+    if (data32) {
+        ptr += 28;
+    } else {
+        ptr += 14;
+    }

     for (i = 0; i < 8; i++) {
         tmp = helper_fldt(env, ptr);
@@ -1072,21 +1145,22 @@ void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
     }
 }

-#if defined(CONFIG_USER_ONLY)
-void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
+#if defined(CONFIG_USER_ONLY) && defined(TARGET_I386) && TARGET_ABI_BITS == 32
+
+void cpu_x86_fsave(CPUX86State *env, target_ulong ptr)
 {
-    helper_fsave(env, ptr, data32);
+    helper_fsave(env, ptr, 1, 1);
 }

-void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
+void cpu_x86_frstor(CPUX86State *env, target_ulong ptr)
 {
-    helper_frstor(env, ptr, data32);
+    helper_frstor(env, ptr, 1, 1);
 }
 #endif

-void helper_fxsave(CPUX86State *env, target_ulong ptr, int data64)
+void helper_fxsave(CPUX86State *env, target_ulong ptr, int data32, int data64)
 {
-    int fpus, fptag, i, nb_xmm_regs;
+    int i, nb_xmm_regs, fptag;
     floatx80 tmp;
     target_ulong addr;

@@ -1095,25 +1169,36 @@ void helper_fxsave(CPUX86State *env, target_ulong ptr, int data64)
         raise_exception(env, EXCP0D_GPF);
     }

-    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
     fptag = 0;
     for (i = 0; i < 8; i++) {
         fptag |= (env->fptags[i] << i);
     }
+    fptag ^= 0xff;
+
     cpu_stw_data(env, ptr, env->fpuc);
-    cpu_stw_data(env, ptr + 2, fpus);
-    cpu_stw_data(env, ptr + 4, fptag ^ 0xff);
+    cpu_stw_data(env, ptr + 2, FPUS(env));
+    cpu_stw_data(env, ptr + 4, fptag & 0xff);
+    cpu_stw_data(env, ptr + 6, env->fpop.tcg);
+
 #ifdef TARGET_X86_64
     if (data64) {
-        cpu_stq_data(env, ptr + 0x08, 0); /* rip */
-        cpu_stq_data(env, ptr + 0x10, 0); /* rdp */
+        /* 64 bit */
+        cpu_stq_data(env, ptr + 8, env->fpip);
+        cpu_stq_data(env, ptr + 16, env->fpdp);
     } else
 #endif
     {
-        cpu_stl_data(env, ptr + 0x08, 0); /* eip */
-        cpu_stl_data(env, ptr + 0x0c, 0); /* sel  */
-        cpu_stl_data(env, ptr + 0x10, 0); /* dp */
-        cpu_stl_data(env, ptr + 0x14, 0); /* sel  */
+        if (data32) {
+            /* 32 bit */
+            cpu_stl_data(env, ptr + 8, env->fpip);
+            cpu_stl_data(env, ptr + 16, env->fpdp);
+        } else {
+            /* 16 bit */
+            cpu_stw_data(env, ptr + 8, env->fpip);
+            cpu_stw_data(env, ptr + 16, env->fpdp);
+        }
+        cpu_stw_data(env, ptr + 12, env->fpcs & 0xffff);
+        cpu_stw_data(env, ptr + 20, env->fpds & 0xffff);
     }

     addr = ptr + 0x20;
@@ -1146,7 +1231,7 @@ void helper_fxsave(CPUX86State *env, target_ulong ptr, int data64)
     }
 }

-void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
+void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data32, int data64)
 {
     int i, fpus, fptag, nb_xmm_regs;
     floatx80 tmp;
@@ -1167,6 +1252,30 @@ void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
         env->fptags[i] = ((fptag >> i) & 1);
     }

+    env->fpop.tcg = (cpu_lduw_data(env, ptr + 6) >> 5) & 0x7ff;
+
+#ifdef TARGET_X86_64
+    if (data64) {
+        /* 64 bit */
+        env->fpip = cpu_ldq_data(env, ptr + 8);
+        env->fpdp = cpu_ldq_data(env, ptr + 16);
+    } else
+#endif
+    {
+        if (data32) {
+            /* 32 bit */
+            env->fpip = cpu_ldl_data(env, ptr + 8);
+            env->fpdp = cpu_ldl_data(env, ptr + 16);
+        } else {
+            /* 16 bit */
+            env->fpip = cpu_lduw_data(env, ptr + 8);
+            env->fpdp = cpu_lduw_data(env, ptr + 16);
+        }
+
+        env->fpcs = cpu_lduw_data(env, ptr + 12);
+        env->fpds = cpu_lduw_data(env, ptr + 20);
+    }
+
     addr = ptr + 0x20;
     for (i = 0; i < 8; i++) {
         tmp = helper_fldt(env, addr);
@@ -1195,6 +1304,11 @@ void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
             }
         }
     }
+
+    if (!data64) {
+        env->fpip &= 0xffffffff;
+        env->fpdp &= 0xffffffff;
+    }
 }

 void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
diff --git a/target-i386/helper.h b/target-i386/helper.h
index 8eb0145..9c4fd22 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -183,12 +183,12 @@ DEF_HELPER_1(frndint, void, env)
 DEF_HELPER_1(fscale, void, env)
 DEF_HELPER_1(fsin, void, env)
 DEF_HELPER_1(fcos, void, env)
-DEF_HELPER_3(fstenv, void, env, tl, int)
-DEF_HELPER_3(fldenv, void, env, tl, int)
-DEF_HELPER_3(fsave, void, env, tl, int)
-DEF_HELPER_3(frstor, void, env, tl, int)
-DEF_HELPER_3(fxsave, void, env, tl, int)
-DEF_HELPER_3(fxrstor, void, env, tl, int)
+DEF_HELPER_4(fstenv, void, env, tl, int, int)
+DEF_HELPER_4(fldenv, void, env, tl, int, int)
+DEF_HELPER_4(fsave, void, env, tl, int, int)
+DEF_HELPER_4(frstor, void, env, tl, int, int)
+DEF_HELPER_4(fxsave, void, env, tl, int, int)
+DEF_HELPER_4(fxrstor, void, env, tl, int, int)

 DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_1(ctz, TCG_CALL_NO_RWG_SE, tl, tl)
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 4bf0ac9..79a84ce 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -987,7 +987,7 @@ static int kvm_put_fpu(X86CPU *cpu)
     fpu.fsw = env->fpus & ~(7 << 11);
     fpu.fsw |= (env->fpstt & 7) << 11;
     fpu.fcw = env->fpuc;
-    fpu.last_opcode = env->fpop;
+    fpu.last_opcode = env->fpop.kvm;
     fpu.last_ip = env->fpip;
     fpu.last_dp = env->fpdp;
     for (i = 0; i < 8; ++i) {
@@ -1032,7 +1032,7 @@ static int kvm_put_xsave(X86CPU *cpu)
         twd |= (!env->fptags[i]) << i;
     }
     xsave->region[XSAVE_FCW_FSW] = (uint32_t)(swd << 16) + cwd;
-    xsave->region[XSAVE_FTW_FOP] = (uint32_t)(env->fpop << 16) + twd;
+    xsave->region[XSAVE_FTW_FOP] = (uint32_t)(env->fpop.kvm << 16) + twd;
     memcpy(&xsave->region[XSAVE_CWD_RIP], &env->fpip, sizeof(env->fpip));
     memcpy(&xsave->region[XSAVE_CWD_RDP], &env->fpdp, sizeof(env->fpdp));
     memcpy(&xsave->region[XSAVE_ST_SPACE], env->fpregs,
@@ -1298,7 +1298,7 @@ static int kvm_get_fpu(X86CPU *cpu)
     env->fpstt = (fpu.fsw >> 11) & 7;
     env->fpus = fpu.fsw;
     env->fpuc = fpu.fcw;
-    env->fpop = fpu.last_opcode;
+    env->fpop.kvm = fpu.last_opcode;
     env->fpip = fpu.last_ip;
     env->fpdp = fpu.last_dp;
     for (i = 0; i < 8; ++i) {
@@ -1330,7 +1330,7 @@ static int kvm_get_xsave(X86CPU *cpu)
     cwd = (uint16_t)xsave->region[XSAVE_FCW_FSW];
     swd = (uint16_t)(xsave->region[XSAVE_FCW_FSW] >> 16);
     twd = (uint16_t)xsave->region[XSAVE_FTW_FOP];
-    env->fpop = (uint16_t)(xsave->region[XSAVE_FTW_FOP] >> 16);
+    env->fpop.kvm = (uint16_t)(xsave->region[XSAVE_FTW_FOP] >> 16);
     env->fpstt = (swd >> 11) & 7;
     env->fpus = swd;
     env->fpuc = cwd;
diff --git a/target-i386/machine.c b/target-i386/machine.c
index b8dcd2f..70db6aa 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -389,7 +389,7 @@ static bool fpop_ip_dp_needed(void *opaque)
     X86CPU *cpu = opaque;
     CPUX86State *env = &cpu->env;

-    return env->fpop != 0 || env->fpip != 0 || env->fpdp != 0;
+    return env->fpop.kvm != 0 || env->fpip != 0 || env->fpdp != 0;
 }

 static const VMStateDescription vmstate_fpop_ip_dp = {
@@ -397,7 +397,7 @@ static const VMStateDescription vmstate_fpop_ip_dp = {
     .version_id = 1,
     .minimum_version_id = 1,
     .fields = (VMStateField[]) {
-        VMSTATE_UINT16(env.fpop, X86CPU),
+        VMSTATE_UINT16(env.fpop.kvm, X86CPU),
         VMSTATE_UINT64(env.fpip, X86CPU),
         VMSTATE_UINT64(env.fpdp, X86CPU),
         VMSTATE_END_OF_LIST()
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 6fcd824..0d748ee 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -58,6 +58,7 @@
 #endif

 //#define MACRO_TEST   1
+#define IS_PROTECTED_MODE(s) (s->pe && !s->vm86)

 /* global register indexes */
 static TCGv_ptr cpu_env;
@@ -65,6 +66,11 @@ static TCGv cpu_A0;
 static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
 static TCGv_i32 cpu_cc_op;
 static TCGv cpu_regs[CPU_NB_REGS];
+static TCGv_i32 cpu_fpop;
+static TCGv cpu_fpip;
+static TCGv cpu_fpdp;
+static TCGv_i32 cpu_fpds;
+static TCGv_i32 cpu_fpcs;
 /* local temps */
 static TCGv cpu_T[2];
 /* local register indexes (only used inside old micro ops) */
@@ -208,6 +214,62 @@ static const uint8_t cc_op_live[CC_OP_NB] = {
     [CC_OP_CLR] = 0,
 };

+static inline bool non_control_x87_instr(int modrm, int b)
+{
+    int op, mod, rm;
+    switch (b) {
+    case 0xd8 ... 0xdf:
+        /* floats */
+        op = ((b & 7) << 3) | ((modrm >> 3) & 7);
+        mod = (modrm >> 6) & 3;
+        rm = modrm & 7;
+        if (mod != 3) {
+            /* memory */
+            switch (op) {
+            case 0x0c: /* fldenv */
+            case 0x0d: /* fldcw */
+            case 0x0e: /* fstenv, fnstenv */
+            case 0x0f: /* fstcw, fnstcw */
+            case 0x2c: /* frstor */
+            case 0x2e: /* fsave, fnsave */
+            case 0x2f: /* fstsw, fnstsw */
+                return false;
+            default:
+                return true;
+            }
+        } else {
+            /* register */
+            switch (op) {
+            case 0x0a:
+                return false; /* fnop, Illegal op */
+            case 0x0e: /* fdecstp, fincstp */
+            case 0x28: /* ffree */
+                return false;
+            case 0x1c:
+                switch (rm) {
+                case 1: /* feni */
+                    return true;
+                case 2: /* fclex, fnclex */
+                case 3: /* finit, fninit */
+                    return false;
+                case 4: /* fsetpm */
+                    return true;
+                default: /* Illegal op */
+                    return false;
+                }
+            case 0x3c:
+                return false; /* fstsw, fnstsw, Illegal op */
+            default:
+                return true;
+            }
+        }
+    /*case 0x9b: // fwait, wait
+        return false;*/
+    default:
+        return false;
+    }
+}
+
 static void set_cc_op(DisasContext *s, CCOp op)
 {
     int dead;
@@ -1863,7 +1925,7 @@ static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c)
     }
 }

-static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
+static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm, int b)
 {
     target_long disp;
     int havesib;
@@ -1871,6 +1933,7 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
     int index;
     int scale;
     int mod, rm, code, override, must_add_seg;
+    int non_control_float_instr;
     TCGv sum;

     override = s->override;
@@ -1950,6 +2013,13 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
             tcg_gen_addi_tl(cpu_A0, sum, disp);
         }

+        non_control_float_instr = non_control_x87_instr(modrm, b);
+        if (non_control_float_instr) {
+            tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
+            if (s->aflag == MO_32) {
+                tcg_gen_ext32u_tl(cpu_fpdp, cpu_fpdp);
+            }
+        }
         if (must_add_seg) {
             if (override < 0) {
                 if (base == R_EBP || base == R_ESP) {
@@ -1961,6 +2031,12 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)

             tcg_gen_ld_tl(cpu_tmp0, cpu_env,
                           offsetof(CPUX86State, segs[override].base));
+
+            if (non_control_float_instr) {
+                tcg_gen_ld_i32(cpu_fpds, cpu_env,
+                              offsetof(CPUX86State, segs[override].selector));
+            }
+
             if (CODE64(s)) {
                 if (s->aflag == MO_32) {
                     tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
@@ -1970,6 +2046,11 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
             }

             tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+        } else {
+            if (non_control_float_instr) {
+                tcg_gen_ld_i32(cpu_fpds, cpu_env,
+                              offsetof(CPUX86State, segs[R_DS].selector));
+            }
         }

         if (s->aflag == MO_32) {
@@ -2039,8 +2120,22 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
                     override = R_DS;
                 }
             }
+            if (non_control_x87_instr(modrm, b)) {
+                tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
+                tcg_gen_ld_i32(cpu_fpds, cpu_env,
+                              offsetof(CPUX86State, segs[override].selector));
+            }
             gen_op_addl_A0_seg(s, override);
+        } else {
+            if (non_control_x87_instr(modrm, b)) {
+                tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
+                tcg_gen_ld_i32(cpu_fpds, cpu_env,
+                              offsetof(CPUX86State, segs[R_DS].selector));
+            }
         }
+#ifdef TARGET_X86_64
+        tcg_gen_andi_tl(cpu_fpdp, cpu_fpdp, 0xffffffff);
+#endif
         break;

     default:
@@ -2130,7 +2225,7 @@ static void gen_add_A0_ds_seg(DisasContext *s)
 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
    OR_TMP0 */
 static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
-                           TCGMemOp ot, int reg, int is_store)
+                           TCGMemOp ot, int reg, int is_store, int b)
 {
     int mod, rm;

@@ -2147,7 +2242,7 @@ static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
                 gen_op_mov_reg_v(ot, reg, cpu_T[0]);
         }
     } else {
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         if (is_store) {
             if (reg != OR_TMP0)
                 gen_op_mov_v_reg(ot, cpu_T[0], reg);
@@ -2250,7 +2345,7 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
 {
     CCPrepare cc;

-    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);

     cc = gen_prepare_cc(s, b, cpu_T[1]);
     if (cc.mask != -1) {
@@ -3043,7 +3138,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x0e7: /* movntq */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             break;
         case 0x1e7: /* movntdq */
@@ -3051,20 +3146,20 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x12b: /* movntps */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             break;
         case 0x3f0: /* lddqu */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             break;
         case 0x22b: /* movntss */
         case 0x32b: /* movntsd */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             if (b1 & 1) {
                 gen_stq_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
@@ -3076,12 +3171,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x6e: /* movd mm, ea */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0, b);
                 tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
             } else
 #endif
             {
-                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0, b);
                 tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                                  offsetof(CPUX86State,fpregs[reg].mmx));
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
@@ -3091,14 +3186,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x16e: /* movd xmm, ea */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0, b);
                 tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg]));
                 gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T[0]);
             } else
 #endif
             {
-                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0, b);
                 tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg]));
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
@@ -3107,7 +3202,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x6f: /* movq mm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             } else {
                 rm = (modrm & 7);
@@ -3124,7 +3219,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x16f: /* movdqa xmm, ea */
         case 0x26f: /* movdqu xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3134,7 +3229,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x210: /* movss xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, MO_32, cpu_T[0], cpu_A0);
                 tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
                 tcg_gen_movi_tl(cpu_T[0], 0);
@@ -3149,7 +3244,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x310: /* movsd xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
                 tcg_gen_movi_tl(cpu_T[0], 0);
@@ -3164,7 +3259,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x012: /* movlps */
         case 0x112: /* movlpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3176,7 +3271,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x212: /* movsldup */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3192,7 +3287,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x312: /* movddup */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3206,7 +3301,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x016: /* movhps */
         case 0x116: /* movhpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(1)));
             } else {
@@ -3218,7 +3313,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x216: /* movshdup */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3258,13 +3353,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             if (s->dflag == MO_64) {
                 tcg_gen_ld_i64(cpu_T[0], cpu_env,
                                offsetof(CPUX86State,fpregs[reg].mmx));
-                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1, b);
             } else
 #endif
             {
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
                                  offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
-                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1, b);
             }
             break;
         case 0x17e: /* movd ea, xmm */
@@ -3272,18 +3367,18 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             if (s->dflag == MO_64) {
                 tcg_gen_ld_i64(cpu_T[0], cpu_env,
                                offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
-                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1, b);
             } else
 #endif
             {
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
-                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1, b);
             }
             break;
         case 0x27e: /* movq xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3295,7 +3390,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x7f: /* movq ea, mm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             } else {
                 rm = (modrm & 7);
@@ -3310,7 +3405,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x17f: /* movdqa ea, xmm */
         case 0x27f: /* movdqu ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3320,7 +3415,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x211: /* movss ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
                 gen_op_st_v(s, MO_32, cpu_T[0], cpu_A0);
             } else {
@@ -3331,7 +3426,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x311: /* movsd ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3343,7 +3438,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x013: /* movlps */
         case 0x113: /* movlpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3353,7 +3448,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x017: /* movhps */
         case 0x117: /* movhpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(1)));
             } else {
@@ -3417,7 +3512,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x12a: /* cvtpi2pd */
             gen_helper_enter_mmx(cpu_env);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 op2_offset = offsetof(CPUX86State,mmx_t0);
                 gen_ldq_env_A0(s, op2_offset);
             } else {
@@ -3440,7 +3535,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x22a: /* cvtsi2ss */
         case 0x32a: /* cvtsi2sd */
             ot = mo_64_32(s->dflag);
-            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
             if (ot == MO_32) {
@@ -3462,7 +3557,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x12d: /* cvtpd2pi */
             gen_helper_enter_mmx(cpu_env);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 op2_offset = offsetof(CPUX86State,xmm_t0);
                 gen_ldo_env_A0(s, op2_offset);
             } else {
@@ -3493,7 +3588,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x32d: /* cvtsd2si */
             ot = mo_64_32(s->dflag);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 if ((b >> 8) & 1) {
                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.XMM_Q(0)));
                 } else {
@@ -3525,7 +3620,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0xc4: /* pinsrw */
         case 0x1c4:
             s->rip_offset = 1;
-            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
             val = cpu_ldub_code(env, s->pc++);
             if (b1) {
                 val &= 7;
@@ -3559,7 +3654,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x1d6: /* movq ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3626,7 +3721,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
                 } else {
                     op2_offset = offsetof(CPUX86State,xmm_t0);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     switch (b) {
                     case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
                     case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
@@ -3660,7 +3755,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                 } else {
                     op2_offset = offsetof(CPUX86State,mmx_t0);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_ldq_env_A0(s, op2_offset);
                 }
             }
@@ -3701,7 +3796,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 }

                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 gen_helper_crc32(cpu_T[0], cpu_tmp2_i32,
                                  cpu_T[0], tcg_const_i32(8 << ot));

@@ -3729,7 +3824,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     ot = MO_64;
                 }

-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 if ((b & 1) == 0) {
                     tcg_gen_qemu_ld_tl(cpu_T[0], cpu_A0,
                                        s->mem_index, ot | MO_BE);
@@ -3747,7 +3842,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 tcg_gen_andc_tl(cpu_T[0], cpu_regs[s->vex_v], cpu_T[0]);
                 gen_op_mov_reg_v(ot, reg, cpu_T[0]);
                 gen_op_update1_cc();
@@ -3764,7 +3859,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 {
                     TCGv bound, zero;

-                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                     /* Extract START, and shift the operand.
                        Shifts larger than operand size get zeros.  */
                     tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]);
@@ -3801,7 +3896,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 tcg_gen_ext8u_tl(cpu_T[1], cpu_regs[s->vex_v]);
                 {
                     TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
@@ -3828,7 +3923,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 switch (ot) {
                 default:
                     tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
@@ -3854,7 +3949,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 /* Note that by zero-extending the mask operand, we
                    automatically handle zero-extending the result.  */
                 if (ot == MO_64) {
@@ -3872,7 +3967,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 /* Note that by zero-extending the mask operand, we
                    automatically handle zero-extending the result.  */
                 if (ot == MO_64) {
@@ -3892,7 +3987,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     int end_op;

                     ot = mo_64_32(s->dflag);
-                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);

                     /* Re-use the carry-out from a previous round.  */
                     TCGV_UNUSED(carry_in);
@@ -3971,7 +4066,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 if (ot == MO_64) {
                     tcg_gen_andi_tl(cpu_T[1], cpu_regs[s->vex_v], 63);
                 } else {
@@ -4003,7 +4098,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);

                 switch (reg & 7) {
                 case 1: /* blsr By,Ey */
@@ -4062,7 +4157,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 ot = mo_64_32(s->dflag);
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3)
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                 reg = ((modrm >> 3) & 7) | rex_r;
                 val = cpu_ldub_code(env, s->pc++);
                 switch (b) {
@@ -4199,7 +4294,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
                 } else {
                     op2_offset = offsetof(CPUX86State,xmm_t0);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_ldo_env_A0(s, op2_offset);
                 }
             } else {
@@ -4208,7 +4303,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                 } else {
                     op2_offset = offsetof(CPUX86State,mmx_t0);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_ldq_env_A0(s, op2_offset);
                 }
             }
@@ -4242,7 +4337,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 b = cpu_ldub_code(env, s->pc++);
                 if (ot == MO_64) {
                     tcg_gen_rotri_tl(cpu_T[0], cpu_T[0], b & 63);
@@ -4278,7 +4373,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             if (mod != 3) {
                 int sz = 4;

-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 op2_offset = offsetof(CPUX86State,xmm_t0);

                 switch (b) {
@@ -4326,7 +4421,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         } else {
             op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 op2_offset = offsetof(CPUX86State,mmx_t0);
                 gen_ldq_env_A0(s, op2_offset);
             } else {
@@ -4595,7 +4690,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 mod = (modrm >> 6) & 3;
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3) {
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     opreg = OR_TMP0;
                 } else if (op == OP_XORL && rm == reg) {
                 xor_zero:
@@ -4616,7 +4711,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 reg = ((modrm >> 3) & 7) | rex_r;
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3) {
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
                 } else if (op == OP_XORL && rm == reg) {
                     goto xor_zero;
@@ -4655,7 +4750,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                     s->rip_offset = 1;
                 else
                     s->rip_offset = insn_const_size(ot);
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 opreg = OR_TMP0;
             } else {
                 opreg = rm;
@@ -4698,7 +4793,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (mod != 3) {
             if (op == 0)
                 s->rip_offset = insn_const_size(ot);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
         } else {
             gen_op_mov_v_reg(ot, cpu_T[0], rm);
@@ -4906,7 +5001,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             }
         }
         if (mod != 3) {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             if (op >= 2 && op != 3 && op != 5)
                 gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
         } else {
@@ -4998,7 +5093,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;

-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         gen_op_mov_v_reg(ot, cpu_T[1], reg);
         gen_op_testl_T0_T1_cc();
         set_cc_op(s, CC_OP_LOGICB + ot);
@@ -5073,7 +5168,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             s->rip_offset = insn_const_size(ot);
         else if (b == 0x6b)
             s->rip_offset = 1;
-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         if (b == 0x69) {
             val = insn_get(env, s, ot);
             tcg_gen_movi_tl(cpu_T[1], val);
@@ -5130,7 +5225,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_op_mov_reg_v(ot, reg, cpu_T[1]);
             gen_op_mov_reg_v(ot, rm, cpu_T[0]);
         } else {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_op_mov_v_reg(ot, cpu_T[0], reg);
             gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
             tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
@@ -5159,7 +5254,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_mov_v_reg(ot, t0, rm);
             } else {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 tcg_gen_mov_tl(a0, cpu_A0);
                 gen_op_ld_v(s, ot, t0, a0);
                 rm = 0; /* avoid warning */
@@ -5207,16 +5302,16 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 goto illegal_op;
             gen_jmp_im(pc_start - s->cs_base);
             gen_update_cc_op(s);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_helper_cmpxchg16b(cpu_env, cpu_A0);
         } else
-#endif
+#endif
         {
             if (!(s->cpuid_features & CPUID_CX8))
                 goto illegal_op;
             gen_jmp_im(pc_start - s->cs_base);
             gen_update_cc_op(s);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_helper_cmpxchg8b(cpu_env, cpu_A0);
         }
         set_cc_op(s, CC_OP_EFLAGS);
@@ -5266,7 +5361,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         } else {
             /* NOTE: order is important too for MMU exceptions */
             s->popl_esp_hack = 1 << ot;
-            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
             s->popl_esp_hack = 0;
             gen_pop_update(s, ot);
         }
@@ -5352,7 +5447,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         reg = ((modrm >> 3) & 7) | rex_r;

         /* generate a generic store */
-        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
+        gen_ldst_modrm(env, s, modrm, ot, reg, 1, b);
         break;
     case 0xc6:
     case 0xc7: /* mov Ev, Iv */
@@ -5361,7 +5456,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         mod = (modrm >> 6) & 3;
         if (mod != 3) {
             s->rip_offset = insn_const_size(ot);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
         }
         val = insn_get(env, s, ot);
         tcg_gen_movi_tl(cpu_T[0], val);
@@ -5377,7 +5472,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;

-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         gen_op_mov_reg_v(ot, reg, cpu_T[0]);
         break;
     case 0x8e: /* mov seg, Gv */
@@ -5385,7 +5480,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         reg = (modrm >> 3) & 7;
         if (reg >= 6 || reg == R_CS)
             goto illegal_op;
-        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
         gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
         if (reg == R_SS) {
             /* if reg == SS, inhibit interrupts/trace */
@@ -5408,7 +5503,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             goto illegal_op;
         gen_op_movl_T0_seg(reg);
         ot = mod == 3 ? dflag : MO_16;
-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
         break;

     case 0x1b6: /* movzbS Gv, Eb */
@@ -5450,7 +5545,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 }
                 gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             } else {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, s_ot, cpu_T[0], cpu_A0);
                 gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             }
@@ -5468,7 +5563,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         s->override = -1;
         val = s->addseg;
         s->addseg = 0;
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         s->addseg = val;
         gen_op_mov_reg_v(ot, reg, cpu_A0);
         break;
@@ -5558,7 +5653,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_op_mov_reg_v(ot, rm, cpu_T[0]);
             gen_op_mov_reg_v(ot, reg, cpu_T[1]);
         } else {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_op_mov_v_reg(ot, cpu_T[0], reg);
             /* for xchg, lock is implicit */
             if (!(prefixes & PREFIX_LOCK))
@@ -5593,7 +5688,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
         gen_add_A0_im(s, 1 << ot);
         /* load the segment first to handle exceptions properly */
@@ -5624,7 +5719,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 if (shift == 2) {
                     s->rip_offset = 1;
                 }
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 opreg = OR_TMP0;
             } else {
                 opreg = (modrm & 7) | REX_B(s);
@@ -5674,7 +5769,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         rm = (modrm & 7) | REX_B(s);
         reg = ((modrm >> 3) & 7) | rex_r;
         if (mod != 3) {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             opreg = OR_TMP0;
         } else {
             opreg = rm;
@@ -5705,7 +5800,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         op = ((b & 7) << 3) | ((modrm >> 3) & 7);
         if (mod != 3) {
             /* memory op */
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             switch(op) {
             case 0x00 ... 0x07: /* fxxxs */
             case 0x10 ... 0x17: /* fixxxl */
@@ -5832,7 +5927,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             case 0x0c: /* fldenv mem */
                 gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_fldenv(cpu_env, cpu_A0,
+                                  tcg_const_i32(dflag == MO_32),
+                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
                 break;
             case 0x0d: /* fldcw mem */
                 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
@@ -5842,7 +5939,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             case 0x0e: /* fnstenv mem */
                 gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_fstenv(cpu_env, cpu_A0,
+                                  tcg_const_i32(dflag == MO_32),
+                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
                 break;
             case 0x0f: /* fnstcw mem */
                 gen_helper_fnstcw(cpu_tmp2_i32, cpu_env);
@@ -5863,12 +5962,16 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             case 0x2c: /* frstor mem */
                 gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_frstor(cpu_env, cpu_A0,
+                                  tcg_const_i32(dflag == MO_32),
+                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
                 break;
             case 0x2e: /* fnsave mem */
                 gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_fsave(cpu_env, cpu_A0,
+                                 tcg_const_i32(dflag == MO_32),
+                                 tcg_const_i32(IS_PROTECTED_MODE(s)));
                 break;
             case 0x2f: /* fnstsw mem */
                 gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
@@ -6209,6 +6312,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 goto illegal_op;
             }
         }
+        if (non_control_x87_instr(modrm, b)) {
+            tcg_gen_movi_i32(cpu_fpop, ((b & 0x7) << 8) | (modrm & 0xff));
+            tcg_gen_movi_tl(cpu_fpip, pc_start - s->cs_base);
+            tcg_gen_movi_i32(cpu_fpcs, env->segs[R_CS].selector);
+        }
         break;
         /************************/
         /* string ops */
@@ -6527,7 +6635,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     case 0x190 ... 0x19f: /* setcc Gv */
         modrm = cpu_ldub_code(env, s->pc++);
         gen_setcc1(s, b, cpu_T[0]);
-        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
+        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1, b);
         break;
     case 0x140 ... 0x14f: /* cmov Gv, Ev */
         if (!(s->cpuid_features & CPUID_CMOV)) {
@@ -6657,7 +6765,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         rm = (modrm & 7) | REX_B(s);
         if (mod != 3) {
             s->rip_offset = 1;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
         } else {
             gen_op_mov_v_reg(ot, cpu_T[0], rm);
@@ -6688,7 +6796,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         rm = (modrm & 7) | REX_B(s);
         gen_op_mov_v_reg(MO_32, cpu_T[1], reg);
         if (mod != 3) {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             /* specific case: we need to add a displacement */
             gen_exts(ot, cpu_T[1]);
             tcg_gen_sari_tl(cpu_tmp0, cpu_T[1], 3 + ot);
@@ -6764,7 +6872,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         ot = dflag;
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         gen_extu(ot, cpu_T[0]);

         /* Note that lzcnt and tzcnt are in different extensions.  */
@@ -6967,7 +7075,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (mod == 3)
             goto illegal_op;
         gen_op_mov_v_reg(ot, cpu_T[0], reg);
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         gen_jmp_im(pc_start - s->cs_base);
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
         if (ot == MO_16) {
@@ -7149,7 +7257,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,ldt.selector));
             ot = mod == 3 ? dflag : MO_16;
-            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
             break;
         case 2: /* lldt */
             if (!s->pe || s->vm86)
@@ -7158,7 +7266,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
-                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_lldt(cpu_env, cpu_tmp2_i32);
@@ -7170,7 +7278,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,tr.selector));
             ot = mod == 3 ? dflag : MO_16;
-            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
             break;
         case 3: /* ltr */
             if (!s->pe || s->vm86)
@@ -7179,7 +7287,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
-                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_ltr(cpu_env, cpu_tmp2_i32);
@@ -7189,7 +7297,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         case 5: /* verw */
             if (!s->pe || s->vm86)
                 goto illegal_op;
-            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
             gen_update_cc_op(s);
             if (op == 4) {
                 gen_helper_verr(cpu_env, cpu_T[0]);
@@ -7212,7 +7320,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             if (mod == 3)
                 goto illegal_op;
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, gdt.limit));
             gen_op_st_v(s, MO_16, cpu_T[0], cpu_A0);
             gen_add_A0_im(s, 2);
@@ -7268,7 +7376,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 }
             } else { /* sidt */
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, idt.limit));
                 gen_op_st_v(s, MO_16, cpu_T[0], cpu_A0);
                 gen_add_A0_im(s, 2);
@@ -7371,7 +7479,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             } else {
                 gen_svm_check_intercept(s, pc_start,
                                         op==2 ? SVM_EXIT_GDTR_WRITE : SVM_EXIT_IDTR_WRITE);
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, MO_16, cpu_T[1], cpu_A0);
                 gen_add_A0_im(s, 2);
                 gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T[0], cpu_A0);
@@ -7394,14 +7502,14 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
 #else
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,cr[0]));
 #endif
-            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 1, b);
             break;
         case 6: /* lmsw */
             if (s->cpl != 0) {
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
-                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
                 gen_helper_lmsw(cpu_env, cpu_T[0]);
                 gen_jmp_im(s->pc - s->cs_base);
                 gen_eob(s);
@@ -7414,7 +7522,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 } else {
                     gen_update_cc_op(s);
                     gen_jmp_im(pc_start - s->cs_base);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_helper_invlpg(cpu_env, cpu_A0);
                     gen_jmp_im(s->pc - s->cs_base);
                     gen_eob(s);
@@ -7493,7 +7601,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 }
                 gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             } else {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T[0], cpu_A0);
                 gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             }
@@ -7514,7 +7622,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             mod = (modrm >> 6) & 3;
             rm = modrm & 7;
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, ot, t0, cpu_A0);
                 a0 = tcg_temp_local_new();
                 tcg_gen_mov_tl(a0, cpu_A0);
@@ -7556,7 +7664,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             ot = dflag != MO_16 ? MO_32 : MO_16;
             modrm = cpu_ldub_code(env, s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
-            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
             t0 = tcg_temp_local_new();
             gen_update_cc_op(s);
             if (b == 0x102) {
@@ -7584,7 +7692,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         case 3: /* prefetchnt0 */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             /* nothing more to do */
             break;
         default: /* nop (multi byte) */
@@ -7696,7 +7804,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             goto illegal_op;
         reg = ((modrm >> 3) & 7) | rex_r;
         /* generate a generic store */
-        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
+        gen_ldst_modrm(env, s, modrm, ot, reg, 1, b);
         break;
     case 0x1ae:
         modrm = cpu_ldub_code(env, s->pc++);
@@ -7711,10 +7819,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
                 break;
             }
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_fxsave(cpu_env, cpu_A0, tcg_const_i32(dflag == MO_64));
+            gen_helper_fxsave(cpu_env, cpu_A0,
+                              tcg_const_i32(dflag == MO_32),
+                              tcg_const_i32(dflag == MO_64));
             break;
         case 1: /* fxrstor */
             if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
@@ -7724,10 +7834,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
                 break;
             }
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_fxrstor(cpu_env, cpu_A0, tcg_const_i32(dflag == MO_64));
+            gen_helper_fxrstor(cpu_env, cpu_A0,
+                               tcg_const_i32(dflag == MO_32),
+                               tcg_const_i32(dflag == MO_64));
             break;
         case 2: /* ldmxcsr */
         case 3: /* stmxcsr */
@@ -7738,7 +7850,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK) ||
                 mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             if (op == 2) {
                 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
                                     s->mem_index, MO_LEUL);
@@ -7763,7 +7875,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 /* clflush */
                 if (!(s->cpuid_features & CPUID_CLFLUSH))
                     goto illegal_op;
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
             }
             break;
         default:
@@ -7775,7 +7887,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         /* ignore for now */
         break;
     case 0x1aa: /* rsm */
@@ -7803,7 +7915,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             ot = mo_64_32(dflag);
         }

-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         gen_helper_popcnt(cpu_T[0], cpu_env, cpu_T[0], tcg_const_i32(ot));
         gen_op_mov_reg_v(ot, reg, cpu_T[0]);

@@ -7880,6 +7992,17 @@ void optimize_flags_init(void)
     cpu_cc_src2 = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src2),
                                      "cc_src2");

+    cpu_fpop = tcg_global_mem_new_i32(TCG_AREG0,
+                                      offsetof(CPUX86State, fpop.tcg), "fpop");
+    cpu_fpip = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, fpip),
+                                     "fpip");
+    cpu_fpdp = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, fpdp),
+                                     "fpdp");
+    cpu_fpds = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUX86State, fpds),
+                                     "fpds");
+    cpu_fpcs = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUX86State, fpcs),
+                                     "fpcs");
+
     for (i = 0; i < CPU_NB_REGS; ++i) {
         cpu_regs[i] = tcg_global_mem_new(TCG_AREG0,
                                          offsetof(CPUX86State, regs[i]),

On Sat, Jun 21, 2014 at 2:16 AM, Jaume Martí <address@hidden> wrote:
> Hello,
>
> I am submitting a patch to fix bugs 661696 and 1248376. The patch
> implements, for TCG, the behaviour specified in the Intel and AMD
> programmer's manuals for the x87 exception pointers. That is, when the
> fstenv/fnstenv, fsave and fxsave instructions are executed, the
> instruction pointer, data pointer and opcode of the last non-control
> x87 instruction executed are correctly saved to the specified memory
> address. When the fldenv, frstor and fxrstor instructions are executed,
> the values to be treated as the instruction pointer, data pointer and
> opcode of the last non-control x87 instruction are loaded from the
> specified memory address.
>
> Best regards,
> Jaume
>
> Signed-off-by: Jaume Marti Farriol (address@hidden)
> diff --git a/include/exec/def-helper.h b/include/exec/def-helper.h
> index 73d51f9..9f31404 100644
> --- a/include/exec/def-helper.h
> +++ b/include/exec/def-helper.h
> @@ -8,7 +8,7 @@
>     to match the types used by the C helper implementation.
>
>     The target helper.h should be included in all files that use/define
> -   helper functions.  THis will ensure that function prototypes are
> +   helper functions.  This will ensure that function prototypes are
>     consistent.  In addition it should be included an extra two times for
>     helper.c, defining:
>      GEN_HELPER 1 to produce op generation functions (gen_helper_*)
> diff --git a/linux-user/signal.c b/linux-user/signal.c
> index 04638e2..0f3b573 100644
> --- a/linux-user/signal.c
> +++ b/linux-user/signal.c
> @@ -687,52 +687,52 @@ struct target_xmmreg {
>  };
>
>  struct target_fpstate {
> - /* Regular FPU environment */
> -        abi_ulong       cw;
> -        abi_ulong       sw;
> -        abi_ulong       tag;
> -        abi_ulong       ipoff;
> -        abi_ulong       cssel;
> -        abi_ulong       dataoff;
> -        abi_ulong       datasel;
> - struct target_fpreg _st[8];
> - uint16_t status;
> - uint16_t magic; /* 0xffff = regular FPU data only */
> -
> - /* FXSR FPU environment */
> -        abi_ulong       _fxsr_env[6];   /* FXSR FPU env is ignored */
> -        abi_ulong       mxcsr;
> -        abi_ulong       reserved;
> - struct target_fpxreg _fxsr_st[8]; /* FXSR FPU reg data is ignored */
> - struct target_xmmreg _xmm[8];
> -        abi_ulong       padding[56];
> +    /* Regular FPU environment */
> +    abi_ulong       cw;
> +    abi_ulong       sw;
> +    abi_ulong       tag;
> +    abi_ulong       ipoff;
> +    abi_ulong       cssel;
> +    abi_ulong       dataoff;
> +    abi_ulong       datasel;
> +    struct target_fpreg _st[8];
> +    uint16_t        status;
> +    uint16_t        magic; /* 0xffff = regular FPU data only */
> +
> +    /* FXSR FPU environment */
> +    abi_ulong       _fxsr_env[6]; /* FXSR FPU env is ignored */
> +    abi_ulong       mxcsr;
> +    abi_ulong       reserved;
> +    struct target_fpxreg _fxsr_st[8]; /* FXSR FPU reg data is ignored */
> +    struct target_xmmreg _xmm[8];
> +    abi_ulong       padding[56];
>  };
>
>  #define X86_FXSR_MAGIC 0x0000
>
>  struct target_sigcontext {
> - uint16_t gs, __gsh;
> - uint16_t fs, __fsh;
> - uint16_t es, __esh;
> - uint16_t ds, __dsh;
> -        abi_ulong edi;
> -        abi_ulong esi;
> -        abi_ulong ebp;
> -        abi_ulong esp;
> -        abi_ulong ebx;
> -        abi_ulong edx;
> -        abi_ulong ecx;
> -        abi_ulong eax;
> -        abi_ulong trapno;
> -        abi_ulong err;
> -        abi_ulong eip;
> - uint16_t cs, __csh;
> -        abi_ulong eflags;
> -        abi_ulong esp_at_signal;
> - uint16_t ss, __ssh;
> -        abi_ulong fpstate; /* pointer */
> -        abi_ulong oldmask;
> -        abi_ulong cr2;
> +    uint16_t gs, __gsh;
> +    uint16_t fs, __fsh;
> +    uint16_t es, __esh;
> +    uint16_t ds, __dsh;
> +    abi_ulong edi;
> +    abi_ulong esi;
> +    abi_ulong ebp;
> +    abi_ulong esp;
> +    abi_ulong ebx;
> +    abi_ulong edx;
> +    abi_ulong ecx;
> +    abi_ulong eax;
> +    abi_ulong trapno;
> +    abi_ulong err;
> +    abi_ulong eip;
> +    uint16_t cs, __csh;
> +    abi_ulong eflags;
> +    abi_ulong esp_at_signal;
> +    uint16_t ss, __ssh;
> +    abi_ulong fpstate; /* pointer */
> +    abi_ulong oldmask;
> +    abi_ulong cr2;
>  };
>
>  struct target_ucontext {
> @@ -775,7 +775,7 @@ setup_sigcontext(struct target_sigcontext *sc, struct target_fpstate *fpstate,
>   CPUX86State *env, abi_ulong mask, abi_ulong fpstate_addr)
>  {
>   int err = 0;
> -        uint16_t magic;
> +    uint16_t magic;
>
>   /* already locked in setup_frame() */
>   err |= __put_user(env->segs[R_GS].selector, (unsigned int *)&sc->gs);
> @@ -798,11 +798,11 @@ setup_sigcontext(struct target_sigcontext *sc, struct target_fpstate *fpstate,
>   err |= __put_user(env->regs[R_ESP], &sc->esp_at_signal);
>   err |= __put_user(env->segs[R_SS].selector, (unsigned int *)&sc->ss);
>
> -        cpu_x86_fsave(env, fpstate_addr, 1);
> -        fpstate->status = fpstate->sw;
> -        magic = 0xffff;
> -        err |= __put_user(magic, &fpstate->magic);
> -        err |= __put_user(fpstate_addr, &sc->fpstate);
> +    cpu_x86_fsave(env, fpstate_addr);
> +    fpstate->status = fpstate->sw;
> +    magic = 0xffff;
> +    err |= __put_user(magic, &fpstate->magic);
> +    err |= __put_user(fpstate_addr, &sc->fpstate);
>
>   /* non-iBCS2 extensions.. */
>   err |= __put_user(mask, &sc->oldmask);
> @@ -889,10 +889,10 @@ static void setup_frame(int sig, struct target_sigaction *ka,
>   env->regs[R_ESP] = frame_addr;
>   env->eip = ka->_sa_handler;
>
> -        cpu_x86_load_seg(env, R_DS, __USER_DS);
> -        cpu_x86_load_seg(env, R_ES, __USER_DS);
> -        cpu_x86_load_seg(env, R_SS, __USER_DS);
> -        cpu_x86_load_seg(env, R_CS, __USER_CS);
> +    cpu_x86_load_seg(env, R_DS, __USER_DS);
> +    cpu_x86_load_seg(env, R_ES, __USER_DS);
> +    cpu_x86_load_seg(env, R_SS, __USER_DS);
> +    cpu_x86_load_seg(env, R_CS, __USER_CS);
>   env->eflags &= ~TF_MASK;
>
>   unlock_user_struct(frame, frame_addr, 1);
> @@ -969,10 +969,10 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka,
>   env->regs[R_ESP] = frame_addr;
>   env->eip = ka->_sa_handler;
>
> -        cpu_x86_load_seg(env, R_DS, __USER_DS);
> -        cpu_x86_load_seg(env, R_ES, __USER_DS);
> -        cpu_x86_load_seg(env, R_SS, __USER_DS);
> -        cpu_x86_load_seg(env, R_CS, __USER_CS);
> +    cpu_x86_load_seg(env, R_DS, __USER_DS);
> +    cpu_x86_load_seg(env, R_ES, __USER_DS);
> +    cpu_x86_load_seg(env, R_SS, __USER_DS);
> +    cpu_x86_load_seg(env, R_CS, __USER_CS);
>   env->eflags &= ~TF_MASK;
>
>   unlock_user_struct(frame, frame_addr, 1);
> @@ -989,43 +989,43 @@ give_sigsegv:
>  static int
>  restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc, int *peax)
>  {
> - unsigned int err = 0;
> -        abi_ulong fpstate_addr;
> -        unsigned int tmpflags;
> -
> -        cpu_x86_load_seg(env, R_GS, tswap16(sc->gs));
> -        cpu_x86_load_seg(env, R_FS, tswap16(sc->fs));
> -        cpu_x86_load_seg(env, R_ES, tswap16(sc->es));
> -        cpu_x86_load_seg(env, R_DS, tswap16(sc->ds));
> -
> -        env->regs[R_EDI] = tswapl(sc->edi);
> -        env->regs[R_ESI] = tswapl(sc->esi);
> -        env->regs[R_EBP] = tswapl(sc->ebp);
> -        env->regs[R_ESP] = tswapl(sc->esp);
> -        env->regs[R_EBX] = tswapl(sc->ebx);
> -        env->regs[R_EDX] = tswapl(sc->edx);
> -        env->regs[R_ECX] = tswapl(sc->ecx);
> -        env->eip = tswapl(sc->eip);
> -
> -        cpu_x86_load_seg(env, R_CS, lduw_p(&sc->cs) | 3);
> -        cpu_x86_load_seg(env, R_SS, lduw_p(&sc->ss) | 3);
> -
> -        tmpflags = tswapl(sc->eflags);
> -        env->eflags = (env->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
> -        // regs->orig_eax = -1; /* disable syscall checks */
> -
> -        fpstate_addr = tswapl(sc->fpstate);
> - if (fpstate_addr != 0) {
> -                if (!access_ok(VERIFY_READ, fpstate_addr,
> -                               sizeof(struct target_fpstate)))
> -                        goto badframe;
> -                cpu_x86_frstor(env, fpstate_addr, 1);
> - }
> +    unsigned int err = 0;
> +    abi_ulong fpstate_addr;
> +    unsigned int tmpflags;
> +
> +    cpu_x86_load_seg(env, R_GS, tswap16(sc->gs));
> +    cpu_x86_load_seg(env, R_FS, tswap16(sc->fs));
> +    cpu_x86_load_seg(env, R_ES, tswap16(sc->es));
> +    cpu_x86_load_seg(env, R_DS, tswap16(sc->ds));
> +
> +    env->regs[R_EDI] = tswapl(sc->edi);
> +    env->regs[R_ESI] = tswapl(sc->esi);
> +    env->regs[R_EBP] = tswapl(sc->ebp);
> +    env->regs[R_ESP] = tswapl(sc->esp);
> +    env->regs[R_EBX] = tswapl(sc->ebx);
> +    env->regs[R_EDX] = tswapl(sc->edx);
> +    env->regs[R_ECX] = tswapl(sc->ecx);
> +    env->eip = tswapl(sc->eip);
> +
> +    cpu_x86_load_seg(env, R_CS, lduw_p(&sc->cs) | 3);
> +    cpu_x86_load_seg(env, R_SS, lduw_p(&sc->ss) | 3);
> +
> +    tmpflags = tswapl(sc->eflags);
> +    env->eflags = (env->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
> +
> +    fpstate_addr = tswapl(sc->fpstate);
> +    if (fpstate_addr != 0) {
> +        if (!access_ok(VERIFY_READ, fpstate_addr,
> +                    sizeof(struct target_fpstate))) {
> +            goto badframe;
> +        }
> +        cpu_x86_frstor(env, fpstate_addr);
> +    }
>
> -        *peax = tswapl(sc->eax);
> - return err;
> +    *peax = tswapl(sc->eax);
> +    return err;
>  badframe:
> - return 1;
> +    return 1;
>  }
>
>  long do_sigreturn(CPUX86State *env)
> diff --git a/target-i386/cpu.h b/target-i386/cpu.h
> index 0014acc..b239cae 100644
> --- a/target-i386/cpu.h
> +++ b/target-i386/cpu.h
> @@ -803,10 +803,14 @@ typedef struct CPUX86State {
>      uint16_t fpuc;
>      uint8_t fptags[8];   /* 0 = valid, 1 = empty */
>      FPReg fpregs[8];
> -    /* KVM-only so far */
> -    uint16_t fpop;
> +    union {
> +        uint32_t tcg;
> +        uint16_t kvm;
> +    } fpop;
>      uint64_t fpip;
>      uint64_t fpdp;
> +    uint32_t fpcs;
> +    uint32_t fpds;
>
>      /* emulator internal variables */
>      float_status fp_status;
> @@ -1049,8 +1053,8 @@ floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper);
>  /* the following helpers are only usable in user mode simulation as
>     they can trigger unexpected exceptions */
>  void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector);
> -void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32);
> -void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32);
> +void cpu_x86_fsave(CPUX86State *s, target_ulong ptr);
> +void cpu_x86_frstor(CPUX86State *s, target_ulong ptr);
>
>  /* you can call this signal handler from your SIGBUS and SIGSEGV
>     signal handlers to inform the virtual CPU of exceptions. non zero
> diff --git a/target-i386/fpu_helper.c b/target-i386/fpu_helper.c
> index de7ba76..c80cce7 100644
> --- a/target-i386/fpu_helper.c
> +++ b/target-i386/fpu_helper.c
> @@ -59,6 +59,8 @@
>  #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
>  #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
>
> +#define FPUS(env) ((env->fpus & ~0x3800) | ((env->fpstt & 0x7) << 11))
> +
>  static inline void fpush(CPUX86State *env)
>  {
>      env->fpstt = (env->fpstt - 1) & 7;
> @@ -607,6 +609,10 @@ void helper_fninit(CPUX86State *env)
>      env->fptags[5] = 1;
>      env->fptags[6] = 1;
>      env->fptags[7] = 1;
> +    env->fpip = 0;
> +    env->fpcs = 0;
> +    env->fpdp = 0;
> +    env->fpds = 0;
>  }
>
>  /* BCD ops */
> @@ -964,13 +970,13 @@ void helper_fxam_ST0(CPUX86State *env)
>      }
>  }
>
> -void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
> +void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32,
> +                   int protected_mode)
>  {
> -    int fpus, fptag, exp, i;
> +    int fptag, exp, i;
>      uint64_t mant;
>      CPU_LDoubleU tmp;
>
> -    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
>      fptag = 0;
>      for (i = 7; i >= 0; i--) {
>          fptag <<= 2;
> @@ -990,83 +996,150 @@ void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
>              }
>          }
>      }
> +
>      if (data32) {
>          /* 32 bit */
> -        cpu_stl_data(env, ptr, env->fpuc);
> -        cpu_stl_data(env, ptr + 4, fpus);
> -        cpu_stl_data(env, ptr + 8, fptag);
> -        cpu_stl_data(env, ptr + 12, 0); /* fpip */
> -        cpu_stl_data(env, ptr + 16, 0); /* fpcs */
> -        cpu_stl_data(env, ptr + 20, 0); /* fpoo */
> -        cpu_stl_data(env, ptr + 24, 0); /* fpos */
> +        cpu_stw_data(env, ptr, env->fpuc);
> +        cpu_stw_data(env, ptr + 4, FPUS(env));
> +        cpu_stw_data(env, ptr + 8, fptag);
> +        if (protected_mode) {
> +            cpu_stl_data(env, ptr + 12, env->fpip);
> +            cpu_stl_data(env, ptr + 16,
> +                        ((env->fpop.tcg & 0x7ff) << 16) | (env->fpcs & 0xffff));
> +            cpu_stl_data(env, ptr + 20, env->fpdp);
> +            cpu_stl_data(env, ptr + 24, env->fpds);
> +        } else {
> +            /* Real mode  */
> +            cpu_stl_data(env, ptr + 12, env->fpip); /* fpip[15..00] */
> +            cpu_stl_data(env, ptr + 16, ((((env->fpip >> 16) & 0xffff) << 12) |
> +                        (env->fpop.tcg & 0x7ff))); /* fpip[31..16], fpop */
> +            cpu_stl_data(env, ptr + 20, env->fpdp); /* fpdp[15..00] */
> +            cpu_stl_data(env, ptr + 24,
> +                        (env->fpdp >> 4) & 0xffff000); /* fpdp[31..16] */
> +        }
>      } else {
>          /* 16 bit */
>          cpu_stw_data(env, ptr, env->fpuc);
> -        cpu_stw_data(env, ptr + 2, fpus);
> +        cpu_stw_data(env, ptr + 2, FPUS(env));
>          cpu_stw_data(env, ptr + 4, fptag);
> -        cpu_stw_data(env, ptr + 6, 0);
> -        cpu_stw_data(env, ptr + 8, 0);
> -        cpu_stw_data(env, ptr + 10, 0);
> -        cpu_stw_data(env, ptr + 12, 0);
> +        if (protected_mode) {
> +            cpu_stw_data(env, ptr + 6, env->fpip);
> +            cpu_stw_data(env, ptr + 8, env->fpcs);
> +            cpu_stw_data(env, ptr + 10, env->fpdp);
> +            cpu_stw_data(env, ptr + 12, env->fpds);
> +        } else {
> +            /* Real mode  */
> +            cpu_stw_data(env, ptr + 6, env->fpip); /* fpip[15..0] */
> +            cpu_stw_data(env, ptr + 8, ((env->fpip >> 4) & 0xf000) |
> +                        (env->fpop.tcg & 0x7ff)); /* fpip[19..16], fpop */
> +            cpu_stw_data(env, ptr + 10, env->fpdp); /* fpdp[15..0] */
> +            cpu_stw_data(env, ptr + 12,
> +                        (env->fpdp >> 4) & 0xf000); /* fpdp[19..16] */
> +        }
>      }
> +
> +    env->fpip = 0;
> +    env->fpcs = 0;
> +    env->fpdp = 0;
> +    env->fpds = 0;
>  }
>
> -void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
> +void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32,
> +                   int protected_mode)
>  {
> -    int i, fpus, fptag;
> +    int tmp, i, fpus, fptag;
>
>      if (data32) {
> +        /* 32 bit */
>          env->fpuc = cpu_lduw_data(env, ptr);
>          fpus = cpu_lduw_data(env, ptr + 4);
>          fptag = cpu_lduw_data(env, ptr + 8);
> +        if (protected_mode) {
> +            env->fpip = cpu_ldl_data(env, ptr + 12);
> +            tmp = cpu_ldl_data(env, ptr + 16);
> +            env->fpcs = tmp & 0xffff;
> +            env->fpop.tcg = tmp >> 16;
> +            env->fpdp = cpu_ldl_data(env, ptr + 20);
> +            env->fpds = cpu_lduw_data(env, ptr + 24);
> +        } else {
> +            /* Real mode */
> +            tmp = cpu_ldl_data(env, ptr + 16);
> +            env->fpip = ((tmp & 0xffff000) << 4) |
> +                        cpu_lduw_data(env, ptr + 12);
> +            env->fpop.tcg = tmp & 0x7ff;
> +            env->fpdp = (cpu_ldl_data(env, ptr + 24) << 4) |
> +                        cpu_lduw_data(env, ptr + 20);
> +        }
>      } else {
> +        /* 16 bit */
>          env->fpuc = cpu_lduw_data(env, ptr);
>          fpus = cpu_lduw_data(env, ptr + 2);
>          fptag = cpu_lduw_data(env, ptr + 4);
> +        if (protected_mode) {
> +            /* Protected mode  */
> +            env->fpip = cpu_lduw_data(env, ptr + 6);
> +            env->fpcs = cpu_lduw_data(env, ptr + 8);
> +            env->fpdp = cpu_lduw_data(env, ptr + 10);
> +            env->fpds = cpu_lduw_data(env, ptr + 12);
> +        } else {
> +            /* Real mode  */
> +            tmp = cpu_lduw_data(env, ptr + 8);
> +            env->fpip = ((tmp & 0xf000) << 4) | cpu_lduw_data(env, ptr + 6);
> +            env->fpop.tcg = tmp & 0x7ff;
> +            env->fpdp = cpu_lduw_data(env, ptr + 12) << 4 |
> +                        cpu_lduw_data(env, ptr + 10);
> +        }
>      }
> +
>      env->fpstt = (fpus >> 11) & 7;
>      env->fpus = fpus & ~0x3800;
>      for (i = 0; i < 8; i++) {
>          env->fptags[i] = ((fptag & 3) == 3);
>          fptag >>= 2;
>      }
> +
> +    env->fpip &= 0xffffffff;
> +    env->fpdp &= 0xffffffff;
> +    if (!protected_mode) {
> +        env->fpcs = 0;
> +        env->fpds = 0;
> +    }
>  }
>
> -void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
> +void helper_fsave(CPUX86State *env, target_ulong ptr, int data32,
> +                  int protected_mode)
>  {
>      floatx80 tmp;
>      int i;
>
> -    helper_fstenv(env, ptr, data32);
> +    helper_fstenv(env, ptr, data32, protected_mode);
>
> -    ptr += (14 << data32);
> +    if (data32) {
> +        ptr += 28;
> +    } else {
> +        ptr += 14;
> +    }
>      for (i = 0; i < 8; i++) {
>          tmp = ST(i);
>          helper_fstt(env, tmp, ptr);
>          ptr += 10;
>      }
>
> -    /* fninit */
> -    env->fpus = 0;
> -    env->fpstt = 0;
> -    env->fpuc = 0x37f;
> -    env->fptags[0] = 1;
> -    env->fptags[1] = 1;
> -    env->fptags[2] = 1;
> -    env->fptags[3] = 1;
> -    env->fptags[4] = 1;
> -    env->fptags[5] = 1;
> -    env->fptags[6] = 1;
> -    env->fptags[7] = 1;
> +    helper_fninit(env);
>  }
>
> -void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
> +void helper_frstor(CPUX86State *env, target_ulong ptr, int data32,
> +                   int protected_mode)
>  {
>      floatx80 tmp;
>      int i;
>
> -    helper_fldenv(env, ptr, data32);
> -    ptr += (14 << data32);
> +    helper_fldenv(env, ptr, data32, protected_mode);
> +    if (data32) {
> +        ptr += 28;
> +    } else {
> +        ptr += 14;
> +    }
>
>      for (i = 0; i < 8; i++) {
>          tmp = helper_fldt(env, ptr);
> @@ -1075,21 +1148,22 @@ void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
>      }
>  }
>
> -#if defined(CONFIG_USER_ONLY)
> -void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
> +#if defined(CONFIG_USER_ONLY) && defined(TARGET_I386) && TARGET_ABI_BITS == 32
> +
> +void cpu_x86_fsave(CPUX86State *env, target_ulong ptr)
>  {
> -    helper_fsave(env, ptr, data32);
> +    helper_fsave(env, ptr, 1, 1);
>  }
>
> -void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
> +void cpu_x86_frstor(CPUX86State *env, target_ulong ptr)
>  {
> -    helper_frstor(env, ptr, data32);
> +    helper_frstor(env, ptr, 1, 1);
>  }
>  #endif
>
> -void helper_fxsave(CPUX86State *env, target_ulong ptr, int data64)
> +void helper_fxsave(CPUX86State *env, target_ulong ptr, int data32, int data64)
>  {
> -    int fpus, fptag, i, nb_xmm_regs;
> +    int i, nb_xmm_regs, fptag;
>      floatx80 tmp;
>      target_ulong addr;
>
> @@ -1098,25 +1172,36 @@ void helper_fxsave(CPUX86State *env, target_ulong ptr, int data64)
>          raise_exception(env, EXCP0D_GPF);
>      }
>
> -    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
>      fptag = 0;
>      for (i = 0; i < 8; i++) {
>          fptag |= (env->fptags[i] << i);
>      }
> +    fptag ^= 0xff;
> +
>      cpu_stw_data(env, ptr, env->fpuc);
> -    cpu_stw_data(env, ptr + 2, fpus);
> -    cpu_stw_data(env, ptr + 4, fptag ^ 0xff);
> +    cpu_stw_data(env, ptr + 2, FPUS(env));
> +    cpu_stw_data(env, ptr + 4, fptag & 0xff);
> +    cpu_stw_data(env, ptr + 6, env->fpop.tcg);
> +
>  #ifdef TARGET_X86_64
>      if (data64) {
> -        cpu_stq_data(env, ptr + 0x08, 0); /* rip */
> -        cpu_stq_data(env, ptr + 0x10, 0); /* rdp */
> +        /* 64 bit */
> +        cpu_stq_data(env, ptr + 8, env->fpip);
> +        cpu_stq_data(env, ptr + 16, env->fpdp);
>      } else
>  #endif
>      {
> -        cpu_stl_data(env, ptr + 0x08, 0); /* eip */
> -        cpu_stl_data(env, ptr + 0x0c, 0); /* sel  */
> -        cpu_stl_data(env, ptr + 0x10, 0); /* dp */
> -        cpu_stl_data(env, ptr + 0x14, 0); /* sel  */
> +        if (data32) {
> +            /* 32 bit */
> +            cpu_stl_data(env, ptr + 8, env->fpip);
> +            cpu_stl_data(env, ptr + 16, env->fpdp);
> +        } else {
> +            /* 16 bit */
> +            cpu_stw_data(env, ptr + 8, env->fpip);
> +            cpu_stw_data(env, ptr + 16, env->fpdp);
> +        }
> +        cpu_stw_data(env, ptr + 12, env->fpcs & 0xffff);
> +        cpu_stw_data(env, ptr + 20, env->fpds & 0xffff);
>      }
>
>      addr = ptr + 0x20;
> @@ -1149,7 +1234,7 @@ void helper_fxsave(CPUX86State *env, target_ulong ptr, int data64)
>      }
>  }
>
> -void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
> +void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data32, int data64)
>  {
>      int i, fpus, fptag, nb_xmm_regs;
>      floatx80 tmp;
> @@ -1170,6 +1255,30 @@ void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
>          env->fptags[i] = ((fptag >> i) & 1);
>      }
>
> +    env->fpop.tcg = (cpu_lduw_data(env, ptr + 6) >> 5) & 0x7ff;
> +
> +#ifdef TARGET_X86_64
> +    if (data64) {
> +        /* 64 bit */
> +        env->fpip = cpu_ldq_data(env, ptr + 8);
> +        env->fpdp = cpu_ldq_data(env, ptr + 16);
> +    } else
> +#endif
> +    {
> +        if (data32) {
> +            /* 32 bit */
> +            env->fpip = cpu_ldl_data(env, ptr + 8);
> +            env->fpdp = cpu_ldl_data(env, ptr + 16);
> +        } else {
> +            /* 16 bit */
> +            env->fpip = cpu_lduw_data(env, ptr + 8);
> +            env->fpdp = cpu_lduw_data(env, ptr + 16);
> +        }
> +
> +        env->fpcs = cpu_lduw_data(env, ptr + 12);
> +        env->fpds = cpu_lduw_data(env, ptr + 20);
> +    }
> +
>      addr = ptr + 0x20;
>      for (i = 0; i < 8; i++) {
>          tmp = helper_fldt(env, addr);
> @@ -1198,6 +1307,11 @@ void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
>              }
>          }
>      }
> +
> +    if (!data64) {
> +        env->fpip &= 0xffffffff;
> +        env->fpdp &= 0xffffffff;
> +    }
>  }
>
>  void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
> diff --git a/target-i386/helper.h b/target-i386/helper.h
> index 3775abe..626b296 100644
> --- a/target-i386/helper.h
> +++ b/target-i386/helper.h
> @@ -185,12 +185,12 @@ DEF_HELPER_1(frndint, void, env)
>  DEF_HELPER_1(fscale, void, env)
>  DEF_HELPER_1(fsin, void, env)
>  DEF_HELPER_1(fcos, void, env)
> -DEF_HELPER_3(fstenv, void, env, tl, int)
> -DEF_HELPER_3(fldenv, void, env, tl, int)
> -DEF_HELPER_3(fsave, void, env, tl, int)
> -DEF_HELPER_3(frstor, void, env, tl, int)
> -DEF_HELPER_3(fxsave, void, env, tl, int)
> -DEF_HELPER_3(fxrstor, void, env, tl, int)
> +DEF_HELPER_4(fstenv, void, env, tl, int, int)
> +DEF_HELPER_4(fldenv, void, env, tl, int, int)
> +DEF_HELPER_4(fsave, void, env, tl, int, int)
> +DEF_HELPER_4(frstor, void, env, tl, int, int)
> +DEF_HELPER_4(fxsave, void, env, tl, int, int)
> +DEF_HELPER_4(fxrstor, void, env, tl, int, int)
>
>  DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl)
>  DEF_HELPER_FLAGS_1(ctz, TCG_CALL_NO_RWG_SE, tl, tl)
> diff --git a/target-i386/kvm.c b/target-i386/kvm.c
> index e555040..8444779 100644
> --- a/target-i386/kvm.c
> +++ b/target-i386/kvm.c
> @@ -975,7 +975,7 @@ static int kvm_put_fpu(X86CPU *cpu)
>      fpu.fsw = env->fpus & ~(7 << 11);
>      fpu.fsw |= (env->fpstt & 7) << 11;
>      fpu.fcw = env->fpuc;
> -    fpu.last_opcode = env->fpop;
> +    fpu.last_opcode = env->fpop.kvm;
>      fpu.last_ip = env->fpip;
>      fpu.last_dp = env->fpdp;
>      for (i = 0; i < 8; ++i) {
> @@ -1020,7 +1020,7 @@ static int kvm_put_xsave(X86CPU *cpu)
>          twd |= (!env->fptags[i]) << i;
>      }
>      xsave->region[XSAVE_FCW_FSW] = (uint32_t)(swd << 16) + cwd;
> -    xsave->region[XSAVE_FTW_FOP] = (uint32_t)(env->fpop << 16) + twd;
> +    xsave->region[XSAVE_FTW_FOP] = (uint32_t)(env->fpop.kvm << 16) + twd;
>      memcpy(&xsave->region[XSAVE_CWD_RIP], &env->fpip, sizeof(env->fpip));
>      memcpy(&xsave->region[XSAVE_CWD_RDP], &env->fpdp, sizeof(env->fpdp));
>      memcpy(&xsave->region[XSAVE_ST_SPACE], env->fpregs,
> @@ -1286,7 +1286,7 @@ static int kvm_get_fpu(X86CPU *cpu)
>      env->fpstt = (fpu.fsw >> 11) & 7;
>      env->fpus = fpu.fsw;
>      env->fpuc = fpu.fcw;
> -    env->fpop = fpu.last_opcode;
> +    env->fpop.kvm = fpu.last_opcode;
>      env->fpip = fpu.last_ip;
>      env->fpdp = fpu.last_dp;
>      for (i = 0; i < 8; ++i) {
> @@ -1318,7 +1318,7 @@ static int kvm_get_xsave(X86CPU *cpu)
>      cwd = (uint16_t)xsave->region[XSAVE_FCW_FSW];
>      swd = (uint16_t)(xsave->region[XSAVE_FCW_FSW] >> 16);
>      twd = (uint16_t)xsave->region[XSAVE_FTW_FOP];
> -    env->fpop = (uint16_t)(xsave->region[XSAVE_FTW_FOP] >> 16);
> +    env->fpop.kvm = (uint16_t)(xsave->region[XSAVE_FTW_FOP] >> 16);
>      env->fpstt = (swd >> 11) & 7;
>      env->fpus = swd;
>      env->fpuc = cwd;
> diff --git a/target-i386/machine.c b/target-i386/machine.c
> index d548c05..a879e00 100644
> --- a/target-i386/machine.c
> +++ b/target-i386/machine.c
> @@ -388,7 +388,7 @@ static bool fpop_ip_dp_needed(void *opaque)
>      X86CPU *cpu = opaque;
>      CPUX86State *env = &cpu->env;
>
> -    return env->fpop != 0 || env->fpip != 0 || env->fpdp != 0;
> +    return env->fpop.kvm != 0 || env->fpip != 0 || env->fpdp != 0;
>  }
>
>  static const VMStateDescription vmstate_fpop_ip_dp = {
> @@ -397,7 +397,7 @@ static const VMStateDescription vmstate_fpop_ip_dp = {
>      .minimum_version_id = 1,
>      .minimum_version_id_old = 1,
>      .fields      = (VMStateField []) {
> -        VMSTATE_UINT16(env.fpop, X86CPU),
> +        VMSTATE_UINT16(env.fpop.kvm, X86CPU),
>          VMSTATE_UINT64(env.fpip, X86CPU),
>          VMSTATE_UINT64(env.fpdp, X86CPU),
>          VMSTATE_END_OF_LIST()
> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index 707ebd5..8d29931 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c
> @@ -58,6 +58,7 @@
>  #endif
>
>  //#define MACRO_TEST   1
> +#define IS_PROTECTED_MODE(s) (s->pe && !s->vm86)
>
>  /* global register indexes */
>  static TCGv_ptr cpu_env;
> @@ -65,6 +66,11 @@ static TCGv cpu_A0;
>  static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
>  static TCGv_i32 cpu_cc_op;
>  static TCGv cpu_regs[CPU_NB_REGS];
> +static TCGv_i32 cpu_fpop;
> +static TCGv cpu_fpip;
> +static TCGv cpu_fpdp;
> +static TCGv_i32 cpu_fpds;
> +static TCGv_i32 cpu_fpcs;
>  /* local temps */
>  static TCGv cpu_T[2];
>  /* local register indexes (only used inside old micro ops) */
> @@ -208,6 +214,62 @@ static const uint8_t cc_op_live[CC_OP_NB] = {
>      [CC_OP_CLR] = 0,
>  };
>
> +static inline bool non_control_x87_instr(int modrm, int b)
> +{
> +    int op, mod, rm;
> +    switch (b) {
> +    case 0xd8 ... 0xdf:
> +        /* floats */
> +        op = ((b & 7) << 3) | ((modrm >> 3) & 7);
> +        mod = (modrm >> 6) & 3;
> +        rm = modrm & 7;
> +        if (mod != 3) {
> +            /* memory */
> +            switch (op) {
> +            case 0x0c: /* fldenv */
> +            case 0x0d: /* fldcw */
> +            case 0x0e: /* fstenv, fnstenv */
> +            case 0x0f: /* fstcw, fnstcw */
> +            case 0x2c: /* frstor */
> +            case 0x2e: /* fsave, fnsave */
> +            case 0x2f: /* fstsw, fnstsw */
> +                return false;
> +            default:
> +                return true;
> +            }
> +        } else {
> +            /* register */
> +            switch (op) {
> +            case 0x0a:
> +                return false; /* fnop, Illegal op */
> +            case 0x0e: /* fdecstp, fincstp */
> +            case 0x28: /* ffree */
> +                return false;
> +            case 0x1c:
> +                switch (rm) {
> +                case 1: /* feni */
> +                    return true;
> +                case 2: /* fclex, fnclex */
> +                case 3: /* finit, fninit */
> +                    return false;
> +                case 4: /* fsetpm */
> +                    return true;
> +                default: /* Illegal op */
> +                    return false;
> +                }
> +            case 0x3c:
> +                return false; /* fstsw, fnstsw, Illegal op */
> +            default:
> +                return true;
> +            }
> +        }
> +    /*case 0x9b: // fwait, wait
> +        return false;*/
> +    default:
> +        return false;
> +    }
> +}
> +
>  static void set_cc_op(DisasContext *s, CCOp op)
>  {
>      int dead;
> @@ -1588,14 +1650,14 @@ static void gen_rot_rm_T1(DisasContext *s,
> TCGMemOp ot, int op1, int is_right)
>      t0 = tcg_const_i32(0);
>      t1 = tcg_temp_new_i32();
>      tcg_gen_trunc_tl_i32(t1, cpu_T[1]);
> -    tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
> +    tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
>      tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
>      tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
>                          cpu_tmp2_i32, cpu_tmp3_i32);
>      tcg_temp_free_i32(t0);
>      tcg_temp_free_i32(t1);
>
> -    /* The CC_OP value is no longer predictable.  */
> +    /* The CC_OP value is no longer predictable.  */
>      set_cc_op(s, CC_OP_DYNAMIC);
>  }
>
> @@ -1871,7 +1933,7 @@ static void gen_shifti(DisasContext *s1, int op,
> TCGMemOp ot, int d, int c)
>      }
>  }
>
> -static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
> +static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm, int b)
>  {
>      target_long disp;
>      int havesib;
> @@ -1879,6 +1941,7 @@ static void gen_lea_modrm(CPUX86State *env,
> DisasContext *s, int modrm)
>      int index;
>      int scale;
>      int mod, rm, code, override, must_add_seg;
> +    int non_control_float_instr;
>      TCGv sum;
>
>      override = s->override;
> @@ -1958,6 +2021,13 @@ static void gen_lea_modrm(CPUX86State *env,
> DisasContext *s, int modrm)
>              tcg_gen_addi_tl(cpu_A0, sum, disp);
>          }
>
> +        non_control_float_instr = non_control_x87_instr(modrm, b);
> +        if (non_control_float_instr) {
> +            tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
> +            if (s->aflag == MO_32) {
> +                tcg_gen_ext32u_tl(cpu_fpdp, cpu_fpdp);
> +            }
> +        }
>          if (must_add_seg) {
>              if (override < 0) {
>                  if (base == R_EBP || base == R_ESP) {
> @@ -1969,6 +2039,12 @@ static void gen_lea_modrm(CPUX86State *env,
> DisasContext *s, int modrm)
>
>              tcg_gen_ld_tl(cpu_tmp0, cpu_env,
>                            offsetof(CPUX86State, segs[override].base));
> +
> +            if (non_control_float_instr) {
> +                tcg_gen_ld_i32(cpu_fpds, cpu_env,
> +                              offsetof(CPUX86State, segs[override].selector));
> +            }
> +
>              if (CODE64(s)) {
>                  if (s->aflag == MO_32) {
>                      tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
> @@ -1978,6 +2054,11 @@ static void gen_lea_modrm(CPUX86State *env,
> DisasContext *s, int modrm)
>              }
>
>              tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
> +        } else {
> +            if (non_control_float_instr) {
> +                tcg_gen_ld_i32(cpu_fpds, cpu_env,
> +                              offsetof(CPUX86State, segs[R_DS].selector));
> +            }
>          }
>
>          if (s->aflag == MO_32) {
> @@ -2047,8 +2128,22 @@ static void gen_lea_modrm(CPUX86State *env,
> DisasContext *s, int modrm)
>                      override = R_DS;
>                  }
>              }
> +            if (non_control_x87_instr(modrm, b)) {
> +                tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
> +                tcg_gen_ld_i32(cpu_fpds, cpu_env,
> +                              offsetof(CPUX86State, segs[override].selector));
> +            }
>              gen_op_addl_A0_seg(s, override);
> +        } else {
> +            if (non_control_x87_instr(modrm, b)) {
> +                tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
> +                tcg_gen_ld_i32(cpu_fpds, cpu_env,
> +                              offsetof(CPUX86State, segs[R_DS].selector));
> +            }
>          }
> +#ifdef TARGET_X86_64
> +        tcg_gen_andi_tl(cpu_fpdp, cpu_fpdp, 0xffffffff);
> +#endif
>          break;
>
>      default:
> @@ -2138,7 +2233,7 @@ static void gen_add_A0_ds_seg(DisasContext *s)
>  /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
>     OR_TMP0 */
>  static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
> -                           TCGMemOp ot, int reg, int is_store)
> +                           TCGMemOp ot, int reg, int is_store, int b)
>  {
>      int mod, rm;
>
> @@ -2155,7 +2250,7 @@ static void gen_ldst_modrm(CPUX86State *env,
> DisasContext *s, int modrm,
>                  gen_op_mov_reg_v(ot, reg, cpu_T[0]);
>          }
>      } else {
> -        gen_lea_modrm(env, s, modrm);
> +        gen_lea_modrm(env, s, modrm, b);
>          if (is_store) {
>              if (reg != OR_TMP0)
>                  gen_op_mov_v_reg(ot, cpu_T[0], reg);
> @@ -2258,7 +2353,7 @@ static void gen_cmovcc1(CPUX86State *env,
> DisasContext *s, TCGMemOp ot, int b,
>  {
>      CCPrepare cc;
>
> -    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>
>      cc = gen_prepare_cc(s, b, cpu_T[1]);
>      if (cc.mask != -1) {
> @@ -2284,17 +2379,17 @@ static void gen_cmovcc1(CPUX86State *env,
> DisasContext *s, TCGMemOp ot, int b,
>
>  static inline void gen_op_movl_T0_seg(int seg_reg)
>  {
> -    tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
> +    tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
>                       offsetof(CPUX86State,segs[seg_reg].selector));
>  }
>
>  static inline void gen_op_movl_seg_T0_vm(int seg_reg)
>  {
>      tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
> -    tcg_gen_st32_tl(cpu_T[0], cpu_env,
> +    tcg_gen_st32_tl(cpu_T[0], cpu_env,
>                      offsetof(CPUX86State,segs[seg_reg].selector));
>      tcg_gen_shli_tl(cpu_T[0], cpu_T[0], 4);
> -    tcg_gen_st_tl(cpu_T[0], cpu_env,
> +    tcg_gen_st_tl(cpu_T[0], cpu_env,
>                    offsetof(CPUX86State,segs[seg_reg].base));
>  }
>
> @@ -3051,7 +3146,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x0e7: /* movntq */
>              if (mod == 3)
>                  goto illegal_op;
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
>              break;
>          case 0x1e7: /* movntdq */
> @@ -3059,20 +3154,20 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x12b: /* movntps */
>              if (mod == 3)
>                  goto illegal_op;
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
>              break;
>          case 0x3f0: /* lddqu */
>              if (mod == 3)
>                  goto illegal_op;
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
>              break;
>          case 0x22b: /* movntss */
>          case 0x32b: /* movntsd */
>              if (mod == 3)
>                  goto illegal_op;
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              if (b1 & 1) {
>                  gen_stq_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
>              } else {
> @@ -3084,13 +3179,13 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x6e: /* movd mm, ea */
>  #ifdef TARGET_X86_64
>              if (s->dflag == MO_64) {
> -                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0, b);
>                  tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
>              } else
>  #endif
>              {
> -                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
> -                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
> +                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0, b);
> +                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
>                                   offsetof(CPUX86State,fpregs[reg].mmx));
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
>                  gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
> @@ -3099,15 +3194,15 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x16e: /* movd xmm, ea */
>  #ifdef TARGET_X86_64
>              if (s->dflag == MO_64) {
> -                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
> -                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
> +                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0, b);
> +                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
>                                   offsetof(CPUX86State,xmm_regs[reg]));
>                  gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T[0]);
>              } else
>  #endif
>              {
> -                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
> -                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
> +                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0, b);
> +                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
>                                   offsetof(CPUX86State,xmm_regs[reg]));
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
>                  gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
> @@ -3115,7 +3210,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x6f: /* movq mm, ea */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
>              } else {
>                  rm = (modrm & 7);
> @@ -3132,7 +3227,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x16f: /* movdqa xmm, ea */
>          case 0x26f: /* movdqu xmm, ea */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
>              } else {
>                  rm = (modrm & 7) | REX_B(s);
> @@ -3142,7 +3237,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x210: /* movss xmm, ea */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_op_ld_v(s, MO_32, cpu_T[0], cpu_A0);
>                  tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
>                  tcg_gen_movi_tl(cpu_T[0], 0);
> @@ -3157,7 +3252,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x310: /* movsd xmm, ea */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(0)));
>                  tcg_gen_movi_tl(cpu_T[0], 0);
> @@ -3172,7 +3267,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x012: /* movlps */
>          case 0x112: /* movlpd */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(0)));
>              } else {
> @@ -3184,7 +3279,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x212: /* movsldup */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
>              } else {
>                  rm = (modrm & 7) | REX_B(s);
> @@ -3200,7 +3295,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x312: /* movddup */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(0)));
>              } else {
> @@ -3214,7 +3309,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x016: /* movhps */
>          case 0x116: /* movhpd */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(1)));
>              } else {
> @@ -3226,7 +3321,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x216: /* movshdup */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
>              } else {
>                  rm = (modrm & 7) | REX_B(s);
> @@ -3264,34 +3359,34 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x7e: /* movd ea, mm */
>  #ifdef TARGET_X86_64
>              if (s->dflag == MO_64) {
> -                tcg_gen_ld_i64(cpu_T[0], cpu_env,
> +                tcg_gen_ld_i64(cpu_T[0], cpu_env,
>                                 offsetof(CPUX86State,fpregs[reg].mmx));
> -                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
> +                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1, b);
>              } else
>  #endif
>              {
> -                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
> +                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
>
> offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
> -                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
> +                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1, b);
>              }
>              break;
>          case 0x17e: /* movd ea, xmm */
>  #ifdef TARGET_X86_64
>              if (s->dflag == MO_64) {
> -                tcg_gen_ld_i64(cpu_T[0], cpu_env,
> +                tcg_gen_ld_i64(cpu_T[0], cpu_env,
>                                 offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
> -                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
> +                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1, b);
>              } else
>  #endif
>              {
> -                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
> +                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
>                                   
> offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
> -                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
> +                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1, b);
>              }
>              break;
>          case 0x27e: /* movq xmm, ea */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(0)));
>              } else {
> @@ -3303,7 +3398,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x7f: /* movq ea, mm */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
>              } else {
>                  rm = (modrm & 7);
> @@ -3318,7 +3413,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x17f: /* movdqa ea, xmm */
>          case 0x27f: /* movdqu ea, xmm */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
>              } else {
>                  rm = (modrm & 7) | REX_B(s);
> @@ -3328,7 +3423,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x211: /* movss ea, xmm */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
>                  gen_op_st_v(s, MO_32, cpu_T[0], cpu_A0);
>              } else {
> @@ -3339,7 +3434,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x311: /* movsd ea, xmm */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_stq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(0)));
>              } else {
> @@ -3351,7 +3446,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x013: /* movlps */
>          case 0x113: /* movlpd */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_stq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(0)));
>              } else {
> @@ -3361,7 +3456,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x017: /* movhps */
>          case 0x117: /* movhpd */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_stq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(1)));
>              } else {
> @@ -3409,14 +3504,14 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x050: /* movmskps */
>              rm = (modrm & 7) | REX_B(s);
> -            tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
> +            tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
>                               offsetof(CPUX86State,xmm_regs[rm]));
>              gen_helper_movmskps(cpu_tmp2_i32, cpu_env, cpu_ptr0);
>              tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
>              break;
>          case 0x150: /* movmskpd */
>              rm = (modrm & 7) | REX_B(s);
> -            tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
> +            tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
>                               offsetof(CPUX86State,xmm_regs[rm]));
>              gen_helper_movmskpd(cpu_tmp2_i32, cpu_env, cpu_ptr0);
>              tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
> @@ -3425,7 +3520,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x12a: /* cvtpi2pd */
>              gen_helper_enter_mmx(cpu_env);
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  op2_offset = offsetof(CPUX86State,mmx_t0);
>                  gen_ldq_env_A0(s, op2_offset);
>              } else {
> @@ -3448,7 +3543,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x22a: /* cvtsi2ss */
>          case 0x32a: /* cvtsi2sd */
>              ot = mo_64_32(s->dflag);
> -            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>              op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
>              tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
>              if (ot == MO_32) {
> @@ -3470,7 +3565,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x12d: /* cvtpd2pi */
>              gen_helper_enter_mmx(cpu_env);
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  op2_offset = offsetof(CPUX86State,xmm_t0);
>                  gen_ldo_env_A0(s, op2_offset);
>              } else {
> @@ -3501,7 +3596,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x32d: /* cvtsd2si */
>              ot = mo_64_32(s->dflag);
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  if ((b >> 8) & 1) {
>                      gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.XMM_Q(0)));
>                  } else {
> @@ -3533,7 +3628,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0xc4: /* pinsrw */
>          case 0x1c4:
>              s->rip_offset = 1;
> -            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> +            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
>              val = cpu_ldub_code(env, s->pc++);
>              if (b1) {
>                  val &= 7;
> @@ -3567,7 +3662,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x1d6: /* movq ea, xmm */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_stq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(0)));
>              } else {
> @@ -3634,7 +3729,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
>                  } else {
>                      op2_offset = offsetof(CPUX86State,xmm_t0);
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                      switch (b) {
>                      case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
>                      case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
> @@ -3668,7 +3763,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
>                  } else {
>                      op2_offset = offsetof(CPUX86State,mmx_t0);
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                      gen_ldq_env_A0(s, op2_offset);
>                  }
>              }
> @@ -3709,7 +3804,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                  }
>
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  gen_helper_crc32(cpu_T[0], cpu_tmp2_i32,
>                                   cpu_T[0], tcg_const_i32(8 << ot));
>
> @@ -3737,7 +3832,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      ot = MO_64;
>                  }
>
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  if ((b & 1) == 0) {
>                      tcg_gen_qemu_ld_tl(cpu_T[0], cpu_A0,
>                                         s->mem_index, ot | MO_BE);
> @@ -3755,7 +3850,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  tcg_gen_andc_tl(cpu_T[0], cpu_regs[s->vex_v], cpu_T[0]);
>                  gen_op_mov_reg_v(ot, reg, cpu_T[0]);
>                  gen_op_update1_cc();
> @@ -3772,7 +3867,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                  {
>                      TCGv bound, zero;
>
> -                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                      /* Extract START, and shift the operand.
>                         Shifts larger than operand size get zeros.  */
>                      tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]);
> @@ -3809,7 +3904,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  tcg_gen_ext8u_tl(cpu_T[1], cpu_regs[s->vex_v]);
>                  {
>                      TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
> @@ -3836,7 +3931,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  switch (ot) {
>                  default:
>                      tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
> @@ -3862,7 +3957,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  /* Note that by zero-extending the mask operand, we
>                     automatically handle zero-extending the result.  */
>                  if (ot == MO_64) {
> @@ -3880,7 +3975,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  /* Note that by zero-extending the mask operand, we
>                     automatically handle zero-extending the result.  */
>                  if (ot == MO_64) {
> @@ -3900,7 +3995,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      int end_op;
>
>                      ot = mo_64_32(s->dflag);
> -                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>
>                      /* Re-use the carry-out from a previous round.  */
>                      TCGV_UNUSED(carry_in);
> @@ -3979,7 +4074,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  if (ot == MO_64) {
>                      tcg_gen_andi_tl(cpu_T[1], cpu_regs[s->vex_v], 63);
>                  } else {
> @@ -4011,7 +4106,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>
>                  switch (reg & 7) {
>                  case 1: /* blsr By,Ey */
> @@ -4070,7 +4165,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                  ot = mo_64_32(s->dflag);
>                  rm = (modrm & 7) | REX_B(s);
>                  if (mod != 3)
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                  reg = ((modrm >> 3) & 7) | rex_r;
>                  val = cpu_ldub_code(env, s->pc++);
>                  switch (b) {
> @@ -4207,7 +4302,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
>                  } else {
>                      op2_offset = offsetof(CPUX86State,xmm_t0);
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                      gen_ldo_env_A0(s, op2_offset);
>                  }
>              } else {
> @@ -4216,7 +4311,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
>                  } else {
>                      op2_offset = offsetof(CPUX86State,mmx_t0);
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                      gen_ldq_env_A0(s, op2_offset);
>                  }
>              }
> @@ -4250,7 +4345,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  b = cpu_ldub_code(env, s->pc++);
>                  if (ot == MO_64) {
>                      tcg_gen_rotri_tl(cpu_T[0], cpu_T[0], b & 63);
> @@ -4286,7 +4381,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              if (mod != 3) {
>                  int sz = 4;
>
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  op2_offset = offsetof(CPUX86State,xmm_t0);
>
>                  switch (b) {
> @@ -4334,7 +4429,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          } else {
>              op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  op2_offset = offsetof(CPUX86State,mmx_t0);
>                  gen_ldq_env_A0(s, op2_offset);
>              } else {
> @@ -4603,7 +4698,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  mod = (modrm >> 6) & 3;
>                  rm = (modrm & 7) | REX_B(s);
>                  if (mod != 3) {
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                      opreg = OR_TMP0;
>                  } else if (op == OP_XORL && rm == reg) {
>                  xor_zero:
> @@ -4624,7 +4719,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  reg = ((modrm >> 3) & 7) | rex_r;
>                  rm = (modrm & 7) | REX_B(s);
>                  if (mod != 3) {
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                      gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
>                  } else if (op == OP_XORL && rm == reg) {
>                      goto xor_zero;
> @@ -4663,7 +4758,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                      s->rip_offset = 1;
>                  else
>                      s->rip_offset = insn_const_size(ot);
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  opreg = OR_TMP0;
>              } else {
>                  opreg = rm;
> @@ -4706,7 +4801,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          if (mod != 3) {
>              if (op == 0)
>                  s->rip_offset = insn_const_size(ot);
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
>          } else {
>              gen_op_mov_v_reg(ot, cpu_T[0], rm);
> @@ -4914,7 +5009,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              }
>          }
>          if (mod != 3) {
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              if (op >= 2 && op != 3 && op != 5)
>                  gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
>          } else {
> @@ -5006,7 +5101,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          modrm = cpu_ldub_code(env, s->pc++);
>          reg = ((modrm >> 3) & 7) | rex_r;
>
> -        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>          gen_op_mov_v_reg(ot, cpu_T[1], reg);
>          gen_op_testl_T0_T1_cc();
>          set_cc_op(s, CC_OP_LOGICB + ot);
> @@ -5081,7 +5176,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              s->rip_offset = insn_const_size(ot);
>          else if (b == 0x6b)
>              s->rip_offset = 1;
> -        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>          if (b == 0x69) {
>              val = insn_get(env, s, ot);
>              tcg_gen_movi_tl(cpu_T[1], val);
> @@ -5138,7 +5233,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              gen_op_mov_reg_v(ot, reg, cpu_T[1]);
>              gen_op_mov_reg_v(ot, rm, cpu_T[0]);
>          } else {
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_op_mov_v_reg(ot, cpu_T[0], reg);
>              gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
>              tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
> @@ -5167,7 +5262,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  rm = (modrm & 7) | REX_B(s);
>                  gen_op_mov_v_reg(ot, t0, rm);
>              } else {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  tcg_gen_mov_tl(a0, cpu_A0);
>                  gen_op_ld_v(s, ot, t0, a0);
>                  rm = 0; /* avoid warning */
> @@ -5215,16 +5310,16 @@ static target_ulong disas_insn(CPUX86State
> *env, DisasContext *s,
>                  goto illegal_op;
>              gen_jmp_im(pc_start - s->cs_base);
>              gen_update_cc_op(s);
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_helper_cmpxchg16b(cpu_env, cpu_A0);
>          } else
> -#endif
> +#endif
>          {
>              if (!(s->cpuid_features & CPUID_CX8))
>                  goto illegal_op;
>              gen_jmp_im(pc_start - s->cs_base);
>              gen_update_cc_op(s);
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_helper_cmpxchg8b(cpu_env, cpu_A0);
>          }
>          set_cc_op(s, CC_OP_EFLAGS);
> @@ -5274,7 +5369,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          } else {
>              /* NOTE: order is important too for MMU exceptions */
>              s->popl_esp_hack = 1 << ot;
> -            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
> +            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
>              s->popl_esp_hack = 0;
>              gen_pop_update(s, ot);
>          }
> @@ -5360,7 +5455,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          reg = ((modrm >> 3) & 7) | rex_r;
>
>          /* generate a generic store */
> -        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
> +        gen_ldst_modrm(env, s, modrm, ot, reg, 1, b);
>          break;
>      case 0xc6:
>      case 0xc7: /* mov Ev, Iv */
> @@ -5369,7 +5464,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          mod = (modrm >> 6) & 3;
>          if (mod != 3) {
>              s->rip_offset = insn_const_size(ot);
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>          }
>          val = insn_get(env, s, ot);
>          tcg_gen_movi_tl(cpu_T[0], val);
> @@ -5385,7 +5480,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          modrm = cpu_ldub_code(env, s->pc++);
>          reg = ((modrm >> 3) & 7) | rex_r;
>
> -        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>          gen_op_mov_reg_v(ot, reg, cpu_T[0]);
>          break;
>      case 0x8e: /* mov seg, Gv */
> @@ -5393,7 +5488,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          reg = (modrm >> 3) & 7;
>          if (reg >= 6 || reg == R_CS)
>              goto illegal_op;
> -        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> +        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
>          gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
>          if (reg == R_SS) {
>              /* if reg == SS, inhibit interrupts/trace */
> @@ -5416,7 +5511,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              goto illegal_op;
>          gen_op_movl_T0_seg(reg);
>          ot = mod == 3 ? dflag : MO_16;
> -        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
> +        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
>          break;
>
>      case 0x1b6: /* movzbS Gv, Eb */
> @@ -5458,7 +5553,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  }
>                  gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
>              } else {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_op_ld_v(s, s_ot, cpu_T[0], cpu_A0);
>                  gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
>              }
> @@ -5476,7 +5571,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          s->override = -1;
>          val = s->addseg;
>          s->addseg = 0;
> -        gen_lea_modrm(env, s, modrm);
> +        gen_lea_modrm(env, s, modrm, b);
>          s->addseg = val;
>          gen_op_mov_reg_v(ot, reg, cpu_A0);
>          break;
> @@ -5566,7 +5661,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              gen_op_mov_reg_v(ot, rm, cpu_T[0]);
>              gen_op_mov_reg_v(ot, reg, cpu_T[1]);
>          } else {
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_op_mov_v_reg(ot, cpu_T[0], reg);
>              /* for xchg, lock is implicit */
>              if (!(prefixes & PREFIX_LOCK))
> @@ -5601,7 +5696,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          mod = (modrm >> 6) & 3;
>          if (mod == 3)
>              goto illegal_op;
> -        gen_lea_modrm(env, s, modrm);
> +        gen_lea_modrm(env, s, modrm, b);
>          gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
>          gen_add_A0_im(s, 1 << ot);
>          /* load the segment first to handle exceptions properly */
> @@ -5632,7 +5727,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  if (shift == 2) {
>                      s->rip_offset = 1;
>                  }
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  opreg = OR_TMP0;
>              } else {
>                  opreg = (modrm & 7) | REX_B(s);
> @@ -5682,7 +5777,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          rm = (modrm & 7) | REX_B(s);
>          reg = ((modrm >> 3) & 7) | rex_r;
>          if (mod != 3) {
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              opreg = OR_TMP0;
>          } else {
>              opreg = rm;
> @@ -5713,7 +5808,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          op = ((b & 7) << 3) | ((modrm >> 3) & 7);
>          if (mod != 3) {
>              /* memory op */
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              switch(op) {
>              case 0x00 ... 0x07: /* fxxxs */
>              case 0x10 ... 0x17: /* fixxxl */
> @@ -5840,7 +5935,9 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              case 0x0c: /* fldenv mem */
>                  gen_update_cc_op(s);
>                  gen_jmp_im(pc_start - s->cs_base);
> -                gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
> +                gen_helper_fldenv(cpu_env, cpu_A0,
> +                                  tcg_const_i32(dflag == MO_32),
> +                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
>                  break;
>              case 0x0d: /* fldcw mem */
>                  tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
> @@ -5850,7 +5947,9 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              case 0x0e: /* fnstenv mem */
>                  gen_update_cc_op(s);
>                  gen_jmp_im(pc_start - s->cs_base);
> -                gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
> +                gen_helper_fstenv(cpu_env, cpu_A0,
> +                                  tcg_const_i32(dflag == MO_32),
> +                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
>                  break;
>              case 0x0f: /* fnstcw mem */
>                  gen_helper_fnstcw(cpu_tmp2_i32, cpu_env);
> @@ -5871,12 +5970,16 @@ static target_ulong disas_insn(CPUX86State
> *env, DisasContext *s,
>              case 0x2c: /* frstor mem */
>                  gen_update_cc_op(s);
>                  gen_jmp_im(pc_start - s->cs_base);
> -                gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
> +                gen_helper_frstor(cpu_env, cpu_A0,
> +                                  tcg_const_i32(dflag == MO_32),
> +                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
>                  break;
>              case 0x2e: /* fnsave mem */
>                  gen_update_cc_op(s);
>                  gen_jmp_im(pc_start - s->cs_base);
> -                gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
> +                gen_helper_fsave(cpu_env, cpu_A0,
> +                                 tcg_const_i32(dflag == MO_32),
> +                                 tcg_const_i32(IS_PROTECTED_MODE(s)));
>                  break;
>              case 0x2f: /* fnstsw mem */
>                  gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
> @@ -6217,6 +6320,11 @@ static target_ulong disas_insn(CPUX86State
> *env, DisasContext *s,
>                  goto illegal_op;
>              }
>          }
> +        if (non_control_x87_instr(modrm, b)) {
> +            tcg_gen_movi_i32(cpu_fpop, ((b & 0x7) << 8) | (modrm & 0xff));
> +            tcg_gen_movi_tl(cpu_fpip, pc_start - s->cs_base);
> +            tcg_gen_movi_i32(cpu_fpcs, env->segs[R_CS].selector);
> +        }
>          break;
>          /************************/
>          /* string ops */
> @@ -6276,7 +6384,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>      case 0x6d:
>          ot = mo_b_d32(b, dflag);
>          tcg_gen_ext16u_tl(cpu_T[0], cpu_regs[R_EDX]);
> -        gen_check_io(s, ot, pc_start - s->cs_base,
> +        gen_check_io(s, ot, pc_start - s->cs_base,
>                       SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
>          if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
>              gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
> @@ -6535,7 +6643,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>      case 0x190 ... 0x19f: /* setcc Gv */
>          modrm = cpu_ldub_code(env, s->pc++);
>          gen_setcc1(s, b, cpu_T[0]);
> -        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
> +        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1, b);
>          break;
>      case 0x140 ... 0x14f: /* cmov Gv, Ev */
>          if (!(s->cpuid_features & CPUID_CMOV)) {
> @@ -6665,7 +6773,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          rm = (modrm & 7) | REX_B(s);
>          if (mod != 3) {
>              s->rip_offset = 1;
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
>          } else {
>              gen_op_mov_v_reg(ot, cpu_T[0], rm);
> @@ -6696,7 +6804,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          rm = (modrm & 7) | REX_B(s);
>          gen_op_mov_v_reg(MO_32, cpu_T[1], reg);
>          if (mod != 3) {
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              /* specific case: we need to add a displacement */
>              gen_exts(ot, cpu_T[1]);
>              tcg_gen_sari_tl(cpu_tmp0, cpu_T[1], 3 + ot);
> @@ -6750,7 +6858,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          ot = dflag;
>          modrm = cpu_ldub_code(env, s->pc++);
>          reg = ((modrm >> 3) & 7) | rex_r;
> -        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>          gen_extu(ot, cpu_T[0]);
>
>          /* Note that lzcnt and tzcnt are in different extensions.  */
> @@ -6953,7 +7061,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          if (mod == 3)
>              goto illegal_op;
>          gen_op_mov_v_reg(ot, cpu_T[0], reg);
> -        gen_lea_modrm(env, s, modrm);
> +        gen_lea_modrm(env, s, modrm, b);
>          gen_jmp_im(pc_start - s->cs_base);
>          tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
>          if (ot == MO_16) {
> @@ -7135,7 +7243,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
>              tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,ldt.selector));
>              ot = mod == 3 ? dflag : MO_16;
> -            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
> +            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
>              break;
>          case 2: /* lldt */
>              if (!s->pe || s->vm86)
> @@ -7144,7 +7252,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
>              } else {
>                  gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
> -                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
>                  gen_jmp_im(pc_start - s->cs_base);
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
>                  gen_helper_lldt(cpu_env, cpu_tmp2_i32);
> @@ -7156,7 +7264,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
>              tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,tr.selector));
>              ot = mod == 3 ? dflag : MO_16;
> -            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
> +            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
>              break;
>          case 3: /* ltr */
>              if (!s->pe || s->vm86)
> @@ -7165,7 +7273,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
>              } else {
>                  gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
> -                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
>                  gen_jmp_im(pc_start - s->cs_base);
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
>                  gen_helper_ltr(cpu_env, cpu_tmp2_i32);
> @@ -7175,7 +7283,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          case 5: /* verw */
>              if (!s->pe || s->vm86)
>                  goto illegal_op;
> -            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> +            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
>              gen_update_cc_op(s);
>              if (op == 4) {
>                  gen_helper_verr(cpu_env, cpu_T[0]);
> @@ -7198,7 +7306,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              if (mod == 3)
>                  goto illegal_op;
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, gdt.limit));
>              gen_op_st_v(s, MO_16, cpu_T[0], cpu_A0);
>              gen_add_A0_im(s, 2);
> @@ -7254,7 +7362,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  }
>              } else { /* sidt */
>                  gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, idt.limit));
>                  gen_op_st_v(s, MO_16, cpu_T[0], cpu_A0);
>                  gen_add_A0_im(s, 2);
> @@ -7311,7 +7419,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                      break;
>                  case 4: /* STGI */
>                      if ((!(s->flags & HF_SVME_MASK) &&
> -                         !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) ||
> +                         !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) ||
>                          !s->pe)
>                          goto illegal_op;
>                      if (s->cpl != 0) {
> @@ -7332,8 +7440,8 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                      }
>                      break;
>                  case 6: /* SKINIT */
> -                    if ((!(s->flags & HF_SVME_MASK) &&
> -                         !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) ||
> +                    if ((!(s->flags & HF_SVME_MASK) &&
> +                         !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) ||
>                          !s->pe)
>                          goto illegal_op;
>                      gen_helper_skinit(cpu_env);
> @@ -7357,7 +7465,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              } else {
>                  gen_svm_check_intercept(s, pc_start,
>                                          op==2 ? SVM_EXIT_GDTR_WRITE : SVM_EXIT_IDTR_WRITE);
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_op_ld_v(s, MO_16, cpu_T[1], cpu_A0);
>                  gen_add_A0_im(s, 2);
>                  gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T[0], cpu_A0);
> @@ -7380,14 +7488,14 @@ static target_ulong disas_insn(CPUX86State
> *env, DisasContext *s,
>  #else
>              tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,cr[0]));
>  #endif
> -            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 1);
> +            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 1, b);
>              break;
>          case 6: /* lmsw */
>              if (s->cpl != 0) {
>                  gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
>              } else {
>                  gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
> -                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
>                  gen_helper_lmsw(cpu_env, cpu_T[0]);
>                  gen_jmp_im(s->pc - s->cs_base);
>                  gen_eob(s);
> @@ -7400,7 +7508,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  } else {
>                      gen_update_cc_op(s);
>                      gen_jmp_im(pc_start - s->cs_base);
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                      gen_helper_invlpg(cpu_env, cpu_A0);
>                      gen_jmp_im(s->pc - s->cs_base);
>                      gen_eob(s);
> @@ -7479,7 +7587,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  }
>                  gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
>              } else {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T[0], cpu_A0);
>                  gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
>              }
> @@ -7500,7 +7608,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              mod = (modrm >> 6) & 3;
>              rm = modrm & 7;
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_op_ld_v(s, ot, t0, cpu_A0);
>                  a0 = tcg_temp_local_new();
>                  tcg_gen_mov_tl(a0, cpu_A0);
> @@ -7542,7 +7650,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              ot = dflag != MO_16 ? MO_32 : MO_16;
>              modrm = cpu_ldub_code(env, s->pc++);
>              reg = ((modrm >> 3) & 7) | rex_r;
> -            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> +            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
>              t0 = tcg_temp_local_new();
>              gen_update_cc_op(s);
>              if (b == 0x102) {
> @@ -7570,7 +7678,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          case 3: /* prefetchnt0 */
>              if (mod == 3)
>                  goto illegal_op;
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              /* nothing more to do */
>              break;
>          default: /* nop (multi byte) */
> @@ -7682,7 +7790,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              goto illegal_op;
>          reg = ((modrm >> 3) & 7) | rex_r;
>          /* generate a generic store */
> -        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
> +        gen_ldst_modrm(env, s, modrm, ot, reg, 1, b);
>          break;
>      case 0x1ae:
>          modrm = cpu_ldub_code(env, s->pc++);
> @@ -7697,10 +7805,12 @@ static target_ulong disas_insn(CPUX86State
> *env, DisasContext *s,
>                  gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
>                  break;
>              }
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_update_cc_op(s);
>              gen_jmp_im(pc_start - s->cs_base);
> -            gen_helper_fxsave(cpu_env, cpu_A0, tcg_const_i32(dflag == MO_64));
> +            gen_helper_fxsave(cpu_env, cpu_A0,
> +                              tcg_const_i32(dflag == MO_32),
> +                              tcg_const_i32(dflag == MO_64));
>              break;
>          case 1: /* fxrstor */
>              if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
> @@ -7710,10 +7820,12 @@ static target_ulong disas_insn(CPUX86State
> *env, DisasContext *s,
>                  gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
>                  break;
>              }
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_update_cc_op(s);
>              gen_jmp_im(pc_start - s->cs_base);
> -            gen_helper_fxrstor(cpu_env, cpu_A0, tcg_const_i32(dflag == MO_64));
> +            gen_helper_fxrstor(cpu_env, cpu_A0,
> +                               tcg_const_i32(dflag == MO_32),
> +                               tcg_const_i32(dflag == MO_64));
>              break;
>          case 2: /* ldmxcsr */
>          case 3: /* stmxcsr */
> @@ -7724,7 +7836,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK) ||
>                  mod == 3)
>                  goto illegal_op;
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              if (op == 2) {
>                  tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
>                                      s->mem_index, MO_LEUL);
> @@ -7749,7 +7861,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  /* clflush */
>                  if (!(s->cpuid_features & CPUID_CLFLUSH))
>                      goto illegal_op;
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>              }
>              break;
>          default:
> @@ -7761,7 +7873,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          mod = (modrm >> 6) & 3;
>          if (mod == 3)
>              goto illegal_op;
> -        gen_lea_modrm(env, s, modrm);
> +        gen_lea_modrm(env, s, modrm, b);
>          /* ignore for now */
>          break;
>      case 0x1aa: /* rsm */
> @@ -7789,7 +7901,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              ot = mo_64_32(dflag);
>          }
>
> -        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>          gen_helper_popcnt(cpu_T[0], cpu_env, cpu_T[0], tcg_const_i32(ot));
>          gen_op_mov_reg_v(ot, reg, cpu_T[0]);
>
> @@ -7866,6 +7978,17 @@ void optimize_flags_init(void)
>      cpu_cc_src2 = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src2),
>                                       "cc_src2");
>
> +    cpu_fpop = tcg_global_mem_new_i32(TCG_AREG0,
> +                                      offsetof(CPUX86State, fpop.tcg), "fpop");
> +    cpu_fpip = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, fpip),
> +                                     "fpip");
> +    cpu_fpdp = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, fpdp),
> +                                     "fpdp");
> +    cpu_fpds = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUX86State, fpds),
> +                                     "fpds");
> +    cpu_fpcs = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUX86State, fpcs),
> +                                     "fpcs");
> +
>      for (i = 0; i < CPU_NB_REGS; ++i) {
>          cpu_regs[i] = tcg_global_mem_new(TCG_AREG0,
>                                           offsetof(CPUX86State, regs[i]),



-- 
Jaume



reply via email to

[Prev in Thread] Current Thread [Next in Thread]