]> Git Repo - qemu.git/blobdiff - target-i386/translate.c
target-i386/cpu: Add missing flags to Haswell CPU model
[qemu.git] / target-i386 / translate.c
index 7d1340ed015515498417dc3bd2380daa7bb0e3a4..8e676ba1a8295acaa51f94921fb87ec6b0ca7322 100644 (file)
@@ -24,7 +24,6 @@
 #include <signal.h>
 
 #include "cpu.h"
-#include "exec-all.h"
 #include "disas.h"
 #include "tcg-op.h"
 
 #define PREFIX_ADR    0x10
 
 #ifdef TARGET_X86_64
-#define X86_64_ONLY(x) x
-#define X86_64_DEF(...)  __VA_ARGS__
 #define CODE64(s) ((s)->code64)
 #define REX_X(s) ((s)->rex_x)
 #define REX_B(s) ((s)->rex_b)
-/* XXX: gcc generates push/pop in some opcodes, so we cannot use them */
-#if 1
-#define BUGGY_64(x) NULL
-#endif
 #else
-#define X86_64_ONLY(x) NULL
-#define X86_64_DEF(...)
 #define CODE64(s) 0
 #define REX_X(s) 0
 #define REX_B(s) 0
@@ -116,6 +107,7 @@ typedef struct DisasContext {
     int cpuid_ext_features;
     int cpuid_ext2_features;
     int cpuid_ext3_features;
+    int cpuid_7_0_ebx_features;
 } DisasContext;
 
 static void gen_eob(DisasContext *s);
@@ -272,11 +264,30 @@ static inline void gen_op_andl_A0_ffff(void)
 #define REG_LH_OFFSET 4
 #endif
 
+/* In instruction encodings for byte register accesses the
+ * register number usually indicates "low 8 bits of register N";
+ * however there are some special cases where N 4..7 indicates
+ * [AH, CH, DH, BH], ie "bits 15..8 of register N-4". Return
+ * true for this special case, false otherwise.
+ */
+static inline bool byte_reg_is_xH(int reg)
+{
+    if (reg < 4) {
+        return false;
+    }
+#ifdef TARGET_X86_64
+    if (reg >= 8 || x86_64_hregs) {
+        return false;
+    }
+#endif
+    return true;
+}
+
 static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0)
 {
     switch(ot) {
     case OT_BYTE:
-        if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
+        if (!byte_reg_is_xH(reg)) {
             tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8);
         } else {
             tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8);
@@ -331,19 +342,11 @@ static inline void gen_op_mov_reg_A0(int size, int reg)
 
 static inline void gen_op_mov_v_reg(int ot, TCGv t0, int reg)
 {
-    switch(ot) {
-    case OT_BYTE:
-        if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
-            goto std_case;
-        } else {
-            tcg_gen_shri_tl(t0, cpu_regs[reg - 4], 8);
-            tcg_gen_ext8u_tl(t0, t0);
-        }
-        break;
-    default:
-    std_case:
+    if (ot == OT_BYTE && byte_reg_is_xH(reg)) {
+        tcg_gen_shri_tl(t0, cpu_regs[reg - 4], 8);
+        tcg_gen_ext8u_tl(t0, t0);
+    } else {
         tcg_gen_mov_tl(t0, cpu_regs[reg]);
-        break;
     }
 }
 
@@ -389,7 +392,7 @@ static inline void gen_op_addl_T0_T1(void)
 
 static inline void gen_op_jmp_T0(void)
 {
-    tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUState, eip));
+    tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, eip));
 }
 
 static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
@@ -454,27 +457,34 @@ static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
 
 static inline void gen_op_movl_A0_seg(int reg)
 {
-    tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, segs[reg].base) + REG_L_OFFSET);
+    tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUX86State, segs[reg].base) + REG_L_OFFSET);
 }
 
-static inline void gen_op_addl_A0_seg(int reg)
+static inline void gen_op_addl_A0_seg(DisasContext *s, int reg)
 {
-    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, segs[reg].base));
-    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, segs[reg].base));
 #ifdef TARGET_X86_64
-    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
+    if (CODE64(s)) {
+        tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
+        tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+    } else {
+        tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+        tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
+    }
+#else
+    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
 #endif
 }
 
 #ifdef TARGET_X86_64
 static inline void gen_op_movq_A0_seg(int reg)
 {
-    tcg_gen_ld_tl(cpu_A0, cpu_env, offsetof(CPUState, segs[reg].base));
+    tcg_gen_ld_tl(cpu_A0, cpu_env, offsetof(CPUX86State, segs[reg].base));
 }
 
 static inline void gen_op_addq_A0_seg(int reg)
 {
-    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, segs[reg].base));
+    tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, segs[reg].base));
     tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
 }
 
@@ -584,7 +594,7 @@ static inline void gen_op_st_T1_A0(int idx)
 static inline void gen_jmp_im(target_ulong pc)
 {
     tcg_gen_movi_tl(cpu_tmp0, pc);
-    tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, eip));
+    tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State, eip));
 }
 
 static inline void gen_string_movl_A0_ESI(DisasContext *s)
@@ -618,7 +628,7 @@ static inline void gen_string_movl_A0_ESI(DisasContext *s)
             override = R_DS;
         gen_op_movl_A0_reg(R_ESI);
         gen_op_andl_A0_ffff();
-        gen_op_addl_A0_seg(override);
+        gen_op_addl_A0_seg(s, override);
     }
 }
 
@@ -639,13 +649,13 @@ static inline void gen_string_movl_A0_EDI(DisasContext *s)
     } else {
         gen_op_movl_A0_reg(R_EDI);
         gen_op_andl_A0_ffff();
-        gen_op_addl_A0_seg(R_ES);
+        gen_op_addl_A0_seg(s, R_ES);
     }
 }
 
 static inline void gen_op_movl_T0_Dshift(int ot) 
 {
-    tcg_gen_ld32s_tl(cpu_T[0], cpu_env, offsetof(CPUState, df));
+    tcg_gen_ld32s_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, df));
     tcg_gen_shli_tl(cpu_T[0], cpu_T[0], ot);
 };
 
@@ -731,9 +741,15 @@ static void gen_check_io(DisasContext *s, int ot, target_ulong cur_eip,
         state_saved = 1;
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
         switch (ot) {
-        case 0: gen_helper_check_iob(cpu_tmp2_i32); break;
-        case 1: gen_helper_check_iow(cpu_tmp2_i32); break;
-        case 2: gen_helper_check_iol(cpu_tmp2_i32); break;
+        case 0:
+            gen_helper_check_iob(cpu_env, cpu_tmp2_i32);
+            break;
+        case 1:
+            gen_helper_check_iow(cpu_env, cpu_tmp2_i32);
+            break;
+        case 2:
+            gen_helper_check_iol(cpu_env, cpu_tmp2_i32);
+            break;
         }
     }
     if(s->flags & HF_SVMI_MASK) {
@@ -745,7 +761,8 @@ static void gen_check_io(DisasContext *s, int ot, target_ulong cur_eip,
         svm_flags |= (1 << (4 + ot));
         next_eip = s->pc - s->cs_base;
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-        gen_helper_svm_check_io(cpu_tmp2_i32, tcg_const_i32(svm_flags),
+        gen_helper_svm_check_io(cpu_env, cpu_tmp2_i32,
+                                tcg_const_i32(svm_flags),
                                 tcg_const_i32(next_eip - cur_eip));
     }
 }
@@ -802,14 +819,14 @@ static void gen_op_update_neg_cc(void)
 /* compute eflags.C to reg */
 static void gen_compute_eflags_c(TCGv reg)
 {
-    gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_cc_op);
+    gen_helper_cc_compute_c(cpu_tmp2_i32, cpu_env, cpu_cc_op);
     tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
 }
 
 /* compute all eflags to cc_src */
 static void gen_compute_eflags(TCGv reg)
 {
-    gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_cc_op);
+    gen_helper_cc_compute_all(cpu_tmp2_i32, cpu_env, cpu_cc_op);
     tcg_gen_extu_i32_tl(reg, cpu_tmp2_i32);
 }
 
@@ -1257,14 +1274,30 @@ GEN_REPZ2(cmps)
 static void gen_helper_fp_arith_ST0_FT0(int op)
 {
     switch (op) {
-    case 0: gen_helper_fadd_ST0_FT0(); break;
-    case 1: gen_helper_fmul_ST0_FT0(); break;
-    case 2: gen_helper_fcom_ST0_FT0(); break;
-    case 3: gen_helper_fcom_ST0_FT0(); break;
-    case 4: gen_helper_fsub_ST0_FT0(); break;
-    case 5: gen_helper_fsubr_ST0_FT0(); break;
-    case 6: gen_helper_fdiv_ST0_FT0(); break;
-    case 7: gen_helper_fdivr_ST0_FT0(); break;
+    case 0:
+        gen_helper_fadd_ST0_FT0(cpu_env);
+        break;
+    case 1:
+        gen_helper_fmul_ST0_FT0(cpu_env);
+        break;
+    case 2:
+        gen_helper_fcom_ST0_FT0(cpu_env);
+        break;
+    case 3:
+        gen_helper_fcom_ST0_FT0(cpu_env);
+        break;
+    case 4:
+        gen_helper_fsub_ST0_FT0(cpu_env);
+        break;
+    case 5:
+        gen_helper_fsubr_ST0_FT0(cpu_env);
+        break;
+    case 6:
+        gen_helper_fdiv_ST0_FT0(cpu_env);
+        break;
+    case 7:
+        gen_helper_fdivr_ST0_FT0(cpu_env);
+        break;
     }
 }
 
@@ -1273,12 +1306,24 @@ static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
 {
     TCGv_i32 tmp = tcg_const_i32(opreg);
     switch (op) {
-    case 0: gen_helper_fadd_STN_ST0(tmp); break;
-    case 1: gen_helper_fmul_STN_ST0(tmp); break;
-    case 4: gen_helper_fsubr_STN_ST0(tmp); break;
-    case 5: gen_helper_fsub_STN_ST0(tmp); break;
-    case 6: gen_helper_fdivr_STN_ST0(tmp); break;
-    case 7: gen_helper_fdiv_STN_ST0(tmp); break;
+    case 0:
+        gen_helper_fadd_STN_ST0(cpu_env, tmp);
+        break;
+    case 1:
+        gen_helper_fmul_STN_ST0(cpu_env, tmp);
+        break;
+    case 4:
+        gen_helper_fsubr_STN_ST0(cpu_env, tmp);
+        break;
+    case 5:
+        gen_helper_fsub_STN_ST0(cpu_env, tmp);
+        break;
+    case 6:
+        gen_helper_fdivr_STN_ST0(cpu_env, tmp);
+        break;
+    case 7:
+        gen_helper_fdiv_STN_ST0(cpu_env, tmp);
+        break;
     }
 }
 
@@ -1407,70 +1452,84 @@ static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
 {
     target_ulong mask;
     int shift_label;
-    TCGv t0, t1;
+    TCGv t0, t1, t2;
 
-    if (ot == OT_QUAD)
+    if (ot == OT_QUAD) {
         mask = 0x3f;
-    else
+    } else {
         mask = 0x1f;
+    }
 
     /* load */
-    if (op1 == OR_TMP0)
+    if (op1 == OR_TMP0) {
         gen_op_ld_T0_A0(ot + s->mem_index);
-    else
+    } else {
         gen_op_mov_TN_reg(ot, 0, op1);
+    }
 
-    tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask);
+    t0 = tcg_temp_local_new();
+    t1 = tcg_temp_local_new();
+    t2 = tcg_temp_local_new();
 
-    tcg_gen_addi_tl(cpu_tmp5, cpu_T[1], -1);
+    tcg_gen_andi_tl(t2, cpu_T[1], mask);
 
     if (is_right) {
         if (is_arith) {
             gen_exts(ot, cpu_T[0]);
-            tcg_gen_sar_tl(cpu_T3, cpu_T[0], cpu_tmp5);
-            tcg_gen_sar_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+            tcg_gen_mov_tl(t0, cpu_T[0]);
+            tcg_gen_sar_tl(cpu_T[0], cpu_T[0], t2);
         } else {
             gen_extu(ot, cpu_T[0]);
-            tcg_gen_shr_tl(cpu_T3, cpu_T[0], cpu_tmp5);
-            tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+            tcg_gen_mov_tl(t0, cpu_T[0]);
+            tcg_gen_shr_tl(cpu_T[0], cpu_T[0], t2);
         }
     } else {
-        tcg_gen_shl_tl(cpu_T3, cpu_T[0], cpu_tmp5);
-        tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+        tcg_gen_mov_tl(t0, cpu_T[0]);
+        tcg_gen_shl_tl(cpu_T[0], cpu_T[0], t2);
     }
 
     /* store */
-    if (op1 == OR_TMP0)
+    if (op1 == OR_TMP0) {
         gen_op_st_T0_A0(ot + s->mem_index);
-    else
+    } else {
         gen_op_mov_reg_T0(ot, op1);
-        
+    }
+
     /* update eflags if non zero shift */
-    if (s->cc_op != CC_OP_DYNAMIC)
+    if (s->cc_op != CC_OP_DYNAMIC) {
         gen_op_set_cc_op(s->cc_op);
+    }
 
-    /* XXX: inefficient */
-    t0 = tcg_temp_local_new();
-    t1 = tcg_temp_local_new();
-
-    tcg_gen_mov_tl(t0, cpu_T[0]);
-    tcg_gen_mov_tl(t1, cpu_T3);
+    tcg_gen_mov_tl(t1, cpu_T[0]);
 
     shift_label = gen_new_label();
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_T[1], 0, shift_label);
+    tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, shift_label);
 
-    tcg_gen_mov_tl(cpu_cc_src, t1);
-    tcg_gen_mov_tl(cpu_cc_dst, t0);
-    if (is_right)
+    tcg_gen_addi_tl(t2, t2, -1);
+    tcg_gen_mov_tl(cpu_cc_dst, t1);
+
+    if (is_right) {
+        if (is_arith) {
+            tcg_gen_sar_tl(cpu_cc_src, t0, t2);
+        } else {
+            tcg_gen_shr_tl(cpu_cc_src, t0, t2);
+        }
+    } else {
+        tcg_gen_shl_tl(cpu_cc_src, t0, t2);
+    }
+
+    if (is_right) {
         tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot);
-    else
+    } else {
         tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot);
-        
+    }
+
     gen_set_label(shift_label);
     s->cc_op = CC_OP_DYNAMIC; /* cannot predict flags after */
 
     tcg_temp_free(t0);
     tcg_temp_free(t1);
+    tcg_temp_free(t2);
 }
 
 static void gen_shift_rm_im(DisasContext *s, int ot, int op1, int op2,
@@ -1722,20 +1781,36 @@ static void gen_rotc_rm_T1(DisasContext *s, int ot, int op1,
     
     if (is_right) {
         switch (ot) {
-        case 0: gen_helper_rcrb(cpu_T[0], cpu_T[0], cpu_T[1]); break;
-        case 1: gen_helper_rcrw(cpu_T[0], cpu_T[0], cpu_T[1]); break;
-        case 2: gen_helper_rcrl(cpu_T[0], cpu_T[0], cpu_T[1]); break;
+        case 0:
+            gen_helper_rcrb(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
+            break;
+        case 1:
+            gen_helper_rcrw(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
+            break;
+        case 2:
+            gen_helper_rcrl(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
+            break;
 #ifdef TARGET_X86_64
-        case 3: gen_helper_rcrq(cpu_T[0], cpu_T[0], cpu_T[1]); break;
+        case 3:
+            gen_helper_rcrq(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
+            break;
 #endif
         }
     } else {
         switch (ot) {
-        case 0: gen_helper_rclb(cpu_T[0], cpu_T[0], cpu_T[1]); break;
-        case 1: gen_helper_rclw(cpu_T[0], cpu_T[0], cpu_T[1]); break;
-        case 2: gen_helper_rcll(cpu_T[0], cpu_T[0], cpu_T[1]); break;
+        case 0:
+            gen_helper_rclb(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
+            break;
+        case 1:
+            gen_helper_rclw(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
+            break;
+        case 2:
+            gen_helper_rcll(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
+            break;
 #ifdef TARGET_X86_64
-        case 3: gen_helper_rclq(cpu_T[0], cpu_T[0], cpu_T[1]); break;
+        case 3:
+            gen_helper_rclq(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
+            break;
 #endif
         }
     }
@@ -1943,7 +2018,8 @@ static void gen_shifti(DisasContext *s1, int op, int ot, int d, int c)
     }
 }
 
-static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int *offset_ptr)
+static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm,
+                          int *reg_ptr, int *offset_ptr)
 {
     target_long disp;
     int havesib;
@@ -1969,7 +2045,7 @@ static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int *offset_
 
         if (base == 4) {
             havesib = 1;
-            code = ldub_code(s->pc++);
+            code = cpu_ldub_code(env, s->pc++);
             scale = (code >> 6) & 3;
             index = ((code >> 3) & 7) | REX_X(s);
             base = (code & 7);
@@ -1980,7 +2056,7 @@ static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int *offset_
         case 0:
             if ((base & 7) == 5) {
                 base = -1;
-                disp = (int32_t)ldl_code(s->pc);
+                disp = (int32_t)cpu_ldl_code(env, s->pc);
                 s->pc += 4;
                 if (CODE64(s) && !havesib) {
                     disp += s->pc + s->rip_offset;
@@ -1990,11 +2066,11 @@ static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int *offset_
             }
             break;
         case 1:
-            disp = (int8_t)ldub_code(s->pc++);
+            disp = (int8_t)cpu_ldub_code(env, s->pc++);
             break;
         default:
         case 2:
-            disp = (int32_t)ldl_code(s->pc);
+            disp = (int32_t)cpu_ldl_code(env, s->pc);
             s->pc += 4;
             break;
         }
@@ -2050,14 +2126,14 @@ static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int *offset_
             } else
 #endif
             {
-                gen_op_addl_A0_seg(override);
+                gen_op_addl_A0_seg(s, override);
             }
         }
     } else {
         switch (mod) {
         case 0:
             if (rm == 6) {
-                disp = lduw_code(s->pc);
+                disp = cpu_lduw_code(env, s->pc);
                 s->pc += 2;
                 gen_op_movl_A0_im(disp);
                 rm = 0; /* avoid SS override */
@@ -2067,11 +2143,11 @@ static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int *offset_
             }
             break;
         case 1:
-            disp = (int8_t)ldub_code(s->pc++);
+            disp = (int8_t)cpu_ldub_code(env, s->pc++);
             break;
         default:
         case 2:
-            disp = lduw_code(s->pc);
+            disp = cpu_lduw_code(env, s->pc);
             s->pc += 2;
             break;
         }
@@ -2117,7 +2193,7 @@ static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int *offset_
                 else
                     override = R_DS;
             }
-            gen_op_addl_A0_seg(override);
+            gen_op_addl_A0_seg(s, override);
         }
     }
 
@@ -2127,7 +2203,7 @@ static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int *offset_
     *offset_ptr = disp;
 }
 
-static void gen_nop_modrm(DisasContext *s, int modrm)
+static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm)
 {
     int mod, rm, base, code;
 
@@ -2141,7 +2217,7 @@ static void gen_nop_modrm(DisasContext *s, int modrm)
         base = rm;
 
         if (base == 4) {
-            code = ldub_code(s->pc++);
+            code = cpu_ldub_code(env, s->pc++);
             base = (code & 7);
         }
 
@@ -2194,14 +2270,15 @@ static void gen_add_A0_ds_seg(DisasContext *s)
         } else
 #endif
         {
-            gen_op_addl_A0_seg(override);
+            gen_op_addl_A0_seg(s, override);
         }
     }
 }
 
 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
    OR_TMP0 */
-static void gen_ldst_modrm(DisasContext *s, int modrm, int ot, int reg, int is_store)
+static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
+                           int ot, int reg, int is_store)
 {
     int mod, rm, opreg, disp;
 
@@ -2218,7 +2295,7 @@ static void gen_ldst_modrm(DisasContext *s, int modrm, int ot, int reg, int is_s
                 gen_op_mov_reg_T0(ot, reg);
         }
     } else {
-        gen_lea_modrm(s, modrm, &opreg, &disp);
+        gen_lea_modrm(env, s, modrm, &opreg, &disp);
         if (is_store) {
             if (reg != OR_TMP0)
                 gen_op_mov_TN_reg(ot, 0, reg);
@@ -2231,22 +2308,22 @@ static void gen_ldst_modrm(DisasContext *s, int modrm, int ot, int reg, int is_s
     }
 }
 
-static inline uint32_t insn_get(DisasContext *s, int ot)
+static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, int ot)
 {
     uint32_t ret;
 
     switch(ot) {
     case OT_BYTE:
-        ret = ldub_code(s->pc);
+        ret = cpu_ldub_code(env, s->pc);
         s->pc++;
         break;
     case OT_WORD:
-        ret = lduw_code(s->pc);
+        ret = cpu_lduw_code(env, s->pc);
         s->pc += 2;
         break;
     default:
     case OT_LONG:
-        ret = ldl_code(s->pc);
+        ret = cpu_ldl_code(env, s->pc);
         s->pc += 4;
         break;
     }
@@ -2369,7 +2446,7 @@ static void gen_movl_seg_T0(DisasContext *s, int seg_reg, target_ulong cur_eip)
             gen_op_set_cc_op(s->cc_op);
         gen_jmp_im(cur_eip);
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-        gen_helper_load_seg(tcg_const_i32(seg_reg), cpu_tmp2_i32);
+        gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
         /* abort translation because the addseg value may change or
            because ss32 may change. For R_SS, translation must always
            stop as a special handling must be done to disable hardware
@@ -2398,7 +2475,7 @@ gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
     if (s->cc_op != CC_OP_DYNAMIC)
         gen_op_set_cc_op(s->cc_op);
     gen_jmp_im(pc_start - s->cs_base);
-    gen_helper_svm_check_intercept_param(tcg_const_i32(type),
+    gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
                                          tcg_const_i64(param));
 }
 
@@ -2447,12 +2524,12 @@ static void gen_push_T0(DisasContext *s)
         if (s->ss32) {
             if (s->addseg) {
                 tcg_gen_mov_tl(cpu_T[1], cpu_A0);
-                gen_op_addl_A0_seg(R_SS);
+                gen_op_addl_A0_seg(s, R_SS);
             }
         } else {
             gen_op_andl_A0_ffff();
             tcg_gen_mov_tl(cpu_T[1], cpu_A0);
-            gen_op_addl_A0_seg(R_SS);
+            gen_op_addl_A0_seg(s, R_SS);
         }
         gen_op_st_T0_A0(s->dflag + 1 + s->mem_index);
         if (s->ss32 && !s->addseg)
@@ -2487,11 +2564,11 @@ static void gen_push_T1(DisasContext *s)
             gen_op_addl_A0_im(-4);
         if (s->ss32) {
             if (s->addseg) {
-                gen_op_addl_A0_seg(R_SS);
+                gen_op_addl_A0_seg(s, R_SS);
             }
         } else {
             gen_op_andl_A0_ffff();
-            gen_op_addl_A0_seg(R_SS);
+            gen_op_addl_A0_seg(s, R_SS);
         }
         gen_op_st_T1_A0(s->dflag + 1 + s->mem_index);
 
@@ -2515,10 +2592,10 @@ static void gen_pop_T0(DisasContext *s)
         gen_op_movl_A0_reg(R_ESP);
         if (s->ss32) {
             if (s->addseg)
-                gen_op_addl_A0_seg(R_SS);
+                gen_op_addl_A0_seg(s, R_SS);
         } else {
             gen_op_andl_A0_ffff();
-            gen_op_addl_A0_seg(R_SS);
+            gen_op_addl_A0_seg(s, R_SS);
         }
         gen_op_ld_T0_A0(s->dflag + 1 + s->mem_index);
     }
@@ -2543,7 +2620,7 @@ static void gen_stack_A0(DisasContext *s)
         gen_op_andl_A0_ffff();
     tcg_gen_mov_tl(cpu_T[1], cpu_A0);
     if (s->addseg)
-        gen_op_addl_A0_seg(R_SS);
+        gen_op_addl_A0_seg(s, R_SS);
 }
 
 /* NOTE: wrap around in 16 bit not fully handled */
@@ -2556,7 +2633,7 @@ static void gen_pusha(DisasContext *s)
         gen_op_andl_A0_ffff();
     tcg_gen_mov_tl(cpu_T[1], cpu_A0);
     if (s->addseg)
-        gen_op_addl_A0_seg(R_SS);
+        gen_op_addl_A0_seg(s, R_SS);
     for(i = 0;i < 8; i++) {
         gen_op_mov_TN_reg(OT_LONG, 0, 7 - i);
         gen_op_st_T0_A0(OT_WORD + s->dflag + s->mem_index);
@@ -2575,7 +2652,7 @@ static void gen_popa(DisasContext *s)
     tcg_gen_mov_tl(cpu_T[1], cpu_A0);
     tcg_gen_addi_tl(cpu_T[1], cpu_T[1], 16 <<  s->dflag);
     if (s->addseg)
-        gen_op_addl_A0_seg(R_SS);
+        gen_op_addl_A0_seg(s, R_SS);
     for(i = 0;i < 8; i++) {
         /* ESP is not reloaded */
         if (i != 3) {
@@ -2606,7 +2683,7 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
         gen_op_st_T0_A0(ot + s->mem_index);
         if (level) {
             /* XXX: must save state */
-            gen_helper_enter64_level(tcg_const_i32(level),
+            gen_helper_enter64_level(cpu_env, tcg_const_i32(level),
                                      tcg_const_i32((ot == OT_QUAD)),
                                      cpu_T[1]);
         }
@@ -2625,13 +2702,13 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
             gen_op_andl_A0_ffff();
         tcg_gen_mov_tl(cpu_T[1], cpu_A0);
         if (s->addseg)
-            gen_op_addl_A0_seg(R_SS);
+            gen_op_addl_A0_seg(s, R_SS);
         /* push bp */
         gen_op_mov_TN_reg(OT_LONG, 0, R_EBP);
         gen_op_st_T0_A0(ot + s->mem_index);
         if (level) {
             /* XXX: must save state */
-            gen_helper_enter_level(tcg_const_i32(level),
+            gen_helper_enter_level(cpu_env, tcg_const_i32(level),
                                    tcg_const_i32(s->dflag),
                                    cpu_T[1]);
         }
@@ -2646,7 +2723,7 @@ static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
     if (s->cc_op != CC_OP_DYNAMIC)
         gen_op_set_cc_op(s->cc_op);
     gen_jmp_im(cur_eip);
-    gen_helper_raise_exception(tcg_const_i32(trapno));
+    gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
     s->is_jmp = DISAS_TB_JUMP;
 }
 
@@ -2658,7 +2735,7 @@ static void gen_interrupt(DisasContext *s, int intno,
     if (s->cc_op != CC_OP_DYNAMIC)
         gen_op_set_cc_op(s->cc_op);
     gen_jmp_im(cur_eip);
-    gen_helper_raise_interrupt(tcg_const_i32(intno), 
+    gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
                                tcg_const_i32(next_eip - cur_eip));
     s->is_jmp = DISAS_TB_JUMP;
 }
@@ -2668,7 +2745,7 @@ static void gen_debug(DisasContext *s, target_ulong cur_eip)
     if (s->cc_op != CC_OP_DYNAMIC)
         gen_op_set_cc_op(s->cc_op);
     gen_jmp_im(cur_eip);
-    gen_helper_debug();
+    gen_helper_debug(cpu_env);
     s->is_jmp = DISAS_TB_JUMP;
 }
 
@@ -2679,15 +2756,15 @@ static void gen_eob(DisasContext *s)
     if (s->cc_op != CC_OP_DYNAMIC)
         gen_op_set_cc_op(s->cc_op);
     if (s->tb->flags & HF_INHIBIT_IRQ_MASK) {
-        gen_helper_reset_inhibit_irq();
+        gen_helper_reset_inhibit_irq(cpu_env);
     }
     if (s->tb->flags & HF_RF_MASK) {
-        gen_helper_reset_rf();
+        gen_helper_reset_rf(cpu_env);
     }
     if (s->singlestep_enabled) {
-        gen_helper_debug();
+        gen_helper_debug(cpu_env);
     } else if (s->tf) {
-       gen_helper_single_step();
+        gen_helper_single_step(cpu_env);
     } else {
         tcg_gen_exit_tb(0);
     }
@@ -2773,6 +2850,17 @@ static inline void gen_op_movq_env_0(int d_offset)
     tcg_gen_st_i64(cpu_tmp1_i64, cpu_env, d_offset);
 }
 
+typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
+typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
+typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val);
+typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val);
+typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
+typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
+                               TCGv_i32 val);
+typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
+typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
+                               TCGv val);
+
 #define SSE_SPECIAL ((void *)1)
 #define SSE_DUMMY ((void *)2)
 
@@ -2780,7 +2868,7 @@ static inline void gen_op_movq_env_0(int d_offset)
 #define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \
                      gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, }
 
-static void *sse_op_table1[256][4] = {
+static const SSEFunc_0_epp sse_op_table1[256][4] = {
     /* 3DNow! extensions */
     [0x0e] = { SSE_DUMMY }, /* femms */
     [0x0f] = { SSE_DUMMY }, /* pf... */
@@ -2821,7 +2909,8 @@ static void *sse_op_table1[256][4] = {
     [0x5f] = SSE_FOP(max),
 
     [0xc2] = SSE_FOP(cmpeq),
-    [0xc6] = { gen_helper_shufps, gen_helper_shufpd },
+    [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps,
+               (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */
 
     [0x38] = { SSE_SPECIAL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* SSSE3/SSE4 */
     [0x3a] = { SSE_SPECIAL, SSE_SPECIAL }, /* SSSE3/SSE4 */
@@ -2843,10 +2932,10 @@ static void *sse_op_table1[256][4] = {
     [0x6d] = { NULL, gen_helper_punpckhqdq_xmm },
     [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */
     [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, , movqdu */
-    [0x70] = { gen_helper_pshufw_mmx,
-               gen_helper_pshufd_xmm,
-               gen_helper_pshufhw_xmm,
-               gen_helper_pshuflw_xmm },
+    [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx,
+               (SSEFunc_0_epp)gen_helper_pshufd_xmm,
+               (SSEFunc_0_epp)gen_helper_pshufhw_xmm,
+               (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */
     [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */
     [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */
     [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */
@@ -2901,7 +2990,8 @@ static void *sse_op_table1[256][4] = {
     [0xf4] = MMX_OP2(pmuludq),
     [0xf5] = MMX_OP2(pmaddwd),
     [0xf6] = MMX_OP2(psadbw),
-    [0xf7] = MMX_OP2(maskmov),
+    [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx,
+               (SSEFunc_0_epp)gen_helper_maskmov_xmm }, /* XXX: casts */
     [0xf8] = MMX_OP2(psubb),
     [0xf9] = MMX_OP2(psubw),
     [0xfa] = MMX_OP2(psubl),
@@ -2911,7 +3001,7 @@ static void *sse_op_table1[256][4] = {
     [0xfe] = MMX_OP2(paddl),
 };
 
-static void *sse_op_table2[3 * 8][2] = {
+static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = {
     [0 + 2] = MMX_OP2(psrlw),
     [0 + 4] = MMX_OP2(psraw),
     [0 + 6] = MMX_OP2(psllw),
@@ -2924,24 +3014,35 @@ static void *sse_op_table2[3 * 8][2] = {
     [16 + 7] = { NULL, gen_helper_pslldq_xmm },
 };
 
-static void *sse_op_table3[4 * 3] = {
+static const SSEFunc_0_epi sse_op_table3ai[] = {
     gen_helper_cvtsi2ss,
-    gen_helper_cvtsi2sd,
-    X86_64_ONLY(gen_helper_cvtsq2ss),
-    X86_64_ONLY(gen_helper_cvtsq2sd),
+    gen_helper_cvtsi2sd
+};
 
+#ifdef TARGET_X86_64
+static const SSEFunc_0_epl sse_op_table3aq[] = {
+    gen_helper_cvtsq2ss,
+    gen_helper_cvtsq2sd
+};
+#endif
+
+static const SSEFunc_i_ep sse_op_table3bi[] = {
     gen_helper_cvttss2si,
+    gen_helper_cvtss2si,
     gen_helper_cvttsd2si,
-    X86_64_ONLY(gen_helper_cvttss2sq),
-    X86_64_ONLY(gen_helper_cvttsd2sq),
+    gen_helper_cvtsd2si
+};
 
-    gen_helper_cvtss2si,
-    gen_helper_cvtsd2si,
-    X86_64_ONLY(gen_helper_cvtss2sq),
-    X86_64_ONLY(gen_helper_cvtsd2sq),
+#ifdef TARGET_X86_64
+static const SSEFunc_l_ep sse_op_table3bq[] = {
+    gen_helper_cvttss2sq,
+    gen_helper_cvtss2sq,
+    gen_helper_cvttsd2sq,
+    gen_helper_cvtsd2sq
 };
+#endif
 
-static void *sse_op_table4[8][4] = {
+static const SSEFunc_0_epp sse_op_table4[8][4] = {
     SSE_FOP(cmpeq),
     SSE_FOP(cmplt),
     SSE_FOP(cmple),
@@ -2952,7 +3053,7 @@ static void *sse_op_table4[8][4] = {
     SSE_FOP(cmpord),
 };
 
-static void *sse_op_table5[256] = {
+static const SSEFunc_0_epp sse_op_table5[256] = {
     [0x0c] = gen_helper_pi2fw,
     [0x0d] = gen_helper_pi2fd,
     [0x1c] = gen_helper_pf2iw,
@@ -2979,14 +3080,22 @@ static void *sse_op_table5[256] = {
     [0xbf] = gen_helper_pavgb_mmx /* pavgusb */
 };
 
-struct sse_op_helper_s {
-    void *op[2]; uint32_t ext_mask;
+struct SSEOpHelper_epp {
+    SSEFunc_0_epp op[2];
+    uint32_t ext_mask;
+};
+
+struct SSEOpHelper_eppi {
+    SSEFunc_0_eppi op[2];
+    uint32_t ext_mask;
 };
+
 #define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 }
 #define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
 #define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
 #define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
-static struct sse_op_helper_s sse_op_table6[256] = {
+
+static const struct SSEOpHelper_epp sse_op_table6[256] = {
     [0x00] = SSSE3_OP(pshufb),
     [0x01] = SSSE3_OP(phaddw),
     [0x02] = SSSE3_OP(phaddd),
@@ -3035,7 +3144,7 @@ static struct sse_op_helper_s sse_op_table6[256] = {
     [0x41] = SSE41_OP(phminposuw),
 };
 
-static struct sse_op_helper_s sse_op_table7[256] = {
+static const struct SSEOpHelper_eppi sse_op_table7[256] = {
     [0x08] = SSE41_OP(roundps),
     [0x09] = SSE41_OP(roundpd),
     [0x0a] = SSE41_OP(roundss),
@@ -3060,11 +3169,15 @@ static struct sse_op_helper_s sse_op_table7[256] = {
     [0x63] = SSE42_OP(pcmpistri),
 };
 
-static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
+static void gen_sse(CPUX86State *env, DisasContext *s, int b,
+                    target_ulong pc_start, int rex_r)
 {
     int b1, op1_offset, op2_offset, is_xmm, val, ot;
     int modrm, mod, rm, reg, reg_addr, offset_addr;
-    void *sse_op2;
+    SSEFunc_0_epp sse_fn_epp;
+    SSEFunc_0_eppi sse_fn_eppi;
+    SSEFunc_0_ppi sse_fn_ppi;
+    SSEFunc_0_eppt sse_fn_eppt;
 
     b &= 0xff;
     if (s->prefix & PREFIX_DATA)
@@ -3075,9 +3188,10 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
         b1 = 3;
     else
         b1 = 0;
-    sse_op2 = sse_op_table1[b][b1];
-    if (!sse_op2)
+    sse_fn_epp = sse_op_table1[b][b1];
+    if (!sse_fn_epp) {
         goto illegal_op;
+    }
     if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
         is_xmm = 1;
     } else {
@@ -3105,32 +3219,32 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
         if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
             goto illegal_op;
         /* femms */
-        gen_helper_emms();
+        gen_helper_emms(cpu_env);
         return;
     }
     if (b == 0x77) {
         /* emms */
-        gen_helper_emms();
+        gen_helper_emms(cpu_env);
         return;
     }
     /* prepare MMX state (XXX: optimize by storing fptt and fptags in
        the static cpu state) */
     if (!is_xmm) {
-        gen_helper_enter_mmx();
+        gen_helper_enter_mmx(cpu_env);
     }
 
-    modrm = ldub_code(s->pc++);
+    modrm = cpu_ldub_code(env, s->pc++);
     reg = ((modrm >> 3) & 7);
     if (is_xmm)
         reg |= rex_r;
     mod = (modrm >> 6) & 3;
-    if (sse_op2 == SSE_SPECIAL) {
+    if (sse_fn_epp == SSE_SPECIAL) {
         b |= (b1 << 8);
         switch(b) {
         case 0x0e7: /* movntq */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
             break;
         case 0x1e7: /* movntdq */
@@ -3138,20 +3252,20 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
         case 0x12b: /* movntps */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             gen_sto_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
             break;
         case 0x3f0: /* lddqu */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
             break;
         case 0x22b: /* movntss */
         case 0x32b: /* movntsd */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             if (b1 & 1) {
                 gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,
                     xmm_regs[reg]));
@@ -3164,12 +3278,12 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
         case 0x6e: /* movd mm, ea */
 #ifdef TARGET_X86_64
             if (s->dflag == 2) {
-                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, OT_QUAD, OR_TMP0, 0);
                 tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
             } else
 #endif
             {
-                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, OT_LONG, OR_TMP0, 0);
                 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
                                  offsetof(CPUX86State,fpregs[reg].mmx));
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
@@ -3179,14 +3293,14 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
         case 0x16e: /* movd xmm, ea */
 #ifdef TARGET_X86_64
             if (s->dflag == 2) {
-                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, OT_QUAD, OR_TMP0, 0);
                 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
                                  offsetof(CPUX86State,xmm_regs[reg]));
                 gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T[0]);
             } else
 #endif
             {
-                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, OT_LONG, OR_TMP0, 0);
                 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
                                  offsetof(CPUX86State,xmm_regs[reg]));
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
@@ -3195,7 +3309,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
             break;
         case 0x6f: /* movq mm, ea */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
             } else {
                 rm = (modrm & 7);
@@ -3212,7 +3326,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
         case 0x16f: /* movdqa xmm, ea */
         case 0x26f: /* movdqu xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3222,7 +3336,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
             break;
         case 0x210: /* movss xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_op_ld_T0_A0(OT_LONG + s->mem_index);
                 tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
                 gen_op_movl_T0_0();
@@ -3237,7 +3351,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
             break;
         case 0x310: /* movsd xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
                 gen_op_movl_T0_0();
                 tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(2)));
@@ -3251,7 +3365,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
         case 0x012: /* movlps */
         case 0x112: /* movlpd */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
             } else {
                 /* movhlps */
@@ -3262,7 +3376,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
             break;
         case 0x212: /* movsldup */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3278,7 +3392,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
             break;
         case 0x312: /* movddup */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3291,7 +3405,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
         case 0x016: /* movhps */
         case 0x116: /* movhpd */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
             } else {
                 /* movlhps */
@@ -3302,7 +3416,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
             break;
         case 0x216: /* movshdup */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_ldo_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3323,16 +3437,18 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
 
                 if (b1 == 1 && reg != 0)
                     goto illegal_op;
-                field_length = ldub_code(s->pc++) & 0x3F;
-                bit_index = ldub_code(s->pc++) & 0x3F;
+                field_length = cpu_ldub_code(env, s->pc++) & 0x3F;
+                bit_index = cpu_ldub_code(env, s->pc++) & 0x3F;
                 tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                     offsetof(CPUX86State,xmm_regs[reg]));
                 if (b1 == 1)
-                    gen_helper_extrq_i(cpu_ptr0, tcg_const_i32(bit_index),
-                        tcg_const_i32(field_length));
+                    gen_helper_extrq_i(cpu_env, cpu_ptr0,
+                                       tcg_const_i32(bit_index),
+                                       tcg_const_i32(field_length));
                 else
-                    gen_helper_insertq_i(cpu_ptr0, tcg_const_i32(bit_index),
-                        tcg_const_i32(field_length));
+                    gen_helper_insertq_i(cpu_env, cpu_ptr0,
+                                         tcg_const_i32(bit_index),
+                                         tcg_const_i32(field_length));
             }
             break;
         case 0x7e: /* movd ea, mm */
@@ -3340,13 +3456,13 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
             if (s->dflag == 2) {
                 tcg_gen_ld_i64(cpu_T[0], cpu_env, 
                                offsetof(CPUX86State,fpregs[reg].mmx));
-                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, OT_QUAD, OR_TMP0, 1);
             } else
 #endif
             {
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env, 
                                  offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
-                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, OT_LONG, OR_TMP0, 1);
             }
             break;
         case 0x17e: /* movd ea, xmm */
@@ -3354,18 +3470,18 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
             if (s->dflag == 2) {
                 tcg_gen_ld_i64(cpu_T[0], cpu_env, 
                                offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
-                gen_ldst_modrm(s, modrm, OT_QUAD, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, OT_QUAD, OR_TMP0, 1);
             } else
 #endif
             {
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env, 
                                  offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
-                gen_ldst_modrm(s, modrm, OT_LONG, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, OT_LONG, OR_TMP0, 1);
             }
             break;
         case 0x27e: /* movq xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3376,7 +3492,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
             break;
         case 0x7f: /* movq ea, mm */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,fpregs[reg].mmx));
             } else {
                 rm = (modrm & 7);
@@ -3391,7 +3507,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
         case 0x17f: /* movdqa ea, xmm */
         case 0x27f: /* movdqu ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_sto_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3401,7 +3517,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
             break;
         case 0x211: /* movss ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
                 gen_op_st_T0_A0(OT_LONG + s->mem_index);
             } else {
@@ -3412,7 +3528,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
             break;
         case 0x311: /* movsd ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3423,7 +3539,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
         case 0x013: /* movlps */
         case 0x113: /* movlpd */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
             } else {
                 goto illegal_op;
@@ -3432,7 +3548,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
         case 0x017: /* movhps */
         case 0x117: /* movhpd */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
             } else {
                 goto illegal_op;
@@ -3447,7 +3563,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
             if (b1 >= 2) {
                goto illegal_op;
             }
-            val = ldub_code(s->pc++);
+            val = cpu_ldub_code(env, s->pc++);
             if (is_xmm) {
                 gen_op_movl_T0_im(val);
                 tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_t0.XMM_L(0)));
@@ -3461,9 +3577,11 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
                 tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,mmx_t0.MMX_L(1)));
                 op1_offset = offsetof(CPUX86State,mmx_t0);
             }
-            sse_op2 = sse_op_table2[((b - 1) & 3) * 8 + (((modrm >> 3)) & 7)][b1];
-            if (!sse_op2)
+            sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 +
+                                       (((modrm >> 3)) & 7)][b1];
+            if (!sse_fn_epp) {
                 goto illegal_op;
+            }
             if (is_xmm) {
                 rm = (modrm & 7) | REX_B(s);
                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
@@ -3473,13 +3591,13 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
             }
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op1_offset);
-            ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
+            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
             break;
         case 0x050: /* movmskps */
             rm = (modrm & 7) | REX_B(s);
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
                              offsetof(CPUX86State,xmm_regs[rm]));
-            gen_helper_movmskps(cpu_tmp2_i32, cpu_ptr0);
+            gen_helper_movmskps(cpu_tmp2_i32, cpu_env, cpu_ptr0);
             tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
             gen_op_mov_reg_T0(OT_LONG, reg);
             break;
@@ -3487,15 +3605,15 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
             rm = (modrm & 7) | REX_B(s);
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, 
                              offsetof(CPUX86State,xmm_regs[rm]));
-            gen_helper_movmskpd(cpu_tmp2_i32, cpu_ptr0);
+            gen_helper_movmskpd(cpu_tmp2_i32, cpu_env, cpu_ptr0);
             tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
             gen_op_mov_reg_T0(OT_LONG, reg);
             break;
         case 0x02a: /* cvtpi2ps */
         case 0x12a: /* cvtpi2pd */
-            gen_helper_enter_mmx();
+            gen_helper_enter_mmx(cpu_env);
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 op2_offset = offsetof(CPUX86State,mmx_t0);
                 gen_ldq_env_A0(s->mem_index, op2_offset);
             } else {
@@ -3507,35 +3625,40 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
             switch(b >> 8) {
             case 0x0:
-                gen_helper_cvtpi2ps(cpu_ptr0, cpu_ptr1);
+                gen_helper_cvtpi2ps(cpu_env, cpu_ptr0, cpu_ptr1);
                 break;
             default:
             case 0x1:
-                gen_helper_cvtpi2pd(cpu_ptr0, cpu_ptr1);
+                gen_helper_cvtpi2pd(cpu_env, cpu_ptr0, cpu_ptr1);
                 break;
             }
             break;
         case 0x22a: /* cvtsi2ss */
         case 0x32a: /* cvtsi2sd */
             ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
-            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
-            sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2)];
             if (ot == OT_LONG) {
+                SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1];
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-                ((void (*)(TCGv_ptr, TCGv_i32))sse_op2)(cpu_ptr0, cpu_tmp2_i32);
+                sse_fn_epi(cpu_env, cpu_ptr0, cpu_tmp2_i32);
             } else {
-                ((void (*)(TCGv_ptr, TCGv))sse_op2)(cpu_ptr0, cpu_T[0]);
+#ifdef TARGET_X86_64
+                SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1];
+                sse_fn_epl(cpu_env, cpu_ptr0, cpu_T[0]);
+#else
+                goto illegal_op;
+#endif
             }
             break;
         case 0x02c: /* cvttps2pi */
         case 0x12c: /* cvttpd2pi */
         case 0x02d: /* cvtps2pi */
         case 0x12d: /* cvtpd2pi */
-            gen_helper_enter_mmx();
+            gen_helper_enter_mmx(cpu_env);
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 op2_offset = offsetof(CPUX86State,xmm_t0);
                 gen_ldo_env_A0(s->mem_index, op2_offset);
             } else {
@@ -3547,16 +3670,16 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
             switch(b) {
             case 0x02c:
-                gen_helper_cvttps2pi(cpu_ptr0, cpu_ptr1);
+                gen_helper_cvttps2pi(cpu_env, cpu_ptr0, cpu_ptr1);
                 break;
             case 0x12c:
-                gen_helper_cvttpd2pi(cpu_ptr0, cpu_ptr1);
+                gen_helper_cvttpd2pi(cpu_env, cpu_ptr0, cpu_ptr1);
                 break;
             case 0x02d:
-                gen_helper_cvtps2pi(cpu_ptr0, cpu_ptr1);
+                gen_helper_cvtps2pi(cpu_env, cpu_ptr0, cpu_ptr1);
                 break;
             case 0x12d:
-                gen_helper_cvtpd2pi(cpu_ptr0, cpu_ptr1);
+                gen_helper_cvtpd2pi(cpu_env, cpu_ptr0, cpu_ptr1);
                 break;
             }
             break;
@@ -3566,7 +3689,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
         case 0x32d: /* cvtsd2si */
             ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 if ((b >> 8) & 1) {
                     gen_ldq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_t0.XMM_Q(0)));
                 } else {
@@ -3578,22 +3701,28 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
                 rm = (modrm & 7) | REX_B(s);
                 op2_offset = offsetof(CPUX86State,xmm_regs[rm]);
             }
-            sse_op2 = sse_op_table3[(s->dflag == 2) * 2 + ((b >> 8) - 2) + 4 +
-                                    (b & 1) * 4];
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op2_offset);
             if (ot == OT_LONG) {
-                ((void (*)(TCGv_i32, TCGv_ptr))sse_op2)(cpu_tmp2_i32, cpu_ptr0);
+                SSEFunc_i_ep sse_fn_i_ep =
+                    sse_op_table3bi[((b >> 7) & 2) | (b & 1)];
+                sse_fn_i_ep(cpu_tmp2_i32, cpu_env, cpu_ptr0);
                 tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
             } else {
-                ((void (*)(TCGv, TCGv_ptr))sse_op2)(cpu_T[0], cpu_ptr0);
+#ifdef TARGET_X86_64
+                SSEFunc_l_ep sse_fn_l_ep =
+                    sse_op_table3bq[((b >> 7) & 2) | (b & 1)];
+                sse_fn_l_ep(cpu_T[0], cpu_env, cpu_ptr0);
+#else
+                goto illegal_op;
+#endif
             }
             gen_op_mov_reg_T0(ot, reg);
             break;
         case 0xc4: /* pinsrw */
         case 0x1c4:
             s->rip_offset = 1;
-            gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
-            val = ldub_code(s->pc++);
+            gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 0);
+            val = cpu_ldub_code(env, s->pc++);
             if (b1) {
                 val &= 7;
                 tcg_gen_st16_tl(cpu_T[0], cpu_env,
@@ -3609,7 +3738,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
             if (mod != 3)
                 goto illegal_op;
             ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
-            val = ldub_code(s->pc++);
+            val = cpu_ldub_code(env, s->pc++);
             if (b1) {
                 val &= 7;
                 rm = (modrm & 7) | REX_B(s);
@@ -3626,7 +3755,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
             break;
         case 0x1d6: /* movq ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_stq_env_A0(s->mem_index, offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3636,14 +3765,14 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
             }
             break;
         case 0x2d6: /* movq2dq */
-            gen_helper_enter_mmx();
+            gen_helper_enter_mmx(cpu_env);
             rm = (modrm & 7);
             gen_op_movq(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)),
                         offsetof(CPUX86State,fpregs[rm].mmx));
             gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg].XMM_Q(1)));
             break;
         case 0x3d6: /* movdq2q */
-            gen_helper_enter_mmx();
+            gen_helper_enter_mmx(cpu_env);
             rm = (modrm & 7) | REX_B(s);
             gen_op_movq(offsetof(CPUX86State,fpregs[reg & 7].mmx),
                         offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
@@ -3655,11 +3784,11 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
             if (b1) {
                 rm = (modrm & 7) | REX_B(s);
                 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,xmm_regs[rm]));
-                gen_helper_pmovmskb_xmm(cpu_tmp2_i32, cpu_ptr0);
+                gen_helper_pmovmskb_xmm(cpu_tmp2_i32, cpu_env, cpu_ptr0);
             } else {
                 rm = (modrm & 7);
                 tcg_gen_addi_ptr(cpu_ptr0, cpu_env, offsetof(CPUX86State,fpregs[rm].mmx));
-                gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_ptr0);
+                gen_helper_pmovmskb_mmx(cpu_tmp2_i32, cpu_env, cpu_ptr0);
             }
             tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
             reg = ((modrm >> 3) & 7) | rex_r;
@@ -3670,7 +3799,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
                 goto crc32;
         case 0x038:
             b = modrm;
-            modrm = ldub_code(s->pc++);
+            modrm = cpu_ldub_code(env, s->pc++);
             rm = modrm & 7;
             reg = ((modrm >> 3) & 7) | rex_r;
             mod = (modrm >> 6) & 3;
@@ -3678,9 +3807,10 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
                 goto illegal_op;
             }
 
-            sse_op2 = sse_op_table6[b].op[b1];
-            if (!sse_op2)
+            sse_fn_epp = sse_op_table6[b].op[b1];
+            if (!sse_fn_epp) {
                 goto illegal_op;
+            }
             if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask))
                 goto illegal_op;
 
@@ -3690,7 +3820,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
                 } else {
                     op2_offset = offsetof(CPUX86State,xmm_t0);
-                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                    gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                     switch (b) {
                     case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
                     case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
@@ -3725,16 +3855,17 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                 } else {
                     op2_offset = offsetof(CPUX86State,mmx_t0);
-                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                    gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                     gen_ldq_env_A0(s->mem_index, op2_offset);
                 }
             }
-            if (sse_op2 == SSE_SPECIAL)
+            if (sse_fn_epp == SSE_SPECIAL) {
                 goto illegal_op;
+            }
 
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
+            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
 
             if (b == 0x17)
                 s->cc_op = CC_OP_EFLAGS;
@@ -3742,7 +3873,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
         case 0x338: /* crc32 */
         crc32:
             b = modrm;
-            modrm = ldub_code(s->pc++);
+            modrm = cpu_ldub_code(env, s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
 
             if (b != 0xf0 && b != 0xf1)
@@ -3762,7 +3893,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
 
             gen_op_mov_TN_reg(OT_LONG, 0, reg);
             tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
             gen_helper_crc32(cpu_T[0], cpu_tmp2_i32,
                              cpu_T[0], tcg_const_i32(8 << ot));
 
@@ -3772,7 +3903,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
         case 0x03a:
         case 0x13a:
             b = modrm;
-            modrm = ldub_code(s->pc++);
+            modrm = cpu_ldub_code(env, s->pc++);
             rm = modrm & 7;
             reg = ((modrm >> 3) & 7) | rex_r;
             mod = (modrm >> 6) & 3;
@@ -3780,19 +3911,20 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
                 goto illegal_op;
             }
 
-            sse_op2 = sse_op_table7[b].op[b1];
-            if (!sse_op2)
+            sse_fn_eppi = sse_op_table7[b].op[b1];
+            if (!sse_fn_eppi) {
                 goto illegal_op;
+            }
             if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask))
                 goto illegal_op;
 
-            if (sse_op2 == SSE_SPECIAL) {
+            if (sse_fn_eppi == SSE_SPECIAL) {
                 ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3)
-                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                    gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 reg = ((modrm >> 3) & 7) | rex_r;
-                val = ldub_code(s->pc++);
+                val = cpu_ldub_code(env, s->pc++);
                 switch (b) {
                 case 0x14: /* pextrb */
                     tcg_gen_ld8u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
@@ -3922,7 +4054,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
                 } else {
                     op2_offset = offsetof(CPUX86State,xmm_t0);
-                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                    gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                     gen_ldo_env_A0(s->mem_index, op2_offset);
                 }
             } else {
@@ -3931,11 +4063,11 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                 } else {
                     op2_offset = offsetof(CPUX86State,mmx_t0);
-                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                    gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                     gen_ldq_env_A0(s->mem_index, op2_offset);
                 }
             }
-            val = ldub_code(s->pc++);
+            val = cpu_ldub_code(env, s->pc++);
 
             if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
                 s->cc_op = CC_OP_EFLAGS;
@@ -3947,7 +4079,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
 
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            ((void (*)(TCGv_ptr, TCGv_ptr, TCGv_i32))sse_op2)(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
+            sse_fn_eppi(cpu_env, cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
             break;
         default:
             goto illegal_op;
@@ -3966,7 +4098,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
         if (is_xmm) {
             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 op2_offset = offsetof(CPUX86State,xmm_t0);
                 if (b1 >= 2 && ((b >= 0x50 && b <= 0x5f && b != 0x5b) ||
                                 b == 0xc2)) {
@@ -3989,7 +4121,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
         } else {
             op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 op2_offset = offsetof(CPUX86State,mmx_t0);
                 gen_ldq_env_A0(s->mem_index, op2_offset);
             } else {
@@ -4001,30 +4133,34 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
         case 0x0f: /* 3DNow! data insns */
             if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW))
                 goto illegal_op;
-            val = ldub_code(s->pc++);
-            sse_op2 = sse_op_table5[val];
-            if (!sse_op2)
+            val = cpu_ldub_code(env, s->pc++);
+            sse_fn_epp = sse_op_table5[val];
+            if (!sse_fn_epp) {
                 goto illegal_op;
+            }
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
+            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
             break;
         case 0x70: /* pshufx insn */
         case 0xc6: /* pshufx insn */
-            val = ldub_code(s->pc++);
+            val = cpu_ldub_code(env, s->pc++);
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            ((void (*)(TCGv_ptr, TCGv_ptr, TCGv_i32))sse_op2)(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
+            /* XXX: introduce a new table? */
+            sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp;
+            sse_fn_ppi(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
             break;
         case 0xc2:
             /* compare insns */
-            val = ldub_code(s->pc++);
+            val = cpu_ldub_code(env, s->pc++);
             if (val >= 8)
                 goto illegal_op;
-            sse_op2 = sse_op_table4[val][b1];
+            sse_fn_epp = sse_op_table4[val][b1];
+
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
+            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
             break;
         case 0xf7:
             /* maskmov : we must prepare A0 */
@@ -4044,12 +4180,14 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
 
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            ((void (*)(TCGv_ptr, TCGv_ptr, TCGv))sse_op2)(cpu_ptr0, cpu_ptr1, cpu_A0);
+            /* XXX: introduce a new table? */
+            sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp;
+            sse_fn_eppt(cpu_env, cpu_ptr0, cpu_ptr1, cpu_A0);
             break;
         default:
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
             tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
-            ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
+            sse_fn_epp(cpu_env, cpu_ptr0, cpu_ptr1);
             break;
         }
         if (b == 0x2e || b == 0x2f) {
@@ -4060,7 +4198,8 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
 
 /* convert one instruction. s->is_jmp is set if the translation must
    be stopped. Return the next pc value */
-static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
+static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
+                               target_ulong pc_start)
 {
     int b, prefixes, aflag, dflag;
     int shift, ot;
@@ -4068,8 +4207,9 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
     target_ulong next_eip, tval;
     int rex_w, rex_r;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
+    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
         tcg_gen_debug_insn_start(pc_start);
+    }
     s->pc = pc_start;
     prefixes = 0;
     aflag = s->code32;
@@ -4084,7 +4224,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
 #endif
     s->rip_offset = 0; /* for relative ip address */
  next_byte:
-    b = ldub_code(s->pc);
+    b = cpu_ldub_code(env, s->pc);
     s->pc++;
     /* check prefixes */
 #ifdef TARGET_X86_64
@@ -4199,7 +4339,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
     case 0x0f:
         /**************************/
         /* extended op code */
-        b = ldub_code(s->pc++) | 0x100;
+        b = cpu_ldub_code(env, s->pc++) | 0x100;
         goto reswitch;
 
         /**************************/
@@ -4224,12 +4364,12 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
 
             switch(f) {
             case 0: /* OP Ev, Gv */
-                modrm = ldub_code(s->pc++);
+                modrm = cpu_ldub_code(env, s->pc++);
                 reg = ((modrm >> 3) & 7) | rex_r;
                 mod = (modrm >> 6) & 3;
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3) {
-                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                    gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                     opreg = OR_TMP0;
                 } else if (op == OP_XORL && rm == reg) {
                 xor_zero:
@@ -4246,12 +4386,12 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                 gen_op(s, op, ot, opreg);
                 break;
             case 1: /* OP Gv, Ev */
-                modrm = ldub_code(s->pc++);
+                modrm = cpu_ldub_code(env, s->pc++);
                 mod = (modrm >> 6) & 3;
                 reg = ((modrm >> 3) & 7) | rex_r;
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3) {
-                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                    gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                     gen_op_ld_T1_A0(ot + s->mem_index);
                 } else if (op == OP_XORL && rm == reg) {
                     goto xor_zero;
@@ -4261,7 +4401,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                 gen_op(s, op, ot, reg);
                 break;
             case 2: /* OP A, Iv */
-                val = insn_get(s, ot);
+                val = insn_get(env, s, ot);
                 gen_op_movl_T1_im(val);
                 gen_op(s, op, ot, OR_EAX);
                 break;
@@ -4283,7 +4423,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             else
                 ot = dflag + OT_WORD;
 
-            modrm = ldub_code(s->pc++);
+            modrm = cpu_ldub_code(env, s->pc++);
             mod = (modrm >> 6) & 3;
             rm = (modrm & 7) | REX_B(s);
             op = (modrm >> 3) & 7;
@@ -4293,7 +4433,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                     s->rip_offset = 1;
                 else
                     s->rip_offset = insn_const_size(ot);
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 opreg = OR_TMP0;
             } else {
                 opreg = rm;
@@ -4304,10 +4444,10 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             case 0x80:
             case 0x81:
             case 0x82:
-                val = insn_get(s, ot);
+                val = insn_get(env, s, ot);
                 break;
             case 0x83:
-                val = (int8_t)insn_get(s, OT_BYTE);
+                val = (int8_t)insn_get(env, s, OT_BYTE);
                 break;
             }
             gen_op_movl_T1_im(val);
@@ -4332,14 +4472,14 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         else
             ot = dflag + OT_WORD;
 
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         rm = (modrm & 7) | REX_B(s);
         op = (modrm >> 3) & 7;
         if (mod != 3) {
             if (op == 0)
                 s->rip_offset = insn_const_size(ot);
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             gen_op_ld_T0_A0(ot + s->mem_index);
         } else {
             gen_op_mov_TN_reg(ot, 0, rm);
@@ -4347,7 +4487,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
 
         switch(op) {
         case 0: /* test */
-            val = insn_get(s, ot);
+            val = insn_get(env, s, ot);
             gen_op_movl_T1_im(val);
             gen_op_testl_T0_T1_cc();
             s->cc_op = CC_OP_LOGICB + ot;
@@ -4430,7 +4570,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                 break;
 #ifdef TARGET_X86_64
             case OT_QUAD:
-                gen_helper_mulq_EAX_T0(cpu_T[0]);
+                gen_helper_mulq_EAX_T0(cpu_env, cpu_T[0]);
                 s->cc_op = CC_OP_MULQ;
                 break;
 #endif
@@ -4500,7 +4640,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                 break;
 #ifdef TARGET_X86_64
             case OT_QUAD:
-                gen_helper_imulq_EAX_T0(cpu_T[0]);
+                gen_helper_imulq_EAX_T0(cpu_env, cpu_T[0]);
                 s->cc_op = CC_OP_MULQ;
                 break;
 #endif
@@ -4510,21 +4650,21 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             switch(ot) {
             case OT_BYTE:
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_divb_AL(cpu_T[0]);
+                gen_helper_divb_AL(cpu_env, cpu_T[0]);
                 break;
             case OT_WORD:
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_divw_AX(cpu_T[0]);
+                gen_helper_divw_AX(cpu_env, cpu_T[0]);
                 break;
             default:
             case OT_LONG:
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_divl_EAX(cpu_T[0]);
+                gen_helper_divl_EAX(cpu_env, cpu_T[0]);
                 break;
 #ifdef TARGET_X86_64
             case OT_QUAD:
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_divq_EAX(cpu_T[0]);
+                gen_helper_divq_EAX(cpu_env, cpu_T[0]);
                 break;
 #endif
             }
@@ -4533,21 +4673,21 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             switch(ot) {
             case OT_BYTE:
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_idivb_AL(cpu_T[0]);
+                gen_helper_idivb_AL(cpu_env, cpu_T[0]);
                 break;
             case OT_WORD:
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_idivw_AX(cpu_T[0]);
+                gen_helper_idivw_AX(cpu_env, cpu_T[0]);
                 break;
             default:
             case OT_LONG:
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_idivl_EAX(cpu_T[0]);
+                gen_helper_idivl_EAX(cpu_env, cpu_T[0]);
                 break;
 #ifdef TARGET_X86_64
             case OT_QUAD:
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_idivq_EAX(cpu_T[0]);
+                gen_helper_idivq_EAX(cpu_env, cpu_T[0]);
                 break;
 #endif
             }
@@ -4564,7 +4704,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         else
             ot = dflag + OT_WORD;
 
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         rm = (modrm & 7) | REX_B(s);
         op = (modrm >> 3) & 7;
@@ -4583,7 +4723,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             }
         }
         if (mod != 3) {
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             if (op >= 2 && op != 3 && op != 5)
                 gen_op_ld_T0_A0(ot + s->mem_index);
         } else {
@@ -4625,13 +4765,13 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                     gen_op_set_cc_op(s->cc_op);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-                gen_helper_lcall_protected(cpu_tmp2_i32, cpu_T[1],
-                                           tcg_const_i32(dflag), 
+                gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T[1],
+                                           tcg_const_i32(dflag),
                                            tcg_const_i32(s->pc - pc_start));
             } else {
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-                gen_helper_lcall_real(cpu_tmp2_i32, cpu_T[1],
-                                      tcg_const_i32(dflag), 
+                gen_helper_lcall_real(cpu_env, cpu_tmp2_i32, cpu_T[1],
+                                      tcg_const_i32(dflag),
                                       tcg_const_i32(s->pc - s->cs_base));
             }
             gen_eob(s);
@@ -4652,7 +4792,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                     gen_op_set_cc_op(s->cc_op);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-                gen_helper_ljmp_protected(cpu_tmp2_i32, cpu_T[1],
+                gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T[1],
                                           tcg_const_i32(s->pc - pc_start));
             } else {
                 gen_op_movl_seg_T0_vm(R_CS);
@@ -4676,10 +4816,10 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         else
             ot = dflag + OT_WORD;
 
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
 
-        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
         gen_op_mov_TN_reg(ot, 1, reg);
         gen_op_testl_T0_T1_cc();
         s->cc_op = CC_OP_LOGICB + ot;
@@ -4691,7 +4831,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             ot = OT_BYTE;
         else
             ot = dflag + OT_WORD;
-        val = insn_get(s, ot);
+        val = insn_get(env, s, ot);
 
         gen_op_mov_TN_reg(ot, 0, OR_EAX);
         gen_op_movl_T1_im(val);
@@ -4741,18 +4881,18 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
     case 0x69: /* imul Gv, Ev, I */
     case 0x6b:
         ot = dflag + OT_WORD;
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
         if (b == 0x69)
             s->rip_offset = insn_const_size(ot);
         else if (b == 0x6b)
             s->rip_offset = 1;
-        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
         if (b == 0x69) {
-            val = insn_get(s, ot);
+            val = insn_get(env, s, ot);
             gen_op_movl_T1_im(val);
         } else if (b == 0x6b) {
-            val = (int8_t)insn_get(s, OT_BYTE);
+            val = (int8_t)insn_get(env, s, OT_BYTE);
             gen_op_movl_T1_im(val);
         } else {
             gen_op_mov_TN_reg(ot, 1, reg);
@@ -4760,7 +4900,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
 
 #ifdef TARGET_X86_64
         if (ot == OT_QUAD) {
-            gen_helper_imulq_T0_T1(cpu_T[0], cpu_T[0], cpu_T[1]);
+            gen_helper_imulq_T0_T1(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
         } else
 #endif
         if (ot == OT_LONG) {
@@ -4805,7 +4945,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             ot = OT_BYTE;
         else
             ot = dflag + OT_WORD;
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
         mod = (modrm >> 6) & 3;
         if (mod == 3) {
@@ -4816,7 +4956,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             gen_op_mov_reg_T1(ot, reg);
             gen_op_mov_reg_T0(ot, rm);
         } else {
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             gen_op_mov_TN_reg(ot, 0, reg);
             gen_op_ld_T1_A0(ot + s->mem_index);
             gen_op_addl_T0_T1();
@@ -4836,7 +4976,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                 ot = OT_BYTE;
             else
                 ot = dflag + OT_WORD;
-            modrm = ldub_code(s->pc++);
+            modrm = cpu_ldub_code(env, s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
             mod = (modrm >> 6) & 3;
             t0 = tcg_temp_local_new();
@@ -4848,7 +4988,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_mov_v_reg(ot, t0, rm);
             } else {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 tcg_gen_mov_tl(a0, cpu_A0);
                 gen_op_ld_v(ot + s->mem_index, t0, a0);
                 rm = 0; /* avoid warning */
@@ -4857,20 +4997,23 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             tcg_gen_sub_tl(t2, cpu_regs[R_EAX], t0);
             gen_extu(ot, t2);
             tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);
+            label2 = gen_new_label();
             if (mod == 3) {
-                label2 = gen_new_label();
                 gen_op_mov_reg_v(ot, R_EAX, t0);
                 tcg_gen_br(label2);
                 gen_set_label(label1);
                 gen_op_mov_reg_v(ot, rm, t1);
-                gen_set_label(label2);
             } else {
-                tcg_gen_mov_tl(t1, t0);
+                /* perform no-op store cycle like physical cpu; must be
+                   before changing accumulator to ensure idempotency if
+                   the store faults and the instruction is restarted */
+                gen_op_st_v(ot + s->mem_index, t0, a0);
                 gen_op_mov_reg_v(ot, R_EAX, t0);
+                tcg_gen_br(label2);
                 gen_set_label(label1);
-                /* always store */
                 gen_op_st_v(ot + s->mem_index, t1, a0);
             }
+            gen_set_label(label2);
             tcg_gen_mov_tl(cpu_cc_src, t0);
             tcg_gen_mov_tl(cpu_cc_dst, t2);
             s->cc_op = CC_OP_SUBB + ot;
@@ -4881,7 +5024,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         }
         break;
     case 0x1c7: /* cmpxchg8b */
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         if ((mod == 3) || ((modrm & 0x38) != 0x8))
             goto illegal_op;
@@ -4892,8 +5035,8 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             gen_jmp_im(pc_start - s->cs_base);
             if (s->cc_op != CC_OP_DYNAMIC)
                 gen_op_set_cc_op(s->cc_op);
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
-            gen_helper_cmpxchg16b(cpu_A0);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+            gen_helper_cmpxchg16b(cpu_env, cpu_A0);
         } else
 #endif        
         {
@@ -4902,8 +5045,8 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             gen_jmp_im(pc_start - s->cs_base);
             if (s->cc_op != CC_OP_DYNAMIC)
                 gen_op_set_cc_op(s->cc_op);
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
-            gen_helper_cmpxchg8b(cpu_A0);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+            gen_helper_cmpxchg8b(cpu_env, cpu_A0);
         }
         s->cc_op = CC_OP_EFLAGS;
         break;
@@ -4943,9 +5086,9 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             ot = dflag + OT_WORD;
         }
         if (b == 0x68)
-            val = insn_get(s, ot);
+            val = insn_get(env, s, ot);
         else
-            val = (int8_t)insn_get(s, OT_BYTE);
+            val = (int8_t)insn_get(env, s, OT_BYTE);
         gen_op_movl_T0_im(val);
         gen_push_T0(s);
         break;
@@ -4955,7 +5098,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         } else {
             ot = dflag + OT_WORD;
         }
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         gen_pop_T0(s);
         if (mod == 3) {
@@ -4966,7 +5109,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         } else {
             /* NOTE: order is important too for MMU exceptions */
             s->popl_esp_hack = 1 << ot;
-            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
             s->popl_esp_hack = 0;
             gen_pop_update(s);
         }
@@ -4974,9 +5117,9 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
     case 0xc8: /* enter */
         {
             int level;
-            val = lduw_code(s->pc);
+            val = cpu_lduw_code(env, s->pc);
             s->pc += 2;
-            level = ldub_code(s->pc++);
+            level = cpu_ldub_code(env, s->pc++);
             gen_enter(s, val, level);
         }
         break;
@@ -5029,7 +5172,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             /* If several instructions disable interrupts, only the
                _first_ does it */
             if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK))
-                gen_helper_set_inhibit_irq();
+                gen_helper_set_inhibit_irq(cpu_env);
             s->tf = 0;
         }
         if (s->is_jmp) {
@@ -5056,11 +5199,11 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             ot = OT_BYTE;
         else
             ot = dflag + OT_WORD;
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
 
         /* generate a generic store */
-        gen_ldst_modrm(s, modrm, ot, reg, 1);
+        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
         break;
     case 0xc6:
     case 0xc7: /* mov Ev, Iv */
@@ -5068,13 +5211,13 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             ot = OT_BYTE;
         else
             ot = dflag + OT_WORD;
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         if (mod != 3) {
             s->rip_offset = insn_const_size(ot);
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
         }
-        val = insn_get(s, ot);
+        val = insn_get(env, s, ot);
         gen_op_movl_T0_im(val);
         if (mod != 3)
             gen_op_st_T0_A0(ot + s->mem_index);
@@ -5087,25 +5230,25 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             ot = OT_BYTE;
         else
             ot = OT_WORD + dflag;
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
 
-        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
         gen_op_mov_reg_T0(ot, reg);
         break;
     case 0x8e: /* mov seg, Gv */
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         reg = (modrm >> 3) & 7;
         if (reg >= 6 || reg == R_CS)
             goto illegal_op;
-        gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 0);
         gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
         if (reg == R_SS) {
             /* if reg == SS, inhibit interrupts/trace */
             /* If several instructions disable interrupts, only the
                _first_ does it */
             if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK))
-                gen_helper_set_inhibit_irq();
+                gen_helper_set_inhibit_irq(cpu_env);
             s->tf = 0;
         }
         if (s->is_jmp) {
@@ -5114,7 +5257,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         }
         break;
     case 0x8c: /* mov Gv, seg */
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         reg = (modrm >> 3) & 7;
         mod = (modrm >> 6) & 3;
         if (reg >= 6)
@@ -5124,7 +5267,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             ot = OT_WORD + dflag;
         else
             ot = OT_WORD;
-        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
         break;
 
     case 0x1b6: /* movzbS Gv, Eb */
@@ -5137,7 +5280,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             d_ot = dflag + OT_WORD;
             /* ot is the size of source */
             ot = (b & 1) + OT_BYTE;
-            modrm = ldub_code(s->pc++);
+            modrm = cpu_ldub_code(env, s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
             mod = (modrm >> 6) & 3;
             rm = (modrm & 7) | REX_B(s);
@@ -5161,7 +5304,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                 }
                 gen_op_mov_reg_T0(d_ot, reg);
             } else {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 if (b & 8) {
                     gen_op_lds_T0_A0(ot + s->mem_index);
                 } else {
@@ -5174,7 +5317,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
 
     case 0x8d: /* lea */
         ot = dflag + OT_WORD;
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
@@ -5183,7 +5326,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         s->override = -1;
         val = s->addseg;
         s->addseg = 0;
-        gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+        gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
         s->addseg = val;
         gen_op_mov_reg_A0(ot - OT_WORD, reg);
         break;
@@ -5201,16 +5344,16 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                 ot = dflag + OT_WORD;
 #ifdef TARGET_X86_64
             if (s->aflag == 2) {
-                offset_addr = ldq_code(s->pc);
+                offset_addr = cpu_ldq_code(env, s->pc);
                 s->pc += 8;
                 gen_op_movq_A0_im(offset_addr);
             } else
 #endif
             {
                 if (s->aflag) {
-                    offset_addr = insn_get(s, OT_LONG);
+                    offset_addr = insn_get(env, s, OT_LONG);
                 } else {
-                    offset_addr = insn_get(s, OT_WORD);
+                    offset_addr = insn_get(env, s, OT_WORD);
                 }
                 gen_op_movl_A0_im(offset_addr);
             }
@@ -5248,7 +5391,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         gen_op_mov_reg_T0(OT_BYTE, R_EAX);
         break;
     case 0xb0 ... 0xb7: /* mov R, Ib */
-        val = insn_get(s, OT_BYTE);
+        val = insn_get(env, s, OT_BYTE);
         gen_op_movl_T0_im(val);
         gen_op_mov_reg_T0(OT_BYTE, (b & 7) | REX_B(s));
         break;
@@ -5257,7 +5400,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         if (dflag == 2) {
             uint64_t tmp;
             /* 64 bit case */
-            tmp = ldq_code(s->pc);
+            tmp = cpu_ldq_code(env, s->pc);
             s->pc += 8;
             reg = (b & 7) | REX_B(s);
             gen_movtl_T0_im(tmp);
@@ -5266,7 +5409,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
 #endif
         {
             ot = dflag ? OT_LONG : OT_WORD;
-            val = insn_get(s, ot);
+            val = insn_get(env, s, ot);
             reg = (b & 7) | REX_B(s);
             gen_op_movl_T0_im(val);
             gen_op_mov_reg_T0(ot, reg);
@@ -5285,7 +5428,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             ot = OT_BYTE;
         else
             ot = dflag + OT_WORD;
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
         mod = (modrm >> 6) & 3;
         if (mod == 3) {
@@ -5296,7 +5439,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             gen_op_mov_reg_T0(ot, rm);
             gen_op_mov_reg_T1(ot, reg);
         } else {
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             gen_op_mov_TN_reg(ot, 0, reg);
             /* for xchg, lock is implicit */
             if (!(prefixes & PREFIX_LOCK))
@@ -5328,12 +5471,12 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         op = R_GS;
     do_lxx:
         ot = dflag ? OT_LONG : OT_WORD;
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
-        gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+        gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
         gen_op_ld_T1_A0(ot + s->mem_index);
         gen_add_A0_im(s, 1 << (ot - OT_WORD + 1));
         /* load the segment first to handle exceptions properly */
@@ -5360,7 +5503,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             else
                 ot = dflag + OT_WORD;
 
-            modrm = ldub_code(s->pc++);
+            modrm = cpu_ldub_code(env, s->pc++);
             mod = (modrm >> 6) & 3;
             op = (modrm >> 3) & 7;
 
@@ -5368,7 +5511,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                 if (shift == 2) {
                     s->rip_offset = 1;
                 }
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 opreg = OR_TMP0;
             } else {
                 opreg = (modrm & 7) | REX_B(s);
@@ -5379,7 +5522,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                 gen_shift(s, op, ot, opreg, OR_ECX);
             } else {
                 if (shift == 2) {
-                    shift = ldub_code(s->pc++);
+                    shift = cpu_ldub_code(env, s->pc++);
                 }
                 gen_shifti(s, op, ot, opreg, shift);
             }
@@ -5413,12 +5556,12 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         shift = 0;
     do_shiftd:
         ot = dflag + OT_WORD;
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         rm = (modrm & 7) | REX_B(s);
         reg = ((modrm >> 3) & 7) | rex_r;
         if (mod != 3) {
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             opreg = OR_TMP0;
         } else {
             opreg = rm;
@@ -5426,7 +5569,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         gen_op_mov_TN_reg(ot, 1, reg);
 
         if (shift) {
-            val = ldub_code(s->pc++);
+            val = cpu_ldub_code(env, s->pc++);
             tcg_gen_movi_tl(cpu_T3, val);
         } else {
             tcg_gen_mov_tl(cpu_T3, cpu_regs[R_ECX]);
@@ -5443,13 +5586,13 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
             break;
         }
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         rm = modrm & 7;
         op = ((b & 7) << 3) | ((modrm >> 3) & 7);
         if (mod != 3) {
             /* memory op */
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             switch(op) {
             case 0x00 ... 0x07: /* fxxxs */
             case 0x10 ... 0x17: /* fixxxl */
@@ -5463,30 +5606,30 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                     case 0:
                         gen_op_ld_T0_A0(OT_LONG + s->mem_index);
                         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-                        gen_helper_flds_FT0(cpu_tmp2_i32);
+                        gen_helper_flds_FT0(cpu_env, cpu_tmp2_i32);
                         break;
                     case 1:
                         gen_op_ld_T0_A0(OT_LONG + s->mem_index);
                         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-                        gen_helper_fildl_FT0(cpu_tmp2_i32);
+                        gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
                         break;
                     case 2:
                         tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, 
                                           (s->mem_index >> 2) - 1);
-                        gen_helper_fldl_FT0(cpu_tmp1_i64);
+                        gen_helper_fldl_FT0(cpu_env, cpu_tmp1_i64);
                         break;
                     case 3:
                     default:
                         gen_op_lds_T0_A0(OT_WORD + s->mem_index);
                         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-                        gen_helper_fildl_FT0(cpu_tmp2_i32);
+                        gen_helper_fildl_FT0(cpu_env, cpu_tmp2_i32);
                         break;
                     }
 
                     gen_helper_fp_arith_ST0_FT0(op1);
                     if (op1 == 3) {
                         /* fcomp needs pop */
-                        gen_helper_fpop();
+                        gen_helper_fpop(cpu_env);
                     }
                 }
                 break;
@@ -5502,23 +5645,23 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                     case 0:
                         gen_op_ld_T0_A0(OT_LONG + s->mem_index);
                         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-                        gen_helper_flds_ST0(cpu_tmp2_i32);
+                        gen_helper_flds_ST0(cpu_env, cpu_tmp2_i32);
                         break;
                     case 1:
                         gen_op_ld_T0_A0(OT_LONG + s->mem_index);
                         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-                        gen_helper_fildl_ST0(cpu_tmp2_i32);
+                        gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
                         break;
                     case 2:
                         tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, 
                                           (s->mem_index >> 2) - 1);
-                        gen_helper_fldl_ST0(cpu_tmp1_i64);
+                        gen_helper_fldl_ST0(cpu_env, cpu_tmp1_i64);
                         break;
                     case 3:
                     default:
                         gen_op_lds_T0_A0(OT_WORD + s->mem_index);
                         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-                        gen_helper_fildl_ST0(cpu_tmp2_i32);
+                        gen_helper_fildl_ST0(cpu_env, cpu_tmp2_i32);
                         break;
                     }
                     break;
@@ -5526,50 +5669,50 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                     /* XXX: the corresponding CPUID bit must be tested ! */
                     switch(op >> 4) {
                     case 1:
-                        gen_helper_fisttl_ST0(cpu_tmp2_i32);
+                        gen_helper_fisttl_ST0(cpu_tmp2_i32, cpu_env);
                         tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
                         gen_op_st_T0_A0(OT_LONG + s->mem_index);
                         break;
                     case 2:
-                        gen_helper_fisttll_ST0(cpu_tmp1_i64);
+                        gen_helper_fisttll_ST0(cpu_tmp1_i64, cpu_env);
                         tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, 
                                           (s->mem_index >> 2) - 1);
                         break;
                     case 3:
                     default:
-                        gen_helper_fistt_ST0(cpu_tmp2_i32);
+                        gen_helper_fistt_ST0(cpu_tmp2_i32, cpu_env);
                         tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
                         gen_op_st_T0_A0(OT_WORD + s->mem_index);
                         break;
                     }
-                    gen_helper_fpop();
+                    gen_helper_fpop(cpu_env);
                     break;
                 default:
                     switch(op >> 4) {
                     case 0:
-                        gen_helper_fsts_ST0(cpu_tmp2_i32);
+                        gen_helper_fsts_ST0(cpu_tmp2_i32, cpu_env);
                         tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
                         gen_op_st_T0_A0(OT_LONG + s->mem_index);
                         break;
                     case 1:
-                        gen_helper_fistl_ST0(cpu_tmp2_i32);
+                        gen_helper_fistl_ST0(cpu_tmp2_i32, cpu_env);
                         tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
                         gen_op_st_T0_A0(OT_LONG + s->mem_index);
                         break;
                     case 2:
-                        gen_helper_fstl_ST0(cpu_tmp1_i64);
+                        gen_helper_fstl_ST0(cpu_tmp1_i64, cpu_env);
                         tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, 
                                           (s->mem_index >> 2) - 1);
                         break;
                     case 3:
                     default:
-                        gen_helper_fist_ST0(cpu_tmp2_i32);
+                        gen_helper_fist_ST0(cpu_tmp2_i32, cpu_env);
                         tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
                         gen_op_st_T0_A0(OT_WORD + s->mem_index);
                         break;
                     }
                     if ((op & 7) == 3)
-                        gen_helper_fpop();
+                        gen_helper_fpop(cpu_env);
                     break;
                 }
                 break;
@@ -5577,22 +5720,21 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                 if (s->cc_op != CC_OP_DYNAMIC)
                     gen_op_set_cc_op(s->cc_op);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fldenv(
-                                   cpu_A0, tcg_const_i32(s->dflag));
+                gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(s->dflag));
                 break;
             case 0x0d: /* fldcw mem */
                 gen_op_ld_T0_A0(OT_WORD + s->mem_index);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-                gen_helper_fldcw(cpu_tmp2_i32);
+                gen_helper_fldcw(cpu_env, cpu_tmp2_i32);
                 break;
             case 0x0e: /* fnstenv mem */
                 if (s->cc_op != CC_OP_DYNAMIC)
                     gen_op_set_cc_op(s->cc_op);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fstenv(cpu_A0, tcg_const_i32(s->dflag));
+                gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(s->dflag));
                 break;
             case 0x0f: /* fnstcw mem */
-                gen_helper_fnstcw(cpu_tmp2_i32);
+                gen_helper_fnstcw(cpu_tmp2_i32, cpu_env);
                 tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
                 gen_op_st_T0_A0(OT_WORD + s->mem_index);
                 break;
@@ -5600,29 +5742,29 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                 if (s->cc_op != CC_OP_DYNAMIC)
                     gen_op_set_cc_op(s->cc_op);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fldt_ST0(cpu_A0);
+                gen_helper_fldt_ST0(cpu_env, cpu_A0);
                 break;
             case 0x1f: /* fstpt mem */
                 if (s->cc_op != CC_OP_DYNAMIC)
                     gen_op_set_cc_op(s->cc_op);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fstt_ST0(cpu_A0);
-                gen_helper_fpop();
+                gen_helper_fstt_ST0(cpu_env, cpu_A0);
+                gen_helper_fpop(cpu_env);
                 break;
             case 0x2c: /* frstor mem */
                 if (s->cc_op != CC_OP_DYNAMIC)
                     gen_op_set_cc_op(s->cc_op);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_frstor(cpu_A0, tcg_const_i32(s->dflag));
+                gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(s->dflag));
                 break;
             case 0x2e: /* fnsave mem */
                 if (s->cc_op != CC_OP_DYNAMIC)
                     gen_op_set_cc_op(s->cc_op);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fsave(cpu_A0, tcg_const_i32(s->dflag));
+                gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(s->dflag));
                 break;
             case 0x2f: /* fnstsw mem */
-                gen_helper_fnstsw(cpu_tmp2_i32);
+                gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
                 tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
                 gen_op_st_T0_A0(OT_WORD + s->mem_index);
                 break;
@@ -5630,25 +5772,25 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                 if (s->cc_op != CC_OP_DYNAMIC)
                     gen_op_set_cc_op(s->cc_op);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fbld_ST0(cpu_A0);
+                gen_helper_fbld_ST0(cpu_env, cpu_A0);
                 break;
             case 0x3e: /* fbstp */
                 if (s->cc_op != CC_OP_DYNAMIC)
                     gen_op_set_cc_op(s->cc_op);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fbst_ST0(cpu_A0);
-                gen_helper_fpop();
+                gen_helper_fbst_ST0(cpu_env, cpu_A0);
+                gen_helper_fpop(cpu_env);
                 break;
             case 0x3d: /* fildll */
                 tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0, 
                                   (s->mem_index >> 2) - 1);
-                gen_helper_fildll_ST0(cpu_tmp1_i64);
+                gen_helper_fildll_ST0(cpu_env, cpu_tmp1_i64);
                 break;
             case 0x3f: /* fistpll */
-                gen_helper_fistll_ST0(cpu_tmp1_i64);
+                gen_helper_fistll_ST0(cpu_tmp1_i64, cpu_env);
                 tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0, 
                                   (s->mem_index >> 2) - 1);
-                gen_helper_fpop();
+                gen_helper_fpop(cpu_env);
                 break;
             default:
                 goto illegal_op;
@@ -5659,13 +5801,14 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
 
             switch(op) {
             case 0x08: /* fld sti */
-                gen_helper_fpush();
-                gen_helper_fmov_ST0_STN(tcg_const_i32((opreg + 1) & 7));
+                gen_helper_fpush(cpu_env);
+                gen_helper_fmov_ST0_STN(cpu_env,
+                                        tcg_const_i32((opreg + 1) & 7));
                 break;
             case 0x09: /* fxchg sti */
             case 0x29: /* fxchg4 sti, undocumented op */
             case 0x39: /* fxchg7 sti, undocumented op */
-                gen_helper_fxchg_ST0_STN(tcg_const_i32(opreg));
+                gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg));
                 break;
             case 0x0a: /* grp d9/2 */
                 switch(rm) {
@@ -5674,7 +5817,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                     if (s->cc_op != CC_OP_DYNAMIC)
                         gen_op_set_cc_op(s->cc_op);
                     gen_jmp_im(pc_start - s->cs_base);
-                    gen_helper_fwait();
+                    gen_helper_fwait(cpu_env);
                     break;
                 default:
                     goto illegal_op;
@@ -5683,17 +5826,17 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             case 0x0c: /* grp d9/4 */
                 switch(rm) {
                 case 0: /* fchs */
-                    gen_helper_fchs_ST0();
+                    gen_helper_fchs_ST0(cpu_env);
                     break;
                 case 1: /* fabs */
-                    gen_helper_fabs_ST0();
+                    gen_helper_fabs_ST0(cpu_env);
                     break;
                 case 4: /* ftst */
-                    gen_helper_fldz_FT0();
-                    gen_helper_fcom_ST0_FT0();
+                    gen_helper_fldz_FT0(cpu_env);
+                    gen_helper_fcom_ST0_FT0(cpu_env);
                     break;
                 case 5: /* fxam */
-                    gen_helper_fxam_ST0();
+                    gen_helper_fxam_ST0(cpu_env);
                     break;
                 default:
                     goto illegal_op;
@@ -5703,32 +5846,32 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                 {
                     switch(rm) {
                     case 0:
-                        gen_helper_fpush();
-                        gen_helper_fld1_ST0();
+                        gen_helper_fpush(cpu_env);
+                        gen_helper_fld1_ST0(cpu_env);
                         break;
                     case 1:
-                        gen_helper_fpush();
-                        gen_helper_fldl2t_ST0();
+                        gen_helper_fpush(cpu_env);
+                        gen_helper_fldl2t_ST0(cpu_env);
                         break;
                     case 2:
-                        gen_helper_fpush();
-                        gen_helper_fldl2e_ST0();
+                        gen_helper_fpush(cpu_env);
+                        gen_helper_fldl2e_ST0(cpu_env);
                         break;
                     case 3:
-                        gen_helper_fpush();
-                        gen_helper_fldpi_ST0();
+                        gen_helper_fpush(cpu_env);
+                        gen_helper_fldpi_ST0(cpu_env);
                         break;
                     case 4:
-                        gen_helper_fpush();
-                        gen_helper_fldlg2_ST0();
+                        gen_helper_fpush(cpu_env);
+                        gen_helper_fldlg2_ST0(cpu_env);
                         break;
                     case 5:
-                        gen_helper_fpush();
-                        gen_helper_fldln2_ST0();
+                        gen_helper_fpush(cpu_env);
+                        gen_helper_fldln2_ST0(cpu_env);
                         break;
                     case 6:
-                        gen_helper_fpush();
-                        gen_helper_fldz_ST0();
+                        gen_helper_fpush(cpu_env);
+                        gen_helper_fldz_ST0(cpu_env);
                         break;
                     default:
                         goto illegal_op;
@@ -5738,58 +5881,58 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             case 0x0e: /* grp d9/6 */
                 switch(rm) {
                 case 0: /* f2xm1 */
-                    gen_helper_f2xm1();
+                    gen_helper_f2xm1(cpu_env);
                     break;
                 case 1: /* fyl2x */
-                    gen_helper_fyl2x();
+                    gen_helper_fyl2x(cpu_env);
                     break;
                 case 2: /* fptan */
-                    gen_helper_fptan();
+                    gen_helper_fptan(cpu_env);
                     break;
                 case 3: /* fpatan */
-                    gen_helper_fpatan();
+                    gen_helper_fpatan(cpu_env);
                     break;
                 case 4: /* fxtract */
-                    gen_helper_fxtract();
+                    gen_helper_fxtract(cpu_env);
                     break;
                 case 5: /* fprem1 */
-                    gen_helper_fprem1();
+                    gen_helper_fprem1(cpu_env);
                     break;
                 case 6: /* fdecstp */
-                    gen_helper_fdecstp();
+                    gen_helper_fdecstp(cpu_env);
                     break;
                 default:
                 case 7: /* fincstp */
-                    gen_helper_fincstp();
+                    gen_helper_fincstp(cpu_env);
                     break;
                 }
                 break;
             case 0x0f: /* grp d9/7 */
                 switch(rm) {
                 case 0: /* fprem */
-                    gen_helper_fprem();
+                    gen_helper_fprem(cpu_env);
                     break;
                 case 1: /* fyl2xp1 */
-                    gen_helper_fyl2xp1();
+                    gen_helper_fyl2xp1(cpu_env);
                     break;
                 case 2: /* fsqrt */
-                    gen_helper_fsqrt();
+                    gen_helper_fsqrt(cpu_env);
                     break;
                 case 3: /* fsincos */
-                    gen_helper_fsincos();
+                    gen_helper_fsincos(cpu_env);
                     break;
                 case 5: /* fscale */
-                    gen_helper_fscale();
+                    gen_helper_fscale(cpu_env);
                     break;
                 case 4: /* frndint */
-                    gen_helper_frndint();
+                    gen_helper_frndint(cpu_env);
                     break;
                 case 6: /* fsin */
-                    gen_helper_fsin();
+                    gen_helper_fsin(cpu_env);
                     break;
                 default:
                 case 7: /* fcos */
-                    gen_helper_fcos();
+                    gen_helper_fcos(cpu_env);
                     break;
                 }
                 break;
@@ -5803,32 +5946,32 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                     if (op >= 0x20) {
                         gen_helper_fp_arith_STN_ST0(op1, opreg);
                         if (op >= 0x30)
-                            gen_helper_fpop();
+                            gen_helper_fpop(cpu_env);
                     } else {
-                        gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
+                        gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
                         gen_helper_fp_arith_ST0_FT0(op1);
                     }
                 }
                 break;
             case 0x02: /* fcom */
             case 0x22: /* fcom2, undocumented op */
-                gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
-                gen_helper_fcom_ST0_FT0();
+                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
+                gen_helper_fcom_ST0_FT0(cpu_env);
                 break;
             case 0x03: /* fcomp */
             case 0x23: /* fcomp3, undocumented op */
             case 0x32: /* fcomp5, undocumented op */
-                gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
-                gen_helper_fcom_ST0_FT0();
-                gen_helper_fpop();
+                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
+                gen_helper_fcom_ST0_FT0(cpu_env);
+                gen_helper_fpop(cpu_env);
                 break;
             case 0x15: /* da/5 */
                 switch(rm) {
                 case 1: /* fucompp */
-                    gen_helper_fmov_FT0_STN(tcg_const_i32(1));
-                    gen_helper_fucom_ST0_FT0();
-                    gen_helper_fpop();
-                    gen_helper_fpop();
+                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
+                    gen_helper_fucom_ST0_FT0(cpu_env);
+                    gen_helper_fpop(cpu_env);
+                    gen_helper_fpop(cpu_env);
                     break;
                 default:
                     goto illegal_op;
@@ -5841,10 +5984,10 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                 case 1: /* fdisi (287 only, just do nop here) */
                     break;
                 case 2: /* fclex */
-                    gen_helper_fclex();
+                    gen_helper_fclex(cpu_env);
                     break;
                 case 3: /* fninit */
-                    gen_helper_fninit();
+                    gen_helper_fninit(cpu_env);
                     break;
                 case 4: /* fsetpm (287 only, just do nop here) */
                     break;
@@ -5855,59 +5998,59 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             case 0x1d: /* fucomi */
                 if (s->cc_op != CC_OP_DYNAMIC)
                     gen_op_set_cc_op(s->cc_op);
-                gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
-                gen_helper_fucomi_ST0_FT0();
+                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
+                gen_helper_fucomi_ST0_FT0(cpu_env);
                 s->cc_op = CC_OP_EFLAGS;
                 break;
             case 0x1e: /* fcomi */
                 if (s->cc_op != CC_OP_DYNAMIC)
                     gen_op_set_cc_op(s->cc_op);
-                gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
-                gen_helper_fcomi_ST0_FT0();
+                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
+                gen_helper_fcomi_ST0_FT0(cpu_env);
                 s->cc_op = CC_OP_EFLAGS;
                 break;
             case 0x28: /* ffree sti */
-                gen_helper_ffree_STN(tcg_const_i32(opreg));
+                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
                 break;
             case 0x2a: /* fst sti */
-                gen_helper_fmov_STN_ST0(tcg_const_i32(opreg));
+                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
                 break;
             case 0x2b: /* fstp sti */
             case 0x0b: /* fstp1 sti, undocumented op */
             case 0x3a: /* fstp8 sti, undocumented op */
             case 0x3b: /* fstp9 sti, undocumented op */
-                gen_helper_fmov_STN_ST0(tcg_const_i32(opreg));
-                gen_helper_fpop();
+                gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg));
+                gen_helper_fpop(cpu_env);
                 break;
             case 0x2c: /* fucom st(i) */
-                gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
-                gen_helper_fucom_ST0_FT0();
+                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
+                gen_helper_fucom_ST0_FT0(cpu_env);
                 break;
             case 0x2d: /* fucomp st(i) */
-                gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
-                gen_helper_fucom_ST0_FT0();
-                gen_helper_fpop();
+                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
+                gen_helper_fucom_ST0_FT0(cpu_env);
+                gen_helper_fpop(cpu_env);
                 break;
             case 0x33: /* de/3 */
                 switch(rm) {
                 case 1: /* fcompp */
-                    gen_helper_fmov_FT0_STN(tcg_const_i32(1));
-                    gen_helper_fcom_ST0_FT0();
-                    gen_helper_fpop();
-                    gen_helper_fpop();
+                    gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1));
+                    gen_helper_fcom_ST0_FT0(cpu_env);
+                    gen_helper_fpop(cpu_env);
+                    gen_helper_fpop(cpu_env);
                     break;
                 default:
                     goto illegal_op;
                 }
                 break;
             case 0x38: /* ffreep sti, undocumented op */
-                gen_helper_ffree_STN(tcg_const_i32(opreg));
-                gen_helper_fpop();
+                gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg));
+                gen_helper_fpop(cpu_env);
                 break;
             case 0x3c: /* df/4 */
                 switch(rm) {
                 case 0:
-                    gen_helper_fnstsw(cpu_tmp2_i32);
+                    gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
                     tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
                     gen_op_mov_reg_T0(OT_WORD, R_EAX);
                     break;
@@ -5918,17 +6061,17 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             case 0x3d: /* fucomip */
                 if (s->cc_op != CC_OP_DYNAMIC)
                     gen_op_set_cc_op(s->cc_op);
-                gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
-                gen_helper_fucomi_ST0_FT0();
-                gen_helper_fpop();
+                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
+                gen_helper_fucomi_ST0_FT0(cpu_env);
+                gen_helper_fpop(cpu_env);
                 s->cc_op = CC_OP_EFLAGS;
                 break;
             case 0x3e: /* fcomip */
                 if (s->cc_op != CC_OP_DYNAMIC)
                     gen_op_set_cc_op(s->cc_op);
-                gen_helper_fmov_FT0_STN(tcg_const_i32(opreg));
-                gen_helper_fcomi_ST0_FT0();
-                gen_helper_fpop();
+                gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg));
+                gen_helper_fcomi_ST0_FT0(cpu_env);
+                gen_helper_fpop(cpu_env);
                 s->cc_op = CC_OP_EFLAGS;
                 break;
             case 0x10 ... 0x13: /* fcmovxx */
@@ -5944,7 +6087,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                     op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1);
                     l1 = gen_new_label();
                     gen_jcc1(s, s->cc_op, op1, l1);
-                    gen_helper_fmov_ST0_STN(tcg_const_i32(opreg));
+                    gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg));
                     gen_set_label(l1);
                 }
                 break;
@@ -6074,7 +6217,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             ot = OT_BYTE;
         else
             ot = dflag ? OT_LONG : OT_WORD;
-        val = ldub_code(s->pc++);
+        val = cpu_ldub_code(env, s->pc++);
         gen_op_movl_T0_im(val);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
@@ -6094,7 +6237,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             ot = OT_BYTE;
         else
             ot = dflag ? OT_LONG : OT_WORD;
-        val = ldub_code(s->pc++);
+        val = cpu_ldub_code(env, s->pc++);
         gen_op_movl_T0_im(val);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      svm_is_rep(prefixes));
@@ -6103,7 +6246,6 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         if (use_icount)
             gen_io_start();
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-        tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
         tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
         gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
         if (use_icount) {
@@ -6146,7 +6288,6 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         if (use_icount)
             gen_io_start();
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-        tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
         tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
         gen_helper_out_func(ot, cpu_tmp2_i32, cpu_tmp3_i32);
         if (use_icount) {
@@ -6158,7 +6299,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         /************************/
         /* control */
     case 0xc2: /* ret im */
-        val = ldsw_code(s->pc);
+        val = cpu_ldsw_code(env, s->pc);
         s->pc += 2;
         gen_pop_T0(s);
         if (CODE64(s) && s->dflag)
@@ -6178,14 +6319,14 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         gen_eob(s);
         break;
     case 0xca: /* lret im */
-        val = ldsw_code(s->pc);
+        val = cpu_ldsw_code(env, s->pc);
         s->pc += 2;
     do_lret:
         if (s->pe && !s->vm86) {
             if (s->cc_op != CC_OP_DYNAMIC)
                 gen_op_set_cc_op(s->cc_op);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_lret_protected(tcg_const_i32(s->dflag),
+            gen_helper_lret_protected(cpu_env, tcg_const_i32(s->dflag),
                                       tcg_const_i32(val));
         } else {
             gen_stack_A0(s);
@@ -6212,20 +6353,20 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         gen_svm_check_intercept(s, pc_start, SVM_EXIT_IRET);
         if (!s->pe) {
             /* real mode */
-            gen_helper_iret_real(tcg_const_i32(s->dflag));
+            gen_helper_iret_real(cpu_env, tcg_const_i32(s->dflag));
             s->cc_op = CC_OP_EFLAGS;
         } else if (s->vm86) {
             if (s->iopl != 3) {
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
-                gen_helper_iret_real(tcg_const_i32(s->dflag));
+                gen_helper_iret_real(cpu_env, tcg_const_i32(s->dflag));
                 s->cc_op = CC_OP_EFLAGS;
             }
         } else {
             if (s->cc_op != CC_OP_DYNAMIC)
                 gen_op_set_cc_op(s->cc_op);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_iret_protected(tcg_const_i32(s->dflag), 
+            gen_helper_iret_protected(cpu_env, tcg_const_i32(s->dflag),
                                       tcg_const_i32(s->pc - s->cs_base));
             s->cc_op = CC_OP_EFLAGS;
         }
@@ -6234,9 +6375,9 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
     case 0xe8: /* call im */
         {
             if (dflag)
-                tval = (int32_t)insn_get(s, OT_LONG);
+                tval = (int32_t)insn_get(env, s, OT_LONG);
             else
-                tval = (int16_t)insn_get(s, OT_WORD);
+                tval = (int16_t)insn_get(env, s, OT_WORD);
             next_eip = s->pc - s->cs_base;
             tval += next_eip;
             if (s->dflag == 0)
@@ -6255,8 +6396,8 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             if (CODE64(s))
                 goto illegal_op;
             ot = dflag ? OT_LONG : OT_WORD;
-            offset = insn_get(s, ot);
-            selector = insn_get(s, OT_WORD);
+            offset = insn_get(env, s, ot);
+            selector = insn_get(env, s, OT_WORD);
 
             gen_op_movl_T0_im(selector);
             gen_op_movl_T1_imu(offset);
@@ -6264,9 +6405,9 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         goto do_lcall;
     case 0xe9: /* jmp im */
         if (dflag)
-            tval = (int32_t)insn_get(s, OT_LONG);
+            tval = (int32_t)insn_get(env, s, OT_LONG);
         else
-            tval = (int16_t)insn_get(s, OT_WORD);
+            tval = (int16_t)insn_get(env, s, OT_WORD);
         tval += s->pc - s->cs_base;
         if (s->dflag == 0)
             tval &= 0xffff;
@@ -6281,28 +6422,28 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             if (CODE64(s))
                 goto illegal_op;
             ot = dflag ? OT_LONG : OT_WORD;
-            offset = insn_get(s, ot);
-            selector = insn_get(s, OT_WORD);
+            offset = insn_get(env, s, ot);
+            selector = insn_get(env, s, OT_WORD);
 
             gen_op_movl_T0_im(selector);
             gen_op_movl_T1_imu(offset);
         }
         goto do_ljmp;
     case 0xeb: /* jmp Jb */
-        tval = (int8_t)insn_get(s, OT_BYTE);
+        tval = (int8_t)insn_get(env, s, OT_BYTE);
         tval += s->pc - s->cs_base;
         if (s->dflag == 0)
             tval &= 0xffff;
         gen_jmp(s, tval);
         break;
     case 0x70 ... 0x7f: /* jcc Jb */
-        tval = (int8_t)insn_get(s, OT_BYTE);
+        tval = (int8_t)insn_get(env, s, OT_BYTE);
         goto do_jcc;
     case 0x180 ... 0x18f: /* jcc Jv */
         if (dflag) {
-            tval = (int32_t)insn_get(s, OT_LONG);
+            tval = (int32_t)insn_get(env, s, OT_LONG);
         } else {
-            tval = (int16_t)insn_get(s, OT_WORD);
+            tval = (int16_t)insn_get(env, s, OT_WORD);
         }
     do_jcc:
         next_eip = s->pc - s->cs_base;
@@ -6313,9 +6454,9 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         break;
 
     case 0x190 ... 0x19f: /* setcc Gv */
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         gen_setcc(s, b);
-        gen_ldst_modrm(s, modrm, OT_BYTE, OR_TMP0, 1);
+        gen_ldst_modrm(env, s, modrm, OT_BYTE, OR_TMP0, 1);
         break;
     case 0x140 ... 0x14f: /* cmov Gv, Ev */
         {
@@ -6323,12 +6464,12 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             TCGv t0;
 
             ot = dflag + OT_WORD;
-            modrm = ldub_code(s->pc++);
+            modrm = cpu_ldub_code(env, s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
             mod = (modrm >> 6) & 3;
             t0 = tcg_temp_local_new();
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_op_ld_v(ot + s->mem_index, t0, cpu_A0);
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -6363,7 +6504,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         } else {
             if (s->cc_op != CC_OP_DYNAMIC)
                 gen_op_set_cc_op(s->cc_op);
-            gen_helper_read_eflags(cpu_T[0]);
+            gen_helper_read_eflags(cpu_T[0], cpu_env);
             gen_push_T0(s);
         }
         break;
@@ -6375,34 +6516,52 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             gen_pop_T0(s);
             if (s->cpl == 0) {
                 if (s->dflag) {
-                    gen_helper_write_eflags(cpu_T[0],
-                                       tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK | IOPL_MASK)));
+                    gen_helper_write_eflags(cpu_env, cpu_T[0],
+                                            tcg_const_i32((TF_MASK | AC_MASK |
+                                                           ID_MASK | NT_MASK |
+                                                           IF_MASK |
+                                                           IOPL_MASK)));
                 } else {
-                    gen_helper_write_eflags(cpu_T[0],
-                                       tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK | IOPL_MASK) & 0xffff));
+                    gen_helper_write_eflags(cpu_env, cpu_T[0],
+                                            tcg_const_i32((TF_MASK | AC_MASK |
+                                                           ID_MASK | NT_MASK |
+                                                           IF_MASK | IOPL_MASK)
+                                                          & 0xffff));
                 }
             } else {
                 if (s->cpl <= s->iopl) {
                     if (s->dflag) {
-                        gen_helper_write_eflags(cpu_T[0],
-                                           tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK)));
+                        gen_helper_write_eflags(cpu_env, cpu_T[0],
+                                                tcg_const_i32((TF_MASK |
+                                                               AC_MASK |
+                                                               ID_MASK |
+                                                               NT_MASK |
+                                                               IF_MASK)));
                     } else {
-                        gen_helper_write_eflags(cpu_T[0],
-                                           tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK | IF_MASK) & 0xffff));
+                        gen_helper_write_eflags(cpu_env, cpu_T[0],
+                                                tcg_const_i32((TF_MASK |
+                                                               AC_MASK |
+                                                               ID_MASK |
+                                                               NT_MASK |
+                                                               IF_MASK)
+                                                              & 0xffff));
                     }
                 } else {
                     if (s->dflag) {
-                        gen_helper_write_eflags(cpu_T[0],
-                                           tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK)));
+                        gen_helper_write_eflags(cpu_env, cpu_T[0],
+                                           tcg_const_i32((TF_MASK | AC_MASK |
+                                                          ID_MASK | NT_MASK)));
                     } else {
-                        gen_helper_write_eflags(cpu_T[0],
-                                           tcg_const_i32((TF_MASK | AC_MASK | ID_MASK | NT_MASK) & 0xffff));
+                        gen_helper_write_eflags(cpu_env, cpu_T[0],
+                                           tcg_const_i32((TF_MASK | AC_MASK |
+                                                          ID_MASK | NT_MASK)
+                                                         & 0xffff));
                     }
                 }
             }
             gen_pop_update(s);
             s->cc_op = CC_OP_EFLAGS;
-            /* abort translation because TF flag may change */
+            /* abort translation because TF/AC flag may change */
             gen_jmp_im(s->pc - s->cs_base);
             gen_eob(s);
         }
@@ -6452,30 +6611,30 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         break;
     case 0xfc: /* cld */
         tcg_gen_movi_i32(cpu_tmp2_i32, 1);
-        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUState, df));
+        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, df));
         break;
     case 0xfd: /* std */
         tcg_gen_movi_i32(cpu_tmp2_i32, -1);
-        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUState, df));
+        tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, offsetof(CPUX86State, df));
         break;
 
         /************************/
         /* bit operations */
     case 0x1ba: /* bt/bts/btr/btc Gv, im */
         ot = dflag + OT_WORD;
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         op = (modrm >> 3) & 7;
         mod = (modrm >> 6) & 3;
         rm = (modrm & 7) | REX_B(s);
         if (mod != 3) {
             s->rip_offset = 1;
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             gen_op_ld_T0_A0(ot + s->mem_index);
         } else {
             gen_op_mov_TN_reg(ot, 0, rm);
         }
         /* load shift */
-        val = ldub_code(s->pc++);
+        val = cpu_ldub_code(env, s->pc++);
         gen_op_movl_T1_im(val);
         if (op < 4)
             goto illegal_op;
@@ -6494,13 +6653,13 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         op = 3;
     do_btx:
         ot = dflag + OT_WORD;
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
         mod = (modrm >> 6) & 3;
         rm = (modrm & 7) | REX_B(s);
         gen_op_mov_TN_reg(OT_LONG, 1, reg);
         if (mod != 3) {
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             /* specific case: we need to add a displacement */
             gen_exts(ot, cpu_T[1]);
             tcg_gen_sari_tl(cpu_tmp0, cpu_T[1], 3 + ot);
@@ -6555,9 +6714,9 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             TCGv t0;
 
             ot = dflag + OT_WORD;
-            modrm = ldub_code(s->pc++);
+            modrm = cpu_ldub_code(env, s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
-            gen_ldst_modrm(s,modrm, ot, OR_TMP0, 0);
+            gen_ldst_modrm(env, s,modrm, ot, OR_TMP0, 0);
             gen_extu(ot, cpu_T[0]);
             t0 = tcg_temp_local_new();
             tcg_gen_mov_tl(t0, cpu_T[0]);
@@ -6597,7 +6756,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             goto illegal_op;
         if (s->cc_op != CC_OP_DYNAMIC)
             gen_op_set_cc_op(s->cc_op);
-        gen_helper_daa();
+        gen_helper_daa(cpu_env);
         s->cc_op = CC_OP_EFLAGS;
         break;
     case 0x2f: /* das */
@@ -6605,7 +6764,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             goto illegal_op;
         if (s->cc_op != CC_OP_DYNAMIC)
             gen_op_set_cc_op(s->cc_op);
-        gen_helper_das();
+        gen_helper_das(cpu_env);
         s->cc_op = CC_OP_EFLAGS;
         break;
     case 0x37: /* aaa */
@@ -6613,7 +6772,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             goto illegal_op;
         if (s->cc_op != CC_OP_DYNAMIC)
             gen_op_set_cc_op(s->cc_op);
-        gen_helper_aaa();
+        gen_helper_aaa(cpu_env);
         s->cc_op = CC_OP_EFLAGS;
         break;
     case 0x3f: /* aas */
@@ -6621,25 +6780,25 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             goto illegal_op;
         if (s->cc_op != CC_OP_DYNAMIC)
             gen_op_set_cc_op(s->cc_op);
-        gen_helper_aas();
+        gen_helper_aas(cpu_env);
         s->cc_op = CC_OP_EFLAGS;
         break;
     case 0xd4: /* aam */
         if (CODE64(s))
             goto illegal_op;
-        val = ldub_code(s->pc++);
+        val = cpu_ldub_code(env, s->pc++);
         if (val == 0) {
             gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base);
         } else {
-            gen_helper_aam(tcg_const_i32(val));
+            gen_helper_aam(cpu_env, tcg_const_i32(val));
             s->cc_op = CC_OP_LOGICB;
         }
         break;
     case 0xd5: /* aad */
         if (CODE64(s))
             goto illegal_op;
-        val = ldub_code(s->pc++);
-        gen_helper_aad(tcg_const_i32(val));
+        val = cpu_ldub_code(env, s->pc++);
+        gen_helper_aad(cpu_env, tcg_const_i32(val));
         s->cc_op = CC_OP_LOGICB;
         break;
         /************************/
@@ -6665,14 +6824,14 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             if (s->cc_op != CC_OP_DYNAMIC)
                 gen_op_set_cc_op(s->cc_op);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_fwait();
+            gen_helper_fwait(cpu_env);
         }
         break;
     case 0xcc: /* int3 */
         gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
         break;
     case 0xcd: /* int N */
-        val = ldub_code(s->pc++);
+        val = cpu_ldub_code(env, s->pc++);
         if (s->vm86 && s->iopl != 3) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
@@ -6685,7 +6844,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         if (s->cc_op != CC_OP_DYNAMIC)
             gen_op_set_cc_op(s->cc_op);
         gen_jmp_im(pc_start - s->cs_base);
-        gen_helper_into(tcg_const_i32(s->pc - pc_start));
+        gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
         break;
 #ifdef WANT_ICEBP
     case 0xf1: /* icebp (undocumented, exits to external debugger) */
@@ -6694,7 +6853,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         gen_debug(s, pc_start - s->cs_base);
 #else
         /* start debug */
-        tb_flush(cpu_single_env);
+        tb_flush(env);
         cpu_set_log(CPU_LOG_INT | CPU_LOG_TB_IN_ASM);
 #endif
         break;
@@ -6702,13 +6861,13 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
     case 0xfa: /* cli */
         if (!s->vm86) {
             if (s->cpl <= s->iopl) {
-                gen_helper_cli();
+                gen_helper_cli(cpu_env);
             } else {
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             }
         } else {
             if (s->iopl == 3) {
-                gen_helper_cli();
+                gen_helper_cli(cpu_env);
             } else {
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             }
@@ -6718,12 +6877,12 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         if (!s->vm86) {
             if (s->cpl <= s->iopl) {
             gen_sti:
-                gen_helper_sti();
+                gen_helper_sti(cpu_env);
                 /* interruptions are enabled only the first insn after sti */
                 /* If several instructions disable interrupts, only the
                    _first_ does it */
                 if (!(s->tb->flags & HF_INHIBIT_IRQ_MASK))
-                    gen_helper_set_inhibit_irq();
+                    gen_helper_set_inhibit_irq(cpu_env);
                 /* give a chance to handle pending irqs */
                 gen_jmp_im(s->pc - s->cs_base);
                 gen_eob(s);
@@ -6742,19 +6901,20 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         if (CODE64(s))
             goto illegal_op;
         ot = dflag ? OT_LONG : OT_WORD;
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         reg = (modrm >> 3) & 7;
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
         gen_op_mov_TN_reg(ot, 0, reg);
-        gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+        gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
         gen_jmp_im(pc_start - s->cs_base);
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-        if (ot == OT_WORD)
-            gen_helper_boundw(cpu_A0, cpu_tmp2_i32);
-        else
-            gen_helper_boundl(cpu_A0, cpu_tmp2_i32);
+        if (ot == OT_WORD) {
+            gen_helper_boundw(cpu_env, cpu_A0, cpu_tmp2_i32);
+        } else {
+            gen_helper_boundl(cpu_env, cpu_A0, cpu_tmp2_i32);
+        }
         break;
     case 0x1c8 ... 0x1cf: /* bswap reg */
         reg = (b & 7) | REX_B(s);
@@ -6788,7 +6948,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         {
             int l1, l2, l3;
 
-            tval = (int8_t)insn_get(s, OT_BYTE);
+            tval = (int8_t)insn_get(env, s, OT_BYTE);
             next_eip = s->pc - s->cs_base;
             tval += next_eip;
             if (s->dflag == 0)
@@ -6842,9 +7002,9 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                 gen_op_set_cc_op(s->cc_op);
             gen_jmp_im(pc_start - s->cs_base);
             if (b & 2) {
-                gen_helper_rdmsr();
+                gen_helper_rdmsr(cpu_env);
             } else {
-                gen_helper_wrmsr();
+                gen_helper_wrmsr(cpu_env);
             }
         }
         break;
@@ -6854,7 +7014,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         gen_jmp_im(pc_start - s->cs_base);
         if (use_icount)
             gen_io_start();
-        gen_helper_rdtsc();
+        gen_helper_rdtsc(cpu_env);
         if (use_icount) {
             gen_io_end();
             gen_jmp(s, s->pc - s->cs_base);
@@ -6864,31 +7024,31 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         if (s->cc_op != CC_OP_DYNAMIC)
             gen_op_set_cc_op(s->cc_op);
         gen_jmp_im(pc_start - s->cs_base);
-        gen_helper_rdpmc();
+        gen_helper_rdpmc(cpu_env);
         break;
     case 0x134: /* sysenter */
         /* For Intel SYSENTER is valid on 64-bit */
-        if (CODE64(s) && cpu_single_env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
+        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
             goto illegal_op;
         if (!s->pe) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
             gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_sysenter();
+            gen_helper_sysenter(cpu_env);
             gen_eob(s);
         }
         break;
     case 0x135: /* sysexit */
         /* For Intel SYSEXIT is valid on 64-bit */
-        if (CODE64(s) && cpu_single_env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
+        if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1)
             goto illegal_op;
         if (!s->pe) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
             gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_sysexit(tcg_const_i32(dflag));
+            gen_helper_sysexit(cpu_env, tcg_const_i32(dflag));
             gen_eob(s);
         }
         break;
@@ -6897,7 +7057,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         /* XXX: is it usable in real mode ? */
         gen_update_cc_op(s);
         gen_jmp_im(pc_start - s->cs_base);
-        gen_helper_syscall(tcg_const_i32(s->pc - pc_start));
+        gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
         gen_eob(s);
         break;
     case 0x107: /* sysret */
@@ -6906,7 +7066,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         } else {
             gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_sysret(tcg_const_i32(s->dflag));
+            gen_helper_sysret(cpu_env, tcg_const_i32(s->dflag));
             /* condition codes are modified only in long mode */
             if (s->lma)
                 s->cc_op = CC_OP_EFLAGS;
@@ -6918,7 +7078,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         if (s->cc_op != CC_OP_DYNAMIC)
             gen_op_set_cc_op(s->cc_op);
         gen_jmp_im(pc_start - s->cs_base);
-        gen_helper_cpuid();
+        gen_helper_cpuid(cpu_env);
         break;
     case 0xf4: /* hlt */
         if (s->cpl != 0) {
@@ -6927,12 +7087,12 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             if (s->cc_op != CC_OP_DYNAMIC)
                 gen_op_set_cc_op(s->cc_op);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_hlt(tcg_const_i32(s->pc - pc_start));
+            gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
             s->is_jmp = DISAS_TB_JUMP;
         }
         break;
     case 0x100:
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         op = (modrm >> 3) & 7;
         switch(op) {
@@ -6944,7 +7104,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             ot = OT_WORD;
             if (mod == 3)
                 ot += s->dflag;
-            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
             break;
         case 2: /* lldt */
             if (!s->pe || s->vm86)
@@ -6953,10 +7113,10 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
-                gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 0);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-                gen_helper_lldt(cpu_tmp2_i32);
+                gen_helper_lldt(cpu_env, cpu_tmp2_i32);
             }
             break;
         case 1: /* str */
@@ -6967,7 +7127,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             ot = OT_WORD;
             if (mod == 3)
                 ot += s->dflag;
-            gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
             break;
         case 3: /* ltr */
             if (!s->pe || s->vm86)
@@ -6976,23 +7136,24 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
-                gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 0);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
-                gen_helper_ltr(cpu_tmp2_i32);
+                gen_helper_ltr(cpu_env, cpu_tmp2_i32);
             }
             break;
         case 4: /* verr */
         case 5: /* verw */
             if (!s->pe || s->vm86)
                 goto illegal_op;
-            gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 0);
             if (s->cc_op != CC_OP_DYNAMIC)
                 gen_op_set_cc_op(s->cc_op);
-            if (op == 4)
-                gen_helper_verr(cpu_T[0]);
-            else
-                gen_helper_verw(cpu_T[0]);
+            if (op == 4) {
+                gen_helper_verr(cpu_env, cpu_T[0]);
+            } else {
+                gen_helper_verw(cpu_env, cpu_T[0]);
+            }
             s->cc_op = CC_OP_EFLAGS;
             break;
         default:
@@ -7000,7 +7161,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         }
         break;
     case 0x101:
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         op = (modrm >> 3) & 7;
         rm = modrm & 7;
@@ -7009,7 +7170,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             if (mod == 3)
                 goto illegal_op;
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, gdt.limit));
             gen_op_st_T0_A0(OT_WORD + s->mem_index);
             gen_add_A0_im(s, 2);
@@ -7039,7 +7200,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                             gen_op_andl_A0_ffff();
                     }
                     gen_add_A0_ds_seg(s);
-                    gen_helper_monitor(cpu_A0);
+                    gen_helper_monitor(cpu_env, cpu_A0);
                     break;
                 case 1: /* mwait */
                     if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) ||
@@ -7047,7 +7208,25 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                         goto illegal_op;
                     gen_update_cc_op(s);
                     gen_jmp_im(pc_start - s->cs_base);
-                    gen_helper_mwait(tcg_const_i32(s->pc - pc_start));
+                    gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
+                    gen_eob(s);
+                    break;
+                case 2: /* clac */
+                    if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP) ||
+                        s->cpl != 0) {
+                        goto illegal_op;
+                    }
+                    gen_helper_clac(cpu_env);
+                    gen_jmp_im(s->pc - s->cs_base);
+                    gen_eob(s);
+                    break;
+                case 3: /* stac */
+                    if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP) ||
+                        s->cpl != 0) {
+                        goto illegal_op;
+                    }
+                    gen_helper_stac(cpu_env);
+                    gen_jmp_im(s->pc - s->cs_base);
                     gen_eob(s);
                     break;
                 default:
@@ -7055,7 +7234,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                 }
             } else { /* sidt */
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, idt.limit));
                 gen_op_st_T0_A0(OT_WORD + s->mem_index);
                 gen_add_A0_im(s, 2);
@@ -7079,7 +7258,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                         gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                         break;
                     } else {
-                        gen_helper_vmrun(tcg_const_i32(s->aflag),
+                        gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag),
                                          tcg_const_i32(s->pc - pc_start));
                         tcg_gen_exit_tb(0);
                         s->is_jmp = DISAS_TB_JUMP;
@@ -7088,7 +7267,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                 case 1: /* VMMCALL */
                     if (!(s->flags & HF_SVME_MASK))
                         goto illegal_op;
-                    gen_helper_vmmcall();
+                    gen_helper_vmmcall(cpu_env);
                     break;
                 case 2: /* VMLOAD */
                     if (!(s->flags & HF_SVME_MASK) || !s->pe)
@@ -7097,7 +7276,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                         gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                         break;
                     } else {
-                        gen_helper_vmload(tcg_const_i32(s->aflag));
+                        gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag));
                     }
                     break;
                 case 3: /* VMSAVE */
@@ -7107,7 +7286,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                         gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                         break;
                     } else {
-                        gen_helper_vmsave(tcg_const_i32(s->aflag));
+                        gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag));
                     }
                     break;
                 case 4: /* STGI */
@@ -7119,7 +7298,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                         gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                         break;
                     } else {
-                        gen_helper_stgi();
+                        gen_helper_stgi(cpu_env);
                     }
                     break;
                 case 5: /* CLGI */
@@ -7129,7 +7308,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                         gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                         break;
                     } else {
-                        gen_helper_clgi();
+                        gen_helper_clgi(cpu_env);
                     }
                     break;
                 case 6: /* SKINIT */
@@ -7137,7 +7316,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                          !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) || 
                         !s->pe)
                         goto illegal_op;
-                    gen_helper_skinit();
+                    gen_helper_skinit(cpu_env);
                     break;
                 case 7: /* INVLPGA */
                     if (!(s->flags & HF_SVME_MASK) || !s->pe)
@@ -7146,7 +7325,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                         gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                         break;
                     } else {
-                        gen_helper_invlpga(tcg_const_i32(s->aflag));
+                        gen_helper_invlpga(cpu_env, tcg_const_i32(s->aflag));
                     }
                     break;
                 default:
@@ -7157,7 +7336,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             } else {
                 gen_svm_check_intercept(s, pc_start,
                                         op==2 ? SVM_EXIT_GDTR_WRITE : SVM_EXIT_IDTR_WRITE);
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_op_ld_T1_A0(OT_WORD + s->mem_index);
                 gen_add_A0_im(s, 2);
                 gen_op_ld_T0_A0(CODE64(s) + OT_LONG + s->mem_index);
@@ -7179,15 +7358,15 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
 #else
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,cr[0]));
 #endif
-            gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 1);
             break;
         case 6: /* lmsw */
             if (s->cpl != 0) {
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
-                gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
-                gen_helper_lmsw(cpu_T[0]);
+                gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 0);
+                gen_helper_lmsw(cpu_env, cpu_T[0]);
                 gen_jmp_im(s->pc - s->cs_base);
                 gen_eob(s);
             }
@@ -7200,8 +7379,8 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                     if (s->cc_op != CC_OP_DYNAMIC)
                         gen_op_set_cc_op(s->cc_op);
                     gen_jmp_im(pc_start - s->cs_base);
-                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
-                    gen_helper_invlpg(cpu_A0);
+                    gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
+                    gen_helper_invlpg(cpu_env, cpu_A0);
                     gen_jmp_im(s->pc - s->cs_base);
                     gen_eob(s);
                 }
@@ -7236,7 +7415,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                     gen_jmp_im(pc_start - s->cs_base);
                     if (use_icount)
                         gen_io_start();
-                    gen_helper_rdtscp();
+                    gen_helper_rdtscp(cpu_env);
                     if (use_icount) {
                         gen_io_end();
                         gen_jmp(s, s->pc - s->cs_base);
@@ -7267,7 +7446,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             /* d_ot is the size of destination */
             d_ot = dflag + OT_WORD;
 
-            modrm = ldub_code(s->pc++);
+            modrm = cpu_ldub_code(env, s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
             mod = (modrm >> 6) & 3;
             rm = (modrm & 7) | REX_B(s);
@@ -7279,7 +7458,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                     tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
                 gen_op_mov_reg_T0(d_ot, reg);
             } else {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 if (d_ot == OT_QUAD) {
                     gen_op_lds_T0_A0(OT_LONG + s->mem_index);
                 } else {
@@ -7299,12 +7478,12 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             t1 = tcg_temp_local_new();
             t2 = tcg_temp_local_new();
             ot = OT_WORD;
-            modrm = ldub_code(s->pc++);
+            modrm = cpu_ldub_code(env, s->pc++);
             reg = (modrm >> 3) & 7;
             mod = (modrm >> 6) & 3;
             rm = modrm & 7;
             if (mod != 3) {
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
                 gen_op_ld_v(ot + s->mem_index, t0, cpu_A0);
                 a0 = tcg_temp_local_new();
                 tcg_gen_mov_tl(a0, cpu_A0);
@@ -7347,16 +7526,17 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             if (!s->pe || s->vm86)
                 goto illegal_op;
             ot = dflag ? OT_LONG : OT_WORD;
-            modrm = ldub_code(s->pc++);
+            modrm = cpu_ldub_code(env, s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
-            gen_ldst_modrm(s, modrm, OT_WORD, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, OT_WORD, OR_TMP0, 0);
             t0 = tcg_temp_local_new();
             if (s->cc_op != CC_OP_DYNAMIC)
                 gen_op_set_cc_op(s->cc_op);
-            if (b == 0x102)
-                gen_helper_lar(t0, cpu_T[0]);
-            else
-                gen_helper_lsl(t0, cpu_T[0]);
+            if (b == 0x102) {
+                gen_helper_lar(t0, cpu_env, cpu_T[0]);
+            } else {
+                gen_helper_lsl(t0, cpu_env, cpu_T[0]);
+            }
             tcg_gen_andi_tl(cpu_tmp0, cpu_cc_src, CC_Z);
             label1 = gen_new_label();
             tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
@@ -7367,7 +7547,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         }
         break;
     case 0x118:
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         op = (modrm >> 3) & 7;
         switch(op) {
@@ -7377,26 +7557,29 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         case 3: /* prefetchnt0 */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             /* nothing more to do */
             break;
         default: /* nop (multi byte) */
-            gen_nop_modrm(s, modrm);
+            gen_nop_modrm(env, s, modrm);
             break;
         }
         break;
     case 0x119 ... 0x11f: /* nop (multi byte) */
-        modrm = ldub_code(s->pc++);
-        gen_nop_modrm(s, modrm);
+        modrm = cpu_ldub_code(env, s->pc++);
+        gen_nop_modrm(env, s, modrm);
         break;
     case 0x120: /* mov reg, crN */
     case 0x122: /* mov crN, reg */
         if (s->cpl != 0) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
-            modrm = ldub_code(s->pc++);
-            if ((modrm & 0xc0) != 0xc0)
-                goto illegal_op;
+            modrm = cpu_ldub_code(env, s->pc++);
+            /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
+             * AMD documentation (24594.pdf) and testing of
+             * intel 386 and 486 processors all show that the mod bits
+             * are assumed to be 1's, regardless of actual values.
+             */
             rm = (modrm & 7) | REX_B(s);
             reg = ((modrm >> 3) & 7) | rex_r;
             if (CODE64(s))
@@ -7418,11 +7601,12 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                 gen_jmp_im(pc_start - s->cs_base);
                 if (b & 2) {
                     gen_op_mov_TN_reg(ot, 0, rm);
-                    gen_helper_write_crN(tcg_const_i32(reg), cpu_T[0]);
+                    gen_helper_write_crN(cpu_env, tcg_const_i32(reg),
+                                         cpu_T[0]);
                     gen_jmp_im(s->pc - s->cs_base);
                     gen_eob(s);
                 } else {
-                    gen_helper_read_crN(cpu_T[0], tcg_const_i32(reg));
+                    gen_helper_read_crN(cpu_T[0], cpu_env, tcg_const_i32(reg));
                     gen_op_mov_reg_T0(ot, rm);
                 }
                 break;
@@ -7436,9 +7620,12 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         if (s->cpl != 0) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
-            modrm = ldub_code(s->pc++);
-            if ((modrm & 0xc0) != 0xc0)
-                goto illegal_op;
+            modrm = cpu_ldub_code(env, s->pc++);
+            /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
+             * AMD documentation (24594.pdf) and testing of
+             * intel 386 and 486 processors all show that the mod bits
+             * are assumed to be 1's, regardless of actual values.
+             */
             rm = (modrm & 7) | REX_B(s);
             reg = ((modrm >> 3) & 7) | rex_r;
             if (CODE64(s))
@@ -7451,7 +7638,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             if (b & 2) {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg);
                 gen_op_mov_TN_reg(ot, 0, rm);
-                gen_helper_movl_drN_T0(tcg_const_i32(reg), cpu_T[0]);
+                gen_helper_movl_drN_T0(cpu_env, tcg_const_i32(reg), cpu_T[0]);
                 gen_jmp_im(s->pc - s->cs_base);
                 gen_eob(s);
             } else {
@@ -7466,7 +7653,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
-            gen_helper_clts();
+            gen_helper_clts(cpu_env);
             /* abort block because static cpu state changed */
             gen_jmp_im(s->pc - s->cs_base);
             gen_eob(s);
@@ -7477,16 +7664,16 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         if (!(s->cpuid_features & CPUID_SSE2))
             goto illegal_op;
         ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
         reg = ((modrm >> 3) & 7) | rex_r;
         /* generate a generic store */
-        gen_ldst_modrm(s, modrm, ot, reg, 1);
+        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
         break;
     case 0x1ae:
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         op = (modrm >> 3) & 7;
         switch(op) {
@@ -7498,11 +7685,11 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
                 break;
             }
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             if (s->cc_op != CC_OP_DYNAMIC)
                 gen_op_set_cc_op(s->cc_op);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_fxsave(cpu_A0, tcg_const_i32((s->dflag == 2)));
+            gen_helper_fxsave(cpu_env, cpu_A0, tcg_const_i32((s->dflag == 2)));
             break;
         case 1: /* fxrstor */
             if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
@@ -7512,11 +7699,12 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
                 break;
             }
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             if (s->cc_op != CC_OP_DYNAMIC)
                 gen_op_set_cc_op(s->cc_op);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_fxrstor(cpu_A0, tcg_const_i32((s->dflag == 2)));
+            gen_helper_fxrstor(cpu_env, cpu_A0,
+                               tcg_const_i32((s->dflag == 2)));
             break;
         case 2: /* ldmxcsr */
         case 3: /* stmxcsr */
@@ -7527,10 +7715,11 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK) ||
                 mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             if (op == 2) {
                 gen_op_ld_T0_A0(OT_LONG + s->mem_index);
-                tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, mxcsr));
+                tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+                gen_helper_ldmxcsr(cpu_env, cpu_tmp2_i32);
             } else {
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, mxcsr));
                 gen_op_st_T0_A0(OT_LONG + s->mem_index);
@@ -7538,7 +7727,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             break;
         case 5: /* lfence */
         case 6: /* mfence */
-            if ((modrm & 0xc7) != 0xc0 || !(s->cpuid_features & CPUID_SSE))
+            if ((modrm & 0xc7) != 0xc0 || !(s->cpuid_features & CPUID_SSE2))
                 goto illegal_op;
             break;
         case 7: /* sfence / clflush */
@@ -7551,7 +7740,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
                 /* clflush */
                 if (!(s->cpuid_features & CPUID_CLFLUSH))
                     goto illegal_op;
-                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
             }
             break;
         default:
@@ -7559,11 +7748,11 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         }
         break;
     case 0x10d: /* 3DNow! prefetch(w) */
-        modrm = ldub_code(s->pc++);
+        modrm = cpu_ldub_code(env, s->pc++);
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
-        gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+        gen_lea_modrm(env, s, modrm, &reg_addr, &offset_addr);
         /* ignore for now */
         break;
     case 0x1aa: /* rsm */
@@ -7572,7 +7761,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
             goto illegal_op;
         gen_update_cc_op(s);
         gen_jmp_im(s->pc - s->cs_base);
-        gen_helper_rsm();
+        gen_helper_rsm(cpu_env);
         gen_eob(s);
         break;
     case 0x1b8: /* SSE4.2 popcnt */
@@ -7582,8 +7771,8 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
             goto illegal_op;
 
-        modrm = ldub_code(s->pc++);
-        reg = ((modrm >> 3) & 7);
+        modrm = cpu_ldub_code(env, s->pc++);
+        reg = ((modrm >> 3) & 7) | rex_r;
 
         if (s->prefix & PREFIX_DATA)
             ot = OT_WORD;
@@ -7592,8 +7781,8 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
         else
             ot = OT_QUAD;
 
-        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
-        gen_helper_popcnt(cpu_T[0], cpu_T[0], tcg_const_i32(ot));
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_helper_popcnt(cpu_T[0], cpu_env, cpu_T[0], tcg_const_i32(ot));
         gen_op_mov_reg_T0(ot, reg);
 
         s->cc_op = CC_OP_EFLAGS;
@@ -7609,7 +7798,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
     case 0x1c2:
     case 0x1c4 ... 0x1c6:
     case 0x1d0 ... 0x1fe:
-        gen_sse(s, b, pc_start, rex_r);
+        gen_sse(env, s, b, pc_start, rex_r);
         break;
     default:
         goto illegal_op;
@@ -7628,71 +7817,66 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start)
 
 void optimize_flags_init(void)
 {
-#if TCG_TARGET_REG_BITS == 32
-    assert(sizeof(CCTable) == (1 << 3));
-#else
-    assert(sizeof(CCTable) == (1 << 4));
-#endif
     cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
     cpu_cc_op = tcg_global_mem_new_i32(TCG_AREG0,
-                                       offsetof(CPUState, cc_op), "cc_op");
-    cpu_cc_src = tcg_global_mem_new(TCG_AREG0, offsetof(CPUState, cc_src),
+                                       offsetof(CPUX86State, cc_op), "cc_op");
+    cpu_cc_src = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src),
                                     "cc_src");
-    cpu_cc_dst = tcg_global_mem_new(TCG_AREG0, offsetof(CPUState, cc_dst),
+    cpu_cc_dst = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_dst),
                                     "cc_dst");
-    cpu_cc_tmp = tcg_global_mem_new(TCG_AREG0, offsetof(CPUState, cc_tmp),
+    cpu_cc_tmp = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_tmp),
                                     "cc_tmp");
 
 #ifdef TARGET_X86_64
     cpu_regs[R_EAX] = tcg_global_mem_new_i64(TCG_AREG0,
-                                             offsetof(CPUState, regs[R_EAX]), "rax");
+                                             offsetof(CPUX86State, regs[R_EAX]), "rax");
     cpu_regs[R_ECX] = tcg_global_mem_new_i64(TCG_AREG0,
-                                             offsetof(CPUState, regs[R_ECX]), "rcx");
+                                             offsetof(CPUX86State, regs[R_ECX]), "rcx");
     cpu_regs[R_EDX] = tcg_global_mem_new_i64(TCG_AREG0,
-                                             offsetof(CPUState, regs[R_EDX]), "rdx");
+                                             offsetof(CPUX86State, regs[R_EDX]), "rdx");
     cpu_regs[R_EBX] = tcg_global_mem_new_i64(TCG_AREG0,
-                                             offsetof(CPUState, regs[R_EBX]), "rbx");
+                                             offsetof(CPUX86State, regs[R_EBX]), "rbx");
     cpu_regs[R_ESP] = tcg_global_mem_new_i64(TCG_AREG0,
-                                             offsetof(CPUState, regs[R_ESP]), "rsp");
+                                             offsetof(CPUX86State, regs[R_ESP]), "rsp");
     cpu_regs[R_EBP] = tcg_global_mem_new_i64(TCG_AREG0,
-                                             offsetof(CPUState, regs[R_EBP]), "rbp");
+                                             offsetof(CPUX86State, regs[R_EBP]), "rbp");
     cpu_regs[R_ESI] = tcg_global_mem_new_i64(TCG_AREG0,
-                                             offsetof(CPUState, regs[R_ESI]), "rsi");
+                                             offsetof(CPUX86State, regs[R_ESI]), "rsi");
     cpu_regs[R_EDI] = tcg_global_mem_new_i64(TCG_AREG0,
-                                             offsetof(CPUState, regs[R_EDI]), "rdi");
+                                             offsetof(CPUX86State, regs[R_EDI]), "rdi");
     cpu_regs[8] = tcg_global_mem_new_i64(TCG_AREG0,
-                                         offsetof(CPUState, regs[8]), "r8");
+                                         offsetof(CPUX86State, regs[8]), "r8");
     cpu_regs[9] = tcg_global_mem_new_i64(TCG_AREG0,
-                                          offsetof(CPUState, regs[9]), "r9");
+                                          offsetof(CPUX86State, regs[9]), "r9");
     cpu_regs[10] = tcg_global_mem_new_i64(TCG_AREG0,
-                                          offsetof(CPUState, regs[10]), "r10");
+                                          offsetof(CPUX86State, regs[10]), "r10");
     cpu_regs[11] = tcg_global_mem_new_i64(TCG_AREG0,
-                                          offsetof(CPUState, regs[11]), "r11");
+                                          offsetof(CPUX86State, regs[11]), "r11");
     cpu_regs[12] = tcg_global_mem_new_i64(TCG_AREG0,
-                                          offsetof(CPUState, regs[12]), "r12");
+                                          offsetof(CPUX86State, regs[12]), "r12");
     cpu_regs[13] = tcg_global_mem_new_i64(TCG_AREG0,
-                                          offsetof(CPUState, regs[13]), "r13");
+                                          offsetof(CPUX86State, regs[13]), "r13");
     cpu_regs[14] = tcg_global_mem_new_i64(TCG_AREG0,
-                                          offsetof(CPUState, regs[14]), "r14");
+                                          offsetof(CPUX86State, regs[14]), "r14");
     cpu_regs[15] = tcg_global_mem_new_i64(TCG_AREG0,
-                                          offsetof(CPUState, regs[15]), "r15");
+                                          offsetof(CPUX86State, regs[15]), "r15");
 #else
     cpu_regs[R_EAX] = tcg_global_mem_new_i32(TCG_AREG0,
-                                             offsetof(CPUState, regs[R_EAX]), "eax");
+                                             offsetof(CPUX86State, regs[R_EAX]), "eax");
     cpu_regs[R_ECX] = tcg_global_mem_new_i32(TCG_AREG0,
-                                             offsetof(CPUState, regs[R_ECX]), "ecx");
+                                             offsetof(CPUX86State, regs[R_ECX]), "ecx");
     cpu_regs[R_EDX] = tcg_global_mem_new_i32(TCG_AREG0,
-                                             offsetof(CPUState, regs[R_EDX]), "edx");
+                                             offsetof(CPUX86State, regs[R_EDX]), "edx");
     cpu_regs[R_EBX] = tcg_global_mem_new_i32(TCG_AREG0,
-                                             offsetof(CPUState, regs[R_EBX]), "ebx");
+                                             offsetof(CPUX86State, regs[R_EBX]), "ebx");
     cpu_regs[R_ESP] = tcg_global_mem_new_i32(TCG_AREG0,
-                                             offsetof(CPUState, regs[R_ESP]), "esp");
+                                             offsetof(CPUX86State, regs[R_ESP]), "esp");
     cpu_regs[R_EBP] = tcg_global_mem_new_i32(TCG_AREG0,
-                                             offsetof(CPUState, regs[R_EBP]), "ebp");
+                                             offsetof(CPUX86State, regs[R_EBP]), "ebp");
     cpu_regs[R_ESI] = tcg_global_mem_new_i32(TCG_AREG0,
-                                             offsetof(CPUState, regs[R_ESI]), "esi");
+                                             offsetof(CPUX86State, regs[R_ESI]), "esi");
     cpu_regs[R_EDI] = tcg_global_mem_new_i32(TCG_AREG0,
-                                             offsetof(CPUState, regs[R_EDI]), "edi");
+                                             offsetof(CPUX86State, regs[R_EDI]), "edi");
 #endif
 
     /* register helpers */
@@ -7703,7 +7887,7 @@ void optimize_flags_init(void)
 /* generate intermediate code in gen_opc_buf and gen_opparam_buf for
    basic block 'tb'. If search_pc is TRUE, also generate PC
    information for each intermediate instruction. */
-static inline void gen_intermediate_code_internal(CPUState *env,
+static inline void gen_intermediate_code_internal(CPUX86State *env,
                                                   TranslationBlock *tb,
                                                   int search_pc)
 {
@@ -7740,15 +7924,13 @@ static inline void gen_intermediate_code_internal(CPUState *env,
     /* select memory access functions */
     dc->mem_index = 0;
     if (flags & HF_SOFTMMU_MASK) {
-        if (dc->cpl == 3)
-            dc->mem_index = 2 * 4;
-        else
-            dc->mem_index = 1 * 4;
+        dc->mem_index = (cpu_mmu_index(env) + 1) << 2;
     }
     dc->cpuid_features = env->cpuid_features;
     dc->cpuid_ext_features = env->cpuid_ext_features;
     dc->cpuid_ext2_features = env->cpuid_ext2_features;
     dc->cpuid_ext3_features = env->cpuid_ext3_features;
+    dc->cpuid_7_0_ebx_features = env->cpuid_7_0_ebx_features;
 #ifdef TARGET_X86_64
     dc->lma = (flags >> HF_LMA_SHIFT) & 1;
     dc->code64 = (flags >> HF_CS64_SHIFT) & 1;
@@ -7780,7 +7962,7 @@ static inline void gen_intermediate_code_internal(CPUState *env,
     cpu_ptr0 = tcg_temp_new_ptr();
     cpu_ptr1 = tcg_temp_new_ptr();
 
-    gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
+    gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
 
     dc->is_jmp = DISAS_NEXT;
     pc_ptr = pc_start;
@@ -7802,7 +7984,7 @@ static inline void gen_intermediate_code_internal(CPUState *env,
             }
         }
         if (search_pc) {
-            j = gen_opc_ptr - gen_opc_buf;
+            j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
             if (lj < j) {
                 lj++;
                 while (lj < j)
@@ -7816,7 +7998,7 @@ static inline void gen_intermediate_code_internal(CPUState *env,
         if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
             gen_io_start();
 
-        pc_ptr = disas_insn(dc, pc_ptr);
+        pc_ptr = disas_insn(env, dc, pc_ptr);
         num_insns++;
         /* stop translation if indicated */
         if (dc->is_jmp)
@@ -7833,7 +8015,7 @@ static inline void gen_intermediate_code_internal(CPUState *env,
             break;
         }
         /* if too long translation, stop generation too */
-        if (gen_opc_ptr >= gen_opc_end ||
+        if (tcg_ctx.gen_opc_ptr >= gen_opc_end ||
             (pc_ptr - pc_start) >= (TARGET_PAGE_SIZE - 32) ||
             num_insns >= max_insns) {
             gen_jmp_im(pc_ptr - dc->cs_base);
@@ -7849,10 +8031,10 @@ static inline void gen_intermediate_code_internal(CPUState *env,
     if (tb->cflags & CF_LAST_IO)
         gen_io_end();
     gen_icount_end(tb, num_insns);
-    *gen_opc_ptr = INDEX_op_end;
+    *tcg_ctx.gen_opc_ptr = INDEX_op_end;
     /* we don't forget to fill the last values */
     if (search_pc) {
-        j = gen_opc_ptr - gen_opc_buf;
+        j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
         lj++;
         while (lj <= j)
             gen_opc_instr_start[lj++] = 0;
@@ -7869,7 +8051,7 @@ static inline void gen_intermediate_code_internal(CPUState *env,
         else
 #endif
             disas_flags = !dc->code32;
-        log_target_disas(pc_start, pc_ptr - pc_start, disas_flags);
+        log_target_disas(env, pc_start, pc_ptr - pc_start, disas_flags);
         qemu_log("\n");
     }
 #endif
@@ -7880,18 +8062,17 @@ static inline void gen_intermediate_code_internal(CPUState *env,
     }
 }
 
-void gen_intermediate_code(CPUState *env, TranslationBlock *tb)
+void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb)
 {
     gen_intermediate_code_internal(env, tb, 0);
 }
 
-void gen_intermediate_code_pc(CPUState *env, TranslationBlock *tb)
+void gen_intermediate_code_pc(CPUX86State *env, TranslationBlock *tb)
 {
     gen_intermediate_code_internal(env, tb, 1);
 }
 
-void gen_pc_load(CPUState *env, TranslationBlock *tb,
-                unsigned long searched_pc, int pc_pos, void *puc)
+void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb, int pc_pos)
 {
     int cc_op;
 #ifdef DEBUG_DISAS
@@ -7903,8 +8084,8 @@ void gen_pc_load(CPUState *env, TranslationBlock *tb,
                 qemu_log("0x%04x: " TARGET_FMT_lx "\n", i, gen_opc_pc[i]);
             }
         }
-        qemu_log("spc=0x%08lx pc_pos=0x%x eip=" TARGET_FMT_lx " cs_base=%x\n",
-                searched_pc, pc_pos, gen_opc_pc[pc_pos] - tb->cs_base,
+        qemu_log("pc_pos=0x%x eip=" TARGET_FMT_lx " cs_base=%x\n",
+                pc_pos, gen_opc_pc[pc_pos] - tb->cs_base,
                 (uint32_t)tb->cs_base);
     }
 #endif
This page took 0.203131 seconds and 4 git commands to generate.