tcg: Remove tcg_regset_{or,and,andnot,not}
[qemu.git] / tcg / i386 / tcg-target.inc.c
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "tcg-pool.inc.c"

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
    "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
#else
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
#endif
};
#endif

static const int tcg_target_reg_alloc_order[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_RCX,
    TCG_REG_RDX,
    TCG_REG_RSI,
    TCG_REG_RDI,
    TCG_REG_RAX,
#else
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
    TCG_REG_EBP,
    TCG_REG_ECX,
    TCG_REG_EDX,
    TCG_REG_EAX,
#endif
};

static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
#if defined(_WIN64)
    TCG_REG_RCX,
    TCG_REG_RDX,
#else
    TCG_REG_RDI,
    TCG_REG_RSI,
    TCG_REG_RDX,
    TCG_REG_RCX,
#endif
    TCG_REG_R8,
    TCG_REG_R9,
#else
    /* 32 bit mode uses stack based calling convention (GCC default). */
#endif
};

static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_EAX,
#if TCG_TARGET_REG_BITS == 32
    TCG_REG_EDX
#endif
};

/* Constants we accept.  */
#define TCG_CT_CONST_S32 0x100
#define TCG_CT_CONST_U32 0x200
#define TCG_CT_CONST_I32 0x400
#define TCG_CT_CONST_WSZ 0x800

/* Registers used with L constraint, which are the first argument
   registers on x86_64, and two random call clobbered registers on
   i386.  */
#if TCG_TARGET_REG_BITS == 64
# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
#else
# define TCG_REG_L0 TCG_REG_EAX
# define TCG_REG_L1 TCG_REG_EDX
#endif

/* The host compiler should supply <cpuid.h> to enable runtime features
   detection, as we're not going to go so far as our own inline assembly.
   If not available, default values will be assumed.  */
#if defined(CONFIG_CPUID_H)
#include "qemu/cpuid.h"
#endif

/* For 64-bit, we always know that CMOV is available.  */
#if TCG_TARGET_REG_BITS == 64
# define have_cmov 1
#elif defined(CONFIG_CPUID_H)
static bool have_cmov;
#else
# define have_cmov 0
#endif

/* We need these symbols in tcg-target.h, and we can't properly conditionalize
   it there.  Therefore we always define the variable.  */
bool have_bmi1;
bool have_popcnt;

#ifdef CONFIG_CPUID_H
static bool have_movbe;
static bool have_bmi2;
static bool have_lzcnt;
#else
# define have_movbe 0
# define have_bmi2 0
# define have_lzcnt 0
#endif

static tcg_insn_unit *tb_ret_addr;

static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    value += addend;
    switch(type) {
    case R_386_PC32:
        value -= (uintptr_t)code_ptr;
        if (value != (int32_t)value) {
            tcg_abort();
        }
        tcg_patch32(code_ptr, value);
        break;
    case R_386_PC8:
        value -= (uintptr_t)code_ptr;
        if (value != (int8_t)value) {
            tcg_abort();
        }
        tcg_patch8(code_ptr, value);
        break;
    default:
        tcg_abort();
    }
}
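
/* Note (for orientation): VALUE arrives as the absolute target address;
   subtracting CODE_PTR makes it relative to the relocation field itself.
   x86 displacements are relative to the end of the instruction, which is
   accounted for by the addend supplied when the relocation was created
   (e.g. -4 for a 32-bit field, as passed to tcg_out_reloc in
   tcg_out_jxx below).  */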

/* parse target specific constraints */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type)
{
    switch(*ct_str++) {
    case 'a':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
        break;
    case 'b':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
        break;
    case 'c':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
        break;
    case 'd':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
        break;
    case 'S':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
        break;
    case 'D':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
        break;
    case 'q':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xf);
        }
        break;
    case 'Q':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, 0xf);
        break;
    case 'r':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        break;
    case 'W':
        /* With TZCNT/LZCNT, we can have operand-size as an input.  */
        ct->ct |= TCG_CT_CONST_WSZ;
        break;

        /* qemu_ld/st address constraint */
    case 'L':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
        break;

    case 'e':
        ct->ct |= (type == TCG_TYPE_I32 ? TCG_CT_CONST : TCG_CT_CONST_S32);
        break;
    case 'Z':
        ct->ct |= (type == TCG_TYPE_I32 ? TCG_CT_CONST : TCG_CT_CONST_U32);
        break;
    case 'I':
        ct->ct |= (type == TCG_TYPE_I32 ? TCG_CT_CONST : TCG_CT_CONST_I32);
        break;

    default:
        return NULL;
    }
    return ct_str;
}

/* test if a constant matches the constraint */
static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
                                         const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
        return 1;
    }
    return 0;
}

#if TCG_TARGET_REG_BITS == 64
# define LOWREGMASK(x)  ((x) & 7)
#else
# define LOWREGMASK(x)  (x)
#endif

#define P_EXT           0x100           /* 0x0f opcode prefix */
#define P_EXT38         0x200           /* 0x0f 0x38 opcode prefix */
#define P_DATA16        0x400           /* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32       0x800           /* 0x67 opcode prefix */
# define P_REXW         0x1000          /* Set REX.W = 1 */
# define P_REXB_R       0x2000          /* REG field as byte register */
# define P_REXB_RM      0x4000          /* R/M field as byte register */
# define P_GS           0x8000          /* gs segment override */
#else
# define P_ADDR32       0
# define P_REXW         0
# define P_REXB_R       0
# define P_REXB_RM      0
# define P_GS           0
#endif
#define P_SIMDF3        0x10000         /* 0xf3 opcode prefix */
#define P_SIMDF2        0x20000         /* 0xf2 opcode prefix */

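/* Only the low byte of the OPC_* values below reaches the instruction
   stream; the P_* bits above it are interpreted (and discarded) by
   tcg_out_opc.  For example, OPC_MOVSWL + P_REXW selects the 64-bit
   movswq form, and SHIFT_ROL + P_DATA16 (see tcg_out_rolw_8) yields a
   16-bit rotate via the 0x66 prefix.  */
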
#define OPC_ARITH_EvIz  (0x81)
#define OPC_ARITH_EvIb  (0x83)
#define OPC_ARITH_GvEv  (0x03)          /* ... plus (ARITH_FOO << 3) */
#define OPC_ANDN        (0xf2 | P_EXT38)
#define OPC_ADD_GvEv    (OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSF         (0xbc | P_EXT)
#define OPC_BSR         (0xbd | P_EXT)
#define OPC_BSWAP       (0xc8 | P_EXT)
#define OPC_CALL_Jz     (0xe8)
#define OPC_CMOVCC      (0x40 | P_EXT)  /* ... plus condition code */
#define OPC_CMP_GvEv    (OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32     (0x48)
#define OPC_IMUL_GvEv   (0xaf | P_EXT)
#define OPC_IMUL_GvEvIb (0x6b)
#define OPC_IMUL_GvEvIz (0x69)
#define OPC_INC_r32     (0x40)
#define OPC_JCC_long    (0x80 | P_EXT)  /* ... plus condition code */
#define OPC_JCC_short   (0x70)          /* ... plus condition code */
#define OPC_JMP_long    (0xe9)
#define OPC_JMP_short   (0xeb)
#define OPC_LEA         (0x8d)
#define OPC_LZCNT       (0xbd | P_EXT | P_SIMDF3)
#define OPC_MOVB_EvGv   (0x88)          /* stores, more or less */
#define OPC_MOVL_EvGv   (0x89)          /* stores, more or less */
#define OPC_MOVL_GvEv   (0x8b)          /* loads, more or less */
#define OPC_MOVB_EvIz   (0xc6)
#define OPC_MOVL_EvIz   (0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVBE_GyMy  (0xf0 | P_EXT38)
#define OPC_MOVBE_MyGy  (0xf1 | P_EXT38)
#define OPC_MOVSBL      (0xbe | P_EXT)
#define OPC_MOVSWL      (0xbf | P_EXT)
#define OPC_MOVSLQ      (0x63 | P_REXW)
#define OPC_MOVZBL      (0xb6 | P_EXT)
#define OPC_MOVZWL      (0xb7 | P_EXT)
#define OPC_POP_r32     (0x58)
#define OPC_POPCNT      (0xb8 | P_EXT | P_SIMDF3)
#define OPC_PUSH_r32    (0x50)
#define OPC_PUSH_Iv     (0x68)
#define OPC_PUSH_Ib     (0x6a)
#define OPC_RET         (0xc3)
#define OPC_SETCC       (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
#define OPC_SHIFT_1     (0xd1)
#define OPC_SHIFT_Ib    (0xc1)
#define OPC_SHIFT_cl    (0xd3)
#define OPC_SARX        (0xf7 | P_EXT38 | P_SIMDF3)
#define OPC_SHLX        (0xf7 | P_EXT38 | P_DATA16)
#define OPC_SHRX        (0xf7 | P_EXT38 | P_SIMDF2)
#define OPC_TESTL       (0x85)
#define OPC_TZCNT       (0xbc | P_EXT | P_SIMDF3)
#define OPC_XCHG_ax_r32 (0x90)

#define OPC_GRP3_Ev     (0xf7)
#define OPC_GRP5        (0xff)

/* Group 1 opcode extensions for 0x80-0x83.
   These are also used as modifiers for OPC_ARITH.  */
#define ARITH_ADD 0
#define ARITH_OR  1
#define ARITH_ADC 2
#define ARITH_SBB 3
#define ARITH_AND 4
#define ARITH_SUB 5
#define ARITH_XOR 6
#define ARITH_CMP 7

/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3.  */
#define SHIFT_ROL 0
#define SHIFT_ROR 1
#define SHIFT_SHL 4
#define SHIFT_SHR 5
#define SHIFT_SAR 7

/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3.  */
#define EXT3_NOT   2
#define EXT3_NEG   3
#define EXT3_MUL   4
#define EXT3_IMUL  5
#define EXT3_DIV   6
#define EXT3_IDIV  7

/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5.  */
#define EXT5_INC_Ev    0
#define EXT5_DEC_Ev    1
#define EXT5_CALLN_Ev  2
#define EXT5_JMPN_Ev   4

/* Condition codes to be added to OPC_JCC_{long,short}.  */
#define JCC_JMP (-1)
#define JCC_JO  0x0
#define JCC_JNO 0x1
#define JCC_JB  0x2
#define JCC_JAE 0x3
#define JCC_JE  0x4
#define JCC_JNE 0x5
#define JCC_JBE 0x6
#define JCC_JA  0x7
#define JCC_JS  0x8
#define JCC_JNS 0x9
#define JCC_JP  0xa
#define JCC_JNP 0xb
#define JCC_JL  0xc
#define JCC_JGE 0xd
#define JCC_JLE 0xe
#define JCC_JG  0xf

static const uint8_t tcg_cond_to_jcc[] = {
    [TCG_COND_EQ] = JCC_JE,
    [TCG_COND_NE] = JCC_JNE,
    [TCG_COND_LT] = JCC_JL,
    [TCG_COND_GE] = JCC_JGE,
    [TCG_COND_LE] = JCC_JLE,
    [TCG_COND_GT] = JCC_JG,
    [TCG_COND_LTU] = JCC_JB,
    [TCG_COND_GEU] = JCC_JAE,
    [TCG_COND_LEU] = JCC_JBE,
    [TCG_COND_GTU] = JCC_JA,
};
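
/* For example, TCG_COND_LT maps to JL: tcg_out_jxx below emits either
   the short form 0x70 + 0xc = 0x7c with a rel8, or the long form
   0x0f 0x80 + 0xc = 0x0f 0x8c with a rel32.  */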

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
    int rex;

    if (opc & P_GS) {
        tcg_out8(s, 0x65);
    }
    if (opc & P_DATA16) {
        /* We should never be asking for both 16 and 64-bit operation.  */
        tcg_debug_assert((opc & P_REXW) == 0);
        tcg_out8(s, 0x66);
    }
    if (opc & P_ADDR32) {
        tcg_out8(s, 0x67);
    }
    if (opc & P_SIMDF3) {
        tcg_out8(s, 0xf3);
    } else if (opc & P_SIMDF2) {
        tcg_out8(s, 0xf2);
    }

    rex = 0;
    rex |= (opc & P_REXW) ? 0x8 : 0x0;  /* REX.W */
    rex |= (r & 8) >> 1;                /* REX.R */
    rex |= (x & 8) >> 2;                /* REX.X */
    rex |= (rm & 8) >> 3;               /* REX.B */

    /* P_REXB_{R,RM} indicates that the given register is the low byte.
       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
       as otherwise the encoding indicates %[abcd]h.  Note that the values
       that are ORed in merely indicate that the REX byte must be present;
       those bits get discarded in output.  */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);

    if (rex) {
        tcg_out8(s, (uint8_t)(rex | 0x40));
    }

    if (opc & (P_EXT | P_EXT38)) {
        tcg_out8(s, 0x0f);
        if (opc & P_EXT38) {
            tcg_out8(s, 0x38);
        }
    }

    tcg_out8(s, opc);
}
#else
static void tcg_out_opc(TCGContext *s, int opc)
{
    if (opc & P_DATA16) {
        tcg_out8(s, 0x66);
    }
    if (opc & P_SIMDF3) {
        tcg_out8(s, 0xf3);
    } else if (opc & P_SIMDF2) {
        tcg_out8(s, 0xf2);
    }
    if (opc & (P_EXT | P_EXT38)) {
        tcg_out8(s, 0x0f);
        if (opc & P_EXT38) {
            tcg_out8(s, 0x38);
        }
    }
    tcg_out8(s, opc);
}
/* Discard the register arguments to tcg_out_opc early, so as not to penalize
   the 32-bit compilation paths.  This method works with all versions of gcc,
   whereas relying on optimization may not be able to exclude them.  */
#define tcg_out_opc(s, opc, r, rm, x)  (tcg_out_opc)(s, opc)
#endif

static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
{
    tcg_out_opc(s, opc, r, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
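
/* For illustration: tcg_out_modrm(s, OPC_ADD_GvEv + P_REXW, TCG_REG_RAX,
   TCG_REG_RBX) emits 48 03 c3, i.e. "addq %rbx, %rax": the REX.W prefix,
   the opcode byte, and a ModRM byte with mod = 3 (register direct).  */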

static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
{
    int tmp;

    if ((opc & (P_REXW | P_EXT | P_EXT38)) || (rm & 8)) {
        /* Three byte VEX prefix.  */
        tcg_out8(s, 0xc4);

        /* VEX.m-mmmm */
        if (opc & P_EXT38) {
            tmp = 2;
        } else if (opc & P_EXT) {
            tmp = 1;
        } else {
            tcg_abort();
        }
        tmp |= 0x40;                       /* VEX.X */
        tmp |= (r & 8 ? 0 : 0x80);         /* VEX.R */
        tmp |= (rm & 8 ? 0 : 0x20);        /* VEX.B */
        tcg_out8(s, tmp);

        tmp = (opc & P_REXW ? 0x80 : 0);   /* VEX.W */
    } else {
        /* Two byte VEX prefix.  */
        tcg_out8(s, 0xc5);

        tmp = (r & 8 ? 0 : 0x80);          /* VEX.R */
    }
    /* VEX.pp */
    if (opc & P_DATA16) {
        tmp |= 1;                          /* 0x66 */
    } else if (opc & P_SIMDF3) {
        tmp |= 2;                          /* 0xf3 */
    } else if (opc & P_SIMDF2) {
        tmp |= 3;                          /* 0xf2 */
    }
    tmp |= (~v & 15) << 3;                 /* VEX.vvvv */
    tcg_out8(s, tmp);
    tcg_out8(s, opc);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}

/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
   We handle either RM or INDEX missing with a negative value.  In 64-bit
   mode for absolute addresses, ~RM is the size of the immediate operand
   that will follow the instruction.  */

static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
                                     int index, int shift, intptr_t offset)
{
    int mod, len;

    if (index < 0 && rm < 0) {
        if (TCG_TARGET_REG_BITS == 64) {
            /* Try for a rip-relative addressing mode.  This has replaced
               the 32-bit-mode absolute addressing encoding.  */
            intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
            intptr_t disp = offset - pc;
            if (disp == (int32_t)disp) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
                tcg_out32(s, disp);
                return;
            }

            /* Try for an absolute address encoding.  This requires the
               use of the MODRM+SIB encoding and is therefore larger than
               rip-relative addressing.  */
            if (offset == (int32_t)offset) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
                tcg_out8(s, (4 << 3) | 5);
                tcg_out32(s, offset);
                return;
            }

            /* ??? The memory isn't directly addressable.  */
            tcg_abort();
        } else {
            /* Absolute address.  */
            tcg_out_opc(s, opc, r, 0, 0);
            tcg_out8(s, (r << 3) | 5);
            tcg_out32(s, offset);
            return;
        }
    }

    /* Find the length of the immediate addend.  Note that the encoding
       that would be used for (%ebp) indicates absolute addressing.  */
    if (rm < 0) {
        mod = 0, len = 4, rm = 5;
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
        mod = 0, len = 0;
    } else if (offset == (int8_t)offset) {
        mod = 0x40, len = 1;
    } else {
        mod = 0x80, len = 4;
    }

    /* Use a single byte MODRM format if possible.  Note that the encoding
       that would be used for %esp is the escape to the two byte form.  */
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
        /* Single byte MODRM format.  */
        tcg_out_opc(s, opc, r, rm, 0);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
    } else {
        /* Two byte MODRM+SIB format.  */

        /* Note that the encoding that would place %esp into the index
           field indicates no index register.  In 64-bit mode, the REX.X
           bit counts, so %r12 can be used as the index.  */
        if (index < 0) {
            index = 4;
        } else {
            tcg_debug_assert(index != TCG_REG_ESP);
        }

        tcg_out_opc(s, opc, r, rm, index);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
    }

    if (len == 1) {
        tcg_out8(s, offset);
    } else if (len == 4) {
        tcg_out32(s, offset);
    }
}
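
/* For illustration: tcg_out_modrm_offset(s, OPC_MOVL_GvEv, TCG_REG_EAX,
   TCG_REG_ESP, 4) must take the two-byte MODRM+SIB form, because rm = 4
   (%esp) is the SIB escape; it emits 8b 44 24 04, i.e.
   "movl 4(%esp), %eax".  */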

/* A simplification of the above with no index or shift.  */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
                                        int rm, intptr_t offset)
{
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
}

/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
{
    /* Propagate an opcode prefix, such as P_REXW.  */
    int ext = subop & ~0x7;
    subop &= 0x7;

    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
}

static inline void tcg_out_mov(TCGContext *s, TCGType type,
                               TCGReg ret, TCGReg arg)
{
    if (arg != ret) {
        int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
        tcg_out_modrm(s, opc, ret, arg);
    }
}

static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg)
{
    tcg_target_long diff;

    if (arg == 0) {
        tgen_arithr(s, ARITH_XOR, ret, ret);
        return;
    }
    if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
        return;
    }
    if (arg == (int32_t)arg) {
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
        tcg_out32(s, arg);
        return;
    }

    /* Try a 7 byte pc-relative lea before the 10 byte movq.  */
    diff = arg - ((uintptr_t)s->code_ptr + 7);
    if (diff == (int32_t)diff) {
        tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
        tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
        tcg_out32(s, diff);
        return;
    }

    tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
    tcg_out64(s, arg);
}
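
/* The cases in tcg_out_movi above are ordered by code size: 2-3 bytes
   for the xor, 5 for movl $imm32, 7 for either the sign-extended
   movq $imm32 or the pc-relative lea, and 10 for the full movabsq.  */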

static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
{
    if (val == (int8_t)val) {
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
        tcg_out8(s, val);
    } else if (val == (int32_t)val) {
        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
        tcg_out32(s, val);
    } else {
        tcg_abort();
    }
}

static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
{
    /* Given the strength of x86 memory ordering, we only need care for
       store-load ordering.  Experimentally, "lock orl $0,0(%esp)" is
       faster than "mfence", so don't bother with the SSE insn.  */
    if (a0 & TCG_MO_ST_LD) {
        tcg_out8(s, 0xf0);
        tcg_out_modrm_offset(s, OPC_ARITH_EvIb, ARITH_OR, TCG_REG_ESP, 0);
        tcg_out8(s, 0);
    }
}

static inline void tcg_out_push(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_pop(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                              TCGReg arg1, intptr_t arg2)
{
    int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
}

static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs)
{
    int rexw = 0;
    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) {
        if (val != (int32_t)val) {
            return false;
        }
        rexw = P_REXW;
    }
    tcg_out_modrm_offset(s, OPC_MOVL_EvIz | rexw, 0, base, ofs);
    tcg_out32(s, val);
    return true;
}

static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
{
    /* Propagate an opcode prefix, such as P_DATA16.  */
    int ext = subopc & ~0x7;
    subopc &= 0x7;

    if (count == 1) {
        tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
    } else {
        tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
        tcg_out8(s, count);
    }
}

static inline void tcg_out_bswap32(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_rolw_8(TCGContext *s, int reg)
{
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
}

static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
{
    /* movzbl */
    tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
}

static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsbl */
    tcg_debug_assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
}

static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
{
    /* movzwl */
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
}

static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsw[lq] */
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
}

static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
{
    /* 32-bit mov zero extends.  */
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
}

static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
}

static inline void tcg_out_bswap64(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
}

static void tgen_arithi(TCGContext *s, int c, int r0,
                        tcg_target_long val, int cf)
{
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        rexw = c & -8;
        c &= 7;
    }

    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
       partial flags update stalls on Pentium4 and are not recommended
       by current Intel optimization manuals.  */
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
        int is_inc = (c == ARITH_ADD) ^ (val < 0);
        if (TCG_TARGET_REG_BITS == 64) {
            /* The single-byte increment encodings are re-tasked as the
               REX prefixes.  Use the MODRM encoding.  */
            tcg_out_modrm(s, OPC_GRP5 + rexw,
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
        } else {
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
        }
        return;
    }

    if (c == ARITH_AND) {
        if (TCG_TARGET_REG_BITS == 64) {
            if (val == 0xffffffffu) {
                tcg_out_ext32u(s, r0, r0);
                return;
            }
            if (val == (uint32_t)val) {
                /* AND with no high bits set can use a 32-bit operation.  */
                rexw = 0;
            }
        }
        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
            tcg_out_ext8u(s, r0, r0);
            return;
        }
        if (val == 0xffffu) {
            tcg_out_ext16u(s, r0, r0);
            return;
        }
    }

    if (val == (int8_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
        tcg_out8(s, val);
        return;
    }
    if (rexw == 0 || val == (int32_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
        tcg_out32(s, val);
        return;
    }

    tcg_abort();
}

static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
{
    if (val != 0) {
        tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
    }
}

/* Use SMALL != 0 to force a short forward branch.  */
static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, int small)
{
    int32_t val, val1;

    if (l->has_value) {
        val = tcg_pcrel_diff(s, l->u.value_ptr);
        val1 = val - 2;
        if ((int8_t)val1 == val1) {
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_short);
            } else {
                tcg_out8(s, OPC_JCC_short + opc);
            }
            tcg_out8(s, val1);
        } else {
            if (small) {
                tcg_abort();
            }
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_long);
                tcg_out32(s, val - 5);
            } else {
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
                tcg_out32(s, val - 6);
            }
        }
    } else if (small) {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_short);
        } else {
            tcg_out8(s, OPC_JCC_short + opc);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC8, l, -1);
        s->code_ptr += 1;
    } else {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_long);
        } else {
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, l, -4);
        s->code_ptr += 4;
    }
}

static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int rexw)
{
    if (const_arg2) {
        if (arg2 == 0) {
            /* test r, r */
            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
        } else {
            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
        }
    } else {
        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
    }
}

static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             TCGLabel *label, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             TCGLabel *label, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label, small);
}
#else
/* XXX: we implement it at the target level to avoid having to
   handle cross basic blocks temporaries */
static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args, int small)
{
    TCGLabel *label_next = gen_new_label();
    TCGLabel *label_this = arg_label(args[5]);

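    /* The double-word comparison is decided by the high halves alone
       unless they are equal.  E.g. for LT: branch if (ah < bh) signed;
       fall through to label_next if (ah != bh); otherwise the result
       rests on the unsigned comparison (al < bl).  */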
    switch(args[4]) {
    case TCG_COND_EQ:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_next, 1);
        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
                         label_this, small);
        break;
    case TCG_COND_NE:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_this, small);
        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
                         label_this, small);
        break;
    case TCG_COND_LT:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_LE:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GT:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GE:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_LTU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_LEU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GTU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    case TCG_COND_GEU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         label_this, small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         label_this, small);
        break;
    default:
        tcg_abort();
    }
    tcg_out_label(s, label_next, s->code_ptr);
}
#endif

static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
#else
static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    TCGArg new_args[6];
    TCGLabel *label_true, *label_over;

    memcpy(new_args, args+1, 5*sizeof(TCGArg));

    if (args[0] == args[1] || args[0] == args[2]
        || (!const_args[3] && args[0] == args[3])
        || (!const_args[4] && args[0] == args[4])) {
        /* When the destination overlaps with one of the argument
           registers, don't do anything tricky.  */
        label_true = gen_new_label();
        label_over = gen_new_label();

        new_args[5] = label_arg(label_true);
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
        tcg_out_jxx(s, JCC_JMP, label_over, 1);
        tcg_out_label(s, label_true, s->code_ptr);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
        tcg_out_label(s, label_over, s->code_ptr);
    } else {
        /* When the destination does not overlap one of the arguments,
           clear the destination first, jump if cond false, and emit an
           increment in the true case.  This results in smaller code.  */

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);

        label_over = gen_new_label();
        new_args[4] = tcg_invert_cond(new_args[4]);
        new_args[5] = label_arg(label_over);
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
        tcg_out_label(s, label_over, s->code_ptr);
    }
}
#endif

static void tcg_out_cmov(TCGContext *s, TCGCond cond, int rexw,
                         TCGReg dest, TCGReg v1)
{
    if (have_cmov) {
        tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | rexw, dest, v1);
    } else {
        TCGLabel *over = gen_new_label();
        tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
        tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
        tcg_out_label(s, over, s->code_ptr);
    }
}

static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGReg dest,
                              TCGReg c1, TCGArg c2, int const_c2,
                              TCGReg v1)
{
    tcg_out_cmp(s, c1, c2, const_c2, 0);
    tcg_out_cmov(s, cond, 0, dest, v1);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGReg dest,
                              TCGReg c1, TCGArg c2, int const_c2,
                              TCGReg v1)
{
    tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
    tcg_out_cmov(s, cond, P_REXW, dest, v1);
}
#endif
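
/* Note: BSF and BSR leave the destination undefined when the input is
   zero (unlike TZCNT/LZCNT, which produce the operand size), which is
   why the fallback paths below substitute ARG2 via CMOV.  */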

static void tcg_out_ctz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1,
                        TCGArg arg2, bool const_a2)
{
    if (have_bmi1) {
        tcg_out_modrm(s, OPC_TZCNT + rexw, dest, arg1);
        if (const_a2) {
            tcg_debug_assert(arg2 == (rexw ? 64 : 32));
        } else {
            tcg_debug_assert(dest != arg2);
            tcg_out_cmov(s, TCG_COND_LTU, rexw, dest, arg2);
        }
    } else {
        tcg_debug_assert(dest != arg2);
        tcg_out_modrm(s, OPC_BSF + rexw, dest, arg1);
        tcg_out_cmov(s, TCG_COND_EQ, rexw, dest, arg2);
    }
}

static void tcg_out_clz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1,
                        TCGArg arg2, bool const_a2)
{
    if (have_lzcnt) {
        tcg_out_modrm(s, OPC_LZCNT + rexw, dest, arg1);
        if (const_a2) {
            tcg_debug_assert(arg2 == (rexw ? 64 : 32));
        } else {
            tcg_debug_assert(dest != arg2);
            tcg_out_cmov(s, TCG_COND_LTU, rexw, dest, arg2);
        }
    } else {
        tcg_debug_assert(!const_a2);
        tcg_debug_assert(dest != arg1);
        tcg_debug_assert(dest != arg2);

        /* Recall that the output of BSR is the index not the count.  */
        tcg_out_modrm(s, OPC_BSR + rexw, dest, arg1);
        tgen_arithi(s, ARITH_XOR + rexw, dest, rexw ? 63 : 31, 0);

        /* Since we have destroyed the flags from BSR, we have to re-test.  */
        tcg_out_cmp(s, arg1, 0, 1, rexw);
        tcg_out_cmov(s, TCG_COND_EQ, rexw, dest, arg2);
    }
}

static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest)
{
    intptr_t disp = tcg_pcrel_diff(s, dest) - 5;

    if (disp == (int32_t)disp) {
        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
        tcg_out32(s, disp);
    } else {
        /* rip-relative addressing into the constant pool.
           This is 6 + 8 = 14 bytes, as compared to using an
           immediate load 10 + 6 = 16 bytes, plus we may
           be able to re-use the pool constant for more calls.  */
        tcg_out_opc(s, OPC_GRP5, 0, 0, 0);
        tcg_out8(s, (call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev) << 3 | 5);
        new_pool_label(s, (uintptr_t)dest, R_386_PC32, s->code_ptr, -4);
        tcg_out32(s, 0);
    }
}

static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *dest)
{
    tcg_out_branch(s, 1, dest);
}

static void tcg_out_jmp(TCGContext *s, tcg_insn_unit *dest)
{
    tcg_out_branch(s, 0, dest);
}

static void tcg_out_nopn(TCGContext *s, int n)
{
    int i;
    /* Emit 1 or 2 operand size prefixes for the standard one byte nop,
     * "xchg %eax,%eax", forming "xchg %ax,%ax".  All cores accept the
     * duplicate prefix, and all of the interesting recent cores can
     * decode and discard the duplicates in a single cycle.
     */
    tcg_debug_assert(n >= 1);
    for (i = 1; i < n; ++i) {
        tcg_out8(s, 0x66);
    }
    tcg_out8(s, 0x90);
}

#if defined(CONFIG_SOFTMMU)
#include "tcg-ldst.inc.c"

/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     int mmu_idx, uintptr_t ra)
 */
static void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
};

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
 */
static void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};

/* Perform the TLB load and compare.

   Inputs:
   ADDRLO and ADDRHI contain the low and high part of the address.

   MEM_INDEX and S_BITS are the memory context and log2 size of the load.

   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
   This should be offsetof addr_read or addr_write.

   Outputs:
   LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
   positions of the displacements of forward jumps to the TLB miss case.

   Second argument register is loaded with the low part of the address.
   In the TLB hit case, it has been adjusted as indicated by the TLB
   and so is a host address.  In the TLB miss case, it continues to
   hold a guest address.

   First argument register is clobbered.  */

static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                                    int mem_index, TCGMemOp opc,
                                    tcg_insn_unit **label_ptr, int which)
{
    const TCGReg r0 = TCG_REG_L0;
    const TCGReg r1 = TCG_REG_L1;
    TCGType ttype = TCG_TYPE_I32;
    TCGType tlbtype = TCG_TYPE_I32;
    int trexw = 0, hrexw = 0, tlbrexw = 0;
    unsigned a_bits = get_alignment_bits(opc);
    unsigned s_bits = opc & MO_SIZE;
    unsigned a_mask = (1 << a_bits) - 1;
    unsigned s_mask = (1 << s_bits) - 1;
    target_ulong tlb_mask;

    if (TCG_TARGET_REG_BITS == 64) {
        if (TARGET_LONG_BITS == 64) {
            ttype = TCG_TYPE_I64;
            trexw = P_REXW;
        }
        if (TCG_TYPE_PTR == TCG_TYPE_I64) {
            hrexw = P_REXW;
            if (TARGET_PAGE_BITS + CPU_TLB_BITS > 32) {
                tlbtype = TCG_TYPE_I64;
                tlbrexw = P_REXW;
            }
        }
    }

    tcg_out_mov(s, tlbtype, r0, addrlo);
    /* If the required alignment is at least as large as the access, simply
       copy the address and mask.  For lesser alignments, check that we don't
       cross pages for the complete access.  */
    if (a_bits >= s_bits) {
        tcg_out_mov(s, ttype, r1, addrlo);
    } else {
        tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask - a_mask);
    }
    tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;

    tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0,
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    tgen_arithi(s, ARITH_AND + trexw, r1, tlb_mask, 0);
    tgen_arithi(s, ARITH_AND + tlbrexw, r0,
                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);

    tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0,
                             offsetof(CPUArchState, tlb_table[mem_index][0])
                             + which);

    /* cmp 0(r0), r1 */
    tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0);

    /* Prepare for both the fast path add of the tlb addend, and the slow
       path function argument setup.  There are two cases worth noting:
       For 32-bit guest and x86_64 host, MOVL zero-extends the guest address
       before the fastpath ADDQ below.  For 64-bit guest and x32 host, MOVQ
       copies the entire guest address for the slow path, while truncation
       for the 32-bit host happens with the fastpath ADDL below.  */
    tcg_out_mov(s, ttype, r1, addrlo);

    /* jne slow_path */
    tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
    label_ptr[0] = s->code_ptr;
    s->code_ptr += 4;

    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        /* cmp 4(r0), addrhi */
        tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4);

        /* jne slow_path */
        tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
        label_ptr[1] = s->code_ptr;
        s->code_ptr += 4;
    }

    /* TLB Hit.  */

    /* add addend(r0), r1 */
    tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
                         offsetof(CPUTLBEntry, addend) - which);
}
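
/* For orientation, a rough sketch (not emitted verbatim) of the fast
   path generated above for a 64-bit guest on an x86_64 host, with r0/r1
   standing for TCG_REG_L0/L1:
       movq  %addr, %r0
       leaq  s_mask-a_mask(%addr), %r1      # or movq, if alignment suffices
       shrq  $(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), %r0
       andq  $(TARGET_PAGE_MASK | a_mask), %r1
       andq  $((CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS), %r0
       leaq  tlb_table_off(%env, %r0), %r0
       cmpq  0(%r0), %r1
       movq  %addr, %r1
       jne   slow_path
       addq  addend_off(%r0), %r1           # r1 is now the host address
*/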

/*
 * Record the context of a call to the out of line helper code for the slow
 * path for a load or store, so that we can later generate the correct helper
 * code.
 */
static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
                                TCGReg datalo, TCGReg datahi,
                                TCGReg addrlo, TCGReg addrhi,
                                tcg_insn_unit *raddr,
                                tcg_insn_unit **label_ptr)
{
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->oi = oi;
    label->datalo_reg = datalo;
    label->datahi_reg = datahi;
    label->addrlo_reg = addrlo;
    label->addrhi_reg = addrhi;
    label->raddr = raddr;
    label->label_ptr[0] = label_ptr[0];
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        label->label_ptr[1] = label_ptr[1];
    }
}

/*
 * Generate code for the slow path for a load at the end of the block.
 */
static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    TCGMemOpIdx oi = l->oi;
    TCGMemOp opc = get_memop(oi);
    TCGReg data_reg;
    tcg_insn_unit **label_ptr = &l->label_ptr[0];

    /* resolve label address */
    tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
    }

    if (TCG_TARGET_REG_BITS == 32) {
        int ofs = 0;

        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (TARGET_LONG_BITS == 64) {
            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_sti(s, TCG_TYPE_PTR, (uintptr_t)l->raddr, TCG_REG_ESP, ofs);
    } else {
        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
        /* The second argument is already loaded with addrlo.  */
        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi);
        tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
                     (uintptr_t)l->raddr);
    }

    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);

    data_reg = l->datalo_reg;
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case MO_SW:
        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
#if TCG_TARGET_REG_BITS == 64
    case MO_SL:
        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
        break;
#endif
    case MO_UB:
    case MO_UW:
        /* Note that the helpers have zero-extended to tcg_target_long.  */
    case MO_UL:
        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
        break;
    case MO_Q:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
        } else if (data_reg == TCG_REG_EDX) {
            /* xchg %edx, %eax */
            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
        }
        break;
    default:
        tcg_abort();
    }

    /* Jump to the code corresponding to the next IR after the qemu_ld.  */
    tcg_out_jmp(s, l->raddr);
}

/*
 * Generate code for the slow path for a store at the end of the block.
 */
static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    TCGMemOpIdx oi = l->oi;
    TCGMemOp opc = get_memop(oi);
    TCGMemOp s_bits = opc & MO_SIZE;
    tcg_insn_unit **label_ptr = &l->label_ptr[0];
    TCGReg retaddr;

    /* resolve label address */
    tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
    }

    if (TCG_TARGET_REG_BITS == 32) {
        int ofs = 0;

        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (TARGET_LONG_BITS == 64) {
            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (s_bits == MO_64) {
            tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
        ofs += 4;

        retaddr = TCG_REG_EAX;
        tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
        tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs);
    } else {
        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
        /* The second argument is already loaded with addrlo.  */
        tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                    tcg_target_call_iarg_regs[2], l->datalo_reg);
        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi);

        if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
            retaddr = tcg_target_call_iarg_regs[4];
            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
        } else {
            retaddr = TCG_REG_RAX;
            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
            tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP,
                       TCG_TARGET_CALL_STACK_OFFSET);
        }
    }

    /* "Tail call" to the helper, with the return address back inline.  */
    tcg_out_push(s, retaddr);
    tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
}
#elif defined(__x86_64__) && defined(__linux__)
# include <asm/prctl.h>
# include <sys/prctl.h>

int arch_prctl(int code, unsigned long addr);

static int guest_base_flags;
static inline void setup_guest_base_seg(void)
{
    if (arch_prctl(ARCH_SET_GS, guest_base) == 0) {
        guest_base_flags = P_GS;
    }
}
#else
# define guest_base_flags 0
static inline void setup_guest_base_seg(void) { }
#endif /* SOFTMMU */
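
/* In the non-SOFTMMU x86_64 Linux case above, arch_prctl(ARCH_SET_GS,
   guest_base) asks the kernel to point the %gs segment base at
   guest_base; guest accesses can then carry a 0x65 (%gs) prefix via
   P_GS instead of materializing guest_base on every access.  */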

static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg base, int index, intptr_t ofs,
                                   int seg, TCGMemOp memop)
{
    const TCGMemOp real_bswap = memop & MO_BSWAP;
    TCGMemOp bswap = real_bswap;
    int movop = OPC_MOVL_GvEv;

    if (have_movbe && real_bswap) {
        bswap = 0;
        movop = OPC_MOVBE_GyMy;
    }

    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_modrm_sib_offset(s, OPC_MOVZBL + seg, datalo,
                                 base, index, 0, ofs);
        break;
    case MO_SB:
        tcg_out_modrm_sib_offset(s, OPC_MOVSBL + P_REXW + seg, datalo,
                                 base, index, 0, ofs);
        break;
    case MO_UW:
        tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
                                 base, index, 0, ofs);
        if (real_bswap) {
            tcg_out_rolw_8(s, datalo);
        }
        break;
    case MO_SW:
        if (real_bswap) {
            if (have_movbe) {
                tcg_out_modrm_sib_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
                                         datalo, base, index, 0, ofs);
            } else {
                tcg_out_modrm_sib_offset(s, OPC_MOVZWL + seg, datalo,
                                         base, index, 0, ofs);
                tcg_out_rolw_8(s, datalo);
            }
            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
        } else {
            tcg_out_modrm_sib_offset(s, OPC_MOVSWL + P_REXW + seg,
                                     datalo, base, index, 0, ofs);
        }
        break;
    case MO_UL:
        tcg_out_modrm_sib_offset(s, movop + seg, datalo, base, index, 0, ofs);
        if (bswap) {
            tcg_out_bswap32(s, datalo);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case MO_SL:
        if (real_bswap) {
            tcg_out_modrm_sib_offset(s, movop + seg, datalo,
                                     base, index, 0, ofs);
            if (bswap) {
                tcg_out_bswap32(s, datalo);
            }
            tcg_out_ext32s(s, datalo, datalo);
        } else {
            tcg_out_modrm_sib_offset(s, OPC_MOVSLQ + seg, datalo,
                                     base, index, 0, ofs);
        }
        break;
#endif
    case MO_Q:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_modrm_sib_offset(s, movop + P_REXW + seg, datalo,
                                     base, index, 0, ofs);
            if (bswap) {
                tcg_out_bswap64(s, datalo);
            }
        } else {
            if (real_bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            if (base != datalo) {
                tcg_out_modrm_sib_offset(s, movop + seg, datalo,
                                         base, index, 0, ofs);
                tcg_out_modrm_sib_offset(s, movop + seg, datahi,
                                         base, index, 0, ofs + 4);
            } else {
                tcg_out_modrm_sib_offset(s, movop + seg, datahi,
                                         base, index, 0, ofs + 4);
                tcg_out_modrm_sib_offset(s, movop + seg, datalo,
                                         base, index, 0, ofs);
            }
            if (bswap) {
                tcg_out_bswap32(s, datalo);
                tcg_out_bswap32(s, datahi);
            }
        }
        break;
    default:
        tcg_abort();
    }
}

/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
   EAX.  It will be useful once fixed-register globals are less
   common.  */
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
{
    TCGReg datalo, datahi, addrlo;
    TCGReg addrhi __attribute__((unused));
    TCGMemOpIdx oi;
    TCGMemOp opc;
#if defined(CONFIG_SOFTMMU)
    int mem_index;
    tcg_insn_unit *label_ptr[2];
#endif

    datalo = *args++;
    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
    oi = *args++;
    opc = get_memop(oi);

#if defined(CONFIG_SOFTMMU)
    mem_index = get_mmuidx(oi);

    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
                     label_ptr, offsetof(CPUTLBEntry, addr_read));

    /* TLB Hit.  */
    tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, opc);

    /* Record the current context of a load into ldst label */
    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
                        s->code_ptr, label_ptr);
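    /* The TLB-miss branches recorded in label_ptr are patched to reach
       an out-of-line slow path (a helper call) that is emitted after
       the body of the TB.  */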
#else
    {
        int32_t offset = guest_base;
        TCGReg base = addrlo;
        int index = -1;
        int seg = 0;

        /* For a 32-bit guest, the high 32 bits may contain garbage.
           We can do this with the ADDR32 prefix if we're not using
           a guest base, or when using segmentation.  Otherwise we
           need to zero-extend manually.  */
        if (guest_base == 0 || guest_base_flags) {
            seg = guest_base_flags;
            offset = 0;
            if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
                seg |= P_ADDR32;
            }
        } else if (TCG_TARGET_REG_BITS == 64) {
            if (TARGET_LONG_BITS == 32) {
                tcg_out_ext32u(s, TCG_REG_L0, base);
                base = TCG_REG_L0;
            }
            if (offset != guest_base) {
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
                index = TCG_REG_L1;
                offset = 0;
            }
        }

        tcg_out_qemu_ld_direct(s, datalo, datahi,
                               base, index, offset, seg, opc);
    }
#endif
}
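
/* By way of illustration (assuming a 64-bit host, a 32-bit guest and a
   nonzero guest_base without usable segment flags), the code above
   produces roughly:
       movl %addrlo, %L0            # zero-extend the guest address
       movq $guest_base, %L1
       mov  (%L0,%L1), %datalo      # SIB base+index load
   while guest_base == 0 collapses to a single ADDR32-prefixed load.  */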

static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg base, intptr_t ofs, int seg,
                                   TCGMemOp memop)
{
    /* ??? Ideally we wouldn't need a scratch register.  For user-only,
       we could perform the bswap twice to restore the original value
       instead of moving to the scratch.  But as it is, the L constraint
       means that TCG_REG_L0 is definitely free here.  */
    const TCGReg scratch = TCG_REG_L0;
    const TCGMemOp real_bswap = memop & MO_BSWAP;
    TCGMemOp bswap = real_bswap;
    int movop = OPC_MOVL_EvGv;

    if (have_movbe && real_bswap) {
        bswap = 0;
        movop = OPC_MOVBE_MyGy;
    }

    switch (memop & MO_SIZE) {
    case MO_8:
        /* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
           Use the scratch register if necessary.  */
        if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
                             datalo, base, ofs);
        break;
    case MO_16:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_rolw_8(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs);
        break;
    case MO_32:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
        break;
    case MO_64:
        if (TCG_TARGET_REG_BITS == 64) {
            if (bswap) {
                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
                tcg_out_bswap64(s, scratch);
                datalo = scratch;
            }
            tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs);
        } else if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
            tcg_out_bswap32(s, scratch);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
        } else {
            if (real_bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
            tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs+4);
        }
        break;
    default:
        tcg_abort();
    }
}

static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
{
    TCGReg datalo, datahi, addrlo;
    TCGReg addrhi __attribute__((unused));
    TCGMemOpIdx oi;
    TCGMemOp opc;
#if defined(CONFIG_SOFTMMU)
    int mem_index;
    tcg_insn_unit *label_ptr[2];
#endif

    datalo = *args++;
    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
    oi = *args++;
    opc = get_memop(oi);

#if defined(CONFIG_SOFTMMU)
    mem_index = get_mmuidx(oi);

    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
                     label_ptr, offsetof(CPUTLBEntry, addr_write));

    /* TLB Hit.  */
    tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);

    /* Record the current context of a store into ldst label */
    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
                        s->code_ptr, label_ptr);
#else
    {
        int32_t offset = guest_base;
        TCGReg base = addrlo;
        int seg = 0;

        /* See comment in tcg_out_qemu_ld re zero-extension of addrlo.  */
        if (guest_base == 0 || guest_base_flags) {
            seg = guest_base_flags;
            offset = 0;
            if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
                seg |= P_ADDR32;
            }
        } else if (TCG_TARGET_REG_BITS == 64) {
            /* ??? Note that we can't use the same SIB addressing scheme
               as for loads, since we require L0 free for bswap.  */
            if (offset != guest_base) {
                if (TARGET_LONG_BITS == 32) {
                    tcg_out_ext32u(s, TCG_REG_L0, base);
                    base = TCG_REG_L0;
                }
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base);
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
                base = TCG_REG_L1;
                offset = 0;
            } else if (TARGET_LONG_BITS == 32) {
                tcg_out_ext32u(s, TCG_REG_L1, base);
                base = TCG_REG_L1;
            }
        }

        tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc);
    }
#endif
}

static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                              const TCGArg *args, const int *const_args)
{
    TCGArg a0, a1, a2;
    int c, const_a2, vexop, rexw = 0;

#if TCG_TARGET_REG_BITS == 64
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i64): \
            rexw = P_REXW; /* FALLTHRU */ \
        case glue(glue(INDEX_op_, x), _i32)
#else
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i32)
#endif
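
/* As an illustration, on a 64-bit host OP_32_64(add) expands to
       case INDEX_op_add_i64: rexw = P_REXW;
       case INDEX_op_add_i32:
   so a single block below handles both widths, with rexw supplying
   the REX.W prefix for the 64-bit form.  */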

    /* Hoist the loads of the most common arguments.  */
    a0 = args[0];
    a1 = args[1];
    a2 = args[2];
    const_a2 = const_args[2];

    switch (opc) {
    case INDEX_op_exit_tb:
        /* Reuse the zeroing that exists for goto_ptr.  */
        if (a0 == 0) {
            tcg_out_jmp(s, s->code_gen_epilogue);
        } else {
            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, a0);
            tcg_out_jmp(s, tb_ret_addr);
        }
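        /* Whatever ends up in EAX here is seen by the caller of the
           translation block, tcg_qemu_tb_exec(), as its return value.  */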
        break;
    case INDEX_op_goto_tb:
        if (s->tb_jmp_insn_offset) {
            /* direct jump method */
            int gap;
            /* jump displacement must be aligned for atomic patching;
             * see if we need to add extra nops before jump
             */
            gap = tcg_pcrel_diff(s, QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4));
            if (gap != 1) {
                tcg_out_nopn(s, gap - 1);
            }
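            /* For example, if code_ptr + 1 is 2 mod 4, gap is 3 and two
               NOPs are emitted, so the 32-bit displacement following the
               JMP opcode starts 4-byte aligned and can be patched with a
               single aligned store.  */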
            tcg_out8(s, OPC_JMP_long); /* jmp im */
            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
            tcg_out32(s, 0);
        } else {
            /* indirect jump method */
            tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
                                 (intptr_t)(s->tb_jmp_target_addr + a0));
        }
        s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
        break;
    case INDEX_op_goto_ptr:
        /* jmp to the given host address (could be epilogue) */
        tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, a0);
        break;
    case INDEX_op_br:
        tcg_out_jxx(s, JCC_JMP, arg_label(a0), 0);
        break;
    OP_32_64(ld8u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZBL, a0, a1, a2);
        break;
    OP_32_64(ld8s):
        tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, a0, a1, a2);
        break;
    OP_32_64(ld16u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZWL, a0, a1, a2);
        break;
    OP_32_64(ld16s):
        tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, a0, a1, a2);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_ld32u_i64:
#endif
    case INDEX_op_ld_i32:
        tcg_out_ld(s, TCG_TYPE_I32, a0, a1, a2);
        break;

    OP_32_64(st8):
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVB_EvIz, 0, a1, a2);
            tcg_out8(s, a0);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R, a0, a1, a2);
        }
        break;
    OP_32_64(st16):
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16, 0, a1, a2);
            tcg_out16(s, a0);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16, a0, a1, a2);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_st32_i64:
#endif
    case INDEX_op_st_i32:
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, a1, a2);
            tcg_out32(s, a0);
        } else {
            tcg_out_st(s, TCG_TYPE_I32, a0, a1, a2);
        }
        break;

    OP_32_64(add):
        /* For 3-operand addition, use LEA.  */
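        /* LEA computes base + index + displacement without touching the
           flags, giving a true 3-operand add: reg+reg becomes
           "lea (a1,a2),a0" and reg+imm becomes "lea imm(a1),a0".  */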
        if (a0 != a1) {
            TCGArg c3 = 0;
            if (const_a2) {
                c3 = a2, a2 = -1;
            } else if (a0 == a2) {
                /* Watch out for dest = src + dest, since we've removed
                   the matching constraint on the add.  */
                tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
                break;
            }

            tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
            break;
        }
        c = ARITH_ADD;
        goto gen_arith;
    OP_32_64(sub):
        c = ARITH_SUB;
        goto gen_arith;
    OP_32_64(and):
        c = ARITH_AND;
        goto gen_arith;
    OP_32_64(or):
        c = ARITH_OR;
        goto gen_arith;
    OP_32_64(xor):
        c = ARITH_XOR;
        goto gen_arith;
    gen_arith:
        if (const_a2) {
            tgen_arithi(s, c + rexw, a0, a2, 0);
        } else {
            tgen_arithr(s, c + rexw, a0, a2);
        }
        break;

    OP_32_64(andc):
        if (const_a2) {
            tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, a0, a1);
            tgen_arithi(s, ARITH_AND + rexw, a0, ~a2, 0);
        } else {
            tcg_out_vex_modrm(s, OPC_ANDN + rexw, a0, a2, a1);
        }
        break;

    OP_32_64(mul):
        if (const_a2) {
            int32_t val;
            val = a2;
            if (val == (int8_t)val) {
                tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, a0, a0);
                tcg_out8(s, val);
            } else {
                tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, a0, a0);
                tcg_out32(s, val);
            }
        } else {
            tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, a0, a2);
        }
        break;

    OP_32_64(div2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
        break;
    OP_32_64(divu2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
        break;

    OP_32_64(shl):
        /* For small constant 3-operand shift, use LEA.  */
        if (const_a2 && a0 != a1 && (a2 - 1) < 3) {
            if (a2 - 1 == 0) {
                /* shl $1,a1,a0 -> lea (a1,a1),a0 */
                tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a1, 0, 0);
            } else {
                /* shl $n,a1,a0 -> lea 0(,a1,2^n),a0 */
                tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, -1, a1, a2, 0);
            }
            break;
        }
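        /* The unsigned comparison (a2 - 1) < 3 accepts only shift counts
           1-3, matching the x2/x4/x8 scales the SIB byte can encode; the
           count-1 case prefers base+index, since the index-only form
           would require a 4-byte displacement.  */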
        c = SHIFT_SHL;
        vexop = OPC_SHLX;
        goto gen_shift_maybe_vex;
    OP_32_64(shr):
        c = SHIFT_SHR;
        vexop = OPC_SHRX;
        goto gen_shift_maybe_vex;
    OP_32_64(sar):
        c = SHIFT_SAR;
        vexop = OPC_SARX;
        goto gen_shift_maybe_vex;
    OP_32_64(rotl):
        c = SHIFT_ROL;
        goto gen_shift;
    OP_32_64(rotr):
        c = SHIFT_ROR;
        goto gen_shift;
    gen_shift_maybe_vex:
        if (have_bmi2) {
            if (!const_a2) {
                tcg_out_vex_modrm(s, vexop + rexw, a0, a2, a1);
                break;
            }
            tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32, a0, a1);
        }
        /* FALLTHRU */
    gen_shift:
        if (const_a2) {
            tcg_out_shifti(s, c + rexw, a0, a2);
        } else {
            tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, a0);
        }
        break;

    OP_32_64(ctz):
        tcg_out_ctz(s, rexw, args[0], args[1], args[2], const_args[2]);
        break;
    OP_32_64(clz):
        tcg_out_clz(s, rexw, args[0], args[1], args[2], const_args[2]);
        break;
    OP_32_64(ctpop):
        tcg_out_modrm(s, OPC_POPCNT + rexw, a0, a1);
        break;

    case INDEX_op_brcond_i32:
        tcg_out_brcond32(s, a2, a0, a1, const_args[1], arg_label(args[3]), 0);
        break;
    case INDEX_op_setcond_i32:
        tcg_out_setcond32(s, args[3], a0, a1, a2, const_a2);
        break;
    case INDEX_op_movcond_i32:
        tcg_out_movcond32(s, args[5], a0, a1, a2, const_a2, args[3]);
        break;

    OP_32_64(bswap16):
        tcg_out_rolw_8(s, a0);
        break;
    OP_32_64(bswap32):
        tcg_out_bswap32(s, a0);
        break;

    OP_32_64(neg):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, a0);
        break;
    OP_32_64(not):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, a0);
        break;

    OP_32_64(ext8s):
        tcg_out_ext8s(s, a0, a1, rexw);
        break;
    OP_32_64(ext16s):
        tcg_out_ext16s(s, a0, a1, rexw);
        break;
    OP_32_64(ext8u):
        tcg_out_ext8u(s, a0, a1);
        break;
    OP_32_64(ext16u):
        tcg_out_ext16u(s, a0, a1);
        break;

    case INDEX_op_qemu_ld_i32:
        tcg_out_qemu_ld(s, args, 0);
        break;
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, args, 1);
        break;
    case INDEX_op_qemu_st_i32:
        tcg_out_qemu_st(s, args, 0);
        break;
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, args, 1);
        break;

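    /* mulu2/muls2 use the one-operand MUL/IMUL forms: the low operand
       and low result are implicitly EAX, and the high result EDX, which
       is why tcg_target_op_def() below returns the "a"/"d" register
       constraints for these ops.  */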
    OP_32_64(mulu2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
        break;
    OP_32_64(muls2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
        break;
    OP_32_64(add2):
        if (const_args[4]) {
            tgen_arithi(s, ARITH_ADD + rexw, a0, args[4], 1);
        } else {
            tgen_arithr(s, ARITH_ADD + rexw, a0, args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_ADC + rexw, a1, args[5], 1);
        } else {
            tgen_arithr(s, ARITH_ADC + rexw, a1, args[5]);
        }
        break;
    OP_32_64(sub2):
        if (const_args[4]) {
            tgen_arithi(s, ARITH_SUB + rexw, a0, args[4], 1);
        } else {
            tgen_arithr(s, ARITH_SUB + rexw, a0, args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_SBB + rexw, a1, args[5], 1);
        } else {
            tgen_arithr(s, ARITH_SBB + rexw, a1, args[5]);
        }
        break;

#if TCG_TARGET_REG_BITS == 32
    case INDEX_op_brcond2_i32:
        tcg_out_brcond2(s, args, const_args, 0);
        break;
    case INDEX_op_setcond2_i32:
        tcg_out_setcond2(s, args, const_args);
        break;
#else /* TCG_TARGET_REG_BITS == 64 */
    case INDEX_op_ld32s_i64:
        tcg_out_modrm_offset(s, OPC_MOVSLQ, a0, a1, a2);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ld(s, TCG_TYPE_I64, a0, a1, a2);
        break;
    case INDEX_op_st_i64:
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW, 0, a1, a2);
            tcg_out32(s, a0);
        } else {
            tcg_out_st(s, TCG_TYPE_I64, a0, a1, a2);
        }
        break;

    case INDEX_op_brcond_i64:
        tcg_out_brcond64(s, a2, a0, a1, const_args[1], arg_label(args[3]), 0);
        break;
    case INDEX_op_setcond_i64:
        tcg_out_setcond64(s, args[3], a0, a1, a2, const_a2);
        break;
    case INDEX_op_movcond_i64:
        tcg_out_movcond64(s, args[5], a0, a1, a2, const_a2, args[3]);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_bswap64(s, a0);
        break;
    case INDEX_op_extu_i32_i64:
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, a0, a1);
        break;
    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, a0, a1);
        break;
#endif

    OP_32_64(deposit):
        if (args[3] == 0 && args[4] == 8) {
            /* load bits 0..7 */
            tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM, a2, a0);
        } else if (args[3] == 8 && args[4] == 8) {
            /* load bits 8..15 */
            tcg_out_modrm(s, OPC_MOVB_EvGv, a2, a0 + 4);
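            /* Encoding register number a0 + 4 without a REX prefix
               selects a high-byte register (%ah, %ch, %dh, %bh); the
               "Q" constraint restricts a0 accordingly.  */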
        } else if (args[3] == 0 && args[4] == 16) {
            /* load bits 0..15 */
            tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, a2, a0);
        } else {
            tcg_abort();
        }
        break;

    case INDEX_op_extract_i64:
        if (a2 + args[3] == 32) {
            /* This is a 32-bit zero-extending right shift.  */
            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
            tcg_out_shifti(s, SHIFT_SHR, a0, a2);
            break;
        }
        /* FALLTHRU */
    case INDEX_op_extract_i32:
        /* On the off-chance that we can use the high-byte registers.
           Otherwise we emit the same ext16 + shift pattern that we
           would have gotten from the normal tcg-op.c expansion.  */
        tcg_debug_assert(a2 == 8 && args[3] == 8);
        if (a1 < 4 && a0 < 8) {
            tcg_out_modrm(s, OPC_MOVZBL, a0, a1 + 4);
        } else {
            tcg_out_ext16u(s, a0, a1);
            tcg_out_shifti(s, SHIFT_SHR, a0, 8);
        }
        break;

    case INDEX_op_sextract_i32:
        /* We don't implement sextract_i64, as we cannot sign-extend to
           64-bits without using the REX prefix that explicitly excludes
           access to the high-byte registers.  */
        tcg_debug_assert(a2 == 8 && args[3] == 8);
        if (a1 < 4 && a0 < 8) {
            tcg_out_modrm(s, OPC_MOVSBL, a0, a1 + 4);
        } else {
            tcg_out_ext16s(s, a0, a1, 0);
            tcg_out_shifti(s, SHIFT_SAR, a0, 8);
        }
        break;

    case INDEX_op_mb:
        tcg_out_mb(s, a0);
        break;
    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
    case INDEX_op_mov_i64:
    case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
    case INDEX_op_movi_i64:
    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
    default:
        tcg_abort();
    }

#undef OP_32_64
}

static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
{
    static const TCGTargetOpDef r = { .args_ct_str = { "r" } };
    static const TCGTargetOpDef ri_r = { .args_ct_str = { "ri", "r" } };
    static const TCGTargetOpDef re_r = { .args_ct_str = { "re", "r" } };
    static const TCGTargetOpDef qi_r = { .args_ct_str = { "qi", "r" } };
    static const TCGTargetOpDef r_r = { .args_ct_str = { "r", "r" } };
    static const TCGTargetOpDef r_q = { .args_ct_str = { "r", "q" } };
    static const TCGTargetOpDef r_re = { .args_ct_str = { "r", "re" } };
    static const TCGTargetOpDef r_0 = { .args_ct_str = { "r", "0" } };
    static const TCGTargetOpDef r_r_ri = { .args_ct_str = { "r", "r", "ri" } };
    static const TCGTargetOpDef r_r_re = { .args_ct_str = { "r", "r", "re" } };
    static const TCGTargetOpDef r_0_re = { .args_ct_str = { "r", "0", "re" } };
    static const TCGTargetOpDef r_0_ci = { .args_ct_str = { "r", "0", "ci" } };
    static const TCGTargetOpDef r_L = { .args_ct_str = { "r", "L" } };
    static const TCGTargetOpDef L_L = { .args_ct_str = { "L", "L" } };
    static const TCGTargetOpDef r_L_L = { .args_ct_str = { "r", "L", "L" } };
    static const TCGTargetOpDef r_r_L = { .args_ct_str = { "r", "r", "L" } };
    static const TCGTargetOpDef L_L_L = { .args_ct_str = { "L", "L", "L" } };
    static const TCGTargetOpDef r_r_L_L
        = { .args_ct_str = { "r", "r", "L", "L" } };
    static const TCGTargetOpDef L_L_L_L
        = { .args_ct_str = { "L", "L", "L", "L" } };

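    /* A rough key to the constraint letters used here: "r" is any
       register, "q" a byte-addressable register, "L" a register usable
       by qemu_ld/st (excluding the L0/L1 scratches), "a"/"d" EAX/EDX,
       "0"/"1" an output aliased to that input, "i" any immediate, "e"
       a sign-extended and "Z" a zero-extended 32-bit immediate; see
       the constraint parser elsewhere in this file for the full set.  */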
    switch (op) {
    case INDEX_op_goto_ptr:
        return &r;

    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld_i32:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
        return &r_r;

    case INDEX_op_st8_i32:
    case INDEX_op_st8_i64:
        return &qi_r;
    case INDEX_op_st16_i32:
    case INDEX_op_st16_i64:
    case INDEX_op_st_i32:
    case INDEX_op_st32_i64:
        return &ri_r;
    case INDEX_op_st_i64:
        return &re_r;

    case INDEX_op_add_i32:
    case INDEX_op_add_i64:
        return &r_r_re;
    case INDEX_op_sub_i32:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i32:
    case INDEX_op_mul_i64:
    case INDEX_op_or_i32:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i32:
    case INDEX_op_xor_i64:
        return &r_0_re;

    case INDEX_op_and_i32:
    case INDEX_op_and_i64:
        {
            static const TCGTargetOpDef and
                = { .args_ct_str = { "r", "0", "reZ" } };
            return &and;
        }
        break;
    case INDEX_op_andc_i32:
    case INDEX_op_andc_i64:
        {
            static const TCGTargetOpDef andc
                = { .args_ct_str = { "r", "r", "rI" } };
            return &andc;
        }
        break;

    case INDEX_op_shl_i32:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i32:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i32:
    case INDEX_op_sar_i64:
        return have_bmi2 ? &r_r_ri : &r_0_ci;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i32:
    case INDEX_op_rotr_i64:
        return &r_0_ci;

    case INDEX_op_brcond_i32:
    case INDEX_op_brcond_i64:
        return &r_re;

    case INDEX_op_bswap16_i32:
    case INDEX_op_bswap16_i64:
    case INDEX_op_bswap32_i32:
    case INDEX_op_bswap32_i64:
    case INDEX_op_bswap64_i64:
    case INDEX_op_neg_i32:
    case INDEX_op_neg_i64:
    case INDEX_op_not_i32:
    case INDEX_op_not_i64:
        return &r_0;

    case INDEX_op_ext8s_i32:
    case INDEX_op_ext8s_i64:
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
        return &r_q;
    case INDEX_op_ext16s_i32:
    case INDEX_op_ext16s_i64:
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext16u_i64:
    case INDEX_op_ext32s_i64:
    case INDEX_op_ext32u_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
    case INDEX_op_extract_i32:
    case INDEX_op_extract_i64:
    case INDEX_op_sextract_i32:
    case INDEX_op_ctpop_i32:
    case INDEX_op_ctpop_i64:
        return &r_r;

    case INDEX_op_deposit_i32:
    case INDEX_op_deposit_i64:
        {
            static const TCGTargetOpDef dep
                = { .args_ct_str = { "Q", "0", "Q" } };
            return &dep;
        }
    case INDEX_op_setcond_i32:
    case INDEX_op_setcond_i64:
        {
            static const TCGTargetOpDef setc
                = { .args_ct_str = { "q", "r", "re" } };
            return &setc;
        }
    case INDEX_op_movcond_i32:
    case INDEX_op_movcond_i64:
        {
            static const TCGTargetOpDef movc
                = { .args_ct_str = { "r", "r", "re", "r", "0" } };
            return &movc;
        }
    case INDEX_op_div2_i32:
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i32:
    case INDEX_op_divu2_i64:
        {
            static const TCGTargetOpDef div2
                = { .args_ct_str = { "a", "d", "0", "1", "r" } };
            return &div2;
        }
    case INDEX_op_mulu2_i32:
    case INDEX_op_mulu2_i64:
    case INDEX_op_muls2_i32:
    case INDEX_op_muls2_i64:
        {
            static const TCGTargetOpDef mul2
                = { .args_ct_str = { "a", "d", "a", "r" } };
            return &mul2;
        }
    case INDEX_op_add2_i32:
    case INDEX_op_add2_i64:
    case INDEX_op_sub2_i32:
    case INDEX_op_sub2_i64:
        {
            static const TCGTargetOpDef arith2
                = { .args_ct_str = { "r", "r", "0", "1", "re", "re" } };
            return &arith2;
        }
    case INDEX_op_ctz_i32:
    case INDEX_op_ctz_i64:
        {
            static const TCGTargetOpDef ctz[2] = {
                { .args_ct_str = { "&r", "r", "r" } },
                { .args_ct_str = { "&r", "r", "rW" } },
            };
            return &ctz[have_bmi1];
        }
    case INDEX_op_clz_i32:
    case INDEX_op_clz_i64:
        {
            static const TCGTargetOpDef clz[2] = {
                { .args_ct_str = { "&r", "r", "r" } },
                { .args_ct_str = { "&r", "r", "rW" } },
            };
            return &clz[have_lzcnt];
        }

    case INDEX_op_qemu_ld_i32:
        return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_L : &r_L_L;
    case INDEX_op_qemu_st_i32:
        return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &L_L : &L_L_L;
    case INDEX_op_qemu_ld_i64:
        return (TCG_TARGET_REG_BITS == 64 ? &r_L
                : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_r_L
                : &r_r_L_L);
    case INDEX_op_qemu_st_i64:
        return (TCG_TARGET_REG_BITS == 64 ? &L_L
                : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &L_L_L
                : &L_L_L_L);

    case INDEX_op_brcond2_i32:
        {
            static const TCGTargetOpDef b2
                = { .args_ct_str = { "r", "r", "ri", "ri" } };
            return &b2;
        }
    case INDEX_op_setcond2_i32:
        {
            static const TCGTargetOpDef s2
                = { .args_ct_str = { "r", "r", "r", "ri", "ri" } };
            return &s2;
        }

    default:
        break;
    }
    return NULL;
}

static int tcg_target_callee_save_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
#if defined(_WIN64)
    TCG_REG_RDI,
    TCG_REG_RSI,
#endif
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14, /* Currently used for the global env. */
    TCG_REG_R15,
#else
    TCG_REG_EBP, /* Currently used for the global env. */
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
#endif
};

/* Compute frame size via macros, to share between tcg_target_qemu_prologue
   and tcg_register_jit.  */

#define PUSH_SIZE \
    ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
     * (TCG_TARGET_REG_BITS / 8))

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))
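
/* Worked example, assuming the values this backend uses elsewhere
   (TCG_STATIC_CALL_ARGS_SIZE == 128, CPU_TEMP_BUF_NLONGS == 128,
   TCG_TARGET_STACK_ALIGN == 16): a SysV x86-64 host saves six
   callee-saved registers, so PUSH_SIZE = (1 + 6) * 8 = 56 (the "+1"
   accounts for the return address), and FRAME_SIZE rounds
   56 + 128 + 128 * 8 = 1208 up to 1216.  */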

/* Generate global QEMU prologue and epilogue code */
static void tcg_target_qemu_prologue(TCGContext *s)
{
    int i, stack_addend;

    /* TB prologue */

    /* Reserve some stack space, also for TCG temps.  */
    stack_addend = FRAME_SIZE - PUSH_SIZE;
    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

    /* Save all callee saved registers.  */
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
        tcg_out_push(s, tcg_target_callee_save_regs[i]);
    }

#if TCG_TARGET_REG_BITS == 32
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
               (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
    /* jmp *tb.  */
    tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
                         (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
                         + stack_addend);
#else
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
    /* jmp *tb.  */
    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
#endif
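
    /* The prologue is entered as tcg_qemu_tb_exec(env, tb_ptr): env is
       moved into TCG_AREG0 and the second argument is the host address
       of the translated code, which we tail-jump into.  On 32-bit
       hosts both arguments are fetched from the stack instead.  */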

    /*
     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
     * and fall through to the rest of the epilogue.
     */
    s->code_gen_epilogue = s->code_ptr;
    tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_EAX, 0);

    /* TB epilogue */
    tb_ret_addr = s->code_ptr;

    tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);

    for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
    }
    tcg_out_opc(s, OPC_RET, 0, 0, 0);

#if !defined(CONFIG_SOFTMMU)
    /* Try to set up a segment register to point to guest_base.  */
    if (guest_base) {
        setup_guest_base_seg();
    }
#endif
}

static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
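    /* Fill with 0x90, the one-byte x86 NOP.  */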
    memset(p, 0x90, count);
}

static void tcg_target_init(TCGContext *s)
{
#ifdef CONFIG_CPUID_H
    unsigned a, b, c, d;
    int max = __get_cpuid_max(0, 0);

    if (max >= 1) {
        __cpuid(1, a, b, c, d);
#ifndef have_cmov
        /* For 32-bit, 99% certainty that we're running on hardware that
           supports cmov, but we still need to check.  In case cmov is not
           available, we'll use a small forward branch.  */
        have_cmov = (d & bit_CMOV) != 0;
#endif
        /* MOVBE is only available on Intel Atom and Haswell CPUs, so we
           need to probe for it.  */
        have_movbe = (c & bit_MOVBE) != 0;
        have_popcnt = (c & bit_POPCNT) != 0;
    }

    if (max >= 7) {
        /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs.  */
        __cpuid_count(7, 0, a, b, c, d);
        have_bmi1 = (b & bit_BMI) != 0;
        have_bmi2 = (b & bit_BMI2) != 0;
    }

    max = __get_cpuid_max(0x80000000, 0);
    if (max >= 0x80000001) {
        __cpuid(0x80000001, a, b, c, d);
        /* LZCNT was introduced with AMD Barcelona and Intel Haswell CPUs.  */
        have_lzcnt = (c & bit_LZCNT) != 0;
    }
#endif /* CONFIG_CPUID_H */

    if (TCG_TARGET_REG_BITS == 64) {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
    } else {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
    }

    tcg_target_call_clobber_regs = 0;
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
    if (TCG_TARGET_REG_BITS == 64) {
#if !defined(_WIN64)
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
#endif
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
    }

    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
}

typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[14];
} DebugFrame;

/* We're expecting a 2 byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
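
/* For example, a FRAME_SIZE of 1216 encodes as the two uleb128 bytes
   0xc0 0x09: the low 7 bits (0x40) with the continuation bit set,
   followed by 1216 >> 7 == 9.  Two bytes suffice for any value below
   1 << 14, which is what the build assertion above guarantees.  */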

#if !defined(__ELF__)
    /* Host machine without ELF.  */
#elif TCG_TARGET_REG_BITS == 64
#define ELF_HOST_MACHINE EM_X86_64
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,             /* sleb128 -8 */
    .h.cie.return_column = 16,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, 7,                          /* DW_CFA_def_cfa %rsp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x90, 1,                        /* DW_CFA_offset, %rip, -8 */
        /* The following ordering must match tcg_target_callee_save_regs.  */
        0x86, 2,                        /* DW_CFA_offset, %rbp, -16 */
        0x83, 3,                        /* DW_CFA_offset, %rbx, -24 */
        0x8c, 4,                        /* DW_CFA_offset, %r12, -32 */
        0x8d, 5,                        /* DW_CFA_offset, %r13, -40 */
        0x8e, 6,                        /* DW_CFA_offset, %r14, -48 */
        0x8f, 7,                        /* DW_CFA_offset, %r15, -56 */
    }
};
#else
#define ELF_HOST_MACHINE EM_386
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x7c,             /* sleb128 -4 */
    .h.cie.return_column = 8,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, 4,                          /* DW_CFA_def_cfa %esp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x88, 1,                        /* DW_CFA_offset, %eip, -4 */
        /* The following ordering must match tcg_target_callee_save_regs.  */
        0x85, 2,                        /* DW_CFA_offset, %ebp, -8 */
        0x83, 3,                        /* DW_CFA_offset, %ebx, -12 */
        0x86, 4,                        /* DW_CFA_offset, %esi, -16 */
        0x87, 5,                        /* DW_CFA_offset, %edi, -20 */
    }
};
#endif

#if defined(ELF_HOST_MACHINE)
void tcg_register_jit(void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif