[qemu.git] tcg/i386/tcg-target.c
tcg/i386: add support for three-byte opcodes
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "tcg-be-ldst.h"

#ifndef NDEBUG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
    "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
#else
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
#endif
};
#endif

static const int tcg_target_reg_alloc_order[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_RCX,
    TCG_REG_RDX,
    TCG_REG_RSI,
    TCG_REG_RDI,
    TCG_REG_RAX,
#else
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
    TCG_REG_EBP,
    TCG_REG_ECX,
    TCG_REG_EDX,
    TCG_REG_EAX,
#endif
};

static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
#if defined(_WIN64)
    TCG_REG_RCX,
    TCG_REG_RDX,
#else
    TCG_REG_RDI,
    TCG_REG_RSI,
    TCG_REG_RDX,
    TCG_REG_RCX,
#endif
    TCG_REG_R8,
    TCG_REG_R9,
#else
    /* 32 bit mode uses stack based calling convention (GCC default). */
#endif
};

static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_EAX,
#if TCG_TARGET_REG_BITS == 32
    TCG_REG_EDX
#endif
};

/* Registers used with L constraint, which are the first argument
   registers on x86_64, and two random call clobbered registers on
   i386. */
#if TCG_TARGET_REG_BITS == 64
# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
#else
# define TCG_REG_L0 TCG_REG_EAX
# define TCG_REG_L1 TCG_REG_EDX
#endif

/* For 32-bit, we are going to attempt to determine at runtime whether cmov
   is available.  However, the host compiler must supply <cpuid.h>, as we're
   not going to go so far as our own inline assembly.  */
#if TCG_TARGET_REG_BITS == 64
# define have_cmov 1
#elif defined(CONFIG_CPUID_H)
#include <cpuid.h>
static bool have_cmov;
#else
# define have_cmov 0
#endif

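/* A minimal sketch (an assumption for illustration, not code present in
   this section) of how have_cmov could be filled in from tcg_target_init()
   on a 32-bit host when CONFIG_CPUID_H is set:

       unsigned a, b, c, d;
       if (__get_cpuid(1, &a, &b, &c, &d)) {
           have_cmov = (d & bit_CMOV) != 0;
       }

   __get_cpuid() and bit_CMOV (EDX bit 15 of CPUID leaf 1) both come
   from <cpuid.h>.  */
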
static uint8_t *tb_ret_addr;

static void patch_reloc(uint8_t *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    value += addend;
    switch(type) {
    case R_386_PC32:
        value -= (uintptr_t)code_ptr;
        if (value != (int32_t)value) {
            tcg_abort();
        }
        *(uint32_t *)code_ptr = value;
        break;
    case R_386_PC8:
        value -= (uintptr_t)code_ptr;
        if (value != (int8_t)value) {
            tcg_abort();
        }
        *(uint8_t *)code_ptr = value;
        break;
    default:
        tcg_abort();
    }
}

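/* Worked example (illustrative only): tcg_out_jxx() below emits a long
   conditional jump as 0f 8x followed by a 4-byte displacement, then calls
   tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4) with code_ptr
   pointing at the displacement field.  When the label resolves to address T,
   patch_reloc() computes T + (-4) - code_ptr, i.e. T - (code_ptr + 4):
   exactly the rel32 the CPU expects, measured from the end of the
   instruction.  */
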
/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
    const char *ct_str;

    ct_str = *pct_str;
    switch(ct_str[0]) {
    case 'a':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
        break;
    case 'b':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
        break;
    case 'c':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
        break;
    case 'd':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
        break;
    case 'S':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
        break;
    case 'D':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
        break;
    case 'q':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xf);
        }
        break;
    case 'Q':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, 0xf);
        break;
    case 'r':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        break;

        /* qemu_ld/st address constraint */
    case 'L':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
        break;

    case 'e':
        ct->ct |= TCG_CT_CONST_S32;
        break;
    case 'Z':
        ct->ct |= TCG_CT_CONST_U32;
        break;

    default:
        return -1;
    }
    ct_str++;
    *pct_str = ct_str;
    return 0;
}

/* test if a constant matches the constraint */
static inline int tcg_target_const_match(tcg_target_long val,
                                         const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return 1;
    }
    return 0;
}

#if TCG_TARGET_REG_BITS == 64
# define LOWREGMASK(x)  ((x) & 7)
#else
# define LOWREGMASK(x)  (x)
#endif

#define P_EXT           0x100           /* 0x0f opcode prefix */
#define P_EXT38         0x200           /* 0x0f 0x38 opcode prefix */
#define P_DATA16        0x400           /* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32       0x800           /* 0x67 opcode prefix */
# define P_REXW         0x1000          /* Set REX.W = 1 */
# define P_REXB_R       0x2000          /* REG field as byte register */
# define P_REXB_RM      0x4000          /* R/M field as byte register */
# define P_GS           0x8000          /* gs segment override */
#else
# define P_ADDR32       0
# define P_REXW         0
# define P_REXB_R       0
# define P_REXB_RM      0
# define P_GS           0
#endif

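/* Worked example (illustrative, not part of the original source): the
   prefix bits above are consumed by tcg_out_opc().  An opcode defined as
   (0xb6 | P_EXT), i.e. OPC_MOVZBL, is emitted as the two bytes 0f b6;
   one defined with P_EXT38 is emitted as the three bytes 0f 38 xx, the
   three-byte opcode space in which instructions such as MOVBE
   (0f 38 f0 /r for loads) live.  P_DATA16 additionally emits 0x66 before
   the escape bytes, and the P_REX* bits only affect the 64-bit encoder
   below.  */
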
#define OPC_ARITH_EvIz  (0x81)
#define OPC_ARITH_EvIb  (0x83)
#define OPC_ARITH_GvEv  (0x03)          /* ... plus (ARITH_FOO << 3) */
#define OPC_ADD_GvEv    (OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSWAP       (0xc8 | P_EXT)
#define OPC_CALL_Jz     (0xe8)
#define OPC_CMOVCC      (0x40 | P_EXT)  /* ... plus condition code */
#define OPC_CMP_GvEv    (OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32     (0x48)
#define OPC_IMUL_GvEv   (0xaf | P_EXT)
#define OPC_IMUL_GvEvIb (0x6b)
#define OPC_IMUL_GvEvIz (0x69)
#define OPC_INC_r32     (0x40)
#define OPC_JCC_long    (0x80 | P_EXT)  /* ... plus condition code */
#define OPC_JCC_short   (0x70)          /* ... plus condition code */
#define OPC_JMP_long    (0xe9)
#define OPC_JMP_short   (0xeb)
#define OPC_LEA         (0x8d)
#define OPC_MOVB_EvGv   (0x88)          /* stores, more or less */
#define OPC_MOVL_EvGv   (0x89)          /* stores, more or less */
#define OPC_MOVL_GvEv   (0x8b)          /* loads, more or less */
#define OPC_MOVB_EvIz   (0xc6)
#define OPC_MOVL_EvIz   (0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVSBL      (0xbe | P_EXT)
#define OPC_MOVSWL      (0xbf | P_EXT)
#define OPC_MOVSLQ      (0x63 | P_REXW)
#define OPC_MOVZBL      (0xb6 | P_EXT)
#define OPC_MOVZWL      (0xb7 | P_EXT)
#define OPC_POP_r32     (0x58)
#define OPC_PUSH_r32    (0x50)
#define OPC_PUSH_Iv     (0x68)
#define OPC_PUSH_Ib     (0x6a)
#define OPC_RET         (0xc3)
#define OPC_SETCC       (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
#define OPC_SHIFT_1     (0xd1)
#define OPC_SHIFT_Ib    (0xc1)
#define OPC_SHIFT_cl    (0xd3)
#define OPC_TESTL       (0x85)
#define OPC_XCHG_ax_r32 (0x90)

#define OPC_GRP3_Ev     (0xf7)
#define OPC_GRP5        (0xff)

/* Group 1 opcode extensions for 0x80-0x83.
   These are also used as modifiers for OPC_ARITH.  */
#define ARITH_ADD 0
#define ARITH_OR  1
#define ARITH_ADC 2
#define ARITH_SBB 3
#define ARITH_AND 4
#define ARITH_SUB 5
#define ARITH_XOR 6
#define ARITH_CMP 7

/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3.  */
#define SHIFT_ROL 0
#define SHIFT_ROR 1
#define SHIFT_SHL 4
#define SHIFT_SHR 5
#define SHIFT_SAR 7

/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3.  */
#define EXT3_NOT   2
#define EXT3_NEG   3
#define EXT3_MUL   4
#define EXT3_IMUL  5
#define EXT3_DIV   6
#define EXT3_IDIV  7

/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5.  */
#define EXT5_INC_Ev     0
#define EXT5_DEC_Ev     1
#define EXT5_CALLN_Ev   2
#define EXT5_JMPN_Ev    4

/* Condition codes to be added to OPC_JCC_{long,short}.  */
#define JCC_JMP (-1)
#define JCC_JO  0x0
#define JCC_JNO 0x1
#define JCC_JB  0x2
#define JCC_JAE 0x3
#define JCC_JE  0x4
#define JCC_JNE 0x5
#define JCC_JBE 0x6
#define JCC_JA  0x7
#define JCC_JS  0x8
#define JCC_JNS 0x9
#define JCC_JP  0xa
#define JCC_JNP 0xb
#define JCC_JL  0xc
#define JCC_JGE 0xd
#define JCC_JLE 0xe
#define JCC_JG  0xf

static const uint8_t tcg_cond_to_jcc[] = {
    [TCG_COND_EQ] = JCC_JE,
    [TCG_COND_NE] = JCC_JNE,
    [TCG_COND_LT] = JCC_JL,
    [TCG_COND_GE] = JCC_JGE,
    [TCG_COND_LE] = JCC_JLE,
    [TCG_COND_GT] = JCC_JG,
    [TCG_COND_LTU] = JCC_JB,
    [TCG_COND_GEU] = JCC_JAE,
    [TCG_COND_LEU] = JCC_JBE,
    [TCG_COND_GTU] = JCC_JA,
};

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
    int rex;

    if (opc & P_GS) {
        tcg_out8(s, 0x65);
    }
    if (opc & P_DATA16) {
        /* We should never be asking for both 16 and 64-bit operation.  */
        assert((opc & P_REXW) == 0);
        tcg_out8(s, 0x66);
    }
    if (opc & P_ADDR32) {
        tcg_out8(s, 0x67);
    }

    rex = 0;
    rex |= (opc & P_REXW) ? 0x8 : 0x0;  /* REX.W */
    rex |= (r & 8) >> 1;                /* REX.R */
    rex |= (x & 8) >> 2;                /* REX.X */
    rex |= (rm & 8) >> 3;               /* REX.B */

    /* P_REXB_{R,RM} indicates that the given register is the low byte.
       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
       as otherwise the encoding indicates %[abcd]h.  Note that the values
       that are ORed in merely indicate that the REX byte must be present;
       those bits get discarded in output.  */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);

    if (rex) {
        tcg_out8(s, (uint8_t)(rex | 0x40));
    }

    if (opc & (P_EXT | P_EXT38)) {
        tcg_out8(s, 0x0f);
        if (opc & P_EXT38) {
            tcg_out8(s, 0x38);
        }
    }

    tcg_out8(s, opc);
}
#else
static void tcg_out_opc(TCGContext *s, int opc)
{
    if (opc & P_DATA16) {
        tcg_out8(s, 0x66);
    }
    if (opc & (P_EXT | P_EXT38)) {
        tcg_out8(s, 0x0f);
        if (opc & P_EXT38) {
            tcg_out8(s, 0x38);
        }
    }
    tcg_out8(s, opc);
}
/* Discard the register arguments to tcg_out_opc early, so as not to penalize
   the 32-bit compilation paths.  This method works with all versions of gcc,
   whereas relying on optimization may not be able to exclude them.  */
#define tcg_out_opc(s, opc, r, rm, x)  (tcg_out_opc)(s, opc)
#endif

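/* Worked example (illustrative only): tcg_out_modrm(s, OPC_MOVZWL, r, rm)
   with r = %r9d (9) and rm = %r10d (10) sets REX.R and REX.B above, so
   tcg_out_opc() emits 45 0f b7 and the ModRM byte below is
   0xc0 | (1 << 3) | 2 = 0xca: altogether 45 0f b7 ca,
   i.e. movzwl %r10w, %r9d.  */
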
static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
{
    tcg_out_opc(s, opc, r, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}

/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
   We handle either RM and INDEX missing with a negative value.  In 64-bit
   mode for absolute addresses, ~RM is the size of the immediate operand
   that will follow the instruction.  */

static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
                                     int index, int shift, intptr_t offset)
{
    int mod, len;

    if (index < 0 && rm < 0) {
        if (TCG_TARGET_REG_BITS == 64) {
            /* Try for a rip-relative addressing mode.  This has replaced
               the 32-bit-mode absolute addressing encoding.  */
            intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
            intptr_t disp = offset - pc;
            if (disp == (int32_t)disp) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
                tcg_out32(s, disp);
                return;
            }

            /* Try for an absolute address encoding.  This requires the
               use of the MODRM+SIB encoding and is therefore larger than
               rip-relative addressing.  */
            if (offset == (int32_t)offset) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
                tcg_out8(s, (4 << 3) | 5);
                tcg_out32(s, offset);
                return;
            }

            /* ??? The memory isn't directly addressable.  */
            tcg_abort();
        } else {
            /* Absolute address.  */
            tcg_out_opc(s, opc, r, 0, 0);
            tcg_out8(s, (r << 3) | 5);
            tcg_out32(s, offset);
            return;
        }
    }

    /* Find the length of the immediate addend.  Note that the encoding
       that would be used for (%ebp) indicates absolute addressing.  */
    if (rm < 0) {
        mod = 0, len = 4, rm = 5;
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
        mod = 0, len = 0;
    } else if (offset == (int8_t)offset) {
        mod = 0x40, len = 1;
    } else {
        mod = 0x80, len = 4;
    }

    /* Use a single byte MODRM format if possible.  Note that the encoding
       that would be used for %esp is the escape to the two byte form.  */
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
        /* Single byte MODRM format.  */
        tcg_out_opc(s, opc, r, rm, 0);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
    } else {
        /* Two byte MODRM+SIB format.  */

        /* Note that the encoding that would place %esp into the index
           field indicates no index register.  In 64-bit mode, the REX.X
           bit counts, so %r12 can be used as the index.  */
        if (index < 0) {
            index = 4;
        } else {
            assert(index != TCG_REG_ESP);
        }

        tcg_out_opc(s, opc, r, rm, index);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
    }

    if (len == 1) {
        tcg_out8(s, offset);
    } else if (len == 4) {
        tcg_out32(s, offset);
    }
}

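/* Worked examples of the special cases above (illustrative only):
   - A load through %ebp with zero offset cannot use mod=0 (that encoding
     means absolute addressing), so it emits 8b 45 00,
     i.e. movl 0(%ebp), %eax with an explicit disp8 of zero.
   - The same load through %esp needs the SIB escape:
     8b 04 24, i.e. movl (%esp), %eax.  */
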
/* A simplification of the above with no index or shift.  */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
                                        int rm, intptr_t offset)
{
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
}

/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
{
    /* Propagate an opcode prefix, such as P_REXW.  */
    int ext = subop & ~0x7;
    subop &= 0x7;

    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
}

static inline void tcg_out_mov(TCGContext *s, TCGType type,
                               TCGReg ret, TCGReg arg)
{
    if (arg != ret) {
        int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
        tcg_out_modrm(s, opc, ret, arg);
    }
}

static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg)
{
    tcg_target_long diff;

    if (arg == 0) {
        tgen_arithr(s, ARITH_XOR, ret, ret);
        return;
    }
    if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
        return;
    }
    if (arg == (int32_t)arg) {
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
        tcg_out32(s, arg);
        return;
    }

    /* Try a 7 byte pc-relative lea before the 10 byte movq.  */
    diff = arg - ((uintptr_t)s->code_ptr + 7);
    if (diff == (int32_t)diff) {
        tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
        tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
        tcg_out32(s, diff);
        return;
    }

    tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
    tcg_out64(s, arg);
}

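/* Size comparison of the strategies tried above, in order (illustrative):
   xor r,r is 2-3 bytes; movl $imm32 is 5-6 bytes and zero-extends; the
   REX.W c7 /0 form is 7 bytes and sign-extends a 32-bit immediate; the
   rip-relative lea is 7 bytes when the constant lies within +/-2GB of the
   code; the full movabs (REX.W b8+r imm64) is the 10-byte fallback.  */
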
static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
{
    if (val == (int8_t)val) {
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
        tcg_out8(s, val);
    } else if (val == (int32_t)val) {
        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
        tcg_out32(s, val);
    } else {
        tcg_abort();
    }
}

static inline void tcg_out_push(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_pop(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                              TCGReg arg1, intptr_t arg2)
{
    int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
}

static inline void tcg_out_sti(TCGContext *s, TCGType type, TCGReg base,
                               tcg_target_long ofs, tcg_target_long val)
{
    int opc = OPC_MOVL_EvIz + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, 0, base, ofs);
    tcg_out32(s, val);
}

static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
{
    /* Propagate an opcode prefix, such as P_DATA16.  */
    int ext = subopc & ~0x7;
    subopc &= 0x7;

    if (count == 1) {
        tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
    } else {
        tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
        tcg_out8(s, count);
    }
}

static inline void tcg_out_bswap32(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_rolw_8(TCGContext *s, int reg)
{
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
}

static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
{
    /* movzbl */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
}

static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsbl */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
}

static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
{
    /* movzwl */
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
}

static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsw[lq] */
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
}

static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
{
    /* 32-bit mov zero extends.  */
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
}

static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
}

static inline void tcg_out_bswap64(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
}

static void tgen_arithi(TCGContext *s, int c, int r0,
                        tcg_target_long val, int cf)
{
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        rexw = c & -8;
        c &= 7;
    }

    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
       partial flags update stalls on Pentium4 and are not recommended
       by current Intel optimization manuals.  */
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
        int is_inc = (c == ARITH_ADD) ^ (val < 0);
        if (TCG_TARGET_REG_BITS == 64) {
            /* The single-byte increment encodings are re-tasked as the
               REX prefixes.  Use the MODRM encoding.  */
            tcg_out_modrm(s, OPC_GRP5 + rexw,
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
        } else {
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
        }
        return;
    }

    if (c == ARITH_AND) {
        if (TCG_TARGET_REG_BITS == 64) {
            if (val == 0xffffffffu) {
                tcg_out_ext32u(s, r0, r0);
                return;
            }
            if (val == (uint32_t)val) {
                /* AND with no high bits set can use a 32-bit operation.  */
                rexw = 0;
            }
        }
        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
            tcg_out_ext8u(s, r0, r0);
            return;
        }
        if (val == 0xffffu) {
            tcg_out_ext16u(s, r0, r0);
            return;
        }
    }

    if (val == (int8_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
        tcg_out8(s, val);
        return;
    }
    if (rexw == 0 || val == (int32_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
        tcg_out32(s, val);
        return;
    }

    tcg_abort();
}

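/* Illustrative encodings for the special cases above: on i386,
   tgen_arithi(s, ARITH_ADD, TCG_REG_EAX, 1, 0) emits the one-byte
   inc %eax (40), while on x86-64 the same request becomes ff c0 (inc via
   Group 5, since 40-4f are the REX prefixes).  AND with 0xff degrades to
   movzbl, and a compare against an imm8 such as
   tgen_arithi(s, ARITH_CMP, r0, 4, 0) uses the short 83 /7 ib form.  */
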
static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
{
    if (val != 0) {
        tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
    }
}

/* Use SMALL != 0 to force a short forward branch.  */
static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
{
    int32_t val, val1;
    TCGLabel *l = &s->labels[label_index];

    if (l->has_value) {
        val = l->u.value - (intptr_t)s->code_ptr;
        val1 = val - 2;
        if ((int8_t)val1 == val1) {
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_short);
            } else {
                tcg_out8(s, OPC_JCC_short + opc);
            }
            tcg_out8(s, val1);
        } else {
            if (small) {
                tcg_abort();
            }
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_long);
                tcg_out32(s, val - 5);
            } else {
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
                tcg_out32(s, val - 6);
            }
        }
    } else if (small) {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_short);
        } else {
            tcg_out8(s, OPC_JCC_short + opc);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
        s->code_ptr += 1;
    } else {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_long);
        } else {
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
        s->code_ptr += 4;
    }
}

static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int rexw)
{
    if (const_arg2) {
        if (arg2 == 0) {
            /* test r, r */
            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
        } else {
            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
        }
    } else {
        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
    }
}

static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}
#else
/* XXX: we implement it at the target level to avoid having to
   handle cross basic blocks temporaries */
static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args, int small)
{
    int label_next;
    label_next = gen_new_label();
    switch(args[4]) {
    case TCG_COND_EQ:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_next, 1);
        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_NE:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         args[5], small);
        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_LT:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LE:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GT:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GE:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LTU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LEU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GTU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GEU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    default:
        tcg_abort();
    }
    tcg_out_label(s, label_next, s->code_ptr);
}
#endif
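/* How the decomposition above works, using signed LT as the example
   (illustrative): for the pair comparison (ah:al) < (bh:bl), first branch
   to the target if ah < bh (signed); then skip to label_next if ah != bh,
   because the high halves already decide the result; only when the high
   halves are equal does the unsigned comparison of the low halves,
   al < bl, decide.  */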

static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
#else
static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    TCGArg new_args[6];
    int label_true, label_over;

    memcpy(new_args, args+1, 5*sizeof(TCGArg));

    if (args[0] == args[1] || args[0] == args[2]
        || (!const_args[3] && args[0] == args[3])
        || (!const_args[4] && args[0] == args[4])) {
        /* When the destination overlaps with one of the argument
           registers, don't do anything tricky.  */
        label_true = gen_new_label();
        label_over = gen_new_label();

        new_args[5] = label_true;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
        tcg_out_jxx(s, JCC_JMP, label_over, 1);
        tcg_out_label(s, label_true, s->code_ptr);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
        tcg_out_label(s, label_over, s->code_ptr);
    } else {
        /* When the destination does not overlap one of the arguments,
           clear the destination first, jump if cond false, and emit an
           increment in the true case.  This results in smaller code.  */

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);

        label_over = gen_new_label();
        new_args[4] = tcg_invert_cond(new_args[4]);
        new_args[5] = label_over;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
        tcg_out_label(s, label_over, s->code_ptr);
    }
}
#endif

static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg c1, TCGArg c2, int const_c2,
                              TCGArg v1)
{
    tcg_out_cmp(s, c1, c2, const_c2, 0);
    if (have_cmov) {
        tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
    } else {
        int over = gen_new_label();
        tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
        tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
        tcg_out_label(s, over, s->code_ptr);
    }
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg c1, TCGArg c2, int const_c2,
                              TCGArg v1)
{
    tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
    tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
}
#endif

static void tcg_out_branch(TCGContext *s, int call, uintptr_t dest)
{
    intptr_t disp = dest - (intptr_t)s->code_ptr - 5;

    if (disp == (int32_t)disp) {
        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
        tcg_out32(s, disp);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
        tcg_out_modrm(s, OPC_GRP5,
                      call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
    }
}
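/* Illustrative encodings: when the target is within +/-2GB the call is the
   5-byte e8 rel32 form (rel32 is measured from the end of the instruction,
   hence the -5 above); otherwise a 10-byte movabs loads the destination
   into %r10 and the call becomes the indirect 41 ff d2 (callq *%r10).  */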

static inline void tcg_out_calli(TCGContext *s, uintptr_t dest)
{
    tcg_out_branch(s, 1, dest);
}

static void tcg_out_jmp(TCGContext *s, uintptr_t dest)
{
    tcg_out_branch(s, 0, dest);
}

#if defined(CONFIG_SOFTMMU)
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     int mmu_idx, uintptr_t ra)
 */
static const void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
};

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
 */
static const void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};

/* Perform the TLB load and compare.

   Inputs:
   ADDRLO and ADDRHI contain the low and high part of the address.

   MEM_INDEX and S_BITS are the memory context and log2 size of the load.

   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
   This should be offsetof addr_read or addr_write.

   Outputs:
   LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
   positions of the displacements of forward jumps to the TLB miss case.

   Second argument register is loaded with the low part of the address.
   In the TLB hit case, it has been adjusted as indicated by the TLB
   and so is a host address.  In the TLB miss case, it continues to
   hold a guest address.

   First argument register is clobbered.  */

static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                                    int mem_index, TCGMemOp s_bits,
                                    uint8_t **label_ptr, int which)
{
    const TCGReg r0 = TCG_REG_L0;
    const TCGReg r1 = TCG_REG_L1;
    TCGType ttype = TCG_TYPE_I32;
    TCGType htype = TCG_TYPE_I32;
    int trexw = 0, hrexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        if (TARGET_LONG_BITS == 64) {
            ttype = TCG_TYPE_I64;
            trexw = P_REXW;
        }
        if (TCG_TYPE_PTR == TCG_TYPE_I64) {
            htype = TCG_TYPE_I64;
            hrexw = P_REXW;
        }
    }

    tcg_out_mov(s, htype, r0, addrlo);
    tcg_out_mov(s, ttype, r1, addrlo);

    tcg_out_shifti(s, SHIFT_SHR + hrexw, r0,
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    tgen_arithi(s, ARITH_AND + trexw, r1,
                TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
    tgen_arithi(s, ARITH_AND + hrexw, r0,
                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);

    tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0,
                             offsetof(CPUArchState, tlb_table[mem_index][0])
                             + which);

    /* cmp 0(r0), r1 */
    tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0);

    /* Prepare for both the fast path add of the tlb addend, and the slow
       path function argument setup.  There are two cases worth note:
       For 32-bit guest and x86_64 host, MOVL zero-extends the guest address
       before the fastpath ADDQ below.  For 64-bit guest and x32 host, MOVQ
       copies the entire guest address for the slow path, while truncation
       for the 32-bit host happens with the fastpath ADDL below.  */
    tcg_out_mov(s, ttype, r1, addrlo);

    /* jne slow_path */
    tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
    label_ptr[0] = s->code_ptr;
    s->code_ptr += 4;

    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        /* cmp 4(r0), addrhi */
        tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4);

        /* jne slow_path */
        tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
        label_ptr[1] = s->code_ptr;
        s->code_ptr += 4;
    }

    /* TLB Hit.  */

    /* add addend(r0), r1 */
    tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
                         offsetof(CPUTLBEntry, addend) - which);
}
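/* Why the index math above works (illustrative): the TLB index is
   (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1), and the entry's byte
   offset within the table is that index << CPU_TLB_ENTRY_BITS.  Shifting
   right by only TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS and masking with
   (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS computes both steps at once,
   leaving r0 holding a byte offset that the LEA can add to env
   directly.  */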

/*
 * Record the context of a call to the out of line helper code for the slow path
 * for a load or store, so that we can later generate the correct helper code
 */
static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOp opc,
                                TCGReg datalo, TCGReg datahi,
                                TCGReg addrlo, TCGReg addrhi,
                                int mem_index, uint8_t *raddr,
                                uint8_t **label_ptr)
{
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->opc = opc;
    label->datalo_reg = datalo;
    label->datahi_reg = datahi;
    label->addrlo_reg = addrlo;
    label->addrhi_reg = addrhi;
    label->mem_index = mem_index;
    label->raddr = raddr;
    label->label_ptr[0] = label_ptr[0];
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        label->label_ptr[1] = label_ptr[1];
    }
}

/*
 * Generate code for the slow path for a load at the end of block
 */
static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    TCGMemOp opc = l->opc;
    TCGReg data_reg;
    uint8_t **label_ptr = &l->label_ptr[0];

    /* resolve label address */
    *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
    }

    if (TCG_TARGET_REG_BITS == 32) {
        int ofs = 0;

        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (TARGET_LONG_BITS == 64) {
            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);
        ofs += 4;

        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, (uintptr_t)l->raddr);
    } else {
        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
        /* The second argument is already loaded with addrlo.  */
        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
                     l->mem_index);
        tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
                     (uintptr_t)l->raddr);
    }

    tcg_out_calli(s, (uintptr_t)qemu_ld_helpers[opc & ~MO_SIGN]);

    data_reg = l->datalo_reg;
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case MO_SW:
        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
#if TCG_TARGET_REG_BITS == 64
    case MO_SL:
        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
        break;
#endif
    case MO_UB:
    case MO_UW:
        /* Note that the helpers have zero-extended to tcg_target_long.  */
    case MO_UL:
        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
        break;
    case MO_Q:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
        } else if (data_reg == TCG_REG_EDX) {
            /* xchg %edx, %eax */
            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
        }
        break;
    default:
        tcg_abort();
    }

    /* Jump to the code corresponding to next IR of qemu_st */
    tcg_out_jmp(s, (uintptr_t)l->raddr);
}

/*
 * Generate code for the slow path for a store at the end of block
 */
static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    TCGMemOp opc = l->opc;
    TCGMemOp s_bits = opc & MO_SIZE;
    uint8_t **label_ptr = &l->label_ptr[0];
    TCGReg retaddr;

    /* resolve label address */
    *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
    }

    if (TCG_TARGET_REG_BITS == 32) {
        int ofs = 0;

        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (TARGET_LONG_BITS == 64) {
            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (s_bits == MO_64) {
            tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);
        ofs += 4;

        retaddr = TCG_REG_EAX;
        tcg_out_movi(s, TCG_TYPE_I32, retaddr, (uintptr_t)l->raddr);
        tcg_out_st(s, TCG_TYPE_I32, retaddr, TCG_REG_ESP, ofs);
    } else {
        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
        /* The second argument is already loaded with addrlo.  */
        tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                    tcg_target_call_iarg_regs[2], l->datalo_reg);
        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
                     l->mem_index);

        if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
            retaddr = tcg_target_call_iarg_regs[4];
            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
        } else {
            retaddr = TCG_REG_RAX;
            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
            tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, 0);
        }
    }

    /* "Tail call" to the helper, with the return address back inline.  */
    tcg_out_push(s, retaddr);
    tcg_out_jmp(s, (uintptr_t)qemu_st_helpers[opc]);
}
#elif defined(__x86_64__) && defined(__linux__)
# include <asm/prctl.h>
# include <sys/prctl.h>

int arch_prctl(int code, unsigned long addr);

static int guest_base_flags;
static inline void setup_guest_base_seg(void)
{
    if (arch_prctl(ARCH_SET_GS, GUEST_BASE) == 0) {
        guest_base_flags = P_GS;
    }
}
#else
# define guest_base_flags 0
static inline void setup_guest_base_seg(void) { }
#endif /* SOFTMMU */

static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg base, intptr_t ofs, int seg,
                                   TCGMemOp memop)
{
    const TCGMemOp bswap = memop & MO_BSWAP;

    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_modrm_offset(s, OPC_MOVZBL + seg, datalo, base, ofs);
        break;
    case MO_SB:
        tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, base, ofs);
        break;
    case MO_UW:
        tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
        if (bswap) {
            tcg_out_rolw_8(s, datalo);
        }
        break;
    case MO_SW:
        if (bswap) {
            tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
            tcg_out_rolw_8(s, datalo);
            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW + seg,
                                 datalo, base, ofs);
        }
        break;
    case MO_UL:
        tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
        if (bswap) {
            tcg_out_bswap32(s, datalo);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case MO_SL:
        if (bswap) {
            tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
            tcg_out_bswap32(s, datalo);
            tcg_out_ext32s(s, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSLQ + seg, datalo, base, ofs);
        }
        break;
#endif
    case MO_Q:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_modrm_offset(s, OPC_MOVL_GvEv + P_REXW + seg,
                                 datalo, base, ofs);
            if (bswap) {
                tcg_out_bswap64(s, datalo);
            }
        } else {
            if (bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            if (base != datalo) {
                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
                                     datalo, base, ofs);
                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
                                     datahi, base, ofs + 4);
            } else {
                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
                                     datahi, base, ofs + 4);
                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
                                     datalo, base, ofs);
            }
            if (bswap) {
                tcg_out_bswap32(s, datalo);
                tcg_out_bswap32(s, datahi);
            }
        }
        break;
    default:
        tcg_abort();
    }
}

/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
   EAX.  It will be useful once fixed registers globals are less
   common.  */
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
{
    TCGReg datalo, datahi, addrlo;
    TCGReg addrhi __attribute__((unused));
    TCGMemOp opc;
#if defined(CONFIG_SOFTMMU)
    int mem_index;
    TCGMemOp s_bits;
    uint8_t *label_ptr[2];
#endif

    datalo = *args++;
    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
    opc = *args++;

#if defined(CONFIG_SOFTMMU)
    mem_index = *args++;
    s_bits = opc & MO_SIZE;

    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
                     label_ptr, offsetof(CPUTLBEntry, addr_read));

    /* TLB Hit.  */
    tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);

    /* Record the current context of a load into ldst label */
    add_qemu_ldst_label(s, 1, opc, datalo, datahi, addrlo, addrhi,
                        mem_index, s->code_ptr, label_ptr);
#else
    {
        int32_t offset = GUEST_BASE;
        TCGReg base = addrlo;
        int seg = 0;

        /* ??? We assume all operations have left us with register contents
           that are zero extended.  So far this appears to be true.  If we
           want to enforce this, we can either do an explicit zero-extension
           here, or (if GUEST_BASE == 0, or a segment register is in use)
           use the ADDR32 prefix.  For now, do nothing.  */
        if (GUEST_BASE && guest_base_flags) {
            seg = guest_base_flags;
            offset = 0;
        } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
            tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
            base = TCG_REG_L1;
            offset = 0;
        }

        tcg_out_qemu_ld_direct(s, datalo, datahi, base, offset, seg, opc);
    }
#endif
}

static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg base, intptr_t ofs, int seg,
                                   TCGMemOp memop)
{
    const TCGMemOp bswap = memop & MO_BSWAP;

    /* ??? Ideally we wouldn't need a scratch register.  For user-only,
       we could perform the bswap twice to restore the original value
       instead of moving to the scratch.  But as it is, the L constraint
       means that TCG_REG_L0 is definitely free here.  */
    const TCGReg scratch = TCG_REG_L0;

    switch (memop & MO_SIZE) {
    case MO_8:
        /* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
           Use the scratch register if necessary.  */
        if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
                             datalo, base, ofs);
        break;
    case MO_16:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_rolw_8(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16 + seg,
                             datalo, base, ofs);
        break;
    case MO_32:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
        break;
    case MO_64:
        if (TCG_TARGET_REG_BITS == 64) {
            if (bswap) {
                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
                tcg_out_bswap64(s, scratch);
                datalo = scratch;
            }
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_REXW + seg,
                                 datalo, base, ofs);
        } else if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
            tcg_out_bswap32(s, scratch);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datahi, base, ofs+4);
        }
        break;
    default:
        tcg_abort();
    }
}

static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
{
    TCGReg datalo, datahi, addrlo;
    TCGReg addrhi __attribute__((unused));
    TCGMemOp opc;
#if defined(CONFIG_SOFTMMU)
    int mem_index;
    TCGMemOp s_bits;
    uint8_t *label_ptr[2];
#endif

    datalo = *args++;
    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
    opc = *args++;

#if defined(CONFIG_SOFTMMU)
    mem_index = *args++;
    s_bits = opc & MO_SIZE;

    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
                     label_ptr, offsetof(CPUTLBEntry, addr_write));

    /* TLB Hit.  */
    tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);

    /* Record the current context of a store into ldst label */
    add_qemu_ldst_label(s, 0, opc, datalo, datahi, addrlo, addrhi,
                        mem_index, s->code_ptr, label_ptr);
#else
    {
        int32_t offset = GUEST_BASE;
        TCGReg base = addrlo;
        int seg = 0;

        /* ??? We assume all operations have left us with register contents
           that are zero extended.  So far this appears to be true.  If we
           want to enforce this, we can either do an explicit zero-extension
           here, or (if GUEST_BASE == 0, or a segment register is in use)
           use the ADDR32 prefix.  For now, do nothing.  */
        if (GUEST_BASE && guest_base_flags) {
            seg = guest_base_flags;
            offset = 0;
        } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
            tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
            base = TCG_REG_L1;
            offset = 0;
        }

        tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc);
    }
#endif
}

a9751609 1605static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
c896fe29
FB
1606 const TCGArg *args, const int *const_args)
1607{
5d8a4f8f
RH
1608 int c, rexw = 0;
1609
1610#if TCG_TARGET_REG_BITS == 64
1611# define OP_32_64(x) \
1612 case glue(glue(INDEX_op_, x), _i64): \
1613 rexw = P_REXW; /* FALLTHRU */ \
1614 case glue(glue(INDEX_op_, x), _i32)
1615#else
1616# define OP_32_64(x) \
1617 case glue(glue(INDEX_op_, x), _i32)
1618#endif
78686523 1619
c896fe29
FB
1620 switch(opc) {
1621 case INDEX_op_exit_tb:
5d8a4f8f 1622 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
357e3d8a 1623 tcg_out_jmp(s, (uintptr_t)tb_ret_addr);
c896fe29
FB
1624 break;
1625 case INDEX_op_goto_tb:
1626 if (s->tb_jmp_offset) {
1627 /* direct jump method */
da441cff 1628 tcg_out8(s, OPC_JMP_long); /* jmp im */
c896fe29
FB
1629 s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
1630 tcg_out32(s, 0);
1631 } else {
1632 /* indirect jump method */
9363dedb 1633 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
357e3d8a 1634 (intptr_t)(s->tb_next + args[0]));
c896fe29
FB
1635 }
1636 s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
1637 break;
1638 case INDEX_op_call:
1639 if (const_args[0]) {
aadb21a4 1640 tcg_out_calli(s, args[0]);
c896fe29 1641 } else {
aadb21a4 1642 /* call *reg */
9363dedb 1643 tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
c896fe29
FB
1644 }
1645 break;
c896fe29 1646 case INDEX_op_br:
f75b56c1 1647 tcg_out_jxx(s, JCC_JMP, args[0], 0);
c896fe29
FB
1648 break;
1649 case INDEX_op_movi_i32:
1650 tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
1651 break;
    OP_32_64(ld8u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld8s):
        tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16s):
        tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_ld32u_i64:
#endif
    case INDEX_op_ld_i32:
        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        break;
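    /* Rationale for the REXW notes above (added): on x86-64, any
       instruction that writes a 32-bit register implicitly zero-extends
       the result to the full 64-bit register, so MOVZBL/MOVZWL need no
       REX.W prefix even for the _i64 variants.  */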

    OP_32_64(st8):
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVB_EvIz,
                                 0, args[1], args[2]);
            tcg_out8(s, args[0]);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
                                 args[0], args[1], args[2]);
        }
        break;
    OP_32_64(st16):
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16,
                                 0, args[1], args[2]);
            tcg_out16(s, args[0]);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
                                 args[0], args[1], args[2]);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_st32_i64:
#endif
    case INDEX_op_st_i32:
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]);
            tcg_out32(s, args[0]);
        } else {
            tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        }
        break;

    OP_32_64(add):
        /* For 3-operand addition, use LEA: it computes a1 + a2 (+ disp)
           in a single instruction and does not clobber the flags.  */
        if (args[0] != args[1]) {
            TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;

            if (const_args[2]) {
                c3 = a2, a2 = -1;
            } else if (a0 == a2) {
                /* Watch out for dest = src + dest, since we've removed
                   the matching constraint on the add.  */
                tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
                break;
            }

            tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
            break;
        }
        c = ARITH_ADD;
        goto gen_arith;
    OP_32_64(sub):
        c = ARITH_SUB;
        goto gen_arith;
    OP_32_64(and):
        c = ARITH_AND;
        goto gen_arith;
    OP_32_64(or):
        c = ARITH_OR;
        goto gen_arith;
    OP_32_64(xor):
        c = ARITH_XOR;
        goto gen_arith;
    gen_arith:
        if (const_args[2]) {
            tgen_arithi(s, c + rexw, args[0], args[2], 0);
        } else {
            tgen_arithr(s, c + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(mul):
        if (const_args[2]) {
            int32_t val;
            val = args[2];
            if (val == (int8_t)val) {
                /* Use the shorter sign-extended 8-bit immediate form.  */
                tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
                tcg_out8(s, val);
            } else {
                tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
                tcg_out32(s, val);
            }
        } else {
            tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
        }
        break;

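    /* Background for the div2/divu2 cases below (added): IDIV/DIV divide
       the double-width value in EDX:EAX (RDX:RAX with REX.W) by the given
       operand, leaving the quotient in EAX and the remainder in EDX; the
       "a"/"d" constraints in x86_op_defs below pin those registers.  */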
    OP_32_64(div2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
        break;
    OP_32_64(divu2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
        break;

    OP_32_64(shl):
        c = SHIFT_SHL;
        goto gen_shift;
    OP_32_64(shr):
        c = SHIFT_SHR;
        goto gen_shift;
    OP_32_64(sar):
        c = SHIFT_SAR;
        goto gen_shift;
    OP_32_64(rotl):
        c = SHIFT_ROL;
        goto gen_shift;
    OP_32_64(rotr):
        c = SHIFT_ROR;
        goto gen_shift;
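    /* Note (added): constant counts use the shift-by-immediate encoding;
       a variable count must already be in %cl (OPC_SHIFT_cl), which the
       "ci" constraints in x86_op_defs below guarantee.  */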
    gen_shift:
        if (const_args[2]) {
            tcg_out_shifti(s, c + rexw, args[0], args[2]);
        } else {
            tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
        }
        break;

    case INDEX_op_brcond_i32:
        tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
                         args[3], 0);
        break;
    case INDEX_op_setcond_i32:
        tcg_out_setcond32(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;
    case INDEX_op_movcond_i32:
        tcg_out_movcond32(s, args[5], args[0], args[1],
                          args[2], const_args[2], args[3]);
        break;

    OP_32_64(bswap16):
        /* Swapping the two bytes of a 16-bit value is a rotate by 8.  */
        tcg_out_rolw_8(s, args[0]);
        break;
    OP_32_64(bswap32):
        tcg_out_bswap32(s, args[0]);
        break;

    OP_32_64(neg):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
        break;
    OP_32_64(not):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
        break;

    OP_32_64(ext8s):
        tcg_out_ext8s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext16s):
        tcg_out_ext16s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext8u):
        tcg_out_ext8u(s, args[0], args[1]);
        break;
    OP_32_64(ext16u):
        tcg_out_ext16u(s, args[0], args[1]);
        break;

    case INDEX_op_qemu_ld_i32:
        tcg_out_qemu_ld(s, args, 0);
        break;
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, args, 1);
        break;
    case INDEX_op_qemu_st_i32:
        tcg_out_qemu_st(s, args, 0);
        break;
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, args, 1);
        break;

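    /* Background for mulu2/muls2 below (added): the one-operand MUL/IMUL
       forms multiply EAX (RAX) by the operand and leave the double-width
       product in EDX:EAX, hence the "a"/"d" output constraints below.  */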
    OP_32_64(mulu2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
        break;
    OP_32_64(muls2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
        break;
    OP_32_64(add2):
        /* Add the low halves first, then propagate the carry with ADC.
           The final 1 asks tgen_arithi to keep the carry flag valid,
           disabling transformations that would clobber it.  */
        if (const_args[4]) {
            tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]);
        }
        break;
    OP_32_64(sub2):
        if (const_args[4]) {
            tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]);
        }
        break;

#if TCG_TARGET_REG_BITS == 32
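    /* Note (added): on a 32-bit host, 64-bit values occupy register pairs,
       so 64-bit compares arrive as brcond2/setcond2 over the two halves.  */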
    case INDEX_op_brcond2_i32:
        tcg_out_brcond2(s, args, const_args, 0);
        break;
    case INDEX_op_setcond2_i32:
        tcg_out_setcond2(s, args, const_args);
        break;
#else /* TCG_TARGET_REG_BITS == 64 */
    case INDEX_op_movi_i64:
        tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i64:
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW,
                                 0, args[1], args[2]);
            tcg_out32(s, args[0]);
        } else {
            tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        }
        break;

    case INDEX_op_brcond_i64:
        tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
                         args[3], 0);
        break;
    case INDEX_op_setcond_i64:
        tcg_out_setcond64(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;
    case INDEX_op_movcond_i64:
        tcg_out_movcond64(s, args[5], args[0], args[1],
                          args[2], const_args[2], args[3]);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_bswap64(s, args[0]);
        break;
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, args[0], args[1]);
        break;
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, args[0], args[1]);
        break;
#endif

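    /* Background for the deposit case below (added): it relies on x86
       partial-register stores.  Writing through a byte or word register
       replaces only bits 0..7 or 0..15 of the destination; "args[0] + 4"
       selects the legacy high-byte register (AH/CH/DH/BH, encodings 4..7
       when no REX prefix is emitted), which covers bits 8..15.  */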
    OP_32_64(deposit):
        if (args[3] == 0 && args[4] == 8) {
            /* load bits 0..7 */
            tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
                          args[2], args[0]);
        } else if (args[3] == 8 && args[4] == 8) {
            /* load bits 8..15 */
            tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
        } else if (args[3] == 0 && args[4] == 16) {
            /* load bits 0..15 */
            tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
        } else {
            tcg_abort();
        }
        break;

    default:
        tcg_abort();
    }

#undef OP_32_64
}

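/* Key to the constraint letters (added; the letters are parsed by
   target_parse_constraint earlier in this file): "r" any register,
   "q" a register with a byte form (EAX..EBX on i386), "Q" a register
   with a high-byte form, "a"/"c"/"d" EAX/ECX/EDX specifically, "L" the
   registers reserved for qemu_ld/st, "i" any immediate, "e"/"Z" 32-bit
   sign-/zero-extendable immediates, and "0"/"1" an operand that must
   match the corresponding output.  */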
static const TCGTargetOpDef x86_op_defs[] = {
    { INDEX_op_exit_tb, { } },
    { INDEX_op_goto_tb, { } },
    { INDEX_op_call, { "ri" } },
    { INDEX_op_br, { } },
    { INDEX_op_mov_i32, { "r", "r" } },
    { INDEX_op_movi_i32, { "r" } },
    { INDEX_op_ld8u_i32, { "r", "r" } },
    { INDEX_op_ld8s_i32, { "r", "r" } },
    { INDEX_op_ld16u_i32, { "r", "r" } },
    { INDEX_op_ld16s_i32, { "r", "r" } },
    { INDEX_op_ld_i32, { "r", "r" } },
    { INDEX_op_st8_i32, { "qi", "r" } },
    { INDEX_op_st16_i32, { "ri", "r" } },
    { INDEX_op_st_i32, { "ri", "r" } },

    { INDEX_op_add_i32, { "r", "r", "ri" } },
    { INDEX_op_sub_i32, { "r", "0", "ri" } },
    { INDEX_op_mul_i32, { "r", "0", "ri" } },
    { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_and_i32, { "r", "0", "ri" } },
    { INDEX_op_or_i32, { "r", "0", "ri" } },
    { INDEX_op_xor_i32, { "r", "0", "ri" } },

    { INDEX_op_shl_i32, { "r", "0", "ci" } },
    { INDEX_op_shr_i32, { "r", "0", "ci" } },
    { INDEX_op_sar_i32, { "r", "0", "ci" } },
    { INDEX_op_rotl_i32, { "r", "0", "ci" } },
    { INDEX_op_rotr_i32, { "r", "0", "ci" } },

    { INDEX_op_brcond_i32, { "r", "ri" } },

    { INDEX_op_bswap16_i32, { "r", "0" } },
    { INDEX_op_bswap32_i32, { "r", "0" } },

    { INDEX_op_neg_i32, { "r", "0" } },

    { INDEX_op_not_i32, { "r", "0" } },

    { INDEX_op_ext8s_i32, { "r", "q" } },
    { INDEX_op_ext16s_i32, { "r", "r" } },
    { INDEX_op_ext8u_i32, { "r", "q" } },
    { INDEX_op_ext16u_i32, { "r", "r" } },

    { INDEX_op_setcond_i32, { "q", "r", "ri" } },

    { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
#if TCG_TARGET_HAS_movcond_i32
    { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
#endif

    { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
    { INDEX_op_muls2_i32, { "a", "d", "a", "r" } },
    { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },

#if TCG_TARGET_REG_BITS == 32
    { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
#else
    { INDEX_op_mov_i64, { "r", "r" } },
    { INDEX_op_movi_i64, { "r" } },
    { INDEX_op_ld8u_i64, { "r", "r" } },
    { INDEX_op_ld8s_i64, { "r", "r" } },
    { INDEX_op_ld16u_i64, { "r", "r" } },
    { INDEX_op_ld16s_i64, { "r", "r" } },
    { INDEX_op_ld32u_i64, { "r", "r" } },
    { INDEX_op_ld32s_i64, { "r", "r" } },
    { INDEX_op_ld_i64, { "r", "r" } },
    { INDEX_op_st8_i64, { "ri", "r" } },
    { INDEX_op_st16_i64, { "ri", "r" } },
    { INDEX_op_st32_i64, { "ri", "r" } },
    { INDEX_op_st_i64, { "re", "r" } },

    { INDEX_op_add_i64, { "r", "r", "re" } },
    { INDEX_op_mul_i64, { "r", "0", "re" } },
    { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_sub_i64, { "r", "0", "re" } },
    { INDEX_op_and_i64, { "r", "0", "reZ" } },
    { INDEX_op_or_i64, { "r", "0", "re" } },
    { INDEX_op_xor_i64, { "r", "0", "re" } },

    { INDEX_op_shl_i64, { "r", "0", "ci" } },
    { INDEX_op_shr_i64, { "r", "0", "ci" } },
    { INDEX_op_sar_i64, { "r", "0", "ci" } },
    { INDEX_op_rotl_i64, { "r", "0", "ci" } },
    { INDEX_op_rotr_i64, { "r", "0", "ci" } },

    { INDEX_op_brcond_i64, { "r", "re" } },
    { INDEX_op_setcond_i64, { "r", "r", "re" } },

    { INDEX_op_bswap16_i64, { "r", "0" } },
    { INDEX_op_bswap32_i64, { "r", "0" } },
    { INDEX_op_bswap64_i64, { "r", "0" } },
    { INDEX_op_neg_i64, { "r", "0" } },
    { INDEX_op_not_i64, { "r", "0" } },

    { INDEX_op_ext8s_i64, { "r", "r" } },
    { INDEX_op_ext16s_i64, { "r", "r" } },
    { INDEX_op_ext32s_i64, { "r", "r" } },
    { INDEX_op_ext8u_i64, { "r", "r" } },
    { INDEX_op_ext16u_i64, { "r", "r" } },
    { INDEX_op_ext32u_i64, { "r", "r" } },

    { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
    { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },

    { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } },
    { INDEX_op_muls2_i64, { "a", "d", "a", "r" } },
    { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } },
    { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } },
#endif

#if TCG_TARGET_REG_BITS == 64
    { INDEX_op_qemu_ld_i32, { "r", "L" } },
    { INDEX_op_qemu_st_i32, { "L", "L" } },
    { INDEX_op_qemu_ld_i64, { "r", "L" } },
    { INDEX_op_qemu_st_i64, { "L", "L" } },
#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
    { INDEX_op_qemu_ld_i32, { "r", "L" } },
    { INDEX_op_qemu_st_i32, { "L", "L" } },
    { INDEX_op_qemu_ld_i64, { "r", "r", "L" } },
    { INDEX_op_qemu_st_i64, { "L", "L", "L" } },
#else
    { INDEX_op_qemu_ld_i32, { "r", "L", "L" } },
    { INDEX_op_qemu_st_i32, { "L", "L", "L" } },
    { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } },
    { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } },
#endif
    { -1 },
};

static int tcg_target_callee_save_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
#if defined(_WIN64)
    /* The Win64 ABI additionally treats RDI and RSI as callee-saved.  */
    TCG_REG_RDI,
    TCG_REG_RSI,
#endif
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14, /* Currently used for the global env. */
    TCG_REG_R15,
#else
    TCG_REG_EBP, /* Currently used for the global env. */
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
#endif
};

/* Compute frame size via macros, to share between tcg_target_qemu_prologue
   and tcg_register_jit.  */

#define PUSH_SIZE \
    ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
     * (TCG_TARGET_REG_BITS / 8))

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))

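/* Resulting frame layout (added, derived from the macros above), from
   high to low addresses: the return address plus the pushed callee-saved
   registers account for PUSH_SIZE (the "1 +" is the return address);
   below that sit the outgoing call-argument area and the TCG temporary
   buffer, with the total rounded up to TCG_TARGET_STACK_ALIGN.  */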
/* Generate global QEMU prologue and epilogue code */
static void tcg_target_qemu_prologue(TCGContext *s)
{
    int i, stack_addend;

    /* TB prologue */

    /* Reserve some stack space, also for TCG temps.  */
    stack_addend = FRAME_SIZE - PUSH_SIZE;
    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

    /* Save all callee saved registers.  */
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
        tcg_out_push(s, tcg_target_callee_save_regs[i]);
    }

#if TCG_TARGET_REG_BITS == 32
    /* The first argument (env) is on the stack, past the return address
       and the registers just pushed.  */
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
               (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
    /* jmp *tb: the second stack argument, located past env and the
       frame just allocated.  */
    tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
                         (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
                         + stack_addend);
#else
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
    /* jmp *tb.  */
    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
#endif

    /* TB epilogue */
    tb_ret_addr = s->code_ptr;

    tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);

    for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
    }
    tcg_out_opc(s, OPC_RET, 0, 0, 0);

#if !defined(CONFIG_SOFTMMU)
    /* Try to set up a segment register to point to GUEST_BASE.  */
    if (GUEST_BASE) {
        setup_guest_base_seg();
    }
#endif
}

static void tcg_target_init(TCGContext *s)
{
    /* For 32-bit, we're almost certainly running on hardware that supports
       cmov, but we still need to check.  If cmov is not available, we fall
       back to a small forward branch.  */
#ifndef have_cmov
    {
        unsigned a, b, c, d;
        /* CPUID leaf 1: the CMOV feature bit lives in EDX (bit_CMOV).  */
        have_cmov = (__get_cpuid(1, &a, &b, &c, &d) && (d & bit_CMOV));
    }
#endif
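
    /* Note (added): with REX prefixes, x86-64 exposes all sixteen
       registers for both 32- and 64-bit operations; in 32-bit mode only
       the eight legacy registers exist, hence 0xffff vs 0xff below.  */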
    if (TCG_TARGET_REG_BITS == 64) {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
    } else {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
    }

    tcg_regset_clear(tcg_target_call_clobber_regs);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
    if (TCG_TARGET_REG_BITS == 64) {
#if !defined(_WIN64)
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
#endif
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
    }

    tcg_regset_clear(s->reserved_regs);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);

    tcg_add_target_add_op_defs(x86_op_defs);
}

typedef struct {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[14];
} DebugFrame;

/* We're expecting a 2 byte uleb128 encoded value.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
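
/* Key to the DWARF CFA bytes used below (added): DW_CFA_def_cfa (0x0c)
   names the stack-pointer register and the uleb128-encoded frame size;
   DW_CFA_offset is encoded as (0x80 | regnum) followed by a slot index,
   and the index is multiplied by data_align to give the offset from the
   CFA.  For example, on x86-64 "0x86, 2" means register 6 (%rbp) is
   saved at CFA + 2 * (-8) = CFA - 16.  */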

#if !defined(__ELF__)
    /* Host machine without ELF.  */
#elif TCG_TARGET_REG_BITS == 64
#define ELF_HOST_MACHINE EM_X86_64
static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .cie.id = -1,
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = 0x78,             /* sleb128 -8 */
    .cie.return_column = 16,

    /* Total FDE size does not include the "len" member.  */
    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),

    .fde_def_cfa = {
        12, 7,                          /* DW_CFA_def_cfa %rsp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x90, 1,                        /* DW_CFA_offset, %rip, -8 */
        /* The following ordering must match tcg_target_callee_save_regs.  */
        0x86, 2,                        /* DW_CFA_offset, %rbp, -16 */
        0x83, 3,                        /* DW_CFA_offset, %rbx, -24 */
        0x8c, 4,                        /* DW_CFA_offset, %r12, -32 */
        0x8d, 5,                        /* DW_CFA_offset, %r13, -40 */
        0x8e, 6,                        /* DW_CFA_offset, %r14, -48 */
        0x8f, 7,                        /* DW_CFA_offset, %r15, -56 */
    }
};
#else
#define ELF_HOST_MACHINE EM_386
static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .cie.id = -1,
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = 0x7c,             /* sleb128 -4 */
    .cie.return_column = 8,

    /* Total FDE size does not include the "len" member.  */
    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),

    .fde_def_cfa = {
        12, 4,                          /* DW_CFA_def_cfa %esp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x88, 1,                        /* DW_CFA_offset, %eip, -4 */
        /* The following ordering must match tcg_target_callee_save_regs.  */
        0x85, 2,                        /* DW_CFA_offset, %ebp, -8 */
        0x83, 3,                        /* DW_CFA_offset, %ebx, -12 */
        0x86, 4,                        /* DW_CFA_offset, %esi, -16 */
        0x87, 5,                        /* DW_CFA_offset, %edi, -20 */
    }
};
#endif

#if defined(ELF_HOST_MACHINE)
void tcg_register_jit(void *buf, size_t buf_size)
{
    /* Fill in the code range covered by this FDE, then hand the unwind
       info to the common code for registration.  */
    debug_frame.fde.func_start = (uintptr_t)buf;
    debug_frame.fde.func_len = buf_size;

    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif