 * Tiny Code Generator for QEMU
 * Copyright (c) 2008 Fabrice Bellard
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN

#include "tcg-be-ldst.h"

static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
    "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",

static const int tcg_target_reg_alloc_order[] = {
#if TCG_TARGET_REG_BITS == 64

static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    /* 32 bit mode uses stack based calling convention (GCC default). */

static const int tcg_target_call_oarg_regs[] = {
#if TCG_TARGET_REG_BITS == 32

/* Registers used with L constraint, which are the first argument
   registers on x86_64, and two random call clobbered registers on
#if TCG_TARGET_REG_BITS == 64
# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
# define TCG_REG_L0 TCG_REG_EAX
# define TCG_REG_L1 TCG_REG_EDX

/* For 32-bit, we are going to attempt to determine at runtime whether cmov
   is available.  However, the host compiler must supply <cpuid.h>, as we're
   not going to go so far as our own inline assembly. */
#if TCG_TARGET_REG_BITS == 64
#elif defined(CONFIG_CPUID_H)
static bool have_cmov;

static uint8_t *tb_ret_addr;

static void patch_reloc(uint8_t *code_ptr, int type,
                        intptr_t value, intptr_t addend)
        value -= (uintptr_t)code_ptr;
        if (value != (int32_t)value) {
        *(uint32_t *)code_ptr = value;
        value -= (uintptr_t)code_ptr;
        if (value != (int8_t)value) {
        *(uint8_t *)code_ptr = value;

/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
            tcg_regset_set32(ct->u.regs, 0, 0xf);
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, 0xf);
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
            tcg_regset_set32(ct->u.regs, 0, 0xff);

        /* qemu_ld/st address constraint */
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
        ct->ct |= TCG_CT_CONST_S32;
        ct->ct |= TCG_CT_CONST_U32;

/* test if a constant matches the constraint */
static inline int tcg_target_const_match(tcg_target_long val,
                                         const TCGArgConstraint *arg_ct)
    if (ct & TCG_CT_CONST) {
    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
    if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {

#if TCG_TARGET_REG_BITS == 64
# define LOWREGMASK(x) ((x) & 7)
# define LOWREGMASK(x) (x)

#define P_EXT      0x100  /* 0x0f opcode prefix */
#define P_DATA16   0x200  /* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32  0x400  /* 0x67 opcode prefix */
# define P_REXW    0x800  /* Set REX.W = 1 */
# define P_REXB_R  0x1000 /* REG field as byte register */
# define P_REXB_RM 0x2000 /* R/M field as byte register */
# define P_GS      0x4000 /* gs segment override */
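
/* Editor's illustration (not part of the original source): these flag bits
 * live above the low opcode byte and are turned into real prefix bytes when
 * the opcode is emitted.  For example, OPC_MOVZBL below is (0xb6 | P_EXT),
 * so emitting it produces the two-byte sequence 0f b6; OR-ing in P_DATA16
 * would additionally prepend the 0x66 operand-size prefix, and OR-ing in
 * P_REXW forces a REX prefix with REX.W set so the same template produces
 * the 64-bit form of the instruction.
 */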
#define OPC_ARITH_EvIz  (0x81)
#define OPC_ARITH_EvIb  (0x83)
#define OPC_ARITH_GvEv  (0x03)        /* ... plus (ARITH_FOO << 3) */
#define OPC_ADD_GvEv    (OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSWAP       (0xc8 | P_EXT)
#define OPC_CALL_Jz     (0xe8)
#define OPC_CMOVCC      (0x40 | P_EXT)  /* ... plus condition code */
#define OPC_CMP_GvEv    (OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32     (0x48)
#define OPC_IMUL_GvEv   (0xaf | P_EXT)
#define OPC_IMUL_GvEvIb (0x6b)
#define OPC_IMUL_GvEvIz (0x69)
#define OPC_INC_r32     (0x40)
#define OPC_JCC_long    (0x80 | P_EXT)  /* ... plus condition code */
#define OPC_JCC_short   (0x70)          /* ... plus condition code */
#define OPC_JMP_long    (0xe9)
#define OPC_JMP_short   (0xeb)
#define OPC_LEA         (0x8d)
#define OPC_MOVB_EvGv   (0x88)          /* stores, more or less */
#define OPC_MOVL_EvGv   (0x89)          /* stores, more or less */
#define OPC_MOVL_GvEv   (0x8b)          /* loads, more or less */
#define OPC_MOVB_EvIz   (0xc6)
#define OPC_MOVL_EvIz   (0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVSBL      (0xbe | P_EXT)
#define OPC_MOVSWL      (0xbf | P_EXT)
#define OPC_MOVSLQ      (0x63 | P_REXW)
#define OPC_MOVZBL      (0xb6 | P_EXT)
#define OPC_MOVZWL      (0xb7 | P_EXT)
#define OPC_POP_r32     (0x58)
#define OPC_PUSH_r32    (0x50)
#define OPC_PUSH_Iv     (0x68)
#define OPC_PUSH_Ib     (0x6a)
#define OPC_RET         (0xc3)
#define OPC_SETCC       (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
#define OPC_SHIFT_1     (0xd1)
#define OPC_SHIFT_Ib    (0xc1)
#define OPC_SHIFT_cl    (0xd3)
#define OPC_TESTL       (0x85)
#define OPC_XCHG_ax_r32 (0x90)
#define OPC_GRP3_Ev     (0xf7)
#define OPC_GRP5        (0xff)

/* Group 1 opcode extensions for 0x80-0x83.
   These are also used as modifiers for OPC_ARITH. */

/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */

/* Group 3 opcode extensions for 0xf6, 0xf7. To be used with OPC_GRP3. */

/* Group 5 opcode extensions for 0xff. To be used with OPC_GRP5. */
#define EXT5_INC_Ev     0
#define EXT5_DEC_Ev     1
#define EXT5_CALLN_Ev   2
#define EXT5_JMPN_Ev    4

/* Condition codes to be added to OPC_JCC_{long,short}. */

static const uint8_t tcg_cond_to_jcc[] = {
    [TCG_COND_EQ] = JCC_JE,
    [TCG_COND_NE] = JCC_JNE,
    [TCG_COND_LT] = JCC_JL,
    [TCG_COND_GE] = JCC_JGE,
    [TCG_COND_LE] = JCC_JLE,
    [TCG_COND_GT] = JCC_JG,
    [TCG_COND_LTU] = JCC_JB,
    [TCG_COND_GEU] = JCC_JAE,
    [TCG_COND_LEU] = JCC_JBE,
    [TCG_COND_GTU] = JCC_JA,

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
    if (opc & P_DATA16) {
        /* We should never be asking for both 16 and 64-bit operation. */
        assert((opc & P_REXW) == 0);
    if (opc & P_ADDR32) {
    rex |= (opc & P_REXW) >> 8;   /* REX.W */
    rex |= (r & 8) >> 1;          /* REX.R */
    rex |= (x & 8) >> 2;          /* REX.X */
    rex |= (rm & 8) >> 3;         /* REX.B */

    /* P_REXB_{R,RM} indicates that the given register is the low byte.
       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
       as otherwise the encoding indicates %[abcd]h.  Note that the values
       that are ORed in merely indicate that the REX byte must be present;
       those bits get discarded in output. */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);
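
    /* Editor's example (illustrative, not from the original source):
       "movzbl %bl, %eax" needs no REX and encodes as 0f b6 c3, but
       "movzbl %sil, %eax" must carry an otherwise-empty REX byte,
       40 0f b6 c6, because without REX the R/M value 6 would be
       interpreted as %dh rather than %sil. */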
        tcg_out8(s, (uint8_t)(rex | 0x40));

static void tcg_out_opc(TCGContext *s, int opc)
    if (opc & P_DATA16) {

/* Discard the register arguments to tcg_out_opc early, so as not to penalize
   the 32-bit compilation paths.  This method works with all versions of gcc,
   whereas relying on optimization may not be able to exclude them. */
#define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)

static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
    tcg_out_opc(s, opc, r, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));

/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
   We handle either RM and INDEX missing with a negative value.  In 64-bit
   mode for absolute addresses, ~RM is the size of the immediate operand
   that will follow the instruction. */
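
/* Editor's example (illustrative call, register choices are arbitrary):
 *     tcg_out_modrm_sib_offset(s, OPC_MOVL_GvEv, TCG_REG_EAX,
 *                              TCG_REG_EBX, TCG_REG_ECX, 2, 0x10);
 * emits "movl 0x10(%ebx,%ecx,4), %eax"; passing -1 for the index (as the
 * tcg_out_modrm_offset wrapper below does) drops the SIB index entirely.
 */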
static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
                                     int index, int shift, intptr_t offset)
    if (index < 0 && rm < 0) {
        if (TCG_TARGET_REG_BITS == 64) {
            /* Try for a rip-relative addressing mode.  This has replaced
               the 32-bit-mode absolute addressing encoding. */
            intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
            intptr_t disp = offset - pc;
            if (disp == (int32_t)disp) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);

            /* Try for an absolute address encoding.  This requires the
               use of the MODRM+SIB encoding and is therefore larger than
               rip-relative addressing. */
            if (offset == (int32_t)offset) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
                tcg_out8(s, (4 << 3) | 5);
                tcg_out32(s, offset);

            /* ??? The memory isn't directly addressable. */

            /* Absolute address. */
            tcg_out_opc(s, opc, r, 0, 0);
            tcg_out8(s, (r << 3) | 5);
            tcg_out32(s, offset);

    /* Find the length of the immediate addend.  Note that the encoding
       that would be used for (%ebp) indicates absolute addressing. */
        mod = 0, len = 4, rm = 5;
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
    } else if (offset == (int8_t)offset) {

    /* Use a single byte MODRM format if possible.  Note that the encoding
       that would be used for %esp is the escape to the two byte form. */
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
        /* Single byte MODRM format. */
        tcg_out_opc(s, opc, r, rm, 0);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
        /* Two byte MODRM+SIB format. */

        /* Note that the encoding that would place %esp into the index
           field indicates no index register.  In 64-bit mode, the REX.X
           bit counts, so %r12 can be used as the index. */
            assert(index != TCG_REG_ESP);
        tcg_out_opc(s, opc, r, rm, index);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
    } else if (len == 4) {
        tcg_out32(s, offset);

/* A simplification of the above with no index or shift. */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
                                        int rm, intptr_t offset)
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);

/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi. */
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
    /* Propagate an opcode prefix, such as P_REXW. */
    int ext = subop & ~0x7;
    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);

static inline void tcg_out_mov(TCGContext *s, TCGType type,
                               TCGReg ret, TCGReg arg)
        int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
        tcg_out_modrm(s, opc, ret, arg);

static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg)
    tcg_target_long diff;
        tgen_arithr(s, ARITH_XOR, ret, ret);
    if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
    if (arg == (int32_t)arg) {
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);

    /* Try a 7 byte pc-relative lea before the 10 byte movq. */
    diff = arg - ((uintptr_t)s->code_ptr + 7);
    if (diff == (int32_t)diff) {
        tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
        tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
    tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);

static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
    if (val == (int8_t)val) {
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
    } else if (val == (int32_t)val) {
        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);

static inline void tcg_out_push(TCGContext *s, int reg)
    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);

static inline void tcg_out_pop(TCGContext *s, int reg)
    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);

static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                              TCGReg arg1, intptr_t arg2)
    int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, ret, arg1, arg2);

static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
    int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, arg, arg1, arg2);

static inline void tcg_out_sti(TCGContext *s, TCGType type, TCGReg base,
                               tcg_target_long ofs, tcg_target_long val)
    int opc = OPC_MOVL_EvIz + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, 0, base, ofs);

static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
    /* Propagate an opcode prefix, such as P_DATA16. */
    int ext = subopc & ~0x7;
        tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
        tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);

static inline void tcg_out_bswap32(TCGContext *s, int reg)
    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);

static inline void tcg_out_rolw_8(TCGContext *s, int reg)
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);

static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);

static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);

static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);

static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);

static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
    /* 32-bit mov zero extends. */
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);

static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);

static inline void tcg_out_bswap64(TCGContext *s, int reg)
    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);

static void tgen_arithi(TCGContext *s, int c, int r0,
                        tcg_target_long val, int cf)
    if (TCG_TARGET_REG_BITS == 64) {

    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
       partial flags update stalls on Pentium4 and are not recommended
       by current Intel optimization manuals. */
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
        int is_inc = (c == ARITH_ADD) ^ (val < 0);
        if (TCG_TARGET_REG_BITS == 64) {
            /* The single-byte increment encodings are re-tasked as the
               REX prefixes.  Use the MODRM encoding. */
            tcg_out_modrm(s, OPC_GRP5 + rexw,
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
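
            /* Editor's example (illustrative, not from the original source):
               in 32-bit mode "inc %eax" is the single byte 0x40, but in
               64-bit mode 0x40-0x4f are REX prefixes, so the same operation
               is emitted via the group-5 form ff c0 (48 ff c0 for the
               64-bit "incq %rax"). */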
    if (c == ARITH_AND) {
        if (TCG_TARGET_REG_BITS == 64) {
            if (val == 0xffffffffu) {
                tcg_out_ext32u(s, r0, r0);
            if (val == (uint32_t)val) {
                /* AND with no high bits set can use a 32-bit operation. */
        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
            tcg_out_ext8u(s, r0, r0);
        if (val == 0xffffu) {
            tcg_out_ext16u(s, r0, r0);
    if (val == (int8_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
    if (rexw == 0 || val == (int32_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);

static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
        tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);

/* Use SMALL != 0 to force a short forward branch. */
static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
    TCGLabel *l = &s->labels[label_index];
        val = l->u.value - (intptr_t)s->code_ptr;
        if ((int8_t)val1 == val1) {
                tcg_out8(s, OPC_JMP_short);
                tcg_out8(s, OPC_JCC_short + opc);
                tcg_out8(s, OPC_JMP_long);
                tcg_out32(s, val - 5);
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
                tcg_out32(s, val - 6);
            tcg_out8(s, OPC_JMP_short);
            tcg_out8(s, OPC_JCC_short + opc);
        tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
            tcg_out8(s, OPC_JMP_long);
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);

static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int rexw)
            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);

static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);

/* XXX: we implement it at the target level to avoid having to
   handle cross basic blocks temporaries */
static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args, int small)
    label_next = gen_new_label();
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
    tcg_out_label(s, label_next, s->code_ptr);

static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);

static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
    int label_true, label_over;
    memcpy(new_args, args+1, 5*sizeof(TCGArg));
    if (args[0] == args[1] || args[0] == args[2]
        || (!const_args[3] && args[0] == args[3])
        || (!const_args[4] && args[0] == args[4])) {
        /* When the destination overlaps with one of the argument
           registers, don't do anything tricky. */
        label_true = gen_new_label();
        label_over = gen_new_label();
        new_args[5] = label_true;
        tcg_out_brcond2(s, new_args, const_args+1, 1);
        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
        tcg_out_jxx(s, JCC_JMP, label_over, 1);
        tcg_out_label(s, label_true, s->code_ptr);
        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
        tcg_out_label(s, label_over, s->code_ptr);
        /* When the destination does not overlap one of the arguments,
           clear the destination first, jump if cond false, and emit an
           increment in the true case.  This results in smaller code. */
        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
        label_over = gen_new_label();
        new_args[4] = tcg_invert_cond(new_args[4]);
        new_args[5] = label_over;
        tcg_out_brcond2(s, new_args, const_args+1, 1);
        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
        tcg_out_label(s, label_over, s->code_ptr);

static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg c1, TCGArg c2, int const_c2,
    tcg_out_cmp(s, c1, c2, const_c2, 0);
        tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
        int over = gen_new_label();
        tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
        tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
        tcg_out_label(s, over, s->code_ptr);

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg c1, TCGArg c2, int const_c2,
    tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
    tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);

static void tcg_out_branch(TCGContext *s, int call, uintptr_t dest)
    intptr_t disp = dest - (intptr_t)s->code_ptr - 5;
    if (disp == (int32_t)disp) {
        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
        tcg_out_modrm(s, OPC_GRP5,
                      call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
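
        /* Editor's note (illustrative): when the destination is within a
           signed 32-bit displacement the direct e8/e9 rel32 form above is
           used; otherwise the full 64-bit address is loaded into %r10 and
           the call or jump goes indirectly through it (ff /2 or ff /4),
           since direct call/jmp cannot reach beyond +/-2GB. */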
static inline void tcg_out_calli(TCGContext *s, uintptr_t dest)
    tcg_out_branch(s, 1, dest);

static void tcg_out_jmp(TCGContext *s, uintptr_t dest)
    tcg_out_branch(s, 0, dest);

#if defined(CONFIG_SOFTMMU)
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     int mmu_idx, uintptr_t ra)
 */
static const void * const qemu_ld_helpers[4] = {
    helper_ret_ldub_mmu,
    helper_ret_lduw_mmu,
    helper_ret_ldul_mmu,

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
 */
static const void * const qemu_st_helpers[4] = {

static void add_qemu_ldst_label(TCGContext *s,
                                uint8_t **label_ptr);

/* Perform the TLB load and compare.

   ADDRLO_IDX contains the index into ARGS of the low part of the
   address; the high part of the address is at ADDR_LOW_IDX+1.

   MEM_INDEX and S_BITS are the memory context and log2 size of the load.

   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
   This should be offsetof addr_read or addr_write.

   LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
   positions of the displacements of forward jumps to the TLB miss case.

   Second argument register is loaded with the low part of the address.
   In the TLB hit case, it has been adjusted as indicated by the TLB
   and so is a host address.  In the TLB miss case, it continues to
   hold a guest address.

   First argument register is clobbered. */
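
/* Editor's sketch of the sequence emitted below (illustrative; r0/r1 stand
 * for TCG_REG_L0/TCG_REG_L1 and %env for TCG_AREG0):
 *
 *     mov   addrlo, r0
 *     mov   addrlo, r1
 *     shr   $(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS), r0
 *     and   $(TARGET_PAGE_MASK | ((1 << s_bits) - 1)), r1
 *     and   $((CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS), r0
 *     lea   tlb_table[mem_index](%env, r0), r0
 *     cmp   0(r0), r1              ; against addr_read / addr_write
 *     mov   addrlo, r1             ; reload full address for the slow path
 *     jne   tlb_miss
 *     (64-bit guest on 32-bit host: cmp 4(r0), addrhi ; jne tlb_miss)
 *     add   addend(r0), r1         ; r1 is now the host address
 */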
static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
                                    int mem_index, int s_bits,
                                    uint8_t **label_ptr, int which)
    const int addrlo = args[addrlo_idx];
    const int r0 = TCG_REG_L0;
    const int r1 = TCG_REG_L1;
    TCGType ttype = TCG_TYPE_I32;
    TCGType htype = TCG_TYPE_I32;
    int trexw = 0, hrexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        if (TARGET_LONG_BITS == 64) {
            ttype = TCG_TYPE_I64;
        if (TCG_TYPE_PTR == TCG_TYPE_I64) {
            htype = TCG_TYPE_I64;

    tcg_out_mov(s, htype, r0, addrlo);
    tcg_out_mov(s, ttype, r1, addrlo);

    tcg_out_shifti(s, SHIFT_SHR + hrexw, r0,
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    tgen_arithi(s, ARITH_AND + trexw, r1,
                TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
    tgen_arithi(s, ARITH_AND + hrexw, r0,
                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);

    tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0,
                             offsetof(CPUArchState, tlb_table[mem_index][0])

    tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0);

    /* Prepare for both the fast path add of the tlb addend, and the slow
       path function argument setup.  There are two cases worth noting:
       For 32-bit guest and x86_64 host, MOVL zero-extends the guest address
       before the fastpath ADDQ below.  For 64-bit guest and x32 host, MOVQ
       copies the entire guest address for the slow path, while truncation
       for the 32-bit host happens with the fastpath ADDL below. */
    tcg_out_mov(s, ttype, r1, addrlo);

    tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
    label_ptr[0] = s->code_ptr;

    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        /* cmp 4(r0), addrhi */
        tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r0, 4);

        tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
        label_ptr[1] = s->code_ptr;

    /* add addend(r0), r1 */
    tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
                         offsetof(CPUTLBEntry, addend) - which);

#elif defined(__x86_64__) && defined(__linux__)
# include <asm/prctl.h>
# include <sys/prctl.h>

int arch_prctl(int code, unsigned long addr);

static int guest_base_flags;
static inline void setup_guest_base_seg(void)
    if (arch_prctl(ARCH_SET_GS, GUEST_BASE) == 0) {
        guest_base_flags = P_GS;
# define guest_base_flags 0
static inline void setup_guest_base_seg(void) { }
#endif /* SOFTMMU */
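
/* Editor's note (illustrative): in the user-only (non-SOFTMMU) case above,
 * GUEST_BASE is installed as the %gs segment base via arch_prctl(ARCH_SET_GS),
 * and guest_base_flags == P_GS then makes the emitted memory accesses carry
 * the gs-override prefix, e.g. "movl %gs:(%reg), %reg", so no explicit base
 * addition is needed on the fast path.
 */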
static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
                                   int base, intptr_t ofs, int seg, int sizeop)
#ifdef TARGET_WORDS_BIGENDIAN
    const int bswap = 1;
    const int bswap = 0;
        tcg_out_modrm_offset(s, OPC_MOVZBL + seg, datalo, base, ofs);
        tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, base, ofs);
        tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
            tcg_out_rolw_8(s, datalo);
            tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
            tcg_out_rolw_8(s, datalo);
            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
            tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW + seg,
        tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
            tcg_out_bswap32(s, datalo);
#if TCG_TARGET_REG_BITS == 64
            tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
            tcg_out_bswap32(s, datalo);
            tcg_out_ext32s(s, datalo, datalo);
            tcg_out_modrm_offset(s, OPC_MOVSLQ + seg, datalo, base, ofs);
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_modrm_offset(s, OPC_MOVL_GvEv + P_REXW + seg,
                tcg_out_bswap64(s, datalo);
            if (base != datalo) {
                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
                                     datahi, base, ofs + 4);
                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
                                     datahi, base, ofs + 4);
                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
                tcg_out_bswap32(s, datalo);
                tcg_out_bswap32(s, datahi);

/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
   EAX. It will be useful once fixed registers globals are less
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
    int data_reg, data_reg2 = 0;
#if defined(CONFIG_SOFTMMU)
    int mem_index, s_bits;
    uint8_t *label_ptr[2];

    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
        data_reg2 = args[1];

#if defined(CONFIG_SOFTMMU)
    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];

    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
                     label_ptr, offsetof(CPUTLBEntry, addr_read));

    tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L1, 0, 0, opc);

    /* Record the current context of a load into ldst label */
    add_qemu_ldst_label(s,
                        args[addrlo_idx + 1],
        int32_t offset = GUEST_BASE;
        int base = args[addrlo_idx];

        /* ??? We assume all operations have left us with register contents
           that are zero extended.  So far this appears to be true.  If we
           want to enforce this, we can either do an explicit zero-extension
           here, or (if GUEST_BASE == 0, or a segment register is in use)
           use the ADDR32 prefix.  For now, do nothing. */
        if (GUEST_BASE && guest_base_flags) {
            seg = guest_base_flags;
        } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
            tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);

        tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, seg, opc);

static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
                                   int base, intptr_t ofs, int seg,
#ifdef TARGET_WORDS_BIGENDIAN
    const int bswap = 1;
    const int bswap = 0;
    /* ??? Ideally we wouldn't need a scratch register.  For user-only,
       we could perform the bswap twice to restore the original value
       instead of moving to the scratch.  But as it is, the L constraint
       means that TCG_REG_L0 is definitely free here. */
    const int scratch = TCG_REG_L0;
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_rolw_8(s, scratch);
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16 + seg,
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
        if (TCG_TARGET_REG_BITS == 64) {
                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
                tcg_out_bswap64(s, scratch);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_REXW + seg,
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
            tcg_out_bswap32(s, scratch);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datahi, base, ofs+4);

static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
    int data_reg, data_reg2 = 0;
#if defined(CONFIG_SOFTMMU)
    int mem_index, s_bits;
    uint8_t *label_ptr[2];

    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
        data_reg2 = args[1];

#if defined(CONFIG_SOFTMMU)
    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];

    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
                     label_ptr, offsetof(CPUTLBEntry, addr_write));

    tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L1, 0, 0, opc);

    /* Record the current context of a store into ldst label */
    add_qemu_ldst_label(s,
                        args[addrlo_idx + 1],
        int32_t offset = GUEST_BASE;
        int base = args[addrlo_idx];

        /* ??? We assume all operations have left us with register contents
           that are zero extended.  So far this appears to be true.  If we
           want to enforce this, we can either do an explicit zero-extension
           here, or (if GUEST_BASE == 0, or a segment register is in use)
           use the ADDR32 prefix.  For now, do nothing. */
        if (GUEST_BASE && guest_base_flags) {
            seg = guest_base_flags;
        } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
            tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);

        tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, seg, opc);

#if defined(CONFIG_SOFTMMU)
/*
 * Record the context of a call to the out of line helper code for the slow path
 * for a load or store, so that we can later generate the correct helper code
 */
static void add_qemu_ldst_label(TCGContext *s,
                                uint8_t **label_ptr)
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->datalo_reg = data_reg;
    label->datahi_reg = data_reg2;
    label->addrlo_reg = addrlo_reg;
    label->addrhi_reg = addrhi_reg;
    label->mem_index = mem_index;
    label->raddr = raddr;
    label->label_ptr[0] = label_ptr[0];
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        label->label_ptr[1] = label_ptr[1];

/*
 * Generate code for the slow path for a load at the end of block
 */
static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
    int s_bits = opc & 3;
    uint8_t **label_ptr = &l->label_ptr[0];

    /* resolve label address */
    *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);

    if (TCG_TARGET_REG_BITS == 32) {
        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);

        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);

        if (TARGET_LONG_BITS == 64) {
            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);

        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);

        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, (uintptr_t)l->raddr);
        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
        /* The second argument is already loaded with addrlo. */
        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
        tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
                     (uintptr_t)l->raddr);

    tcg_out_calli(s, (uintptr_t)qemu_ld_helpers[s_bits]);

    data_reg = l->datalo_reg;
        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
#if TCG_TARGET_REG_BITS == 64
        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
        /* Note that the helpers have zero-extended to tcg_target_long. */
        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
        } else if (data_reg == TCG_REG_EDX) {
            /* xchg %edx, %eax */
            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);

    /* Jump to the code corresponding to the next IR of the qemu_ld */
    tcg_out_jmp(s, (uintptr_t)l->raddr);

/*
 * Generate code for the slow path for a store at the end of block
 */
static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
    int s_bits = opc & 3;
    uint8_t **label_ptr = &l->label_ptr[0];

    /* resolve label address */
    *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);

    if (TCG_TARGET_REG_BITS == 32) {
        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);

        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);

        if (TARGET_LONG_BITS == 64) {
            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);

        tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);

            tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);

        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);

        retaddr = TCG_REG_EAX;
        tcg_out_movi(s, TCG_TYPE_I32, retaddr, (uintptr_t)l->raddr);
        tcg_out_st(s, TCG_TYPE_I32, retaddr, TCG_REG_ESP, ofs);
        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
        /* The second argument is already loaded with addrlo. */
        tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                    tcg_target_call_iarg_regs[2], l->datalo_reg);
        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],

        if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
            retaddr = tcg_target_call_iarg_regs[4];
            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
            retaddr = TCG_REG_RAX;
            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
            tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, 0);

    /* "Tail call" to the helper, with the return address back inline. */
    tcg_out_push(s, retaddr);
    tcg_out_jmp(s, (uintptr_t)qemu_st_helpers[s_bits]);
#endif /* CONFIG_SOFTMMU */

static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                              const TCGArg *args, const int *const_args)
#if TCG_TARGET_REG_BITS == 64
# define OP_32_64(x) \
    case glue(glue(INDEX_op_, x), _i64): \
        rexw = P_REXW; /* FALLTHRU */ \
    case glue(glue(INDEX_op_, x), _i32)
# define OP_32_64(x) \
    case glue(glue(INDEX_op_, x), _i32)
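
/* Editor's note (illustrative expansion, not from the original source):
 * on a 64-bit host, OP_32_64(add) expands to
 *     case INDEX_op_add_i64:
 *         rexw = P_REXW;  // FALLTHRU
 *     case INDEX_op_add_i32:
 * so a single case body below handles both operand widths, with rexw
 * selecting the REX.W-prefixed encoding for the 64-bit variant.
 */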
    case INDEX_op_exit_tb:
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
        tcg_out_jmp(s, (uintptr_t)tb_ret_addr);
    case INDEX_op_goto_tb:
        if (s->tb_jmp_offset) {
            /* direct jump method */
            tcg_out8(s, OPC_JMP_long); /* jmp im */
            s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
            /* indirect jump method */
            tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
                                 (intptr_t)(s->tb_next + args[0]));
        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
        if (const_args[0]) {
            tcg_out_calli(s, args[0]);
            tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
        tcg_out_jxx(s, JCC_JMP, args[0], 0);
    case INDEX_op_movi_i32:
        tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
        /* Note that we can ignore REXW for the zero-extend to 64-bit. */
        tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
        tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
        /* Note that we can ignore REXW for the zero-extend to 64-bit. */
        tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
        tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld_i32:
        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVB_EvIz,
                                 0, args[1], args[2]);
            tcg_out8(s, args[0]);
            tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
                                 args[0], args[1], args[2]);
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16,
                                 0, args[1], args[2]);
            tcg_out16(s, args[0]);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
                                 args[0], args[1], args[2]);
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_st32_i64:
    case INDEX_op_st_i32:
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]);
            tcg_out32(s, args[0]);
            tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        /* For 3-operand addition, use LEA. */
        if (args[0] != args[1]) {
            TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;
            if (const_args[2]) {
            } else if (a0 == a2) {
                /* Watch out for dest = src + dest, since we've removed
                   the matching constraint on the add. */
                tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
            tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
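
            /* Editor's example (illustrative): with a register second
               operand this emits e.g. "leal (%ebx,%ecx), %eax", and with a
               constant operand "leal 0x10(%ebx), %eax", computing the sum
               into the destination without clobbering either source. */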
        if (const_args[2]) {
            tgen_arithi(s, c + rexw, args[0], args[2], 0);
            tgen_arithr(s, c + rexw, args[0], args[2]);
        if (const_args[2]) {
            if (val == (int8_t)val) {
                tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
                tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
            tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
        if (const_args[2]) {
            tcg_out_shifti(s, c + rexw, args[0], args[2]);
            tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
    case INDEX_op_brcond_i32:
        tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
    case INDEX_op_setcond_i32:
        tcg_out_setcond32(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
    case INDEX_op_movcond_i32:
        tcg_out_movcond32(s, args[5], args[0], args[1],
                          args[2], const_args[2], args[3]);
        tcg_out_rolw_8(s, args[0]);
        tcg_out_bswap32(s, args[0]);
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
        tcg_out_ext8s(s, args[0], args[1], rexw);
        tcg_out_ext16s(s, args[0], args[1], rexw);
        tcg_out_ext8u(s, args[0], args[1]);
        tcg_out_ext16u(s, args[0], args[1]);
    case INDEX_op_qemu_ld8u:
        tcg_out_qemu_ld(s, args, 0);
    case INDEX_op_qemu_ld8s:
        tcg_out_qemu_ld(s, args, 0 | 4);
    case INDEX_op_qemu_ld16u:
        tcg_out_qemu_ld(s, args, 1);
    case INDEX_op_qemu_ld16s:
        tcg_out_qemu_ld(s, args, 1 | 4);
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_qemu_ld32u:
    case INDEX_op_qemu_ld32:
        tcg_out_qemu_ld(s, args, 2);
    case INDEX_op_qemu_ld64:
        tcg_out_qemu_ld(s, args, 3);
    case INDEX_op_qemu_st8:
        tcg_out_qemu_st(s, args, 0);
    case INDEX_op_qemu_st16:
        tcg_out_qemu_st(s, args, 1);
    case INDEX_op_qemu_st32:
        tcg_out_qemu_st(s, args, 2);
    case INDEX_op_qemu_st64:
        tcg_out_qemu_st(s, args, 3);
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
        if (const_args[4]) {
            tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1);
            tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]);
        if (const_args[5]) {
            tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1);
            tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]);
        if (const_args[4]) {
            tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1);
            tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]);
        if (const_args[5]) {
            tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1);
            tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]);
#if TCG_TARGET_REG_BITS == 32
    case INDEX_op_brcond2_i32:
        tcg_out_brcond2(s, args, const_args, 0);
    case INDEX_op_setcond2_i32:
        tcg_out_setcond2(s, args, const_args);
#else /* TCG_TARGET_REG_BITS == 64 */
    case INDEX_op_movi_i64:
        tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
    case INDEX_op_ld32s_i64:
        tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
    case INDEX_op_ld_i64:
        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
    case INDEX_op_st_i64:
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW,
                                 0, args[1], args[2]);
            tcg_out32(s, args[0]);
            tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
    case INDEX_op_qemu_ld32s:
        tcg_out_qemu_ld(s, args, 2 | 4);
    case INDEX_op_brcond_i64:
        tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
    case INDEX_op_setcond_i64:
        tcg_out_setcond64(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
    case INDEX_op_movcond_i64:
        tcg_out_movcond64(s, args[5], args[0], args[1],
                          args[2], const_args[2], args[3]);
    case INDEX_op_bswap64_i64:
        tcg_out_bswap64(s, args[0]);
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, args[0], args[1]);
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, args[0], args[1]);
        if (args[3] == 0 && args[4] == 8) {
            /* load bits 0..7 */
            tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
        } else if (args[3] == 8 && args[4] == 8) {
            /* load bits 8..15 */
            tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
        } else if (args[3] == 0 && args[4] == 16) {
            /* load bits 0..15 */
            tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);

static const TCGTargetOpDef x86_op_defs[] = {
    { INDEX_op_exit_tb, { } },
    { INDEX_op_goto_tb, { } },
    { INDEX_op_call, { "ri" } },
    { INDEX_op_br, { } },
    { INDEX_op_mov_i32, { "r", "r" } },
    { INDEX_op_movi_i32, { "r" } },
    { INDEX_op_ld8u_i32, { "r", "r" } },
    { INDEX_op_ld8s_i32, { "r", "r" } },
    { INDEX_op_ld16u_i32, { "r", "r" } },
    { INDEX_op_ld16s_i32, { "r", "r" } },
    { INDEX_op_ld_i32, { "r", "r" } },
    { INDEX_op_st8_i32, { "qi", "r" } },
    { INDEX_op_st16_i32, { "ri", "r" } },
    { INDEX_op_st_i32, { "ri", "r" } },

    { INDEX_op_add_i32, { "r", "r", "ri" } },
    { INDEX_op_sub_i32, { "r", "0", "ri" } },
    { INDEX_op_mul_i32, { "r", "0", "ri" } },
    { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_and_i32, { "r", "0", "ri" } },
    { INDEX_op_or_i32, { "r", "0", "ri" } },
    { INDEX_op_xor_i32, { "r", "0", "ri" } },

    { INDEX_op_shl_i32, { "r", "0", "ci" } },
    { INDEX_op_shr_i32, { "r", "0", "ci" } },
    { INDEX_op_sar_i32, { "r", "0", "ci" } },
    { INDEX_op_rotl_i32, { "r", "0", "ci" } },
    { INDEX_op_rotr_i32, { "r", "0", "ci" } },

    { INDEX_op_brcond_i32, { "r", "ri" } },

    { INDEX_op_bswap16_i32, { "r", "0" } },
    { INDEX_op_bswap32_i32, { "r", "0" } },

    { INDEX_op_neg_i32, { "r", "0" } },

    { INDEX_op_not_i32, { "r", "0" } },

    { INDEX_op_ext8s_i32, { "r", "q" } },
    { INDEX_op_ext16s_i32, { "r", "r" } },
    { INDEX_op_ext8u_i32, { "r", "q" } },
    { INDEX_op_ext16u_i32, { "r", "r" } },

    { INDEX_op_setcond_i32, { "q", "r", "ri" } },

    { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
#if TCG_TARGET_HAS_movcond_i32
    { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },

    { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
    { INDEX_op_muls2_i32, { "a", "d", "a", "r" } },
    { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },

#if TCG_TARGET_REG_BITS == 32
    { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
    { INDEX_op_mov_i64, { "r", "r" } },
    { INDEX_op_movi_i64, { "r" } },
    { INDEX_op_ld8u_i64, { "r", "r" } },
    { INDEX_op_ld8s_i64, { "r", "r" } },
    { INDEX_op_ld16u_i64, { "r", "r" } },
    { INDEX_op_ld16s_i64, { "r", "r" } },
    { INDEX_op_ld32u_i64, { "r", "r" } },
    { INDEX_op_ld32s_i64, { "r", "r" } },
    { INDEX_op_ld_i64, { "r", "r" } },
    { INDEX_op_st8_i64, { "ri", "r" } },
    { INDEX_op_st16_i64, { "ri", "r" } },
    { INDEX_op_st32_i64, { "ri", "r" } },
    { INDEX_op_st_i64, { "re", "r" } },

    { INDEX_op_add_i64, { "r", "r", "re" } },
    { INDEX_op_mul_i64, { "r", "0", "re" } },
    { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_sub_i64, { "r", "0", "re" } },
    { INDEX_op_and_i64, { "r", "0", "reZ" } },
    { INDEX_op_or_i64, { "r", "0", "re" } },
    { INDEX_op_xor_i64, { "r", "0", "re" } },

    { INDEX_op_shl_i64, { "r", "0", "ci" } },
    { INDEX_op_shr_i64, { "r", "0", "ci" } },
    { INDEX_op_sar_i64, { "r", "0", "ci" } },
    { INDEX_op_rotl_i64, { "r", "0", "ci" } },
    { INDEX_op_rotr_i64, { "r", "0", "ci" } },

    { INDEX_op_brcond_i64, { "r", "re" } },
    { INDEX_op_setcond_i64, { "r", "r", "re" } },

    { INDEX_op_bswap16_i64, { "r", "0" } },
    { INDEX_op_bswap32_i64, { "r", "0" } },
    { INDEX_op_bswap64_i64, { "r", "0" } },
    { INDEX_op_neg_i64, { "r", "0" } },
    { INDEX_op_not_i64, { "r", "0" } },

    { INDEX_op_ext8s_i64, { "r", "r" } },
    { INDEX_op_ext16s_i64, { "r", "r" } },
    { INDEX_op_ext32s_i64, { "r", "r" } },
    { INDEX_op_ext8u_i64, { "r", "r" } },
    { INDEX_op_ext16u_i64, { "r", "r" } },
    { INDEX_op_ext32u_i64, { "r", "r" } },

    { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
    { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },

    { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } },
    { INDEX_op_muls2_i64, { "a", "d", "a", "r" } },
    { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } },
    { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } },

#if TCG_TARGET_REG_BITS == 64
    { INDEX_op_qemu_ld8u, { "r", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L" } },
    { INDEX_op_qemu_ld32u, { "r", "L" } },
    { INDEX_op_qemu_ld32s, { "r", "L" } },
    { INDEX_op_qemu_ld64, { "r", "L" } },

    { INDEX_op_qemu_st8, { "L", "L" } },
    { INDEX_op_qemu_st16, { "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L" } },
#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
    { INDEX_op_qemu_ld8u, { "r", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L" } },
    { INDEX_op_qemu_ld64, { "r", "r", "L" } },

    { INDEX_op_qemu_st8, { "cb", "L" } },
    { INDEX_op_qemu_st16, { "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L", "L" } },
    { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L", "L" } },
    { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } },

    { INDEX_op_qemu_st8, { "cb", "L", "L" } },
    { INDEX_op_qemu_st16, { "L", "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L", "L", "L" } },

static int tcg_target_callee_save_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_R14, /* Currently used for the global env. */
    TCG_REG_EBP, /* Currently used for the global env. */

/* Compute frame size via macros, to share between tcg_target_qemu_prologue
   and tcg_register_jit. */
    ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
     * (TCG_TARGET_REG_BITS / 8))

#define FRAME_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))
/* Generate global QEMU prologue and epilogue code */
static void tcg_target_qemu_prologue(TCGContext *s)
    int i, stack_addend;

    /* Reserve some stack space, also for TCG temps. */
    stack_addend = FRAME_SIZE - PUSH_SIZE;
    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

    /* Save all callee saved registers. */
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
        tcg_out_push(s, tcg_target_callee_save_regs[i]);

#if TCG_TARGET_REG_BITS == 32
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
               (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
    tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
                         (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);

    tb_ret_addr = s->code_ptr;

    tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);

    for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
    tcg_out_opc(s, OPC_RET, 0, 0, 0);

#if !defined(CONFIG_SOFTMMU)
    /* Try to set up a segment register to point to GUEST_BASE. */
        setup_guest_base_seg();

static void tcg_target_init(TCGContext *s)
    /* For 32-bit, 99% certainty that we're running on hardware that supports
       cmov, but we still need to check.  In case cmov is not available, we'll
       use a small forward branch. */
        unsigned a, b, c, d;
        have_cmov = (__get_cpuid(1, &a, &b, &c, &d) && (d & bit_CMOV));

    if (TCG_TARGET_REG_BITS == 64) {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);

    tcg_regset_clear(tcg_target_call_clobber_regs);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
    if (TCG_TARGET_REG_BITS == 64) {
#if !defined(_WIN64)
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);

    tcg_regset_clear(s->reserved_regs);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);

    tcg_add_target_add_op_defs(x86_op_defs);

    DebugFrameFDEHeader fde;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[14];

/* We're expecting a 2 byte uleb128 encoded value. */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
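
/* Editor's example (illustrative): a uleb128 value below (1 << 14) fits in
 * exactly two bytes: the low seven bits with the continuation bit set,
 * then the remaining bits.  For instance 300 (0x12c) encodes as 0xac 0x02,
 * since (300 & 0x7f) | 0x80 == 0xac and 300 >> 7 == 2; that is the layout
 * used for FRAME_SIZE in the fde_def_cfa arrays below.
 */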
#if !defined(__ELF__)
    /* Host machine without ELF. */
#elif TCG_TARGET_REG_BITS == 64
#define ELF_HOST_MACHINE EM_X86_64
static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .cie.code_align = 1,
    .cie.data_align = 0x78,             /* sleb128 -8 */
    .cie.return_column = 16,

    /* Total FDE size does not include the "len" member. */
    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),

        12, 7,                          /* DW_CFA_def_cfa %rsp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */

        0x90, 1,                        /* DW_CFA_offset, %rip, -8 */
        /* The following ordering must match tcg_target_callee_save_regs. */
        0x86, 2,                        /* DW_CFA_offset, %rbp, -16 */
        0x83, 3,                        /* DW_CFA_offset, %rbx, -24 */
        0x8c, 4,                        /* DW_CFA_offset, %r12, -32 */
        0x8d, 5,                        /* DW_CFA_offset, %r13, -40 */
        0x8e, 6,                        /* DW_CFA_offset, %r14, -48 */
        0x8f, 7,                        /* DW_CFA_offset, %r15, -56 */
#define ELF_HOST_MACHINE EM_386
static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .cie.code_align = 1,
    .cie.data_align = 0x7c,             /* sleb128 -4 */
    .cie.return_column = 8,

    /* Total FDE size does not include the "len" member. */
    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),

        12, 4,                          /* DW_CFA_def_cfa %esp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */

        0x88, 1,                        /* DW_CFA_offset, %eip, -4 */
        /* The following ordering must match tcg_target_callee_save_regs. */
        0x85, 2,                        /* DW_CFA_offset, %ebp, -8 */
        0x83, 3,                        /* DW_CFA_offset, %ebx, -12 */
        0x86, 4,                        /* DW_CFA_offset, %esi, -16 */
        0x87, 5,                        /* DW_CFA_offset, %edi, -20 */

#if defined(ELF_HOST_MACHINE)
void tcg_register_jit(void *buf, size_t buf_size)
    debug_frame.fde.func_start = (uintptr_t)buf;
    debug_frame.fde.func_len = buf_size;

    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));