2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 #include "tcg-be-ldst.h"
28 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
29 #if TCG_TARGET_REG_BITS == 64
30 "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
31 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
33 "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
38 static const int tcg_target_reg_alloc_order[] = {
39 #if TCG_TARGET_REG_BITS == 64
66 static const int tcg_target_call_iarg_regs[] = {
67 #if TCG_TARGET_REG_BITS == 64
80 /* 32-bit mode uses a stack-based calling convention (the GCC default). */
84 static const int tcg_target_call_oarg_regs[] = {
86 #if TCG_TARGET_REG_BITS == 32
91 /* Constants we accept. */
92 #define TCG_CT_CONST_S32 0x100
93 #define TCG_CT_CONST_U32 0x200
94 #define TCG_CT_CONST_I32 0x400
96 /* Registers used with L constraint, which are the first argument
97 registers on x86_64, and two random call-clobbered registers on 32-bit. */
99 #if TCG_TARGET_REG_BITS == 64
100 # define TCG_REG_L0 tcg_target_call_iarg_regs[0]
101 # define TCG_REG_L1 tcg_target_call_iarg_regs[1]
103 # define TCG_REG_L0 TCG_REG_EAX
104 # define TCG_REG_L1 TCG_REG_EDX
107 /* The host compiler should supply <cpuid.h> to enable runtime feature
108 detection; we do not go so far as writing our own inline assembly for it.
109 If it is not available, conservative default values are assumed. */
110 #if defined(CONFIG_CPUID_H)
114 /* For 32-bit, we are going to attempt to determine at runtime whether cmov is available. */
116 #if TCG_TARGET_REG_BITS == 64
118 #elif defined(CONFIG_CPUID_H)
119 static bool have_cmov;
124 /* If bit_MOVBE is defined in cpuid.h (added in GCC version 4.6), we are
125 going to attempt to determine at runtime whether movbe is available. */
126 #if defined(CONFIG_CPUID_H) && defined(bit_MOVBE)
127 static bool have_movbe;
129 # define have_movbe 0
132 /* We need this symbol in tcg-target.h, and we can't properly conditionalize
133 it there. Therefore we always define the variable. */
136 static uint8_t *tb_ret_addr;
138 static void patch_reloc(uint8_t *code_ptr, int type,
139 intptr_t value, intptr_t addend)
144 value -= (uintptr_t)code_ptr;
145 if (value != (int32_t)value) {
148 *(uint32_t *)code_ptr = value;
151 value -= (uintptr_t)code_ptr;
152 if (value != (int8_t)value) {
155 *(uint8_t *)code_ptr = value;
162 /* parse target specific constraints */
163 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
170 ct->ct |= TCG_CT_REG;
171 tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
174 ct->ct |= TCG_CT_REG;
175 tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
178 ct->ct |= TCG_CT_REG;
179 tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
182 ct->ct |= TCG_CT_REG;
183 tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
186 ct->ct |= TCG_CT_REG;
187 tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
190 ct->ct |= TCG_CT_REG;
191 tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
194 ct->ct |= TCG_CT_REG;
195 if (TCG_TARGET_REG_BITS == 64) {
196 tcg_regset_set32(ct->u.regs, 0, 0xffff);
198 tcg_regset_set32(ct->u.regs, 0, 0xf);
202 ct->ct |= TCG_CT_REG;
203 tcg_regset_set32(ct->u.regs, 0, 0xf);
206 ct->ct |= TCG_CT_REG;
207 if (TCG_TARGET_REG_BITS == 64) {
208 tcg_regset_set32(ct->u.regs, 0, 0xffff);
210 tcg_regset_set32(ct->u.regs, 0, 0xff);
214 /* qemu_ld/st address constraint */
216 ct->ct |= TCG_CT_REG;
217 if (TCG_TARGET_REG_BITS == 64) {
218 tcg_regset_set32(ct->u.regs, 0, 0xffff);
220 tcg_regset_set32(ct->u.regs, 0, 0xff);
222 tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
223 tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
227 ct->ct |= TCG_CT_CONST_S32;
230 ct->ct |= TCG_CT_CONST_U32;
233 ct->ct |= TCG_CT_CONST_I32;
244 /* test if a constant matches the constraint */
245 static inline int tcg_target_const_match(tcg_target_long val,
246 const TCGArgConstraint *arg_ct)
249 if (ct & TCG_CT_CONST) {
252 if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
255 if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
258 if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
264 #if TCG_TARGET_REG_BITS == 64
265 # define LOWREGMASK(x) ((x) & 7)
267 # define LOWREGMASK(x) (x)
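/* LOWREGMASK keeps only the 3-bit register field that fits in the ModRM and
   SIB bytes.  For illustration, %r10 (register number 10) encodes as 2 here,
   with its high bit supplied separately through REX.B/X/R in tcg_out_opc. */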
270 #define P_EXT 0x100 /* 0x0f opcode prefix */
271 #define P_EXT38 0x200 /* 0x0f 0x38 opcode prefix */
272 #define P_DATA16 0x400 /* 0x66 opcode prefix */
273 #if TCG_TARGET_REG_BITS == 64
274 # define P_ADDR32 0x800 /* 0x67 opcode prefix */
275 # define P_REXW 0x1000 /* Set REX.W = 1 */
276 # define P_REXB_R 0x2000 /* REG field as byte register */
277 # define P_REXB_RM 0x4000 /* R/M field as byte register */
278 # define P_GS 0x8000 /* gs segment override */
287 #define OPC_ARITH_EvIz (0x81)
288 #define OPC_ARITH_EvIb (0x83)
289 #define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
290 #define OPC_ANDN (0xf2 | P_EXT38)
291 #define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
292 #define OPC_BSWAP (0xc8 | P_EXT)
293 #define OPC_CALL_Jz (0xe8)
294 #define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */
295 #define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3))
296 #define OPC_DEC_r32 (0x48)
297 #define OPC_IMUL_GvEv (0xaf | P_EXT)
298 #define OPC_IMUL_GvEvIb (0x6b)
299 #define OPC_IMUL_GvEvIz (0x69)
300 #define OPC_INC_r32 (0x40)
301 #define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */
302 #define OPC_JCC_short (0x70) /* ... plus condition code */
303 #define OPC_JMP_long (0xe9)
304 #define OPC_JMP_short (0xeb)
305 #define OPC_LEA (0x8d)
306 #define OPC_MOVB_EvGv (0x88) /* stores, more or less */
307 #define OPC_MOVL_EvGv (0x89) /* stores, more or less */
308 #define OPC_MOVL_GvEv (0x8b) /* loads, more or less */
309 #define OPC_MOVB_EvIz (0xc6)
310 #define OPC_MOVL_EvIz (0xc7)
311 #define OPC_MOVL_Iv (0xb8)
312 #define OPC_MOVBE_GyMy (0xf0 | P_EXT38)
313 #define OPC_MOVBE_MyGy (0xf1 | P_EXT38)
314 #define OPC_MOVSBL (0xbe | P_EXT)
315 #define OPC_MOVSWL (0xbf | P_EXT)
316 #define OPC_MOVSLQ (0x63 | P_REXW)
317 #define OPC_MOVZBL (0xb6 | P_EXT)
318 #define OPC_MOVZWL (0xb7 | P_EXT)
319 #define OPC_POP_r32 (0x58)
320 #define OPC_PUSH_r32 (0x50)
321 #define OPC_PUSH_Iv (0x68)
322 #define OPC_PUSH_Ib (0x6a)
323 #define OPC_RET (0xc3)
324 #define OPC_SETCC (0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
325 #define OPC_SHIFT_1 (0xd1)
326 #define OPC_SHIFT_Ib (0xc1)
327 #define OPC_SHIFT_cl (0xd3)
328 #define OPC_TESTL (0x85)
329 #define OPC_XCHG_ax_r32 (0x90)
331 #define OPC_GRP3_Ev (0xf7)
332 #define OPC_GRP5 (0xff)
334 /* Group 1 opcode extensions for 0x80-0x83.
335 These are also used as modifiers for OPC_ARITH. */
345 /* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3. */
352 /* Group 3 opcode extensions for 0xf6, 0xf7. To be used with OPC_GRP3. */
360 /* Group 5 opcode extensions for 0xff. To be used with OPC_GRP5. */
361 #define EXT5_INC_Ev 0
362 #define EXT5_DEC_Ev 1
363 #define EXT5_CALLN_Ev 2
364 #define EXT5_JMPN_Ev 4
366 /* Condition codes to be added to OPC_JCC_{long,short}. */
385 static const uint8_t tcg_cond_to_jcc[] = {
386 [TCG_COND_EQ] = JCC_JE,
387 [TCG_COND_NE] = JCC_JNE,
388 [TCG_COND_LT] = JCC_JL,
389 [TCG_COND_GE] = JCC_JGE,
390 [TCG_COND_LE] = JCC_JLE,
391 [TCG_COND_GT] = JCC_JG,
392 [TCG_COND_LTU] = JCC_JB,
393 [TCG_COND_GEU] = JCC_JAE,
394 [TCG_COND_LEU] = JCC_JBE,
395 [TCG_COND_GTU] = JCC_JA,
398 #if TCG_TARGET_REG_BITS == 64
399 static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
406 if (opc & P_DATA16) {
407 /* We should never be asking for both a 16-bit and a 64-bit operation. */
408 assert((opc & P_REXW) == 0);
411 if (opc & P_ADDR32) {
416 rex |= (opc & P_REXW) ? 0x8 : 0x0; /* REX.W */
417 rex |= (r & 8) >> 1; /* REX.R */
418 rex |= (x & 8) >> 2; /* REX.X */
419 rex |= (rm & 8) >> 3; /* REX.B */
421 /* P_REXB_{R,RM} indicates that the given register is the low byte.
422 For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
423 as otherwise the encoding indicates %[abcd]h. Note that the values
424 that are ORed in merely indicate that the REX byte must be present;
425 those bits get discarded in output. */
426 rex |= opc & (r >= 4 ? P_REXB_R : 0);
427 rex |= opc & (rm >= 4 ? P_REXB_RM : 0);
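/* Worked encoding example, for illustration: "movzbl %bl, %eax" needs no
   prefix (0f b6 c3), but "movzbl %sil, %eax" needs an otherwise-empty REX
   byte (40 0f b6 c6); without it, that ModRM byte would name %dh instead
   of %sil. */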
430 tcg_out8(s, (uint8_t)(rex | 0x40));
433 if (opc & (P_EXT | P_EXT38)) {
443 static void tcg_out_opc(TCGContext *s, int opc)
445 if (opc & P_DATA16) {
448 if (opc & (P_EXT | P_EXT38)) {
456 /* Discard the register arguments to tcg_out_opc early, so as not to penalize
457 the 32-bit compilation paths. This method works with all versions of gcc,
458 whereas relying on compiler optimization to remove them may not. */
459 #define tcg_out_opc(s, opc, r, rm, x) (tcg_out_opc)(s, opc)
462 static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
464 tcg_out_opc(s, opc, r, rm, 0);
465 tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
468 static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
472 if ((opc & (P_REXW | P_EXT | P_EXT38)) || (rm & 8)) {
473 /* Three byte VEX prefix. */
479 } else if (opc & P_EXT) {
484 tmp |= 0x40; /* VEX.X */
485 tmp |= (r & 8 ? 0 : 0x80); /* VEX.R */
486 tmp |= (rm & 8 ? 0 : 0x20); /* VEX.B */
489 tmp = (opc & P_REXW ? 0x80 : 0); /* VEX.W */
491 /* Two byte VEX prefix. */
494 tmp = (r & 8 ? 0 : 0x80); /* VEX.R */
496 tmp |= (opc & P_DATA16 ? 1 : 0); /* VEX.pp */
497 tmp |= (~v & 15) << 3; /* VEX.vvvv */
500 tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
503 /* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
504 Either RM or INDEX may be absent, indicated by a negative value. In 64-bit
505 mode for absolute addresses, ~RM is the size of the immediate operand
506 that will follow the instruction. */
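/* For reference, the standard x86 ModRM rules applied below: mod = 0 means
   no displacement byte, except that rm = 5 (%ebp) then means absolute or
   rip-relative addressing and rm = 4 (%esp) escapes to a SIB byte;
   mod = 0x40 adds an 8-bit displacement and mod = 0x80 a 32-bit one. */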
508 static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
509 int index, int shift, intptr_t offset)
513 if (index < 0 && rm < 0) {
514 if (TCG_TARGET_REG_BITS == 64) {
515 /* Try for a rip-relative addressing mode. This has replaced
516 the 32-bit-mode absolute addressing encoding. */
517 intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
518 intptr_t disp = offset - pc;
519 if (disp == (int32_t)disp) {
520 tcg_out_opc(s, opc, r, 0, 0);
521 tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
526 /* Try for an absolute address encoding. This requires the
527 use of the MODRM+SIB encoding and is therefore larger than
528 rip-relative addressing. */
529 if (offset == (int32_t)offset) {
530 tcg_out_opc(s, opc, r, 0, 0);
531 tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
532 tcg_out8(s, (4 << 3) | 5);
533 tcg_out32(s, offset);
537 /* ??? The memory isn't directly addressable. */
540 /* Absolute address. */
541 tcg_out_opc(s, opc, r, 0, 0);
542 tcg_out8(s, (r << 3) | 5);
543 tcg_out32(s, offset);
548 /* Find the length of the immediate addend. Note that the encoding
549 that would be used for (%ebp) indicates absolute addressing. */
551 mod = 0, len = 4, rm = 5;
552 } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
554 } else if (offset == (int8_t)offset) {
560 /* Use a single byte MODRM format if possible. Note that the encoding
561 that would be used for %esp is the escape to the two byte form. */
562 if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
563 /* Single byte MODRM format. */
564 tcg_out_opc(s, opc, r, rm, 0);
565 tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
567 /* Two byte MODRM+SIB format. */
569 /* Note that the encoding that would place %esp into the index
570 field indicates no index register. In 64-bit mode, the REX.X
571 bit counts, so %r12 can be used as the index. */
575 assert(index != TCG_REG_ESP);
578 tcg_out_opc(s, opc, r, rm, index);
579 tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
580 tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
585 } else if (len == 4) {
586 tcg_out32(s, offset);
590 /* A simplification of the above with no index or shift. */
591 static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
592 int rm, intptr_t offset)
594 tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
597 /* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */
598 static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
600 /* Propagate an opcode prefix, such as P_REXW. */
601 int ext = subop & ~0x7;
604 tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
607 static inline void tcg_out_mov(TCGContext *s, TCGType type,
608 TCGReg ret, TCGReg arg)
611 int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
612 tcg_out_modrm(s, opc, ret, arg);
616 static void tcg_out_movi(TCGContext *s, TCGType type,
617 TCGReg ret, tcg_target_long arg)
619 tcg_target_long diff;
622 tgen_arithr(s, ARITH_XOR, ret, ret);
625 if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
626 tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
630 if (arg == (int32_t)arg) {
631 tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
636 /* Try a 7-byte pc-relative LEA before the 10-byte MOVQ. */
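/* For illustration: REX.W + 8d + ModRM + disp32 is 7 bytes, while the
   REX.W + b8+r + imm64 form emitted below is 10, so the LEA wins whenever
   the constant lies within +/-2GB of the code currently being generated. */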
637 diff = arg - ((uintptr_t)s->code_ptr + 7);
638 if (diff == (int32_t)diff) {
639 tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
640 tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
645 tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
649 static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
651 if (val == (int8_t)val) {
652 tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
654 } else if (val == (int32_t)val) {
655 tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
662 static inline void tcg_out_push(TCGContext *s, int reg)
664 tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
667 static inline void tcg_out_pop(TCGContext *s, int reg)
669 tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
672 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
673 TCGReg arg1, intptr_t arg2)
675 int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
676 tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
679 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
680 TCGReg arg1, intptr_t arg2)
682 int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
683 tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
686 static inline void tcg_out_sti(TCGContext *s, TCGType type, TCGReg base,
687 tcg_target_long ofs, tcg_target_long val)
689 int opc = OPC_MOVL_EvIz + (type == TCG_TYPE_I64 ? P_REXW : 0);
690 tcg_out_modrm_offset(s, opc, 0, base, ofs);
694 static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
696 /* Propagate an opcode prefix, such as P_DATA16. */
697 int ext = subopc & ~0x7;
701 tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
703 tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
708 static inline void tcg_out_bswap32(TCGContext *s, int reg)
710 tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
713 static inline void tcg_out_rolw_8(TCGContext *s, int reg)
715 tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
718 static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
721 assert(src < 4 || TCG_TARGET_REG_BITS == 64);
722 tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
725 static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
728 assert(src < 4 || TCG_TARGET_REG_BITS == 64);
729 tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
732 static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
735 tcg_out_modrm(s, OPC_MOVZWL, dest, src);
738 static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
741 tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
744 static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
746 /* 32-bit mov zero extends. */
747 tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
750 static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
752 tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
755 static inline void tcg_out_bswap64(TCGContext *s, int reg)
757 tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
760 static void tgen_arithi(TCGContext *s, int c, int r0,
761 tcg_target_long val, int cf)
765 if (TCG_TARGET_REG_BITS == 64) {
770 /* ??? While INC is 2 bytes shorter than ADDL $1, it also induces
771 partial flags-update stalls on Pentium 4 and is not recommended
772 by current Intel optimization manuals. */
773 if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
774 int is_inc = (c == ARITH_ADD) ^ (val < 0);
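/* The XOR folds the equivalent forms together: ADD $1 and SUB $-1 turn
   into INC, while ADD $-1 and SUB $1 turn into DEC. */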
775 if (TCG_TARGET_REG_BITS == 64) {
776 /* The single-byte increment encodings are re-tasked as the
777 REX prefixes. Use the MODRM encoding. */
778 tcg_out_modrm(s, OPC_GRP5 + rexw,
779 (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
781 tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
786 if (c == ARITH_AND) {
787 if (TCG_TARGET_REG_BITS == 64) {
788 if (val == 0xffffffffu) {
789 tcg_out_ext32u(s, r0, r0);
792 if (val == (uint32_t)val) {
793 /* AND with no high bits set can use a 32-bit operation. */
797 if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
798 tcg_out_ext8u(s, r0, r0);
801 if (val == 0xffffu) {
802 tcg_out_ext16u(s, r0, r0);
807 if (val == (int8_t)val) {
808 tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
812 if (rexw == 0 || val == (int32_t)val) {
813 tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
821 static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
824 tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
828 /* Use SMALL != 0 to force a short forward branch. */
829 static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
832 TCGLabel *l = &s->labels[label_index];
835 val = l->u.value - (intptr_t)s->code_ptr;
837 if ((int8_t)val1 == val1) {
839 tcg_out8(s, OPC_JMP_short);
841 tcg_out8(s, OPC_JCC_short + opc);
849 tcg_out8(s, OPC_JMP_long);
850 tcg_out32(s, val - 5);
852 tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
853 tcg_out32(s, val - 6);
858 tcg_out8(s, OPC_JMP_short);
860 tcg_out8(s, OPC_JCC_short + opc);
862 tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
866 tcg_out8(s, OPC_JMP_long);
868 tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
870 tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
875 static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
876 int const_arg2, int rexw)
881 tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
883 tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
886 tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
890 static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
891 TCGArg arg1, TCGArg arg2, int const_arg2,
892 int label_index, int small)
894 tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
895 tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
898 #if TCG_TARGET_REG_BITS == 64
899 static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
900 TCGArg arg1, TCGArg arg2, int const_arg2,
901 int label_index, int small)
903 tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
904 tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
907 /* XXX: we implement brcond2 at the target level to avoid having to
908 handle cross-basic-block temporaries */
909 static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
910 const int *const_args, int small)
913 label_next = gen_new_label();
916 tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
918 tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
922 tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
924 tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
928 tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
930 tcg_out_jxx(s, JCC_JNE, label_next, 1);
931 tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
935 tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
937 tcg_out_jxx(s, JCC_JNE, label_next, 1);
938 tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
942 tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
944 tcg_out_jxx(s, JCC_JNE, label_next, 1);
945 tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
949 tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
951 tcg_out_jxx(s, JCC_JNE, label_next, 1);
952 tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
956 tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
958 tcg_out_jxx(s, JCC_JNE, label_next, 1);
959 tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
963 tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
965 tcg_out_jxx(s, JCC_JNE, label_next, 1);
966 tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
970 tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
972 tcg_out_jxx(s, JCC_JNE, label_next, 1);
973 tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
977 tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
979 tcg_out_jxx(s, JCC_JNE, label_next, 1);
980 tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
986 tcg_out_label(s, label_next, s->code_ptr);
990 static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
991 TCGArg arg1, TCGArg arg2, int const_arg2)
993 tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
994 tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
995 tcg_out_ext8u(s, dest, dest);
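/* SETcc writes only the low 8 bits of DEST, so the zero-extension above is
   what yields the full 0/1 result; DEST must therefore be byte-addressable,
   which the "q" constraint on setcond guarantees. */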
998 #if TCG_TARGET_REG_BITS == 64
999 static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
1000 TCGArg arg1, TCGArg arg2, int const_arg2)
1002 tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
1003 tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
1004 tcg_out_ext8u(s, dest, dest);
1007 static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
1008 const int *const_args)
1011 int label_true, label_over;
1013 memcpy(new_args, args+1, 5*sizeof(TCGArg));
1015 if (args[0] == args[1] || args[0] == args[2]
1016 || (!const_args[3] && args[0] == args[3])
1017 || (!const_args[4] && args[0] == args[4])) {
1018 /* When the destination overlaps with one of the argument
1019 registers, don't do anything tricky. */
1020 label_true = gen_new_label();
1021 label_over = gen_new_label();
1023 new_args[5] = label_true;
1024 tcg_out_brcond2(s, new_args, const_args+1, 1);
1026 tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
1027 tcg_out_jxx(s, JCC_JMP, label_over, 1);
1028 tcg_out_label(s, label_true, s->code_ptr);
1030 tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
1031 tcg_out_label(s, label_over, s->code_ptr);
1033 /* When the destination does not overlap one of the arguments,
1034 clear the destination first, jump if cond false, and emit an
1035 increment in the true case. This results in smaller code. */
1037 tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
1039 label_over = gen_new_label();
1040 new_args[4] = tcg_invert_cond(new_args[4]);
1041 new_args[5] = label_over;
1042 tcg_out_brcond2(s, new_args, const_args+1, 1);
1044 tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
1045 tcg_out_label(s, label_over, s->code_ptr);
1050 static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
1051 TCGArg c1, TCGArg c2, int const_c2,
1054 tcg_out_cmp(s, c1, c2, const_c2, 0);
1056 tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
1058 int over = gen_new_label();
1059 tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
1060 tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
1061 tcg_out_label(s, over, s->code_ptr);
1065 #if TCG_TARGET_REG_BITS == 64
1066 static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
1067 TCGArg c1, TCGArg c2, int const_c2,
1070 tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
1071 tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
1075 static void tcg_out_branch(TCGContext *s, int call, uintptr_t dest)
1077 intptr_t disp = dest - (intptr_t)s->code_ptr - 5;
1079 if (disp == (int32_t)disp) {
1080 tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
1083 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
1084 tcg_out_modrm(s, OPC_GRP5,
1085 call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
1089 static inline void tcg_out_calli(TCGContext *s, uintptr_t dest)
1091 tcg_out_branch(s, 1, dest);
1094 static void tcg_out_jmp(TCGContext *s, uintptr_t dest)
1096 tcg_out_branch(s, 0, dest);
1099 #if defined(CONFIG_SOFTMMU)
1100 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1101 * int mmu_idx, uintptr_t ra)
1103 static const void * const qemu_ld_helpers[16] = {
1104 [MO_UB] = helper_ret_ldub_mmu,
1105 [MO_LEUW] = helper_le_lduw_mmu,
1106 [MO_LEUL] = helper_le_ldul_mmu,
1107 [MO_LEQ] = helper_le_ldq_mmu,
1108 [MO_BEUW] = helper_be_lduw_mmu,
1109 [MO_BEUL] = helper_be_ldul_mmu,
1110 [MO_BEQ] = helper_be_ldq_mmu,
1113 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1114 * uintxx_t val, int mmu_idx, uintptr_t ra)
1116 static const void * const qemu_st_helpers[16] = {
1117 [MO_UB] = helper_ret_stb_mmu,
1118 [MO_LEUW] = helper_le_stw_mmu,
1119 [MO_LEUL] = helper_le_stl_mmu,
1120 [MO_LEQ] = helper_le_stq_mmu,
1121 [MO_BEUW] = helper_be_stw_mmu,
1122 [MO_BEUL] = helper_be_stl_mmu,
1123 [MO_BEQ] = helper_be_stq_mmu,
1126 /* Perform the TLB load and compare.
1129 ADDRLO and ADDRHI contain the low and high part of the address.
1131 MEM_INDEX and S_BITS are the memory context and log2 size of the load.
1133 WHICH is the offset into the CPUTLBEntry structure of the slot to read.
1134 This should be offsetof addr_read or addr_write.
1137 LABEL_PTR is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
1138 entries: the positions of the displacement fields of the forward jumps to the TLB miss case.
1140 Second argument register is loaded with the low part of the address.
1141 In the TLB hit case, it has been adjusted as indicated by the TLB
1142 and so is a host address. In the TLB miss case, it continues to
1143 hold a guest address.
1145 First argument register is clobbered. */
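/* In C terms, the fast path emitted below computes roughly the following
   (a sketch for illustration, not the literal generated code):

       CPUTLBEntry *entry = (void *)((char *)&env->tlb_table[mem_index][0]
           + ((addr >> (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS))
              & ((CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS)));
       if ((addr & (TARGET_PAGE_MASK | ((1 << s_bits) - 1)))
           != *(target_ulong *)((char *)entry + which)) {
           goto slow_path;
       }
       host_addr = addr + entry->addend;

   Keeping the low s_bits in the masked address means that accesses which
   are not aligned to their size also fail the compare and take the slow
   path. */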
1147 static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
1148 int mem_index, TCGMemOp s_bits,
1149 uint8_t **label_ptr, int which)
1151 const TCGReg r0 = TCG_REG_L0;
1152 const TCGReg r1 = TCG_REG_L1;
1153 TCGType ttype = TCG_TYPE_I32;
1154 TCGType htype = TCG_TYPE_I32;
1155 int trexw = 0, hrexw = 0;
1157 if (TCG_TARGET_REG_BITS == 64) {
1158 if (TARGET_LONG_BITS == 64) {
1159 ttype = TCG_TYPE_I64;
1162 if (TCG_TYPE_PTR == TCG_TYPE_I64) {
1163 htype = TCG_TYPE_I64;
1168 tcg_out_mov(s, htype, r0, addrlo);
1169 tcg_out_mov(s, ttype, r1, addrlo);
1171 tcg_out_shifti(s, SHIFT_SHR + hrexw, r0,
1172 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1174 tgen_arithi(s, ARITH_AND + trexw, r1,
1175 TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
1176 tgen_arithi(s, ARITH_AND + hrexw, r0,
1177 (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
1179 tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0,
1180 offsetof(CPUArchState, tlb_table[mem_index][0])
1184 tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0);
1186 /* Prepare for both the fast path add of the tlb addend, and the slow
1187 path function argument setup.  There are two cases worth noting:
1188 For 32-bit guest and x86_64 host, MOVL zero-extends the guest address
1189 before the fastpath ADDQ below. For 64-bit guest and x32 host, MOVQ
1190 copies the entire guest address for the slow path, while truncation
1191 for the 32-bit host happens with the fastpath ADDL below. */
1192 tcg_out_mov(s, ttype, r1, addrlo);
1195 tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
1196 label_ptr[0] = s->code_ptr;
1199 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1200 /* cmp 4(r0), addrhi */
1201 tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4);
1204 tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
1205 label_ptr[1] = s->code_ptr;
1211 /* add addend(r0), r1 */
1212 tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
1213 offsetof(CPUTLBEntry, addend) - which);
1217 * Record the context of a call to the out-of-line helper code for the slow path
1218 * for a load or store, so that we can later generate the correct helper code
1220 static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOp opc,
1221 TCGReg datalo, TCGReg datahi,
1222 TCGReg addrlo, TCGReg addrhi,
1223 int mem_index, uint8_t *raddr,
1224 uint8_t **label_ptr)
1226 TCGLabelQemuLdst *label = new_ldst_label(s);
1228 label->is_ld = is_ld;
1230 label->datalo_reg = datalo;
1231 label->datahi_reg = datahi;
1232 label->addrlo_reg = addrlo;
1233 label->addrhi_reg = addrhi;
1234 label->mem_index = mem_index;
1235 label->raddr = raddr;
1236 label->label_ptr[0] = label_ptr[0];
1237 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1238 label->label_ptr[1] = label_ptr[1];
1243 * Generate code for the slow path for a load, placed at the end of the block
1245 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1247 TCGMemOp opc = l->opc;
1249 uint8_t **label_ptr = &l->label_ptr[0];
1251 /* resolve label address */
1252 *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
1253 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1254 *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
1257 if (TCG_TARGET_REG_BITS == 32) {
1260 tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
1263 tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
1266 if (TARGET_LONG_BITS == 64) {
1267 tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
1271 tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);
1274 tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, (uintptr_t)l->raddr);
1276 tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
1277 /* The second argument is already loaded with addrlo. */
1278 tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
1280 tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
1281 (uintptr_t)l->raddr);
1284 tcg_out_calli(s, (uintptr_t)qemu_ld_helpers[opc & ~MO_SIGN]);
1286 data_reg = l->datalo_reg;
1287 switch (opc & MO_SSIZE) {
1289 tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
1292 tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
1294 #if TCG_TARGET_REG_BITS == 64
1296 tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
1301 /* Note that the helpers have zero-extended to tcg_target_long. */
1303 tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1306 if (TCG_TARGET_REG_BITS == 64) {
1307 tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
1308 } else if (data_reg == TCG_REG_EDX) {
1309 /* xchg %edx, %eax */
1310 tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
1311 tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
1313 tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
1314 tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
1321 /* Jump back to the fast-path code following the qemu_ld. */
1322 tcg_out_jmp(s, (uintptr_t)l->raddr);
1326 * Generate code for the slow path for a store, placed at the end of the block
1328 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1330 TCGMemOp opc = l->opc;
1331 TCGMemOp s_bits = opc & MO_SIZE;
1332 uint8_t **label_ptr = &l->label_ptr[0];
1335 /* resolve label address */
1336 *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
1337 if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
1338 *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
1341 if (TCG_TARGET_REG_BITS == 32) {
1344 tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
1347 tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
1350 if (TARGET_LONG_BITS == 64) {
1351 tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
1355 tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
1358 if (s_bits == MO_64) {
1359 tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
1363 tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);
1366 retaddr = TCG_REG_EAX;
1367 tcg_out_movi(s, TCG_TYPE_I32, retaddr, (uintptr_t)l->raddr);
1368 tcg_out_st(s, TCG_TYPE_I32, retaddr, TCG_REG_ESP, ofs);
1370 tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
1371 /* The second argument is already loaded with addrlo. */
1372 tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
1373 tcg_target_call_iarg_regs[2], l->datalo_reg);
1374 tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
1377 if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
1378 retaddr = tcg_target_call_iarg_regs[4];
1379 tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
1381 retaddr = TCG_REG_RAX;
1382 tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
1383 tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, 0);
1387 /* "Tail call" to the helper, with the return address back inline. */
1388 tcg_out_push(s, retaddr);
1389 tcg_out_jmp(s, (uintptr_t)qemu_st_helpers[opc]);
1391 #elif defined(__x86_64__) && defined(__linux__)
1392 # include <asm/prctl.h>
1393 # include <sys/prctl.h>
1395 int arch_prctl(int code, unsigned long addr);
1397 static int guest_base_flags;
1398 static inline void setup_guest_base_seg(void)
1400 if (arch_prctl(ARCH_SET_GS, GUEST_BASE) == 0) {
1401 guest_base_flags = P_GS;
1405 # define guest_base_flags 0
1406 static inline void setup_guest_base_seg(void) { }
1407 #endif /* SOFTMMU */
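/* With the GS base set to GUEST_BASE, the direct load/store routines below
   receive P_GS as SEG and emit guest accesses with a gs segment override,
   e.g. "movl %gs:(%rdi), %eax", so no explicit GUEST_BASE addition is
   needed in the generated code. */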
1409 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
1410 TCGReg base, intptr_t ofs, int seg,
1413 const TCGMemOp real_bswap = memop & MO_BSWAP;
1414 TCGMemOp bswap = real_bswap;
1415 int movop = OPC_MOVL_GvEv;
1417 if (have_movbe && real_bswap) {
1419 movop = OPC_MOVBE_GyMy;
1422 switch (memop & MO_SSIZE) {
1424 tcg_out_modrm_offset(s, OPC_MOVZBL + seg, datalo, base, ofs);
1427 tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, base, ofs);
1430 tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
1432 tcg_out_rolw_8(s, datalo);
1438 tcg_out_modrm_offset(s, OPC_MOVBE_GyMy + P_DATA16 + seg,
1441 tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
1442 tcg_out_rolw_8(s, datalo);
1444 tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
1446 tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW + seg,
1451 tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
1453 tcg_out_bswap32(s, datalo);
1456 #if TCG_TARGET_REG_BITS == 64
1459 tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
1461 tcg_out_bswap32(s, datalo);
1463 tcg_out_ext32s(s, datalo, datalo);
1465 tcg_out_modrm_offset(s, OPC_MOVSLQ + seg, datalo, base, ofs);
1470 if (TCG_TARGET_REG_BITS == 64) {
1471 tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs);
1473 tcg_out_bswap64(s, datalo);
1481 if (base != datalo) {
1482 tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
1483 tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs + 4);
1485 tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs + 4);
1486 tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
1489 tcg_out_bswap32(s, datalo);
1490 tcg_out_bswap32(s, datahi);
1499 /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
1500 EAX. It will be useful once fixed-register globals are less common. */
1502 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
1504 TCGReg datalo, datahi, addrlo;
1505 TCGReg addrhi __attribute__((unused));
1507 #if defined(CONFIG_SOFTMMU)
1510 uint8_t *label_ptr[2];
1514 datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
1516 addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
1519 #if defined(CONFIG_SOFTMMU)
1520 mem_index = *args++;
1521 s_bits = opc & MO_SIZE;
1523 tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
1524 label_ptr, offsetof(CPUTLBEntry, addr_read));
1527 tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
1529 /* Record the current context of a load into ldst label */
1530 add_qemu_ldst_label(s, 1, opc, datalo, datahi, addrlo, addrhi,
1531 mem_index, s->code_ptr, label_ptr);
1534 int32_t offset = GUEST_BASE;
1535 TCGReg base = addrlo;
1538 /* ??? We assume all operations have left us with register contents
1539 that are zero extended. So far this appears to be true. If we
1540 want to enforce this, we can either do an explicit zero-extension
1541 here, or (if GUEST_BASE == 0, or a segment register is in use)
1542 use the ADDR32 prefix. For now, do nothing. */
1543 if (GUEST_BASE && guest_base_flags) {
1544 seg = guest_base_flags;
1546 } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
1547 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
1548 tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
1553 tcg_out_qemu_ld_direct(s, datalo, datahi, base, offset, seg, opc);
1558 static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
1559 TCGReg base, intptr_t ofs, int seg,
1562 /* ??? Ideally we wouldn't need a scratch register. For user-only,
1563 we could perform the bswap twice to restore the original value
1564 instead of moving to the scratch. But as it is, the L constraint
1565 means that TCG_REG_L0 is definitely free here. */
1566 const TCGReg scratch = TCG_REG_L0;
1567 const TCGMemOp real_bswap = memop & MO_BSWAP;
1568 TCGMemOp bswap = real_bswap;
1569 int movop = OPC_MOVL_EvGv;
1571 if (have_movbe && real_bswap) {
1573 movop = OPC_MOVBE_MyGy;
1576 switch (memop & MO_SIZE) {
1578 /* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
1579 Use the scratch register if necessary. */
1580 if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
1581 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1584 tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
1589 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1590 tcg_out_rolw_8(s, scratch);
1593 tcg_out_modrm_offset(s, movop + P_DATA16 + seg, datalo, base, ofs);
1597 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1598 tcg_out_bswap32(s, scratch);
1601 tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
1604 if (TCG_TARGET_REG_BITS == 64) {
1606 tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
1607 tcg_out_bswap64(s, scratch);
1610 tcg_out_modrm_offset(s, movop + P_REXW + seg, datalo, base, ofs);
1612 tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
1613 tcg_out_bswap32(s, scratch);
1614 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
1615 tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
1616 tcg_out_bswap32(s, scratch);
1617 tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
1624 tcg_out_modrm_offset(s, movop + seg, datalo, base, ofs);
1625 tcg_out_modrm_offset(s, movop + seg, datahi, base, ofs+4);
1633 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
1635 TCGReg datalo, datahi, addrlo;
1636 TCGReg addrhi __attribute__((unused));
1638 #if defined(CONFIG_SOFTMMU)
1641 uint8_t *label_ptr[2];
1645 datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
1647 addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
1650 #if defined(CONFIG_SOFTMMU)
1651 mem_index = *args++;
1652 s_bits = opc & MO_SIZE;
1654 tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
1655 label_ptr, offsetof(CPUTLBEntry, addr_write));
1658 tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
1660 /* Record the current context of a store into ldst label */
1661 add_qemu_ldst_label(s, 0, opc, datalo, datahi, addrlo, addrhi,
1662 mem_index, s->code_ptr, label_ptr);
1665 int32_t offset = GUEST_BASE;
1666 TCGReg base = addrlo;
1669 /* ??? We assume all operations have left us with register contents
1670 that are zero extended. So far this appears to be true. If we
1671 want to enforce this, we can either do an explicit zero-extension
1672 here, or (if GUEST_BASE == 0, or a segment register is in use)
1673 use the ADDR32 prefix. For now, do nothing. */
1674 if (GUEST_BASE && guest_base_flags) {
1675 seg = guest_base_flags;
1677 } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
1678 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
1679 tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
1684 tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc);
1689 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1690 const TCGArg *args, const int *const_args)
1694 #if TCG_TARGET_REG_BITS == 64
1695 # define OP_32_64(x) \
1696 case glue(glue(INDEX_op_, x), _i64): \
1697 rexw = P_REXW; /* FALLTHRU */ \
1698 case glue(glue(INDEX_op_, x), _i32)
1700 # define OP_32_64(x) \
1701 case glue(glue(INDEX_op_, x), _i32)
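/* For illustration, on a 64-bit host OP_32_64(add) expands to
       case INDEX_op_add_i64: rexw = P_REXW;    (falls through)
       case INDEX_op_add_i32:
   so one switch arm handles both operand widths, with REXW selecting the
   64-bit form of the emitted instruction. */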
1705 case INDEX_op_exit_tb:
1706 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
1707 tcg_out_jmp(s, (uintptr_t)tb_ret_addr);
1709 case INDEX_op_goto_tb:
1710 if (s->tb_jmp_offset) {
1711 /* direct jump method */
1712 tcg_out8(s, OPC_JMP_long); /* jmp im */
1713 s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
1716 /* indirect jump method */
1717 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
1718 (intptr_t)(s->tb_next + args[0]));
1720 s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
1723 if (const_args[0]) {
1724 tcg_out_calli(s, args[0]);
1727 tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
1731 tcg_out_jxx(s, JCC_JMP, args[0], 0);
1733 case INDEX_op_movi_i32:
1734 tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
1737 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1738 tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
1741 tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
1744 /* Note that we can ignore REXW for the zero-extend to 64-bit. */
1745 tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
1748 tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
1750 #if TCG_TARGET_REG_BITS == 64
1751 case INDEX_op_ld32u_i64:
1753 case INDEX_op_ld_i32:
1754 tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1758 if (const_args[0]) {
1759 tcg_out_modrm_offset(s, OPC_MOVB_EvIz,
1760 0, args[1], args[2]);
1761 tcg_out8(s, args[0]);
1763 tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
1764 args[0], args[1], args[2]);
1768 if (const_args[0]) {
1769 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16,
1770 0, args[1], args[2]);
1771 tcg_out16(s, args[0]);
1773 tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
1774 args[0], args[1], args[2]);
1777 #if TCG_TARGET_REG_BITS == 64
1778 case INDEX_op_st32_i64:
1780 case INDEX_op_st_i32:
1781 if (const_args[0]) {
1782 tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]);
1783 tcg_out32(s, args[0]);
1785 tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1790 /* For 3-operand addition, use LEA. */
1791 if (args[0] != args[1]) {
1792 TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;
1794 if (const_args[2]) {
1796 } else if (a0 == a2) {
1797 /* Watch out for dest = src + dest, since we've removed
1798 the matching constraint on the add. */
1799 tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
1803 tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
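/* E.g. "leal 0x10(%ebx,%ecx,1), %eax" computes eax = ebx + ecx + 0x10 in a
   single instruction, clobbering neither source register nor the flags,
   which is what makes LEA usable as a 3-operand add here. */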
1821 if (const_args[2]) {
1822 tgen_arithi(s, c + rexw, args[0], args[2], 0);
1824 tgen_arithr(s, c + rexw, args[0], args[2]);
1829 if (const_args[2]) {
1830 tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32,
1832 tgen_arithi(s, ARITH_AND + rexw, args[0], ~args[2], 0);
1834 tcg_out_vex_modrm(s, OPC_ANDN + rexw, args[0], args[2], args[1]);
1839 if (const_args[2]) {
1842 if (val == (int8_t)val) {
1843 tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
1846 tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
1850 tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
1855 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
1858 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
1877 if (const_args[2]) {
1878 tcg_out_shifti(s, c + rexw, args[0], args[2]);
1880 tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
1884 case INDEX_op_brcond_i32:
1885 tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
1888 case INDEX_op_setcond_i32:
1889 tcg_out_setcond32(s, args[3], args[0], args[1],
1890 args[2], const_args[2]);
1892 case INDEX_op_movcond_i32:
1893 tcg_out_movcond32(s, args[5], args[0], args[1],
1894 args[2], const_args[2], args[3]);
1898 tcg_out_rolw_8(s, args[0]);
1901 tcg_out_bswap32(s, args[0]);
1905 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
1908 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
1912 tcg_out_ext8s(s, args[0], args[1], rexw);
1915 tcg_out_ext16s(s, args[0], args[1], rexw);
1918 tcg_out_ext8u(s, args[0], args[1]);
1921 tcg_out_ext16u(s, args[0], args[1]);
1924 case INDEX_op_qemu_ld_i32:
1925 tcg_out_qemu_ld(s, args, 0);
1927 case INDEX_op_qemu_ld_i64:
1928 tcg_out_qemu_ld(s, args, 1);
1930 case INDEX_op_qemu_st_i32:
1931 tcg_out_qemu_st(s, args, 0);
1933 case INDEX_op_qemu_st_i64:
1934 tcg_out_qemu_st(s, args, 1);
1938 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
1941 tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
1944 if (const_args[4]) {
1945 tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1);
1947 tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]);
1949 if (const_args[5]) {
1950 tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1);
1952 tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]);
1956 if (const_args[4]) {
1957 tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1);
1959 tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]);
1961 if (const_args[5]) {
1962 tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1);
1964 tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]);
1968 #if TCG_TARGET_REG_BITS == 32
1969 case INDEX_op_brcond2_i32:
1970 tcg_out_brcond2(s, args, const_args, 0);
1972 case INDEX_op_setcond2_i32:
1973 tcg_out_setcond2(s, args, const_args);
1975 #else /* TCG_TARGET_REG_BITS == 64 */
1976 case INDEX_op_movi_i64:
1977 tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
1979 case INDEX_op_ld32s_i64:
1980 tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
1982 case INDEX_op_ld_i64:
1983 tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
1985 case INDEX_op_st_i64:
1986 if (const_args[0]) {
1987 tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW,
1988 0, args[1], args[2]);
1989 tcg_out32(s, args[0]);
1991 tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
1995 case INDEX_op_brcond_i64:
1996 tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
1999 case INDEX_op_setcond_i64:
2000 tcg_out_setcond64(s, args[3], args[0], args[1],
2001 args[2], const_args[2]);
2003 case INDEX_op_movcond_i64:
2004 tcg_out_movcond64(s, args[5], args[0], args[1],
2005 args[2], const_args[2], args[3]);
2008 case INDEX_op_bswap64_i64:
2009 tcg_out_bswap64(s, args[0]);
2011 case INDEX_op_ext32u_i64:
2012 tcg_out_ext32u(s, args[0], args[1]);
2014 case INDEX_op_ext32s_i64:
2015 tcg_out_ext32s(s, args[0], args[1]);
2020 if (args[3] == 0 && args[4] == 8) {
2021 /* load bits 0..7 */
2022 tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
2024 } else if (args[3] == 8 && args[4] == 8) {
2025 /* load bits 8..15 */
2026 tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
2027 } else if (args[3] == 0 && args[4] == 16) {
2028 /* load bits 0..15 */
2029 tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
2042 static const TCGTargetOpDef x86_op_defs[] = {
2043 { INDEX_op_exit_tb, { } },
2044 { INDEX_op_goto_tb, { } },
2045 { INDEX_op_call, { "ri" } },
2046 { INDEX_op_br, { } },
2047 { INDEX_op_mov_i32, { "r", "r" } },
2048 { INDEX_op_movi_i32, { "r" } },
2049 { INDEX_op_ld8u_i32, { "r", "r" } },
2050 { INDEX_op_ld8s_i32, { "r", "r" } },
2051 { INDEX_op_ld16u_i32, { "r", "r" } },
2052 { INDEX_op_ld16s_i32, { "r", "r" } },
2053 { INDEX_op_ld_i32, { "r", "r" } },
2054 { INDEX_op_st8_i32, { "qi", "r" } },
2055 { INDEX_op_st16_i32, { "ri", "r" } },
2056 { INDEX_op_st_i32, { "ri", "r" } },
2058 { INDEX_op_add_i32, { "r", "r", "ri" } },
2059 { INDEX_op_sub_i32, { "r", "0", "ri" } },
2060 { INDEX_op_mul_i32, { "r", "0", "ri" } },
2061 { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
2062 { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
2063 { INDEX_op_and_i32, { "r", "0", "ri" } },
2064 { INDEX_op_or_i32, { "r", "0", "ri" } },
2065 { INDEX_op_xor_i32, { "r", "0", "ri" } },
2066 { INDEX_op_andc_i32, { "r", "r", "ri" } },
2068 { INDEX_op_shl_i32, { "r", "0", "ci" } },
2069 { INDEX_op_shr_i32, { "r", "0", "ci" } },
2070 { INDEX_op_sar_i32, { "r", "0", "ci" } },
2071 { INDEX_op_rotl_i32, { "r", "0", "ci" } },
2072 { INDEX_op_rotr_i32, { "r", "0", "ci" } },
2074 { INDEX_op_brcond_i32, { "r", "ri" } },
2076 { INDEX_op_bswap16_i32, { "r", "0" } },
2077 { INDEX_op_bswap32_i32, { "r", "0" } },
2079 { INDEX_op_neg_i32, { "r", "0" } },
2081 { INDEX_op_not_i32, { "r", "0" } },
2083 { INDEX_op_ext8s_i32, { "r", "q" } },
2084 { INDEX_op_ext16s_i32, { "r", "r" } },
2085 { INDEX_op_ext8u_i32, { "r", "q" } },
2086 { INDEX_op_ext16u_i32, { "r", "r" } },
2088 { INDEX_op_setcond_i32, { "q", "r", "ri" } },
2090 { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
2091 { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
2093 { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
2094 { INDEX_op_muls2_i32, { "a", "d", "a", "r" } },
2095 { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
2096 { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
2098 #if TCG_TARGET_REG_BITS == 32
2099 { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
2100 { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
2102 { INDEX_op_mov_i64, { "r", "r" } },
2103 { INDEX_op_movi_i64, { "r" } },
2104 { INDEX_op_ld8u_i64, { "r", "r" } },
2105 { INDEX_op_ld8s_i64, { "r", "r" } },
2106 { INDEX_op_ld16u_i64, { "r", "r" } },
2107 { INDEX_op_ld16s_i64, { "r", "r" } },
2108 { INDEX_op_ld32u_i64, { "r", "r" } },
2109 { INDEX_op_ld32s_i64, { "r", "r" } },
2110 { INDEX_op_ld_i64, { "r", "r" } },
2111 { INDEX_op_st8_i64, { "ri", "r" } },
2112 { INDEX_op_st16_i64, { "ri", "r" } },
2113 { INDEX_op_st32_i64, { "ri", "r" } },
2114 { INDEX_op_st_i64, { "re", "r" } },
2116 { INDEX_op_add_i64, { "r", "r", "re" } },
2117 { INDEX_op_mul_i64, { "r", "0", "re" } },
2118 { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
2119 { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
2120 { INDEX_op_sub_i64, { "r", "0", "re" } },
2121 { INDEX_op_and_i64, { "r", "0", "reZ" } },
2122 { INDEX_op_or_i64, { "r", "0", "re" } },
2123 { INDEX_op_xor_i64, { "r", "0", "re" } },
2124 { INDEX_op_andc_i64, { "r", "r", "rI" } },
2126 { INDEX_op_shl_i64, { "r", "0", "ci" } },
2127 { INDEX_op_shr_i64, { "r", "0", "ci" } },
2128 { INDEX_op_sar_i64, { "r", "0", "ci" } },
2129 { INDEX_op_rotl_i64, { "r", "0", "ci" } },
2130 { INDEX_op_rotr_i64, { "r", "0", "ci" } },
2132 { INDEX_op_brcond_i64, { "r", "re" } },
2133 { INDEX_op_setcond_i64, { "r", "r", "re" } },
2135 { INDEX_op_bswap16_i64, { "r", "0" } },
2136 { INDEX_op_bswap32_i64, { "r", "0" } },
2137 { INDEX_op_bswap64_i64, { "r", "0" } },
2138 { INDEX_op_neg_i64, { "r", "0" } },
2139 { INDEX_op_not_i64, { "r", "0" } },
2141 { INDEX_op_ext8s_i64, { "r", "r" } },
2142 { INDEX_op_ext16s_i64, { "r", "r" } },
2143 { INDEX_op_ext32s_i64, { "r", "r" } },
2144 { INDEX_op_ext8u_i64, { "r", "r" } },
2145 { INDEX_op_ext16u_i64, { "r", "r" } },
2146 { INDEX_op_ext32u_i64, { "r", "r" } },
2148 { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
2149 { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },
2151 { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } },
2152 { INDEX_op_muls2_i64, { "a", "d", "a", "r" } },
2153 { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } },
2154 { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } },
2157 #if TCG_TARGET_REG_BITS == 64
2158 { INDEX_op_qemu_ld_i32, { "r", "L" } },
2159 { INDEX_op_qemu_st_i32, { "L", "L" } },
2160 { INDEX_op_qemu_ld_i64, { "r", "L" } },
2161 { INDEX_op_qemu_st_i64, { "L", "L" } },
2162 #elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
2163 { INDEX_op_qemu_ld_i32, { "r", "L" } },
2164 { INDEX_op_qemu_st_i32, { "L", "L" } },
2165 { INDEX_op_qemu_ld_i64, { "r", "r", "L" } },
2166 { INDEX_op_qemu_st_i64, { "L", "L", "L" } },
2168 { INDEX_op_qemu_ld_i32, { "r", "L", "L" } },
2169 { INDEX_op_qemu_st_i32, { "L", "L", "L" } },
2170 { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } },
2171 { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } },
2176 static int tcg_target_callee_save_regs[] = {
2177 #if TCG_TARGET_REG_BITS == 64
2186 TCG_REG_R14, /* Currently used for the global env. */
2189 TCG_REG_EBP, /* Currently used for the global env. */
2196 /* Compute frame size via macros, to share between tcg_target_qemu_prologue
2197 and tcg_register_jit. */
2200 ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
2201 * (TCG_TARGET_REG_BITS / 8))
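/* The "+ 1" accounts for the return address already pushed by the caller's
   CALL.  E.g. with the six callee-saved registers listed in the ELF x86-64
   debug_frame below, PUSH_SIZE is (1 + 6) * 8 = 56 bytes. */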
2203 #define FRAME_SIZE \
2205 + TCG_STATIC_CALL_ARGS_SIZE \
2206 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2207 + TCG_TARGET_STACK_ALIGN - 1) \
2208 & ~(TCG_TARGET_STACK_ALIGN - 1))
2210 /* Generate global QEMU prologue and epilogue code */
2211 static void tcg_target_qemu_prologue(TCGContext *s)
2213 int i, stack_addend;
2217 /* Reserve some stack space, also for TCG temps. */
2218 stack_addend = FRAME_SIZE - PUSH_SIZE;
2219 tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
2220 CPU_TEMP_BUF_NLONGS * sizeof(long));
2222 /* Save all callee saved registers. */
2223 for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
2224 tcg_out_push(s, tcg_target_callee_save_regs[i]);
2227 #if TCG_TARGET_REG_BITS == 32
2228 tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
2229 (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
2230 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2232 tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
2233 (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
2236 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2237 tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
2239 tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
2243 tb_ret_addr = s->code_ptr;
2245 tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);
2247 for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
2248 tcg_out_pop(s, tcg_target_callee_save_regs[i]);
2250 tcg_out_opc(s, OPC_RET, 0, 0, 0);
2252 #if !defined(CONFIG_SOFTMMU)
2253 /* Try to set up a segment register to point to GUEST_BASE. */
2255 setup_guest_base_seg();
2260 static void tcg_target_init(TCGContext *s)
2262 unsigned a, b, c, d;
2263 int max = __get_cpuid_max(0, 0);
2266 __cpuid(1, a, b, c, d);
2268 /* For 32-bit, 99% certainty that we're running on hardware that
2269 supports cmov, but we still need to check. In case cmov is not
2270 available, we'll use a small forward branch. */
2271 have_cmov = (d & bit_CMOV) != 0;
2274 /* MOVBE is only available on Intel Atom and Haswell CPUs, so we
2275 need to probe for it. */
2276 have_movbe = (c & bit_MOVBE) != 0;
2281 /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs. */
2282 __cpuid_count(7, 0, a, b, c, d);
2284 have_bmi1 = (b & bit_BMI) != 0;
2288 if (TCG_TARGET_REG_BITS == 64) {
2289 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
2290 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
2292 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
2295 tcg_regset_clear(tcg_target_call_clobber_regs);
2296 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
2297 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
2298 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
2299 if (TCG_TARGET_REG_BITS == 64) {
2300 #if !defined(_WIN64)
2301 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
2302 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
2304 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
2305 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
2306 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
2307 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
2310 tcg_regset_clear(s->reserved_regs);
2311 tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
2313 tcg_add_target_add_op_defs(x86_op_defs);
2318 DebugFrameFDEHeader fde;
2319 uint8_t fde_def_cfa[4];
2320 uint8_t fde_reg_ofs[14];
2323 /* We're expecting a 2 byte uleb128 encoded value. */
2324 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2326 #if !defined(__ELF__)
2327 /* Host machine without ELF. */
2328 #elif TCG_TARGET_REG_BITS == 64
2329 #define ELF_HOST_MACHINE EM_X86_64
2330 static DebugFrame debug_frame = {
2331 .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2334 .cie.code_align = 1,
2335 .cie.data_align = 0x78, /* sleb128 -8 */
2336 .cie.return_column = 16,
2338 /* Total FDE size does not include the "len" member. */
2339 .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
2342 12, 7, /* DW_CFA_def_cfa %rsp, ... */
2343 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2347 0x90, 1, /* DW_CFA_offset, %rip, -8 */
2348 /* The following ordering must match tcg_target_callee_save_regs. */
2349 0x86, 2, /* DW_CFA_offset, %rbp, -16 */
2350 0x83, 3, /* DW_CFA_offset, %rbx, -24 */
2351 0x8c, 4, /* DW_CFA_offset, %r12, -32 */
2352 0x8d, 5, /* DW_CFA_offset, %r13, -40 */
2353 0x8e, 6, /* DW_CFA_offset, %r14, -48 */
2354 0x8f, 7, /* DW_CFA_offset, %r15, -56 */
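/* Decoding note: 0x80 | regno is DW_CFA_offset and its uleb128 operand is a
   factored offset, so "0x86, 2" means %rbp is saved at CFA + 2 * data_align
   = CFA - 16, matching the push order in tcg_target_callee_save_regs. */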
2358 #define ELF_HOST_MACHINE EM_386
2359 static DebugFrame debug_frame = {
2360 .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2363 .cie.code_align = 1,
2364 .cie.data_align = 0x7c, /* sleb128 -4 */
2365 .cie.return_column = 8,
2367 /* Total FDE size does not include the "len" member. */
2368 .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
2371 12, 4, /* DW_CFA_def_cfa %esp, ... */
2372 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2376 0x88, 1, /* DW_CFA_offset, %eip, -4 */
2377 /* The following ordering must match tcg_target_callee_save_regs. */
2378 0x85, 2, /* DW_CFA_offset, %ebp, -8 */
2379 0x83, 3, /* DW_CFA_offset, %ebx, -12 */
2380 0x86, 4, /* DW_CFA_offset, %esi, -16 */
2381 0x87, 5, /* DW_CFA_offset, %edi, -20 */
2386 #if defined(ELF_HOST_MACHINE)
2387 void tcg_register_jit(void *buf, size_t buf_size)
2389 debug_frame.fde.func_start = (uintptr_t)buf;
2390 debug_frame.fde.func_len = buf_size;
2392 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));