/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Define this to use liveness analysis (better code). */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"
#include "qemu/cacheflush.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS

#include "exec/exec-all.h"
#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "tcg-internal.h"

#ifdef CONFIG_TCG_INTERPRETER
#include <ffi.h>
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts. */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
#ifdef CONFIG_TCG_INTERPRETER
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         ffi_cif *cif);
#else
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
#endif
static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

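/*
 * Code emission helpers: each tcg_outN writes an N-bit value at the
 * current code pointer and advances it, while each tcg_patchN rewrites
 * an already-emitted value in place.  Both work in units of
 * TCG_TARGET_INSN_UNIT_SIZE, falling back to memcpy when a value spans
 * more than one insn unit.
 */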
#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
}

/* Signal overflow, starting over with fewer guest insns. */
static void QEMU_NORETURN tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

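/*
 * Constraint-set machinery: tcg-target-con-set.h is included twice below
 * with different expansions of the C_On_Im() macros -- once to build the
 * TCGConstraintSetIndex enumeration and once to build the parallel
 * constraint_sets[] array -- and the macros are then redefined a third
 * time so that per-target tcg_target_op_def() implementations can return
 * the enumerators by name.
 */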
#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};


#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)

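/*
 * As a concrete illustration: an entry such as C_O1_I2(r, r, r) in
 * tcg-target-con-set.h becomes the enumerator c_o1_i2_r_r_r in the first
 * pass, the array element { .args_ct_str = { "r", "r", "r" } } in the
 * second, and expands back to c_o1_i2_r_r_r here for use as a return
 * value of tcg_target_op_def().
 */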
#include "tcg-target.c.inc"

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base. */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
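/*
 * tcg_malloc_internal backs the inline tcg_malloc fast path.  Requests
 * larger than TCG_POOL_CHUNK_SIZE each get a dedicated pool linked on
 * pool_first_large; smaller requests are carved out of chained
 * fixed-size chunks, which tcg_pool_reset rewinds and reuses rather
 * than frees.
 */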
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current)
                    s->pool_current->next = p;
                else
                    s->pool_first = p;
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

#ifdef CONFIG_TCG_INTERPRETER
static GHashTable *ffi_table;

static ffi_type * const typecode_to_ffi[8] = {
    [dh_typecode_void] = &ffi_type_void,
    [dh_typecode_i32] = &ffi_type_uint32,
    [dh_typecode_s32] = &ffi_type_sint32,
    [dh_typecode_i64] = &ffi_type_uint64,
    [dh_typecode_s64] = &ffi_type_sint64,
    [dh_typecode_ptr] = &ffi_type_pointer,
};
#endif

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers. */
    /* Use g_direct_hash/equal for direct pointer comparisons on func. */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

#ifdef CONFIG_TCG_INTERPRETER
    /* g_direct_hash/equal for direct comparisons on uint32_t. */
    ffi_table = g_hash_table_new(NULL, NULL);
    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        struct {
            ffi_cif cif;
            ffi_type *args[];
        } *ca;
        uint32_t typemask = all_helpers[i].typemask;
        gpointer hash = (gpointer)(uintptr_t)typemask;
        ffi_status status;
        int nargs;

        if (g_hash_table_lookup(ffi_table, hash)) {
            continue;
        }

        /* Ignoring the return type, find the last non-zero field. */
        nargs = 32 - clz32(typemask >> 3);
        nargs = DIV_ROUND_UP(nargs, 3);
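        /*
         * Each value gets a 3-bit field in typemask, with the return
         * type in field 0 and argument i in field i + 1, so the highest
         * set bit of typemask >> 3 falls within the last argument's
         * field; rounding its bit count up to a multiple of 3 therefore
         * yields the argument count.
         */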

        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
        ca->cif.rtype = typecode_to_ffi[typemask & 7];
        ca->cif.nargs = nargs;

        if (nargs != 0) {
            ca->cif.arg_types = ca->args;
            /* Use a separate index so the outer helper loop's i survives. */
            for (int j = 0; j < nargs; ++j) {
                int typecode = extract32(typemask, (j + 1) * 3, 3);
                ca->args[j] = typecode_to_ffi[typecode];
            }
        }

        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                              ca->cif.rtype, ca->cif.arg_types);
        assert(status == FFI_OK);

        g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif);
    }
#endif

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order. */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue. */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry. */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);

#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

    tcg_region_prologue_set(s);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(s->code_gen_ptr, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ": .long 0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(s->code_gen_ptr, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock(logfile);
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    if (TCG_TARGET_HAS_goto_ptr) {
        tcg_debug_assert(tcg_code_gen_epilogue != NULL);
    }
#endif
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality. */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}

static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        tcg_raise_tb_overflow(s);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers. */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}

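/*
 * Allocate a temporary of TYPE, preferring to recycle a previously freed
 * temp of the same type and kind.  The free_temps bitmaps are indexed by
 * type, offset by TCG_TYPE_COUNT for local temps, matching the encoding
 * used by tcg_temp_free_internal() below.
 */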
TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type. */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->kind == kind);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->kind = kind;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->kind = kind;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->kind = kind;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp. */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, 0);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

    /* In order to simplify users of tcg_constant_*, silently ignore free. */
    if (ts->kind == TEMP_CONST) {
        return;
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->kind < TEMP_GLOBAL);
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    idx = temp_idx(ts);
    k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
    set_bit(idx, s->free_temps[k].l);
}

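/*
 * Return the interned TEMP_CONST temp for VAL, creating it on first use.
 * Constants are cached per type in a hash table keyed by value; frees of
 * such temps are silently ignored (see tcg_temp_free_internal above),
 * and the per-type tables are emptied for each new translation in
 * tcg_func_start().
 */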
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts->val = val;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->val = val >> 32;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
        }
        g_hash_table_insert(h, &ts->val, ts);
    }

    return ts;
}

TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
{
    val = dup_const(vece, val);
    return temp_tcgv_vec(tcg_constant_internal(type, val));
}

TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);
    return tcg_constant_vec(t->base_type, vece, val);
}

TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    s->temps_in_use = 0;
}

int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif

/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode. */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_goto_ptr:
        return TCG_TARGET_HAS_goto_ptr;

    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}

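/*
 * Emit a call to FUNC, which must have been registered in helper_table.
 * A helper's typemask packs one 3-bit typecode per value: field 0
 * describes the return type and field i + 1 describes argument i, which
 * is why the extract32(typemask, (i + 1) * 3, 3) idiom recurs below.
 */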
/* Note: we convert the 64 bit args to 32 bit and do some alignment
   and endian swap. Maybe it would be better to do the alignment
   and endian swap in tcg_reg_alloc_call(). */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    int i, real_args, nb_rets, pi;
    unsigned typemask;
    const TCGHelperInfo *info;
    TCGOp *op;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    typemask = info->typemask;

#ifdef CONFIG_PLUGIN
    /* detect non-plugin helpers */
    if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass as two
       separate parameters.  Split them. */
    int orig_typemask = typemask;
    int orig_nargs = nargs;
    TCGv_i64 retl, reth;
    TCGTemp *split_args[MAX_OPC_PARAM];

    retl = NULL;
    reth = NULL;
    typemask = 0;
    for (i = real_args = 0; i < nargs; ++i) {
        int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
        bool is_64bit = (argtype & ~1) == dh_typecode_i64;

        if (is_64bit) {
            TCGv_i64 orig = temp_tcgv_i64(args[i]);
            TCGv_i32 h = tcg_temp_new_i32();
            TCGv_i32 l = tcg_temp_new_i32();
            tcg_gen_extr_i64_i32(l, h, orig);
            split_args[real_args++] = tcgv_i32_temp(h);
            typemask |= dh_typecode_i32 << (real_args * 3);
            split_args[real_args++] = tcgv_i32_temp(l);
            typemask |= dh_typecode_i32 << (real_args * 3);
        } else {
            split_args[real_args++] = args[i];
            typemask |= argtype << (real_args * 3);
        }
    }
    nargs = real_args;
    args = split_args;
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    for (i = 0; i < nargs; ++i) {
        int argtype = extract32(typemask, (i + 1) * 3, 3);
        bool is_32bit = (argtype & ~1) == dh_typecode_i32;
        bool is_signed = argtype & 1;

        if (is_32bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i64 orig = temp_tcgv_i64(args[i]);
            if (is_signed) {
                tcg_gen_ext32s_i64(temp, orig);
            } else {
                tcg_gen_ext32u_i64(temp, orig);
            }
            args[i] = tcgv_i64_temp(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    op = tcg_emit_op(INDEX_op_call);

    pi = 0;
    if (ret != NULL) {
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
        if ((typemask & 6) == dh_typecode_i64) {
            /* The 32-bit ABI is going to return the 64-bit value in
               the %o0/%o1 register pair.  Prepare for this by using
               two return temporaries, and reassemble below. */
            retl = tcg_temp_new_i64();
            reth = tcg_temp_new_i64();
            op->args[pi++] = tcgv_i64_arg(reth);
            op->args[pi++] = tcgv_i64_arg(retl);
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#else
        if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) {
#ifdef HOST_WORDS_BIGENDIAN
            op->args[pi++] = temp_arg(ret + 1);
            op->args[pi++] = temp_arg(ret);
#else
            op->args[pi++] = temp_arg(ret);
            op->args[pi++] = temp_arg(ret + 1);
#endif
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#endif
    } else {
        nb_rets = 0;
    }
    TCGOP_CALLO(op) = nb_rets;

    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int argtype = extract32(typemask, (i + 1) * 3, 3);
        bool is_64bit = (argtype & ~1) == dh_typecode_i64;
        bool want_align = false;

#if defined(CONFIG_TCG_INTERPRETER)
        /*
         * Align all arguments, so that they land in predictable places
         * for passing off to ffi_call.
         */
        want_align = true;
#elif defined(TCG_TARGET_CALL_ALIGN_ARGS)
        /* Some targets want aligned 64 bit args */
        want_align = is_64bit;
#endif

        if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) {
            op->args[pi++] = TCG_CALL_DUMMY_ARG;
            real_args++;
        }

        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
            /*
             * If stack grows up, then we will be placing successive
             * arguments at lower addresses, which means we need to
             * reverse the order compared to how we would normally
             * treat either big or little-endian.  For those arguments
             * that will wind up in registers, this still works for
             * HPPA (the only current STACK_GROWSUP target) since the
             * argument registers are *also* allocated in decreasing
             * order.  If another such target is added, this logic may
             * have to get more complicated to differentiate between
             * stack arguments and register arguments.
             */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
            op->args[pi++] = temp_arg(args[i] + 1);
            op->args[pi++] = temp_arg(args[i]);
#else
            op->args[pi++] = temp_arg(args[i]);
            op->args[pi++] = temp_arg(args[i] + 1);
#endif
            real_args += 2;
            continue;
        }

        op->args[pi++] = temp_arg(args[i]);
        real_args++;
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    TCGOP_CALLI(op) = real_args;

    /* Make sure the fields didn't overflow. */
    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* Free all of the parts we allocated above. */
    for (i = real_args = 0; i < orig_nargs; ++i) {
        int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
        bool is_64bit = (argtype & ~1) == dh_typecode_i64;

        if (is_64bit) {
            tcg_temp_free_internal(args[real_args++]);
            tcg_temp_free_internal(args[real_args++]);
        } else {
            real_args++;
        }
    }
    if ((orig_typemask & 6) == dh_typecode_i64) {
        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
           Note that describing these as TCGv_i64 eliminates an unnecessary
           zero-extension that tcg_gen_concat_i32_i64 would create. */
        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
        tcg_temp_free_i64(retl);
        tcg_temp_free_i64(reth);
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    for (i = 0; i < nargs; ++i) {
        int argtype = extract32(typemask, (i + 1) * 3, 3);
        bool is_32bit = (argtype & ~1) == dh_typecode_i32;

        if (is_32bit) {
            tcg_temp_free_internal(args[i]);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}

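/*
 * Reset the register-allocator state for a new translation: constants
 * start as TEMP_VAL_CONST, fixed temps live in their registers, globals
 * and local temps start in memory (with local slots unallocated), normal
 * temps start dead, and all host registers are marked free.
 */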
static void tcg_reg_alloc_start(TCGContext *s)
{
    int i, n;

    for (i = 0, n = s->nb_temps; i < n; i++) {
        TCGTemp *ts = &s->temps[i];
        TCGTempVal val = TEMP_VAL_MEM;

        switch (ts->kind) {
        case TEMP_CONST:
            val = TEMP_VAL_CONST;
            break;
        case TEMP_FIXED:
            val = TEMP_VAL_REG;
            break;
        case TEMP_GLOBAL:
            break;
        case TEMP_NORMAL:
            val = TEMP_VAL_DEAD;
            /* fall through */
        case TEMP_LOCAL:
            ts->mem_allocated = 0;
            break;
        default:
            g_assert_not_reached();
        }
        ts->val_type = val;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}

static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_LOCAL:
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_NORMAL:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}

static char *tcg_get_arg_str(TCGContext *s, char *buf,
                             int buf_size, TCGArg arg)
{
    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
}

static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};

static const char * const ldst_name[] =
{
    [MO_UB] = "ub",
    [MO_SB] = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEQ] = "beq",
};

static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef TARGET_ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT] = "un+",
    [MO_ALIGN >> MO_ASHIFT] = "",
#else
    [MO_UNALN >> MO_ASHIFT] = "",
    [MO_ALIGN >> MO_ASHIFT] = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT] = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT] = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT] = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};

static inline bool tcg_regset_single(TCGRegSet d)
{
    return (d & (d - 1)) == 0;
}

static inline TCGReg tcg_regset_first(TCGRegSet d)
{
    if (TCG_TARGET_NB_REGS <= 32) {
        return ctz32(d);
    } else {
        return ctz64(d);
    }
}

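/*
 * Log the pending opcode list, one op per line.  When liveness data or
 * output-register preferences are available, they are appended in an
 * extra column aligned at offset 40: "sync:"/"dead:" argument masks and
 * "pref=" register sets.
 */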
static void tcg_dump_ops(TCGContext *s, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            nb_oargs = 0;
            col += qemu_log("\n ----");

            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                col += qemu_log(" " TARGET_FMT_lx, a);
            }
        } else if (c == INDEX_op_call) {
            const TCGHelperInfo *info = tcg_call_info(op);
            void *func = tcg_call_func(op);

            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            col += qemu_log(" %s ", def->name);

            /*
             * Print the function name from TCGHelperInfo, if available.
             * Note that plugins have a template function for the info,
             * but the actual function pointer comes from the plugin.
             */
            if (func == info->func) {
                col += qemu_log("%s", info->name);
            } else {
                col += qemu_log("plugin(%p)", func);
            }

            col += qemu_log("$0x%x,$%d", info->flags, nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                       op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = "<dummy>";
                if (arg != TCG_CALL_DUMMY_ARG) {
                    t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                }
                col += qemu_log(",%s", t);
            }
        } else {
            col += qemu_log(" %s ", def->name);

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            if (def->flags & TCG_OPF_VECTOR) {
                col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
                                8 << TCGOP_VECE(op));
            }

            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                if (k != 0) {
                    col += qemu_log(",");
                }
                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                      op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                if (k != 0) {
                    col += qemu_log(",");
                }
                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                      op->args[k++]));
            }
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_movcond_i64:
            case INDEX_op_cmp_vec:
            case INDEX_op_cmpsel_vec:
                if (op->args[k] < ARRAY_SIZE(cond_name)
                    && cond_name[op->args[k]]) {
                    col += qemu_log(",%s", cond_name[op->args[k++]]);
                } else {
                    col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_st8_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i64:
                {
                    TCGMemOpIdx oi = op->args[k++];
                    MemOp op = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
                        col += qemu_log(",$0x%x,%u", op, ix);
                    } else {
                        const char *s_al, *s_op;
                        s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
                        s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
                        col += qemu_log(",%s%s,%u", s_al, s_op, ix);
                    }
                    i = 1;
                }
                break;
            default:
                i = 0;
                break;
            }
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_brcond2_i32:
                col += qemu_log("%s$L%d", k ? "," : "",
                                arg_label(op->args[k])->id);
                i++, k++;
                break;
            default:
                break;
            }
            for (; i < nb_cargs; i++, k++) {
                col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
            }
        }

        if (have_prefs || op->life) {

            QemuLogFile *logfile;

            rcu_read_lock();
            logfile = qatomic_rcu_read(&qemu_logfile);
            if (logfile) {
                for (; col < 40; ++col) {
                    putc(' ', logfile->fd);
                }
            }
            rcu_read_unlock();
        }

        if (op->life) {
            unsigned life = op->life;

            if (life & (SYNC_ARG * 3)) {
                qemu_log(" sync:");
                for (i = 0; i < 2; ++i) {
                    if (life & (SYNC_ARG << i)) {
                        qemu_log(" %d", i);
                    }
                }
            }
            life /= DEAD_ARG;
            if (life) {
                qemu_log(" dead:");
                for (i = 0; life; ++i, life >>= 1) {
                    if (life & 1) {
                        qemu_log(" %d", i);
                    }
                }
            }
        }

        if (have_prefs) {
            for (i = 0; i < nb_oargs; ++i) {
                TCGRegSet set = op->output_pref[i];

                if (i == 0) {
                    qemu_log(" pref=");
                } else {
                    qemu_log(",");
                }
                if (set == 0) {
                    qemu_log("none");
                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
                    qemu_log("all");
#ifdef CONFIG_DEBUG_TCG
                } else if (tcg_regset_single(set)) {
                    TCGReg reg = tcg_regset_first(set);
                    qemu_log("%s", tcg_target_reg_names[reg]);
#endif
                } else if (TCG_TARGET_NB_REGS <= 32) {
                    qemu_log("%#x", (uint32_t)set);
                } else {
                    qemu_log("%#" PRIx64, (uint64_t)set);
                }
            }
        }

        qemu_log("\n");
    }
}

/*
 * We give higher priority to constraints that allow fewer registers,
 * so that the most constrained arguments are allocated first: a
 * single-register constraint yields TCG_TARGET_NB_REGS, while a
 * constraint allowing every register yields 1.
 */
2013static int get_constraint_priority(const TCGOpDef *def, int k)
2014{
2015 const TCGArgConstraint *arg_ct = &def->args_ct[k];
2016 int n;
2017
2018 if (arg_ct->oalias) {
2019 /* an alias is equivalent to a single register */
2020 n = 1;
2021 } else {
2022 n = ctpop64(arg_ct->regs);
2023 }
2024 return TCG_TARGET_NB_REGS - n + 1;
2025}
2026
2027/* sort from highest priority to lowest */
2028static void sort_constraints(TCGOpDef *def, int start, int n)
2029{
2030 int i, j;
2031 TCGArgConstraint *a = def->args_ct;
2032
2033 for (i = 0; i < n; i++) {
2034 a[start + i].sort_index = start + i;
2035 }
2036 if (n <= 1) {
2037 return;
2038 }
2039 for (i = 0; i < n - 1; i++) {
2040 for (j = i + 1; j < n; j++) {
2041 int p1 = get_constraint_priority(def, a[start + i].sort_index);
2042 int p2 = get_constraint_priority(def, a[start + j].sort_index);
2043 if (p1 < p2) {
2044 int tmp = a[start + i].sort_index;
2045 a[start + i].sort_index = a[start + j].sort_index;
2046 a[start + j].sort_index = tmp;
2047 }
2048 }
2049 }
2050}
2051
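/*
 * Parse the constraint strings supplied by the backend for each opcode:
 * a leading digit makes an input alias the numbered output, '&' marks an
 * early-clobber (new register) output, 'i' accepts an immediate, and the
 * remaining letters name target-specific register sets or constant
 * classes from tcg-target-con-str.h.
 */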
2052static void process_op_defs(TCGContext *s)
2053{
2054 TCGOpcode op;
2055
2056 for (op = 0; op < NB_OPS; op++) {
2057 TCGOpDef *def = &tcg_op_defs[op];
2058 const TCGTargetOpDef *tdefs;
2059 int i, nb_args;
2060
2061 if (def->flags & TCG_OPF_NOT_PRESENT) {
2062 continue;
2063 }
2064
2065 nb_args = def->nb_iargs + def->nb_oargs;
2066 if (nb_args == 0) {
2067 continue;
2068 }
2069
2070 /*
2071 * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signedness of an enum
2073 * is implementation defined, force the result to unsigned.
2074 */
2075 unsigned con_set = tcg_target_op_def(op);
2076 tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2077 tdefs = &constraint_sets[con_set];
2078
2079 for (i = 0; i < nb_args; i++) {
2080 const char *ct_str = tdefs->args_ct_str[i];
2081 /* Incomplete TCGTargetOpDef entry. */
2082 tcg_debug_assert(ct_str != NULL);
2083
2084 while (*ct_str != '\0') {
                switch (*ct_str) {
2086 case '0' ... '9':
2087 {
2088 int oarg = *ct_str - '0';
2089 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2090 tcg_debug_assert(oarg < def->nb_oargs);
2091 tcg_debug_assert(def->args_ct[oarg].regs != 0);
2092 def->args_ct[i] = def->args_ct[oarg];
2093 /* The output sets oalias. */
2094 def->args_ct[oarg].oalias = true;
2095 def->args_ct[oarg].alias_index = i;
2096 /* The input sets ialias. */
2097 def->args_ct[i].ialias = true;
2098 def->args_ct[i].alias_index = oarg;
2099 }
2100 ct_str++;
2101 break;
2102 case '&':
2103 def->args_ct[i].newreg = true;
2104 ct_str++;
2105 break;
2106 case 'i':
2107 def->args_ct[i].ct |= TCG_CT_CONST;
2108 ct_str++;
2109 break;
2110
2111 /* Include all of the target-specific constraints. */
2112
2113#undef CONST
2114#define CONST(CASE, MASK) \
2115 case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
2116#define REGS(CASE, MASK) \
2117 case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;
2118
2119#include "tcg-target-con-str.h"
2120
2121#undef REGS
2122#undef CONST
2123 default:
2124 /* Typo in TCGTargetOpDef constraint. */
2125 g_assert_not_reached();
2126 }
2127 }
2128 }
2129
2130 /* TCGTargetOpDef entry with too much information? */
2131 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2132
        /* sort the constraints (XXX: this is just a heuristic) */
2134 sort_constraints(def, 0, def->nb_oargs);
2135 sort_constraints(def, def->nb_oargs, def->nb_iargs);
2136 }
2137}
2138
2139void tcg_op_remove(TCGContext *s, TCGOp *op)
2140{
2141 TCGLabel *label;
2142
2143 switch (op->opc) {
2144 case INDEX_op_br:
2145 label = arg_label(op->args[0]);
2146 label->refs--;
2147 break;
2148 case INDEX_op_brcond_i32:
2149 case INDEX_op_brcond_i64:
2150 label = arg_label(op->args[3]);
2151 label->refs--;
2152 break;
2153 case INDEX_op_brcond2_i32:
2154 label = arg_label(op->args[5]);
2155 label->refs--;
2156 break;
2157 default:
2158 break;
2159 }
2160
2161 QTAILQ_REMOVE(&s->ops, op, link);
2162 QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2163 s->nb_ops--;
2164
2165#ifdef CONFIG_PROFILER
2166 qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2167#endif
2168}
2169
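/* Remove and recycle every op emitted after OP, so that OP becomes the
   last op in the stream. */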
2170void tcg_remove_ops_after(TCGOp *op)
2171{
2172 TCGContext *s = tcg_ctx;
2173
2174 while (true) {
2175 TCGOp *last = tcg_last_op();
2176 if (last == op) {
2177 return;
2178 }
2179 tcg_op_remove(s, last);
2180 }
2181}
2182
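/* Allocate a TCGOp, reusing one from the free list when available;
   the op is zeroed up to (but not including) its list linkage. */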
2183static TCGOp *tcg_op_alloc(TCGOpcode opc)
2184{
2185 TCGContext *s = tcg_ctx;
2186 TCGOp *op;
2187
2188 if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2189 op = tcg_malloc(sizeof(TCGOp));
2190 } else {
2191 op = QTAILQ_FIRST(&s->free_ops);
2192 QTAILQ_REMOVE(&s->free_ops, op, link);
2193 }
2194 memset(op, 0, offsetof(TCGOp, link));
2195 op->opc = opc;
2196 s->nb_ops++;
2197
2198 return op;
2199}
2200
2201TCGOp *tcg_emit_op(TCGOpcode opc)
2202{
2203 TCGOp *op = tcg_op_alloc(opc);
2204 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2205 return op;
2206}
2207
2208TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2209{
2210 TCGOp *new_op = tcg_op_alloc(opc);
2211 QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2212 return new_op;
2213}
2214
2215TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2216{
2217 TCGOp *new_op = tcg_op_alloc(opc);
2218 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2219 return new_op;
2220}
2221
/* Reachability analysis: remove unreachable code. */
2223static void reachable_code_pass(TCGContext *s)
2224{
2225 TCGOp *op, *op_next;
2226 bool dead = false;
2227
2228 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2229 bool remove = dead;
2230 TCGLabel *label;
2231
2232 switch (op->opc) {
2233 case INDEX_op_set_label:
2234 label = arg_label(op->args[0]);
2235 if (label->refs == 0) {
2236 /*
2237 * While there is an occasional backward branch, virtually
2238 * all branches generated by the translators are forward.
                 * This means that, by the time we reach a label, we will
                 * generally have already removed all references to it, and
                 * there is little to be gained by iterating.
2242 */
2243 remove = true;
2244 } else {
2245 /* Once we see a label, insns become live again. */
2246 dead = false;
2247 remove = false;
2248
2249 /*
2250 * Optimization can fold conditional branches to unconditional.
2251 * If we find a label with one reference which is preceded by
2252 * an unconditional branch to it, remove both. This needed to
2253 * wait until the dead code in between them was removed.
2254 */
2255 if (label->refs == 1) {
2256 TCGOp *op_prev = QTAILQ_PREV(op, link);
2257 if (op_prev->opc == INDEX_op_br &&
2258 label == arg_label(op_prev->args[0])) {
2259 tcg_op_remove(s, op_prev);
2260 remove = true;
2261 }
2262 }
2263 }
2264 break;
2265
2266 case INDEX_op_br:
2267 case INDEX_op_exit_tb:
2268 case INDEX_op_goto_ptr:
2269 /* Unconditional branches; everything following is dead. */
2270 dead = true;
2271 break;
2272
2273 case INDEX_op_call:
2274 /* Notice noreturn helper calls, raising exceptions. */
2275 if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
2276 dead = true;
2277 }
2278 break;
2279
2280 case INDEX_op_insn_start:
2281 /* Never remove -- we need to keep these for unwind. */
2282 remove = false;
2283 break;
2284
2285 default:
2286 break;
2287 }
2288
2289 if (remove) {
2290 tcg_op_remove(s, op);
2291 }
2292 }
2293}
2294
/* Liveness state bits for each temp: the value is dead / the value is
   synced to its canonical memory location. */
#define TS_DEAD  1
#define TS_MEM   2
2297
2298#define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
2299#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
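/*
 * Layout of the per-op life mask, as decoded by tcg_dump_ops above:
 * the low bits hold one SYNC_ARG flag per output that must be synced
 * back to memory, and the bits from DEAD_ARG upward hold one flag per
 * argument whose value dies at this op.
 */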
2300
2301/* For liveness_pass_1, the register preferences for a given temp. */
2302static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2303{
2304 return ts->state_ptr;
2305}
2306
2307/* For liveness_pass_1, reset the preferences for a given temp to the
2308 * maximal regset for its type.
2309 */
2310static inline void la_reset_pref(TCGTemp *ts)
2311{
2312 *la_temp_pref(ts)
2313 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2314}
2315
2316/* liveness analysis: end of function: all temps are dead, and globals
2317 should be in memory. */
2318static void la_func_end(TCGContext *s, int ng, int nt)
2319{
2320 int i;
2321
2322 for (i = 0; i < ng; ++i) {
2323 s->temps[i].state = TS_DEAD | TS_MEM;
2324 la_reset_pref(&s->temps[i]);
2325 }
2326 for (i = ng; i < nt; ++i) {
2327 s->temps[i].state = TS_DEAD;
2328 la_reset_pref(&s->temps[i]);
2329 }
2330}
2331
2332/* liveness analysis: end of basic block: all temps are dead, globals
2333 and local temps should be in memory. */
2334static void la_bb_end(TCGContext *s, int ng, int nt)
2335{
2336 int i;
2337
2338 for (i = 0; i < nt; ++i) {
2339 TCGTemp *ts = &s->temps[i];
2340 int state;
2341
2342 switch (ts->kind) {
2343 case TEMP_FIXED:
2344 case TEMP_GLOBAL:
2345 case TEMP_LOCAL:
2346 state = TS_DEAD | TS_MEM;
2347 break;
2348 case TEMP_NORMAL:
2349 case TEMP_CONST:
2350 state = TS_DEAD;
2351 break;
2352 default:
2353 g_assert_not_reached();
2354 }
2355 ts->state = state;
2356 la_reset_pref(ts);
2357 }
2358}
2359
2360/* liveness analysis: sync globals back to memory. */
2361static void la_global_sync(TCGContext *s, int ng)
2362{
2363 int i;
2364
2365 for (i = 0; i < ng; ++i) {
2366 int state = s->temps[i].state;
2367 s->temps[i].state = state | TS_MEM;
2368 if (state == TS_DEAD) {
2369 /* If the global was previously dead, reset prefs. */
2370 la_reset_pref(&s->temps[i]);
2371 }
2372 }
2373}
2374
2375/*
2376 * liveness analysis: conditional branch: all temps are dead,
2377 * globals and local temps should be synced.
2378 */
2379static void la_bb_sync(TCGContext *s, int ng, int nt)
2380{
2381 la_global_sync(s, ng);
2382
2383 for (int i = ng; i < nt; ++i) {
2384 TCGTemp *ts = &s->temps[i];
2385 int state;
2386
2387 switch (ts->kind) {
2388 case TEMP_LOCAL:
2389 state = ts->state;
2390 ts->state = state | TS_MEM;
2391 if (state != TS_DEAD) {
2392 continue;
2393 }
2394 break;
2395 case TEMP_NORMAL:
2396 s->temps[i].state = TS_DEAD;
2397 break;
2398 case TEMP_CONST:
2399 continue;
2400 default:
2401 g_assert_not_reached();
2402 }
2403 la_reset_pref(&s->temps[i]);
2404 }
2405}
2406
2407/* liveness analysis: sync globals back to memory and kill. */
2408static void la_global_kill(TCGContext *s, int ng)
2409{
2410 int i;
2411
2412 for (i = 0; i < ng; i++) {
2413 s->temps[i].state = TS_DEAD | TS_MEM;
2414 la_reset_pref(&s->temps[i]);
2415 }
2416}
2417
2418/* liveness analysis: note live globals crossing calls. */
2419static void la_cross_call(TCGContext *s, int nt)
2420{
2421 TCGRegSet mask = ~tcg_target_call_clobber_regs;
2422 int i;
2423
2424 for (i = 0; i < nt; i++) {
2425 TCGTemp *ts = &s->temps[i];
2426 if (!(ts->state & TS_DEAD)) {
2427 TCGRegSet *pset = la_temp_pref(ts);
2428 TCGRegSet set = *pset;
2429
2430 set &= mask;
2431 /* If the combination is not possible, restart. */
2432 if (set == 0) {
2433 set = tcg_target_available_regs[ts->type] & mask;
2434 }
2435 *pset = set;
2436 }
2437 }
2438}
2439
/* Liveness analysis: fill in each op's life mask, telling whether a
   given argument is dead or must be synced back to memory.
   Instructions computing only dead temporaries are removed. */
2443static void liveness_pass_1(TCGContext *s)
2444{
2445 int nb_globals = s->nb_globals;
2446 int nb_temps = s->nb_temps;
2447 TCGOp *op, *op_prev;
2448 TCGRegSet *prefs;
2449 int i;
2450
2451 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2452 for (i = 0; i < nb_temps; ++i) {
2453 s->temps[i].state_ptr = prefs + i;
2454 }
2455
2456 /* ??? Should be redundant with the exit_tb that ends the TB. */
2457 la_func_end(s, nb_globals, nb_temps);
2458
2459 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2460 int nb_iargs, nb_oargs;
2461 TCGOpcode opc_new, opc_new2;
2462 bool have_opc_new2;
2463 TCGLifeData arg_life = 0;
2464 TCGTemp *ts;
2465 TCGOpcode opc = op->opc;
2466 const TCGOpDef *def = &tcg_op_defs[opc];
2467
2468 switch (opc) {
2469 case INDEX_op_call:
2470 {
2471 int call_flags;
2472 int nb_call_regs;
2473
2474 nb_oargs = TCGOP_CALLO(op);
2475 nb_iargs = TCGOP_CALLI(op);
2476 call_flags = tcg_call_flags(op);
2477
2478 /* pure functions can be removed if their result is unused */
2479 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2480 for (i = 0; i < nb_oargs; i++) {
2481 ts = arg_temp(op->args[i]);
2482 if (ts->state != TS_DEAD) {
2483 goto do_not_remove_call;
2484 }
2485 }
2486 goto do_remove;
2487 }
2488 do_not_remove_call:
2489
2490 /* Output args are dead. */
2491 for (i = 0; i < nb_oargs; i++) {
2492 ts = arg_temp(op->args[i]);
2493 if (ts->state & TS_DEAD) {
2494 arg_life |= DEAD_ARG << i;
2495 }
2496 if (ts->state & TS_MEM) {
2497 arg_life |= SYNC_ARG << i;
2498 }
2499 ts->state = TS_DEAD;
2500 la_reset_pref(ts);
2501
2502 /* Not used -- it will be tcg_target_call_oarg_regs[i]. */
2503 op->output_pref[i] = 0;
2504 }
2505
2506 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2507 TCG_CALL_NO_READ_GLOBALS))) {
2508 la_global_kill(s, nb_globals);
2509 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2510 la_global_sync(s, nb_globals);
2511 }
2512
2513 /* Record arguments that die in this helper. */
2514 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2515 ts = arg_temp(op->args[i]);
2516 if (ts && ts->state & TS_DEAD) {
2517 arg_life |= DEAD_ARG << i;
2518 }
2519 }
2520
2521 /* For all live registers, remove call-clobbered prefs. */
2522 la_cross_call(s, nb_temps);
2523
2524 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2525
2526 /* Input arguments are live for preceding opcodes. */
2527 for (i = 0; i < nb_iargs; i++) {
2528 ts = arg_temp(op->args[i + nb_oargs]);
2529 if (ts && ts->state & TS_DEAD) {
2530 /* For those arguments that die, and will be allocated
2531 * in registers, clear the register set for that arg,
2532 * to be filled in below. For args that will be on
2533 * the stack, reset to any available reg.
2534 */
2535 *la_temp_pref(ts)
2536 = (i < nb_call_regs ? 0 :
2537 tcg_target_available_regs[ts->type]);
2538 ts->state &= ~TS_DEAD;
2539 }
2540 }
2541
2542 /* For each input argument, add its input register to prefs.
2543 If a temp is used once, this produces a single set bit. */
2544 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2545 ts = arg_temp(op->args[i + nb_oargs]);
2546 if (ts) {
2547 tcg_regset_set_reg(*la_temp_pref(ts),
2548 tcg_target_call_iarg_regs[i]);
2549 }
2550 }
2551 }
2552 break;
2553 case INDEX_op_insn_start:
2554 break;
2555 case INDEX_op_discard:
2556 /* mark the temporary as dead */
2557 ts = arg_temp(op->args[0]);
2558 ts->state = TS_DEAD;
2559 la_reset_pref(ts);
2560 break;
2561
2562 case INDEX_op_add2_i32:
2563 opc_new = INDEX_op_add_i32;
2564 goto do_addsub2;
2565 case INDEX_op_sub2_i32:
2566 opc_new = INDEX_op_sub_i32;
2567 goto do_addsub2;
2568 case INDEX_op_add2_i64:
2569 opc_new = INDEX_op_add_i64;
2570 goto do_addsub2;
2571 case INDEX_op_sub2_i64:
2572 opc_new = INDEX_op_sub_i64;
2573 do_addsub2:
2574 nb_iargs = 4;
2575 nb_oargs = 2;
2576 /* Test if the high part of the operation is dead, but not
2577 the low part. The result can be optimized to a simple
               add or sub.  This happens often for an x86_64 guest when
               the CPU runs in 32-bit mode: e.g. add2_i32 rl,rh,al,ah,bl,bh
               with rh dead becomes add_i32 rl,al,bl. */
2580 if (arg_temp(op->args[1])->state == TS_DEAD) {
2581 if (arg_temp(op->args[0])->state == TS_DEAD) {
2582 goto do_remove;
2583 }
2584 /* Replace the opcode and adjust the args in place,
2585 leaving 3 unused args at the end. */
2586 op->opc = opc = opc_new;
2587 op->args[1] = op->args[2];
2588 op->args[2] = op->args[4];
2589 /* Fall through and mark the single-word operation live. */
2590 nb_iargs = 2;
2591 nb_oargs = 1;
2592 }
2593 goto do_not_remove;
2594
2595 case INDEX_op_mulu2_i32:
2596 opc_new = INDEX_op_mul_i32;
2597 opc_new2 = INDEX_op_muluh_i32;
2598 have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2599 goto do_mul2;
2600 case INDEX_op_muls2_i32:
2601 opc_new = INDEX_op_mul_i32;
2602 opc_new2 = INDEX_op_mulsh_i32;
2603 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2604 goto do_mul2;
2605 case INDEX_op_mulu2_i64:
2606 opc_new = INDEX_op_mul_i64;
2607 opc_new2 = INDEX_op_muluh_i64;
2608 have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2609 goto do_mul2;
2610 case INDEX_op_muls2_i64:
2611 opc_new = INDEX_op_mul_i64;
2612 opc_new2 = INDEX_op_mulsh_i64;
2613 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2614 goto do_mul2;
2615 do_mul2:
2616 nb_iargs = 2;
2617 nb_oargs = 2;
2618 if (arg_temp(op->args[1])->state == TS_DEAD) {
2619 if (arg_temp(op->args[0])->state == TS_DEAD) {
2620 /* Both parts of the operation are dead. */
2621 goto do_remove;
2622 }
2623 /* The high part of the operation is dead; generate the low. */
2624 op->opc = opc = opc_new;
2625 op->args[1] = op->args[2];
2626 op->args[2] = op->args[3];
2627 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2628 /* The low part of the operation is dead; generate the high. */
2629 op->opc = opc = opc_new2;
2630 op->args[0] = op->args[1];
2631 op->args[1] = op->args[2];
2632 op->args[2] = op->args[3];
2633 } else {
2634 goto do_not_remove;
2635 }
2636 /* Mark the single-word operation live. */
2637 nb_oargs = 1;
2638 goto do_not_remove;
2639
2640 default:
2641 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2642 nb_iargs = def->nb_iargs;
2643 nb_oargs = def->nb_oargs;
2644
2645 /* Test if the operation can be removed because all
2646 its outputs are dead. We assume that nb_oargs == 0
2647 implies side effects */
2648 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2649 for (i = 0; i < nb_oargs; i++) {
2650 if (arg_temp(op->args[i])->state != TS_DEAD) {
2651 goto do_not_remove;
2652 }
2653 }
2654 goto do_remove;
2655 }
2656 goto do_not_remove;
2657
2658 do_remove:
2659 tcg_op_remove(s, op);
2660 break;
2661
2662 do_not_remove:
2663 for (i = 0; i < nb_oargs; i++) {
2664 ts = arg_temp(op->args[i]);
2665
2666 /* Remember the preference of the uses that followed. */
2667 op->output_pref[i] = *la_temp_pref(ts);
2668
2669 /* Output args are dead. */
2670 if (ts->state & TS_DEAD) {
2671 arg_life |= DEAD_ARG << i;
2672 }
2673 if (ts->state & TS_MEM) {
2674 arg_life |= SYNC_ARG << i;
2675 }
2676 ts->state = TS_DEAD;
2677 la_reset_pref(ts);
2678 }
2679
2680 /* If end of basic block, update. */
2681 if (def->flags & TCG_OPF_BB_EXIT) {
2682 la_func_end(s, nb_globals, nb_temps);
2683 } else if (def->flags & TCG_OPF_COND_BRANCH) {
2684 la_bb_sync(s, nb_globals, nb_temps);
2685 } else if (def->flags & TCG_OPF_BB_END) {
2686 la_bb_end(s, nb_globals, nb_temps);
2687 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2688 la_global_sync(s, nb_globals);
2689 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2690 la_cross_call(s, nb_temps);
2691 }
2692 }
2693
2694 /* Record arguments that die in this opcode. */
2695 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2696 ts = arg_temp(op->args[i]);
2697 if (ts->state & TS_DEAD) {
2698 arg_life |= DEAD_ARG << i;
2699 }
2700 }
2701
2702 /* Input arguments are live for preceding opcodes. */
2703 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2704 ts = arg_temp(op->args[i]);
2705 if (ts->state & TS_DEAD) {
2706 /* For operands that were dead, initially allow
2707 all regs for the type. */
2708 *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2709 ts->state &= ~TS_DEAD;
2710 }
2711 }
2712
2713 /* Incorporate constraints for this operand. */
2714 switch (opc) {
2715 case INDEX_op_mov_i32:
2716 case INDEX_op_mov_i64:
2717 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2718 have proper constraints. That said, special case
2719 moves to propagate preferences backward. */
2720 if (IS_DEAD_ARG(1)) {
2721 *la_temp_pref(arg_temp(op->args[0]))
2722 = *la_temp_pref(arg_temp(op->args[1]));
2723 }
2724 break;
2725
2726 default:
2727 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2728 const TCGArgConstraint *ct = &def->args_ct[i];
2729 TCGRegSet set, *pset;
2730
2731 ts = arg_temp(op->args[i]);
2732 pset = la_temp_pref(ts);
2733 set = *pset;
2734
2735 set &= ct->regs;
2736 if (ct->ialias) {
2737 set &= op->output_pref[ct->alias_index];
2738 }
2739 /* If the combination is not possible, restart. */
2740 if (set == 0) {
2741 set = ct->regs;
2742 }
2743 *pset = set;
2744 }
2745 break;
2746 }
2747 break;
2748 }
2749 op->life = arg_life;
2750 }
2751}
2752
/* Liveness analysis: convert indirect regs to direct temporaries.  For
   each indirect global, allocate a direct temp, rewrite uses to it, and
   insert explicit loads and stores at the points where the underlying
   globals must be read or written. */
2754static bool liveness_pass_2(TCGContext *s)
2755{
2756 int nb_globals = s->nb_globals;
2757 int nb_temps, i;
2758 bool changes = false;
2759 TCGOp *op, *op_next;
2760
2761 /* Create a temporary for each indirect global. */
2762 for (i = 0; i < nb_globals; ++i) {
2763 TCGTemp *its = &s->temps[i];
2764 if (its->indirect_reg) {
2765 TCGTemp *dts = tcg_temp_alloc(s);
2766 dts->type = its->type;
2767 dts->base_type = its->base_type;
2768 its->state_ptr = dts;
2769 } else {
2770 its->state_ptr = NULL;
2771 }
2772 /* All globals begin dead. */
2773 its->state = TS_DEAD;
2774 }
2775 for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2776 TCGTemp *its = &s->temps[i];
2777 its->state_ptr = NULL;
2778 its->state = TS_DEAD;
2779 }
2780
2781 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2782 TCGOpcode opc = op->opc;
2783 const TCGOpDef *def = &tcg_op_defs[opc];
2784 TCGLifeData arg_life = op->life;
2785 int nb_iargs, nb_oargs, call_flags;
2786 TCGTemp *arg_ts, *dir_ts;
2787
2788 if (opc == INDEX_op_call) {
2789 nb_oargs = TCGOP_CALLO(op);
2790 nb_iargs = TCGOP_CALLI(op);
2791 call_flags = tcg_call_flags(op);
2792 } else {
2793 nb_iargs = def->nb_iargs;
2794 nb_oargs = def->nb_oargs;
2795
            /* Set flags analogous to those that calls require. */
2797 if (def->flags & TCG_OPF_COND_BRANCH) {
2798 /* Like reading globals: sync_globals */
2799 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2800 } else if (def->flags & TCG_OPF_BB_END) {
2801 /* Like writing globals: save_globals */
2802 call_flags = 0;
2803 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2804 /* Like reading globals: sync_globals */
2805 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2806 } else {
2807 /* No effect on globals. */
2808 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2809 TCG_CALL_NO_WRITE_GLOBALS);
2810 }
2811 }
2812
2813 /* Make sure that input arguments are available. */
2814 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2815 arg_ts = arg_temp(op->args[i]);
2816 if (arg_ts) {
2817 dir_ts = arg_ts->state_ptr;
2818 if (dir_ts && arg_ts->state == TS_DEAD) {
2819 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2820 ? INDEX_op_ld_i32
2821 : INDEX_op_ld_i64);
2822 TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2823
2824 lop->args[0] = temp_arg(dir_ts);
2825 lop->args[1] = temp_arg(arg_ts->mem_base);
2826 lop->args[2] = arg_ts->mem_offset;
2827
2828 /* Loaded, but synced with memory. */
2829 arg_ts->state = TS_MEM;
2830 }
2831 }
2832 }
2833
        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping the temp state up to date
           so that we reload when needed. */
2837 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2838 arg_ts = arg_temp(op->args[i]);
2839 if (arg_ts) {
2840 dir_ts = arg_ts->state_ptr;
2841 if (dir_ts) {
2842 op->args[i] = temp_arg(dir_ts);
2843 changes = true;
2844 if (IS_DEAD_ARG(i)) {
2845 arg_ts->state = TS_DEAD;
2846 }
2847 }
2848 }
2849 }
2850
2851 /* Liveness analysis should ensure that the following are
2852 all correct, for call sites and basic block end points. */
2853 if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2854 /* Nothing to do */
2855 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2856 for (i = 0; i < nb_globals; ++i) {
2857 /* Liveness should see that globals are synced back,
2858 that is, either TS_DEAD or TS_MEM. */
2859 arg_ts = &s->temps[i];
2860 tcg_debug_assert(arg_ts->state_ptr == 0
2861 || arg_ts->state != 0);
2862 }
2863 } else {
2864 for (i = 0; i < nb_globals; ++i) {
2865 /* Liveness should see that globals are saved back,
2866 that is, TS_DEAD, waiting to be reloaded. */
2867 arg_ts = &s->temps[i];
2868 tcg_debug_assert(arg_ts->state_ptr == 0
2869 || arg_ts->state == TS_DEAD);
2870 }
2871 }
2872
2873 /* Outputs become available. */
2874 if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
2875 arg_ts = arg_temp(op->args[0]);
2876 dir_ts = arg_ts->state_ptr;
2877 if (dir_ts) {
2878 op->args[0] = temp_arg(dir_ts);
2879 changes = true;
2880
2881 /* The output is now live and modified. */
2882 arg_ts->state = 0;
2883
2884 if (NEED_SYNC_ARG(0)) {
2885 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2886 ? INDEX_op_st_i32
2887 : INDEX_op_st_i64);
2888 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2889 TCGTemp *out_ts = dir_ts;
2890
2891 if (IS_DEAD_ARG(0)) {
2892 out_ts = arg_temp(op->args[1]);
2893 arg_ts->state = TS_DEAD;
2894 tcg_op_remove(s, op);
2895 } else {
2896 arg_ts->state = TS_MEM;
2897 }
2898
2899 sop->args[0] = temp_arg(out_ts);
2900 sop->args[1] = temp_arg(arg_ts->mem_base);
2901 sop->args[2] = arg_ts->mem_offset;
2902 } else {
2903 tcg_debug_assert(!IS_DEAD_ARG(0));
2904 }
2905 }
2906 } else {
2907 for (i = 0; i < nb_oargs; i++) {
2908 arg_ts = arg_temp(op->args[i]);
2909 dir_ts = arg_ts->state_ptr;
2910 if (!dir_ts) {
2911 continue;
2912 }
2913 op->args[i] = temp_arg(dir_ts);
2914 changes = true;
2915
2916 /* The output is now live and modified. */
2917 arg_ts->state = 0;
2918
2919 /* Sync outputs upon their last write. */
2920 if (NEED_SYNC_ARG(i)) {
2921 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2922 ? INDEX_op_st_i32
2923 : INDEX_op_st_i64);
2924 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2925
2926 sop->args[0] = temp_arg(dir_ts);
2927 sop->args[1] = temp_arg(arg_ts->mem_base);
2928 sop->args[2] = arg_ts->mem_offset;
2929
2930 arg_ts->state = TS_MEM;
2931 }
2932 /* Drop outputs that are dead. */
2933 if (IS_DEAD_ARG(i)) {
2934 arg_ts->state = TS_DEAD;
2935 }
2936 }
2937 }
2938 }
2939
2940 return changes;
2941}
2942
2943#ifdef CONFIG_DEBUG_TCG
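/* Debug helper: print the current location (register, memory slot or
   constant) of every temp, followed by the reverse reg-to-temp map. */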
2944static void dump_regs(TCGContext *s)
2945{
2946 TCGTemp *ts;
2947 int i;
2948 char buf[64];
2949
    for (i = 0; i < s->nb_temps; i++) {
        ts = &s->temps[i];
        printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
        switch (ts->val_type) {
2954 case TEMP_VAL_REG:
2955 printf("%s", tcg_target_reg_names[ts->reg]);
2956 break;
2957 case TEMP_VAL_MEM:
2958 printf("%d(%s)", (int)ts->mem_offset,
2959 tcg_target_reg_names[ts->mem_base->reg]);
2960 break;
2961 case TEMP_VAL_CONST:
2962 printf("$0x%" PRIx64, ts->val);
2963 break;
2964 case TEMP_VAL_DEAD:
2965 printf("D");
2966 break;
2967 default:
2968 printf("???");
2969 break;
2970 }
2971 printf("\n");
2972 }
2973
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
2975 if (s->reg_to_temp[i] != NULL) {
2976 printf("%s: %s\n",
2977 tcg_target_reg_names[i],
2978 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
2979 }
2980 }
2981}
2982
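/* Debug helper: verify that reg_to_temp[] and each temp's own val_type
   and reg fields are mutually consistent, aborting on mismatch. */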
2983static void check_regs(TCGContext *s)
2984{
2985 int reg;
2986 int k;
2987 TCGTemp *ts;
2988 char buf[64];
2989
2990 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
2991 ts = s->reg_to_temp[reg];
2992 if (ts != NULL) {
2993 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
2994 printf("Inconsistency for register %s:\n",
2995 tcg_target_reg_names[reg]);
2996 goto fail;
2997 }
2998 }
2999 }
3000 for (k = 0; k < s->nb_temps; k++) {
3001 ts = &s->temps[k];
3002 if (ts->val_type == TEMP_VAL_REG
3003 && ts->kind != TEMP_FIXED
3004 && s->reg_to_temp[ts->reg] != ts) {
3005 printf("Inconsistency for temp %s:\n",
3006 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3007 fail:
3008 printf("reg state:\n");
3009 dump_regs(s);
3010 tcg_abort();
3011 }
3012 }
3013}
3014#endif
3015
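/* Allocate a slot of the proper size and alignment for TS in the TCG
   stack frame, recording the location in ts->mem_base/ts->mem_offset. */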
3016static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3017{
3018 intptr_t off, size, align;
3019
3020 switch (ts->type) {
3021 case TCG_TYPE_I32:
3022 size = align = 4;
3023 break;
3024 case TCG_TYPE_I64:
3025 case TCG_TYPE_V64:
3026 size = align = 8;
3027 break;
3028 case TCG_TYPE_V128:
3029 size = align = 16;
3030 break;
3031 case TCG_TYPE_V256:
        /* Note that we do not require natural (32-byte) alignment
           for V256; 16-byte alignment is sufficient. */
        size = 32;
        align = 16;
3034 break;
3035 default:
3036 g_assert_not_reached();
3037 }
3038
3039 assert(align <= TCG_TARGET_STACK_ALIGN);
3040 off = ROUND_UP(s->current_frame_offset, align);
3041 assert(off + size <= s->frame_end);
3042 s->current_frame_offset = off + size;
3043
3044 ts->mem_offset = off;
3045#if defined(__sparc__)
3046 ts->mem_offset += TCG_TARGET_STACK_BIAS;
3047#endif
3048 ts->mem_base = s->frame_temp;
3049 ts->mem_allocated = 1;
3050}
3051
3052static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3053
3054/* Mark a temporary as free or dead. If 'free_or_dead' is negative,
3055 mark it free; otherwise mark it dead. */
3056static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3057{
3058 TCGTempVal new_type;
3059
3060 switch (ts->kind) {
3061 case TEMP_FIXED:
3062 return;
3063 case TEMP_GLOBAL:
3064 case TEMP_LOCAL:
3065 new_type = TEMP_VAL_MEM;
3066 break;
3067 case TEMP_NORMAL:
3068 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3069 break;
3070 case TEMP_CONST:
3071 new_type = TEMP_VAL_CONST;
3072 break;
3073 default:
3074 g_assert_not_reached();
3075 }
3076 if (ts->val_type == TEMP_VAL_REG) {
3077 s->reg_to_temp[ts->reg] = NULL;
3078 }
3079 ts->val_type = new_type;
3080}
3081
3082/* Mark a temporary as dead. */
3083static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3084{
3085 temp_free_or_dead(s, ts, 1);
3086}
3087
/* Sync a temporary to memory.  'allocated_regs' is used in case a temporary
   register needs to be allocated to store a constant.  If 'free_or_dead'
3090 is non-zero, subsequently release the temporary; if it is positive, the
3091 temp is dead; if it is negative, the temp is free. */
3092static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3093 TCGRegSet preferred_regs, int free_or_dead)
3094{
3095 if (!temp_readonly(ts) && !ts->mem_coherent) {
3096 if (!ts->mem_allocated) {
3097 temp_allocate_frame(s, ts);
3098 }
3099 switch (ts->val_type) {
3100 case TEMP_VAL_CONST:
3101 /* If we're going to free the temp immediately, then we won't
3102 require it later in a register, so attempt to store the
3103 constant to memory directly. */
3104 if (free_or_dead
3105 && tcg_out_sti(s, ts->type, ts->val,
3106 ts->mem_base->reg, ts->mem_offset)) {
3107 break;
3108 }
3109 temp_load(s, ts, tcg_target_available_regs[ts->type],
3110 allocated_regs, preferred_regs);
3111 /* fallthrough */
3112
3113 case TEMP_VAL_REG:
3114 tcg_out_st(s, ts->type, ts->reg,
3115 ts->mem_base->reg, ts->mem_offset);
3116 break;
3117
3118 case TEMP_VAL_MEM:
3119 break;
3120
3121 case TEMP_VAL_DEAD:
3122 default:
3123 tcg_abort();
3124 }
3125 ts->mem_coherent = 1;
3126 }
3127 if (free_or_dead) {
3128 temp_free_or_dead(s, ts, free_or_dead);
3129 }
3130}
3131
3132/* free register 'reg' by spilling the corresponding temporary if necessary */
3133static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3134{
3135 TCGTemp *ts = s->reg_to_temp[reg];
3136 if (ts != NULL) {
3137 temp_sync(s, ts, allocated_regs, 0, -1);
3138 }
3139}
3140
3141/**
3142 * tcg_reg_alloc:
3143 * @required_regs: Set of registers in which we must allocate.
3144 * @allocated_regs: Set of registers which must be avoided.
3145 * @preferred_regs: Set of registers we should prefer.
3146 * @rev: True if we search the registers in "indirect" order.
3147 *
3148 * The allocated register must be in @required_regs & ~@allocated_regs,
3149 * but if we can put it in @preferred_regs we may save a move later.
3150 */
3151static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3152 TCGRegSet allocated_regs,
3153 TCGRegSet preferred_regs, bool rev)
3154{
3155 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3156 TCGRegSet reg_ct[2];
3157 const int *order;
3158
3159 reg_ct[1] = required_regs & ~allocated_regs;
3160 tcg_debug_assert(reg_ct[1] != 0);
3161 reg_ct[0] = reg_ct[1] & preferred_regs;
3162
3163 /* Skip the preferred_regs option if it cannot be satisfied,
3164 or if the preference made no difference. */
3165 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3166
3167 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3168
3169 /* Try free registers, preferences first. */
3170 for (j = f; j < 2; j++) {
3171 TCGRegSet set = reg_ct[j];
3172
3173 if (tcg_regset_single(set)) {
3174 /* One register in the set. */
3175 TCGReg reg = tcg_regset_first(set);
3176 if (s->reg_to_temp[reg] == NULL) {
3177 return reg;
3178 }
3179 } else {
3180 for (i = 0; i < n; i++) {
3181 TCGReg reg = order[i];
3182 if (s->reg_to_temp[reg] == NULL &&
3183 tcg_regset_test_reg(set, reg)) {
3184 return reg;
3185 }
3186 }
3187 }
3188 }
3189
3190 /* We must spill something. */
3191 for (j = f; j < 2; j++) {
3192 TCGRegSet set = reg_ct[j];
3193
3194 if (tcg_regset_single(set)) {
3195 /* One register in the set. */
3196 TCGReg reg = tcg_regset_first(set);
3197 tcg_reg_free(s, reg, allocated_regs);
3198 return reg;
3199 } else {
3200 for (i = 0; i < n; i++) {
3201 TCGReg reg = order[i];
3202 if (tcg_regset_test_reg(set, reg)) {
3203 tcg_reg_free(s, reg, allocated_regs);
3204 return reg;
3205 }
3206 }
3207 }
3208 }
3209
3210 tcg_abort();
3211}
3212
3213/* Make sure the temporary is in a register. If needed, allocate the register
3214 from DESIRED while avoiding ALLOCATED. */
3215static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3216 TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3217{
3218 TCGReg reg;
3219
3220 switch (ts->val_type) {
3221 case TEMP_VAL_REG:
3222 return;
3223 case TEMP_VAL_CONST:
3224 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3225 preferred_regs, ts->indirect_base);
3226 if (ts->type <= TCG_TYPE_I64) {
3227 tcg_out_movi(s, ts->type, reg, ts->val);
3228 } else {
3229 uint64_t val = ts->val;
3230 MemOp vece = MO_64;
3231
3232 /*
3233 * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search
             * anyway, so do it generically here: e.g. the constant
             * 0x0101010101010101 can be dup'ed from a single MO_8 byte.
3236 */
3237 if (val == dup_const(MO_8, val)) {
3238 vece = MO_8;
3239 } else if (val == dup_const(MO_16, val)) {
3240 vece = MO_16;
3241 } else if (val == dup_const(MO_32, val)) {
3242 vece = MO_32;
3243 }
3244
3245 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3246 }
3247 ts->mem_coherent = 0;
3248 break;
3249 case TEMP_VAL_MEM:
3250 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3251 preferred_regs, ts->indirect_base);
3252 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3253 ts->mem_coherent = 1;
3254 break;
3255 case TEMP_VAL_DEAD:
3256 default:
3257 tcg_abort();
3258 }
3259 ts->reg = reg;
3260 ts->val_type = TEMP_VAL_REG;
3261 s->reg_to_temp[reg] = ts;
3262}
3263
/* Save a temporary to memory.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
3266static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3267{
3268 /* The liveness analysis already ensures that globals are back
       in memory.  Keep a tcg_debug_assert for safety. */
3270 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3271}
3272
/* save globals to their canonical location and assume they can be
   modified by the following code.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
3276static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3277{
3278 int i, n;
3279
3280 for (i = 0, n = s->nb_globals; i < n; i++) {
3281 temp_save(s, &s->temps[i], allocated_regs);
3282 }
3283}
3284
3285/* sync globals to their canonical location and assume they can be
3286 read by the following code. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
3288static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3289{
3290 int i, n;
3291
3292 for (i = 0, n = s->nb_globals; i < n; i++) {
3293 TCGTemp *ts = &s->temps[i];
3294 tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3295 || ts->kind == TEMP_FIXED
3296 || ts->mem_coherent);
3297 }
3298}
3299
3300/* at the end of a basic block, we assume all temporaries are dead and
3301 all globals are stored at their canonical location. */
3302static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3303{
3304 int i;
3305
3306 for (i = s->nb_globals; i < s->nb_temps; i++) {
3307 TCGTemp *ts = &s->temps[i];
3308
3309 switch (ts->kind) {
3310 case TEMP_LOCAL:
3311 temp_save(s, ts, allocated_regs);
3312 break;
3313 case TEMP_NORMAL:
            /* The liveness analysis already ensures that temps are dead.
               Keep a tcg_debug_assert for safety. */
3316 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3317 break;
3318 case TEMP_CONST:
3319 /* Similarly, we should have freed any allocated register. */
3320 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3321 break;
3322 default:
3323 g_assert_not_reached();
3324 }
3325 }
3326
3327 save_globals(s, allocated_regs);
3328}
3329
3330/*
3331 * At a conditional branch, we assume all temporaries are dead and
3332 * all globals and local temps are synced to their location.
3333 */
3334static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3335{
3336 sync_globals(s, allocated_regs);
3337
3338 for (int i = s->nb_globals; i < s->nb_temps; i++) {
3339 TCGTemp *ts = &s->temps[i];
3340 /*
3341 * The liveness analysis already ensures that temps are dead.
3342 * Keep tcg_debug_asserts for safety.
3343 */
3344 switch (ts->kind) {
3345 case TEMP_LOCAL:
3346 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3347 break;
3348 case TEMP_NORMAL:
3349 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3350 break;
3351 case TEMP_CONST:
3352 break;
3353 default:
3354 g_assert_not_reached();
3355 }
3356 }
3357}
3358
3359/*
3360 * Specialized code generation for INDEX_op_mov_* with a constant.
3361 */
3362static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3363 tcg_target_ulong val, TCGLifeData arg_life,
3364 TCGRegSet preferred_regs)
3365{
3366 /* ENV should not be modified. */
3367 tcg_debug_assert(!temp_readonly(ots));
3368
3369 /* The movi is not explicitly generated here. */
3370 if (ots->val_type == TEMP_VAL_REG) {
3371 s->reg_to_temp[ots->reg] = NULL;
3372 }
3373 ots->val_type = TEMP_VAL_CONST;
3374 ots->val = val;
3375 ots->mem_coherent = 0;
3376 if (NEED_SYNC_ARG(0)) {
3377 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3378 } else if (IS_DEAD_ARG(0)) {
3379 temp_dead(s, ots);
3380 }
3381}
3382
3383/*
3384 * Specialized code generation for INDEX_op_mov_*.
3385 */
3386static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3387{
3388 const TCGLifeData arg_life = op->life;
3389 TCGRegSet allocated_regs, preferred_regs;
3390 TCGTemp *ts, *ots;
3391 TCGType otype, itype;
3392
3393 allocated_regs = s->reserved_regs;
3394 preferred_regs = op->output_pref[0];
3395 ots = arg_temp(op->args[0]);
3396 ts = arg_temp(op->args[1]);
3397
3398 /* ENV should not be modified. */
3399 tcg_debug_assert(!temp_readonly(ots));
3400
3401 /* Note that otype != itype for no-op truncation. */
3402 otype = ots->type;
3403 itype = ts->type;
3404
3405 if (ts->val_type == TEMP_VAL_CONST) {
3406 /* propagate constant or generate sti */
3407 tcg_target_ulong val = ts->val;
3408 if (IS_DEAD_ARG(1)) {
3409 temp_dead(s, ts);
3410 }
3411 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3412 return;
3413 }
3414
3415 /* If the source value is in memory we're going to be forced
3416 to have it in a register in order to perform the copy. Copy
       the SOURCE value into its own register first; that way we
3418 don't have to reload SOURCE the next time it is used. */
3419 if (ts->val_type == TEMP_VAL_MEM) {
3420 temp_load(s, ts, tcg_target_available_regs[itype],
3421 allocated_regs, preferred_regs);
3422 }
3423
3424 tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3425 if (IS_DEAD_ARG(0)) {
3426 /* mov to a non-saved dead register makes no sense (even with
3427 liveness analysis disabled). */
3428 tcg_debug_assert(NEED_SYNC_ARG(0));
3429 if (!ots->mem_allocated) {
3430 temp_allocate_frame(s, ots);
3431 }
3432 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3433 if (IS_DEAD_ARG(1)) {
3434 temp_dead(s, ts);
3435 }
3436 temp_dead(s, ots);
3437 } else {
3438 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3439 /* the mov can be suppressed */
3440 if (ots->val_type == TEMP_VAL_REG) {
3441 s->reg_to_temp[ots->reg] = NULL;
3442 }
3443 ots->reg = ts->reg;
3444 temp_dead(s, ts);
3445 } else {
3446 if (ots->val_type != TEMP_VAL_REG) {
3447 /* When allocating a new register, make sure to not spill the
3448 input one. */
3449 tcg_regset_set_reg(allocated_regs, ts->reg);
3450 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3451 allocated_regs, preferred_regs,
3452 ots->indirect_base);
3453 }
3454 if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3455 /*
3456 * Cross register class move not supported.
3457 * Store the source register into the destination slot
3458 * and leave the destination temp as TEMP_VAL_MEM.
3459 */
3460 assert(!temp_readonly(ots));
                if (!ots->mem_allocated) {
3462 temp_allocate_frame(s, ots);
3463 }
3464 tcg_out_st(s, ts->type, ts->reg,
3465 ots->mem_base->reg, ots->mem_offset);
3466 ots->mem_coherent = 1;
3467 temp_free_or_dead(s, ots, -1);
3468 return;
3469 }
3470 }
3471 ots->val_type = TEMP_VAL_REG;
3472 ots->mem_coherent = 0;
3473 s->reg_to_temp[ots->reg] = ots;
3474 if (NEED_SYNC_ARG(0)) {
3475 temp_sync(s, ots, allocated_regs, 0, 0);
3476 }
3477 }
3478}
3479
3480/*
3481 * Specialized code generation for INDEX_op_dup_vec.
3482 */
3483static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3484{
3485 const TCGLifeData arg_life = op->life;
3486 TCGRegSet dup_out_regs, dup_in_regs;
3487 TCGTemp *its, *ots;
3488 TCGType itype, vtype;
3489 intptr_t endian_fixup;
3490 unsigned vece;
3491 bool ok;
3492
3493 ots = arg_temp(op->args[0]);
3494 its = arg_temp(op->args[1]);
3495
3496 /* ENV should not be modified. */
3497 tcg_debug_assert(!temp_readonly(ots));
3498
3499 itype = its->type;
3500 vece = TCGOP_VECE(op);
3501 vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3502
3503 if (its->val_type == TEMP_VAL_CONST) {
3504 /* Propagate constant via movi -> dupi. */
3505 tcg_target_ulong val = its->val;
3506 if (IS_DEAD_ARG(1)) {
3507 temp_dead(s, its);
3508 }
3509 tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3510 return;
3511 }
3512
3513 dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3514 dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3515
3516 /* Allocate the output register now. */
3517 if (ots->val_type != TEMP_VAL_REG) {
3518 TCGRegSet allocated_regs = s->reserved_regs;
3519
3520 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3521 /* Make sure to not spill the input register. */
3522 tcg_regset_set_reg(allocated_regs, its->reg);
3523 }
3524 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3525 op->output_pref[0], ots->indirect_base);
3526 ots->val_type = TEMP_VAL_REG;
3527 ots->mem_coherent = 0;
3528 s->reg_to_temp[ots->reg] = ots;
3529 }
3530
3531 switch (its->val_type) {
3532 case TEMP_VAL_REG:
3533 /*
         * The dup constraints must be broad, covering all possible VECE.
         * However, tcg_out_dup_vec() gets to see the VECE and we allow it
3536 * to fail, indicating that extra moves are required for that case.
3537 */
3538 if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3539 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3540 goto done;
3541 }
3542 /* Try again from memory or a vector input register. */
3543 }
3544 if (!its->mem_coherent) {
3545 /*
3546 * The input register is not synced, and so an extra store
3547 * would be required to use memory. Attempt an integer-vector
3548 * register move first. We do not have a TCGRegSet for this.
3549 */
3550 if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3551 break;
3552 }
3553 /* Sync the temp back to its slot and load from there. */
3554 temp_sync(s, its, s->reserved_regs, 0, 0);
3555 }
3556 /* fall through */
3557
3558 case TEMP_VAL_MEM:
3559#ifdef HOST_WORDS_BIGENDIAN
3560 endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3561 endian_fixup -= 1 << vece;
3562#else
3563 endian_fixup = 0;
3564#endif
3565 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3566 its->mem_offset + endian_fixup)) {
3567 goto done;
3568 }
3569 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3570 break;
3571
3572 default:
3573 g_assert_not_reached();
3574 }
3575
3576 /* We now have a vector input register, so dup must succeed. */
3577 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3578 tcg_debug_assert(ok);
3579
3580 done:
3581 if (IS_DEAD_ARG(1)) {
3582 temp_dead(s, its);
3583 }
3584 if (NEED_SYNC_ARG(0)) {
3585 temp_sync(s, ots, s->reserved_regs, 0, 0);
3586 }
3587 if (IS_DEAD_ARG(0)) {
3588 temp_dead(s, ots);
3589 }
3590}
3591
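/*
 * Register allocation for one generic op: satisfy the input constraints,
 * allocate output registers, emit the target instruction, then sync or
 * free arguments as directed by the life mask.
 */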
3592static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3593{
3594 const TCGLifeData arg_life = op->life;
3595 const TCGOpDef * const def = &tcg_op_defs[op->opc];
3596 TCGRegSet i_allocated_regs;
3597 TCGRegSet o_allocated_regs;
3598 int i, k, nb_iargs, nb_oargs;
3599 TCGReg reg;
3600 TCGArg arg;
3601 const TCGArgConstraint *arg_ct;
3602 TCGTemp *ts;
3603 TCGArg new_args[TCG_MAX_OP_ARGS];
3604 int const_args[TCG_MAX_OP_ARGS];
3605
3606 nb_oargs = def->nb_oargs;
3607 nb_iargs = def->nb_iargs;
3608
3609 /* copy constants */
3610 memcpy(new_args + nb_oargs + nb_iargs,
3611 op->args + nb_oargs + nb_iargs,
3612 sizeof(TCGArg) * def->nb_cargs);
3613
3614 i_allocated_regs = s->reserved_regs;
3615 o_allocated_regs = s->reserved_regs;
3616
3617 /* satisfy input constraints */
3618 for (k = 0; k < nb_iargs; k++) {
3619 TCGRegSet i_preferred_regs, o_preferred_regs;
3620
3621 i = def->args_ct[nb_oargs + k].sort_index;
3622 arg = op->args[i];
3623 arg_ct = &def->args_ct[i];
3624 ts = arg_temp(arg);
3625
3626 if (ts->val_type == TEMP_VAL_CONST
3627 && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
3628 /* constant is OK for instruction */
3629 const_args[i] = 1;
3630 new_args[i] = ts->val;
3631 continue;
3632 }
3633
3634 i_preferred_regs = o_preferred_regs = 0;
3635 if (arg_ct->ialias) {
3636 o_preferred_regs = op->output_pref[arg_ct->alias_index];
3637
3638 /*
3639 * If the input is readonly, then it cannot also be an
3640 * output and aliased to itself. If the input is not
3641 * dead after the instruction, we must allocate a new
3642 * register and move it.
3643 */
3644 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
3645 goto allocate_in_reg;
3646 }
3647
3648 /*
3649 * Check if the current register has already been allocated
3650 * for another input aliased to an output.
3651 */
3652 if (ts->val_type == TEMP_VAL_REG) {
3653 reg = ts->reg;
3654 for (int k2 = 0; k2 < k; k2++) {
3655 int i2 = def->args_ct[nb_oargs + k2].sort_index;
3656 if (def->args_ct[i2].ialias && reg == new_args[i2]) {
3657 goto allocate_in_reg;
3658 }
3659 }
3660 }
3661 i_preferred_regs = o_preferred_regs;
3662 }
3663
3664 temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
3665 reg = ts->reg;
3666
3667 if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
3668 allocate_in_reg:
3669 /*
3670 * Allocate a new register matching the constraint
3671 * and move the temporary register into it.
3672 */
3673 temp_load(s, ts, tcg_target_available_regs[ts->type],
3674 i_allocated_regs, 0);
3675 reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
3676 o_preferred_regs, ts->indirect_base);
3677 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3678 /*
3679 * Cross register class move not supported. Sync the
3680 * temp back to its slot and load from there.
3681 */
3682 temp_sync(s, ts, i_allocated_regs, 0, 0);
3683 tcg_out_ld(s, ts->type, reg,
3684 ts->mem_base->reg, ts->mem_offset);
3685 }
3686 }
3687 new_args[i] = reg;
3688 const_args[i] = 0;
3689 tcg_regset_set_reg(i_allocated_regs, reg);
3690 }
3691
3692 /* mark dead temporaries and free the associated registers */
3693 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3694 if (IS_DEAD_ARG(i)) {
3695 temp_dead(s, arg_temp(op->args[i]));
3696 }
3697 }
3698
3699 if (def->flags & TCG_OPF_COND_BRANCH) {
3700 tcg_reg_alloc_cbranch(s, i_allocated_regs);
3701 } else if (def->flags & TCG_OPF_BB_END) {
3702 tcg_reg_alloc_bb_end(s, i_allocated_regs);
3703 } else {
3704 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3705 /* XXX: permit generic clobber register list ? */
3706 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3707 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3708 tcg_reg_free(s, i, i_allocated_regs);
3709 }
3710 }
3711 }
3712 if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3713 /* sync globals if the op has side effects and might trigger
3714 an exception. */
3715 sync_globals(s, i_allocated_regs);
3716 }
3717
3718 /* satisfy the output constraints */
        for (k = 0; k < nb_oargs; k++) {
3720 i = def->args_ct[k].sort_index;
3721 arg = op->args[i];
3722 arg_ct = &def->args_ct[i];
3723 ts = arg_temp(arg);
3724
3725 /* ENV should not be modified. */
3726 tcg_debug_assert(!temp_readonly(ts));
3727
3728 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
3729 reg = new_args[arg_ct->alias_index];
3730 } else if (arg_ct->newreg) {
3731 reg = tcg_reg_alloc(s, arg_ct->regs,
3732 i_allocated_regs | o_allocated_regs,
3733 op->output_pref[k], ts->indirect_base);
3734 } else {
3735 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
3736 op->output_pref[k], ts->indirect_base);
3737 }
3738 tcg_regset_set_reg(o_allocated_regs, reg);
3739 if (ts->val_type == TEMP_VAL_REG) {
3740 s->reg_to_temp[ts->reg] = NULL;
3741 }
3742 ts->val_type = TEMP_VAL_REG;
3743 ts->reg = reg;
3744 /*
3745 * Temp value is modified, so the value kept in memory is
3746 * potentially not the same.
3747 */
3748 ts->mem_coherent = 0;
3749 s->reg_to_temp[reg] = ts;
3750 new_args[i] = reg;
3751 }
3752 }
3753
3754 /* emit instruction */
3755 if (def->flags & TCG_OPF_VECTOR) {
3756 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3757 new_args, const_args);
3758 } else {
3759 tcg_out_op(s, op->opc, new_args, const_args);
3760 }
3761
3762 /* move the outputs in the correct register if needed */
    for (i = 0; i < nb_oargs; i++) {
3764 ts = arg_temp(op->args[i]);
3765
3766 /* ENV should not be modified. */
3767 tcg_debug_assert(!temp_readonly(ts));
3768
3769 if (NEED_SYNC_ARG(i)) {
3770 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3771 } else if (IS_DEAD_ARG(i)) {
3772 temp_dead(s, ts);
3773 }
3774 }
3775}
3776
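/*
 * Specialized code generation for INDEX_op_dup2_vec: duplicate a 64-bit
 * element built from two 32-bit inputs on a 32-bit host.  Returns false
 * to request the generic expansion instead.
 */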
3777static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
3778{
3779 const TCGLifeData arg_life = op->life;
3780 TCGTemp *ots, *itsl, *itsh;
3781 TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3782
3783 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
3784 tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
3785 tcg_debug_assert(TCGOP_VECE(op) == MO_64);
3786
3787 ots = arg_temp(op->args[0]);
3788 itsl = arg_temp(op->args[1]);
3789 itsh = arg_temp(op->args[2]);
3790
3791 /* ENV should not be modified. */
3792 tcg_debug_assert(!temp_readonly(ots));
3793
3794 /* Allocate the output register now. */
3795 if (ots->val_type != TEMP_VAL_REG) {
3796 TCGRegSet allocated_regs = s->reserved_regs;
3797 TCGRegSet dup_out_regs =
3798 tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3799
3800 /* Make sure to not spill the input registers. */
3801 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
3802 tcg_regset_set_reg(allocated_regs, itsl->reg);
3803 }
3804 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
3805 tcg_regset_set_reg(allocated_regs, itsh->reg);
3806 }
3807
3808 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3809 op->output_pref[0], ots->indirect_base);
3810 ots->val_type = TEMP_VAL_REG;
3811 ots->mem_coherent = 0;
3812 s->reg_to_temp[ots->reg] = ots;
3813 }
3814
3815 /* Promote dup2 of immediates to dupi_vec. */
3816 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
3817 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
3818 MemOp vece = MO_64;
3819
3820 if (val == dup_const(MO_8, val)) {
3821 vece = MO_8;
3822 } else if (val == dup_const(MO_16, val)) {
3823 vece = MO_16;
3824 } else if (val == dup_const(MO_32, val)) {
3825 vece = MO_32;
3826 }
3827
3828 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
3829 goto done;
3830 }
3831
3832 /* If the two inputs form one 64-bit value, try dupm_vec. */
3833 if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
3834 if (!itsl->mem_coherent) {
3835 temp_sync(s, itsl, s->reserved_regs, 0, 0);
3836 }
3837 if (!itsh->mem_coherent) {
3838 temp_sync(s, itsh, s->reserved_regs, 0, 0);
3839 }
3840#ifdef HOST_WORDS_BIGENDIAN
3841 TCGTemp *its = itsh;
3842#else
3843 TCGTemp *its = itsl;
3844#endif
3845 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
3846 its->mem_base->reg, its->mem_offset)) {
3847 goto done;
3848 }
3849 }
3850
3851 /* Fall back to generic expansion. */
3852 return false;
3853
3854 done:
3855 if (IS_DEAD_ARG(1)) {
3856 temp_dead(s, itsl);
3857 }
3858 if (IS_DEAD_ARG(2)) {
3859 temp_dead(s, itsh);
3860 }
3861 if (NEED_SYNC_ARG(0)) {
3862 temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
3863 } else if (IS_DEAD_ARG(0)) {
3864 temp_dead(s, ots);
3865 }
3866 return true;
3867}
3868
3869#ifdef TCG_TARGET_STACK_GROWSUP
3870#define STACK_DIR(x) (-(x))
3871#else
3872#define STACK_DIR(x) (x)
3873#endif
3874
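/*
 * Register allocation for a helper call: place excess arguments on the
 * stack, load the register arguments, free the call-clobbered registers,
 * save or sync globals as the call flags require, emit the call, and
 * finally bind the output registers.
 */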
3875static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3876{
3877 const int nb_oargs = TCGOP_CALLO(op);
3878 const int nb_iargs = TCGOP_CALLI(op);
3879 const TCGLifeData arg_life = op->life;
3880 const TCGHelperInfo *info;
3881 int flags, nb_regs, i;
3882 TCGReg reg;
3883 TCGArg arg;
3884 TCGTemp *ts;
3885 intptr_t stack_offset;
3886 size_t call_stack_size;
3887 tcg_insn_unit *func_addr;
3888 int allocate_args;
3889 TCGRegSet allocated_regs;
3890
3891 func_addr = tcg_call_func(op);
3892 info = tcg_call_info(op);
3893 flags = info->flags;
3894
3895 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3896 if (nb_regs > nb_iargs) {
3897 nb_regs = nb_iargs;
3898 }
3899
3900 /* assign stack slots first */
3901 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
    call_stack_size = ROUND_UP(call_stack_size, TCG_TARGET_STACK_ALIGN);
3904 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3905 if (allocate_args) {
3906 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3907 preallocate call stack */
3908 tcg_abort();
3909 }
3910
3911 stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3912 for (i = nb_regs; i < nb_iargs; i++) {
3913 arg = op->args[nb_oargs + i];
3914#ifdef TCG_TARGET_STACK_GROWSUP
3915 stack_offset -= sizeof(tcg_target_long);
3916#endif
3917 if (arg != TCG_CALL_DUMMY_ARG) {
3918 ts = arg_temp(arg);
3919 temp_load(s, ts, tcg_target_available_regs[ts->type],
3920 s->reserved_regs, 0);
3921 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3922 }
3923#ifndef TCG_TARGET_STACK_GROWSUP
3924 stack_offset += sizeof(tcg_target_long);
3925#endif
3926 }
3927
3928 /* assign input registers */
3929 allocated_regs = s->reserved_regs;
3930 for (i = 0; i < nb_regs; i++) {
3931 arg = op->args[nb_oargs + i];
3932 if (arg != TCG_CALL_DUMMY_ARG) {
3933 ts = arg_temp(arg);
3934 reg = tcg_target_call_iarg_regs[i];
3935
3936 if (ts->val_type == TEMP_VAL_REG) {
3937 if (ts->reg != reg) {
3938 tcg_reg_free(s, reg, allocated_regs);
3939 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3940 /*
3941 * Cross register class move not supported. Sync the
3942 * temp back to its slot and load from there.
3943 */
3944 temp_sync(s, ts, allocated_regs, 0, 0);
3945 tcg_out_ld(s, ts->type, reg,
3946 ts->mem_base->reg, ts->mem_offset);
3947 }
3948 }
3949 } else {
3950 TCGRegSet arg_set = 0;
3951
3952 tcg_reg_free(s, reg, allocated_regs);
3953 tcg_regset_set_reg(arg_set, reg);
3954 temp_load(s, ts, arg_set, allocated_regs, 0);
3955 }
3956
3957 tcg_regset_set_reg(allocated_regs, reg);
3958 }
3959 }
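 /*
  * Note that tcg_reg_free() above first spills whatever temp currently
  * occupies the target argument register, so filling the registers in
  * order cannot silently clobber a value that a later argument still
  * needs; that value is simply reloaded from its memory slot.
  */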
3960
3961 /* mark dead temporaries and free the associated registers */
3962 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3963 if (IS_DEAD_ARG(i)) {
3964 temp_dead(s, arg_temp(op->args[i]));
3965 }
3966 }
3967
3968 /* clobber call registers */
3969 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3970 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3971 tcg_reg_free(s, i, allocated_regs);
3972 }
3973 }
3974
3975 /* Save globals if they might be written by the helper, sync them if
3976 they might be read. */
3977 if (flags & TCG_CALL_NO_READ_GLOBALS) {
3978 /* Nothing to do */
3979 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
3980 sync_globals(s, allocated_regs);
3981 } else {
3982 save_globals(s, allocated_regs);
3983 }
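 /*
  * E.g. a helper flagged TCG_CALL_NO_WRITE_GLOBALS may still read
  * globals, so their memory locations must be brought up to date, but
  * register copies stay valid; with neither flag set the helper may
  * also write them, so the register copies must be discarded as well.
  */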
3984
3985#ifdef CONFIG_TCG_INTERPRETER
3986 {
3987 gpointer hash = (gpointer)(uintptr_t)info->typemask;
3988 ffi_cif *cif = g_hash_table_lookup(ffi_table, hash);
3989 assert(cif != NULL);
3990 tcg_out_call(s, func_addr, cif);
3991 }
3992#else
3993 tcg_out_call(s, func_addr);
3994#endif
3995
3996 /* assign output registers and emit moves if needed */
3997 for (i = 0; i < nb_oargs; i++) {
3998 arg = op->args[i];
3999 ts = arg_temp(arg);
4000
4001 /* ENV should not be modified. */
4002 tcg_debug_assert(!temp_readonly(ts));
4003
4004 reg = tcg_target_call_oarg_regs[i];
4005 tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4006 if (ts->val_type == TEMP_VAL_REG) {
4007 s->reg_to_temp[ts->reg] = NULL;
4008 }
4009 ts->val_type = TEMP_VAL_REG;
4010 ts->reg = reg;
4011 ts->mem_coherent = 0;
4012 s->reg_to_temp[reg] = ts;
4013 if (NEED_SYNC_ARG(i)) {
4014 temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
4015 } else if (IS_DEAD_ARG(i)) {
4016 temp_dead(s, ts);
4017 }
4018 }
4019}
4020
4021#ifdef CONFIG_PROFILER
4022
4023/* avoid copy/paste errors */
4024#define PROF_ADD(to, from, field) \
4025 do { \
4026 (to)->field += qatomic_read(&((from)->field)); \
4027 } while (0)
4028
4029#define PROF_MAX(to, from, field) \
4030 do { \
4031 typeof((from)->field) val__ = qatomic_read(&((from)->field)); \
4032 if (val__ > (to)->field) { \
4033 (to)->field = val__; \
4034 } \
4035 } while (0)
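/*
 * Counters are read atomically because other vCPU threads may still be
 * updating their own TCGContext. For instance,
 * PROF_ADD(prof, orig, tb_count)
 * expands to prof->tb_count += qatomic_read(&orig->tb_count).
 */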
4036
4037/* Pass in a zeroed @prof. */
4038static inline
4039void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4040{
4041 unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4042 unsigned int i;
4043
4044 for (i = 0; i < n_ctxs; i++) {
4045 TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4046 const TCGProfile *orig = &s->prof;
4047
4048 if (counters) {
4049 PROF_ADD(prof, orig, cpu_exec_time);
4050 PROF_ADD(prof, orig, tb_count1);
4051 PROF_ADD(prof, orig, tb_count);
4052 PROF_ADD(prof, orig, op_count);
4053 PROF_MAX(prof, orig, op_count_max);
4054 PROF_ADD(prof, orig, temp_count);
4055 PROF_MAX(prof, orig, temp_count_max);
4056 PROF_ADD(prof, orig, del_op_count);
4057 PROF_ADD(prof, orig, code_in_len);
4058 PROF_ADD(prof, orig, code_out_len);
4059 PROF_ADD(prof, orig, search_out_len);
4060 PROF_ADD(prof, orig, interm_time);
4061 PROF_ADD(prof, orig, code_time);
4062 PROF_ADD(prof, orig, la_time);
4063 PROF_ADD(prof, orig, opt_time);
4064 PROF_ADD(prof, orig, restore_count);
4065 PROF_ADD(prof, orig, restore_time);
4066 }
4067 if (table) {
4068 int i;
4069
4070 for (i = 0; i < NB_OPS; i++) {
4071 PROF_ADD(prof, orig, table_op_count[i]);
4072 }
4073 }
4074 }
4075}
4076
4077#undef PROF_ADD
4078#undef PROF_MAX
4079
4080static void tcg_profile_snapshot_counters(TCGProfile *prof)
4081{
4082 tcg_profile_snapshot(prof, true, false);
4083}
4084
4085static void tcg_profile_snapshot_table(TCGProfile *prof)
4086{
4087 tcg_profile_snapshot(prof, false, true);
4088}
4089
4090void tcg_dump_op_count(void)
4091{
4092 TCGProfile prof = {};
4093 int i;
4094
4095 tcg_profile_snapshot_table(&prof);
4096 for (i = 0; i < NB_OPS; i++) {
4097 qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
4098 prof.table_op_count[i]);
4099 }
4100}
4101
4102int64_t tcg_cpu_exec_time(void)
4103{
4104 unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4105 unsigned int i;
4106 int64_t ret = 0;
4107
4108 for (i = 0; i < n_ctxs; i++) {
4109 const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4110 const TCGProfile *prof = &s->prof;
4111
4112 ret += qatomic_read(&prof->cpu_exec_time);
4113 }
4114 return ret;
4115}
4116#else
4117void tcg_dump_op_count(void)
4118{
4119 qemu_printf("[TCG profiler not compiled]\n");
4120}
4121
4122int64_t tcg_cpu_exec_time(void)
4123{
4124 error_report("%s: TCG profiler not compiled", __func__);
4125 exit(EXIT_FAILURE);
4126}
4127#endif
4128
4129
4130int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4131{
4132#ifdef CONFIG_PROFILER
4133 TCGProfile *prof = &s->prof;
4134#endif
4135 int i, num_insns;
4136 TCGOp *op;
4137
4138#ifdef CONFIG_PROFILER
4139 {
4140 int n = 0;
4141
4142 QTAILQ_FOREACH(op, &s->ops, link) {
4143 n++;
4144 }
4145 qatomic_set(&prof->op_count, prof->op_count + n);
4146 if (n > prof->op_count_max) {
4147 qatomic_set(&prof->op_count_max, n);
4148 }
4149
4150 n = s->nb_temps;
4151 qatomic_set(&prof->temp_count, prof->temp_count + n);
4152 if (n > prof->temp_count_max) {
4153 qatomic_set(&prof->temp_count_max, n);
4154 }
4155 }
4156#endif
4157
4158#ifdef DEBUG_DISAS
4159 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4160 && qemu_log_in_addr_range(tb->pc))) {
4161 FILE *logfile = qemu_log_lock();
4162 qemu_log("OP:\n");
4163 tcg_dump_ops(s, false);
4164 qemu_log("\n");
4165 qemu_log_unlock(logfile);
4166 }
4167#endif
4168
4169#ifdef CONFIG_DEBUG_TCG
4170 /* Ensure all labels referenced have been emitted. */
4171 {
4172 TCGLabel *l;
4173 bool error = false;
4174
4175 QSIMPLEQ_FOREACH(l, &s->labels, next) {
4176 if (unlikely(!l->present) && l->refs) {
4177 qemu_log_mask(CPU_LOG_TB_OP,
4178 "$L%d referenced but not present.\n", l->id);
4179 error = true;
4180 }
4181 }
4182 assert(!error);
4183 }
4184#endif
4185
4186#ifdef CONFIG_PROFILER
4187 qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4188#endif
4189
4190#ifdef USE_TCG_OPTIMIZATIONS
4191 tcg_optimize(s);
4192#endif
4193
4194#ifdef CONFIG_PROFILER
4195 qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4196 qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4197#endif
4198
4199 reachable_code_pass(s);
4200 liveness_pass_1(s);
4201
4202 if (s->nb_indirects > 0) {
4203#ifdef DEBUG_DISAS
4204 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4205 && qemu_log_in_addr_range(tb->pc))) {
4206 FILE *logfile = qemu_log_lock();
4207 qemu_log("OP before indirect lowering:\n");
4208 tcg_dump_ops(s, false);
4209 qemu_log("\n");
4210 qemu_log_unlock(logfile);
4211 }
4212#endif
4213 /* Replace indirect temps with direct temps. */
4214 if (liveness_pass_2(s)) {
4215 /* If changes were made, re-run liveness. */
4216 liveness_pass_1(s);
4217 }
4218 }
4219
4220#ifdef CONFIG_PROFILER
4221 qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4222#endif
4223
4224#ifdef DEBUG_DISAS
4225 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4226 && qemu_log_in_addr_range(tb->pc))) {
4227 FILE *logfile = qemu_log_lock();
4228 qemu_log("OP after optimization and liveness analysis:\n");
4229 tcg_dump_ops(s, true);
4230 qemu_log("\n");
4231 qemu_log_unlock(logfile);
4232 }
4233#endif
4234
4235 tcg_reg_alloc_start(s);
4236
4237 /*
4238 * Reset the buffer pointers when restarting after overflow.
4239 * TODO: Move this into translate-all.c with the rest of the
4240 * buffer management. Having only this done here is confusing.
4241 */
4242 s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4243 s->code_ptr = s->code_buf;
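 /*
  * With a split rw/rx code buffer, tb->tc.ptr is the executable (rx)
  * address; tcg_splitwx_to_rw() converts it to the writable alias that
  * all code emission below must target.
  */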
4244
4245#ifdef TCG_TARGET_NEED_LDST_LABELS
4246 QSIMPLEQ_INIT(&s->ldst_labels);
4247#endif
4248#ifdef TCG_TARGET_NEED_POOL_LABELS
4249 s->pool_labels = NULL;
4250#endif
4251
4252 num_insns = -1;
4253 QTAILQ_FOREACH(op, &s->ops, link) {
4254 TCGOpcode opc = op->opc;
4255
4256#ifdef CONFIG_PROFILER
4257 qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4258#endif
4259
4260 switch (opc) {
4261 case INDEX_op_mov_i32:
4262 case INDEX_op_mov_i64:
4263 case INDEX_op_mov_vec:
4264 tcg_reg_alloc_mov(s, op);
4265 break;
4266 case INDEX_op_dup_vec:
4267 tcg_reg_alloc_dup(s, op);
4268 break;
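 /*
  * insn_start marks a guest instruction boundary: close out the
  * previous instruction's host-code extent in gen_insn_end_off and
  * record its TARGET_INSN_START_WORDS start words (typically the
  * guest PC) in gen_insn_data, for later unwinding of guest state
  * from a host PC.
  */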
4269 case INDEX_op_insn_start:
4270 if (num_insns >= 0) {
4271 size_t off = tcg_current_code_size(s);
4272 s->gen_insn_end_off[num_insns] = off;
4273 /* Assert that we do not overflow our stored offset. */
4274 assert(s->gen_insn_end_off[num_insns] == off);
4275 }
4276 num_insns++;
4277 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4278 target_ulong a;
4279#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4280 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4281#else
4282 a = op->args[i];
4283#endif
4284 s->gen_insn_data[num_insns][i] = a;
4285 }
4286 break;
4287 case INDEX_op_discard:
4288 temp_dead(s, arg_temp(op->args[0]));
4289 break;
4290 case INDEX_op_set_label:
4291 tcg_reg_alloc_bb_end(s, s->reserved_regs);
4292 tcg_out_label(s, arg_label(op->args[0]));
4293 break;
4294 case INDEX_op_call:
4295 tcg_reg_alloc_call(s, op);
4296 break;
4297 case INDEX_op_dup2_vec:
4298 if (tcg_reg_alloc_dup2(s, op)) {
4299 break;
4300 }
4301 /* fall through */
4302 default:
4303 /* Sanity check that we've not introduced any unhandled opcodes. */
4304 tcg_debug_assert(tcg_op_supported(opc));
4305 /* Note: it would be much faster to have specialized
4306 register allocator functions for some common argument
4307 patterns. */
4308 tcg_reg_alloc_op(s, op);
4309 break;
4310 }
4311#ifdef CONFIG_DEBUG_TCG
4312 check_regs(s);
4313#endif
4314 /* Test for (pending) buffer overflow. The assumption is that any
4315 one operation beginning below the high water mark cannot overrun
4316 the buffer completely. Thus we can test for overflow after
4317 generating code without having to check during generation. */
4318 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4319 return -1;
4320 }
4321 /* Test for TB overflow, as seen by gen_insn_end_off. */
4322 if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4323 return -2;
4324 }
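 /*
  * Both negative returns abandon this translation: the caller is
  * expected to retry, after flushing the code buffer for -1, or with
  * fewer guest instructions in the TB for -2.
  */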
4325 }
4326 tcg_debug_assert(num_insns >= 0);
4327 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4328
4329 /* Generate TB finalization at the end of block */
4330#ifdef TCG_TARGET_NEED_LDST_LABELS
4331 i = tcg_out_ldst_finalize(s);
4332 if (i < 0) {
4333 return i;
4334 }
4335#endif
4336#ifdef TCG_TARGET_NEED_POOL_LABELS
4337 i = tcg_out_pool_finalize(s);
4338 if (i < 0) {
4339 return i;
4340 }
4341#endif
4342 if (!tcg_resolve_relocs(s)) {
4343 return -2;
4344 }
4345
4346#ifndef CONFIG_TCG_INTERPRETER
4347 /* Flush dcache (rw view) and icache (rx view) over the new code. */
4348 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
4349 (uintptr_t)s->code_buf,
4350 tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
4351#endif
4352
4353 return tcg_current_code_size(s);
4354}
4355
4356#ifdef CONFIG_PROFILER
4357void tcg_dump_info(void)
4358{
4359 TCGProfile prof = {};
4360 const TCGProfile *s;
4361 int64_t tb_count;
4362 int64_t tb_div_count;
4363 int64_t tot;
4364
4365 tcg_profile_snapshot_counters(&prof);
4366 s = &prof;
4367 tb_count = s->tb_count;
4368 tb_div_count = tb_count ? tb_count : 1;
4369 tot = s->interm_time + s->code_time;
4370
4371 qemu_printf("JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4372 tot, tot / 2.4e9);
4373 qemu_printf("translated TBs %" PRId64 " (aborted=%" PRId64
4374 " %0.1f%%)\n",
4375 tb_count, s->tb_count1 - tb_count,
4376 (double)(s->tb_count1 - tb_count)
4377 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4378 qemu_printf("avg ops/TB %0.1f max=%d\n",
4379 (double)s->op_count / tb_div_count, s->op_count_max);
4380 qemu_printf("deleted ops/TB %0.2f\n",
4381 (double)s->del_op_count / tb_div_count);
4382 qemu_printf("avg temps/TB %0.2f max=%d\n",
4383 (double)s->temp_count / tb_div_count, s->temp_count_max);
4384 qemu_printf("avg host code/TB %0.1f\n",
4385 (double)s->code_out_len / tb_div_count);
4386 qemu_printf("avg search data/TB %0.1f\n",
4387 (double)s->search_out_len / tb_div_count);
4388
4389 qemu_printf("cycles/op %0.1f\n",
4390 s->op_count ? (double)tot / s->op_count : 0);
4391 qemu_printf("cycles/in byte %0.1f\n",
4392 s->code_in_len ? (double)tot / s->code_in_len : 0);
4393 qemu_printf("cycles/out byte %0.1f\n",
4394 s->code_out_len ? (double)tot / s->code_out_len : 0);
4395 qemu_printf("cycles/search byte %0.1f\n",
4396 s->search_out_len ? (double)tot / s->search_out_len : 0);
4397 if (tot == 0) {
4398 tot = 1;
4399 }
4400 qemu_printf(" gen_interm time %0.1f%%\n",
4401 (double)s->interm_time / tot * 100.0);
4402 qemu_printf(" gen_code time %0.1f%%\n",
4403 (double)s->code_time / tot * 100.0);
4404 qemu_printf("optim./code time %0.1f%%\n",
4405 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4406 * 100.0);
4407 qemu_printf("liveness/code time %0.1f%%\n",
4408 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4409 qemu_printf("cpu_restore count %" PRId64 "\n",
4410 s->restore_count);
4411 qemu_printf(" avg cycles %0.1f\n",
4412 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4413}
4414#else
4415void tcg_dump_info(void)
4416{
4417 qemu_printf("[TCG profiler not compiled]\n");
4418}
4419#endif
4420
4421#ifdef ELF_HOST_MACHINE
4422/* In order to use this feature, the backend needs to do three things:
4423
4424 (1) Define ELF_HOST_MACHINE to indicate both what value to
4425 put into the ELF image and to indicate support for the feature.
4426
4427 (2) Define tcg_register_jit. This should create a buffer containing
4428 the contents of a .debug_frame section that describes the post-
4429 prologue unwind info for the tcg machine.
4430
4431 (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4432*/
4433
4434/* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */
4435typedef enum {
4436 JIT_NOACTION = 0,
4437 JIT_REGISTER_FN,
4438 JIT_UNREGISTER_FN
4439} jit_actions_t;
4440
4441struct jit_code_entry {
4442 struct jit_code_entry *next_entry;
4443 struct jit_code_entry *prev_entry;
4444 const void *symfile_addr;
4445 uint64_t symfile_size;
4446};
4447
4448struct jit_descriptor {
4449 uint32_t version;
4450 uint32_t action_flag;
4451 struct jit_code_entry *relevant_entry;
4452 struct jit_code_entry *first_entry;
4453};
4454
4455void __jit_debug_register_code(void) __attribute__((noinline));
4456void __jit_debug_register_code(void)
4457{
4458 asm("");
4459}
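/*
 * Per the GDB JIT interface, the debugger sets a breakpoint in
 * __jit_debug_register_code and re-reads __jit_debug_descriptor each
 * time it fires; the empty asm keeps the compiler from inlining or
 * discarding this otherwise empty function.
 */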
4460
4461/* Must statically initialize the version, because GDB may check
4462 the version before we can set it. */
4463struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4464
4465/* End GDB interface. */
4466
4467static int find_string(const char *strtab, const char *str)
4468{
4469 const char *p = strtab + 1;
4470
4471 while (1) {
4472 if (strcmp(p, str) == 0) {
4473 return p - strtab;
4474 }
4475 p += strlen(p) + 1;
4476 }
4477}
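/*
 * find_string assumes @str is present in @strtab and would loop past
 * the end otherwise; it is only ever used with the fixed string table
 * below. E.g. find_string(img->str, ".text") returns 1, the offset
 * just past the leading NUL.
 */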
4478
4479static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4480 const void *debug_frame,
4481 size_t debug_frame_size)
4482{
4483 struct __attribute__((packed)) DebugInfo {
4484 uint32_t len;
4485 uint16_t version;
4486 uint32_t abbrev;
4487 uint8_t ptr_size;
4488 uint8_t cu_die;
4489 uint16_t cu_lang;
4490 uintptr_t cu_low_pc;
4491 uintptr_t cu_high_pc;
4492 uint8_t fn_die;
4493 char fn_name[16];
4494 uintptr_t fn_low_pc;
4495 uintptr_t fn_high_pc;
4496 uint8_t cu_eoc;
4497 };
4498
4499 struct ElfImage {
4500 ElfW(Ehdr) ehdr;
4501 ElfW(Phdr) phdr;
4502 ElfW(Shdr) shdr[7];
4503 ElfW(Sym) sym[2];
4504 struct DebugInfo di;
4505 uint8_t da[24];
4506 char str[80];
4507 };
4508
4509 struct ElfImage *img;
4510
4511 static const struct ElfImage img_template = {
4512 .ehdr = {
4513 .e_ident[EI_MAG0] = ELFMAG0,
4514 .e_ident[EI_MAG1] = ELFMAG1,
4515 .e_ident[EI_MAG2] = ELFMAG2,
4516 .e_ident[EI_MAG3] = ELFMAG3,
4517 .e_ident[EI_CLASS] = ELF_CLASS,
4518 .e_ident[EI_DATA] = ELF_DATA,
4519 .e_ident[EI_VERSION] = EV_CURRENT,
4520 .e_type = ET_EXEC,
4521 .e_machine = ELF_HOST_MACHINE,
4522 .e_version = EV_CURRENT,
4523 .e_phoff = offsetof(struct ElfImage, phdr),
4524 .e_shoff = offsetof(struct ElfImage, shdr),
4525 .e_ehsize = sizeof(ElfW(Ehdr)),
4526 .e_phentsize = sizeof(ElfW(Phdr)),
4527 .e_phnum = 1,
4528 .e_shentsize = sizeof(ElfW(Shdr)),
4529 .e_shnum = ARRAY_SIZE(img->shdr),
4530 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4531#ifdef ELF_HOST_FLAGS
4532 .e_flags = ELF_HOST_FLAGS,
4533#endif
4534#ifdef ELF_OSABI
4535 .e_ident[EI_OSABI] = ELF_OSABI,
4536#endif
4537 },
4538 .phdr = {
4539 .p_type = PT_LOAD,
4540 .p_flags = PF_X,
4541 },
4542 .shdr = {
4543 [0] = { .sh_type = SHT_NULL },
4544 /* Trick: The contents of code_gen_buffer are not present in
4545 this fake ELF file; that got allocated elsewhere. Therefore
4546 we mark .text as SHT_NOBITS (similar to .bss) so that readers
4547 will not look for contents. We can record any address. */
4548 [1] = { /* .text */
4549 .sh_type = SHT_NOBITS,
4550 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4551 },
4552 [2] = { /* .debug_info */
4553 .sh_type = SHT_PROGBITS,
4554 .sh_offset = offsetof(struct ElfImage, di),
4555 .sh_size = sizeof(struct DebugInfo),
4556 },
4557 [3] = { /* .debug_abbrev */
4558 .sh_type = SHT_PROGBITS,
4559 .sh_offset = offsetof(struct ElfImage, da),
4560 .sh_size = sizeof(img->da),
4561 },
4562 [4] = { /* .debug_frame */
4563 .sh_type = SHT_PROGBITS,
4564 .sh_offset = sizeof(struct ElfImage),
4565 },
4566 [5] = { /* .symtab */
4567 .sh_type = SHT_SYMTAB,
4568 .sh_offset = offsetof(struct ElfImage, sym),
4569 .sh_size = sizeof(img->sym),
4570 .sh_info = 1,
4571 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4572 .sh_entsize = sizeof(ElfW(Sym)),
4573 },
4574 [6] = { /* .strtab */
4575 .sh_type = SHT_STRTAB,
4576 .sh_offset = offsetof(struct ElfImage, str),
4577 .sh_size = sizeof(img->str),
4578 }
4579 },
4580 .sym = {
4581 [1] = { /* code_gen_buffer */
4582 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4583 .st_shndx = 1,
4584 }
4585 },
4586 .di = {
4587 .len = sizeof(struct DebugInfo) - 4,
4588 .version = 2,
4589 .ptr_size = sizeof(void *),
4590 .cu_die = 1,
4591 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */
4592 .fn_die = 2,
4593 .fn_name = "code_gen_buffer"
4594 },
4595 .da = {
4596 1, /* abbrev number (the cu) */
4597 0x11, 1, /* DW_TAG_compile_unit, has children */
4598 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */
4599 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
4600 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
4601 0, 0, /* end of abbrev */
4602 2, /* abbrev number (the fn) */
4603 0x2e, 0, /* DW_TAG_subprogram, no children */
4604 0x3, 0x8, /* DW_AT_name, DW_FORM_string */
4605 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
4606 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
4607 0, 0, /* end of abbrev */
4608 0 /* no more abbrev */
4609 },
4610 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4611 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4612 };
4613
4614 /* We only need a single jit entry; statically allocate it. */
4615 static struct jit_code_entry one_entry;
4616
4617 uintptr_t buf = (uintptr_t)buf_ptr;
4618 size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4619 DebugFrameHeader *dfh;
4620
4621 img = g_malloc(img_size);
4622 *img = img_template;
4623
4624 img->phdr.p_vaddr = buf;
4625 img->phdr.p_paddr = buf;
4626 img->phdr.p_memsz = buf_size;
4627
4628 img->shdr[1].sh_name = find_string(img->str, ".text");
4629 img->shdr[1].sh_addr = buf;
4630 img->shdr[1].sh_size = buf_size;
4631
4632 img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4633 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4634
4635 img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4636 img->shdr[4].sh_size = debug_frame_size;
4637
4638 img->shdr[5].sh_name = find_string(img->str, ".symtab");
4639 img->shdr[6].sh_name = find_string(img->str, ".strtab");
4640
4641 img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4642 img->sym[1].st_value = buf;
4643 img->sym[1].st_size = buf_size;
4644
4645 img->di.cu_low_pc = buf;
4646 img->di.cu_high_pc = buf + buf_size;
4647 img->di.fn_low_pc = buf;
4648 img->di.fn_high_pc = buf + buf_size;
4649
4650 dfh = (DebugFrameHeader *)(img + 1);
4651 memcpy(dfh, debug_frame, debug_frame_size);
4652 dfh->fde.func_start = buf;
4653 dfh->fde.func_len = buf_size;
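 /*
  * The backend supplies the .debug_frame contents with a placeholder
  * FDE address range; fill it in so the unwind info covers exactly
  * the generated code buffer.
  */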
4654
4655#ifdef DEBUG_JIT
4656 /* Enable this block to be able to debug the ELF image file creation.
4657 One can use readelf, objdump, or other inspection utilities. */
4658 {
4659 FILE *f = fopen("/tmp/qemu.jit", "w+b");
4660 if (f) {
4661 if (fwrite(img, img_size, 1, f) != 1) {
4662 /* Avoid stupid unused return value warning for fwrite. */
4663 }
4664 fclose(f);
4665 }
4666 }
4667#endif
4668
4669 one_entry.symfile_addr = img;
4670 one_entry.symfile_size = img_size;
4671
4672 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4673 __jit_debug_descriptor.relevant_entry = &one_entry;
4674 __jit_debug_descriptor.first_entry = &one_entry;
4675 __jit_debug_register_code();
4676}
4677#else
4678/* No support for the feature. Provide the entry point expected by exec.c,
4679 and implement the internal function we declared earlier. */
4680
4681static void tcg_register_jit_int(const void *buf, size_t size,
4682 const void *debug_frame,
4683 size_t debug_frame_size)
4684{
4685}
4686
4687void tcg_register_jit(const void *buf, size_t buf_size)
4688{
4689}
4690#endif /* ELF_HOST_MACHINE */
4691
4692#if !TCG_TARGET_MAYBE_vec
4693void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4694{
4695 g_assert_not_reached();
4696}
4697#endif