2 * Optimizations for Tiny Code Generator for QEMU
4 * Copyright (c) 2010 Samsung Electronics.
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
31 #include "qemu-common.h"
34 #if TCG_TARGET_REG_BITS == 64
/* Helper macro for switch statements: expands to the case labels for both
   the 32-bit and the 64-bit variant of TCG opcode X on 64-bit hosts. */
35 #define CASE_OP_32_64(x) \
36 glue(glue(case INDEX_op_, x), _i32): \
37 glue(glue(case INDEX_op_, x), _i64)
/* NOTE(review): this second definition appears to be the #else branch of the
   conditional above (the #else directive itself is not visible in this
   excerpt) — 32-bit hosts get only the _i32 case label.  Confirm against
   the full file. */
39 #define CASE_OP_32_64(x) \
40 glue(glue(case INDEX_op_, x), _i32)
/* Per-temporary optimizer state.  Fields are elided in this excerpt, but the
   uses below show at least: a `state` tag (TCG_TEMP_ANY / TCG_TEMP_CONST /
   TCG_TEMP_COPY / TCG_TEMP_HAS_COPY), a `val` (constant value, or the
   representative temp when state is TCG_TEMP_COPY), and `next_copy` /
   `prev_copy` links forming a circular doubly-linked list of equivalent
   temps. */
51 struct tcg_temp_info {
/* One entry per TCG temp; indexed by TCGArg temp number.  Reset (memset to
   zero) at the start of each translation-block optimization pass. */
58 static struct tcg_temp_info temps[TCG_MAX_TEMPS];
60 /* Reset TEMP's state to TCG_TEMP_ANY. If TEMP was a representative of some
61 class of equivalent temp's, a new representative should be chosen in this
63 static void reset_temp(TCGArg temp, int nb_temps, int nb_globals)
/* Sentinel: -1 means "no new representative found / nothing to relink". */
66 TCGArg new_base = (TCGArg)-1;
67 if (temps[temp].state == TCG_TEMP_HAS_COPY) {
/* TEMP is the representative of a copy class: walk the circular copy
   list looking for a non-global temp to promote to representative.
   (Globals cannot represent a class, since their value may change
   behind the optimizer's back.) */
68 for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
69 if (i >= nb_globals) {
70 temps[i].state = TCG_TEMP_HAS_COPY;
/* Second walk: repoint the remaining class members at the new
   representative, or dissolve the class if none was found. */
75 for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
76 if (new_base == (TCGArg)-1) {
77 temps[i].state = TCG_TEMP_ANY;
79 temps[i].val = new_base;
/* Unlink TEMP from the circular copy list. */
82 temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
83 temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
84 } else if (temps[temp].state == TCG_TEMP_COPY) {
/* TEMP is a non-representative member: just unlink it and remember
   its representative so a singleton class can be collapsed below. */
85 temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
86 temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
87 new_base = temps[temp].val;
89 temps[temp].state = TCG_TEMP_ANY;
/* If the representative is now alone in its class, it no longer "has
   copies" — demote it to TCG_TEMP_ANY as well. */
90 if (new_base != (TCGArg)-1 && temps[new_base].next_copy == new_base) {
91 temps[new_base].state = TCG_TEMP_ANY;
/* Return the operand width in bits (32 or 64) of TCG opcode OP.
   Opcodes not listed are a programming error and are reported on stderr
   (the return for that path is elided in this excerpt). */
95 static int op_bits(int op)
98 case INDEX_op_mov_i32:
99 case INDEX_op_add_i32:
100 case INDEX_op_sub_i32:
101 case INDEX_op_mul_i32:
102 case INDEX_op_and_i32:
103 case INDEX_op_or_i32:
104 case INDEX_op_xor_i32:
105 case INDEX_op_shl_i32:
106 case INDEX_op_shr_i32:
107 case INDEX_op_sar_i32:
108 case INDEX_op_rotl_i32:
109 case INDEX_op_rotr_i32:
110 case INDEX_op_not_i32:
111 case INDEX_op_ext8s_i32:
112 case INDEX_op_ext16s_i32:
113 case INDEX_op_ext8u_i32:
114 case INDEX_op_ext16u_i32:
/* 64-bit opcodes only exist when the host has 64-bit registers. */
116 #if TCG_TARGET_REG_BITS == 64
117 case INDEX_op_mov_i64:
118 case INDEX_op_add_i64:
119 case INDEX_op_sub_i64:
120 case INDEX_op_mul_i64:
121 case INDEX_op_and_i64:
122 case INDEX_op_or_i64:
123 case INDEX_op_xor_i64:
124 case INDEX_op_shl_i64:
125 case INDEX_op_shr_i64:
126 case INDEX_op_sar_i64:
127 case INDEX_op_rotl_i64:
128 case INDEX_op_rotr_i64:
129 case INDEX_op_not_i64:
130 case INDEX_op_ext8s_i64:
131 case INDEX_op_ext16s_i64:
132 case INDEX_op_ext32s_i64:
133 case INDEX_op_ext8u_i64:
134 case INDEX_op_ext16u_i64:
135 case INDEX_op_ext32u_i64:
139 fprintf(stderr, "Unrecognized operation %d in op_bits.\n", op);
/* Return the movi opcode of the same operand width as OP, used when a
   folded operation is rewritten into a load of a constant. */
144 static int op_to_movi(int op)
146 switch (op_bits(op)) {
148 return INDEX_op_movi_i32;
149 #if TCG_TARGET_REG_BITS == 64
151 return INDEX_op_movi_i64;
/* op_bits() only returns 32 or 64; anything else is a bug. */
154 fprintf(stderr, "op_to_movi: unexpected return value of "
155 "function op_bits.\n");
/* Emit a mov DST, SRC into GEN_ARGS and record the copy relationship in
   the temps[] tracking state.  DST's old state is discarded first.
   (The writes of dst/src into gen_args are elided in this excerpt.) */
160 static void tcg_opt_gen_mov(TCGArg *gen_args, TCGArg dst, TCGArg src,
161 int nb_temps, int nb_globals)
163 reset_temp(dst, nb_temps, nb_globals);
/* Copy propagation above should have replaced SRC by its representative,
   so SRC itself is never a non-representative copy here. */
164 assert(temps[src].state != TCG_TEMP_COPY);
/* Only non-global temps can anchor a copy class; globals may change
   value outside the optimizer's view. */
165 if (src >= nb_globals) {
166 assert(temps[src].state != TCG_TEMP_CONST);
/* Lazily turn SRC into a (singleton) class representative. */
167 if (temps[src].state != TCG_TEMP_HAS_COPY) {
168 temps[src].state = TCG_TEMP_HAS_COPY;
169 temps[src].next_copy = src;
170 temps[src].prev_copy = src;
/* Insert DST into SRC's circular copy list, right after SRC. */
172 temps[dst].state = TCG_TEMP_COPY;
173 temps[dst].val = src;
174 temps[dst].next_copy = temps[src].next_copy;
175 temps[dst].prev_copy = src;
176 temps[temps[dst].next_copy].prev_copy = dst;
177 temps[src].next_copy = dst;
/* Emit a movi DST, VAL into GEN_ARGS and mark DST as holding the known
   constant VAL.  (The writes of dst/val into gen_args are elided in this
   excerpt.) */
183 static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val,
184 int nb_temps, int nb_globals)
186 reset_temp(dst, nb_temps, nb_globals);
187 temps[dst].state = TCG_TEMP_CONST;
188 temps[dst].val = val;
/* Return the mov opcode of the same operand width as OP, used when an
   operation simplifies to a plain register copy. */
193 static int op_to_mov(int op)
195 switch (op_bits(op)) {
197 return INDEX_op_mov_i32;
198 #if TCG_TARGET_REG_BITS == 64
200 return INDEX_op_mov_i64;
/* op_bits() only returns 32 or 64; anything else is a bug. */
203 fprintf(stderr, "op_to_mov: unexpected return value of "
204 "function op_bits.\n");
/* Evaluate OP on constant operands X and Y at full host width.
   The 32-bit truncation of the result is done by the caller,
   do_constant_folding().  Cases for mov/add/sub/etc. are elided in this
   excerpt. */
209 static TCGArg do_constant_folding_2(int op, TCGArg x, TCGArg y)
/* Shifts: operands are cast to the exact operand width so the host
   shift matches the guest-visible semantics. */
230 case INDEX_op_shl_i32:
231 return (uint32_t)x << (uint32_t)y;
233 #if TCG_TARGET_REG_BITS == 64
234 case INDEX_op_shl_i64:
235 return (uint64_t)x << (uint64_t)y;
238 case INDEX_op_shr_i32:
239 return (uint32_t)x >> (uint32_t)y;
241 #if TCG_TARGET_REG_BITS == 64
242 case INDEX_op_shr_i64:
243 return (uint64_t)x >> (uint64_t)y;
/* Arithmetic right shift: signed casts preserve the sign bit. */
246 case INDEX_op_sar_i32:
247 return (int32_t)x >> (int32_t)y;
249 #if TCG_TARGET_REG_BITS == 64
250 case INDEX_op_sar_i64:
251 return (int64_t)x >> (int64_t)y;
/* Rotates, composed from two shifts.  NOTE(review): the masking /
   truncation of x and y before these expressions is elided in this
   excerpt; as written, y == 0 would shift by the full width.  Confirm
   the elided lines handle that case in the full file. */
254 case INDEX_op_rotr_i32:
255 #if TCG_TARGET_REG_BITS == 64
259 x = (x << (32 - y)) | (x >> y);
262 #if TCG_TARGET_REG_BITS == 64
263 case INDEX_op_rotr_i64:
264 x = (x << (64 - y)) | (x >> y);
268 case INDEX_op_rotl_i32:
269 #if TCG_TARGET_REG_BITS == 64
273 x = (x << y) | (x >> (32 - y));
276 #if TCG_TARGET_REG_BITS == 64
277 case INDEX_op_rotl_i64:
278 x = (x << y) | (x >> (64 - y));
/* Sign/zero extensions (return expressions elided in this excerpt). */
285 CASE_OP_32_64(ext8s):
288 CASE_OP_32_64(ext16s):
291 CASE_OP_32_64(ext8u):
294 CASE_OP_32_64(ext16u):
297 #if TCG_TARGET_REG_BITS == 64
298 case INDEX_op_ext32s_i64:
301 case INDEX_op_ext32u_i64:
307 "Unrecognized operation %d in do_constant_folding.\n", op);
/* Fold OP applied to constants X and Y, truncating the result to 32 bits
   when OP is a 32-bit opcode on a 64-bit host (the truncation statement
   itself is elided in this excerpt). */
312 static TCGArg do_constant_folding(int op, TCGArg x, TCGArg y)
314 TCGArg res = do_constant_folding_2(op, x, y);
315 #if TCG_TARGET_REG_BITS == 64
316 if (op_bits(op) == 32) {
323 /* Propagate constants and copies, fold constant expressions. */
324 static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
325 TCGArg *args, TCGOpDef *tcg_op_defs)
327 int i, nb_ops, op_index, op, nb_temps, nb_globals, nb_call_args;
331 /* Array VALS has an element for each temp.
332 If this temp holds a constant then its value is kept in VALS' element.
333 If this temp is a copy of other ones then this equivalence class'
334 representative is kept in VALS' element.
335 If this temp is neither copy nor constant then corresponding VALS'
336 element is unused. */
338 nb_temps = s->nb_temps;
339 nb_globals = s->nb_globals;
/* Start every pass with all temps in the "unknown" (zeroed) state. */
340 memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
342 nb_ops = tcg_opc_ptr - gen_opc_buf;
/* Main pass: walk every op in the opcode buffer, rewriting in place. */
344 for (op_index = 0; op_index < nb_ops; op_index++) {
345 op = gen_opc_buf[op_index];
346 def = &tcg_op_defs[op];
347 /* Do copy propagation */
/* Replace each input argument that is a known copy by its class
   representative.  Skipped for ops with calls/side effects. */
348 if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS))) {
349 assert(op != INDEX_op_call);
350 for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
351 if (temps[args[i]].state == TCG_TEMP_COPY) {
352 args[i] = temps[args[i]].val;
357 /* For commutative operations make constant second argument */
/* (argument-swap statements elided in this excerpt) */
364 if (temps[args[1]].state == TCG_TEMP_CONST) {
374 /* Simplify expression if possible. */
383 if (temps[args[1]].state == TCG_TEMP_CONST) {
384 /* Proceed with possible constant folding. */
/* x op 0 == x for some ops: becomes a nop when dst already equals
   src (directly or as a known copy), otherwise a plain mov. */
387 if (temps[args[2]].state == TCG_TEMP_CONST
388 && temps[args[2]].val == 0) {
389 if ((temps[args[0]].state == TCG_TEMP_COPY
390 && temps[args[0]].val == args[1])
391 || args[0] == args[1]) {
393 gen_opc_buf[op_index] = INDEX_op_nop;
395 gen_opc_buf[op_index] = op_to_mov(op);
396 tcg_opt_gen_mov(gen_args, args[0], args[1],
397 nb_temps, nb_globals);
/* x op 0 == 0 for some ops (e.g. multiply): becomes movi dst, 0.
   The surrounding switch labels are elided in this excerpt. */
405 if ((temps[args[2]].state == TCG_TEMP_CONST
406 && temps[args[2]].val == 0)) {
407 gen_opc_buf[op_index] = op_to_movi(op);
408 tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals);
/* x op x == x for some ops: nop if dst == src, else a mov. */
416 if (args[1] == args[2]) {
417 if (args[1] == args[0]) {
419 gen_opc_buf[op_index] = INDEX_op_nop;
421 gen_opc_buf[op_index] = op_to_mov(op);
422 tcg_opt_gen_mov(gen_args, args[0], args[1], nb_temps,
432 /* Propagate constants through copy operations and do constant
433 folding. Constants will be substituted to arguments by register
434 allocator where needed and possible. Also detect copies. */
/* mov handling: drop a self-mov (directly or via the copy class)... */
437 if ((temps[args[1]].state == TCG_TEMP_COPY
438 && temps[args[1]].val == args[0])
439 || args[0] == args[1]) {
441 gen_opc_buf[op_index] = INDEX_op_nop;
/* ...keep a genuine mov and record the copy relationship... */
444 if (temps[args[1]].state != TCG_TEMP_CONST) {
445 tcg_opt_gen_mov(gen_args, args[0], args[1],
446 nb_temps, nb_globals);
451 /* Source argument is constant. Rewrite the operation and
452 let movi case handle it. */
/* (op was changed to the movi opcode in an elided line above) */
454 gen_opc_buf[op_index] = op;
455 args[1] = temps[args[1]].val;
/* movi handling: record the constant and re-emit the movi. */
458 tcg_opt_gen_movi(gen_args, args[0], args[1], nb_temps, nb_globals);
/* Unary extension ops: fold when the operand is a known constant. */
463 CASE_OP_32_64(ext8s):
464 CASE_OP_32_64(ext16s):
465 CASE_OP_32_64(ext8u):
466 CASE_OP_32_64(ext16u):
467 #if TCG_TARGET_REG_BITS == 64
468 case INDEX_op_ext32s_i64:
469 case INDEX_op_ext32u_i64:
471 if (temps[args[1]].state == TCG_TEMP_CONST) {
472 gen_opc_buf[op_index] = op_to_movi(op);
/* Unary op: second folding operand is a dummy 0. */
473 tmp = do_constant_folding(op, temps[args[1]].val, 0);
474 tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
/* Not foldable: output value becomes unknown; pass the op through. */
479 reset_temp(args[0], nb_temps, nb_globals);
480 gen_args[0] = args[0];
481 gen_args[1] = args[1];
/* Binary ops: fold when both operands are known constants
   (the case labels for this group are elided in this excerpt). */
497 if (temps[args[1]].state == TCG_TEMP_CONST
498 && temps[args[2]].state == TCG_TEMP_CONST) {
499 gen_opc_buf[op_index] = op_to_movi(op);
500 tmp = do_constant_folding(op, temps[args[1]].val,
502 tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
507 reset_temp(args[0], nb_temps, nb_globals);
508 gen_args[0] = args[0];
509 gen_args[1] = args[1];
510 gen_args[2] = args[2];
/* call handling: args[0] packs (nb_oargs << 16) | nb_iargs.  Unless
   the call is const/pure, it may clobber every global. */
516 nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
517 if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
518 for (i = 0; i < nb_globals; i++) {
519 reset_temp(i, nb_temps, nb_globals);
/* The call's output args get unknown values in any case. */
522 for (i = 0; i < (args[0] >> 16); i++) {
523 reset_temp(args[i + 1], nb_temps, nb_globals);
/* Total arg slots consumed: packed word + func + flags + operands. */
525 i = nb_call_args + 3;
/* Control flow merge points: all tracked knowledge is invalidated,
   since another path may reach here with different temp values. */
533 case INDEX_op_set_label:
536 CASE_OP_32_64(brcond):
537 memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
538 for (i = 0; i < def->nb_args; i++) {
545 /* Default case: we do know nothing about operation so no
546 propagation is done. We only trash output args. */
547 for (i = 0; i < def->nb_oargs; i++) {
548 reset_temp(args[i], nb_temps, nb_globals);
550 for (i = 0; i < def->nb_args; i++) {
551 gen_args[i] = args[i];
553 args += def->nb_args;
554 gen_args += def->nb_args;
/* Public entry point of the optimizer: runs the constant-folding /
   copy-propagation pass over the opcode buffer and returns the new end of
   the argument buffer (the return statement is elided in this excerpt). */
562 TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr,
563 TCGArg *args, TCGOpDef *tcg_op_defs)
566 res = tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs);