]>
Commit | Line | Data |
---|---|---|
8f2e8c07 KB |
1 | /* |
2 | * Optimizations for Tiny Code Generator for QEMU | |
3 | * | |
4 | * Copyright (c) 2010 Samsung Electronics. | |
5 | * Contributed by Kirill Batuzov <[email protected]> | |
6 | * | |
7 | * Permission is hereby granted, free of charge, to any person obtaining a copy | |
8 | * of this software and associated documentation files (the "Software"), to deal | |
9 | * in the Software without restriction, including without limitation the rights | |
10 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
11 | * copies of the Software, and to permit persons to whom the Software is | |
12 | * furnished to do so, subject to the following conditions: | |
13 | * | |
14 | * The above copyright notice and this permission notice shall be included in | |
15 | * all copies or substantial portions of the Software. | |
16 | * | |
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
20 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
23 | * THE SOFTWARE. | |
24 | */ | |
25 | ||
757e725b | 26 | #include "qemu/osdep.h" |
dcb32f1d | 27 | #include "tcg/tcg-op.h" |
90163900 | 28 | #include "tcg-internal.h" |
8f2e8c07 | 29 | |
8f2e8c07 KB |
/* Expand to the case labels for the _i32 and _i64 flavours of opcode X.
   The colon after the last label is supplied at the point of use. */
#define CASE_OP_32_64(x)                        \
        case glue(glue(INDEX_op_, x), _i32):    \
        case glue(glue(INDEX_op_, x), _i64)
8f2e8c07 | 33 | |
170ba88f RH |
/* Like CASE_OP_32_64, with the _vec flavour of opcode X added.
   The colon after the last label is supplied at the point of use. */
#define CASE_OP_32_64_VEC(x)                    \
        CASE_OP_32_64(x):                       \
        case glue(glue(INDEX_op_, x), _vec)
38 | ||
6fcb98ed | 39 | typedef struct TempOptInfo { |
b41059dd | 40 | bool is_const; |
6349039d RH |
41 | TCGTemp *prev_copy; |
42 | TCGTemp *next_copy; | |
54795544 | 43 | uint64_t val; |
b1fde411 | 44 | uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */ |
6fcb98ed | 45 | } TempOptInfo; |
22613af4 | 46 | |
3b3f847d | 47 | typedef struct OptContext { |
dc84988a | 48 | TCGContext *tcg; |
d0ed5151 | 49 | TCGOp *prev_mb; |
3b3f847d RH |
50 | TCGTempSet temps_used; |
51 | } OptContext; | |
52 | ||
6fcb98ed | 53 | static inline TempOptInfo *ts_info(TCGTemp *ts) |
d9c769c6 | 54 | { |
6349039d | 55 | return ts->state_ptr; |
d9c769c6 AJ |
56 | } |
57 | ||
6fcb98ed | 58 | static inline TempOptInfo *arg_info(TCGArg arg) |
d9c769c6 | 59 | { |
6349039d RH |
60 | return ts_info(arg_temp(arg)); |
61 | } | |
62 | ||
63 | static inline bool ts_is_const(TCGTemp *ts) | |
64 | { | |
65 | return ts_info(ts)->is_const; | |
66 | } | |
67 | ||
68 | static inline bool arg_is_const(TCGArg arg) | |
69 | { | |
70 | return ts_is_const(arg_temp(arg)); | |
71 | } | |
72 | ||
73 | static inline bool ts_is_copy(TCGTemp *ts) | |
74 | { | |
75 | return ts_info(ts)->next_copy != ts; | |
d9c769c6 AJ |
76 | } |
77 | ||
b41059dd | 78 | /* Reset TEMP's state, possibly removing the temp for the list of copies. */ |
6349039d RH |
79 | static void reset_ts(TCGTemp *ts) |
80 | { | |
6fcb98ed RH |
81 | TempOptInfo *ti = ts_info(ts); |
82 | TempOptInfo *pi = ts_info(ti->prev_copy); | |
83 | TempOptInfo *ni = ts_info(ti->next_copy); | |
6349039d RH |
84 | |
85 | ni->prev_copy = ti->prev_copy; | |
86 | pi->next_copy = ti->next_copy; | |
87 | ti->next_copy = ts; | |
88 | ti->prev_copy = ts; | |
89 | ti->is_const = false; | |
b1fde411 | 90 | ti->z_mask = -1; |
6349039d RH |
91 | } |
92 | ||
93 | static void reset_temp(TCGArg arg) | |
22613af4 | 94 | { |
6349039d | 95 | reset_ts(arg_temp(arg)); |
22613af4 KB |
96 | } |
97 | ||
1208d7dd | 98 | /* Initialize and activate a temporary. */ |
3b3f847d | 99 | static void init_ts_info(OptContext *ctx, TCGTemp *ts) |
1208d7dd | 100 | { |
6349039d | 101 | size_t idx = temp_idx(ts); |
8f17a975 | 102 | TempOptInfo *ti; |
6349039d | 103 | |
3b3f847d | 104 | if (test_bit(idx, ctx->temps_used.l)) { |
8f17a975 RH |
105 | return; |
106 | } | |
3b3f847d | 107 | set_bit(idx, ctx->temps_used.l); |
8f17a975 RH |
108 | |
109 | ti = ts->state_ptr; | |
110 | if (ti == NULL) { | |
111 | ti = tcg_malloc(sizeof(TempOptInfo)); | |
6349039d | 112 | ts->state_ptr = ti; |
8f17a975 RH |
113 | } |
114 | ||
115 | ti->next_copy = ts; | |
116 | ti->prev_copy = ts; | |
117 | if (ts->kind == TEMP_CONST) { | |
118 | ti->is_const = true; | |
119 | ti->val = ts->val; | |
b1fde411 | 120 | ti->z_mask = ts->val; |
8f17a975 RH |
121 | if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) { |
122 | /* High bits of a 32-bit quantity are garbage. */ | |
b1fde411 | 123 | ti->z_mask |= ~0xffffffffull; |
c0522136 | 124 | } |
8f17a975 RH |
125 | } else { |
126 | ti->is_const = false; | |
b1fde411 | 127 | ti->z_mask = -1; |
1208d7dd AJ |
128 | } |
129 | } | |
130 | ||
6349039d | 131 | static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts) |
e590d4e6 | 132 | { |
4c868ce6 | 133 | TCGTemp *i, *g, *l; |
e590d4e6 | 134 | |
4c868ce6 RH |
135 | /* If this is already readonly, we can't do better. */ |
136 | if (temp_readonly(ts)) { | |
6349039d | 137 | return ts; |
e590d4e6 AJ |
138 | } |
139 | ||
4c868ce6 | 140 | g = l = NULL; |
6349039d | 141 | for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) { |
4c868ce6 | 142 | if (temp_readonly(i)) { |
e590d4e6 | 143 | return i; |
4c868ce6 RH |
144 | } else if (i->kind > ts->kind) { |
145 | if (i->kind == TEMP_GLOBAL) { | |
146 | g = i; | |
147 | } else if (i->kind == TEMP_LOCAL) { | |
148 | l = i; | |
e590d4e6 AJ |
149 | } |
150 | } | |
151 | } | |
152 | ||
4c868ce6 RH |
153 | /* If we didn't find a better representation, return the same temp. */ |
154 | return g ? g : l ? l : ts; | |
e590d4e6 AJ |
155 | } |
156 | ||
6349039d | 157 | static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2) |
e590d4e6 | 158 | { |
6349039d | 159 | TCGTemp *i; |
e590d4e6 | 160 | |
6349039d | 161 | if (ts1 == ts2) { |
e590d4e6 AJ |
162 | return true; |
163 | } | |
164 | ||
6349039d | 165 | if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) { |
e590d4e6 AJ |
166 | return false; |
167 | } | |
168 | ||
6349039d RH |
169 | for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) { |
170 | if (i == ts2) { | |
e590d4e6 AJ |
171 | return true; |
172 | } | |
173 | } | |
174 | ||
175 | return false; | |
176 | } | |
177 | ||
6349039d RH |
178 | static bool args_are_copies(TCGArg arg1, TCGArg arg2) |
179 | { | |
180 | return ts_are_copies(arg_temp(arg1), arg_temp(arg2)); | |
181 | } | |
182 | ||
dc84988a | 183 | static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src) |
22613af4 | 184 | { |
6349039d RH |
185 | TCGTemp *dst_ts = arg_temp(dst); |
186 | TCGTemp *src_ts = arg_temp(src); | |
170ba88f | 187 | const TCGOpDef *def; |
6fcb98ed RH |
188 | TempOptInfo *di; |
189 | TempOptInfo *si; | |
b1fde411 | 190 | uint64_t z_mask; |
6349039d RH |
191 | TCGOpcode new_op; |
192 | ||
193 | if (ts_are_copies(dst_ts, src_ts)) { | |
dc84988a | 194 | tcg_op_remove(ctx->tcg, op); |
5365718a AJ |
195 | return; |
196 | } | |
197 | ||
6349039d RH |
198 | reset_ts(dst_ts); |
199 | di = ts_info(dst_ts); | |
200 | si = ts_info(src_ts); | |
170ba88f RH |
201 | def = &tcg_op_defs[op->opc]; |
202 | if (def->flags & TCG_OPF_VECTOR) { | |
203 | new_op = INDEX_op_mov_vec; | |
204 | } else if (def->flags & TCG_OPF_64BIT) { | |
205 | new_op = INDEX_op_mov_i64; | |
206 | } else { | |
207 | new_op = INDEX_op_mov_i32; | |
208 | } | |
c45cb8bb | 209 | op->opc = new_op; |
170ba88f | 210 | /* TCGOP_VECL and TCGOP_VECE remain unchanged. */ |
6349039d RH |
211 | op->args[0] = dst; |
212 | op->args[1] = src; | |
a62f6f56 | 213 | |
b1fde411 | 214 | z_mask = si->z_mask; |
24666baf RH |
215 | if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) { |
216 | /* High bits of the destination are now garbage. */ | |
b1fde411 | 217 | z_mask |= ~0xffffffffull; |
24666baf | 218 | } |
b1fde411 | 219 | di->z_mask = z_mask; |
e590d4e6 | 220 | |
6349039d | 221 | if (src_ts->type == dst_ts->type) { |
6fcb98ed | 222 | TempOptInfo *ni = ts_info(si->next_copy); |
6349039d RH |
223 | |
224 | di->next_copy = si->next_copy; | |
225 | di->prev_copy = src_ts; | |
226 | ni->prev_copy = dst_ts; | |
227 | si->next_copy = dst_ts; | |
228 | di->is_const = si->is_const; | |
229 | di->val = si->val; | |
230 | } | |
22613af4 KB |
231 | } |
232 | ||
dc84988a RH |
233 | static void tcg_opt_gen_movi(OptContext *ctx, TCGOp *op, |
234 | TCGArg dst, uint64_t val) | |
8fe35e04 RH |
235 | { |
236 | const TCGOpDef *def = &tcg_op_defs[op->opc]; | |
237 | TCGType type; | |
238 | TCGTemp *tv; | |
239 | ||
240 | if (def->flags & TCG_OPF_VECTOR) { | |
241 | type = TCGOP_VECL(op) + TCG_TYPE_V64; | |
242 | } else if (def->flags & TCG_OPF_64BIT) { | |
243 | type = TCG_TYPE_I64; | |
244 | } else { | |
245 | type = TCG_TYPE_I32; | |
246 | } | |
247 | ||
248 | /* Convert movi to mov with constant temp. */ | |
249 | tv = tcg_constant_internal(type, val); | |
3b3f847d | 250 | init_ts_info(ctx, tv); |
dc84988a | 251 | tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv)); |
8fe35e04 RH |
252 | } |
253 | ||
54795544 | 254 | static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y) |
53108fb5 | 255 | { |
03271524 RH |
256 | uint64_t l64, h64; |
257 | ||
53108fb5 KB |
258 | switch (op) { |
259 | CASE_OP_32_64(add): | |
260 | return x + y; | |
261 | ||
262 | CASE_OP_32_64(sub): | |
263 | return x - y; | |
264 | ||
265 | CASE_OP_32_64(mul): | |
266 | return x * y; | |
267 | ||
9a81090b KB |
268 | CASE_OP_32_64(and): |
269 | return x & y; | |
270 | ||
271 | CASE_OP_32_64(or): | |
272 | return x | y; | |
273 | ||
274 | CASE_OP_32_64(xor): | |
275 | return x ^ y; | |
276 | ||
55c0975c | 277 | case INDEX_op_shl_i32: |
50c5c4d1 | 278 | return (uint32_t)x << (y & 31); |
55c0975c | 279 | |
55c0975c | 280 | case INDEX_op_shl_i64: |
50c5c4d1 | 281 | return (uint64_t)x << (y & 63); |
55c0975c KB |
282 | |
283 | case INDEX_op_shr_i32: | |
50c5c4d1 | 284 | return (uint32_t)x >> (y & 31); |
55c0975c | 285 | |
55c0975c | 286 | case INDEX_op_shr_i64: |
50c5c4d1 | 287 | return (uint64_t)x >> (y & 63); |
55c0975c KB |
288 | |
289 | case INDEX_op_sar_i32: | |
50c5c4d1 | 290 | return (int32_t)x >> (y & 31); |
55c0975c | 291 | |
55c0975c | 292 | case INDEX_op_sar_i64: |
50c5c4d1 | 293 | return (int64_t)x >> (y & 63); |
55c0975c KB |
294 | |
295 | case INDEX_op_rotr_i32: | |
50c5c4d1 | 296 | return ror32(x, y & 31); |
55c0975c | 297 | |
55c0975c | 298 | case INDEX_op_rotr_i64: |
50c5c4d1 | 299 | return ror64(x, y & 63); |
55c0975c KB |
300 | |
301 | case INDEX_op_rotl_i32: | |
50c5c4d1 | 302 | return rol32(x, y & 31); |
55c0975c | 303 | |
55c0975c | 304 | case INDEX_op_rotl_i64: |
50c5c4d1 | 305 | return rol64(x, y & 63); |
25c4d9cc RH |
306 | |
307 | CASE_OP_32_64(not): | |
a640f031 | 308 | return ~x; |
25c4d9cc | 309 | |
cb25c80a RH |
310 | CASE_OP_32_64(neg): |
311 | return -x; | |
312 | ||
313 | CASE_OP_32_64(andc): | |
314 | return x & ~y; | |
315 | ||
316 | CASE_OP_32_64(orc): | |
317 | return x | ~y; | |
318 | ||
319 | CASE_OP_32_64(eqv): | |
320 | return ~(x ^ y); | |
321 | ||
322 | CASE_OP_32_64(nand): | |
323 | return ~(x & y); | |
324 | ||
325 | CASE_OP_32_64(nor): | |
326 | return ~(x | y); | |
327 | ||
0e28d006 RH |
328 | case INDEX_op_clz_i32: |
329 | return (uint32_t)x ? clz32(x) : y; | |
330 | ||
331 | case INDEX_op_clz_i64: | |
332 | return x ? clz64(x) : y; | |
333 | ||
334 | case INDEX_op_ctz_i32: | |
335 | return (uint32_t)x ? ctz32(x) : y; | |
336 | ||
337 | case INDEX_op_ctz_i64: | |
338 | return x ? ctz64(x) : y; | |
339 | ||
a768e4e9 RH |
340 | case INDEX_op_ctpop_i32: |
341 | return ctpop32(x); | |
342 | ||
343 | case INDEX_op_ctpop_i64: | |
344 | return ctpop64(x); | |
345 | ||
25c4d9cc | 346 | CASE_OP_32_64(ext8s): |
a640f031 | 347 | return (int8_t)x; |
25c4d9cc RH |
348 | |
349 | CASE_OP_32_64(ext16s): | |
a640f031 | 350 | return (int16_t)x; |
25c4d9cc RH |
351 | |
352 | CASE_OP_32_64(ext8u): | |
a640f031 | 353 | return (uint8_t)x; |
25c4d9cc RH |
354 | |
355 | CASE_OP_32_64(ext16u): | |
a640f031 KB |
356 | return (uint16_t)x; |
357 | ||
6498594c | 358 | CASE_OP_32_64(bswap16): |
0b76ff8f RH |
359 | x = bswap16(x); |
360 | return y & TCG_BSWAP_OS ? (int16_t)x : x; | |
6498594c RH |
361 | |
362 | CASE_OP_32_64(bswap32): | |
0b76ff8f RH |
363 | x = bswap32(x); |
364 | return y & TCG_BSWAP_OS ? (int32_t)x : x; | |
6498594c RH |
365 | |
366 | case INDEX_op_bswap64_i64: | |
367 | return bswap64(x); | |
368 | ||
8bcb5c8f | 369 | case INDEX_op_ext_i32_i64: |
a640f031 KB |
370 | case INDEX_op_ext32s_i64: |
371 | return (int32_t)x; | |
372 | ||
8bcb5c8f | 373 | case INDEX_op_extu_i32_i64: |
609ad705 | 374 | case INDEX_op_extrl_i64_i32: |
a640f031 KB |
375 | case INDEX_op_ext32u_i64: |
376 | return (uint32_t)x; | |
a640f031 | 377 | |
609ad705 RH |
378 | case INDEX_op_extrh_i64_i32: |
379 | return (uint64_t)x >> 32; | |
380 | ||
03271524 RH |
381 | case INDEX_op_muluh_i32: |
382 | return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; | |
383 | case INDEX_op_mulsh_i32: | |
384 | return ((int64_t)(int32_t)x * (int32_t)y) >> 32; | |
385 | ||
386 | case INDEX_op_muluh_i64: | |
387 | mulu64(&l64, &h64, x, y); | |
388 | return h64; | |
389 | case INDEX_op_mulsh_i64: | |
390 | muls64(&l64, &h64, x, y); | |
391 | return h64; | |
392 | ||
01547f7f RH |
393 | case INDEX_op_div_i32: |
394 | /* Avoid crashing on divide by zero, otherwise undefined. */ | |
395 | return (int32_t)x / ((int32_t)y ? : 1); | |
396 | case INDEX_op_divu_i32: | |
397 | return (uint32_t)x / ((uint32_t)y ? : 1); | |
398 | case INDEX_op_div_i64: | |
399 | return (int64_t)x / ((int64_t)y ? : 1); | |
400 | case INDEX_op_divu_i64: | |
401 | return (uint64_t)x / ((uint64_t)y ? : 1); | |
402 | ||
403 | case INDEX_op_rem_i32: | |
404 | return (int32_t)x % ((int32_t)y ? : 1); | |
405 | case INDEX_op_remu_i32: | |
406 | return (uint32_t)x % ((uint32_t)y ? : 1); | |
407 | case INDEX_op_rem_i64: | |
408 | return (int64_t)x % ((int64_t)y ? : 1); | |
409 | case INDEX_op_remu_i64: | |
410 | return (uint64_t)x % ((uint64_t)y ? : 1); | |
411 | ||
53108fb5 KB |
412 | default: |
413 | fprintf(stderr, | |
414 | "Unrecognized operation %d in do_constant_folding.\n", op); | |
415 | tcg_abort(); | |
416 | } | |
417 | } | |
418 | ||
54795544 | 419 | static uint64_t do_constant_folding(TCGOpcode op, uint64_t x, uint64_t y) |
53108fb5 | 420 | { |
170ba88f | 421 | const TCGOpDef *def = &tcg_op_defs[op]; |
54795544 | 422 | uint64_t res = do_constant_folding_2(op, x, y); |
170ba88f | 423 | if (!(def->flags & TCG_OPF_64BIT)) { |
29f3ff8d | 424 | res = (int32_t)res; |
53108fb5 | 425 | } |
53108fb5 KB |
426 | return res; |
427 | } | |
428 | ||
9519da7e RH |
429 | static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c) |
430 | { | |
431 | switch (c) { | |
432 | case TCG_COND_EQ: | |
433 | return x == y; | |
434 | case TCG_COND_NE: | |
435 | return x != y; | |
436 | case TCG_COND_LT: | |
437 | return (int32_t)x < (int32_t)y; | |
438 | case TCG_COND_GE: | |
439 | return (int32_t)x >= (int32_t)y; | |
440 | case TCG_COND_LE: | |
441 | return (int32_t)x <= (int32_t)y; | |
442 | case TCG_COND_GT: | |
443 | return (int32_t)x > (int32_t)y; | |
444 | case TCG_COND_LTU: | |
445 | return x < y; | |
446 | case TCG_COND_GEU: | |
447 | return x >= y; | |
448 | case TCG_COND_LEU: | |
449 | return x <= y; | |
450 | case TCG_COND_GTU: | |
451 | return x > y; | |
452 | default: | |
453 | tcg_abort(); | |
454 | } | |
455 | } | |
456 | ||
457 | static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c) | |
458 | { | |
459 | switch (c) { | |
460 | case TCG_COND_EQ: | |
461 | return x == y; | |
462 | case TCG_COND_NE: | |
463 | return x != y; | |
464 | case TCG_COND_LT: | |
465 | return (int64_t)x < (int64_t)y; | |
466 | case TCG_COND_GE: | |
467 | return (int64_t)x >= (int64_t)y; | |
468 | case TCG_COND_LE: | |
469 | return (int64_t)x <= (int64_t)y; | |
470 | case TCG_COND_GT: | |
471 | return (int64_t)x > (int64_t)y; | |
472 | case TCG_COND_LTU: | |
473 | return x < y; | |
474 | case TCG_COND_GEU: | |
475 | return x >= y; | |
476 | case TCG_COND_LEU: | |
477 | return x <= y; | |
478 | case TCG_COND_GTU: | |
479 | return x > y; | |
480 | default: | |
481 | tcg_abort(); | |
482 | } | |
483 | } | |
484 | ||
485 | static bool do_constant_folding_cond_eq(TCGCond c) | |
486 | { | |
487 | switch (c) { | |
488 | case TCG_COND_GT: | |
489 | case TCG_COND_LTU: | |
490 | case TCG_COND_LT: | |
491 | case TCG_COND_GTU: | |
492 | case TCG_COND_NE: | |
493 | return 0; | |
494 | case TCG_COND_GE: | |
495 | case TCG_COND_GEU: | |
496 | case TCG_COND_LE: | |
497 | case TCG_COND_LEU: | |
498 | case TCG_COND_EQ: | |
499 | return 1; | |
500 | default: | |
501 | tcg_abort(); | |
502 | } | |
503 | } | |
504 | ||
b336ceb6 AJ |
505 | /* Return 2 if the condition can't be simplified, and the result |
506 | of the condition (0 or 1) if it can */ | |
f8dd19e5 AJ |
507 | static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x, |
508 | TCGArg y, TCGCond c) | |
509 | { | |
54795544 RH |
510 | uint64_t xv = arg_info(x)->val; |
511 | uint64_t yv = arg_info(y)->val; | |
512 | ||
6349039d | 513 | if (arg_is_const(x) && arg_is_const(y)) { |
170ba88f RH |
514 | const TCGOpDef *def = &tcg_op_defs[op]; |
515 | tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR)); | |
516 | if (def->flags & TCG_OPF_64BIT) { | |
6349039d | 517 | return do_constant_folding_cond_64(xv, yv, c); |
170ba88f RH |
518 | } else { |
519 | return do_constant_folding_cond_32(xv, yv, c); | |
b336ceb6 | 520 | } |
6349039d | 521 | } else if (args_are_copies(x, y)) { |
9519da7e | 522 | return do_constant_folding_cond_eq(c); |
6349039d | 523 | } else if (arg_is_const(y) && yv == 0) { |
b336ceb6 | 524 | switch (c) { |
f8dd19e5 | 525 | case TCG_COND_LTU: |
b336ceb6 | 526 | return 0; |
f8dd19e5 | 527 | case TCG_COND_GEU: |
b336ceb6 AJ |
528 | return 1; |
529 | default: | |
530 | return 2; | |
f8dd19e5 | 531 | } |
f8dd19e5 | 532 | } |
550276ae | 533 | return 2; |
f8dd19e5 AJ |
534 | } |
535 | ||
6c4382f8 RH |
536 | /* Return 2 if the condition can't be simplified, and the result |
537 | of the condition (0 or 1) if it can */ | |
538 | static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c) | |
539 | { | |
540 | TCGArg al = p1[0], ah = p1[1]; | |
541 | TCGArg bl = p2[0], bh = p2[1]; | |
542 | ||
6349039d RH |
543 | if (arg_is_const(bl) && arg_is_const(bh)) { |
544 | tcg_target_ulong blv = arg_info(bl)->val; | |
545 | tcg_target_ulong bhv = arg_info(bh)->val; | |
546 | uint64_t b = deposit64(blv, 32, 32, bhv); | |
6c4382f8 | 547 | |
6349039d RH |
548 | if (arg_is_const(al) && arg_is_const(ah)) { |
549 | tcg_target_ulong alv = arg_info(al)->val; | |
550 | tcg_target_ulong ahv = arg_info(ah)->val; | |
551 | uint64_t a = deposit64(alv, 32, 32, ahv); | |
6c4382f8 RH |
552 | return do_constant_folding_cond_64(a, b, c); |
553 | } | |
554 | if (b == 0) { | |
555 | switch (c) { | |
556 | case TCG_COND_LTU: | |
557 | return 0; | |
558 | case TCG_COND_GEU: | |
559 | return 1; | |
560 | default: | |
561 | break; | |
562 | } | |
563 | } | |
564 | } | |
6349039d | 565 | if (args_are_copies(al, bl) && args_are_copies(ah, bh)) { |
6c4382f8 RH |
566 | return do_constant_folding_cond_eq(c); |
567 | } | |
568 | return 2; | |
569 | } | |
570 | ||
24c9ae4e RH |
571 | static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) |
572 | { | |
573 | TCGArg a1 = *p1, a2 = *p2; | |
574 | int sum = 0; | |
6349039d RH |
575 | sum += arg_is_const(a1); |
576 | sum -= arg_is_const(a2); | |
24c9ae4e RH |
577 | |
578 | /* Prefer the constant in second argument, and then the form | |
579 | op a, a, b, which is better handled on non-RISC hosts. */ | |
580 | if (sum > 0 || (sum == 0 && dest == a2)) { | |
581 | *p1 = a2; | |
582 | *p2 = a1; | |
583 | return true; | |
584 | } | |
585 | return false; | |
586 | } | |
587 | ||
0bfcb865 RH |
588 | static bool swap_commutative2(TCGArg *p1, TCGArg *p2) |
589 | { | |
590 | int sum = 0; | |
6349039d RH |
591 | sum += arg_is_const(p1[0]); |
592 | sum += arg_is_const(p1[1]); | |
593 | sum -= arg_is_const(p2[0]); | |
594 | sum -= arg_is_const(p2[1]); | |
0bfcb865 RH |
595 | if (sum > 0) { |
596 | TCGArg t; | |
597 | t = p1[0], p1[0] = p2[0], p2[0] = t; | |
598 | t = p1[1], p1[1] = p2[1], p2[1] = t; | |
599 | return true; | |
600 | } | |
601 | return false; | |
602 | } | |
603 | ||
e2577ea2 RH |
604 | static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args) |
605 | { | |
606 | for (int i = 0; i < nb_args; i++) { | |
607 | TCGTemp *ts = arg_temp(op->args[i]); | |
608 | if (ts) { | |
609 | init_ts_info(ctx, ts); | |
610 | } | |
611 | } | |
612 | } | |
613 | ||
22613af4 | 614 | /* Propagate constants and copies, fold constant expressions. */ |
36e60ef6 | 615 | void tcg_optimize(TCGContext *s) |
8f2e8c07 | 616 | { |
8f17a975 | 617 | int nb_temps, nb_globals, i; |
d0ed5151 | 618 | TCGOp *op, *op_next; |
dc84988a | 619 | OptContext ctx = { .tcg = s }; |
5d8f5363 | 620 | |
22613af4 KB |
621 | /* Array VALS has an element for each temp. |
622 | If this temp holds a constant then its value is kept in VALS' element. | |
e590d4e6 AJ |
623 | If this temp is a copy of other ones then the other copies are |
624 | available through the doubly linked circular list. */ | |
8f2e8c07 KB |
625 | |
626 | nb_temps = s->nb_temps; | |
627 | nb_globals = s->nb_globals; | |
8f17a975 | 628 | |
8f17a975 RH |
629 | for (i = 0; i < nb_temps; ++i) { |
630 | s->temps[i].state_ptr = NULL; | |
631 | } | |
8f2e8c07 | 632 | |
15fa08f8 | 633 | QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) { |
b1fde411 | 634 | uint64_t z_mask, partmask, affected, tmp; |
8f17a975 | 635 | int nb_oargs, nb_iargs; |
c45cb8bb RH |
636 | TCGOpcode opc = op->opc; |
637 | const TCGOpDef *def = &tcg_op_defs[opc]; | |
638 | ||
1208d7dd AJ |
639 | /* Count the arguments, and initialize the temps that are |
640 | going to be used */ | |
c45cb8bb | 641 | if (opc == INDEX_op_call) { |
cd9090aa RH |
642 | nb_oargs = TCGOP_CALLO(op); |
643 | nb_iargs = TCGOP_CALLI(op); | |
1ff8c541 | 644 | } else { |
cf066674 RH |
645 | nb_oargs = def->nb_oargs; |
646 | nb_iargs = def->nb_iargs; | |
cf066674 | 647 | } |
e2577ea2 | 648 | init_arguments(&ctx, op, nb_oargs + nb_iargs); |
cf066674 RH |
649 | |
650 | /* Do copy propagation */ | |
651 | for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { | |
6349039d RH |
652 | TCGTemp *ts = arg_temp(op->args[i]); |
653 | if (ts && ts_is_copy(ts)) { | |
654 | op->args[i] = temp_arg(find_better_copy(s, ts)); | |
22613af4 KB |
655 | } |
656 | } | |
657 | ||
53108fb5 | 658 | /* For commutative operations make constant second argument */ |
c45cb8bb | 659 | switch (opc) { |
170ba88f RH |
660 | CASE_OP_32_64_VEC(add): |
661 | CASE_OP_32_64_VEC(mul): | |
662 | CASE_OP_32_64_VEC(and): | |
663 | CASE_OP_32_64_VEC(or): | |
664 | CASE_OP_32_64_VEC(xor): | |
cb25c80a RH |
665 | CASE_OP_32_64(eqv): |
666 | CASE_OP_32_64(nand): | |
667 | CASE_OP_32_64(nor): | |
03271524 RH |
668 | CASE_OP_32_64(muluh): |
669 | CASE_OP_32_64(mulsh): | |
acd93701 | 670 | swap_commutative(op->args[0], &op->args[1], &op->args[2]); |
53108fb5 | 671 | break; |
65a7cce1 | 672 | CASE_OP_32_64(brcond): |
acd93701 RH |
673 | if (swap_commutative(-1, &op->args[0], &op->args[1])) { |
674 | op->args[2] = tcg_swap_cond(op->args[2]); | |
65a7cce1 AJ |
675 | } |
676 | break; | |
677 | CASE_OP_32_64(setcond): | |
acd93701 RH |
678 | if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) { |
679 | op->args[3] = tcg_swap_cond(op->args[3]); | |
65a7cce1 AJ |
680 | } |
681 | break; | |
fa01a208 | 682 | CASE_OP_32_64(movcond): |
acd93701 RH |
683 | if (swap_commutative(-1, &op->args[1], &op->args[2])) { |
684 | op->args[5] = tcg_swap_cond(op->args[5]); | |
5d8f5363 RH |
685 | } |
686 | /* For movcond, we canonicalize the "false" input reg to match | |
687 | the destination reg so that the tcg backend can implement | |
688 | a "move if true" operation. */ | |
acd93701 RH |
689 | if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) { |
690 | op->args[5] = tcg_invert_cond(op->args[5]); | |
fa01a208 | 691 | } |
1e484e61 | 692 | break; |
d7156f7c | 693 | CASE_OP_32_64(add2): |
acd93701 RH |
694 | swap_commutative(op->args[0], &op->args[2], &op->args[4]); |
695 | swap_commutative(op->args[1], &op->args[3], &op->args[5]); | |
1e484e61 | 696 | break; |
d7156f7c | 697 | CASE_OP_32_64(mulu2): |
4d3203fd | 698 | CASE_OP_32_64(muls2): |
acd93701 | 699 | swap_commutative(op->args[0], &op->args[2], &op->args[3]); |
1414968a | 700 | break; |
0bfcb865 | 701 | case INDEX_op_brcond2_i32: |
acd93701 RH |
702 | if (swap_commutative2(&op->args[0], &op->args[2])) { |
703 | op->args[4] = tcg_swap_cond(op->args[4]); | |
0bfcb865 RH |
704 | } |
705 | break; | |
706 | case INDEX_op_setcond2_i32: | |
acd93701 RH |
707 | if (swap_commutative2(&op->args[1], &op->args[3])) { |
708 | op->args[5] = tcg_swap_cond(op->args[5]); | |
0bfcb865 RH |
709 | } |
710 | break; | |
53108fb5 KB |
711 | default: |
712 | break; | |
713 | } | |
714 | ||
2d497542 RH |
715 | /* Simplify expressions for "shift/rot r, 0, a => movi r, 0", |
716 | and "sub r, 0, a => neg r, a" case. */ | |
c45cb8bb | 717 | switch (opc) { |
01ee5282 AJ |
718 | CASE_OP_32_64(shl): |
719 | CASE_OP_32_64(shr): | |
720 | CASE_OP_32_64(sar): | |
721 | CASE_OP_32_64(rotl): | |
722 | CASE_OP_32_64(rotr): | |
6349039d RH |
723 | if (arg_is_const(op->args[1]) |
724 | && arg_info(op->args[1])->val == 0) { | |
dc84988a | 725 | tcg_opt_gen_movi(&ctx, op, op->args[0], 0); |
01ee5282 AJ |
726 | continue; |
727 | } | |
728 | break; | |
170ba88f | 729 | CASE_OP_32_64_VEC(sub): |
2d497542 RH |
730 | { |
731 | TCGOpcode neg_op; | |
732 | bool have_neg; | |
733 | ||
6349039d | 734 | if (arg_is_const(op->args[2])) { |
2d497542 RH |
735 | /* Proceed with possible constant folding. */ |
736 | break; | |
737 | } | |
c45cb8bb | 738 | if (opc == INDEX_op_sub_i32) { |
2d497542 RH |
739 | neg_op = INDEX_op_neg_i32; |
740 | have_neg = TCG_TARGET_HAS_neg_i32; | |
170ba88f | 741 | } else if (opc == INDEX_op_sub_i64) { |
2d497542 RH |
742 | neg_op = INDEX_op_neg_i64; |
743 | have_neg = TCG_TARGET_HAS_neg_i64; | |
ac383dde RH |
744 | } else if (TCG_TARGET_HAS_neg_vec) { |
745 | TCGType type = TCGOP_VECL(op) + TCG_TYPE_V64; | |
746 | unsigned vece = TCGOP_VECE(op); | |
170ba88f | 747 | neg_op = INDEX_op_neg_vec; |
ac383dde RH |
748 | have_neg = tcg_can_emit_vec_op(neg_op, type, vece) > 0; |
749 | } else { | |
750 | break; | |
2d497542 RH |
751 | } |
752 | if (!have_neg) { | |
753 | break; | |
754 | } | |
6349039d RH |
755 | if (arg_is_const(op->args[1]) |
756 | && arg_info(op->args[1])->val == 0) { | |
c45cb8bb | 757 | op->opc = neg_op; |
acd93701 RH |
758 | reset_temp(op->args[0]); |
759 | op->args[1] = op->args[2]; | |
2d497542 RH |
760 | continue; |
761 | } | |
762 | } | |
763 | break; | |
170ba88f | 764 | CASE_OP_32_64_VEC(xor): |
e201b564 | 765 | CASE_OP_32_64(nand): |
6349039d RH |
766 | if (!arg_is_const(op->args[1]) |
767 | && arg_is_const(op->args[2]) | |
768 | && arg_info(op->args[2])->val == -1) { | |
e201b564 RH |
769 | i = 1; |
770 | goto try_not; | |
771 | } | |
772 | break; | |
773 | CASE_OP_32_64(nor): | |
6349039d RH |
774 | if (!arg_is_const(op->args[1]) |
775 | && arg_is_const(op->args[2]) | |
776 | && arg_info(op->args[2])->val == 0) { | |
e201b564 RH |
777 | i = 1; |
778 | goto try_not; | |
779 | } | |
780 | break; | |
170ba88f | 781 | CASE_OP_32_64_VEC(andc): |
6349039d RH |
782 | if (!arg_is_const(op->args[2]) |
783 | && arg_is_const(op->args[1]) | |
784 | && arg_info(op->args[1])->val == -1) { | |
e201b564 RH |
785 | i = 2; |
786 | goto try_not; | |
787 | } | |
788 | break; | |
170ba88f | 789 | CASE_OP_32_64_VEC(orc): |
e201b564 | 790 | CASE_OP_32_64(eqv): |
6349039d RH |
791 | if (!arg_is_const(op->args[2]) |
792 | && arg_is_const(op->args[1]) | |
793 | && arg_info(op->args[1])->val == 0) { | |
e201b564 RH |
794 | i = 2; |
795 | goto try_not; | |
796 | } | |
797 | break; | |
798 | try_not: | |
799 | { | |
800 | TCGOpcode not_op; | |
801 | bool have_not; | |
802 | ||
170ba88f RH |
803 | if (def->flags & TCG_OPF_VECTOR) { |
804 | not_op = INDEX_op_not_vec; | |
805 | have_not = TCG_TARGET_HAS_not_vec; | |
806 | } else if (def->flags & TCG_OPF_64BIT) { | |
e201b564 RH |
807 | not_op = INDEX_op_not_i64; |
808 | have_not = TCG_TARGET_HAS_not_i64; | |
809 | } else { | |
810 | not_op = INDEX_op_not_i32; | |
811 | have_not = TCG_TARGET_HAS_not_i32; | |
812 | } | |
813 | if (!have_not) { | |
814 | break; | |
815 | } | |
c45cb8bb | 816 | op->opc = not_op; |
acd93701 RH |
817 | reset_temp(op->args[0]); |
818 | op->args[1] = op->args[i]; | |
e201b564 RH |
819 | continue; |
820 | } | |
01ee5282 AJ |
821 | default: |
822 | break; | |
823 | } | |
824 | ||
464a1441 | 825 | /* Simplify expression for "op r, a, const => mov r, a" cases */ |
c45cb8bb | 826 | switch (opc) { |
170ba88f RH |
827 | CASE_OP_32_64_VEC(add): |
828 | CASE_OP_32_64_VEC(sub): | |
829 | CASE_OP_32_64_VEC(or): | |
830 | CASE_OP_32_64_VEC(xor): | |
831 | CASE_OP_32_64_VEC(andc): | |
55c0975c KB |
832 | CASE_OP_32_64(shl): |
833 | CASE_OP_32_64(shr): | |
834 | CASE_OP_32_64(sar): | |
25c4d9cc RH |
835 | CASE_OP_32_64(rotl): |
836 | CASE_OP_32_64(rotr): | |
6349039d RH |
837 | if (!arg_is_const(op->args[1]) |
838 | && arg_is_const(op->args[2]) | |
839 | && arg_info(op->args[2])->val == 0) { | |
dc84988a | 840 | tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]); |
97a79eb7 | 841 | continue; |
53108fb5 KB |
842 | } |
843 | break; | |
170ba88f RH |
844 | CASE_OP_32_64_VEC(and): |
845 | CASE_OP_32_64_VEC(orc): | |
464a1441 | 846 | CASE_OP_32_64(eqv): |
6349039d RH |
847 | if (!arg_is_const(op->args[1]) |
848 | && arg_is_const(op->args[2]) | |
849 | && arg_info(op->args[2])->val == -1) { | |
dc84988a | 850 | tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]); |
97a79eb7 | 851 | continue; |
464a1441 RH |
852 | } |
853 | break; | |
56e49438 AJ |
854 | default: |
855 | break; | |
856 | } | |
857 | ||
3031244b AJ |
858 | /* Simplify using known-zero bits. Currently only ops with a single |
859 | output argument is supported. */ | |
b1fde411 | 860 | z_mask = -1; |
633f6502 | 861 | affected = -1; |
c45cb8bb | 862 | switch (opc) { |
3a9d8b17 | 863 | CASE_OP_32_64(ext8s): |
b1fde411 | 864 | if ((arg_info(op->args[1])->z_mask & 0x80) != 0) { |
3a9d8b17 PB |
865 | break; |
866 | } | |
d84568b7 | 867 | QEMU_FALLTHROUGH; |
3a9d8b17 | 868 | CASE_OP_32_64(ext8u): |
b1fde411 | 869 | z_mask = 0xff; |
3a9d8b17 PB |
870 | goto and_const; |
871 | CASE_OP_32_64(ext16s): | |
b1fde411 | 872 | if ((arg_info(op->args[1])->z_mask & 0x8000) != 0) { |
3a9d8b17 PB |
873 | break; |
874 | } | |
d84568b7 | 875 | QEMU_FALLTHROUGH; |
3a9d8b17 | 876 | CASE_OP_32_64(ext16u): |
b1fde411 | 877 | z_mask = 0xffff; |
3a9d8b17 PB |
878 | goto and_const; |
879 | case INDEX_op_ext32s_i64: | |
b1fde411 | 880 | if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) { |
3a9d8b17 PB |
881 | break; |
882 | } | |
d84568b7 | 883 | QEMU_FALLTHROUGH; |
3a9d8b17 | 884 | case INDEX_op_ext32u_i64: |
b1fde411 | 885 | z_mask = 0xffffffffU; |
3a9d8b17 PB |
886 | goto and_const; |
887 | ||
888 | CASE_OP_32_64(and): | |
b1fde411 | 889 | z_mask = arg_info(op->args[2])->z_mask; |
6349039d | 890 | if (arg_is_const(op->args[2])) { |
3a9d8b17 | 891 | and_const: |
b1fde411 | 892 | affected = arg_info(op->args[1])->z_mask & ~z_mask; |
3a9d8b17 | 893 | } |
b1fde411 | 894 | z_mask = arg_info(op->args[1])->z_mask & z_mask; |
3a9d8b17 PB |
895 | break; |
896 | ||
8bcb5c8f | 897 | case INDEX_op_ext_i32_i64: |
b1fde411 | 898 | if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) { |
8bcb5c8f AJ |
899 | break; |
900 | } | |
d84568b7 | 901 | QEMU_FALLTHROUGH; |
8bcb5c8f AJ |
902 | case INDEX_op_extu_i32_i64: |
903 | /* We do not compute affected as it is a size changing op. */ | |
b1fde411 | 904 | z_mask = (uint32_t)arg_info(op->args[1])->z_mask; |
8bcb5c8f AJ |
905 | break; |
906 | ||
23ec69ed RH |
907 | CASE_OP_32_64(andc): |
908 | /* Known-zeros does not imply known-ones. Therefore unless | |
acd93701 | 909 | op->args[2] is constant, we can't infer anything from it. */ |
6349039d | 910 | if (arg_is_const(op->args[2])) { |
b1fde411 | 911 | z_mask = ~arg_info(op->args[2])->z_mask; |
23ec69ed RH |
912 | goto and_const; |
913 | } | |
6349039d | 914 | /* But we certainly know nothing outside args[1] may be set. */ |
b1fde411 | 915 | z_mask = arg_info(op->args[1])->z_mask; |
23ec69ed RH |
916 | break; |
917 | ||
e46b225a | 918 | case INDEX_op_sar_i32: |
6349039d RH |
919 | if (arg_is_const(op->args[2])) { |
920 | tmp = arg_info(op->args[2])->val & 31; | |
b1fde411 | 921 | z_mask = (int32_t)arg_info(op->args[1])->z_mask >> tmp; |
e46b225a AJ |
922 | } |
923 | break; | |
924 | case INDEX_op_sar_i64: | |
6349039d RH |
925 | if (arg_is_const(op->args[2])) { |
926 | tmp = arg_info(op->args[2])->val & 63; | |
b1fde411 | 927 | z_mask = (int64_t)arg_info(op->args[1])->z_mask >> tmp; |
3a9d8b17 PB |
928 | } |
929 | break; | |
930 | ||
e46b225a | 931 | case INDEX_op_shr_i32: |
6349039d RH |
932 | if (arg_is_const(op->args[2])) { |
933 | tmp = arg_info(op->args[2])->val & 31; | |
b1fde411 | 934 | z_mask = (uint32_t)arg_info(op->args[1])->z_mask >> tmp; |
e46b225a AJ |
935 | } |
936 | break; | |
937 | case INDEX_op_shr_i64: | |
6349039d RH |
938 | if (arg_is_const(op->args[2])) { |
939 | tmp = arg_info(op->args[2])->val & 63; | |
b1fde411 | 940 | z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> tmp; |
3a9d8b17 PB |
941 | } |
942 | break; | |
943 | ||
609ad705 | 944 | case INDEX_op_extrl_i64_i32: |
b1fde411 | 945 | z_mask = (uint32_t)arg_info(op->args[1])->z_mask; |
609ad705 RH |
946 | break; |
947 | case INDEX_op_extrh_i64_i32: | |
b1fde411 | 948 | z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> 32; |
4bb7a41e RH |
949 | break; |
950 | ||
3a9d8b17 | 951 | CASE_OP_32_64(shl): |
6349039d RH |
952 | if (arg_is_const(op->args[2])) { |
953 | tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1); | |
b1fde411 | 954 | z_mask = arg_info(op->args[1])->z_mask << tmp; |
3a9d8b17 PB |
955 | } |
956 | break; | |
957 | ||
958 | CASE_OP_32_64(neg): | |
959 | /* Set to 1 all bits from the rightmost possibly-set bit leftwards. */ | |
b1fde411 RH |
960 | z_mask = -(arg_info(op->args[1])->z_mask |
961 | & -arg_info(op->args[1])->z_mask); | |
3a9d8b17 PB |
962 | break; |
963 | ||
964 | CASE_OP_32_64(deposit): | |
b1fde411 RH |
965 | z_mask = deposit64(arg_info(op->args[1])->z_mask, |
966 | op->args[3], op->args[4], | |
967 | arg_info(op->args[2])->z_mask); | |
3a9d8b17 PB |
968 | break; |
969 | ||
7ec8bab3 | 970 | CASE_OP_32_64(extract): |
b1fde411 RH |
971 | z_mask = extract64(arg_info(op->args[1])->z_mask, |
972 | op->args[2], op->args[3]); | |
acd93701 | 973 | if (op->args[2] == 0) { |
b1fde411 | 974 | affected = arg_info(op->args[1])->z_mask & ~z_mask; |
7ec8bab3 RH |
975 | } |
976 | break; | |
977 | CASE_OP_32_64(sextract): | |
b1fde411 RH |
978 | z_mask = sextract64(arg_info(op->args[1])->z_mask, |
979 | op->args[2], op->args[3]); | |
980 | if (op->args[2] == 0 && (tcg_target_long)z_mask >= 0) { | |
981 | affected = arg_info(op->args[1])->z_mask & ~z_mask; | |
7ec8bab3 RH |
982 | } |
983 | break; | |
984 | ||
3a9d8b17 PB |
985 | CASE_OP_32_64(or): |
986 | CASE_OP_32_64(xor): | |
b1fde411 RH |
987 | z_mask = arg_info(op->args[1])->z_mask |
988 | | arg_info(op->args[2])->z_mask; | |
3a9d8b17 PB |
989 | break; |
990 | ||
0e28d006 RH |
991 | case INDEX_op_clz_i32: |
992 | case INDEX_op_ctz_i32: | |
b1fde411 | 993 | z_mask = arg_info(op->args[2])->z_mask | 31; |
0e28d006 RH |
994 | break; |
995 | ||
996 | case INDEX_op_clz_i64: | |
997 | case INDEX_op_ctz_i64: | |
b1fde411 | 998 | z_mask = arg_info(op->args[2])->z_mask | 63; |
0e28d006 RH |
999 | break; |
1000 | ||
a768e4e9 | 1001 | case INDEX_op_ctpop_i32: |
b1fde411 | 1002 | z_mask = 32 | 31; |
a768e4e9 RH |
1003 | break; |
1004 | case INDEX_op_ctpop_i64: | |
b1fde411 | 1005 | z_mask = 64 | 63; |
a768e4e9 RH |
1006 | break; |
1007 | ||
3a9d8b17 | 1008 | CASE_OP_32_64(setcond): |
a763551a | 1009 | case INDEX_op_setcond2_i32: |
b1fde411 | 1010 | z_mask = 1; |
3a9d8b17 PB |
1011 | break; |
1012 | ||
1013 | CASE_OP_32_64(movcond): | |
b1fde411 RH |
1014 | z_mask = arg_info(op->args[3])->z_mask |
1015 | | arg_info(op->args[4])->z_mask; | |
3a9d8b17 PB |
1016 | break; |
1017 | ||
c8d70272 | 1018 | CASE_OP_32_64(ld8u): |
b1fde411 | 1019 | z_mask = 0xff; |
c8d70272 AJ |
1020 | break; |
1021 | CASE_OP_32_64(ld16u): | |
b1fde411 | 1022 | z_mask = 0xffff; |
c8d70272 AJ |
1023 | break; |
1024 | case INDEX_op_ld32u_i64: | |
b1fde411 | 1025 | z_mask = 0xffffffffu; |
c8d70272 AJ |
1026 | break; |
1027 | ||
1028 | CASE_OP_32_64(qemu_ld): | |
1029 | { | |
9002ffcb | 1030 | MemOpIdx oi = op->args[nb_oargs + nb_iargs]; |
14776ab5 | 1031 | MemOp mop = get_memop(oi); |
c8d70272 | 1032 | if (!(mop & MO_SIGN)) { |
b1fde411 | 1033 | z_mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1; |
c8d70272 AJ |
1034 | } |
1035 | } | |
1036 | break; | |
1037 | ||
0b76ff8f | 1038 | CASE_OP_32_64(bswap16): |
b1fde411 RH |
1039 | z_mask = arg_info(op->args[1])->z_mask; |
1040 | if (z_mask <= 0xffff) { | |
0b76ff8f RH |
1041 | op->args[2] |= TCG_BSWAP_IZ; |
1042 | } | |
b1fde411 | 1043 | z_mask = bswap16(z_mask); |
0b76ff8f RH |
1044 | switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) { |
1045 | case TCG_BSWAP_OZ: | |
1046 | break; | |
1047 | case TCG_BSWAP_OS: | |
b1fde411 | 1048 | z_mask = (int16_t)z_mask; |
0b76ff8f RH |
1049 | break; |
1050 | default: /* undefined high bits */ | |
b1fde411 | 1051 | z_mask |= MAKE_64BIT_MASK(16, 48); |
0b76ff8f RH |
1052 | break; |
1053 | } | |
1054 | break; | |
1055 | ||
1056 | case INDEX_op_bswap32_i64: | |
b1fde411 RH |
1057 | z_mask = arg_info(op->args[1])->z_mask; |
1058 | if (z_mask <= 0xffffffffu) { | |
0b76ff8f RH |
1059 | op->args[2] |= TCG_BSWAP_IZ; |
1060 | } | |
b1fde411 | 1061 | z_mask = bswap32(z_mask); |
0b76ff8f RH |
1062 | switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) { |
1063 | case TCG_BSWAP_OZ: | |
1064 | break; | |
1065 | case TCG_BSWAP_OS: | |
b1fde411 | 1066 | z_mask = (int32_t)z_mask; |
0b76ff8f RH |
1067 | break; |
1068 | default: /* undefined high bits */ | |
b1fde411 | 1069 | z_mask |= MAKE_64BIT_MASK(32, 32); |
0b76ff8f RH |
1070 | break; |
1071 | } | |
1072 | break; | |
1073 | ||
3a9d8b17 PB |
1074 | default: |
1075 | break; | |
1076 | } | |
1077 | ||
bc8d688f RH |
1078 | /* 32-bit ops generate 32-bit results. For the result-is-zero test |
1079 | below, we can ignore high bits, but for further optimizations we | |
1080 | need to record that the high bits contain garbage. */ | |
b1fde411 | 1081 | partmask = z_mask; |
bc8d688f | 1082 | if (!(def->flags & TCG_OPF_64BIT)) { |
b1fde411 | 1083 | z_mask |= ~(tcg_target_ulong)0xffffffffu; |
24666baf RH |
1084 | partmask &= 0xffffffffu; |
1085 | affected &= 0xffffffffu; | |
f096dc96 AJ |
1086 | } |
1087 | ||
24666baf | 1088 | if (partmask == 0) { |
eabb7b91 | 1089 | tcg_debug_assert(nb_oargs == 1); |
dc84988a | 1090 | tcg_opt_gen_movi(&ctx, op, op->args[0], 0); |
633f6502 PB |
1091 | continue; |
1092 | } | |
1093 | if (affected == 0) { | |
eabb7b91 | 1094 | tcg_debug_assert(nb_oargs == 1); |
dc84988a | 1095 | tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]); |
633f6502 PB |
1096 | continue; |
1097 | } | |
1098 | ||
56e49438 | 1099 | /* Simplify expression for "op r, a, 0 => movi r, 0" cases */ |
c45cb8bb | 1100 | switch (opc) { |
170ba88f RH |
1101 | CASE_OP_32_64_VEC(and): |
1102 | CASE_OP_32_64_VEC(mul): | |
03271524 RH |
1103 | CASE_OP_32_64(muluh): |
1104 | CASE_OP_32_64(mulsh): | |
6349039d RH |
1105 | if (arg_is_const(op->args[2]) |
1106 | && arg_info(op->args[2])->val == 0) { | |
dc84988a | 1107 | tcg_opt_gen_movi(&ctx, op, op->args[0], 0); |
53108fb5 KB |
1108 | continue; |
1109 | } | |
1110 | break; | |
56e49438 AJ |
1111 | default: |
1112 | break; | |
1113 | } | |
1114 | ||
1115 | /* Simplify expression for "op r, a, a => mov r, a" cases */ | |
c45cb8bb | 1116 | switch (opc) { |
170ba88f RH |
1117 | CASE_OP_32_64_VEC(or): |
1118 | CASE_OP_32_64_VEC(and): | |
6349039d | 1119 | if (args_are_copies(op->args[1], op->args[2])) { |
dc84988a | 1120 | tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]); |
9a81090b KB |
1121 | continue; |
1122 | } | |
1123 | break; | |
fe0de7aa BS |
1124 | default: |
1125 | break; | |
53108fb5 KB |
1126 | } |
1127 | ||
3c94193e | 1128 | /* Simplify expression for "op r, a, a => movi r, 0" cases */ |
c45cb8bb | 1129 | switch (opc) { |
170ba88f RH |
1130 | CASE_OP_32_64_VEC(andc): |
1131 | CASE_OP_32_64_VEC(sub): | |
1132 | CASE_OP_32_64_VEC(xor): | |
6349039d | 1133 | if (args_are_copies(op->args[1], op->args[2])) { |
dc84988a | 1134 | tcg_opt_gen_movi(&ctx, op, op->args[0], 0); |
3c94193e AJ |
1135 | continue; |
1136 | } | |
1137 | break; | |
1138 | default: | |
1139 | break; | |
1140 | } | |
1141 | ||
22613af4 KB |
1142 | /* Propagate constants through copy operations and do constant |
1143 | folding. Constants will be substituted to arguments by register | |
1144 | allocator where needed and possible. Also detect copies. */ | |
c45cb8bb | 1145 | switch (opc) { |
170ba88f | 1146 | CASE_OP_32_64_VEC(mov): |
dc84988a | 1147 | tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]); |
b10f3833 | 1148 | continue; |
6e14e91b | 1149 | |
170ba88f RH |
1150 | case INDEX_op_dup_vec: |
1151 | if (arg_is_const(op->args[1])) { | |
1152 | tmp = arg_info(op->args[1])->val; | |
1153 | tmp = dup_const(TCGOP_VECE(op), tmp); | |
dc84988a | 1154 | tcg_opt_gen_movi(&ctx, op, op->args[0], tmp); |
b10f3833 | 1155 | continue; |
170ba88f | 1156 | } |
b10f3833 | 1157 | break; |
170ba88f | 1158 | |
1dc4fe70 RH |
1159 | case INDEX_op_dup2_vec: |
1160 | assert(TCG_TARGET_REG_BITS == 32); | |
1161 | if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { | |
dc84988a | 1162 | tcg_opt_gen_movi(&ctx, op, op->args[0], |
0b4286dd RH |
1163 | deposit64(arg_info(op->args[1])->val, 32, 32, |
1164 | arg_info(op->args[2])->val)); | |
b10f3833 | 1165 | continue; |
1dc4fe70 RH |
1166 | } else if (args_are_copies(op->args[1], op->args[2])) { |
1167 | op->opc = INDEX_op_dup_vec; | |
1168 | TCGOP_VECE(op) = MO_32; | |
1169 | nb_iargs = 1; | |
1170 | } | |
b10f3833 | 1171 | break; |
1dc4fe70 | 1172 | |
a640f031 | 1173 | CASE_OP_32_64(not): |
cb25c80a | 1174 | CASE_OP_32_64(neg): |
25c4d9cc RH |
1175 | CASE_OP_32_64(ext8s): |
1176 | CASE_OP_32_64(ext8u): | |
1177 | CASE_OP_32_64(ext16s): | |
1178 | CASE_OP_32_64(ext16u): | |
a768e4e9 | 1179 | CASE_OP_32_64(ctpop): |
a640f031 KB |
1180 | case INDEX_op_ext32s_i64: |
1181 | case INDEX_op_ext32u_i64: | |
8bcb5c8f AJ |
1182 | case INDEX_op_ext_i32_i64: |
1183 | case INDEX_op_extu_i32_i64: | |
609ad705 RH |
1184 | case INDEX_op_extrl_i64_i32: |
1185 | case INDEX_op_extrh_i64_i32: | |
6349039d RH |
1186 | if (arg_is_const(op->args[1])) { |
1187 | tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0); | |
dc84988a | 1188 | tcg_opt_gen_movi(&ctx, op, op->args[0], tmp); |
b10f3833 | 1189 | continue; |
a640f031 | 1190 | } |
b10f3833 | 1191 | break; |
6e14e91b | 1192 | |
0b76ff8f RH |
1193 | CASE_OP_32_64(bswap16): |
1194 | CASE_OP_32_64(bswap32): | |
1195 | case INDEX_op_bswap64_i64: | |
1196 | if (arg_is_const(op->args[1])) { | |
1197 | tmp = do_constant_folding(opc, arg_info(op->args[1])->val, | |
1198 | op->args[2]); | |
dc84988a | 1199 | tcg_opt_gen_movi(&ctx, op, op->args[0], tmp); |
b10f3833 | 1200 | continue; |
0b76ff8f | 1201 | } |
b10f3833 | 1202 | break; |
0b76ff8f | 1203 | |
53108fb5 KB |
1204 | CASE_OP_32_64(add): |
1205 | CASE_OP_32_64(sub): | |
1206 | CASE_OP_32_64(mul): | |
9a81090b KB |
1207 | CASE_OP_32_64(or): |
1208 | CASE_OP_32_64(and): | |
1209 | CASE_OP_32_64(xor): | |
55c0975c KB |
1210 | CASE_OP_32_64(shl): |
1211 | CASE_OP_32_64(shr): | |
1212 | CASE_OP_32_64(sar): | |
25c4d9cc RH |
1213 | CASE_OP_32_64(rotl): |
1214 | CASE_OP_32_64(rotr): | |
cb25c80a RH |
1215 | CASE_OP_32_64(andc): |
1216 | CASE_OP_32_64(orc): | |
1217 | CASE_OP_32_64(eqv): | |
1218 | CASE_OP_32_64(nand): | |
1219 | CASE_OP_32_64(nor): | |
03271524 RH |
1220 | CASE_OP_32_64(muluh): |
1221 | CASE_OP_32_64(mulsh): | |
01547f7f RH |
1222 | CASE_OP_32_64(div): |
1223 | CASE_OP_32_64(divu): | |
1224 | CASE_OP_32_64(rem): | |
1225 | CASE_OP_32_64(remu): | |
6349039d RH |
1226 | if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { |
1227 | tmp = do_constant_folding(opc, arg_info(op->args[1])->val, | |
1228 | arg_info(op->args[2])->val); | |
dc84988a | 1229 | tcg_opt_gen_movi(&ctx, op, op->args[0], tmp); |
b10f3833 | 1230 | continue; |
53108fb5 | 1231 | } |
b10f3833 | 1232 | break; |
6e14e91b | 1233 | |
0e28d006 RH |
1234 | CASE_OP_32_64(clz): |
1235 | CASE_OP_32_64(ctz): | |
6349039d RH |
1236 | if (arg_is_const(op->args[1])) { |
1237 | TCGArg v = arg_info(op->args[1])->val; | |
0e28d006 RH |
1238 | if (v != 0) { |
1239 | tmp = do_constant_folding(opc, v, 0); | |
dc84988a | 1240 | tcg_opt_gen_movi(&ctx, op, op->args[0], tmp); |
0e28d006 | 1241 | } else { |
dc84988a | 1242 | tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[2]); |
0e28d006 | 1243 | } |
b10f3833 | 1244 | continue; |
0e28d006 | 1245 | } |
b10f3833 | 1246 | break; |
0e28d006 | 1247 | |
7ef55fc9 | 1248 | CASE_OP_32_64(deposit): |
6349039d RH |
1249 | if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { |
1250 | tmp = deposit64(arg_info(op->args[1])->val, | |
1251 | op->args[3], op->args[4], | |
1252 | arg_info(op->args[2])->val); | |
dc84988a | 1253 | tcg_opt_gen_movi(&ctx, op, op->args[0], tmp); |
b10f3833 | 1254 | continue; |
7ef55fc9 | 1255 | } |
b10f3833 | 1256 | break; |
6e14e91b | 1257 | |
7ec8bab3 | 1258 | CASE_OP_32_64(extract): |
6349039d RH |
1259 | if (arg_is_const(op->args[1])) { |
1260 | tmp = extract64(arg_info(op->args[1])->val, | |
acd93701 | 1261 | op->args[2], op->args[3]); |
dc84988a | 1262 | tcg_opt_gen_movi(&ctx, op, op->args[0], tmp); |
b10f3833 | 1263 | continue; |
7ec8bab3 | 1264 | } |
b10f3833 | 1265 | break; |
7ec8bab3 RH |
1266 | |
1267 | CASE_OP_32_64(sextract): | |
6349039d RH |
1268 | if (arg_is_const(op->args[1])) { |
1269 | tmp = sextract64(arg_info(op->args[1])->val, | |
acd93701 | 1270 | op->args[2], op->args[3]); |
dc84988a | 1271 | tcg_opt_gen_movi(&ctx, op, op->args[0], tmp); |
b10f3833 | 1272 | continue; |
7ec8bab3 | 1273 | } |
b10f3833 | 1274 | break; |
7ec8bab3 | 1275 | |
fce1296f RH |
1276 | CASE_OP_32_64(extract2): |
1277 | if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) { | |
54795544 RH |
1278 | uint64_t v1 = arg_info(op->args[1])->val; |
1279 | uint64_t v2 = arg_info(op->args[2])->val; | |
1280 | int shr = op->args[3]; | |
fce1296f RH |
1281 | |
1282 | if (opc == INDEX_op_extract2_i64) { | |
54795544 | 1283 | tmp = (v1 >> shr) | (v2 << (64 - shr)); |
fce1296f | 1284 | } else { |
54795544 RH |
1285 | tmp = (int32_t)(((uint32_t)v1 >> shr) | |
1286 | ((uint32_t)v2 << (32 - shr))); | |
fce1296f | 1287 | } |
dc84988a | 1288 | tcg_opt_gen_movi(&ctx, op, op->args[0], tmp); |
b10f3833 | 1289 | continue; |
fce1296f | 1290 | } |
b10f3833 | 1291 | break; |
fce1296f | 1292 | |
f8dd19e5 | 1293 | CASE_OP_32_64(setcond): |
acd93701 RH |
1294 | tmp = do_constant_folding_cond(opc, op->args[1], |
1295 | op->args[2], op->args[3]); | |
b336ceb6 | 1296 | if (tmp != 2) { |
dc84988a | 1297 | tcg_opt_gen_movi(&ctx, op, op->args[0], tmp); |
b10f3833 | 1298 | continue; |
f8dd19e5 | 1299 | } |
b10f3833 | 1300 | break; |
6e14e91b | 1301 | |
fbeaa26c | 1302 | CASE_OP_32_64(brcond): |
acd93701 RH |
1303 | tmp = do_constant_folding_cond(opc, op->args[0], |
1304 | op->args[1], op->args[2]); | |
b10f3833 RH |
1305 | switch (tmp) { |
1306 | case 0: | |
1307 | tcg_op_remove(s, op); | |
1308 | continue; | |
1309 | case 1: | |
1310 | memset(&ctx.temps_used, 0, sizeof(ctx.temps_used)); | |
1311 | op->opc = opc = INDEX_op_br; | |
1312 | op->args[0] = op->args[3]; | |
6e14e91b | 1313 | break; |
fbeaa26c | 1314 | } |
b10f3833 | 1315 | break; |
6e14e91b | 1316 | |
fa01a208 | 1317 | CASE_OP_32_64(movcond): |
acd93701 RH |
1318 | tmp = do_constant_folding_cond(opc, op->args[1], |
1319 | op->args[2], op->args[5]); | |
b336ceb6 | 1320 | if (tmp != 2) { |
dc84988a | 1321 | tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[4-tmp]); |
b10f3833 | 1322 | continue; |
fa01a208 | 1323 | } |
6349039d | 1324 | if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) { |
54795544 RH |
1325 | uint64_t tv = arg_info(op->args[3])->val; |
1326 | uint64_t fv = arg_info(op->args[4])->val; | |
acd93701 | 1327 | TCGCond cond = op->args[5]; |
54795544 | 1328 | |
333b21b8 RH |
1329 | if (fv == 1 && tv == 0) { |
1330 | cond = tcg_invert_cond(cond); | |
1331 | } else if (!(tv == 1 && fv == 0)) { | |
b10f3833 | 1332 | break; |
333b21b8 | 1333 | } |
acd93701 | 1334 | op->args[3] = cond; |
333b21b8 RH |
1335 | op->opc = opc = (opc == INDEX_op_movcond_i32 |
1336 | ? INDEX_op_setcond_i32 | |
1337 | : INDEX_op_setcond_i64); | |
1338 | nb_iargs = 2; | |
1339 | } | |
b10f3833 | 1340 | break; |
212c328d RH |
1341 | |
1342 | case INDEX_op_add2_i32: | |
1343 | case INDEX_op_sub2_i32: | |
6349039d RH |
1344 | if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) |
1345 | && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) { | |
1346 | uint32_t al = arg_info(op->args[2])->val; | |
1347 | uint32_t ah = arg_info(op->args[3])->val; | |
1348 | uint32_t bl = arg_info(op->args[4])->val; | |
1349 | uint32_t bh = arg_info(op->args[5])->val; | |
212c328d RH |
1350 | uint64_t a = ((uint64_t)ah << 32) | al; |
1351 | uint64_t b = ((uint64_t)bh << 32) | bl; | |
1352 | TCGArg rl, rh; | |
8fe35e04 | 1353 | TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32); |
212c328d | 1354 | |
c45cb8bb | 1355 | if (opc == INDEX_op_add2_i32) { |
212c328d RH |
1356 | a += b; |
1357 | } else { | |
1358 | a -= b; | |
1359 | } | |
1360 | ||
acd93701 RH |
1361 | rl = op->args[0]; |
1362 | rh = op->args[1]; | |
dc84988a RH |
1363 | tcg_opt_gen_movi(&ctx, op, rl, (int32_t)a); |
1364 | tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(a >> 32)); | |
b10f3833 | 1365 | continue; |
212c328d | 1366 | } |
b10f3833 | 1367 | break; |
1414968a RH |
1368 | |
1369 | case INDEX_op_mulu2_i32: | |
6349039d RH |
1370 | if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) { |
1371 | uint32_t a = arg_info(op->args[2])->val; | |
1372 | uint32_t b = arg_info(op->args[3])->val; | |
1414968a RH |
1373 | uint64_t r = (uint64_t)a * b; |
1374 | TCGArg rl, rh; | |
8fe35e04 | 1375 | TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32); |
1414968a | 1376 | |
acd93701 RH |
1377 | rl = op->args[0]; |
1378 | rh = op->args[1]; | |
dc84988a RH |
1379 | tcg_opt_gen_movi(&ctx, op, rl, (int32_t)r); |
1380 | tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(r >> 32)); | |
b10f3833 | 1381 | continue; |
1414968a | 1382 | } |
b10f3833 | 1383 | break; |
6e14e91b | 1384 | |
bc1473ef | 1385 | case INDEX_op_brcond2_i32: |
acd93701 RH |
1386 | tmp = do_constant_folding_cond2(&op->args[0], &op->args[2], |
1387 | op->args[4]); | |
b10f3833 | 1388 | if (tmp == 0) { |
a763551a | 1389 | do_brcond_false: |
b10f3833 RH |
1390 | tcg_op_remove(s, op); |
1391 | continue; | |
1392 | } | |
1393 | if (tmp == 1) { | |
1394 | do_brcond_true: | |
1395 | op->opc = opc = INDEX_op_br; | |
1396 | op->args[0] = op->args[5]; | |
1397 | break; | |
1398 | } | |
1399 | if ((op->args[4] == TCG_COND_LT || op->args[4] == TCG_COND_GE) | |
1400 | && arg_is_const(op->args[2]) | |
1401 | && arg_info(op->args[2])->val == 0 | |
1402 | && arg_is_const(op->args[3]) | |
1403 | && arg_info(op->args[3])->val == 0) { | |
6c4382f8 RH |
1404 | /* Simplify LT/GE comparisons vs zero to a single compare |
1405 | vs the high word of the input. */ | |
a763551a | 1406 | do_brcond_high: |
b10f3833 | 1407 | op->opc = opc = INDEX_op_brcond_i32; |
acd93701 RH |
1408 | op->args[0] = op->args[1]; |
1409 | op->args[1] = op->args[3]; | |
1410 | op->args[2] = op->args[4]; | |
1411 | op->args[3] = op->args[5]; | |
b10f3833 RH |
1412 | break; |
1413 | } | |
1414 | if (op->args[4] == TCG_COND_EQ) { | |
a763551a RH |
1415 | /* Simplify EQ comparisons where one of the pairs |
1416 | can be simplified. */ | |
1417 | tmp = do_constant_folding_cond(INDEX_op_brcond_i32, | |
acd93701 RH |
1418 | op->args[0], op->args[2], |
1419 | TCG_COND_EQ); | |
a763551a RH |
1420 | if (tmp == 0) { |
1421 | goto do_brcond_false; | |
1422 | } else if (tmp == 1) { | |
1423 | goto do_brcond_high; | |
1424 | } | |
1425 | tmp = do_constant_folding_cond(INDEX_op_brcond_i32, | |
acd93701 RH |
1426 | op->args[1], op->args[3], |
1427 | TCG_COND_EQ); | |
a763551a RH |
1428 | if (tmp == 0) { |
1429 | goto do_brcond_false; | |
1430 | } else if (tmp != 1) { | |
b10f3833 | 1431 | break; |
a763551a RH |
1432 | } |
1433 | do_brcond_low: | |
3b3f847d | 1434 | memset(&ctx.temps_used, 0, sizeof(ctx.temps_used)); |
c45cb8bb | 1435 | op->opc = INDEX_op_brcond_i32; |
acd93701 RH |
1436 | op->args[1] = op->args[2]; |
1437 | op->args[2] = op->args[4]; | |
1438 | op->args[3] = op->args[5]; | |
b10f3833 RH |
1439 | break; |
1440 | } | |
1441 | if (op->args[4] == TCG_COND_NE) { | |
a763551a RH |
1442 | /* Simplify NE comparisons where one of the pairs |
1443 | can be simplified. */ | |
1444 | tmp = do_constant_folding_cond(INDEX_op_brcond_i32, | |
acd93701 RH |
1445 | op->args[0], op->args[2], |
1446 | TCG_COND_NE); | |
a763551a RH |
1447 | if (tmp == 0) { |
1448 | goto do_brcond_high; | |
1449 | } else if (tmp == 1) { | |
1450 | goto do_brcond_true; | |
1451 | } | |
1452 | tmp = do_constant_folding_cond(INDEX_op_brcond_i32, | |
acd93701 RH |
1453 | op->args[1], op->args[3], |
1454 | TCG_COND_NE); | |
a763551a RH |
1455 | if (tmp == 0) { |
1456 | goto do_brcond_low; | |
1457 | } else if (tmp == 1) { | |
1458 | goto do_brcond_true; | |
1459 | } | |
bc1473ef | 1460 | } |
6c4382f8 | 1461 | break; |
bc1473ef RH |
1462 | |
1463 | case INDEX_op_setcond2_i32: | |
acd93701 RH |
1464 | tmp = do_constant_folding_cond2(&op->args[1], &op->args[3], |
1465 | op->args[5]); | |
6c4382f8 | 1466 | if (tmp != 2) { |
a763551a | 1467 | do_setcond_const: |
dc84988a | 1468 | tcg_opt_gen_movi(&ctx, op, op->args[0], tmp); |
b10f3833 RH |
1469 | continue; |
1470 | } | |
1471 | if ((op->args[5] == TCG_COND_LT || op->args[5] == TCG_COND_GE) | |
1472 | && arg_is_const(op->args[3]) | |
1473 | && arg_info(op->args[3])->val == 0 | |
1474 | && arg_is_const(op->args[4]) | |
1475 | && arg_info(op->args[4])->val == 0) { | |
6c4382f8 RH |
1476 | /* Simplify LT/GE comparisons vs zero to a single compare |
1477 | vs the high word of the input. */ | |
a763551a | 1478 | do_setcond_high: |
acd93701 | 1479 | reset_temp(op->args[0]); |
b1fde411 | 1480 | arg_info(op->args[0])->z_mask = 1; |
c45cb8bb | 1481 | op->opc = INDEX_op_setcond_i32; |
acd93701 RH |
1482 | op->args[1] = op->args[2]; |
1483 | op->args[2] = op->args[4]; | |
1484 | op->args[3] = op->args[5]; | |
b10f3833 RH |
1485 | break; |
1486 | } | |
1487 | if (op->args[5] == TCG_COND_EQ) { | |
a763551a RH |
1488 | /* Simplify EQ comparisons where one of the pairs |
1489 | can be simplified. */ | |
1490 | tmp = do_constant_folding_cond(INDEX_op_setcond_i32, | |
acd93701 RH |
1491 | op->args[1], op->args[3], |
1492 | TCG_COND_EQ); | |
a763551a RH |
1493 | if (tmp == 0) { |
1494 | goto do_setcond_const; | |
1495 | } else if (tmp == 1) { | |
1496 | goto do_setcond_high; | |
1497 | } | |
1498 | tmp = do_constant_folding_cond(INDEX_op_setcond_i32, | |
acd93701 RH |
1499 | op->args[2], op->args[4], |
1500 | TCG_COND_EQ); | |
a763551a RH |
1501 | if (tmp == 0) { |
1502 | goto do_setcond_high; | |
1503 | } else if (tmp != 1) { | |
b10f3833 | 1504 | break; |
a763551a RH |
1505 | } |
1506 | do_setcond_low: | |
acd93701 | 1507 | reset_temp(op->args[0]); |
b1fde411 | 1508 | arg_info(op->args[0])->z_mask = 1; |
c45cb8bb | 1509 | op->opc = INDEX_op_setcond_i32; |
acd93701 RH |
1510 | op->args[2] = op->args[3]; |
1511 | op->args[3] = op->args[5]; | |
b10f3833 RH |
1512 | break; |
1513 | } | |
1514 | if (op->args[5] == TCG_COND_NE) { | |
a763551a RH |
1515 | /* Simplify NE comparisons where one of the pairs |
1516 | can be simplified. */ | |
1517 | tmp = do_constant_folding_cond(INDEX_op_setcond_i32, | |
acd93701 RH |
1518 | op->args[1], op->args[3], |
1519 | TCG_COND_NE); | |
a763551a RH |
1520 | if (tmp == 0) { |
1521 | goto do_setcond_high; | |
1522 | } else if (tmp == 1) { | |
1523 | goto do_setcond_const; | |
1524 | } | |
1525 | tmp = do_constant_folding_cond(INDEX_op_setcond_i32, | |
acd93701 RH |
1526 | op->args[2], op->args[4], |
1527 | TCG_COND_NE); | |
a763551a RH |
1528 | if (tmp == 0) { |
1529 | goto do_setcond_low; | |
1530 | } else if (tmp == 1) { | |
1531 | goto do_setcond_const; | |
1532 | } | |
bc1473ef | 1533 | } |
6c4382f8 | 1534 | break; |
bc1473ef | 1535 | |
b10f3833 RH |
1536 | default: |
1537 | break; | |
1538 | } | |
1539 | ||
1540 | /* Some of the folding above can change opc. */ | |
1541 | opc = op->opc; | |
1542 | def = &tcg_op_defs[opc]; | |
1543 | if (def->flags & TCG_OPF_BB_END) { | |
1544 | memset(&ctx.temps_used, 0, sizeof(ctx.temps_used)); | |
1545 | } else { | |
1546 | if (opc == INDEX_op_call && | |
1547 | !(tcg_call_flags(op) | |
cf066674 | 1548 | & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) { |
22613af4 | 1549 | for (i = 0; i < nb_globals; i++) { |
3b3f847d | 1550 | if (test_bit(i, ctx.temps_used.l)) { |
6349039d | 1551 | reset_ts(&s->temps[i]); |
1208d7dd | 1552 | } |
22613af4 KB |
1553 | } |
1554 | } | |
6e14e91b | 1555 | |
b10f3833 RH |
1556 | for (i = 0; i < nb_oargs; i++) { |
1557 | reset_temp(op->args[i]); | |
1558 | /* Save the corresponding known-zero bits mask for the | |
1559 | first output argument (only one supported so far). */ | |
1560 | if (i == 0) { | |
1561 | arg_info(op->args[i])->z_mask = z_mask; | |
a2550660 | 1562 | } |
22613af4 | 1563 | } |
8f2e8c07 | 1564 | } |
34f93921 PK |
1565 | |
1566 | /* Eliminate duplicate and redundant fence instructions. */ | |
d0ed5151 | 1567 | if (ctx.prev_mb) { |
34f93921 PK |
1568 | switch (opc) { |
1569 | case INDEX_op_mb: | |
1570 | /* Merge two barriers of the same type into one, | |
1571 | * or a weaker barrier into a stronger one, | |
1572 | * or two weaker barriers into a stronger one. | |
1573 | * mb X; mb Y => mb X|Y | |
1574 | * mb; strl => mb; st | |
1575 | * ldaq; mb => ld; mb | |
1576 | * ldaq; strl => ld; mb; st | |
1577 | * Other combinations are also merged into a strong | |
1578 | * barrier. This is stricter than specified but for | |
1579 | * the purposes of TCG is better than not optimizing. | |
1580 | */ | |
d0ed5151 | 1581 | ctx.prev_mb->args[0] |= op->args[0]; |
34f93921 PK |
1582 | tcg_op_remove(s, op); |
1583 | break; | |
1584 | ||
1585 | default: | |
1586 | /* Opcodes that end the block stop the optimization. */ | |
1587 | if ((def->flags & TCG_OPF_BB_END) == 0) { | |
1588 | break; | |
1589 | } | |
1590 | /* fallthru */ | |
1591 | case INDEX_op_qemu_ld_i32: | |
1592 | case INDEX_op_qemu_ld_i64: | |
1593 | case INDEX_op_qemu_st_i32: | |
07ce0b05 | 1594 | case INDEX_op_qemu_st8_i32: |
34f93921 PK |
1595 | case INDEX_op_qemu_st_i64: |
1596 | case INDEX_op_call: | |
1597 | /* Opcodes that touch guest memory stop the optimization. */ | |
d0ed5151 | 1598 | ctx.prev_mb = NULL; |
34f93921 PK |
1599 | break; |
1600 | } | |
1601 | } else if (opc == INDEX_op_mb) { | |
d0ed5151 | 1602 | ctx.prev_mb = op; |
34f93921 | 1603 | } |
8f2e8c07 | 1604 | } |
8f2e8c07 | 1605 | } |