]>
Commit | Line | Data |
---|---|---|
625e3dd4 PM |
1 | /* |
2 | * ARM translation: AArch32 Neon instructions | |
3 | * | |
4 | * Copyright (c) 2003 Fabrice Bellard | |
5 | * Copyright (c) 2005-2007 CodeSourcery | |
6 | * Copyright (c) 2007 OpenedHand, Ltd. | |
7 | * Copyright (c) 2020 Linaro, Ltd. | |
8 | * | |
9 | * This library is free software; you can redistribute it and/or | |
10 | * modify it under the terms of the GNU Lesser General Public | |
11 | * License as published by the Free Software Foundation; either | |
12 | * version 2 of the License, or (at your option) any later version. | |
13 | * | |
14 | * This library is distributed in the hope that it will be useful, | |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 | * Lesser General Public License for more details. | |
18 | * | |
19 | * You should have received a copy of the GNU Lesser General Public | |
20 | * License along with this library; if not, see <http://www.gnu.org/licenses/>. | |
21 | */ | |
22 | ||
23 | /* | |
24 | * This file is intended to be included from translate.c; it uses | |
25 | * some macros and definitions provided by that file. | |
26 | * It might be possible to convert it to a standalone .c file eventually. | |
27 | */ | |
28 | ||
123ce4e3 PM |
/*
 * Decoder transform: map an encoded field value x to x + 1.
 * NOTE(review): referenced as a "!function" transform by the generated
 * decode-neon-*.inc.c decoders (e.g. for count-minus-one fields) —
 * confirm the exact users in the generated files.
 */
static inline int plus1(DisasContext *s, int x)
{
    return x + 1;
}
33 | ||
625e3dd4 PM |
34 | /* Include the generated Neon decoder */ |
35 | #include "decode-neon-dp.inc.c" | |
36 | #include "decode-neon-ls.inc.c" | |
37 | #include "decode-neon-shared.inc.c" | |
afff8de0 PM |
38 | |
39 | static bool trans_VCMLA(DisasContext *s, arg_VCMLA *a) | |
40 | { | |
41 | int opr_sz; | |
42 | TCGv_ptr fpst; | |
43 | gen_helper_gvec_3_ptr *fn_gvec_ptr; | |
44 | ||
45 | if (!dc_isar_feature(aa32_vcma, s) | |
46 | || (!a->size && !dc_isar_feature(aa32_fp16_arith, s))) { | |
47 | return false; | |
48 | } | |
49 | ||
50 | /* UNDEF accesses to D16-D31 if they don't exist. */ | |
51 | if (!dc_isar_feature(aa32_simd_r32, s) && | |
52 | ((a->vd | a->vn | a->vm) & 0x10)) { | |
53 | return false; | |
54 | } | |
55 | ||
56 | if ((a->vn | a->vm | a->vd) & a->q) { | |
57 | return false; | |
58 | } | |
59 | ||
60 | if (!vfp_access_check(s)) { | |
61 | return true; | |
62 | } | |
63 | ||
64 | opr_sz = (1 + a->q) * 8; | |
65 | fpst = get_fpstatus_ptr(1); | |
66 | fn_gvec_ptr = a->size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah; | |
67 | tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), | |
68 | vfp_reg_offset(1, a->vn), | |
69 | vfp_reg_offset(1, a->vm), | |
70 | fpst, opr_sz, opr_sz, a->rot, | |
71 | fn_gvec_ptr); | |
72 | tcg_temp_free_ptr(fpst); | |
73 | return true; | |
74 | } | |
94d5eb7b PM |
75 | |
76 | static bool trans_VCADD(DisasContext *s, arg_VCADD *a) | |
77 | { | |
78 | int opr_sz; | |
79 | TCGv_ptr fpst; | |
80 | gen_helper_gvec_3_ptr *fn_gvec_ptr; | |
81 | ||
82 | if (!dc_isar_feature(aa32_vcma, s) | |
83 | || (!a->size && !dc_isar_feature(aa32_fp16_arith, s))) { | |
84 | return false; | |
85 | } | |
86 | ||
87 | /* UNDEF accesses to D16-D31 if they don't exist. */ | |
88 | if (!dc_isar_feature(aa32_simd_r32, s) && | |
89 | ((a->vd | a->vn | a->vm) & 0x10)) { | |
90 | return false; | |
91 | } | |
92 | ||
93 | if ((a->vn | a->vm | a->vd) & a->q) { | |
94 | return false; | |
95 | } | |
96 | ||
97 | if (!vfp_access_check(s)) { | |
98 | return true; | |
99 | } | |
100 | ||
101 | opr_sz = (1 + a->q) * 8; | |
102 | fpst = get_fpstatus_ptr(1); | |
103 | fn_gvec_ptr = a->size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh; | |
104 | tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), | |
105 | vfp_reg_offset(1, a->vn), | |
106 | vfp_reg_offset(1, a->vm), | |
107 | fpst, opr_sz, opr_sz, a->rot, | |
108 | fn_gvec_ptr); | |
109 | tcg_temp_free_ptr(fpst); | |
110 | return true; | |
111 | } | |
32da0e33 PM |
112 | |
113 | static bool trans_VDOT(DisasContext *s, arg_VDOT *a) | |
114 | { | |
115 | int opr_sz; | |
116 | gen_helper_gvec_3 *fn_gvec; | |
117 | ||
118 | if (!dc_isar_feature(aa32_dp, s)) { | |
119 | return false; | |
120 | } | |
121 | ||
122 | /* UNDEF accesses to D16-D31 if they don't exist. */ | |
123 | if (!dc_isar_feature(aa32_simd_r32, s) && | |
124 | ((a->vd | a->vn | a->vm) & 0x10)) { | |
125 | return false; | |
126 | } | |
127 | ||
128 | if ((a->vn | a->vm | a->vd) & a->q) { | |
129 | return false; | |
130 | } | |
131 | ||
132 | if (!vfp_access_check(s)) { | |
133 | return true; | |
134 | } | |
135 | ||
136 | opr_sz = (1 + a->q) * 8; | |
137 | fn_gvec = a->u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b; | |
138 | tcg_gen_gvec_3_ool(vfp_reg_offset(1, a->vd), | |
139 | vfp_reg_offset(1, a->vn), | |
140 | vfp_reg_offset(1, a->vm), | |
141 | opr_sz, opr_sz, 0, fn_gvec); | |
142 | return true; | |
143 | } | |
9a107e7b PM |
144 | |
145 | static bool trans_VFML(DisasContext *s, arg_VFML *a) | |
146 | { | |
147 | int opr_sz; | |
148 | ||
149 | if (!dc_isar_feature(aa32_fhm, s)) { | |
150 | return false; | |
151 | } | |
152 | ||
153 | /* UNDEF accesses to D16-D31 if they don't exist. */ | |
154 | if (!dc_isar_feature(aa32_simd_r32, s) && | |
155 | (a->vd & 0x10)) { | |
156 | return false; | |
157 | } | |
158 | ||
159 | if (a->vd & a->q) { | |
160 | return false; | |
161 | } | |
162 | ||
163 | if (!vfp_access_check(s)) { | |
164 | return true; | |
165 | } | |
166 | ||
167 | opr_sz = (1 + a->q) * 8; | |
168 | tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), | |
169 | vfp_reg_offset(a->q, a->vn), | |
170 | vfp_reg_offset(a->q, a->vm), | |
171 | cpu_env, opr_sz, opr_sz, a->s, /* is_2 == 0 */ | |
172 | gen_helper_gvec_fmlal_a32); | |
173 | return true; | |
174 | } | |
7e1b5d61 PM |
175 | |
176 | static bool trans_VCMLA_scalar(DisasContext *s, arg_VCMLA_scalar *a) | |
177 | { | |
178 | gen_helper_gvec_3_ptr *fn_gvec_ptr; | |
179 | int opr_sz; | |
180 | TCGv_ptr fpst; | |
181 | ||
182 | if (!dc_isar_feature(aa32_vcma, s)) { | |
183 | return false; | |
184 | } | |
185 | if (a->size == 0 && !dc_isar_feature(aa32_fp16_arith, s)) { | |
186 | return false; | |
187 | } | |
188 | ||
189 | /* UNDEF accesses to D16-D31 if they don't exist. */ | |
190 | if (!dc_isar_feature(aa32_simd_r32, s) && | |
191 | ((a->vd | a->vn | a->vm) & 0x10)) { | |
192 | return false; | |
193 | } | |
194 | ||
195 | if ((a->vd | a->vn) & a->q) { | |
196 | return false; | |
197 | } | |
198 | ||
199 | if (!vfp_access_check(s)) { | |
200 | return true; | |
201 | } | |
202 | ||
203 | fn_gvec_ptr = (a->size ? gen_helper_gvec_fcmlas_idx | |
204 | : gen_helper_gvec_fcmlah_idx); | |
205 | opr_sz = (1 + a->q) * 8; | |
206 | fpst = get_fpstatus_ptr(1); | |
207 | tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), | |
208 | vfp_reg_offset(1, a->vn), | |
209 | vfp_reg_offset(1, a->vm), | |
210 | fpst, opr_sz, opr_sz, | |
211 | (a->index << 2) | a->rot, fn_gvec_ptr); | |
212 | tcg_temp_free_ptr(fpst); | |
213 | return true; | |
214 | } | |
35f5d4d1 PM |
215 | |
216 | static bool trans_VDOT_scalar(DisasContext *s, arg_VDOT_scalar *a) | |
217 | { | |
218 | gen_helper_gvec_3 *fn_gvec; | |
219 | int opr_sz; | |
220 | TCGv_ptr fpst; | |
221 | ||
222 | if (!dc_isar_feature(aa32_dp, s)) { | |
223 | return false; | |
224 | } | |
225 | ||
226 | /* UNDEF accesses to D16-D31 if they don't exist. */ | |
227 | if (!dc_isar_feature(aa32_simd_r32, s) && | |
228 | ((a->vd | a->vn) & 0x10)) { | |
229 | return false; | |
230 | } | |
231 | ||
232 | if ((a->vd | a->vn) & a->q) { | |
233 | return false; | |
234 | } | |
235 | ||
236 | if (!vfp_access_check(s)) { | |
237 | return true; | |
238 | } | |
239 | ||
240 | fn_gvec = a->u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b; | |
241 | opr_sz = (1 + a->q) * 8; | |
242 | fpst = get_fpstatus_ptr(1); | |
243 | tcg_gen_gvec_3_ool(vfp_reg_offset(1, a->vd), | |
244 | vfp_reg_offset(1, a->vn), | |
245 | vfp_reg_offset(1, a->rm), | |
246 | opr_sz, opr_sz, a->index, fn_gvec); | |
247 | tcg_temp_free_ptr(fpst); | |
248 | return true; | |
249 | } | |
d27e82f7 PM |
250 | |
251 | static bool trans_VFML_scalar(DisasContext *s, arg_VFML_scalar *a) | |
252 | { | |
253 | int opr_sz; | |
254 | ||
255 | if (!dc_isar_feature(aa32_fhm, s)) { | |
256 | return false; | |
257 | } | |
258 | ||
259 | /* UNDEF accesses to D16-D31 if they don't exist. */ | |
260 | if (!dc_isar_feature(aa32_simd_r32, s) && | |
261 | ((a->vd & 0x10) || (a->q && (a->vn & 0x10)))) { | |
262 | return false; | |
263 | } | |
264 | ||
265 | if (a->vd & a->q) { | |
266 | return false; | |
267 | } | |
268 | ||
269 | if (!vfp_access_check(s)) { | |
270 | return true; | |
271 | } | |
272 | ||
273 | opr_sz = (1 + a->q) * 8; | |
274 | tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), | |
275 | vfp_reg_offset(a->q, a->vn), | |
276 | vfp_reg_offset(a->q, a->rm), | |
277 | cpu_env, opr_sz, opr_sz, | |
278 | (a->index << 2) | a->s, /* is_2 == 0 */ | |
279 | gen_helper_gvec_fmlal_idx_a32); | |
280 | return true; | |
281 | } | |
a27b4630 PM |
282 | |
/*
 * Layout table for the Neon "load/store multiple structures" insns,
 * indexed by the itype field.  As consumed by trans_VLDST_multiple():
 * a total of nregs * interleave D registers are transferred, where
 * each group of 'interleave' consecutive elements is spread across
 * registers 'spacing' apart, repeated for 'nregs' groups.
 * NOTE(review): the row order follows the Arm ARM VLDn/VSTn
 * "multiple structures" itype encoding table — confirm against the
 * decode file when changing.
 */
static struct {
    int nregs;
    int interleave;
    int spacing;
} const neon_ls_element_type[11] = {
    {1, 4, 1},
    {1, 4, 2},
    {4, 1, 1},
    {2, 2, 2},
    {1, 3, 1},
    {1, 3, 2},
    {3, 1, 1},
    {1, 1, 1},
    {1, 2, 1},
    {1, 2, 2},
    {2, 1, 1}
};
300 | ||
301 | static void gen_neon_ldst_base_update(DisasContext *s, int rm, int rn, | |
302 | int stride) | |
303 | { | |
304 | if (rm != 15) { | |
305 | TCGv_i32 base; | |
306 | ||
307 | base = load_reg(s, rn); | |
308 | if (rm == 13) { | |
309 | tcg_gen_addi_i32(base, base, stride); | |
310 | } else { | |
311 | TCGv_i32 index; | |
312 | index = load_reg(s, rm); | |
313 | tcg_gen_add_i32(base, base, index); | |
314 | tcg_temp_free_i32(index); | |
315 | } | |
316 | store_reg(s, rn, base); | |
317 | } | |
318 | } | |
319 | ||
/*
 * VLDn/VSTn "multiple structures": transfer a group of elements
 * between memory and a set of D registers described by the
 * neon_ls_element_type[] table, with optional base writeback.
 * Returns false to UNDEF, true otherwise (including the case where
 * vfp_access_check() has raised an exception).
 */
static bool trans_VLDST_multiple(DisasContext *s, arg_VLDST_multiple *a)
{
    /* Neon load/store multiple structures */
    int nregs, interleave, spacing, reg, n;
    MemOp endian = s->be_data;
    int mmu_idx = get_mem_index(s);
    int size = a->size;
    TCGv_i64 tmp64;
    TCGv_i32 addr, tmp;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }
    if (a->itype > 10) {
        return false;
    }
    /* Catch UNDEF cases for bad values of align field */
    switch (a->itype & 0xc) {
    case 4:
        if (a->align >= 2) {
            return false;
        }
        break;
    case 8:
        if (a->align == 3) {
            return false;
        }
        break;
    default:
        break;
    }
    nregs = neon_ls_element_type[a->itype].nregs;
    interleave = neon_ls_element_type[a->itype].interleave;
    spacing = neon_ls_element_type[a->itype].spacing;
    /* 64-bit elements are only valid for the non-interleaved forms. */
    if (size == 3 && (interleave | spacing) != 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /* For our purposes, bytes are always little-endian.  */
    if (size == 0) {
        endian = MO_LE;
    }
    /*
     * Consecutive little-endian elements from a single register
     * can be promoted to a larger little-endian operation.
     */
    if (interleave == 1 && endian == MO_LE) {
        size = 3;
    }
    tmp64 = tcg_temp_new_i64();
    addr = tcg_temp_new_i32();
    tmp = tcg_const_i32(1 << size);  /* byte step between elements */
    load_reg_var(s, addr, a->rn);
    for (reg = 0; reg < nregs; reg++) {
        for (n = 0; n < 8 >> size; n++) {
            int xs;
            for (xs = 0; xs < interleave; xs++) {
                /* Register holding element n of this interleave slot */
                int tt = a->vd + reg + spacing * xs;

                if (a->l) {
                    gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size);
                    neon_store_element64(tt, n, size, tmp64);
                } else {
                    neon_load_element64(tmp64, tt, n, size);
                    gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size);
                }
                tcg_gen_add_i32(addr, addr, tmp);
            }
        }
    }
    tcg_temp_free_i32(addr);
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i64(tmp64);

    gen_neon_ldst_base_update(s, a->rm, a->rn, nregs * interleave * 8);
    return true;
}
3698747c PM |
406 | |
/*
 * VLD1/2/3/4 (single structure to all lanes): load one element per
 * structure and replicate it across every lane of the destination
 * register(s).
 */
static bool trans_VLD_all_lanes(DisasContext *s, arg_VLD_all_lanes *a)
{
    /* Neon load single structure to all lanes */
    int reg, stride, vec_size;
    int vd = a->vd;
    int size = a->size;
    int nregs = a->n + 1;  /* 'n' encodes the structure count minus 1 */
    TCGv_i32 addr, tmp;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    if (size == 3) {
        if (nregs != 4 || a->a == 0) {
            return false;
        }
        /* For VLD4 size == 3 a == 1 means 32 bits at 16 byte alignment */
        size = 2;
    }
    /* Remaining UNDEF combinations of register count / alignment hint. */
    if (nregs == 1 && a->a == 1 && size == 0) {
        return false;
    }
    if (nregs == 3 && a->a == 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * VLD1 to all lanes: T bit indicates how many Dregs to write.
     * VLD2/3/4 to all lanes: T bit indicates register stride.
     */
    stride = a->t ? 2 : 1;
    vec_size = nregs == 1 ? stride * 8 : 8;

    tmp = tcg_temp_new_i32();
    addr = tcg_temp_new_i32();
    load_reg_var(s, addr, a->rn);
    for (reg = 0; reg < nregs; reg++) {
        gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
                        s->be_data | size);
        if ((vd & 1) && vec_size == 16) {
            /*
             * We cannot write 16 bytes at once because the
             * destination is unaligned.
             */
            tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0),
                                 8, 8, tmp);
            /* Copy the replicated low half into the next D register. */
            tcg_gen_gvec_mov(0, neon_reg_offset(vd + 1, 0),
                             neon_reg_offset(vd, 0), 8, 8);
        } else {
            tcg_gen_gvec_dup_i32(size, neon_reg_offset(vd, 0),
                                 vec_size, vec_size, tmp);
        }
        tcg_gen_addi_i32(addr, addr, 1 << size);
        vd += stride;
    }
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(addr);

    gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << size) * nregs);

    return true;
}
123ce4e3 PM |
479 | |
/*
 * VLDn/VSTn (single structure to one lane): transfer one element per
 * register between memory and lane reg_idx of nregs registers spaced
 * a->stride apart.
 */
static bool trans_VLDST_single(DisasContext *s, arg_VLDST_single *a)
{
    /* Neon load/store single structure to one lane */
    int reg;
    int nregs = a->n + 1;  /* 'n' encodes the structure count minus 1 */
    int vd = a->vd;
    TCGv_i32 addr, tmp;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_simd_r32, s) && (a->vd & 0x10)) {
        return false;
    }

    /* Catch the UNDEF cases. This is unavoidably a bit messy. */
    switch (nregs) {
    case 1:
        if (((a->align & (1 << a->size)) != 0) ||
            (a->size == 2 && ((a->align & 3) == 1 || (a->align & 3) == 2))) {
            return false;
        }
        break;
    case 3:
        if ((a->align & 1) != 0) {
            return false;
        }
        /* fall through */
    case 2:
        if (a->size == 2 && (a->align & 2) != 0) {
            return false;
        }
        break;
    case 4:
        if ((a->size == 2) && ((a->align & 3) == 3)) {
            return false;
        }
        break;
    default:
        abort();  /* decode guarantees 1 <= nregs <= 4 */
    }
    if ((vd + a->stride * (nregs - 1)) > 31) {
        /*
         * Attempts to write off the end of the register file are
         * UNPREDICTABLE; we choose to UNDEF because otherwise we would
         * access off the end of the array that holds the register data.
         */
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    tmp = tcg_temp_new_i32();
    addr = tcg_temp_new_i32();
    load_reg_var(s, addr, a->rn);
    /*
     * TODO: if we implemented alignment exceptions, we should check
     * addr against the alignment encoded in a->align here.
     */
    for (reg = 0; reg < nregs; reg++) {
        if (a->l) {
            gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
                            s->be_data | a->size);
            neon_store_element(vd, a->reg_idx, a->size, tmp);
        } else { /* Store */
            neon_load_element(tmp, vd, a->reg_idx, a->size);
            gen_aa32_st_i32(s, tmp, addr, get_mem_index(s),
                            s->be_data | a->size);
        }
        vd += a->stride;
        tcg_gen_addi_i32(addr, addr, 1 << a->size);
    }
    tcg_temp_free_i32(addr);
    tcg_temp_free_i32(tmp);

    gen_neon_ldst_base_update(s, a->rm, a->rn, (1 << a->size) * nregs);

    return true;
}
a4e143ac PM |
563 | |
564 | static bool do_3same(DisasContext *s, arg_3same *a, GVecGen3Fn fn) | |
565 | { | |
566 | int vec_size = a->q ? 16 : 8; | |
567 | int rd_ofs = neon_reg_offset(a->vd, 0); | |
568 | int rn_ofs = neon_reg_offset(a->vn, 0); | |
569 | int rm_ofs = neon_reg_offset(a->vm, 0); | |
570 | ||
571 | if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { | |
572 | return false; | |
573 | } | |
574 | ||
575 | /* UNDEF accesses to D16-D31 if they don't exist. */ | |
576 | if (!dc_isar_feature(aa32_simd_r32, s) && | |
577 | ((a->vd | a->vn | a->vm) & 0x10)) { | |
578 | return false; | |
579 | } | |
580 | ||
581 | if ((a->vn | a->vm | a->vd) & a->q) { | |
582 | return false; | |
583 | } | |
584 | ||
585 | if (!vfp_access_check(s)) { | |
586 | return true; | |
587 | } | |
588 | ||
589 | fn(a->size, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size); | |
590 | return true; | |
591 | } | |
592 | ||
/*
 * Boilerplate: generate a trans_FOO_3s function that forwards a
 * "3 regs same length" insn straight to a GVecGen3Fn-compatible
 * expander, with all decode checks done by do_3same().
 */
#define DO_3SAME(INSN, FUNC)                                            \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        return do_3same(s, a, FUNC);                                    \
    }

DO_3SAME(VADD, tcg_gen_gvec_add)
DO_3SAME(VSUB, tcg_gen_gvec_sub)
DO_3SAME(VAND, tcg_gen_gvec_and)
DO_3SAME(VBIC, tcg_gen_gvec_andc)
DO_3SAME(VORR, tcg_gen_gvec_or)
DO_3SAME(VORN, tcg_gen_gvec_orc)
DO_3SAME(VEOR, tcg_gen_gvec_xor)
DO_3SAME(VSHL_S, gen_gvec_sshl)
DO_3SAME(VSHL_U, gen_gvec_ushl)
DO_3SAME(VQADD_S, gen_gvec_sqadd_qc)
DO_3SAME(VQADD_U, gen_gvec_uqadd_qc)
DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc)
DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc)
35a548ed PM |
612 | |
/*
 * These insns are all gvec_bitsel but with the inputs in various
 * orders: the O1/O2/O3 arguments pick which of rd/rn/rm plays the
 * selector / true-source / false-source role.
 */
#define DO_3SAME_BITSEL(INSN, O1, O2, O3)                               \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        tcg_gen_gvec_bitsel(vece, rd_ofs, O1, O2, O3, oprsz, maxsz);    \
    }                                                                   \
    DO_3SAME(INSN, gen_##INSN##_3s)

DO_3SAME_BITSEL(VBSL, rd_ofs, rn_ofs, rm_ofs)
DO_3SAME_BITSEL(VBIT, rm_ofs, rn_ofs, rd_ofs)
DO_3SAME_BITSEL(VBIF, rm_ofs, rd_ofs, rn_ofs)
36b59310 PM |
626 | |
/*
 * Like DO_3SAME, but for insns that have no 64-bit element form:
 * size == 3 is reserved and must UNDEF.
 */
#define DO_3SAME_NO_SZ_3(INSN, FUNC)                                    \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (a->size == 3) {                                             \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, FUNC);                                    \
    }

DO_3SAME_NO_SZ_3(VMAX_S, tcg_gen_gvec_smax)
DO_3SAME_NO_SZ_3(VMAX_U, tcg_gen_gvec_umax)
DO_3SAME_NO_SZ_3(VMIN_S, tcg_gen_gvec_smin)
DO_3SAME_NO_SZ_3(VMIN_U, tcg_gen_gvec_umin)
DO_3SAME_NO_SZ_3(VMUL, tcg_gen_gvec_mul)
DO_3SAME_NO_SZ_3(VMLA, gen_gvec_mla)
DO_3SAME_NO_SZ_3(VMLS, gen_gvec_mls)
DO_3SAME_NO_SZ_3(VTST, gen_gvec_cmtst)
DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd)
DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba)
DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd)
DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba)
02bd0cdb PM |
648 | |
/*
 * Integer element compares: each destination element becomes the
 * all-ones/all-zeroes result of comparing the source elements with
 * condition COND.  No 64-bit form (hence DO_3SAME_NO_SZ_3).
 */
#define DO_3SAME_CMP(INSN, COND)                                        \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        tcg_gen_gvec_cmp(COND, vece, rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz); \
    }                                                                   \
    DO_3SAME_NO_SZ_3(INSN, gen_##INSN##_3s)

DO_3SAME_CMP(VCGT_S, TCG_COND_GT)
DO_3SAME_CMP(VCGT_U, TCG_COND_GTU)
DO_3SAME_CMP(VCGE_S, TCG_COND_GE)
DO_3SAME_CMP(VCGE_U, TCG_COND_GEU)
DO_3SAME_CMP(VCEQ, TCG_COND_EQ)
663 | ||
0de34fd4 PM |
664 | static void gen_VMUL_p_3s(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, |
665 | uint32_t rm_ofs, uint32_t oprsz, uint32_t maxsz) | |
666 | { | |
667 | tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, | |
668 | 0, gen_helper_gvec_pmul_b); | |
669 | } | |
670 | ||
671 | static bool trans_VMUL_p_3s(DisasContext *s, arg_3same *a) | |
672 | { | |
673 | if (a->size != 0) { | |
674 | return false; | |
675 | } | |
676 | return do_3same(s, a, gen_VMUL_p_3s); | |
677 | } | |
a0635695 PM |
678 | |
/*
 * VQRDMLAH/VQRDMLSH: require the RDM feature and exist only for
 * 16-bit and 32-bit element sizes.
 */
#define DO_VQRDMLAH(INSN, FUNC)                                         \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (!dc_isar_feature(aa32_rdm, s)) {                            \
            return false;                                               \
        }                                                               \
        if (a->size != 1 && a->size != 2) {                             \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, FUNC);                                    \
    }

DO_VQRDMLAH(VQRDMLAH, gen_gvec_sqrdmlah_qc)
DO_VQRDMLAH(VQRDMLSH, gen_gvec_sqrdmlsh_qc)
21290edf PM |
693 | |
694 | static bool trans_SHA1_3s(DisasContext *s, arg_SHA1_3s *a) | |
695 | { | |
696 | TCGv_ptr ptr1, ptr2, ptr3; | |
697 | TCGv_i32 tmp; | |
698 | ||
699 | if (!arm_dc_feature(s, ARM_FEATURE_NEON) || | |
700 | !dc_isar_feature(aa32_sha1, s)) { | |
701 | return false; | |
702 | } | |
703 | ||
704 | /* UNDEF accesses to D16-D31 if they don't exist. */ | |
705 | if (!dc_isar_feature(aa32_simd_r32, s) && | |
706 | ((a->vd | a->vn | a->vm) & 0x10)) { | |
707 | return false; | |
708 | } | |
709 | ||
710 | if ((a->vn | a->vm | a->vd) & 1) { | |
711 | return false; | |
712 | } | |
713 | ||
714 | if (!vfp_access_check(s)) { | |
715 | return true; | |
716 | } | |
717 | ||
718 | ptr1 = vfp_reg_ptr(true, a->vd); | |
719 | ptr2 = vfp_reg_ptr(true, a->vn); | |
720 | ptr3 = vfp_reg_ptr(true, a->vm); | |
721 | tmp = tcg_const_i32(a->optype); | |
722 | gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp); | |
723 | tcg_temp_free_i32(tmp); | |
724 | tcg_temp_free_ptr(ptr1); | |
725 | tcg_temp_free_ptr(ptr2); | |
726 | tcg_temp_free_ptr(ptr3); | |
727 | ||
728 | return true; | |
729 | } | |
730 | ||
731 | static bool trans_SHA256H_3s(DisasContext *s, arg_SHA256H_3s *a) | |
732 | { | |
733 | TCGv_ptr ptr1, ptr2, ptr3; | |
734 | ||
735 | if (!arm_dc_feature(s, ARM_FEATURE_NEON) || | |
736 | !dc_isar_feature(aa32_sha2, s)) { | |
737 | return false; | |
738 | } | |
739 | ||
740 | /* UNDEF accesses to D16-D31 if they don't exist. */ | |
741 | if (!dc_isar_feature(aa32_simd_r32, s) && | |
742 | ((a->vd | a->vn | a->vm) & 0x10)) { | |
743 | return false; | |
744 | } | |
745 | ||
746 | if ((a->vn | a->vm | a->vd) & 1) { | |
747 | return false; | |
748 | } | |
749 | ||
750 | if (!vfp_access_check(s)) { | |
751 | return true; | |
752 | } | |
753 | ||
754 | ptr1 = vfp_reg_ptr(true, a->vd); | |
755 | ptr2 = vfp_reg_ptr(true, a->vn); | |
756 | ptr3 = vfp_reg_ptr(true, a->vm); | |
757 | gen_helper_crypto_sha256h(ptr1, ptr2, ptr3); | |
758 | tcg_temp_free_ptr(ptr1); | |
759 | tcg_temp_free_ptr(ptr2); | |
760 | tcg_temp_free_ptr(ptr3); | |
761 | ||
762 | return true; | |
763 | } | |
764 | ||
765 | static bool trans_SHA256H2_3s(DisasContext *s, arg_SHA256H2_3s *a) | |
766 | { | |
767 | TCGv_ptr ptr1, ptr2, ptr3; | |
768 | ||
769 | if (!arm_dc_feature(s, ARM_FEATURE_NEON) || | |
770 | !dc_isar_feature(aa32_sha2, s)) { | |
771 | return false; | |
772 | } | |
773 | ||
774 | /* UNDEF accesses to D16-D31 if they don't exist. */ | |
775 | if (!dc_isar_feature(aa32_simd_r32, s) && | |
776 | ((a->vd | a->vn | a->vm) & 0x10)) { | |
777 | return false; | |
778 | } | |
779 | ||
780 | if ((a->vn | a->vm | a->vd) & 1) { | |
781 | return false; | |
782 | } | |
783 | ||
784 | if (!vfp_access_check(s)) { | |
785 | return true; | |
786 | } | |
787 | ||
788 | ptr1 = vfp_reg_ptr(true, a->vd); | |
789 | ptr2 = vfp_reg_ptr(true, a->vn); | |
790 | ptr3 = vfp_reg_ptr(true, a->vm); | |
791 | gen_helper_crypto_sha256h2(ptr1, ptr2, ptr3); | |
792 | tcg_temp_free_ptr(ptr1); | |
793 | tcg_temp_free_ptr(ptr2); | |
794 | tcg_temp_free_ptr(ptr3); | |
795 | ||
796 | return true; | |
797 | } | |
798 | ||
799 | static bool trans_SHA256SU1_3s(DisasContext *s, arg_SHA256SU1_3s *a) | |
800 | { | |
801 | TCGv_ptr ptr1, ptr2, ptr3; | |
802 | ||
803 | if (!arm_dc_feature(s, ARM_FEATURE_NEON) || | |
804 | !dc_isar_feature(aa32_sha2, s)) { | |
805 | return false; | |
806 | } | |
807 | ||
808 | /* UNDEF accesses to D16-D31 if they don't exist. */ | |
809 | if (!dc_isar_feature(aa32_simd_r32, s) && | |
810 | ((a->vd | a->vn | a->vm) & 0x10)) { | |
811 | return false; | |
812 | } | |
813 | ||
814 | if ((a->vn | a->vm | a->vd) & 1) { | |
815 | return false; | |
816 | } | |
817 | ||
818 | if (!vfp_access_check(s)) { | |
819 | return true; | |
820 | } | |
821 | ||
822 | ptr1 = vfp_reg_ptr(true, a->vd); | |
823 | ptr2 = vfp_reg_ptr(true, a->vn); | |
824 | ptr3 = vfp_reg_ptr(true, a->vm); | |
825 | gen_helper_crypto_sha256su1(ptr1, ptr2, ptr3); | |
826 | tcg_temp_free_ptr(ptr1); | |
827 | tcg_temp_free_ptr(ptr2); | |
828 | tcg_temp_free_ptr(ptr3); | |
829 | ||
830 | return true; | |
831 | } | |
35d4352f PM |
832 | |
/*
 * 64-bit-element-only ops implemented via a per-element i64 callback
 * (.fni8); the gvec core expands the loop.
 */
#define DO_3SAME_64(INSN, FUNC)                                         \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        static const GVecGen3 op = { .fni8 = FUNC };                    \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op);      \
    }                                                                   \
    DO_3SAME_64(INSN, gen_##INSN##_elt) above needs an env-free fn;     \

#define DO_3SAME_64_ENV(INSN, FUNC)                                     \
    static void gen_##INSN##_elt(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m)    \
    {                                                                   \
        FUNC(d, cpu_env, n, m);                                         \
    }                                                                   \
    DO_3SAME_64(INSN, gen_##INSN##_elt)

DO_3SAME_64(VRSHL_S64, gen_helper_neon_rshl_s64)
DO_3SAME_64(VRSHL_U64, gen_helper_neon_rshl_u64)
DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64)
DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64)
DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64)
DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64)
cb294bca PM |
856 | |
/*
 * 3-same ops implemented only via per-element 32-bit helper functions
 * (one per element size, selected by vece; size == 3 rejected by the
 * trans function).
 */
#define DO_3SAME_32(INSN, FUNC)                                         \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        static const GVecGen3 ops[4] = {                                \
            { .fni4 = gen_helper_neon_##FUNC##8 },                      \
            { .fni4 = gen_helper_neon_##FUNC##16 },                     \
            { .fni4 = gen_helper_neon_##FUNC##32 },                     \
            { 0 },                                                      \
        };                                                              \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
    }                                                                   \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (a->size > 2) {                                              \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##INSN##_3s);                         \
    }
877 | ||
6812dfdc PM |
/*
 * Some helper functions need to be passed the cpu_env. In order
 * to use those with the gvec APIs like tcg_gen_gvec_3() we need
 * to create wrapper functions whose prototype is a NeonGenTwoOpFn()
 * and which call a NeonGenTwoOpEnvFn().
 */
#define WRAP_ENV_FN(WRAPNAME, FUNC)                                     \
    static void WRAPNAME(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m)            \
    {                                                                   \
        FUNC(d, cpu_env, n, m);                                         \
    }

/* Like DO_3SAME_32, but each per-size helper also takes cpu_env. */
#define DO_3SAME_32_ENV(INSN, FUNC)                                     \
    WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8);        \
    WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16);      \
    WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32);      \
    static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs,         \
                                uint32_t rn_ofs, uint32_t rm_ofs,       \
                                uint32_t oprsz, uint32_t maxsz)         \
    {                                                                   \
        static const GVecGen3 ops[4] = {                                \
            { .fni4 = gen_##INSN##_tramp8 },                            \
            { .fni4 = gen_##INSN##_tramp16 },                           \
            { .fni4 = gen_##INSN##_tramp32 },                           \
            { 0 },                                                      \
        };                                                              \
        tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \
    }                                                                   \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        if (a->size > 2) {                                              \
            return false;                                               \
        }                                                               \
        return do_3same(s, a, gen_##INSN##_3s);                         \
    }
913 | ||
cb294bca PM |
/* Halving add/sub, rounding halving add, rounding shifts. */
DO_3SAME_32(VHADD_S, hadd_s)
DO_3SAME_32(VHADD_U, hadd_u)
DO_3SAME_32(VHSUB_S, hsub_s)
DO_3SAME_32(VHSUB_U, hsub_u)
DO_3SAME_32(VRHADD_S, rhadd_s)
DO_3SAME_32(VRHADD_U, rhadd_u)
DO_3SAME_32(VRSHL_S, rshl_s)
DO_3SAME_32(VRSHL_U, rshl_u)

/* The saturating-shift helpers take cpu_env, so use the _ENV variant. */
DO_3SAME_32_ENV(VQSHL_S, qshl_s)
DO_3SAME_32_ENV(VQSHL_U, qshl_u)
DO_3SAME_32_ENV(VQRSHL_S, qrshl_s)
DO_3SAME_32_ENV(VQRSHL_U, qrshl_u)
059c2398 PM |
927 | |
/*
 * Shared expansion for the pairwise "3 regs same length" ops: the
 * result's low half comes from pairwise-combining vn, the high half
 * from pairwise-combining vm, using 'fn' 32 bits at a time.  These
 * insns only exist in the D-register (q == 0) form.
 */
static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn)
{
    /* Operations handled pairwise 32 bits at a time */
    TCGv_i32 tmp, tmp2, tmp3;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (a->size == 3) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    assert(a->q == 0); /* enforced by decode patterns */

    /*
     * Note that we have to be careful not to clobber the source operands
     * in the "vm == vd" case by storing the result of the first pass too
     * early. Since Q is 0 there are always just two passes, so instead
     * of a complicated loop over each pass we just unroll.
     */
    tmp = neon_load_reg(a->vn, 0);
    tmp2 = neon_load_reg(a->vn, 1);
    fn(tmp, tmp, tmp2);
    tcg_temp_free_i32(tmp2);

    tmp3 = neon_load_reg(a->vm, 0);
    tmp2 = neon_load_reg(a->vm, 1);
    fn(tmp3, tmp3, tmp2);
    tcg_temp_free_i32(tmp2);

    /* Both halves computed; now it is safe to write vd. */
    neon_store_reg(a->vd, 0, tmp);
    neon_store_reg(a->vd, 1, tmp3);
    return true;
}
973 | ||
/*
 * Trans boilerplate for the pairwise ops: pick the per-size helper
 * and hand off to do_3same_pair() for the two-pass expansion.
 */
#define DO_3SAME_PAIR(INSN, func)                                       \
    static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a)        \
    {                                                                   \
        static NeonGenTwoOpFn * const fns[] = {                         \
            gen_helper_neon_##func##8,                                  \
            gen_helper_neon_##func##16,                                 \
            gen_helper_neon_##func##32,                                 \
        };                                                              \
        if (a->size > 2) {                                              \
            return false;                                               \
        }                                                               \
        return do_3same_pair(s, a, fns[a->size]);                       \
    }

/* 32-bit pairwise ops end up the same as the elementwise versions.  */
#define gen_helper_neon_pmax_s32  tcg_gen_smax_i32
#define gen_helper_neon_pmax_u32  tcg_gen_umax_i32
#define gen_helper_neon_pmin_s32  tcg_gen_smin_i32
#define gen_helper_neon_pmin_u32  tcg_gen_umin_i32
#define gen_helper_neon_padd_u32  tcg_gen_add_i32

DO_3SAME_PAIR(VPMAX_S, pmax_s)
DO_3SAME_PAIR(VPMIN_S, pmin_s)
DO_3SAME_PAIR(VPMAX_U, pmax_u)
DO_3SAME_PAIR(VPMIN_U, pmin_u)
DO_3SAME_PAIR(VPADD, padd_u)