/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "qemu/host-utils.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return (target_ulong)rt;
}

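/*
 * Illustrative note (not in the original source): divweu computes
 * (RA || 0x00000000) / RB, i.e. a 64/32 divide with the dividend in the
 * upper half.  For example, ra = 1, rb = 4 gives 0x1_0000_0000 / 4 =
 * 0x4000_0000 with no overflow, while ra = 4, rb = 4 would yield
 * 0x1_0000_0000 > UINT32_MAX and so sets OV when OE is in effect.
 */
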
target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    overflow = divu128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    int64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = divs128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return rt;
}

#endif


target_ulong helper_cntlzw(target_ulong t)
{
    return clz32(t);
}

target_ulong helper_cnttzw(target_ulong t)
{
    return ctz32(t);
}

#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/* subtract 1 from each byte, and AND with the inverse, to check whether the
 * MSB ends up set in any byte,
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 */
#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))

/* When you XOR the pattern and there is a match, that byte will be zero */
#define hasvalue(x, n) (haszero((x) ^ pattern(n)))
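
/*
 * Illustrative trace of the macros above (not in the original source):
 * searching for the byte 0x2a in x = 0x0000002a00000000, hasvalue(x, 0x2a)
 * first XORs x with pattern(0x2a) = 0x2a2a2a2a2a2a2a2a, zeroing exactly the
 * matching byte; haszero() then borrows out of that zero byte, so the
 * result has 0x80 in the matching byte position and is non-zero.
 */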

uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
{
    return hasvalue(rb, ra) ? 1 << CRF_GT : 0;
}

#undef pattern
#undef haszero
#undef hasvalue

target_ulong helper_cntlzd(target_ulong t)
{
    return clz64(t);
}

target_ulong helper_cnttzd(target_ulong t)
{
    return ctz64(t);
}
#endif

#if defined(TARGET_PPC64)

uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i*8)) & 0xFF;
        if (index < 64) {
            if (rb & (1ull << (63-index))) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}
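
/*
 * Illustrative note (not in the original source): each byte of rs selects
 * one bit of rb, counted from the most significant end.  With
 * rs = 0x0000000000003f00 (byte 1 selects bit index 0x3f, the LSB of rb)
 * and rb = 1, bit 1 of the result is set, so helper_bpermd returns 2.
 */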

#endif

target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}

/* shift right arithmetic helper */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->ca = 0;
            } else {
                env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca = 0;
        }
    } else {
        ret = (int32_t)value >> 31;
        env->ca = (ret != 0);
    }
    return (target_long)ret;
}

#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca = 0;
            } else {
                env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca = (ret != 0);
    }
    return ret;
}
#endif

#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}

target_ulong helper_popcntd(target_ulong val)
{
    return ctpop64(val);
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
    val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
    return val;
}
#endif

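/*
 * Illustrative trace of the SWAR reduction used above (not in the original
 * source), for one byte of popcntb with val = 0xb1 = 0b10110001:
 *   pairs:   10 11 00 01 -> 01 10 00 01  (a bit count per 2-bit field)
 *   nibbles: 01+10 = 0011, 00+01 = 0001  -> 0x31
 * popcntb stops at per-byte counts; popcntw continues folding the byte and
 * halfword fields, giving 3 + 1 = 4 set bits for this byte.
 */
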
/*****************************************************************************/
/* PowerPC 601 specific instructions (POWER bridge) */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return tmp / (int32_t)arg2;
    }
}

target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        if ((int32_t)tmp != tmp) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
        return tmp;
    }
}

target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
                          target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->ov = 0;
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

/*****************************************************************************/
/* 602 specific instructions */
/* mfrom is the craziest instruction ever seen, imho ! */
/* Real implementation uses a ROM table. Do the same */
/* Extremely decomposed:
 * return 256 * log10(10**(-arg / 256) + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif

/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define HI_IDX 0
#define LO_IDX 1
#define AVRB(i) u8[i]
#define AVRW(i) u32[i]
#else
#define HI_IDX 1
#define LO_IDX 0
#define AVRB(i) u8[15-(i)]
#define AVRW(i) u32[3-(i)]
#endif

#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
#endif

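/*
 * Illustrative note (not in the original source): VECTOR_FOR_INORDER_I
 * visits elements in architectural (big-endian) order regardless of host
 * byte order.  On a little-endian host the u8 elements of a ppc_avr_t are
 * stored reversed, so iterating from ARRAY_SIZE-1 down to 0 still yields
 * vector byte 0 first, exactly as the 0..15 loop does on a big-endian host.
 */
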
/* Saturating arithmetic helpers. */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU

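/*
 * Illustrative usage of the generated converters (not in the original
 * source): cvtsdsw(0x100000000LL, &sat) clamps to INT32_MAX and sets sat,
 * while cvtsdsw(-5, &sat) returns -5 and leaves sat untouched; the callers
 * below fold sat into VSCR[SAT] after processing all elements.
 */
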
void helper_lvsl(ppc_avr_t *r, target_ulong sh)
{
    int i, j = (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

void helper_lvsr(ppc_avr_t *r, target_ulong sh)
{
    int i, j = 0x10 - (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
{
#if defined(HOST_WORDS_BIGENDIAN)
    env->vscr = r->u32[3];
#else
    env->vscr = r->u32[0];
#endif
    set_flush_to_zero(vscr_nj, &env->vec_status);
}

void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}

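/*
 * Illustrative note (not in the original source): the comparison
 * ~a < b is a branch-free carry-out test -- a + b overflows 32 bits
 * exactly when b > 0xffffffff - a, and ~a equals 0xffffffff - a.
 * E.g. a = 0xffffffff, b = 1: ~a = 0 < 1, so the element's carry is 1.
 */
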
#define VARITH_DO(name, op, element)                                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = a->element[i] op b->element[i];             \
        }                                                               \
    }
#define VARITH(suffix, element)                 \
    VARITH_DO(add##suffix, +, element)          \
    VARITH_DO(sub##suffix, -, element)
VARITH(ubm, u8)
VARITH(uhm, u16)
VARITH(uwm, u32)
VARITH(udm, u64)
VARITH_DO(muluwm, *, u32)
#undef VARITH_DO
#undef VARITH

#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            r->f[i] = func(a->f[i], b->f[i], &env->vec_status);         \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP

#define VARITHFPFMA(suffix, type)                                       \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b, ppc_avr_t *c)                   \
    {                                                                   \
        int i;                                                          \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i],         \
                                     type, &env->vec_status);           \
        }                                                               \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
#undef VARITHFPFMA

#define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
        r->element[i] = cvt(result, &sat);                              \
    }

#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
    void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,   \
                        ppc_avr_t *b)                                   \
    {                                                                   \
        int sat = 0;                                                    \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            switch (sizeof(r->element[0])) {                            \
            case 1:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 2:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 4:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED

#define VAVG_DO(name, element, etype)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
            r->element[i] = x >> 1;                                     \
        }                                                               \
    }

#define VAVG(type, signed_element, signed_type, unsigned_element,       \
             unsigned_type)                                             \
    VAVG_DO(avgs##type, signed_element, signed_type)                    \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
#undef VAVG

#define VABSDU_DO(name, element)                                        \
void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)           \
{                                                                       \
    int i;                                                              \
                                                                        \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        r->element[i] = (a->element[i] > b->element[i]) ?               \
                        (a->element[i] - b->element[i]) :               \
                        (b->element[i] - a->element[i]);                \
    }                                                                   \
}

/* VABSDU - Vector absolute difference unsigned
 *   name    - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VABSDU(type, element)                   \
    VABSDU_DO(absdu##type, element)
VABSDU(b, u8)
VABSDU(h, u16)
VABSDU(w, u32)
#undef VABSDU_DO
#undef VABSDU

#define VCF(suffix, cvt, element)                                       \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            float32 t = cvt(b->element[i], &env->vec_status);           \
            r->f[i] = float32_scalbn(t, -uim, &env->vec_status);        \
        }                                                               \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF

#define VCMP_DO(suffix, compare, element, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint64_t ones = (uint64_t)-1;                                   \
        uint64_t all = ones;                                            \
        uint64_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            uint64_t result = (a->element[i] compare b->element[i] ?    \
                               ones : 0x0);                             \
            switch (sizeof(a->element[0])) {                            \
            case 8:                                                     \
                r->u64[i] = result;                                     \
                break;                                                  \
            case 4:                                                     \
                r->u32[i] = result;                                     \
                break;                                                  \
            case 2:                                                     \
                r->u16[i] = result;                                     \
                break;                                                  \
            case 1:                                                     \
                r->u8[i] = result;                                      \
                break;                                                  \
            }                                                           \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMP(suffix, compare, element)          \
    VCMP_DO(suffix, compare, element, 0)        \
    VCMP_DO(suffix##_dot, compare, element, 1)
VCMP(equb, ==, u8)
VCMP(equh, ==, u16)
VCMP(equw, ==, u32)
VCMP(equd, ==, u64)
VCMP(gtub, >, u8)
VCMP(gtuh, >, u16)
VCMP(gtuw, >, u32)
VCMP(gtud, >, u64)
VCMP(gtsb, >, s8)
VCMP(gtsh, >, s16)
VCMP(gtsw, >, s32)
VCMP(gtsd, >, s64)
#undef VCMP_DO
#undef VCMP

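/*
 * Illustrative note (not in the original source): for the "record" forms
 * above, CR field 6 summarizes the comparison -- bit 3 is set when every
 * element compared true ("all"), bit 1 when no element did ("none").
 * E.g. vcmpequw. on {1,2,3,4} vs {1,2,3,4} sets CR6 to 0b1000.
 */
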
#define VCMPNEZ_DO(suffix, element, etype, record)                      \
void helper_vcmpnez##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *a, ppc_avr_t *b)                 \
{                                                                       \
    etype ones = (etype)-1;                                             \
    etype all = ones;                                                   \
    etype none = 0;                                                     \
    int i;                                                              \
                                                                        \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        etype result = ((a->element[i] == 0)                            \
                        || (b->element[i] == 0)                         \
                        || (a->element[i] != b->element[i]) ?           \
                        ones : 0x0);                                    \
        r->element[i] = result;                                         \
        all &= result;                                                  \
        none |= result;                                                 \
    }                                                                   \
    if (record) {                                                       \
        env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);           \
    }                                                                   \
}

/* VCMPNEZ - Vector compare not equal to zero
 *   suffix  - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VCMPNEZ(suffix, element, etype)         \
    VCMPNEZ_DO(suffix, element, etype, 0)       \
    VCMPNEZ_DO(suffix##_dot, element, etype, 1)
VCMPNEZ(b, u8, uint8_t)
VCMPNEZ(h, u16, uint16_t)
VCMPNEZ(w, u32, uint32_t)
#undef VCMPNEZ_DO
#undef VCMPNEZ

#define VCMPFP_DO(suffix, compare, order, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            uint32_t result;                                            \
            int rel = float32_compare_quiet(a->f[i], b->f[i],           \
                                            &env->vec_status);          \
            if (rel == float_relation_unordered) {                      \
                result = 0;                                             \
            } else if (rel compare order) {                             \
                result = ones;                                          \
            } else {                                                    \
                result = 0;                                             \
            }                                                           \
            r->u32[i] = result;                                         \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMPFP(suffix, compare, order)          \
    VCMPFP_DO(suffix, compare, order, 0)        \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP

static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
                                    ppc_avr_t *a, ppc_avr_t *b, int record)
{
    int i;
    int all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
        if (le_rel == float_relation_unordered) {
            r->u32[i] = 0xc0000000;
            all_in = 1;
        } else {
            float32 bneg = float32_chs(b->f[i]);
            int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}

void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}

void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}

#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            if (float32_is_any_nan(b->f[i])) {                          \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f[i], &s);            \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT

void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                      ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);

        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#define VMINMAX_DO(name, compare, element)                              \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            if (a->element[i] compare b->element[i]) {                  \
                r->element[i] = b->element[i];                          \
            } else {                                                    \
                r->element[i] = a->element[i];                          \
            }                                                           \
        }                                                               \
    }
#define VMINMAX(suffix, element)                \
    VMINMAX_DO(min##suffix, >, element)         \
    VMINMAX_DO(max##suffix, <, element)
VMINMAX(sb, s8)
VMINMAX(sh, s16)
VMINMAX(sw, s32)
VMINMAX(sd, s64)
VMINMAX(ub, u8)
VMINMAX(uh, u16)
VMINMAX(uw, u32)
VMINMAX(ud, u64)
#undef VMINMAX_DO
#undef VMINMAX

void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        r->s16[i] = (int16_t) (prod + c->s16[i]);
    }
}

#define VMRG_DO(name, element, highp)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        ppc_avr_t result;                                               \
        int i;                                                          \
        size_t n_elems = ARRAY_SIZE(r->element);                        \
                                                                        \
        for (i = 0; i < n_elems / 2; i++) {                             \
            if (highp) {                                                \
                result.element[i*2+HI_IDX] = a->element[i];             \
                result.element[i*2+LO_IDX] = b->element[i];             \
            } else {                                                    \
                result.element[n_elems - i * 2 - (1 + HI_IDX)] =        \
                    b->element[n_elems - i - 1];                        \
                result.element[n_elems - i * 2 - (1 + LO_IDX)] =        \
                    a->element[n_elems - i - 1];                        \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
#if defined(HOST_WORDS_BIGENDIAN)
#define MRGHI 0
#define MRGLO 1
#else
#define MRGHI 1
#define MRGLO 0
#endif
#define VMRG(suffix, element)                   \
    VMRG_DO(mrgl##suffix, element, MRGHI)       \
    VMRG_DO(mrgh##suffix, element, MRGLO)
VMRG(b, u8)
VMRG(h, u16)
VMRG(w, u32)
#undef VMRG_DO
#undef VMRG
#undef MRGHI
#undef MRGLO

void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
        prod[i] = (int32_t)a->s8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
                    prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = (int32_t)a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint16_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        prod[i] = a->u8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
                    prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#define VMUL_DO(name, mul_element, prod_element, cast, evenp)           \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        VECTOR_FOR_INORDER_I(i, prod_element) {                         \
            if (evenp) {                                                \
                r->prod_element[i] =                                    \
                    (cast)a->mul_element[i * 2 + HI_IDX] *              \
                    (cast)b->mul_element[i * 2 + HI_IDX];               \
            } else {                                                    \
                r->prod_element[i] =                                    \
                    (cast)a->mul_element[i * 2 + LO_IDX] *              \
                    (cast)b->mul_element[i * 2 + LO_IDX];               \
            }                                                           \
        }                                                               \
    }
#define VMUL(suffix, mul_element, prod_element, cast)            \
    VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1)    \
    VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
VMUL(sb, s8, s16, int16_t)
VMUL(sh, s16, s32, int32_t)
VMUL(sw, s32, s64, int64_t)
VMUL(ub, u8, u16, uint16_t)
VMUL(uh, u16, u32, uint32_t)
VMUL(uw, u32, u64, uint64_t)
#undef VMUL_DO
#undef VMUL

void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int s = c->u8[i] & 0x1f;
#if defined(HOST_WORDS_BIGENDIAN)
        int index = s & 0xf;
#else
        int index = 15 - (s & 0xf);
#endif

        if (s & 0x10) {
            result.u8[i] = b->u8[index];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
    *r = result;
}

#if defined(HOST_WORDS_BIGENDIAN)
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#endif

void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    uint64_t perm = 0;

    VECTOR_FOR_INORDER_I(i, u8) {
        int index = VBPERMQ_INDEX(b, i);

        if (index < 128) {
            uint64_t mask = (1ull << (63-(index & 0x3F)));
            if (a->u64[VBPERMQ_DW(index)] & mask) {
                perm |= (0x8000 >> i);
            }
        }
    }

    r->u64[HI_IDX] = perm;
    r->u64[LO_IDX] = 0;
}

#undef VBPERMQ_INDEX
#undef VBPERMQ_DW

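/*
 * Illustrative note (not in the original source): VGBBD_MASKS[b] spreads
 * the eight bits of b across the MSBs of the eight bytes of a uint64_t
 * (bit k of b lands in bit 7 of byte k), e.g. VGBBD_MASKS[0x03] =
 * 0x0000000000008080ull.  helper_vgbbd below shifts these masks to gather
 * one bit per source byte, implementing the vgbbd bit-matrix transpose.
 */
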
static const uint64_t VGBBD_MASKS[256] = {
    0x0000000000000000ull, /* 00 */
    0x0000000000000080ull, /* 01 */
    0x0000000000008000ull, /* 02 */
    0x0000000000008080ull, /* 03 */
    0x0000000000800000ull, /* 04 */
    0x0000000000800080ull, /* 05 */
    0x0000000000808000ull, /* 06 */
    0x0000000000808080ull, /* 07 */
    0x0000000080000000ull, /* 08 */
    0x0000000080000080ull, /* 09 */
    0x0000000080008000ull, /* 0A */
    0x0000000080008080ull, /* 0B */
    0x0000000080800000ull, /* 0C */
    0x0000000080800080ull, /* 0D */
    0x0000000080808000ull, /* 0E */
    0x0000000080808080ull, /* 0F */
    0x0000008000000000ull, /* 10 */
    0x0000008000000080ull, /* 11 */
    0x0000008000008000ull, /* 12 */
    0x0000008000008080ull, /* 13 */
    0x0000008000800000ull, /* 14 */
    0x0000008000800080ull, /* 15 */
    0x0000008000808000ull, /* 16 */
    0x0000008000808080ull, /* 17 */
    0x0000008080000000ull, /* 18 */
    0x0000008080000080ull, /* 19 */
    0x0000008080008000ull, /* 1A */
    0x0000008080008080ull, /* 1B */
    0x0000008080800000ull, /* 1C */
    0x0000008080800080ull, /* 1D */
    0x0000008080808000ull, /* 1E */
    0x0000008080808080ull, /* 1F */
    0x0000800000000000ull, /* 20 */
    0x0000800000000080ull, /* 21 */
    0x0000800000008000ull, /* 22 */
    0x0000800000008080ull, /* 23 */
    0x0000800000800000ull, /* 24 */
    0x0000800000800080ull, /* 25 */
    0x0000800000808000ull, /* 26 */
    0x0000800000808080ull, /* 27 */
    0x0000800080000000ull, /* 28 */
    0x0000800080000080ull, /* 29 */
    0x0000800080008000ull, /* 2A */
    0x0000800080008080ull, /* 2B */
    0x0000800080800000ull, /* 2C */
    0x0000800080800080ull, /* 2D */
    0x0000800080808000ull, /* 2E */
    0x0000800080808080ull, /* 2F */
    0x0000808000000000ull, /* 30 */
    0x0000808000000080ull, /* 31 */
    0x0000808000008000ull, /* 32 */
    0x0000808000008080ull, /* 33 */
    0x0000808000800000ull, /* 34 */
    0x0000808000800080ull, /* 35 */
    0x0000808000808000ull, /* 36 */
    0x0000808000808080ull, /* 37 */
    0x0000808080000000ull, /* 38 */
    0x0000808080000080ull, /* 39 */
    0x0000808080008000ull, /* 3A */
    0x0000808080008080ull, /* 3B */
    0x0000808080800000ull, /* 3C */
    0x0000808080800080ull, /* 3D */
    0x0000808080808000ull, /* 3E */
    0x0000808080808080ull, /* 3F */
    0x0080000000000000ull, /* 40 */
    0x0080000000000080ull, /* 41 */
    0x0080000000008000ull, /* 42 */
    0x0080000000008080ull, /* 43 */
    0x0080000000800000ull, /* 44 */
    0x0080000000800080ull, /* 45 */
    0x0080000000808000ull, /* 46 */
    0x0080000000808080ull, /* 47 */
    0x0080000080000000ull, /* 48 */
    0x0080000080000080ull, /* 49 */
    0x0080000080008000ull, /* 4A */
    0x0080000080008080ull, /* 4B */
    0x0080000080800000ull, /* 4C */
    0x0080000080800080ull, /* 4D */
    0x0080000080808000ull, /* 4E */
    0x0080000080808080ull, /* 4F */
    0x0080008000000000ull, /* 50 */
    0x0080008000000080ull, /* 51 */
    0x0080008000008000ull, /* 52 */
    0x0080008000008080ull, /* 53 */
    0x0080008000800000ull, /* 54 */
    0x0080008000800080ull, /* 55 */
    0x0080008000808000ull, /* 56 */
    0x0080008000808080ull, /* 57 */
    0x0080008080000000ull, /* 58 */
    0x0080008080000080ull, /* 59 */
    0x0080008080008000ull, /* 5A */
    0x0080008080008080ull, /* 5B */
    0x0080008080800000ull, /* 5C */
    0x0080008080800080ull, /* 5D */
    0x0080008080808000ull, /* 5E */
    0x0080008080808080ull, /* 5F */
    0x0080800000000000ull, /* 60 */
    0x0080800000000080ull, /* 61 */
    0x0080800000008000ull, /* 62 */
    0x0080800000008080ull, /* 63 */
    0x0080800000800000ull, /* 64 */
    0x0080800000800080ull, /* 65 */
    0x0080800000808000ull, /* 66 */
    0x0080800000808080ull, /* 67 */
    0x0080800080000000ull, /* 68 */
    0x0080800080000080ull, /* 69 */
    0x0080800080008000ull, /* 6A */
    0x0080800080008080ull, /* 6B */
    0x0080800080800000ull, /* 6C */
    0x0080800080800080ull, /* 6D */
    0x0080800080808000ull, /* 6E */
    0x0080800080808080ull, /* 6F */
    0x0080808000000000ull, /* 70 */
    0x0080808000000080ull, /* 71 */
    0x0080808000008000ull, /* 72 */
    0x0080808000008080ull, /* 73 */
    0x0080808000800000ull, /* 74 */
    0x0080808000800080ull, /* 75 */
    0x0080808000808000ull, /* 76 */
    0x0080808000808080ull, /* 77 */
    0x0080808080000000ull, /* 78 */
    0x0080808080000080ull, /* 79 */
    0x0080808080008000ull, /* 7A */
    0x0080808080008080ull, /* 7B */
    0x0080808080800000ull, /* 7C */
    0x0080808080800080ull, /* 7D */
    0x0080808080808000ull, /* 7E */
    0x0080808080808080ull, /* 7F */
    0x8000000000000000ull, /* 80 */
    0x8000000000000080ull, /* 81 */
    0x8000000000008000ull, /* 82 */
    0x8000000000008080ull, /* 83 */
    0x8000000000800000ull, /* 84 */
    0x8000000000800080ull, /* 85 */
    0x8000000000808000ull, /* 86 */
    0x8000000000808080ull, /* 87 */
    0x8000000080000000ull, /* 88 */
    0x8000000080000080ull, /* 89 */
    0x8000000080008000ull, /* 8A */
    0x8000000080008080ull, /* 8B */
    0x8000000080800000ull, /* 8C */
    0x8000000080800080ull, /* 8D */
    0x8000000080808000ull, /* 8E */
    0x8000000080808080ull, /* 8F */
    0x8000008000000000ull, /* 90 */
    0x8000008000000080ull, /* 91 */
    0x8000008000008000ull, /* 92 */
    0x8000008000008080ull, /* 93 */
    0x8000008000800000ull, /* 94 */
    0x8000008000800080ull, /* 95 */
    0x8000008000808000ull, /* 96 */
    0x8000008000808080ull, /* 97 */
    0x8000008080000000ull, /* 98 */
    0x8000008080000080ull, /* 99 */
    0x8000008080008000ull, /* 9A */
    0x8000008080008080ull, /* 9B */
    0x8000008080800000ull, /* 9C */
    0x8000008080800080ull, /* 9D */
    0x8000008080808000ull, /* 9E */
    0x8000008080808080ull, /* 9F */
    0x8000800000000000ull, /* A0 */
    0x8000800000000080ull, /* A1 */
    0x8000800000008000ull, /* A2 */
    0x8000800000008080ull, /* A3 */
    0x8000800000800000ull, /* A4 */
    0x8000800000800080ull, /* A5 */
    0x8000800000808000ull, /* A6 */
    0x8000800000808080ull, /* A7 */
    0x8000800080000000ull, /* A8 */
    0x8000800080000080ull, /* A9 */
    0x8000800080008000ull, /* AA */
    0x8000800080008080ull, /* AB */
    0x8000800080800000ull, /* AC */
    0x8000800080800080ull, /* AD */
    0x8000800080808000ull, /* AE */
    0x8000800080808080ull, /* AF */
    0x8000808000000000ull, /* B0 */
    0x8000808000000080ull, /* B1 */
    0x8000808000008000ull, /* B2 */
    0x8000808000008080ull, /* B3 */
    0x8000808000800000ull, /* B4 */
    0x8000808000800080ull, /* B5 */
    0x8000808000808000ull, /* B6 */
    0x8000808000808080ull, /* B7 */
    0x8000808080000000ull, /* B8 */
    0x8000808080000080ull, /* B9 */
    0x8000808080008000ull, /* BA */
    0x8000808080008080ull, /* BB */
    0x8000808080800000ull, /* BC */
    0x8000808080800080ull, /* BD */
    0x8000808080808000ull, /* BE */
    0x8000808080808080ull, /* BF */
    0x8080000000000000ull, /* C0 */
    0x8080000000000080ull, /* C1 */
    0x8080000000008000ull, /* C2 */
    0x8080000000008080ull, /* C3 */
    0x8080000000800000ull, /* C4 */
    0x8080000000800080ull, /* C5 */
    0x8080000000808000ull, /* C6 */
    0x8080000000808080ull, /* C7 */
    0x8080000080000000ull, /* C8 */
    0x8080000080000080ull, /* C9 */
    0x8080000080008000ull, /* CA */
    0x8080000080008080ull, /* CB */
    0x8080000080800000ull, /* CC */
    0x8080000080800080ull, /* CD */
    0x8080000080808000ull, /* CE */
    0x8080000080808080ull, /* CF */
    0x8080008000000000ull, /* D0 */
    0x8080008000000080ull, /* D1 */
    0x8080008000008000ull, /* D2 */
    0x8080008000008080ull, /* D3 */
    0x8080008000800000ull, /* D4 */
    0x8080008000800080ull, /* D5 */
    0x8080008000808000ull, /* D6 */
    0x8080008000808080ull, /* D7 */
    0x8080008080000000ull, /* D8 */
    0x8080008080000080ull, /* D9 */
    0x8080008080008000ull, /* DA */
    0x8080008080008080ull, /* DB */
    0x8080008080800000ull, /* DC */
    0x8080008080800080ull, /* DD */
    0x8080008080808000ull, /* DE */
    0x8080008080808080ull, /* DF */
    0x8080800000000000ull, /* E0 */
    0x8080800000000080ull, /* E1 */
    0x8080800000008000ull, /* E2 */
    0x8080800000008080ull, /* E3 */
    0x8080800000800000ull, /* E4 */
    0x8080800000800080ull, /* E5 */
    0x8080800000808000ull, /* E6 */
    0x8080800000808080ull, /* E7 */
    0x8080800080000000ull, /* E8 */
    0x8080800080000080ull, /* E9 */
    0x8080800080008000ull, /* EA */
    0x8080800080008080ull, /* EB */
    0x8080800080800000ull, /* EC */
    0x8080800080800080ull, /* ED */
    0x8080800080808000ull, /* EE */
    0x8080800080808080ull, /* EF */
    0x8080808000000000ull, /* F0 */
    0x8080808000000080ull, /* F1 */
    0x8080808000008000ull, /* F2 */
    0x8080808000008080ull, /* F3 */
    0x8080808000800000ull, /* F4 */
    0x8080808000800080ull, /* F5 */
    0x8080808000808000ull, /* F6 */
    0x8080808000808080ull, /* F7 */
    0x8080808080000000ull, /* F8 */
    0x8080808080000080ull, /* F9 */
    0x8080808080008000ull, /* FA */
    0x8080808080008080ull, /* FB */
    0x8080808080800000ull, /* FC */
    0x8080808080800080ull, /* FD */
    0x8080808080808000ull, /* FE */
    0x8080808080808080ull, /* FF */
};

void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    uint64_t t[2] = { 0, 0 };

    VECTOR_FOR_INORDER_I(i, u8) {
#if defined(HOST_WORDS_BIGENDIAN)
        t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
#else
        t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
#endif
    }

    r->u64[0] = t[0];
    r->u64[1] = t[1];
}

#define PMSUM(name, srcfld, trgfld, trgtyp)                             \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)            \
{                                                                       \
    int i, j;                                                           \
    trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])];                \
                                                                        \
    VECTOR_FOR_INORDER_I(i, srcfld) {                                   \
        prod[i] = 0;                                                    \
        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {                \
            if (a->srcfld[i] & (1ull<<j)) {                             \
                prod[i] ^= ((trgtyp)b->srcfld[i] << j);                 \
            }                                                           \
        }                                                               \
    }                                                                   \
                                                                        \
    VECTOR_FOR_INORDER_I(i, trgfld) {                                   \
        r->trgfld[i] = prod[2*i] ^ prod[2*i+1];                         \
    }                                                                   \
}

PMSUM(vpmsumb, u8, u16, uint16_t)
PMSUM(vpmsumh, u16, u32, uint32_t)
PMSUM(vpmsumw, u32, u64, uint64_t)

void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{

#ifdef CONFIG_INT128
    int i, j;
    __uint128_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i] = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull<<j)) {
                prod[i] ^= (((__uint128_t)b->u64[i]) << j);
            }
        }
    }

    r->u128 = prod[0] ^ prod[1];

#else
    int i, j;
    ppc_avr_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull<<j)) {
                ppc_avr_t bshift;
                if (j == 0) {
                    bshift.u64[HI_IDX] = 0;
                    bshift.u64[LO_IDX] = b->u64[i];
                } else {
                    bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
                    bshift.u64[LO_IDX] = b->u64[i] << j;
                }
                prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
                prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
            }
        }
    }

    r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
    r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
#endif
}

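/*
 * Illustrative note (not in the original source): vpmsum* performs
 * carry-less (GF(2) polynomial) multiplication -- partial products are
 * combined with XOR instead of ADD.  E.g. 0b101 * 0b11 = 0b1111:
 * (0b101 << 0) ^ (0b101 << 1) = 0b101 ^ 0b1010, with no carries, which is
 * why the loops above shift and XOR rather than accumulate arithmetically.
 */
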
#if defined(HOST_WORDS_BIGENDIAN)
#define PKBIG 1
#else
#define PKBIG 0
#endif
void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result;
#if defined(HOST_WORDS_BIGENDIAN)
    const ppc_avr_t *x[2] = { a, b };
#else
    const ppc_avr_t *x[2] = { b, a };
#endif

    VECTOR_FOR_INORDER_I(i, u64) {
        VECTOR_FOR_INORDER_I(j, u32) {
            uint32_t e = x[i]->u32[j];

            result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
                                 ((e >> 6) & 0x3e0) |
                                 ((e >> 3) & 0x1f));
        }
    }
    *r = result;
}

#define VPK(suffix, from, to, cvt, dosat)                               \
    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *a, ppc_avr_t *b)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        ppc_avr_t result;                                               \
        ppc_avr_t *a0 = PKBIG ? a : b;                                  \
        ppc_avr_t *a1 = PKBIG ? b : a;                                  \
                                                                        \
        VECTOR_FOR_INORDER_I(i, from) {                                 \
            result.to[i] = cvt(a0->from[i], &sat);                      \
            result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);  \
        }                                                               \
        *r = result;                                                    \
        if (dosat && sat) {                                             \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define I(x, y) (x)
VPK(shss, s16, s8, cvtshsb, 1)
VPK(shus, s16, u8, cvtshub, 1)
VPK(swss, s32, s16, cvtswsh, 1)
VPK(swus, s32, u16, cvtswuh, 1)
VPK(sdss, s64, s32, cvtsdsw, 1)
VPK(sdus, s64, u32, cvtsduw, 1)
VPK(uhus, u16, u8, cvtuhub, 1)
VPK(uwus, u32, u16, cvtuwuh, 1)
VPK(udus, u64, u32, cvtuduw, 1)
VPK(uhum, u16, u8, I, 0)
VPK(uwum, u32, u16, I, 0)
VPK(udum, u64, u32, I, 0)
#undef I
#undef VPK
#undef PKBIG

void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
    }
}

#define VRFI(suffix, rounding)                                  \
    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,    \
                             ppc_avr_t *b)                      \
    {                                                           \
        int i;                                                  \
        float_status s = env->vec_status;                       \
                                                                \
        set_float_rounding_mode(rounding, &s);                  \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                \
            r->f[i] = float32_round_to_int(b->f[i], &s);        \
        }                                                       \
    }
VRFI(n, float_round_nearest_even)
VRFI(m, float_round_down)
VRFI(p, float_round_up)
VRFI(z, float_round_to_zero)
#undef VRFI

#define VROTATE(suffix, element, mask)                                  \
    void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int shift = b->element[i] & mask;                  \
            r->element[i] = (a->element[i] << shift) |                  \
                (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
        }                                                               \
    }
VROTATE(b, u8, 0x7)
VROTATE(h, u16, 0xF)
VROTATE(w, u32, 0x1F)
VROTATE(d, u64, 0x3F)
#undef VROTATE

void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        float32 t = float32_sqrt(b->f[i], &env->vec_status);

        r->f[i] = float32_div(float32_one, t, &env->vec_status);
    }
}

void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                 ppc_avr_t *c)
{
    r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
    r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
}

void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        r->f[i] = float32_exp2(b->f[i], &env->vec_status);
    }
}

void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        r->f[i] = float32_log2(b->f[i], &env->vec_status);
    }
}

/* The specification says that the results are undefined if all of the
 * shift counts are not identical.  We check that they are, to conform
 * to what real hardware appears to do.  */
#define VSHIFT(suffix, leftp)                                           \
    void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)    \
    {                                                                   \
        int shift = b->u8[LO_IDX*15] & 0x7;                             \
        int doit = 1;                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u8); i++) {                       \
            doit = doit && ((b->u8[i] & 0x7) == shift);                 \
        }                                                               \
        if (doit) {                                                     \
            if (shift == 0) {                                           \
                *r = *a;                                                \
            } else if (leftp) {                                         \
                uint64_t carry = a->u64[LO_IDX] >> (64 - shift);        \
                                                                        \
                r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry;     \
                r->u64[LO_IDX] = a->u64[LO_IDX] << shift;               \
            } else {                                                    \
                uint64_t carry = a->u64[HI_IDX] << (64 - shift);        \
                                                                        \
                r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry;     \
                r->u64[HI_IDX] = a->u64[HI_IDX] >> shift;               \
            }                                                           \
        }                                                               \
    }
VSHIFT(l, 1)
VSHIFT(r, 0)
#undef VSHIFT

#define VSL(suffix, element, mask)                                      \
    void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int shift = b->element[i] & mask;                  \
                                                                        \
            r->element[i] = a->element[i] << shift;                     \
        }                                                               \
    }
VSL(b, u8, 0x7)
VSL(h, u16, 0x0F)
VSL(w, u32, 0x1F)
VSL(d, u64, 0x3F)
#undef VSL

void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes, size;

    size = ARRAY_SIZE(r->u8);
    for (i = 0; i < size; i++) {
        shift = b->u8[i] & 0x7;             /* extract shift value */
        bytes = (a->u8[i] << 8) +           /* extract adjacent bytes */
                (((i + 1) < size) ? a->u8[i + 1] : 0);
        r->u8[i] = (bytes << shift) >> 8;   /* shift and store result */
    }
}

void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes;

    /* Use reverse order, as the destination and source registers can be
     * the same.  The register is modified in place (saving a temporary),
     * and processing in reverse order guarantees that a computed result
     * is not fed back into the computation.
     */
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        shift = b->u8[i] & 0x7;             /* extract shift value */
        bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i];
                                            /* extract adjacent bytes */
        r->u8[i] = (bytes >> shift) & 0xFF; /* shift and store result */
    }
}

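/*
 * Illustrative note (not in the original source): vslv/vsrv shift each
 * byte by its own count, pulling spill-in bits from the neighbouring
 * byte.  E.g. in helper_vslv with a->u8[i] = 0x01, a->u8[i + 1] = 0x80
 * and a shift of 1, bytes = 0x0180 and (0x0180 << 1) >> 8 = 0x03 -- the
 * top bit of the next byte shifts in from the right.
 */
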
64654ded
BS
1730void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1731{
1732 int sh = shift & 0xf;
1733 int i;
1734 ppc_avr_t result;
1735
1736#if defined(HOST_WORDS_BIGENDIAN)
1737 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1738 int index = sh + i;
1739 if (index > 0xf) {
1740 result.u8[i] = b->u8[index - 0x10];
1741 } else {
1742 result.u8[i] = a->u8[index];
1743 }
1744 }
1745#else
1746 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1747 int index = (16 - sh) + i;
1748 if (index > 0xf) {
1749 result.u8[i] = a->u8[index - 0x10];
1750 } else {
1751 result.u8[i] = b->u8[index];
1752 }
1753 }
1754#endif
1755 *r = result;
1756}
1757
1758void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1759{
1760 int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;
1761
1762#if defined(HOST_WORDS_BIGENDIAN)
1763 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1764 memset(&r->u8[16-sh], 0, sh);
1765#else
1766 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1767 memset(&r->u8[0], 0, sh);
1768#endif
1769}
1770
1771/* Experimental testing shows that hardware masks the immediate. */
1772#define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
1773#if defined(HOST_WORDS_BIGENDIAN)
1774#define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
1775#else
1776#define SPLAT_ELEMENT(element) \
1777 (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
1778#endif
1779#define VSPLT(suffix, element) \
1780 void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
1781 { \
1782 uint32_t s = b->element[SPLAT_ELEMENT(element)]; \
1783 int i; \
1784 \
1785 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1786 r->element[i] = s; \
1787 } \
1788 }
1789VSPLT(b, u8)
1790VSPLT(h, u16)
1791VSPLT(w, u32)
1792#undef VSPLT
1793#undef SPLAT_ELEMENT
1794#undef _SPLAT_MASKED
1795
1796#define VSPLTI(suffix, element, splat_type) \
1797 void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat) \
1798 { \
1799 splat_type x = (int8_t)(splat << 3) >> 3; \
1800 int i; \
1801 \
1802 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1803 r->element[i] = x; \
1804 } \
1805 }
1806VSPLTI(b, s8, int8_t)
1807VSPLTI(h, s16, int16_t)
1808VSPLTI(w, s32, int32_t)
1809#undef VSPLTI
1810
818692ff 1811#define VSR(suffix, element, mask) \
64654ded
BS
1812 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1813 { \
1814 int i; \
1815 \
1816 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
64654ded 1817 unsigned int shift = b->element[i] & mask; \
64654ded
BS
1818 r->element[i] = a->element[i] >> shift; \
1819 } \
1820 }
818692ff
TM
1821VSR(ab, s8, 0x7)
1822VSR(ah, s16, 0xF)
1823VSR(aw, s32, 0x1F)
2fdf78e6 1824VSR(ad, s64, 0x3F)
818692ff
TM
1825VSR(b, u8, 0x7)
1826VSR(h, u16, 0xF)
1827VSR(w, u32, 0x1F)
2fdf78e6 1828VSR(d, u64, 0x3F)
64654ded
BS
1829#undef VSR
1830
1831void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1832{
1833 int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;
1834
1835#if defined(HOST_WORDS_BIGENDIAN)
1836 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1837 memset(&r->u8[0], 0, sh);
1838#else
1839 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1840 memset(&r->u8[16 - sh], 0, sh);
1841#endif
1842}
1843
1844void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1845{
1846 int i;
1847
1848 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1849 r->u32[i] = a->u32[i] >= b->u32[i];
1850 }
1851}
1852
d15f74fb 1853void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
64654ded
BS
1854{
1855 int64_t t;
1856 int i, upper;
1857 ppc_avr_t result;
1858 int sat = 0;
1859
1860#if defined(HOST_WORDS_BIGENDIAN)
1861 upper = ARRAY_SIZE(r->s32)-1;
1862#else
1863 upper = 0;
1864#endif
1865 t = (int64_t)b->s32[upper];
1866 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1867 t += a->s32[i];
1868 result.s32[i] = 0;
1869 }
1870 result.s32[upper] = cvtsdsw(t, &sat);
1871 *r = result;
1872
1873 if (sat) {
1874 env->vscr |= (1 << VSCR_SAT);
1875 }
1876}
1877
d15f74fb 1878void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
64654ded
BS
1879{
1880 int i, j, upper;
1881 ppc_avr_t result;
1882 int sat = 0;
1883
1884#if defined(HOST_WORDS_BIGENDIAN)
1885 upper = 1;
1886#else
1887 upper = 0;
1888#endif
1889 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1890 int64_t t = (int64_t)b->s32[upper + i * 2];
1891
1892 result.u64[i] = 0;
1893 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1894 t += a->s32[2 * i + j];
1895 }
1896 result.s32[upper + i * 2] = cvtsdsw(t, &sat);
1897 }
1898
1899 *r = result;
1900 if (sat) {
1901 env->vscr |= (1 << VSCR_SAT);
1902 }
1903}
1904
d15f74fb 1905void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
64654ded
BS
1906{
1907 int i, j;
1908 int sat = 0;
1909
1910 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1911 int64_t t = (int64_t)b->s32[i];
1912
1913 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1914 t += a->s8[4 * i + j];
1915 }
1916 r->s32[i] = cvtsdsw(t, &sat);
1917 }
1918
1919 if (sat) {
1920 env->vscr |= (1 << VSCR_SAT);
1921 }
1922}
1923
d15f74fb 1924void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
64654ded
BS
1925{
1926 int sat = 0;
1927 int i;
1928
1929 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1930 int64_t t = (int64_t)b->s32[i];
1931
1932 t += a->s16[2 * i] + a->s16[2 * i + 1];
1933 r->s32[i] = cvtsdsw(t, &sat);
1934 }
1935
1936 if (sat) {
1937 env->vscr |= (1 << VSCR_SAT);
1938 }
1939}
1940
d15f74fb 1941void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
64654ded
BS
1942{
1943 int i, j;
1944 int sat = 0;
1945
1946 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1947 uint64_t t = (uint64_t)b->u32[i];
1948
1949 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1950 t += a->u8[4 * i + j];
1951 }
1952 r->u32[i] = cvtuduw(t, &sat);
1953 }
1954
1955 if (sat) {
1956 env->vscr |= (1 << VSCR_SAT);
1957 }
1958}
1959
1960#if defined(HOST_WORDS_BIGENDIAN)
1961#define UPKHI 1
1962#define UPKLO 0
1963#else
1964#define UPKHI 0
1965#define UPKLO 1
1966#endif
1967#define VUPKPX(suffix, hi) \
1968 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1969 { \
1970 int i; \
1971 ppc_avr_t result; \
1972 \
1973 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
1974 uint16_t e = b->u16[hi ? i : i+4]; \
1975 uint8_t a = (e >> 15) ? 0xff : 0; \
1976 uint8_t r = (e >> 10) & 0x1f; \
1977 uint8_t g = (e >> 5) & 0x1f; \
1978 uint8_t b = e & 0x1f; \
1979 \
1980 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
1981 } \
1982 *r = result; \
1983 }
1984VUPKPX(lpx, UPKLO)
1985VUPKPX(hpx, UPKHI)
1986#undef VUPKPX

#define VUPK(suffix, unpacked, packee, hi)                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        if (hi) {                                                       \
            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
                result.unpacked[i] = b->packee[i];                      \
            }                                                           \
        } else {                                                        \
            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
                 i++) {                                                 \
                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
VUPK(hsb, s16, s8, UPKHI)
VUPK(hsh, s32, s16, UPKHI)
VUPK(hsw, s64, s32, UPKHI)
VUPK(lsb, s16, s8, UPKLO)
VUPK(lsh, s32, s16, UPKLO)
VUPK(lsw, s64, s32, UPKLO)
#undef VUPK
#undef UPKHI
#undef UPKLO

#define VGENERIC_DO(name, element)                                      \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *b)                     \
    {                                                                   \
        int i;                                                          \
                                                                        \
        VECTOR_FOR_INORDER_I(i, element) {                              \
            r->element[i] = name(b->element[i]);                        \
        }                                                               \
    }

#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
#define clzw(v) clz32((v))
#define clzd(v) clz64((v))

VGENERIC_DO(clzb, u8)
VGENERIC_DO(clzh, u16)
VGENERIC_DO(clzw, u32)
VGENERIC_DO(clzd, u64)

#undef clzb
#undef clzh
#undef clzw
#undef clzd
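
/*
 * Note: clzb/clzh shift the narrow value to the top of a 32-bit word so
 * that clz32() counts only the element's own bits, with the all-zero case
 * special-cased to the element width.  E.g. clzb(0x10) =
 * clz32(0x10000000) = 3, and clzb(0) = 8.
 */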

#define popcntb(v) ctpop8(v)
#define popcnth(v) ctpop16(v)
#define popcntw(v) ctpop32(v)
#define popcntd(v) ctpop64(v)

VGENERIC_DO(popcntb, u8)
VGENERIC_DO(popcnth, u16)
VGENERIC_DO(popcntw, u32)
VGENERIC_DO(popcntd, u64)

#undef popcntb
#undef popcnth
#undef popcntw
#undef popcntd

#undef VGENERIC_DO

#if defined(HOST_WORDS_BIGENDIAN)
#define QW_ONE { .u64 = { 0, 1 } }
#else
#define QW_ONE { .u64 = { 1, 0 } }
#endif

#ifndef CONFIG_INT128

static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
{
    t->u64[0] = ~a.u64[0];
    t->u64[1] = ~a.u64[1];
}

static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
{
    if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
        return -1;
    } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
        return 1;
    } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
        return -1;
    } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
        return 1;
    } else {
        return 0;
    }
}

static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
    t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
                     (~a.u64[LO_IDX] < b.u64[LO_IDX]);
}
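
/*
 * Note: the carry out of the low 64-bit addition is detected without a
 * wider type by observing that a + b overflows iff b > ~a, i.e.
 * b > UINT64_MAX - a.  In 8-bit miniature: a = 0xF0, b = 0x20 gives
 * ~a = 0x0F < 0x20, so the true sum 0x110 wraps and a carry of 1 must be
 * propagated into the high half.
 */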

static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    ppc_avr_t not_a;
    t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
    t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
                     (~a.u64[LO_IDX] < b.u64[LO_IDX]);
    avr_qw_not(&not_a, a);
    return avr_qw_cmpu(not_a, b) < 0;
}

#endif

void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128;
#else
    avr_qw_add(r, *a, *b);
#endif
}

void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128 + (c->u128 & 1);
#else

    if (c->u64[LO_IDX] & 1) {
        ppc_avr_t tmp;

        tmp.u64[HI_IDX] = 0;
        tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
        avr_qw_add(&tmp, *a, tmp);
        avr_qw_add(r, tmp, *b);
    } else {
        avr_qw_add(r, *a, *b);
    }
#endif
}

void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < b->u128);
#else
    ppc_avr_t not_a;

    avr_qw_not(&not_a, *a);

    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
#endif
}

void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    int carry_out = (~a->u128 < b->u128);
    if (!carry_out && (c->u128 & 1)) {
        carry_out = ((a->u128 + b->u128 + 1) == 0) &&
                    ((a->u128 != 0) || (b->u128 != 0));
    }
    r->u128 = carry_out;
#else

    int carry_in = c->u64[LO_IDX] & 1;
    int carry_out = 0;
    ppc_avr_t tmp;

    carry_out = avr_qw_addc(&tmp, *a, *b);

    if (!carry_out && carry_in) {
        ppc_avr_t one = QW_ONE;
        carry_out = avr_qw_addc(&tmp, tmp, one);
    }
    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = carry_out;
#endif
}
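
/*
 * Note: vaddecuq reports the carry out of a + b + carry_in.  The carry_in
 * can only produce an extra carry when a + b did not already carry, and
 * then only if a + b is exactly 2^128 - 1, so that adding 1 wraps the
 * 128-bit sum to 0.
 */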

void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 - b->u128;
#else
    ppc_avr_t tmp;
    ppc_avr_t one = QW_ONE;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&tmp, *a, tmp);
    avr_qw_add(r, tmp, one);
#endif
}

void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
#else
    ppc_avr_t tmp, sum;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&sum, *a, tmp);

    tmp.u64[HI_IDX] = 0;
    tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
    avr_qw_add(r, sum, tmp);
#endif
}

void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < ~b->u128) ||
              (a->u128 + ~b->u128 == (__uint128_t)-1);
#else
    int carry = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
    }
    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = carry;
#endif
}

void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 =
        (~a->u128 < ~b->u128) ||
        ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
#else
    int carry_in = c->u64[LO_IDX] & 1;
    int carry_out = (avr_qw_cmpu(*a, *b) > 0);
    if (!carry_out && carry_in) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
    }

    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = carry_out;
#endif
}

#define BCD_PLUS_PREF_1 0xC
#define BCD_PLUS_PREF_2 0xF
#define BCD_PLUS_ALT_1 0xA
#define BCD_NEG_PREF 0xD
#define BCD_NEG_ALT 0xB
#define BCD_PLUS_ALT_2 0xE

#if defined(HOST_WORDS_BIGENDIAN)
#define BCD_DIG_BYTE(n) (15 - (n / 2))
#else
#define BCD_DIG_BYTE(n) (n / 2)
#endif

static int bcd_get_sgn(ppc_avr_t *bcd)
{
    switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
    case BCD_PLUS_PREF_1:
    case BCD_PLUS_PREF_2:
    case BCD_PLUS_ALT_1:
    case BCD_PLUS_ALT_2:
    {
        return 1;
    }

    case BCD_NEG_PREF:
    case BCD_NEG_ALT:
    {
        return -1;
    }

    default:
    {
        return 0;
    }
    }
}

static int bcd_preferred_sgn(int sgn, int ps)
{
    if (sgn >= 0) {
        return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
    } else {
        return BCD_NEG_PREF;
    }
}

static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
{
    uint8_t result;
    if (n & 1) {
        result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
    } else {
        result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
    }

    if (unlikely(result > 9)) {
        *invalid = true;
    }
    return result;
}

static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
{
    if (n & 1) {
        bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
        bcd->u8[BCD_DIG_BYTE(n)] |= (digit << 4);
    } else {
        bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
        bcd->u8[BCD_DIG_BYTE(n)] |= digit;
    }
}
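
/*
 * Note on the digit layout: digit 0 is the sign nibble and digits 1..31
 * grow toward the most significant end; odd digits occupy the high nibble
 * of their byte, even digits the low nibble.  E.g. +123 is encoded (most
 * significant byte first) as 00 ... 00 12 3C: digit 1 (value 3) shares a
 * byte with the sign nibble 0xC, and digits 3 and 2 (values 1 and 2) form
 * the byte 0x12.
 */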

static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    int invalid = 0;
    for (i = 31; i > 0; i--) {
        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return 0; /* doesn't matter */
        } else if (dig_a > dig_b) {
            return 1;
        } else if (dig_a < dig_b) {
            return -1;
        }
    }

    return 0;
}

static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                       int *overflow)
{
    int carry = 0;
    int i;
    int is_zero = 1;
    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) +
                        bcd_get_digit(b, i, invalid) + carry;
        is_zero &= (digit == 0);
        if (digit > 9) {
            carry = 1;
            digit -= 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);

        if (unlikely(*invalid)) {
            return -1;
        }
    }

    *overflow = carry;
    return is_zero;
}

static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                       int *overflow)
{
    int carry = 0;
    int i;
    int is_zero = 1;
    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) -
                        bcd_get_digit(b, i, invalid) + carry;
        is_zero &= (digit == 0);
        if (digit & 0x80) {
            carry = -1;
            digit += 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);

        if (unlikely(*invalid)) {
            return -1;
        }
    }

    *overflow = carry;
    return is_zero;
}

uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int sgna = bcd_get_sgn(a);
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgna == 0) || (sgnb == 0);
    int overflow = 0;
    int zero = 0;
    uint32_t cr = 0;
    ppc_avr_t result = { .u64 = { 0, 0 } };

    if (!invalid) {
        if (sgna == sgnb) {
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
            zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
            cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
        } else if (bcd_cmp_mag(a, b) > 0) {
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
            zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
            cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
        } else {
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
            zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
            cr = (sgnb > 0) ? 1 << CRF_GT : 1 << CRF_LT;
        }
    }

    if (unlikely(invalid)) {
        result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
        cr = 1 << CRF_SO;
    } else if (overflow) {
        cr |= 1 << CRF_SO;
    } else if (zero) {
        cr = 1 << CRF_EQ;
    }

    *r = result;

    return cr;
}
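
/*
 * Note: signed BCD addition reduces to magnitude arithmetic.  Like signs
 * add magnitudes and keep the common sign; unlike signs subtract the
 * smaller magnitude from the larger and take the sign of the larger.
 * E.g. (+123) + (-45): the signs differ and |a| > |b|, so the result is
 * +078 and CR is set to "greater than".
 */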

uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    ppc_avr_t bcopy = *b;
    int sgnb = bcd_get_sgn(b);
    if (sgnb < 0) {
        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
    } else if (sgnb > 0) {
        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
    }
    /* else invalid ... defer to bcdadd code for proper handling */

    return helper_bcdadd(r, a, &bcopy, ps);
}

void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = AES_sbox[a->u8[i]];
    }
}

void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        result.AVRW(i) = b->AVRW(i) ^
            (AES_Te0[a->AVRB(AES_shifts[4 * i + 0])] ^
             AES_Te1[a->AVRB(AES_shifts[4 * i + 1])] ^
             AES_Te2[a->AVRB(AES_shifts[4 * i + 2])] ^
             AES_Te3[a->AVRB(AES_shifts[4 * i + 3])]);
    }
    *r = result;
}
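
/*
 * Note: each AES_Te table folds SubBytes and MixColumns into a single
 * 256-entry lookup of 32-bit columns, while AES_shifts realises ShiftRows
 * by selecting which source byte feeds each output word; XORing in b (the
 * round-key operand) completes AddRoundKey.  A full round therefore costs
 * four table lookups and four XORs per output word.
 */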

void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]);
    }
    *r = result;
}

void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /*
     * This differs from what is written in ISA V2.07.  The RTL is
     * incorrect and will be fixed in V2.07B.
     */
    int i;
    ppc_avr_t tmp;

    VECTOR_FOR_INORDER_I(i, u8) {
        tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->AVRW(i) =
            AES_imc[tmp.AVRB(4 * i + 0)][0] ^
            AES_imc[tmp.AVRB(4 * i + 1)][1] ^
            AES_imc[tmp.AVRB(4 * i + 2)][2] ^
            AES_imc[tmp.AVRB(4 * i + 3)][3];
    }
}

void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]);
    }
    *r = result;
}

#define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32 - (n))))
#if defined(HOST_WORDS_BIGENDIAN)
#define EL_IDX(i) (i)
#else
#define EL_IDX(i) (3 - (i))
#endif

void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 18) ^
                                    (a->u32[EL_IDX(i)] >> 3);
            } else { /* six.bit[i] == 1 */
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 19) ^
                                    (a->u32[EL_IDX(i)] >> 10);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> i)) == 0) {
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 13) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 22);
            } else { /* six.bit[i] == 1 */
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 11) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 25);
            }
        }
    }
}
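
/*
 * Note: st_six selects one of the four SHA-256 sigma functions per
 * element: st = 0 picks the message-schedule sigma_0/sigma_1, st = 1 the
 * compression Sigma_0/Sigma_1, and bit i of six chooses the 0/1 variant
 * for word i.  E.g. st_six = 0x1F applies Sigma_1 (rotates by 6, 11, 25)
 * to all four words.
 */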

#undef ROTRu32
#undef EL_IDX

#define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64 - (n))))
#if defined(HOST_WORDS_BIGENDIAN)
#define EL_IDX(i) (i)
#else
#define EL_IDX(i) (1 - (i))
#endif

void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    VECTOR_FOR_INORDER_I(i, u64) {
        if (st == 0) {
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 8) ^
                                    (a->u64[EL_IDX(i)] >> 7);
            } else { /* six.bit[2*i] == 1 */
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 61) ^
                                    (a->u64[EL_IDX(i)] >> 6);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 34) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 39);
            } else { /* six.bit[2*i] == 1 */
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 18) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 41);
            }
        }
    }
}

#undef ROTRu64
#undef EL_IDX

void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int indexA = c->u8[i] >> 4;
        int indexB = c->u8[i] & 0xF;
#if defined(HOST_WORDS_BIGENDIAN)
        result.u8[i] = a->u8[indexA] ^ b->u8[indexB];
#else
        result.u8[i] = a->u8[15 - indexA] ^ b->u8[15 - indexB];
#endif
    }
    *r = result;
}
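
/*
 * Note: each control byte of c packs two 4-bit element selectors; the
 * result byte is the XOR of the selected byte of a and the selected byte
 * of b.  E.g. c->u8[i] = 0x3A XORs element 3 of a with element 10 of b
 * (elements numbered in big-endian order, hence the 15 - index mirroring
 * on little-endian hosts).
 */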

#undef VECTOR_FOR_INORDER_I
#undef HI_IDX
#undef LO_IDX

/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
        (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}
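
/*
 * Note: hbrev maps a nibble to its bit reversal, byte_reverse() swaps the
 * two reversed nibbles, and word_reverse() additionally swaps the four
 * reversed bytes, yielding a full 32-bit bit reversal.  E.g.
 * word_reverse(0x80000000) == 0x00000001.
 */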

#define MASKBITS 16 /* Arbitrary placeholder - to be fixed (the mask width
                     * is implementation dependent) */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}
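
/*
 * Note: brinc computes the next buffer index in bit-reversed order by
 * incrementing in the reversed domain: reverse the masked bits, add 1
 * (the ORed-in ~b makes carries skip bits outside the buffer mask), then
 * reverse back.  E.g. with b = 0x7 (an 8-entry buffer) and a = 0x4, the
 * helper returns 0x2, matching the bit-reversed sequence
 * 0, 4, 2, 6, 1, 5, 3, 7.
 */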

uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

/* 440 specific */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}
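
/*
 * Note: dlmzb scans the eight bytes of high:low from the most significant
 * byte of high downward and returns the 1-based position of the first
 * zero byte (or 8 if none is found) in XER and RT.  E.g. a zero in the
 * top byte of high gives i = 1 with CR0 = 0x4 when Rc is set; no zero
 * byte at all gives i = 8 with CR0 = 0x2.
 */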