2 * PowerPC integer and vector emulation helpers for QEMU.
4 * Copyright (c) 2003-2007 Jocelyn Mayer
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
22 #include "exec/exec-all.h"
23 #include "qemu/host-utils.h"
24 #include "exec/helper-proto.h"
25 #include "crypto/aes.h"
26 #include "fpu/softfloat.h"
28 #include "helper_regs.h"
29 /*****************************************************************************/
30 /* Fixed point operations helpers */
32 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
35 env->so = env->ov = 1;
41 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
47 uint64_t dividend = (uint64_t)ra << 32;
48 uint64_t divisor = (uint32_t)rb;
50 if (unlikely(divisor == 0)) {
53 rt = dividend / divisor;
54 overflow = rt > UINT32_MAX;
57 if (unlikely(overflow)) {
58 rt = 0; /* Undefined */
62 helper_update_ov_legacy(env, overflow);
65 return (target_ulong)rt;
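/*
 * Illustrative example (hypothetical operand values, not from the ISA text):
 * divweu with ra = 1, rb = 2 divides (1 << 32) by 2 and returns 0x80000000
 * with OV clear; ra = 4, rb = 2 would produce 0x200000000, which no longer
 * fits in 32 bits, so OV is set and rt is left at the "undefined" value 0.
 */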
68 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
74 int64_t dividend = (int64_t)ra << 32;
75 int64_t divisor = (int64_t)((int32_t)rb);
77 if (unlikely((divisor == 0) ||
78 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
81 rt = dividend / divisor;
82 overflow = rt != (int32_t)rt;
85 if (unlikely(overflow)) {
86 rt = 0; /* Undefined */
90 helper_update_ov_legacy(env, overflow);
93 return (target_ulong)rt;
96 #if defined(TARGET_PPC64)
98 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
103 overflow = divu128(&rt, &ra, rb);
105 if (unlikely(overflow)) {
106 rt = 0; /* Undefined */
110 helper_update_ov_legacy(env, overflow);
116 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
119 int64_t ra = (int64_t)rau;
120 int64_t rb = (int64_t)rbu;
121 int overflow = divs128(&rt, &ra, rb);
123 if (unlikely(overflow)) {
124 rt = 0; /* Undefined */
128 helper_update_ov_legacy(env, overflow);
137 #if defined(TARGET_PPC64)
138 /* if x = 0xab, returns 0xabababababababab */
139 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
141 /* subtract 1 from each byte, AND with the inverse, and check if the MSB is set in each byte,
143 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
144 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
146 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
148 /* When you XOR the pattern and there is a match, that byte will be zero */
149 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
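/*
 * Worked example (hypothetical values): hasvalue(0x123456782a9abcde, 0x2a)
 * XORs with pattern(0x2a) = 0x2a2a2a2a2a2a2a2a, giving 0x381e7c5200b096f4;
 * only the byte that matched 0x2a became 0x00, and haszero() flags it via
 * ((0x00 - 0x01) & ~0x00) & 0x80 = 0x80, so helper_cmpeqb() returns CRF_GT.
 */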
151 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
153 return hasvalue(rb, ra) ? CRF_GT : 0;
160 /* Return invalid random number.
162 * FIXME: Add rng backend or other mechanism to get cryptographically suitable
165 target_ulong helper_darn32(void)
170 target_ulong helper_darn64(void)
177 #if defined(TARGET_PPC64)
179 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
184 for (i = 0; i < 8; i++) {
185 int index = (rs >> (i*8)) & 0xFF;
187 if (rb & PPC_BIT(index)) {
197 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
199 target_ulong mask = 0xff;
203 for (i = 0; i < sizeof(target_ulong); i++) {
204 if ((rs & mask) == (rb & mask)) {
212 /* shift right arithmetic helper */
213 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
218 if (likely(!(shift & 0x20))) {
219 if (likely((uint32_t)shift != 0)) {
221 ret = (int32_t)value >> shift;
222 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
223 env->ca32 = env->ca = 0;
225 env->ca32 = env->ca = 1;
228 ret = (int32_t)value;
229 env->ca32 = env->ca = 0;
232 ret = (int32_t)value >> 31;
233 env->ca32 = env->ca = (ret != 0);
235 return (target_long)ret;
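/*
 * Illustrative example (hypothetical values): sraw of 0xfffffff5 (-11) by 2
 * yields -3; the result is negative and the two bits shifted out (0b01) are
 * not all zero, so CA and CA32 are both set.
 */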
238 #if defined(TARGET_PPC64)
239 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
244 if (likely(!(shift & 0x40))) {
245 if (likely((uint64_t)shift != 0)) {
247 ret = (int64_t)value >> shift;
248 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
249 env->ca32 = env->ca = 0;
251 env->ca32 = env->ca = 1;
254 ret = (int64_t)value;
255 env->ca32 = env->ca = 0;
258 ret = (int64_t)value >> 63;
259 env->ca32 = env->ca = (ret != 0);
265 #if defined(TARGET_PPC64)
266 target_ulong helper_popcntb(target_ulong val)
268 /* Note that we don't fold past bytes */
269 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
270 0x5555555555555555ULL);
271 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
272 0x3333333333333333ULL);
273 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
274 0x0f0f0f0f0f0f0f0fULL);
278 target_ulong helper_popcntw(target_ulong val)
280 /* Note that we don't fold past words. */
281 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
282 0x5555555555555555ULL);
283 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
284 0x3333333333333333ULL);
285 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
286 0x0f0f0f0f0f0f0f0fULL);
287 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
288 0x00ff00ff00ff00ffULL);
289 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
290 0x0000ffff0000ffffULL);
294 target_ulong helper_popcntb(target_ulong val)
296 /* Note that we don't fold past bytes */
297 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
298 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
299 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
304 /*****************************************************************************/
305 /* PowerPC 601 specific instructions (POWER bridge) */
306 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
308 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
310 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
311 (int32_t)arg2 == 0) {
312 env->spr[SPR_MQ] = 0;
315 env->spr[SPR_MQ] = tmp % arg2;
316 return tmp / (int32_t)arg2;
320 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
323 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
325 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
326 (int32_t)arg2 == 0) {
327 env->so = env->ov = 1;
328 env->spr[SPR_MQ] = 0;
331 env->spr[SPR_MQ] = tmp % arg2;
332 tmp /= (int32_t)arg2;
333 if ((int32_t)tmp != tmp) {
334 env->so = env->ov = 1;
342 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
345 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
346 (int32_t)arg2 == 0) {
347 env->spr[SPR_MQ] = 0;
350 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
351 return (int32_t)arg1 / (int32_t)arg2;
355 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
358 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
359 (int32_t)arg2 == 0) {
360 env->so = env->ov = 1;
361 env->spr[SPR_MQ] = 0;
365 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
366 return (int32_t)arg1 / (int32_t)arg2;
370 /*****************************************************************************/
371 /* 602 specific instructions */
372 /* mfrom is the craziest instruction ever seen, imho! */
373 /* The real implementation uses a ROM table. Do the same here. */
374 /* Extremely decomposed:
376  * return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
378 #if !defined(CONFIG_USER_ONLY)
379 target_ulong helper_602_mfrom(target_ulong arg)
381 if (likely(arg < 602)) {
382 #include "mfrom_table.inc.c"
383 return mfrom_ROM_table[arg];
390 /*****************************************************************************/
391 /* Altivec extension helpers */
392 #if defined(HOST_WORDS_BIGENDIAN)
395 #define AVRB(i) u8[i]
396 #define AVRW(i) u32[i]
400 #define AVRB(i) u8[15-(i)]
401 #define AVRW(i) u32[3-(i)]
404 #if defined(HOST_WORDS_BIGENDIAN)
405 #define VECTOR_FOR_INORDER_I(index, element) \
406 for (index = 0; index < ARRAY_SIZE(r->element); index++)
408 #define VECTOR_FOR_INORDER_I(index, element) \
409 for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
412 /* Saturating arithmetic helpers. */
413 #define SATCVT(from, to, from_type, to_type, min, max) \
414 static inline to_type cvt##from##to(from_type x, int *sat) \
418 if (x < (from_type)min) { \
421 } else if (x > (from_type)max) { \
429 #define SATCVTU(from, to, from_type, to_type, min, max) \
430 static inline to_type cvt##from##to(from_type x, int *sat) \
434 if (x > (from_type)max) { \
442 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
443 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
444 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
446 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
447 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
448 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
449 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
450 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
451 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
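/*
 * Illustrative example (hypothetical values): cvtsdsw(0x123456789, &sat)
 * saturates the out-of-range 64-bit input to INT32_MAX and sets *sat,
 * whereas cvtsdsw(-5, &sat) returns -5 and leaves *sat untouched.
 */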
455 void helper_lvsl(ppc_avr_t *r, target_ulong sh)
457 int i, j = (sh & 0xf);
459 VECTOR_FOR_INORDER_I(i, u8) {
464 void helper_lvsr(ppc_avr_t *r, target_ulong sh)
466 int i, j = 0x10 - (sh & 0xf);
468 VECTOR_FOR_INORDER_I(i, u8) {
473 void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
475 #if defined(HOST_WORDS_BIGENDIAN)
476 env->vscr = r->u32[3];
478 env->vscr = r->u32[0];
480 set_flush_to_zero(vscr_nj, &env->vec_status);
483 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
487 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
488 r->u32[i] = ~a->u32[i] < b->u32[i];
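/*
 * ~a < b is the classic carry-out test: it is true exactly when a + b would
 * exceed UINT32_MAX.  Illustrative values: a = 0xffffffff, b = 1 gives
 * ~a = 0 < 1, so a carry of 1 is recorded for that word.
 */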
493 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
496 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
497 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
504 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
507 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
508 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
516 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
518 uint64_t res = b->u64[0] ^ b->u64[1];
522 r->u64[LO_IDX] = res & 1;
526 #define VARITH_DO(name, op, element) \
527 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
531 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
532 r->element[i] = a->element[i] op b->element[i]; \
535 #define VARITH(suffix, element) \
536 VARITH_DO(add##suffix, +, element) \
537 VARITH_DO(sub##suffix, -, element)
542 VARITH_DO(muluwm, *, u32)
546 #define VARITHFP(suffix, func) \
547 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
552 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
553 r->f[i] = func(a->f[i], b->f[i], &env->vec_status); \
556 VARITHFP(addfp, float32_add)
557 VARITHFP(subfp, float32_sub)
558 VARITHFP(minfp, float32_min)
559 VARITHFP(maxfp, float32_max)
562 #define VARITHFPFMA(suffix, type) \
563 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
564 ppc_avr_t *b, ppc_avr_t *c) \
567 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
568 r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i], \
569 type, &env->vec_status); \
572 VARITHFPFMA(maddfp, 0);
573 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
576 #define VARITHSAT_CASE(type, op, cvt, element) \
578 type result = (type)a->element[i] op (type)b->element[i]; \
579 r->element[i] = cvt(result, &sat); \
582 #define VARITHSAT_DO(name, op, optype, cvt, element) \
583 void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
589 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
590 switch (sizeof(r->element[0])) { \
592 VARITHSAT_CASE(optype, op, cvt, element); \
595 VARITHSAT_CASE(optype, op, cvt, element); \
598 VARITHSAT_CASE(optype, op, cvt, element); \
603 env->vscr |= (1 << VSCR_SAT); \
606 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
607 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
608 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
609 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
610 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
611 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
612 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
613 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
614 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
615 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
616 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
617 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
618 #undef VARITHSAT_CASE
620 #undef VARITHSAT_SIGNED
621 #undef VARITHSAT_UNSIGNED
623 #define VAVG_DO(name, element, etype) \
624 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
628 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
629 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
630 r->element[i] = x >> 1; \
634 #define VAVG(type, signed_element, signed_type, unsigned_element, \
636 VAVG_DO(avgs##type, signed_element, signed_type) \
637 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
638 VAVG(b, s8, int16_t, u8, uint16_t)
639 VAVG(h, s16, int32_t, u16, uint32_t)
640 VAVG(w, s32, int64_t, u32, uint64_t)
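/*
 * Illustrative example (hypothetical values): vavgsb of 1 and 2 computes
 * (1 + 2 + 1) >> 1 = 2, i.e. the average rounded up; the sum is formed in
 * the wider etype so that inputs such as 0x7f + 0x7f cannot wrap.
 */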
644 #define VABSDU_DO(name, element) \
645 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
649 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
650 r->element[i] = (a->element[i] > b->element[i]) ? \
651 (a->element[i] - b->element[i]) : \
652 (b->element[i] - a->element[i]); \
656 /* VABSDU - Vector absolute difference unsigned
657 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word)
658 * element - element type to access from vector
660 #define VABSDU(type, element) \
661 VABSDU_DO(absdu##type, element)
668 #define VCF(suffix, cvt, element) \
669 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
670 ppc_avr_t *b, uint32_t uim) \
674 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
675 float32 t = cvt(b->element[i], &env->vec_status); \
676 r->f[i] = float32_scalbn(t, -uim, &env->vec_status); \
679 VCF(ux, uint32_to_float32, u32)
680 VCF(sx, int32_to_float32, s32)
683 #define VCMP_DO(suffix, compare, element, record) \
684 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
685 ppc_avr_t *a, ppc_avr_t *b) \
687 uint64_t ones = (uint64_t)-1; \
688 uint64_t all = ones; \
692 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
693 uint64_t result = (a->element[i] compare b->element[i] ? \
695 switch (sizeof(a->element[0])) { \
697 r->u64[i] = result; \
700 r->u32[i] = result; \
703 r->u16[i] = result; \
713 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
716 #define VCMP(suffix, compare, element) \
717 VCMP_DO(suffix, compare, element, 0) \
718 VCMP_DO(suffix##_dot, compare, element, 1)
734 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \
735 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \
736 ppc_avr_t *a, ppc_avr_t *b) \
738 etype ones = (etype)-1; \
740 etype result, none = 0; \
743 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
745 result = ((a->element[i] == 0) \
746 || (b->element[i] == 0) \
747 || (a->element[i] != b->element[i]) ? \
750 result = (a->element[i] != b->element[i]) ? ones : 0x0; \
752 r->element[i] = result; \
757 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
761 /* VCMPNEZ - Vector compare not equal to zero
762 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word)
763 * element - element type to access from vector
765 #define VCMPNE(suffix, element, etype, cmpzero) \
766 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \
767 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
768 VCMPNE(zb, u8, uint8_t, 1)
769 VCMPNE(zh, u16, uint16_t, 1)
770 VCMPNE(zw, u32, uint32_t, 1)
771 VCMPNE(b, u8, uint8_t, 0)
772 VCMPNE(h, u16, uint16_t, 0)
773 VCMPNE(w, u32, uint32_t, 0)
777 #define VCMPFP_DO(suffix, compare, order, record) \
778 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
779 ppc_avr_t *a, ppc_avr_t *b) \
781 uint32_t ones = (uint32_t)-1; \
782 uint32_t all = ones; \
786 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
788 int rel = float32_compare_quiet(a->f[i], b->f[i], \
790 if (rel == float_relation_unordered) { \
792 } else if (rel compare order) { \
797 r->u32[i] = result; \
802 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
805 #define VCMPFP(suffix, compare, order) \
806 VCMPFP_DO(suffix, compare, order, 0) \
807 VCMPFP_DO(suffix##_dot, compare, order, 1)
808 VCMPFP(eqfp, ==, float_relation_equal)
809 VCMPFP(gefp, !=, float_relation_less)
810 VCMPFP(gtfp, ==, float_relation_greater)
814 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
815 ppc_avr_t *a, ppc_avr_t *b, int record)
820 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
821 int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
822 if (le_rel == float_relation_unordered) {
823 r->u32[i] = 0xc0000000;
826 float32 bneg = float32_chs(b->f[i]);
827 int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
828 int le = le_rel != float_relation_greater;
829 int ge = ge_rel != float_relation_less;
831 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
832 all_in |= (!le | !ge);
836 env->crf[6] = (all_in == 0) << 1;
840 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
842 vcmpbfp_internal(env, r, a, b, 0);
845 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
848 vcmpbfp_internal(env, r, a, b, 1);
851 #define VCT(suffix, satcvt, element) \
852 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
853 ppc_avr_t *b, uint32_t uim) \
857 float_status s = env->vec_status; \
859 set_float_rounding_mode(float_round_to_zero, &s); \
860 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
861 if (float32_is_any_nan(b->f[i])) { \
864 float64 t = float32_to_float64(b->f[i], &s); \
867 t = float64_scalbn(t, uim, &s); \
868 j = float64_to_int64(t, &s); \
869 r->element[i] = satcvt(j, &sat); \
873 env->vscr |= (1 << VSCR_SAT); \
876 VCT(uxs, cvtsduw, u32)
877 VCT(sxs, cvtsdsw, s32)
880 target_ulong helper_vclzlsbb(ppc_avr_t *r)
882 target_ulong count = 0;
884 VECTOR_FOR_INORDER_I(i, u8) {
885 if (r->u8[i] & 0x01) {
893 target_ulong helper_vctzlsbb(ppc_avr_t *r)
895 target_ulong count = 0;
897 #if defined(HOST_WORDS_BIGENDIAN)
898 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
900 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
902 if (r->u8[i] & 0x01) {
910 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
911 ppc_avr_t *b, ppc_avr_t *c)
916 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
917 int32_t prod = a->s16[i] * b->s16[i];
918 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
920 r->s16[i] = cvtswsh(t, &sat);
924 env->vscr |= (1 << VSCR_SAT);
928 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
929 ppc_avr_t *b, ppc_avr_t *c)
934 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
935 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
936 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
937 r->s16[i] = cvtswsh(t, &sat);
941 env->vscr |= (1 << VSCR_SAT);
945 #define VMINMAX_DO(name, compare, element) \
946 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
950 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
951 if (a->element[i] compare b->element[i]) { \
952 r->element[i] = b->element[i]; \
954 r->element[i] = a->element[i]; \
958 #define VMINMAX(suffix, element) \
959 VMINMAX_DO(min##suffix, >, element) \
960 VMINMAX_DO(max##suffix, <, element)
972 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
976 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
977 int32_t prod = a->s16[i] * b->s16[i];
978 r->s16[i] = (int16_t) (prod + c->s16[i]);
982 #define VMRG_DO(name, element, highp) \
983 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
987 size_t n_elems = ARRAY_SIZE(r->element); \
989 for (i = 0; i < n_elems / 2; i++) { \
991 result.element[i*2+HI_IDX] = a->element[i]; \
992 result.element[i*2+LO_IDX] = b->element[i]; \
994 result.element[n_elems - i * 2 - (1 + HI_IDX)] = \
995 b->element[n_elems - i - 1]; \
996 result.element[n_elems - i * 2 - (1 + LO_IDX)] = \
997 a->element[n_elems - i - 1]; \
1002 #if defined(HOST_WORDS_BIGENDIAN)
1009 #define VMRG(suffix, element) \
1010 VMRG_DO(mrgl##suffix, element, MRGHI) \
1011 VMRG_DO(mrgh##suffix, element, MRGLO)
1020 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1021 ppc_avr_t *b, ppc_avr_t *c)
1026 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
1027 prod[i] = (int32_t)a->s8[i] * b->u8[i];
1030 VECTOR_FOR_INORDER_I(i, s32) {
1031 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
1032 prod[4 * i + 2] + prod[4 * i + 3];
1036 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1037 ppc_avr_t *b, ppc_avr_t *c)
1042 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1043 prod[i] = a->s16[i] * b->s16[i];
1046 VECTOR_FOR_INORDER_I(i, s32) {
1047 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1051 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1052 ppc_avr_t *b, ppc_avr_t *c)
1058 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1059 prod[i] = (int32_t)a->s16[i] * b->s16[i];
1062 VECTOR_FOR_INORDER_I(i, s32) {
1063 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1065 r->u32[i] = cvtsdsw(t, &sat);
1069 env->vscr |= (1 << VSCR_SAT);
1073 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1074 ppc_avr_t *b, ppc_avr_t *c)
1079 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1080 prod[i] = a->u8[i] * b->u8[i];
1083 VECTOR_FOR_INORDER_I(i, u32) {
1084 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1085 prod[4 * i + 2] + prod[4 * i + 3];
1089 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1090 ppc_avr_t *b, ppc_avr_t *c)
1095 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1096 prod[i] = a->u16[i] * b->u16[i];
1099 VECTOR_FOR_INORDER_I(i, u32) {
1100 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1104 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1105 ppc_avr_t *b, ppc_avr_t *c)
1111 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1112 prod[i] = a->u16[i] * b->u16[i];
1115 VECTOR_FOR_INORDER_I(i, s32) {
1116 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1118 r->u32[i] = cvtuduw(t, &sat);
1122 env->vscr |= (1 << VSCR_SAT);
1126 #define VMUL_DO(name, mul_element, prod_element, cast, evenp) \
1127 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1131 VECTOR_FOR_INORDER_I(i, prod_element) { \
1133 r->prod_element[i] = \
1134 (cast)a->mul_element[i * 2 + HI_IDX] * \
1135 (cast)b->mul_element[i * 2 + HI_IDX]; \
1137 r->prod_element[i] = \
1138 (cast)a->mul_element[i * 2 + LO_IDX] * \
1139 (cast)b->mul_element[i * 2 + LO_IDX]; \
1143 #define VMUL(suffix, mul_element, prod_element, cast) \
1144 VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1) \
1145 VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
1146 VMUL(sb, s8, s16, int16_t)
1147 VMUL(sh, s16, s32, int32_t)
1148 VMUL(sw, s32, s64, int64_t)
1149 VMUL(ub, u8, u16, uint16_t)
1150 VMUL(uh, u16, u32, uint32_t)
1151 VMUL(uw, u32, u64, uint64_t)
1155 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1161 VECTOR_FOR_INORDER_I(i, u8) {
1162 int s = c->u8[i] & 0x1f;
1163 #if defined(HOST_WORDS_BIGENDIAN)
1164 int index = s & 0xf;
1166 int index = 15 - (s & 0xf);
1170 result.u8[i] = b->u8[index];
1172 result.u8[i] = a->u8[index];
1178 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1184 VECTOR_FOR_INORDER_I(i, u8) {
1185 int s = c->u8[i] & 0x1f;
1186 #if defined(HOST_WORDS_BIGENDIAN)
1187 int index = 15 - (s & 0xf);
1189 int index = s & 0xf;
1193 result.u8[i] = a->u8[index];
1195 result.u8[i] = b->u8[index];
1201 #if defined(HOST_WORDS_BIGENDIAN)
1202 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1203 #define VBPERMD_INDEX(i) (i)
1204 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1205 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
1207 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
1208 #define VBPERMD_INDEX(i) (1 - i)
1209 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1210 #define EXTRACT_BIT(avr, i, index) \
1211 (extract64((avr)->u64[1 - i], 63 - index, 1))
1214 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1217 ppc_avr_t result = { .u64 = { 0, 0 } };
1218 VECTOR_FOR_INORDER_I(i, u64) {
1219 for (j = 0; j < 8; j++) {
1220 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1221 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1222 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1229 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1234 VECTOR_FOR_INORDER_I(i, u8) {
1235 int index = VBPERMQ_INDEX(b, i);
1238 uint64_t mask = (1ull << (63-(index & 0x3F)));
1239 if (a->u64[VBPERMQ_DW(index)] & mask) {
1240 perm |= (0x8000 >> i);
1245 r->u64[HI_IDX] = perm;
1249 #undef VBPERMQ_INDEX
1252 static const uint64_t VGBBD_MASKS[256] = {
1253 0x0000000000000000ull, /* 00 */
1254 0x0000000000000080ull, /* 01 */
1255 0x0000000000008000ull, /* 02 */
1256 0x0000000000008080ull, /* 03 */
1257 0x0000000000800000ull, /* 04 */
1258 0x0000000000800080ull, /* 05 */
1259 0x0000000000808000ull, /* 06 */
1260 0x0000000000808080ull, /* 07 */
1261 0x0000000080000000ull, /* 08 */
1262 0x0000000080000080ull, /* 09 */
1263 0x0000000080008000ull, /* 0A */
1264 0x0000000080008080ull, /* 0B */
1265 0x0000000080800000ull, /* 0C */
1266 0x0000000080800080ull, /* 0D */
1267 0x0000000080808000ull, /* 0E */
1268 0x0000000080808080ull, /* 0F */
1269 0x0000008000000000ull, /* 10 */
1270 0x0000008000000080ull, /* 11 */
1271 0x0000008000008000ull, /* 12 */
1272 0x0000008000008080ull, /* 13 */
1273 0x0000008000800000ull, /* 14 */
1274 0x0000008000800080ull, /* 15 */
1275 0x0000008000808000ull, /* 16 */
1276 0x0000008000808080ull, /* 17 */
1277 0x0000008080000000ull, /* 18 */
1278 0x0000008080000080ull, /* 19 */
1279 0x0000008080008000ull, /* 1A */
1280 0x0000008080008080ull, /* 1B */
1281 0x0000008080800000ull, /* 1C */
1282 0x0000008080800080ull, /* 1D */
1283 0x0000008080808000ull, /* 1E */
1284 0x0000008080808080ull, /* 1F */
1285 0x0000800000000000ull, /* 20 */
1286 0x0000800000000080ull, /* 21 */
1287 0x0000800000008000ull, /* 22 */
1288 0x0000800000008080ull, /* 23 */
1289 0x0000800000800000ull, /* 24 */
1290 0x0000800000800080ull, /* 25 */
1291 0x0000800000808000ull, /* 26 */
1292 0x0000800000808080ull, /* 27 */
1293 0x0000800080000000ull, /* 28 */
1294 0x0000800080000080ull, /* 29 */
1295 0x0000800080008000ull, /* 2A */
1296 0x0000800080008080ull, /* 2B */
1297 0x0000800080800000ull, /* 2C */
1298 0x0000800080800080ull, /* 2D */
1299 0x0000800080808000ull, /* 2E */
1300 0x0000800080808080ull, /* 2F */
1301 0x0000808000000000ull, /* 30 */
1302 0x0000808000000080ull, /* 31 */
1303 0x0000808000008000ull, /* 32 */
1304 0x0000808000008080ull, /* 33 */
1305 0x0000808000800000ull, /* 34 */
1306 0x0000808000800080ull, /* 35 */
1307 0x0000808000808000ull, /* 36 */
1308 0x0000808000808080ull, /* 37 */
1309 0x0000808080000000ull, /* 38 */
1310 0x0000808080000080ull, /* 39 */
1311 0x0000808080008000ull, /* 3A */
1312 0x0000808080008080ull, /* 3B */
1313 0x0000808080800000ull, /* 3C */
1314 0x0000808080800080ull, /* 3D */
1315 0x0000808080808000ull, /* 3E */
1316 0x0000808080808080ull, /* 3F */
1317 0x0080000000000000ull, /* 40 */
1318 0x0080000000000080ull, /* 41 */
1319 0x0080000000008000ull, /* 42 */
1320 0x0080000000008080ull, /* 43 */
1321 0x0080000000800000ull, /* 44 */
1322 0x0080000000800080ull, /* 45 */
1323 0x0080000000808000ull, /* 46 */
1324 0x0080000000808080ull, /* 47 */
1325 0x0080000080000000ull, /* 48 */
1326 0x0080000080000080ull, /* 49 */
1327 0x0080000080008000ull, /* 4A */
1328 0x0080000080008080ull, /* 4B */
1329 0x0080000080800000ull, /* 4C */
1330 0x0080000080800080ull, /* 4D */
1331 0x0080000080808000ull, /* 4E */
1332 0x0080000080808080ull, /* 4F */
1333 0x0080008000000000ull, /* 50 */
1334 0x0080008000000080ull, /* 51 */
1335 0x0080008000008000ull, /* 52 */
1336 0x0080008000008080ull, /* 53 */
1337 0x0080008000800000ull, /* 54 */
1338 0x0080008000800080ull, /* 55 */
1339 0x0080008000808000ull, /* 56 */
1340 0x0080008000808080ull, /* 57 */
1341 0x0080008080000000ull, /* 58 */
1342 0x0080008080000080ull, /* 59 */
1343 0x0080008080008000ull, /* 5A */
1344 0x0080008080008080ull, /* 5B */
1345 0x0080008080800000ull, /* 5C */
1346 0x0080008080800080ull, /* 5D */
1347 0x0080008080808000ull, /* 5E */
1348 0x0080008080808080ull, /* 5F */
1349 0x0080800000000000ull, /* 60 */
1350 0x0080800000000080ull, /* 61 */
1351 0x0080800000008000ull, /* 62 */
1352 0x0080800000008080ull, /* 63 */
1353 0x0080800000800000ull, /* 64 */
1354 0x0080800000800080ull, /* 65 */
1355 0x0080800000808000ull, /* 66 */
1356 0x0080800000808080ull, /* 67 */
1357 0x0080800080000000ull, /* 68 */
1358 0x0080800080000080ull, /* 69 */
1359 0x0080800080008000ull, /* 6A */
1360 0x0080800080008080ull, /* 6B */
1361 0x0080800080800000ull, /* 6C */
1362 0x0080800080800080ull, /* 6D */
1363 0x0080800080808000ull, /* 6E */
1364 0x0080800080808080ull, /* 6F */
1365 0x0080808000000000ull, /* 70 */
1366 0x0080808000000080ull, /* 71 */
1367 0x0080808000008000ull, /* 72 */
1368 0x0080808000008080ull, /* 73 */
1369 0x0080808000800000ull, /* 74 */
1370 0x0080808000800080ull, /* 75 */
1371 0x0080808000808000ull, /* 76 */
1372 0x0080808000808080ull, /* 77 */
1373 0x0080808080000000ull, /* 78 */
1374 0x0080808080000080ull, /* 79 */
1375 0x0080808080008000ull, /* 7A */
1376 0x0080808080008080ull, /* 7B */
1377 0x0080808080800000ull, /* 7C */
1378 0x0080808080800080ull, /* 7D */
1379 0x0080808080808000ull, /* 7E */
1380 0x0080808080808080ull, /* 7F */
1381 0x8000000000000000ull, /* 80 */
1382 0x8000000000000080ull, /* 81 */
1383 0x8000000000008000ull, /* 82 */
1384 0x8000000000008080ull, /* 83 */
1385 0x8000000000800000ull, /* 84 */
1386 0x8000000000800080ull, /* 85 */
1387 0x8000000000808000ull, /* 86 */
1388 0x8000000000808080ull, /* 87 */
1389 0x8000000080000000ull, /* 88 */
1390 0x8000000080000080ull, /* 89 */
1391 0x8000000080008000ull, /* 8A */
1392 0x8000000080008080ull, /* 8B */
1393 0x8000000080800000ull, /* 8C */
1394 0x8000000080800080ull, /* 8D */
1395 0x8000000080808000ull, /* 8E */
1396 0x8000000080808080ull, /* 8F */
1397 0x8000008000000000ull, /* 90 */
1398 0x8000008000000080ull, /* 91 */
1399 0x8000008000008000ull, /* 92 */
1400 0x8000008000008080ull, /* 93 */
1401 0x8000008000800000ull, /* 94 */
1402 0x8000008000800080ull, /* 95 */
1403 0x8000008000808000ull, /* 96 */
1404 0x8000008000808080ull, /* 97 */
1405 0x8000008080000000ull, /* 98 */
1406 0x8000008080000080ull, /* 99 */
1407 0x8000008080008000ull, /* 9A */
1408 0x8000008080008080ull, /* 9B */
1409 0x8000008080800000ull, /* 9C */
1410 0x8000008080800080ull, /* 9D */
1411 0x8000008080808000ull, /* 9E */
1412 0x8000008080808080ull, /* 9F */
1413 0x8000800000000000ull, /* A0 */
1414 0x8000800000000080ull, /* A1 */
1415 0x8000800000008000ull, /* A2 */
1416 0x8000800000008080ull, /* A3 */
1417 0x8000800000800000ull, /* A4 */
1418 0x8000800000800080ull, /* A5 */
1419 0x8000800000808000ull, /* A6 */
1420 0x8000800000808080ull, /* A7 */
1421 0x8000800080000000ull, /* A8 */
1422 0x8000800080000080ull, /* A9 */
1423 0x8000800080008000ull, /* AA */
1424 0x8000800080008080ull, /* AB */
1425 0x8000800080800000ull, /* AC */
1426 0x8000800080800080ull, /* AD */
1427 0x8000800080808000ull, /* AE */
1428 0x8000800080808080ull, /* AF */
1429 0x8000808000000000ull, /* B0 */
1430 0x8000808000000080ull, /* B1 */
1431 0x8000808000008000ull, /* B2 */
1432 0x8000808000008080ull, /* B3 */
1433 0x8000808000800000ull, /* B4 */
1434 0x8000808000800080ull, /* B5 */
1435 0x8000808000808000ull, /* B6 */
1436 0x8000808000808080ull, /* B7 */
1437 0x8000808080000000ull, /* B8 */
1438 0x8000808080000080ull, /* B9 */
1439 0x8000808080008000ull, /* BA */
1440 0x8000808080008080ull, /* BB */
1441 0x8000808080800000ull, /* BC */
1442 0x8000808080800080ull, /* BD */
1443 0x8000808080808000ull, /* BE */
1444 0x8000808080808080ull, /* BF */
1445 0x8080000000000000ull, /* C0 */
1446 0x8080000000000080ull, /* C1 */
1447 0x8080000000008000ull, /* C2 */
1448 0x8080000000008080ull, /* C3 */
1449 0x8080000000800000ull, /* C4 */
1450 0x8080000000800080ull, /* C5 */
1451 0x8080000000808000ull, /* C6 */
1452 0x8080000000808080ull, /* C7 */
1453 0x8080000080000000ull, /* C8 */
1454 0x8080000080000080ull, /* C9 */
1455 0x8080000080008000ull, /* CA */
1456 0x8080000080008080ull, /* CB */
1457 0x8080000080800000ull, /* CC */
1458 0x8080000080800080ull, /* CD */
1459 0x8080000080808000ull, /* CE */
1460 0x8080000080808080ull, /* CF */
1461 0x8080008000000000ull, /* D0 */
1462 0x8080008000000080ull, /* D1 */
1463 0x8080008000008000ull, /* D2 */
1464 0x8080008000008080ull, /* D3 */
1465 0x8080008000800000ull, /* D4 */
1466 0x8080008000800080ull, /* D5 */
1467 0x8080008000808000ull, /* D6 */
1468 0x8080008000808080ull, /* D7 */
1469 0x8080008080000000ull, /* D8 */
1470 0x8080008080000080ull, /* D9 */
1471 0x8080008080008000ull, /* DA */
1472 0x8080008080008080ull, /* DB */
1473 0x8080008080800000ull, /* DC */
1474 0x8080008080800080ull, /* DD */
1475 0x8080008080808000ull, /* DE */
1476 0x8080008080808080ull, /* DF */
1477 0x8080800000000000ull, /* E0 */
1478 0x8080800000000080ull, /* E1 */
1479 0x8080800000008000ull, /* E2 */
1480 0x8080800000008080ull, /* E3 */
1481 0x8080800000800000ull, /* E4 */
1482 0x8080800000800080ull, /* E5 */
1483 0x8080800000808000ull, /* E6 */
1484 0x8080800000808080ull, /* E7 */
1485 0x8080800080000000ull, /* E8 */
1486 0x8080800080000080ull, /* E9 */
1487 0x8080800080008000ull, /* EA */
1488 0x8080800080008080ull, /* EB */
1489 0x8080800080800000ull, /* EC */
1490 0x8080800080800080ull, /* ED */
1491 0x8080800080808000ull, /* EE */
1492 0x8080800080808080ull, /* EF */
1493 0x8080808000000000ull, /* F0 */
1494 0x8080808000000080ull, /* F1 */
1495 0x8080808000008000ull, /* F2 */
1496 0x8080808000008080ull, /* F3 */
1497 0x8080808000800000ull, /* F4 */
1498 0x8080808000800080ull, /* F5 */
1499 0x8080808000808000ull, /* F6 */
1500 0x8080808000808080ull, /* F7 */
1501 0x8080808080000000ull, /* F8 */
1502 0x8080808080000080ull, /* F9 */
1503 0x8080808080008000ull, /* FA */
1504 0x8080808080008080ull, /* FB */
1505 0x8080808080800000ull, /* FC */
1506 0x8080808080800080ull, /* FD */
1507 0x8080808080808000ull, /* FE */
1508 0x8080808080808080ull, /* FF */
1511 void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
1514 uint64_t t[2] = { 0, 0 };
1516 VECTOR_FOR_INORDER_I(i, u8) {
1517 #if defined(HOST_WORDS_BIGENDIAN)
1518 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
1520 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
1528 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1529 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1532 trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])]; \
1534 VECTOR_FOR_INORDER_I(i, srcfld) { \
1536 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1537 if (a->srcfld[i] & (1ull<<j)) { \
1538 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1543 VECTOR_FOR_INORDER_I(i, trgfld) { \
1544 r->trgfld[i] = prod[2*i] ^ prod[2*i+1]; \
1548 PMSUM(vpmsumb, u8, u16, uint16_t)
1549 PMSUM(vpmsumh, u16, u32, uint32_t)
1550 PMSUM(vpmsumw, u32, u64, uint64_t)
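/*
 * Illustrative example (hypothetical byte values): for vpmsumb, the
 * carry-less (polynomial) product of 0x03 (x + 1) and 0x05 (x^2 + 1) is
 * x^3 + x^2 + x + 1 = 0x0f; each 16-bit result lane is then the XOR of the
 * two such products from its pair of adjacent byte lanes.
 */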
1552 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1555 #ifdef CONFIG_INT128
1557 __uint128_t prod[2];
1559 VECTOR_FOR_INORDER_I(i, u64) {
1561 for (j = 0; j < 64; j++) {
1562 if (a->u64[i] & (1ull<<j)) {
1563 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1568 r->u128 = prod[0] ^ prod[1];
1574 VECTOR_FOR_INORDER_I(i, u64) {
1575 prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
1576 for (j = 0; j < 64; j++) {
1577 if (a->u64[i] & (1ull<<j)) {
1580 bshift.u64[HI_IDX] = 0;
1581 bshift.u64[LO_IDX] = b->u64[i];
1583 bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
1584 bshift.u64[LO_IDX] = b->u64[i] << j;
1586 prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
1587 prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
1592 r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
1593 r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
1598 #if defined(HOST_WORDS_BIGENDIAN)
1603 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1607 #if defined(HOST_WORDS_BIGENDIAN)
1608 const ppc_avr_t *x[2] = { a, b };
1610 const ppc_avr_t *x[2] = { b, a };
1613 VECTOR_FOR_INORDER_I(i, u64) {
1614 VECTOR_FOR_INORDER_I(j, u32) {
1615 uint32_t e = x[i]->u32[j];
1617 result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
1618 ((e >> 6) & 0x3e0) |
1625 #define VPK(suffix, from, to, cvt, dosat) \
1626 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1627 ppc_avr_t *a, ppc_avr_t *b) \
1632 ppc_avr_t *a0 = PKBIG ? a : b; \
1633 ppc_avr_t *a1 = PKBIG ? b : a; \
1635 VECTOR_FOR_INORDER_I(i, from) { \
1636 result.to[i] = cvt(a0->from[i], &sat); \
1637 result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \
1640 if (dosat && sat) { \
1641 env->vscr |= (1 << VSCR_SAT); \
1645 VPK(shss, s16, s8, cvtshsb, 1)
1646 VPK(shus, s16, u8, cvtshub, 1)
1647 VPK(swss, s32, s16, cvtswsh, 1)
1648 VPK(swus, s32, u16, cvtswuh, 1)
1649 VPK(sdss, s64, s32, cvtsdsw, 1)
1650 VPK(sdus, s64, u32, cvtsduw, 1)
1651 VPK(uhus, u16, u8, cvtuhub, 1)
1652 VPK(uwus, u32, u16, cvtuwuh, 1)
1653 VPK(udus, u64, u32, cvtuduw, 1)
1654 VPK(uhum, u16, u8, I, 0)
1655 VPK(uwum, u32, u16, I, 0)
1656 VPK(udum, u64, u32, I, 0)
1661 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1665 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1666 r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
1670 #define VRFI(suffix, rounding) \
1671 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1675 float_status s = env->vec_status; \
1677 set_float_rounding_mode(rounding, &s); \
1678 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
1679 r->f[i] = float32_round_to_int (b->f[i], &s); \
1682 VRFI(n, float_round_nearest_even)
1683 VRFI(m, float_round_down)
1684 VRFI(p, float_round_up)
1685 VRFI(z, float_round_to_zero)
1688 #define VROTATE(suffix, element, mask) \
1689 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1693 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1694 unsigned int shift = b->element[i] & mask; \
1695 r->element[i] = (a->element[i] << shift) | \
1696 (shift ? (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)) : 0); \
1700 VROTATE(h, u16, 0xF)
1701 VROTATE(w, u32, 0x1F)
1702 VROTATE(d, u64, 0x3F)
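/*
 * Illustrative example (hypothetical values): vrlw rotating 0x80000001 left
 * by 1 gives (0x80000001 << 1) | (0x80000001 >> 31) = 0x00000003.  A rotate
 * count of zero must not perform the full-width right shift, which C leaves
 * undefined, hence the explicit zero case above.
 */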
1705 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1709 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1710 float32 t = float32_sqrt(b->f[i], &env->vec_status);
1712 r->f[i] = float32_div(float32_one, t, &env->vec_status);
1716 #define VRLMI(name, size, element, insert) \
1717 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1720 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1721 uint##size##_t src1 = a->element[i]; \
1722 uint##size##_t src2 = b->element[i]; \
1723 uint##size##_t src3 = r->element[i]; \
1724 uint##size##_t begin, end, shift, mask, rot_val; \
1726 shift = extract##size(src2, 0, 6); \
1727 end = extract##size(src2, 8, 6); \
1728 begin = extract##size(src2, 16, 6); \
1729 rot_val = rol##size(src1, shift); \
1730 mask = mask_u##size(begin, end); \
1732 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1734 r->element[i] = (rot_val & mask); \
1739 VRLMI(vrldmi, 64, u64, 1);
1740 VRLMI(vrlwmi, 32, u32, 1);
1741 VRLMI(vrldnm, 64, u64, 0);
1742 VRLMI(vrlwnm, 32, u32, 0);
1744 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1747 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1748 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1751 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1755 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1756 r->f[i] = float32_exp2(b->f[i], &env->vec_status);
1760 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1764 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1765 r->f[i] = float32_log2(b->f[i], &env->vec_status);
1769 #if defined(HOST_WORDS_BIGENDIAN)
1770 #define VEXTU_X_DO(name, size, left) \
1771 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1775 index = (a & 0xf) * 8; \
1777 index = ((15 - (a & 0xf) + 1) * 8) - size; \
1779 return int128_getlo(int128_rshift(b->s128, index)) & \
1780 MAKE_64BIT_MASK(0, size); \
1783 #define VEXTU_X_DO(name, size, left) \
1784 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1788 index = ((15 - (a & 0xf) + 1) * 8) - size; \
1790 index = (a & 0xf) * 8; \
1792 return int128_getlo(int128_rshift(b->s128, index)) & \
1793 MAKE_64BIT_MASK(0, size); \
1797 VEXTU_X_DO(vextublx, 8, 1)
1798 VEXTU_X_DO(vextuhlx, 16, 1)
1799 VEXTU_X_DO(vextuwlx, 32, 1)
1800 VEXTU_X_DO(vextubrx, 8, 0)
1801 VEXTU_X_DO(vextuhrx, 16, 0)
1802 VEXTU_X_DO(vextuwrx, 32, 0)
1805 /* The specification says that the results are undefined unless all of the
1806  * shift counts are identical.  We check that they are, to conform to what
1807  * real hardware appears to do.  */
1808 #define VSHIFT(suffix, leftp) \
1809 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1811 int shift = b->u8[LO_IDX*15] & 0x7; \
1815 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \
1816 doit = doit && ((b->u8[i] & 0x7) == shift); \
1821 } else if (leftp) { \
1822 uint64_t carry = a->u64[LO_IDX] >> (64 - shift); \
1824 r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry; \
1825 r->u64[LO_IDX] = a->u64[LO_IDX] << shift; \
1827 uint64_t carry = a->u64[HI_IDX] << (64 - shift); \
1829 r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry; \
1830 r->u64[HI_IDX] = a->u64[HI_IDX] >> shift; \
1838 #define VSL(suffix, element, mask) \
1839 void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1843 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1844 unsigned int shift = b->element[i] & mask; \
1846 r->element[i] = a->element[i] << shift; \
1855 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1858 unsigned int shift, bytes, size;
1860 size = ARRAY_SIZE(r->u8);
1861 for (i = 0; i < size; i++) {
1862 shift = b->u8[i] & 0x7; /* extract shift value */
1863 bytes = (a->u8[i] << 8) + /* extract adjacent bytes */
1864 (((i + 1) < size) ? a->u8[i + 1] : 0);
1865 r->u8[i] = (bytes << shift) >> 8; /* shift and store result */
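/*
 * Illustrative example (hypothetical bytes): with a->u8[i] = 0x12,
 * a->u8[i + 1] = 0x34 and a shift count of 4, the 16-bit window 0x1234 is
 * shifted to 0x12340 and its bits 15:8 are kept, so the stored byte is
 * 0x23 -- the top nibble of the following byte is pulled in from the right.
 */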
1869 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1872 unsigned int shift, bytes;
1874 /* Use reverse order, as the destination and source registers can be the
1875  * same.  Since the register is modified in place (saving a temporary),
1876  * reverse order guarantees that the computed result is not fed back.
1878 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1879 shift = b->u8[i] & 0x7; /* extract shift value */
1880 bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i];
1881 /* extract adjacent bytes */
1882 r->u8[i] = (bytes >> shift) & 0xFF; /* shift and store result */
1886 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1888 int sh = shift & 0xf;
1892 #if defined(HOST_WORDS_BIGENDIAN)
1893 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1896 result.u8[i] = b->u8[index - 0x10];
1898 result.u8[i] = a->u8[index];
1902 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1903 int index = (16 - sh) + i;
1905 result.u8[i] = a->u8[index - 0x10];
1907 result.u8[i] = b->u8[index];
1914 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1916 int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;
1918 #if defined(HOST_WORDS_BIGENDIAN)
1919 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1920 memset(&r->u8[16-sh], 0, sh);
1922 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1923 memset(&r->u8[0], 0, sh);
1927 /* Experimental testing shows that hardware masks the immediate. */
1928 #define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
1929 #if defined(HOST_WORDS_BIGENDIAN)
1930 #define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
1932 #define SPLAT_ELEMENT(element) \
1933 (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
1935 #define VSPLT(suffix, element) \
1936 void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
1938 uint32_t s = b->element[SPLAT_ELEMENT(element)]; \
1941 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1942 r->element[i] = s; \
1949 #undef SPLAT_ELEMENT
1950 #undef _SPLAT_MASKED
1951 #if defined(HOST_WORDS_BIGENDIAN)
1952 #define VINSERT(suffix, element) \
1953 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1955 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])], \
1956 sizeof(r->element[0])); \
1959 #define VINSERT(suffix, element) \
1960 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1962 uint32_t d = (16 - index) - sizeof(r->element[0]); \
1963 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \
1971 #if defined(HOST_WORDS_BIGENDIAN)
1972 #define VEXTRACT(suffix, element) \
1973 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1975 uint32_t es = sizeof(r->element[0]); \
1976 memmove(&r->u8[8 - es], &b->u8[index], es); \
1977 memset(&r->u8[8], 0, 8); \
1978 memset(&r->u8[0], 0, 8 - es); \
1981 #define VEXTRACT(suffix, element) \
1982 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1984 uint32_t es = sizeof(r->element[0]); \
1985 uint32_t s = (16 - index) - es; \
1986 memmove(&r->u8[8], &b->u8[s], es); \
1987 memset(&r->u8[0], 0, 8); \
1988 memset(&r->u8[8 + es], 0, 8 - es); \
1997 void helper_xxextractuw(CPUPPCState *env, target_ulong xtn,
1998 target_ulong xbn, uint32_t index)
2001 size_t es = sizeof(uint32_t);
2005 getVSR(xbn, &xb, env);
2006 memset(&xt, 0, sizeof(xt));
2008 #if defined(HOST_WORDS_BIGENDIAN)
2010 for (i = 0; i < es; i++, ext_index++) {
2011 xt.u8[8 - es + i] = xb.u8[ext_index % 16];
2014 ext_index = 15 - index;
2015 for (i = es - 1; i >= 0; i--, ext_index--) {
2016 xt.u8[8 + i] = xb.u8[ext_index % 16];
2020 putVSR(xtn, &xt, env);
2023 void helper_xxinsertw(CPUPPCState *env, target_ulong xtn,
2024 target_ulong xbn, uint32_t index)
2027 size_t es = sizeof(uint32_t);
2028 int ins_index, i = 0;
2030 getVSR(xbn, &xb, env);
2031 getVSR(xtn, &xt, env);
2033 #if defined(HOST_WORDS_BIGENDIAN)
2035 for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
2036 xt.u8[ins_index] = xb.u8[8 - es + i];
2039 ins_index = 15 - index;
2040 for (i = es - 1; i >= 0 && ins_index >= 0; i--, ins_index--) {
2041 xt.u8[ins_index] = xb.u8[8 + i];
2045 putVSR(xtn, &xt, env);
2048 #define VEXT_SIGNED(name, element, mask, cast, recast) \
2049 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
2052 VECTOR_FOR_INORDER_I(i, element) { \
2053 r->element[i] = (recast)((cast)(b->element[i] & mask)); \
2056 VEXT_SIGNED(vextsb2w, s32, UINT8_MAX, int8_t, int32_t)
2057 VEXT_SIGNED(vextsb2d, s64, UINT8_MAX, int8_t, int64_t)
2058 VEXT_SIGNED(vextsh2w, s32, UINT16_MAX, int16_t, int32_t)
2059 VEXT_SIGNED(vextsh2d, s64, UINT16_MAX, int16_t, int64_t)
2060 VEXT_SIGNED(vextsw2d, s64, UINT32_MAX, int32_t, int64_t)
2063 #define VNEG(name, element) \
2064 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
2067 VECTOR_FOR_INORDER_I(i, element) { \
2068 r->element[i] = -b->element[i]; \
2075 #define VSPLTI(suffix, element, splat_type) \
2076 void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat) \
2078 splat_type x = (int8_t)(splat << 3) >> 3; \
2081 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
2082 r->element[i] = x; \
2085 VSPLTI(b, s8, int8_t)
2086 VSPLTI(h, s16, int16_t)
2087 VSPLTI(w, s32, int32_t)
2090 #define VSR(suffix, element, mask) \
2091 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
2095 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
2096 unsigned int shift = b->element[i] & mask; \
2097 r->element[i] = a->element[i] >> shift; \
2110 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2112 int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;
2114 #if defined(HOST_WORDS_BIGENDIAN)
2115 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
2116 memset(&r->u8[0], 0, sh);
2118 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
2119 memset(&r->u8[16 - sh], 0, sh);
2123 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2127 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2128 r->u32[i] = a->u32[i] >= b->u32[i];
2132 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2139 #if defined(HOST_WORDS_BIGENDIAN)
2140 upper = ARRAY_SIZE(r->s32)-1;
2144 t = (int64_t)b->s32[upper];
2145 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2149 result.s32[upper] = cvtsdsw(t, &sat);
2153 env->vscr |= (1 << VSCR_SAT);
2157 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2163 #if defined(HOST_WORDS_BIGENDIAN)
2168 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2169 int64_t t = (int64_t)b->s32[upper + i * 2];
2172 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
2173 t += a->s32[2 * i + j];
2175 result.s32[upper + i * 2] = cvtsdsw(t, &sat);
2180 env->vscr |= (1 << VSCR_SAT);
2184 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2189 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2190 int64_t t = (int64_t)b->s32[i];
2192 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
2193 t += a->s8[4 * i + j];
2195 r->s32[i] = cvtsdsw(t, &sat);
2199 env->vscr |= (1 << VSCR_SAT);
2203 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2208 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2209 int64_t t = (int64_t)b->s32[i];
2211 t += a->s16[2 * i] + a->s16[2 * i + 1];
2212 r->s32[i] = cvtsdsw(t, &sat);
2216 env->vscr |= (1 << VSCR_SAT);
2220 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2225 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2226 uint64_t t = (uint64_t)b->u32[i];
2228 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
2229 t += a->u8[4 * i + j];
2231 r->u32[i] = cvtuduw(t, &sat);
2235 env->vscr |= (1 << VSCR_SAT);
2239 #if defined(HOST_WORDS_BIGENDIAN)
2246 #define VUPKPX(suffix, hi) \
2247 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2252 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
2253 uint16_t e = b->u16[hi ? i : i+4]; \
2254 uint8_t a = (e >> 15) ? 0xff : 0; \
2255 uint8_t r = (e >> 10) & 0x1f; \
2256 uint8_t g = (e >> 5) & 0x1f; \
2257 uint8_t b = e & 0x1f; \
2259 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
2267 #define VUPK(suffix, unpacked, packee, hi) \
2268 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2274 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
2275 result.unpacked[i] = b->packee[i]; \
2278 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
2280 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
2285 VUPK(hsb, s16, s8, UPKHI)
2286 VUPK(hsh, s32, s16, UPKHI)
2287 VUPK(hsw, s64, s32, UPKHI)
2288 VUPK(lsb, s16, s8, UPKLO)
2289 VUPK(lsh, s32, s16, UPKLO)
2290 VUPK(lsw, s64, s32, UPKLO)
2295 #define VGENERIC_DO(name, element) \
2296 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
2300 VECTOR_FOR_INORDER_I(i, element) { \
2301 r->element[i] = name(b->element[i]); \
2305 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
2306 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
2307 #define clzw(v) clz32((v))
2308 #define clzd(v) clz64((v))
2310 VGENERIC_DO(clzb, u8)
2311 VGENERIC_DO(clzh, u16)
2312 VGENERIC_DO(clzw, u32)
2313 VGENERIC_DO(clzd, u64)
2320 #define ctzb(v) ((v) ? ctz32(v) : 8)
2321 #define ctzh(v) ((v) ? ctz32(v) : 16)
2322 #define ctzw(v) ctz32((v))
2323 #define ctzd(v) ctz64((v))
2325 VGENERIC_DO(ctzb, u8)
2326 VGENERIC_DO(ctzh, u16)
2327 VGENERIC_DO(ctzw, u32)
2328 VGENERIC_DO(ctzd, u64)
2335 #define popcntb(v) ctpop8(v)
2336 #define popcnth(v) ctpop16(v)
2337 #define popcntw(v) ctpop32(v)
2338 #define popcntd(v) ctpop64(v)
2340 VGENERIC_DO(popcntb, u8)
2341 VGENERIC_DO(popcnth, u16)
2342 VGENERIC_DO(popcntw, u32)
2343 VGENERIC_DO(popcntd, u64)
2352 #if defined(HOST_WORDS_BIGENDIAN)
2353 #define QW_ONE { .u64 = { 0, 1 } }
2355 #define QW_ONE { .u64 = { 1, 0 } }
2358 #ifndef CONFIG_INT128
2360 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
2362 t->u64[0] = ~a.u64[0];
2363 t->u64[1] = ~a.u64[1];
2366 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
2368 if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
2370 } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
2372 } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
2374 } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
2381 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2383 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
2384 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
2385 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
2388 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2391 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
2392 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
2393 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
2394 avr_qw_not(¬_a, a);
2395 return avr_qw_cmpu(not_a, b) < 0;
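/*
 * The carry-out test is the 128-bit analogue of the one in helper_vaddcuw():
 * ~a < b holds exactly when a + b overflows, and avr_qw_cmpu() performs the
 * comparison limb by limb.  Illustrative values: a = 2^128 - 1, b = 1 gives
 * ~a = 0 < 1, so a carry out of 1 is reported.
 */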
2400 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2402 #ifdef CONFIG_INT128
2403 r->u128 = a->u128 + b->u128;
2405 avr_qw_add(r, *a, *b);
2409 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2411 #ifdef CONFIG_INT128
2412 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2415 if (c->u64[LO_IDX] & 1) {
2418 tmp.u64[HI_IDX] = 0;
2419 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2420 avr_qw_add(&tmp, *a, tmp);
2421 avr_qw_add(r, tmp, *b);
2423 avr_qw_add(r, *a, *b);
2428 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2430 #ifdef CONFIG_INT128
2431 r->u128 = (~a->u128 < b->u128);
2435 avr_qw_not(¬_a, *a);
2438 r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
2442 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2444 #ifdef CONFIG_INT128
2445 int carry_out = (~a->u128 < b->u128);
2446 if (!carry_out && (c->u128 & 1)) {
2447 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2448 ((a->u128 != 0) || (b->u128 != 0));
2450 r->u128 = carry_out;
2453 int carry_in = c->u64[LO_IDX] & 1;
2457 carry_out = avr_qw_addc(&tmp, *a, *b);
2459 if (!carry_out && carry_in) {
2460 ppc_avr_t one = QW_ONE;
2461 carry_out = avr_qw_addc(&tmp, tmp, one);
2464 r->u64[LO_IDX] = carry_out;
2468 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2470 #ifdef CONFIG_INT128
2471 r->u128 = a->u128 - b->u128;
2474 ppc_avr_t one = QW_ONE;
2476 avr_qw_not(&tmp, *b);
2477 avr_qw_add(&tmp, *a, tmp);
2478 avr_qw_add(r, tmp, one);
2482 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2484 #ifdef CONFIG_INT128
2485 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2489 avr_qw_not(&tmp, *b);
2490 avr_qw_add(&sum, *a, tmp);
2492 tmp.u64[HI_IDX] = 0;
2493 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2494 avr_qw_add(r, sum, tmp);
2498 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2500 #ifdef CONFIG_INT128
2501 r->u128 = (~a->u128 < ~b->u128) ||
2502 (a->u128 + ~b->u128 == (__uint128_t)-1);
2504 int carry = (avr_qw_cmpu(*a, *b) > 0);
2507 avr_qw_not(&tmp, *b);
2508 avr_qw_add(&tmp, *a, tmp);
2509 carry = ((tmp.s64[HI_IDX] == -1ull) && (tmp.s64[LO_IDX] == -1ull));
2512 r->u64[LO_IDX] = carry;
2516 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2518 #ifdef CONFIG_INT128
2520 (~a->u128 < ~b->u128) ||
2521 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2523 int carry_in = c->u64[LO_IDX] & 1;
2524 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2525 if (!carry_out && carry_in) {
2527 avr_qw_not(&tmp, *b);
2528 avr_qw_add(&tmp, *a, tmp);
2529 carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2533 r->u64[LO_IDX] = carry_out;
2537 #define BCD_PLUS_PREF_1 0xC
2538 #define BCD_PLUS_PREF_2 0xF
2539 #define BCD_PLUS_ALT_1 0xA
2540 #define BCD_NEG_PREF 0xD
2541 #define BCD_NEG_ALT 0xB
2542 #define BCD_PLUS_ALT_2 0xE
2543 #define NATIONAL_PLUS 0x2B
2544 #define NATIONAL_NEG 0x2D
2546 #if defined(HOST_WORDS_BIGENDIAN)
2547 #define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2549 #define BCD_DIG_BYTE(n) ((n) / 2)
2552 static int bcd_get_sgn(ppc_avr_t *bcd)
2554 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2555 case BCD_PLUS_PREF_1:
2556 case BCD_PLUS_PREF_2:
2557 case BCD_PLUS_ALT_1:
2558 case BCD_PLUS_ALT_2:
2576 static int bcd_preferred_sgn(int sgn, int ps)
2579 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2581 return BCD_NEG_PREF;
2585 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2589 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2591 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2594 if (unlikely(result > 9)) {
2600 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2603 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2604 bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
2606 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2607 bcd->u8[BCD_DIG_BYTE(n)] |= digit;
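/*
 * Illustrative example (hypothetical value): the packed decimal +123 sits in
 * the two least significant bytes as 0x12 0x3C, so bcd_get_sgn() sees the
 * preferred plus sign 0xC in digit position 0, and bcd_get_digit() returns
 * 3, 2 and 1 for positions 1, 2 and 3 respectively.
 */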
2611 static bool bcd_is_valid(ppc_avr_t *bcd)
2616 if (bcd_get_sgn(bcd) == 0) {
2620 for (i = 1; i < 32; i++) {
2621 bcd_get_digit(bcd, i, &invalid);
2622 if (unlikely(invalid)) {
2629 static int bcd_cmp_zero(ppc_avr_t *bcd)
2631 if (bcd->u64[HI_IDX] == 0 && (bcd->u64[LO_IDX] >> 4) == 0) {
2634 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
static uint16_t get_national_digit(ppc_avr_t *reg, int n)
{
#if defined(HOST_WORDS_BIGENDIAN)
    return reg->u16[7 - n];
#else
    return reg->u16[n];
#endif
}

static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
{
#if defined(HOST_WORDS_BIGENDIAN)
    reg->u16[7 - n] = val;
#else
    reg->u16[n] = val;
#endif
}
static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    int invalid = 0;

    for (i = 31; i > 0; i--) {
        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return 0; /* doesn't matter */
        } else if (dig_a > dig_b) {
            return 1;
        } else if (dig_a < dig_b) {
            return -1;
        }
    }

    return 0;
}
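/*
 * bcd_add_mag/bcd_sub_mag work on the digit strings only (positions
 * 1..31), propagating a per-digit carry or borrow.  Bad digits are
 * reported through *invalid, and bcd_add_mag reports a carry out of the
 * top digit through *overflow.
 */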
2675 static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2681 for (i = 1; i <= 31; i++) {
2682 uint8_t digit = bcd_get_digit(a, i, invalid) +
2683 bcd_get_digit(b, i, invalid) + carry;
2684 is_zero &= (digit == 0);
2692 bcd_put_digit(t, digit, i);
2694 if (unlikely(*invalid)) {
2703 static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2709 for (i = 1; i <= 31; i++) {
2710 uint8_t digit = bcd_get_digit(a, i, invalid) -
2711 bcd_get_digit(b, i, invalid) + carry;
2712 is_zero &= (digit == 0);
2720 bcd_put_digit(t, digit, i);
2722 if (unlikely(*invalid)) {
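/*
 * bcdadd.: signed BCD addition.  The operands are combined according to
 * their signs and magnitudes; the returned CR field encodes the sign of
 * the result (LT/GT), EQ for zero, and SO for overflow or invalid input.
 */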
2731 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2734 int sgna = bcd_get_sgn(a);
2735 int sgnb = bcd_get_sgn(b);
2736 int invalid = (sgna == 0) || (sgnb == 0);
2740 ppc_avr_t result = { .u64 = { 0, 0 } };
2744 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2745 zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2746 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2747 } else if (bcd_cmp_mag(a, b) > 0) {
2748 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2749 zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
2750 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2752 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
2753 zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
2754 cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2758 if (unlikely(invalid)) {
2759 result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
2761 } else if (overflow) {
uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    ppc_avr_t bcopy = *b;
    int sgnb = bcd_get_sgn(b);

    if (sgnb < 0) {
        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
    } else if (sgnb > 0) {
        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
    }
    /* else invalid ... defer to bcdadd code for proper handling */

    return helper_bcdadd(r, a, &bcopy, ps);
}
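/*
 * bcdcfn.: Decimal Convert From National.  The seven digit characters of
 * b become BCD digits 1..7, and the leading sign character selects the
 * preferred plus (per PS) or minus sign nibble.
 */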
2786 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2790 uint16_t national = 0;
2791 uint16_t sgnb = get_national_digit(b, 0);
2792 ppc_avr_t ret = { .u64 = { 0, 0 } };
2793 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2795 for (i = 1; i < 8; i++) {
2796 national = get_national_digit(b, i);
2797 if (unlikely(national < 0x30 || national > 0x39)) {
2802 bcd_put_digit(&ret, national & 0xf, i);
2805 if (sgnb == NATIONAL_PLUS) {
2806 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2808 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2811 cr = bcd_cmp_zero(&ret);
2813 if (unlikely(invalid)) {
2822 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2826 int sgnb = bcd_get_sgn(b);
2827 int invalid = (sgnb == 0);
2828 ppc_avr_t ret = { .u64 = { 0, 0 } };
2830 int ox_flag = (b->u64[HI_IDX] != 0) || ((b->u64[LO_IDX] >> 32) != 0);
2832 for (i = 1; i < 8; i++) {
2833 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2835 if (unlikely(invalid)) {
2839 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2841 cr = bcd_cmp_zero(b);
2847 if (unlikely(invalid)) {
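/*
 * bcdcfz.: Decimal Convert From Zoned.  Each byte of b holds a zone
 * nibble and a digit; the zone must be 0x3 (ASCII style, PS=0) or 0xF
 * (EBCDIC style, PS=1), and the sign is carried in the zone nibble of
 * the low-order byte.
 */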
2856 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2862 int zone_lead = ps ? 0xF : 0x3;
2864 ppc_avr_t ret = { .u64 = { 0, 0 } };
2865 int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4;
2867 if (unlikely((sgnb < 0xA) && ps)) {
2871 for (i = 0; i < 16; i++) {
2872 zone_digit = i ? b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead;
2873 digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF;
2874 if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2879 bcd_put_digit(&ret, digit, i + 1);
2882 if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2883 (!ps && (sgnb & 0x4))) {
2884 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2886 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2889 cr = bcd_cmp_zero(&ret);
2891 if (unlikely(invalid)) {
2900 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2905 int sgnb = bcd_get_sgn(b);
2906 int zone_lead = (ps) ? 0xF0 : 0x30;
2907 int invalid = (sgnb == 0);
2908 ppc_avr_t ret = { .u64 = { 0, 0 } };
2910 int ox_flag = ((b->u64[HI_IDX] >> 4) != 0);
2912 for (i = 0; i < 16; i++) {
2913 digit = bcd_get_digit(b, i + 1, &invalid);
2915 if (unlikely(invalid)) {
2919 ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit;
2923 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2925 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2928 cr = bcd_cmp_zero(b);
2934 if (unlikely(invalid)) {
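/*
 * bcdcfsq.: Decimal Convert From Signed Quadword.  The magnitude of the
 * 128-bit integer in b is split by divu128() into its low 15 decimal
 * digits and the remaining high digits, which are then emitted one BCD
 * digit at a time; a value of more than 31 digits raises SO.
 */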
2943 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2949 ppc_avr_t ret = { .u64 = { 0, 0 } };
2951 if (b->s64[HI_IDX] < 0) {
2952 lo_value = -b->s64[LO_IDX];
2953 hi_value = ~b->u64[HI_IDX] + !lo_value;
2954 bcd_put_digit(&ret, 0xD, 0);
2956 lo_value = b->u64[LO_IDX];
2957 hi_value = b->u64[HI_IDX];
2958 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
2961 if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
2962 lo_value > 9999999999999999ULL) {
2966 for (i = 1; i < 16; hi_value /= 10, i++) {
2967 bcd_put_digit(&ret, hi_value % 10, i);
2970 for (; i < 32; lo_value /= 10, i++) {
2971 bcd_put_digit(&ret, lo_value % 10, i);
2974 cr |= bcd_cmp_zero(&ret);
2981 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2988 uint64_t hi_value = 0;
2989 int sgnb = bcd_get_sgn(b);
2990 int invalid = (sgnb == 0);
2992 lo_value = bcd_get_digit(b, 31, &invalid);
2993 for (i = 30; i > 0; i--) {
2994 mulu64(&lo_value, &carry, lo_value, 10ULL);
2995 mulu64(&hi_value, &unused, hi_value, 10ULL);
2996 lo_value += bcd_get_digit(b, i, &invalid);
2999 if (unlikely(invalid)) {
3005 r->s64[LO_IDX] = -lo_value;
3006 r->s64[HI_IDX] = ~hi_value + !r->s64[LO_IDX];
3008 r->s64[LO_IDX] = lo_value;
3009 r->s64[HI_IDX] = hi_value;
3012 cr = bcd_cmp_zero(b);
3014 if (unlikely(invalid)) {
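/*
 * bcdcpsgn.: copy the digits of a and the sign nibble of b into r,
 * returning SO if either operand carries an invalid sign or digit.
 */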
3021 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
3026 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
3031 bcd_put_digit(r, b->u8[BCD_DIG_BYTE(0)] & 0xF, 0);
3033 for (i = 1; i < 32; i++) {
3034 bcd_get_digit(a, i, &invalid);
3035 bcd_get_digit(b, i, &invalid);
3036 if (unlikely(invalid)) {
3041 return bcd_cmp_zero(r);
uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int sgnb = bcd_get_sgn(b);

    *r = *b;
    bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    return bcd_cmp_zero(r);
}
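/*
 * bcds.: Decimal Shift.  The signed shift count, in digits, is taken
 * from a byte of a; positive counts shift the digits of b towards the
 * most significant end, negative counts shift right.
 */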
uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
#if defined(HOST_WORDS_BIGENDIAN)
    int i = a->s8[7];
#else
    int i = a->s8[8];
#endif
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.u64[LO_IDX] &= ~0xf;
3071 if (bcd_is_valid(b) == false) {
3075 if (unlikely(i > 31)) {
3077 } else if (unlikely(i < -31)) {
3082 ulshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], i * 4, &ox_flag);
3084 urshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], -i * 4);
3086 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
3090 cr = bcd_cmp_zero(r);
3098 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
3103 bool ox_flag = false;
    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

#if defined(HOST_WORDS_BIGENDIAN)
    i = a->s16[3];
#else
    i = a->s16[4];
#endif
    if (i >= 32) {
        ox_flag = true;
        ret.u64[LO_IDX] = ret.u64[HI_IDX] = 0;
    } else if (i <= -32) {
        ret.u64[LO_IDX] = ret.u64[HI_IDX] = 0;
    } else if (i > 0) {
        ulshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], i * 4, &ox_flag);
    } else {
        urshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], -i * 4);
    }
    *r = ret;
3131 cr = bcd_cmp_zero(r);
3139 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
3144 bool ox_flag = false;
3145 int sgnb = bcd_get_sgn(b);
3147 ret.u64[LO_IDX] &= ~0xf;
#if defined(HOST_WORDS_BIGENDIAN)
    int i = a->s8[7];
    ppc_avr_t bcd_one = { .u64 = { 0, 0x10 } };
#else
    int i = a->s8[8];
    ppc_avr_t bcd_one = { .u64 = { 0x10, 0 } };
#endif
3157 if (bcd_is_valid(b) == false) {
3161 if (unlikely(i > 31)) {
3163 } else if (unlikely(i < -31)) {
3168 ulshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], i * 4, &ox_flag);
3170 urshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], -i * 4);
3172 if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
3173 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
3176 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
3178 cr = bcd_cmp_zero(&ret);
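/*
 * bcdtrunc.: truncate the BCD value in b to the number of digits given
 * in a halfword of a, raising SO if nonzero digits are dropped.
 */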
3187 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
3190 uint32_t ox_flag = 0;
#if defined(HOST_WORDS_BIGENDIAN)
    int i = a->s16[3] + 1;
#else
    int i = a->s16[4] + 1;
#endif
3198 if (bcd_is_valid(b) == false) {
3202 if (i > 16 && i < 32) {
3203 mask = (uint64_t)-1 >> (128 - i * 4);
3204 if (ret.u64[HI_IDX] & ~mask) {
3208 ret.u64[HI_IDX] &= mask;
3209 } else if (i >= 0 && i <= 16) {
3210 mask = (uint64_t)-1 >> (64 - i * 4);
3211 if (ret.u64[HI_IDX] || (ret.u64[LO_IDX] & ~mask)) {
3215 ret.u64[LO_IDX] &= mask;
3216 ret.u64[HI_IDX] = 0;
3218 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
3221 return bcd_cmp_zero(&ret) | ox_flag;
3224 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
3228 uint32_t ox_flag = 0;
3232 for (i = 0; i < 32; i++) {
3233 bcd_get_digit(b, i, &invalid);
3235 if (unlikely(invalid)) {
#if defined(HOST_WORDS_BIGENDIAN)
    i = a->s16[3];
#else
    i = a->s16[4];
#endif
3245 if (i > 16 && i < 33) {
3246 mask = (uint64_t)-1 >> (128 - i * 4);
3247 if (ret.u64[HI_IDX] & ~mask) {
3251 ret.u64[HI_IDX] &= mask;
3252 } else if (i > 0 && i <= 16) {
3253 mask = (uint64_t)-1 >> (64 - i * 4);
3254 if (ret.u64[HI_IDX] || (ret.u64[LO_IDX] & ~mask)) {
3258 ret.u64[LO_IDX] &= mask;
3259 ret.u64[HI_IDX] = 0;
3260 } else if (i == 0) {
3261 if (ret.u64[HI_IDX] || ret.u64[LO_IDX]) {
3264 ret.u64[HI_IDX] = ret.u64[LO_IDX] = 0;
    *r = ret;
    if (r->u64[HI_IDX] == 0 && r->u64[LO_IDX] == 0) {
        return ox_flag | CRF_EQ;
    }

    return ox_flag | CRF_GT;
}
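/*
 * AES acceleration helpers.  vsbox applies the AES S-box to each byte;
 * vcipher/vncipher perform one full encryption/decryption round and the
 * *last variants the final round, which omits the MixColumns step.
 */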
3275 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
3278 VECTOR_FOR_INORDER_I(i, u8) {
3279 r->u8[i] = AES_sbox[a->u8[i]];
3283 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3288 VECTOR_FOR_INORDER_I(i, u32) {
3289 result.AVRW(i) = b->AVRW(i) ^
3290 (AES_Te0[a->AVRB(AES_shifts[4*i + 0])] ^
3291 AES_Te1[a->AVRB(AES_shifts[4*i + 1])] ^
3292 AES_Te2[a->AVRB(AES_shifts[4*i + 2])] ^
3293 AES_Te3[a->AVRB(AES_shifts[4*i + 3])]);
3298 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3303 VECTOR_FOR_INORDER_I(i, u8) {
3304 result.AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]);
3309 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3311 /* This differs from what is written in ISA V2.07. The RTL is */
3312 /* incorrect and will be fixed in V2.07B. */
3316 VECTOR_FOR_INORDER_I(i, u8) {
3317 tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
3320 VECTOR_FOR_INORDER_I(i, u32) {
3322 AES_imc[tmp.AVRB(4*i + 0)][0] ^
3323 AES_imc[tmp.AVRB(4*i + 1)][1] ^
3324 AES_imc[tmp.AVRB(4*i + 2)][2] ^
3325 AES_imc[tmp.AVRB(4*i + 3)][3];
3329 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3334 VECTOR_FOR_INORDER_I(i, u8) {
3335 result.AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]);
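/*
 * vshasigmaw/vshasigmad evaluate the SHA-256/SHA-512 sigma functions.
 * Bit 4 of st_six selects the upper-case Sigma variants, and the low
 * bits select sigma-0 vs sigma-1 per element; the rotate/shift constants
 * below are the ones from FIPS 180-4.
 */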
3340 #define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32-n)))
#if defined(HOST_WORDS_BIGENDIAN)
#define EL_IDX(i) (i)
#else
#define EL_IDX(i) (3 - (i))
#endif
3347 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
3349 int st = (st_six & 0x10) != 0;
3350 int six = st_six & 0xF;
3353 VECTOR_FOR_INORDER_I(i, u32) {
3355 if ((six & (0x8 >> i)) == 0) {
3356 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
3357 ROTRu32(a->u32[EL_IDX(i)], 18) ^
3358 (a->u32[EL_IDX(i)] >> 3);
3359 } else { /* six.bit[i] == 1 */
3360 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
3361 ROTRu32(a->u32[EL_IDX(i)], 19) ^
3362 (a->u32[EL_IDX(i)] >> 10);
3364 } else { /* st == 1 */
3365 if ((six & (0x8 >> i)) == 0) {
3366 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
3367 ROTRu32(a->u32[EL_IDX(i)], 13) ^
3368 ROTRu32(a->u32[EL_IDX(i)], 22);
3369 } else { /* six.bit[i] == 1 */
3370 r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
3371 ROTRu32(a->u32[EL_IDX(i)], 11) ^
3372 ROTRu32(a->u32[EL_IDX(i)], 25);
3381 #define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64-n)))
#if defined(HOST_WORDS_BIGENDIAN)
#define EL_IDX(i) (i)
#else
#define EL_IDX(i) (1 - (i))
#endif
3388 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
3390 int st = (st_six & 0x10) != 0;
3391 int six = st_six & 0xF;
3394 VECTOR_FOR_INORDER_I(i, u64) {
3396 if ((six & (0x8 >> (2*i))) == 0) {
3397 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
3398 ROTRu64(a->u64[EL_IDX(i)], 8) ^
3399 (a->u64[EL_IDX(i)] >> 7);
3400 } else { /* six.bit[2*i] == 1 */
3401 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
3402 ROTRu64(a->u64[EL_IDX(i)], 61) ^
3403 (a->u64[EL_IDX(i)] >> 6);
3405 } else { /* st == 1 */
3406 if ((six & (0x8 >> (2*i))) == 0) {
3407 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
3408 ROTRu64(a->u64[EL_IDX(i)], 34) ^
3409 ROTRu64(a->u64[EL_IDX(i)], 39);
3410 } else { /* six.bit[2*i] == 1 */
3411 r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
3412 ROTRu64(a->u64[EL_IDX(i)], 18) ^
3413 ROTRu64(a->u64[EL_IDX(i)], 41);
3422 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
3427 VECTOR_FOR_INORDER_I(i, u8) {
3428 int indexA = c->u8[i] >> 4;
3429 int indexB = c->u8[i] & 0xF;
#if defined(HOST_WORDS_BIGENDIAN)
        result.u8[i] = a->u8[indexA] ^ b->u8[indexB];
#else
        result.u8[i] = a->u8[15 - indexA] ^ b->u8[15 - indexB];
#endif
3439 #undef VECTOR_FOR_INORDER_I
3443 /*****************************************************************************/
3444 /* SPE extension helpers */
3445 /* Use a table to make this quicker */
3446 static const uint8_t hbrev[16] = {
3447 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
3448 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
3451 static inline uint8_t byte_reverse(uint8_t val)
3453 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
3456 static inline uint32_t word_reverse(uint32_t val)
3458 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
3459 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
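/*
 * brinc implements the SPE bit-reversed increment used for FFT
 * addressing: the low MASKBITS bits of arg1 are incremented in
 * bit-reversed order within the mask supplied in arg2.
 */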
3462 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}
uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}
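/*
 * dlmzb: Determine Leftmost Zero Byte in the 8 bytes formed by the two
 * source words; returns the count of bytes preceding the first zero byte
 * (8 if none) and optionally records the outcome in CR0.
 */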
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}