2 * PowerPC integer and vector emulation helpers for QEMU.
4 * Copyright (c) 2003-2007 Jocelyn Mayer
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
23 #include "qemu/host-utils.h"
24 #include "qemu/main-loop.h"
25 #include "exec/helper-proto.h"
26 #include "crypto/aes.h"
27 #include "fpu/softfloat.h"
28 #include "qapi/error.h"
29 #include "qemu/guest-random.h"
31 #include "helper_regs.h"
32 /*****************************************************************************/
/* Fixed-point operation helpers */
35 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
38 env->so = env->ov = 1;
44 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
50 uint64_t dividend = (uint64_t)ra << 32;
51 uint64_t divisor = (uint32_t)rb;
53 if (unlikely(divisor == 0)) {
56 rt = dividend / divisor;
57 overflow = rt > UINT32_MAX;
60 if (unlikely(overflow)) {
61 rt = 0; /* Undefined */
65 helper_update_ov_legacy(env, overflow);
68 return (target_ulong)rt;
71 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
77 int64_t dividend = (int64_t)ra << 32;
78 int64_t divisor = (int64_t)((int32_t)rb);
80 if (unlikely((divisor == 0) ||
81 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
84 rt = dividend / divisor;
85 overflow = rt != (int32_t)rt;
88 if (unlikely(overflow)) {
89 rt = 0; /* Undefined */
93 helper_update_ov_legacy(env, overflow);
96 return (target_ulong)rt;
99 #if defined(TARGET_PPC64)
101 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
106 overflow = divu128(&rt, &ra, rb);
108 if (unlikely(overflow)) {
109 rt = 0; /* Undefined */
113 helper_update_ov_legacy(env, overflow);
119 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
122 int64_t ra = (int64_t)rau;
123 int64_t rb = (int64_t)rbu;
124 int overflow = divs128(&rt, &ra, rb);
126 if (unlikely(overflow)) {
127 rt = 0; /* Undefined */
131 helper_update_ov_legacy(env, overflow);
140 #if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
142 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
 * Subtract 1 from each byte, AND with the inverse of the original value,
 * and check whether the MSB is set in each byte,
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero byte found)
150 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
152 /* When you XOR the pattern and there is a match, that byte will be zero */
153 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
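/*
 * Worked example of the zero-byte trick (64-bit target): with
 * rb = 0x1122334455667788 and ra = 0x55,
 * rb ^ pattern(0x55) = 0x44776611003322dd, which contains a zero byte
 * exactly where rb holds 0x55, so haszero() is non-zero and
 * helper_cmpeqb() below returns CRF_GT.
 */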
155 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
157 return hasvalue(rb, ra) ? CRF_GT : 0;
165 * Return a random number.
167 uint64_t helper_darn32(void)
172 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
173 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
174 error_get_pretty(err));
182 uint64_t helper_darn64(void)
187 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
188 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
189 error_get_pretty(err));
197 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
202 for (i = 0; i < 8; i++) {
203 int index = (rs >> (i * 8)) & 0xFF;
205 if (rb & PPC_BIT(index)) {
215 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
217 target_ulong mask = 0xff;
221 for (i = 0; i < sizeof(target_ulong); i++) {
222 if ((rs & mask) == (rb & mask)) {
230 /* shift right arithmetic helper */
231 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
236 if (likely(!(shift & 0x20))) {
237 if (likely((uint32_t)shift != 0)) {
239 ret = (int32_t)value >> shift;
240 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
241 env->ca32 = env->ca = 0;
243 env->ca32 = env->ca = 1;
246 ret = (int32_t)value;
247 env->ca32 = env->ca = 0;
250 ret = (int32_t)value >> 31;
251 env->ca32 = env->ca = (ret != 0);
253 return (target_long)ret;
256 #if defined(TARGET_PPC64)
257 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
262 if (likely(!(shift & 0x40))) {
263 if (likely((uint64_t)shift != 0)) {
265 ret = (int64_t)value >> shift;
266 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
267 env->ca32 = env->ca = 0;
269 env->ca32 = env->ca = 1;
272 ret = (int64_t)value;
273 env->ca32 = env->ca = 0;
276 ret = (int64_t)value >> 63;
277 env->ca32 = env->ca = (ret != 0);
283 #if defined(TARGET_PPC64)
284 target_ulong helper_popcntb(target_ulong val)
286 /* Note that we don't fold past bytes */
287 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
288 0x5555555555555555ULL);
289 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
290 0x3333333333333333ULL);
291 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
292 0x0f0f0f0f0f0f0f0fULL);
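    /*
     * After the three folds above, each byte of val holds the population
     * count of the corresponding byte of the original input, which is
     * exactly what popcntb returns.
     */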
296 target_ulong helper_popcntw(target_ulong val)
298 /* Note that we don't fold past words. */
299 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
300 0x5555555555555555ULL);
301 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
302 0x3333333333333333ULL);
303 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
304 0x0f0f0f0f0f0f0f0fULL);
305 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
306 0x00ff00ff00ff00ffULL);
307 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
308 0x0000ffff0000ffffULL);
312 target_ulong helper_popcntb(target_ulong val)
314 /* Note that we don't fold past bytes */
315 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
316 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
317 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
322 /*****************************************************************************/
323 /* PowerPC 601 specific instructions (POWER bridge) */
324 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
326 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
328 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
329 (int32_t)arg2 == 0) {
330 env->spr[SPR_MQ] = 0;
333 env->spr[SPR_MQ] = tmp % arg2;
334 return tmp / (int32_t)arg2;
338 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
341 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
343 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
344 (int32_t)arg2 == 0) {
345 env->so = env->ov = 1;
346 env->spr[SPR_MQ] = 0;
349 env->spr[SPR_MQ] = tmp % arg2;
350 tmp /= (int32_t)arg2;
351 if ((int32_t)tmp != tmp) {
352 env->so = env->ov = 1;
360 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
363 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
364 (int32_t)arg2 == 0) {
365 env->spr[SPR_MQ] = 0;
368 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
369 return (int32_t)arg1 / (int32_t)arg2;
373 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
376 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
377 (int32_t)arg2 == 0) {
378 env->so = env->ov = 1;
379 env->spr[SPR_MQ] = 0;
383 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
384 return (int32_t)arg1 / (int32_t)arg2;
388 /*****************************************************************************/
389 /* 602 specific instructions */
/* mfrom is the craziest instruction ever seen, imho! */
/* The real implementation uses a ROM table; do the same. */
 * Extremely decomposed:
 *   return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
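 * e.g. for arg = 0 this evaluates to 256 * log10(2) + 0.5 ~= 77.6, and the
 * result decreases monotonically as arg grows towards 602.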
397 #if !defined(CONFIG_USER_ONLY)
398 target_ulong helper_602_mfrom(target_ulong arg)
400 if (likely(arg < 602)) {
401 #include "mfrom_table.inc.c"
402 return mfrom_ROM_table[arg];
409 /*****************************************************************************/
410 /* Altivec extension helpers */
411 #if defined(HOST_WORDS_BIGENDIAN)
412 #define VECTOR_FOR_INORDER_I(index, element) \
413 for (index = 0; index < ARRAY_SIZE(r->element); index++)
415 #define VECTOR_FOR_INORDER_I(index, element) \
416 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
419 /* Saturating arithmetic helpers. */
420 #define SATCVT(from, to, from_type, to_type, min, max) \
421 static inline to_type cvt##from##to(from_type x, int *sat) \
425 if (x < (from_type)min) { \
428 } else if (x > (from_type)max) { \
436 #define SATCVTU(from, to, from_type, to_type, min, max) \
437 static inline to_type cvt##from##to(from_type x, int *sat) \
441 if (x > (from_type)max) { \
449 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
450 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
451 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
453 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
454 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
455 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
456 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
457 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
458 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
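/*
 * e.g. cvtsdsw(0x123456789LL, &sat) saturates to INT32_MAX and sets *sat,
 * while cvtsdsw(-5, &sat) simply returns -5 and leaves *sat untouched.
 */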
462 void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
464 env->vscr = vscr & ~(1u << VSCR_SAT);
465 /* Which bit we set is completely arbitrary, but clear the rest. */
466 env->vscr_sat.u64[0] = vscr & (1u << VSCR_SAT);
467 env->vscr_sat.u64[1] = 0;
468 set_flush_to_zero((vscr >> VSCR_NJ) & 1, &env->vec_status);
471 uint32_t helper_mfvscr(CPUPPCState *env)
473 uint32_t sat = (env->vscr_sat.u64[0] | env->vscr_sat.u64[1]) != 0;
474 return env->vscr | (sat << VSCR_SAT);
477 static inline void set_vscr_sat(CPUPPCState *env)
479 /* The choice of non-zero value is arbitrary. */
480 env->vscr_sat.u32[0] = 1;
483 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
487 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
488 r->u32[i] = ~a->u32[i] < b->u32[i];
493 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
496 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
497 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
504 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
507 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
508 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
516 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
518 uint64_t res = b->u64[0] ^ b->u64[1];
522 r->VsrD(1) = res & 1;
526 #define VARITH_DO(name, op, element) \
527 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
531 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
532 r->element[i] = a->element[i] op b->element[i]; \
535 VARITH_DO(muluwm, *, u32)
539 #define VARITHFP(suffix, func) \
540 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
545 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
546 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \
549 VARITHFP(addfp, float32_add)
550 VARITHFP(subfp, float32_sub)
551 VARITHFP(minfp, float32_min)
552 VARITHFP(maxfp, float32_max)
555 #define VARITHFPFMA(suffix, type) \
556 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
557 ppc_avr_t *b, ppc_avr_t *c) \
560 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
561 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
562 type, &env->vec_status); \
565 VARITHFPFMA(maddfp, 0);
566 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
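/*
 * With these flags, maddfp computes a * c + b per element, while nmsubfp
 * computes -(a * c - b).
 */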
569 #define VARITHSAT_CASE(type, op, cvt, element) \
571 type result = (type)a->element[i] op (type)b->element[i]; \
572 r->element[i] = cvt(result, &sat); \
575 #define VARITHSAT_DO(name, op, optype, cvt, element) \
576 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \
577 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
582 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
583 VARITHSAT_CASE(optype, op, cvt, element); \
586 vscr_sat->u32[0] = 1; \
589 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
590 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
591 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
592 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
593 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
594 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
595 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
596 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
597 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
598 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
599 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
600 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
601 #undef VARITHSAT_CASE
603 #undef VARITHSAT_SIGNED
604 #undef VARITHSAT_UNSIGNED
606 #define VAVG_DO(name, element, etype) \
607 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
611 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
612 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
613 r->element[i] = x >> 1; \
617 #define VAVG(type, signed_element, signed_type, unsigned_element, \
619 VAVG_DO(avgs##type, signed_element, signed_type) \
620 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
621 VAVG(b, s8, int16_t, u8, uint16_t)
622 VAVG(h, s16, int32_t, u16, uint32_t)
623 VAVG(w, s32, int64_t, u32, uint64_t)
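/*
 * The averages above round up on a tie, e.g. vavgub of 5 and 6 yields
 * (5 + 6 + 1) >> 1 = 6.
 */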
627 #define VABSDU_DO(name, element) \
628 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
632 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
633 r->element[i] = (a->element[i] > b->element[i]) ? \
634 (a->element[i] - b->element[i]) : \
635 (b->element[i] - a->element[i]); \
640 * VABSDU - Vector absolute difference unsigned
641 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word)
642 * element - element type to access from vector
644 #define VABSDU(type, element) \
645 VABSDU_DO(absdu##type, element)
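/* e.g. vabsdub with a->u8[i] = 3 and b->u8[i] = 10 stores 7 in r->u8[i]. */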
652 #define VCF(suffix, cvt, element) \
653 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
654 ppc_avr_t *b, uint32_t uim) \
658 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
659 float32 t = cvt(b->element[i], &env->vec_status); \
660 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \
663 VCF(ux, uint32_to_float32, u32)
664 VCF(sx, int32_to_float32, s32)
667 #define VCMP_DO(suffix, compare, element, record) \
668 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
669 ppc_avr_t *a, ppc_avr_t *b) \
671 uint64_t ones = (uint64_t)-1; \
672 uint64_t all = ones; \
676 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
677 uint64_t result = (a->element[i] compare b->element[i] ? \
679 switch (sizeof(a->element[0])) { \
681 r->u64[i] = result; \
684 r->u32[i] = result; \
687 r->u16[i] = result; \
697 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
700 #define VCMP(suffix, compare, element) \
701 VCMP_DO(suffix, compare, element, 0) \
702 VCMP_DO(suffix##_dot, compare, element, 1)
718 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \
719 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \
720 ppc_avr_t *a, ppc_avr_t *b) \
722 etype ones = (etype)-1; \
724 etype result, none = 0; \
727 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
729 result = ((a->element[i] == 0) \
730 || (b->element[i] == 0) \
731 || (a->element[i] != b->element[i]) ? \
734 result = (a->element[i] != b->element[i]) ? ones : 0x0; \
736 r->element[i] = result; \
741 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
746 * VCMPNEZ - Vector compare not equal to zero
747 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word)
748 * element - element type to access from vector
750 #define VCMPNE(suffix, element, etype, cmpzero) \
751 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \
752 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
753 VCMPNE(zb, u8, uint8_t, 1)
754 VCMPNE(zh, u16, uint16_t, 1)
755 VCMPNE(zw, u32, uint32_t, 1)
756 VCMPNE(b, u8, uint8_t, 0)
757 VCMPNE(h, u16, uint16_t, 0)
758 VCMPNE(w, u32, uint32_t, 0)
762 #define VCMPFP_DO(suffix, compare, order, record) \
763 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
764 ppc_avr_t *a, ppc_avr_t *b) \
766 uint32_t ones = (uint32_t)-1; \
767 uint32_t all = ones; \
771 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
773 int rel = float32_compare_quiet(a->f32[i], b->f32[i], \
775 if (rel == float_relation_unordered) { \
777 } else if (rel compare order) { \
782 r->u32[i] = result; \
787 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
790 #define VCMPFP(suffix, compare, order) \
791 VCMPFP_DO(suffix, compare, order, 0) \
792 VCMPFP_DO(suffix##_dot, compare, order, 1)
793 VCMPFP(eqfp, ==, float_relation_equal)
794 VCMPFP(gefp, !=, float_relation_less)
795 VCMPFP(gtfp, ==, float_relation_greater)
799 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
800 ppc_avr_t *a, ppc_avr_t *b, int record)
805 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
806 int le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
808 if (le_rel == float_relation_unordered) {
809 r->u32[i] = 0xc0000000;
812 float32 bneg = float32_chs(b->f32[i]);
813 int ge_rel = float32_compare_quiet(a->f32[i], bneg,
815 int le = le_rel != float_relation_greater;
816 int ge = ge_rel != float_relation_less;
818 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
819 all_in |= (!le | !ge);
823 env->crf[6] = (all_in == 0) << 1;
827 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
829 vcmpbfp_internal(env, r, a, b, 0);
832 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
835 vcmpbfp_internal(env, r, a, b, 1);
838 #define VCT(suffix, satcvt, element) \
839 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
840 ppc_avr_t *b, uint32_t uim) \
844 float_status s = env->vec_status; \
846 set_float_rounding_mode(float_round_to_zero, &s); \
847 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
848 if (float32_is_any_nan(b->f32[i])) { \
851 float64 t = float32_to_float64(b->f32[i], &s); \
854 t = float64_scalbn(t, uim, &s); \
855 j = float64_to_int64(t, &s); \
856 r->element[i] = satcvt(j, &sat); \
863 VCT(uxs, cvtsduw, u32)
864 VCT(sxs, cvtsdsw, s32)
867 target_ulong helper_vclzlsbb(ppc_avr_t *r)
869 target_ulong count = 0;
871 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
872 if (r->VsrB(i) & 0x01) {
880 target_ulong helper_vctzlsbb(ppc_avr_t *r)
882 target_ulong count = 0;
884 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
885 if (r->VsrB(i) & 0x01) {
893 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
894 ppc_avr_t *b, ppc_avr_t *c)
899 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
900 int32_t prod = a->s16[i] * b->s16[i];
901 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
903 r->s16[i] = cvtswsh(t, &sat);
911 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
912 ppc_avr_t *b, ppc_avr_t *c)
917 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
918 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
919 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
920 r->s16[i] = cvtswsh(t, &sat);
928 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
932 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
933 int32_t prod = a->s16[i] * b->s16[i];
934 r->s16[i] = (int16_t) (prod + c->s16[i]);
938 #define VMRG_DO(name, element, access, ofs) \
939 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
942 int i, half = ARRAY_SIZE(r->element) / 2; \
944 for (i = 0; i < half; i++) { \
945 result.access(i * 2 + 0) = a->access(i + ofs); \
946 result.access(i * 2 + 1) = b->access(i + ofs); \
951 #define VMRG(suffix, element, access) \
952 VMRG_DO(mrgl##suffix, element, access, half) \
953 VMRG_DO(mrgh##suffix, element, access, 0)
960 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
961 ppc_avr_t *b, ppc_avr_t *c)
966 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
967 prod[i] = (int32_t)a->s8[i] * b->u8[i];
970 VECTOR_FOR_INORDER_I(i, s32) {
971 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
972 prod[4 * i + 2] + prod[4 * i + 3];
976 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
977 ppc_avr_t *b, ppc_avr_t *c)
982 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
983 prod[i] = a->s16[i] * b->s16[i];
986 VECTOR_FOR_INORDER_I(i, s32) {
987 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
991 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
992 ppc_avr_t *b, ppc_avr_t *c)
998 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
999 prod[i] = (int32_t)a->s16[i] * b->s16[i];
1002 VECTOR_FOR_INORDER_I(i, s32) {
1003 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1005 r->u32[i] = cvtsdsw(t, &sat);
1013 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1014 ppc_avr_t *b, ppc_avr_t *c)
1019 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1020 prod[i] = a->u8[i] * b->u8[i];
1023 VECTOR_FOR_INORDER_I(i, u32) {
1024 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1025 prod[4 * i + 2] + prod[4 * i + 3];
1029 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1030 ppc_avr_t *b, ppc_avr_t *c)
1035 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1036 prod[i] = a->u16[i] * b->u16[i];
1039 VECTOR_FOR_INORDER_I(i, u32) {
1040 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1044 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1045 ppc_avr_t *b, ppc_avr_t *c)
1051 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1052 prod[i] = a->u16[i] * b->u16[i];
1055 VECTOR_FOR_INORDER_I(i, s32) {
1056 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1058 r->u32[i] = cvtuduw(t, &sat);
1066 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \
1067 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1071 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1072 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \
1073 (cast)b->mul_access(i); \
1077 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \
1078 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1082 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1083 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \
1084 (cast)b->mul_access(i + 1); \
1088 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \
1089 VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \
1090 VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast)
1091 VMUL(sb, s8, VsrSB, VsrSH, int16_t)
1092 VMUL(sh, s16, VsrSH, VsrSW, int32_t)
1093 VMUL(sw, s32, VsrSW, VsrSD, int64_t)
1094 VMUL(ub, u8, VsrB, VsrH, uint16_t)
1095 VMUL(uh, u16, VsrH, VsrW, uint32_t)
1096 VMUL(uw, u32, VsrW, VsrD, uint64_t)
1101 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1107 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1108 int s = c->VsrB(i) & 0x1f;
1109 int index = s & 0xf;
1112 result.VsrB(i) = b->VsrB(index);
1114 result.VsrB(i) = a->VsrB(index);
1120 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1126 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1127 int s = c->VsrB(i) & 0x1f;
1128 int index = 15 - (s & 0xf);
1131 result.VsrB(i) = a->VsrB(index);
1133 result.VsrB(i) = b->VsrB(index);
1139 #if defined(HOST_WORDS_BIGENDIAN)
1140 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1141 #define VBPERMD_INDEX(i) (i)
1142 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1143 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
1145 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
1146 #define VBPERMD_INDEX(i) (1 - i)
1147 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1148 #define EXTRACT_BIT(avr, i, index) \
1149 (extract64((avr)->u64[1 - i], 63 - index, 1))
1152 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1155 ppc_avr_t result = { .u64 = { 0, 0 } };
1156 VECTOR_FOR_INORDER_I(i, u64) {
1157 for (j = 0; j < 8; j++) {
1158 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1159 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1160 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1167 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1172 VECTOR_FOR_INORDER_I(i, u8) {
1173 int index = VBPERMQ_INDEX(b, i);
1176 uint64_t mask = (1ull << (63 - (index & 0x3F)));
1177 if (a->u64[VBPERMQ_DW(index)] & mask) {
1178 perm |= (0x8000 >> i);
1187 #undef VBPERMQ_INDEX
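/*
 * The vpmsum{b,h,w,d} helpers below implement carry-less (GF(2)
 * polynomial) multiplication: partial products are accumulated with XOR
 * rather than addition, and each even/odd pair of products is XORed into
 * one double-width result element.
 */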
1190 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1191 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1194 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \
1196 VECTOR_FOR_INORDER_I(i, srcfld) { \
1198 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1199 if (a->srcfld[i] & (1ull << j)) { \
1200 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1205 VECTOR_FOR_INORDER_I(i, trgfld) { \
1206 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \
1210 PMSUM(vpmsumb, u8, u16, uint16_t)
1211 PMSUM(vpmsumh, u16, u32, uint32_t)
1212 PMSUM(vpmsumw, u32, u64, uint64_t)
1214 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1217 #ifdef CONFIG_INT128
1219 __uint128_t prod[2];
1221 VECTOR_FOR_INORDER_I(i, u64) {
1223 for (j = 0; j < 64; j++) {
1224 if (a->u64[i] & (1ull << j)) {
1225 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1230 r->u128 = prod[0] ^ prod[1];
1236 VECTOR_FOR_INORDER_I(i, u64) {
1237 prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
1238 for (j = 0; j < 64; j++) {
1239 if (a->u64[i] & (1ull << j)) {
1243 bshift.VsrD(1) = b->u64[i];
1245 bshift.VsrD(0) = b->u64[i] >> (64 - j);
1246 bshift.VsrD(1) = b->u64[i] << j;
1248 prod[i].VsrD(1) ^= bshift.VsrD(1);
1249 prod[i].VsrD(0) ^= bshift.VsrD(0);
1254 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
1255 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
1260 #if defined(HOST_WORDS_BIGENDIAN)
1265 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1269 #if defined(HOST_WORDS_BIGENDIAN)
1270 const ppc_avr_t *x[2] = { a, b };
1272 const ppc_avr_t *x[2] = { b, a };
1275 VECTOR_FOR_INORDER_I(i, u64) {
1276 VECTOR_FOR_INORDER_I(j, u32) {
1277 uint32_t e = x[i]->u32[j];
1279 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
1280 ((e >> 6) & 0x3e0) |
1287 #define VPK(suffix, from, to, cvt, dosat) \
1288 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1289 ppc_avr_t *a, ppc_avr_t *b) \
1294 ppc_avr_t *a0 = PKBIG ? a : b; \
1295 ppc_avr_t *a1 = PKBIG ? b : a; \
1297 VECTOR_FOR_INORDER_I(i, from) { \
1298 result.to[i] = cvt(a0->from[i], &sat); \
1299 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
1302 if (dosat && sat) { \
1303 set_vscr_sat(env); \
1307 VPK(shss, s16, s8, cvtshsb, 1)
1308 VPK(shus, s16, u8, cvtshub, 1)
1309 VPK(swss, s32, s16, cvtswsh, 1)
1310 VPK(swus, s32, u16, cvtswuh, 1)
1311 VPK(sdss, s64, s32, cvtsdsw, 1)
1312 VPK(sdus, s64, u32, cvtsduw, 1)
1313 VPK(uhus, u16, u8, cvtuhub, 1)
1314 VPK(uwus, u32, u16, cvtuwuh, 1)
1315 VPK(udus, u64, u32, cvtuduw, 1)
1316 VPK(uhum, u16, u8, I, 0)
1317 VPK(uwum, u32, u16, I, 0)
1318 VPK(udum, u64, u32, I, 0)
1323 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1327 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1328 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
1332 #define VRFI(suffix, rounding) \
1333 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1337 float_status s = env->vec_status; \
1339 set_float_rounding_mode(rounding, &s); \
1340 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
1341 r->f32[i] = float32_round_to_int (b->f32[i], &s); \
1344 VRFI(n, float_round_nearest_even)
1345 VRFI(m, float_round_down)
1346 VRFI(p, float_round_up)
1347 VRFI(z, float_round_to_zero)
1350 #define VROTATE(suffix, element, mask) \
1351 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1355 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1356 unsigned int shift = b->element[i] & mask; \
1357 r->element[i] = (a->element[i] << shift) | \
1358 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1362 VROTATE(h, u16, 0xF)
1363 VROTATE(w, u32, 0x1F)
1364 VROTATE(d, u64, 0x3F)
1367 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1371 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1372 float32 t = float32_sqrt(b->f32[i], &env->vec_status);
1374 r->f32[i] = float32_div(float32_one, t, &env->vec_status);
1378 #define VRLMI(name, size, element, insert) \
1379 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1382 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1383 uint##size##_t src1 = a->element[i]; \
1384 uint##size##_t src2 = b->element[i]; \
1385 uint##size##_t src3 = r->element[i]; \
1386 uint##size##_t begin, end, shift, mask, rot_val; \
1388 shift = extract##size(src2, 0, 6); \
1389 end = extract##size(src2, 8, 6); \
1390 begin = extract##size(src2, 16, 6); \
1391 rot_val = rol##size(src1, shift); \
1392 mask = mask_u##size(begin, end); \
1394 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1396 r->element[i] = (rot_val & mask); \
1401 VRLMI(vrldmi, 64, u64, 1);
1402 VRLMI(vrlwmi, 32, u32, 1);
1403 VRLMI(vrldnm, 64, u64, 0);
1404 VRLMI(vrlwnm, 32, u32, 0);
1406 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1409 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1410 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1413 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1417 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1418 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
1422 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1426 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1427 r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
1431 #if defined(HOST_WORDS_BIGENDIAN)
1432 #define VEXTU_X_DO(name, size, left) \
1433 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1437 index = (a & 0xf) * 8; \
1439 index = ((15 - (a & 0xf) + 1) * 8) - size; \
1441 return int128_getlo(int128_rshift(b->s128, index)) & \
1442 MAKE_64BIT_MASK(0, size); \
1445 #define VEXTU_X_DO(name, size, left) \
1446 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1450 index = ((15 - (a & 0xf) + 1) * 8) - size; \
1452 index = (a & 0xf) * 8; \
1454 return int128_getlo(int128_rshift(b->s128, index)) & \
1455 MAKE_64BIT_MASK(0, size); \
1459 VEXTU_X_DO(vextublx, 8, 1)
1460 VEXTU_X_DO(vextuhlx, 16, 1)
1461 VEXTU_X_DO(vextuwlx, 32, 1)
1462 VEXTU_X_DO(vextubrx, 8, 0)
1463 VEXTU_X_DO(vextuhrx, 16, 0)
1464 VEXTU_X_DO(vextuwrx, 32, 0)
1467 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1470 unsigned int shift, bytes, size;
1472 size = ARRAY_SIZE(r->u8);
1473 for (i = 0; i < size; i++) {
1474 shift = b->VsrB(i) & 0x7; /* extract shift value */
1475 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */
1476 (((i + 1) < size) ? a->VsrB(i + 1) : 0);
1477 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */
1481 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1484 unsigned int shift, bytes;
     * Iterate in reverse order, as the destination and source registers can
     * be the same.  The register is modified in place, which saves a
     * temporary, and the reverse order guarantees that a computed result is
     * not fed back into a later iteration.
1491 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1492 shift = b->VsrB(i) & 0x7; /* extract shift value */
1493 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
1494 /* extract adjacent bytes */
1495 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */
1499 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1501 int sh = shift & 0xf;
1505 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1508 result.VsrB(i) = b->VsrB(index - 0x10);
1510 result.VsrB(i) = a->VsrB(index);
1516 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1518 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1520 #if defined(HOST_WORDS_BIGENDIAN)
1521 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1522 memset(&r->u8[16 - sh], 0, sh);
1524 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1525 memset(&r->u8[0], 0, sh);
1529 #if defined(HOST_WORDS_BIGENDIAN)
1530 #define VINSERT(suffix, element) \
1531 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1533 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])], \
1534 sizeof(r->element[0])); \
1537 #define VINSERT(suffix, element) \
1538 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1540 uint32_t d = (16 - index) - sizeof(r->element[0]); \
1541 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \
1549 #if defined(HOST_WORDS_BIGENDIAN)
1550 #define VEXTRACT(suffix, element) \
1551 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1553 uint32_t es = sizeof(r->element[0]); \
1554 memmove(&r->u8[8 - es], &b->u8[index], es); \
1555 memset(&r->u8[8], 0, 8); \
1556 memset(&r->u8[0], 0, 8 - es); \
1559 #define VEXTRACT(suffix, element) \
1560 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1562 uint32_t es = sizeof(r->element[0]); \
1563 uint32_t s = (16 - index) - es; \
1564 memmove(&r->u8[8], &b->u8[s], es); \
1565 memset(&r->u8[0], 0, 8); \
1566 memset(&r->u8[8 + es], 0, 8 - es); \
1575 void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt,
1576 ppc_vsr_t *xb, uint32_t index)
1579 size_t es = sizeof(uint32_t);
1584 for (i = 0; i < es; i++, ext_index++) {
1585 t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
1591 void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt,
1592 ppc_vsr_t *xb, uint32_t index)
1595 size_t es = sizeof(uint32_t);
1596 int ins_index, i = 0;
1599 for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
1600 t.VsrB(ins_index) = xb->VsrB(8 - es + i);
1606 #define VEXT_SIGNED(name, element, cast) \
1607 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1610 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1611 r->element[i] = (cast)b->element[i]; \
1614 VEXT_SIGNED(vextsb2w, s32, int8_t)
1615 VEXT_SIGNED(vextsb2d, s64, int8_t)
1616 VEXT_SIGNED(vextsh2w, s32, int16_t)
1617 VEXT_SIGNED(vextsh2d, s64, int16_t)
1618 VEXT_SIGNED(vextsw2d, s64, int32_t)
1621 #define VNEG(name, element) \
1622 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1625 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1626 r->element[i] = -b->element[i]; \
1633 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1635 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1637 #if defined(HOST_WORDS_BIGENDIAN)
1638 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1639 memset(&r->u8[0], 0, sh);
1641 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1642 memset(&r->u8[16 - sh], 0, sh);
1646 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1650 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1651 r->u32[i] = a->u32[i] >= b->u32[i];
1655 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1662 upper = ARRAY_SIZE(r->s32) - 1;
1663 t = (int64_t)b->VsrSW(upper);
1664 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1666 result.VsrSW(i) = 0;
1668 result.VsrSW(upper) = cvtsdsw(t, &sat);
1676 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1683 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1684 int64_t t = (int64_t)b->VsrSW(upper + i * 2);
1687 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1688 t += a->VsrSW(2 * i + j);
1690 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
1699 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1704 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1705 int64_t t = (int64_t)b->s32[i];
1707 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1708 t += a->s8[4 * i + j];
1710 r->s32[i] = cvtsdsw(t, &sat);
1718 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1723 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1724 int64_t t = (int64_t)b->s32[i];
1726 t += a->s16[2 * i] + a->s16[2 * i + 1];
1727 r->s32[i] = cvtsdsw(t, &sat);
1735 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1740 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1741 uint64_t t = (uint64_t)b->u32[i];
1743 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1744 t += a->u8[4 * i + j];
1746 r->u32[i] = cvtuduw(t, &sat);
1754 #if defined(HOST_WORDS_BIGENDIAN)
1761 #define VUPKPX(suffix, hi) \
1762 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1767 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
1768 uint16_t e = b->u16[hi ? i : i + 4]; \
1769 uint8_t a = (e >> 15) ? 0xff : 0; \
1770 uint8_t r = (e >> 10) & 0x1f; \
1771 uint8_t g = (e >> 5) & 0x1f; \
1772 uint8_t b = e & 0x1f; \
1774 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
1782 #define VUPK(suffix, unpacked, packee, hi) \
1783 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
1789 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
1790 result.unpacked[i] = b->packee[i]; \
1793 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1795 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1800 VUPK(hsb, s16, s8, UPKHI)
1801 VUPK(hsh, s32, s16, UPKHI)
1802 VUPK(hsw, s64, s32, UPKHI)
1803 VUPK(lsb, s16, s8, UPKLO)
1804 VUPK(lsh, s32, s16, UPKLO)
1805 VUPK(lsw, s64, s32, UPKLO)
1810 #define VGENERIC_DO(name, element) \
1811 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
1815 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1816 r->element[i] = name(b->element[i]); \
1820 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1821 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1823 VGENERIC_DO(clzb, u8)
1824 VGENERIC_DO(clzh, u16)
1829 #define ctzb(v) ((v) ? ctz32(v) : 8)
1830 #define ctzh(v) ((v) ? ctz32(v) : 16)
1831 #define ctzw(v) ctz32((v))
1832 #define ctzd(v) ctz64((v))
1834 VGENERIC_DO(ctzb, u8)
1835 VGENERIC_DO(ctzh, u16)
1836 VGENERIC_DO(ctzw, u32)
1837 VGENERIC_DO(ctzd, u64)
1844 #define popcntb(v) ctpop8(v)
1845 #define popcnth(v) ctpop16(v)
1846 #define popcntw(v) ctpop32(v)
1847 #define popcntd(v) ctpop64(v)
1849 VGENERIC_DO(popcntb, u8)
1850 VGENERIC_DO(popcnth, u16)
1851 VGENERIC_DO(popcntw, u32)
1852 VGENERIC_DO(popcntd, u64)
1861 #if defined(HOST_WORDS_BIGENDIAN)
1862 #define QW_ONE { .u64 = { 0, 1 } }
1864 #define QW_ONE { .u64 = { 1, 0 } }
1867 #ifndef CONFIG_INT128
1869 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
1871 t->u64[0] = ~a.u64[0];
1872 t->u64[1] = ~a.u64[1];
1875 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
1877 if (a.VsrD(0) < b.VsrD(0)) {
1879 } else if (a.VsrD(0) > b.VsrD(0)) {
1881 } else if (a.VsrD(1) < b.VsrD(1)) {
1883 } else if (a.VsrD(1) > b.VsrD(1)) {
1890 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1892 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
1893 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
1894 (~a.VsrD(1) < b.VsrD(1));
1897 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1900 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
1901 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
1902 (~a.VsrD(1) < b.VsrD(1));
1903 avr_qw_not(¬_a, a);
1904 return avr_qw_cmpu(not_a, b) < 0;
1909 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1911 #ifdef CONFIG_INT128
1912 r->u128 = a->u128 + b->u128;
1914 avr_qw_add(r, *a, *b);
1918 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1920 #ifdef CONFIG_INT128
1921 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
1924 if (c->VsrD(1) & 1) {
1928 tmp.VsrD(1) = c->VsrD(1) & 1;
1929 avr_qw_add(&tmp, *a, tmp);
1930 avr_qw_add(r, tmp, *b);
1932 avr_qw_add(r, *a, *b);
1937 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1939 #ifdef CONFIG_INT128
1940 r->u128 = (~a->u128 < b->u128);
1944 avr_qw_not(¬_a, *a);
1947 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
1951 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1953 #ifdef CONFIG_INT128
1954 int carry_out = (~a->u128 < b->u128);
1955 if (!carry_out && (c->u128 & 1)) {
1956 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
1957 ((a->u128 != 0) || (b->u128 != 0));
1959 r->u128 = carry_out;
1962 int carry_in = c->VsrD(1) & 1;
1966 carry_out = avr_qw_addc(&tmp, *a, *b);
1968 if (!carry_out && carry_in) {
1969 ppc_avr_t one = QW_ONE;
1970 carry_out = avr_qw_addc(&tmp, tmp, one);
1973 r->VsrD(1) = carry_out;
1977 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1979 #ifdef CONFIG_INT128
1980 r->u128 = a->u128 - b->u128;
1983 ppc_avr_t one = QW_ONE;
1985 avr_qw_not(&tmp, *b);
1986 avr_qw_add(&tmp, *a, tmp);
1987 avr_qw_add(r, tmp, one);
1991 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1993 #ifdef CONFIG_INT128
1994 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
1998 avr_qw_not(&tmp, *b);
1999 avr_qw_add(&sum, *a, tmp);
2002 tmp.VsrD(1) = c->VsrD(1) & 1;
2003 avr_qw_add(r, sum, tmp);
2007 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2009 #ifdef CONFIG_INT128
2010 r->u128 = (~a->u128 < ~b->u128) ||
2011 (a->u128 + ~b->u128 == (__uint128_t)-1);
2013 int carry = (avr_qw_cmpu(*a, *b) > 0);
2016 avr_qw_not(&tmp, *b);
2017 avr_qw_add(&tmp, *a, tmp);
2018 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
2025 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2027 #ifdef CONFIG_INT128
2029 (~a->u128 < ~b->u128) ||
2030 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2032 int carry_in = c->VsrD(1) & 1;
2033 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2034 if (!carry_out && carry_in) {
2036 avr_qw_not(&tmp, *b);
2037 avr_qw_add(&tmp, *a, tmp);
2038 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
2042 r->VsrD(1) = carry_out;
2046 #define BCD_PLUS_PREF_1 0xC
2047 #define BCD_PLUS_PREF_2 0xF
2048 #define BCD_PLUS_ALT_1 0xA
2049 #define BCD_NEG_PREF 0xD
2050 #define BCD_NEG_ALT 0xB
2051 #define BCD_PLUS_ALT_2 0xE
2052 #define NATIONAL_PLUS 0x2B
2053 #define NATIONAL_NEG 0x2D
2055 #define BCD_DIG_BYTE(n) (15 - ((n) / 2))
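/*
 * Packed BCD layout used by the helpers below: digit 0, the least
 * significant nibble of the 128-bit value, is the sign nibble; digits
 * 1..31 are the decimal digits from least to most significant.  Digit n
 * lives in byte BCD_DIG_BYTE(n), odd digits in the high nibble and even
 * digits in the low nibble.
 */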
2057 static int bcd_get_sgn(ppc_avr_t *bcd)
2059 switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
2060 case BCD_PLUS_PREF_1:
2061 case BCD_PLUS_PREF_2:
2062 case BCD_PLUS_ALT_1:
2063 case BCD_PLUS_ALT_2:
2081 static int bcd_preferred_sgn(int sgn, int ps)
2084 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2086 return BCD_NEG_PREF;
2090 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2094 result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
2096 result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
2099 if (unlikely(result > 9)) {
2105 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2108 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
2109 bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
2111 bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
2112 bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
2116 static bool bcd_is_valid(ppc_avr_t *bcd)
2121 if (bcd_get_sgn(bcd) == 0) {
2125 for (i = 1; i < 32; i++) {
2126 bcd_get_digit(bcd, i, &invalid);
2127 if (unlikely(invalid)) {
2134 static int bcd_cmp_zero(ppc_avr_t *bcd)
2136 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
2139 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2143 static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2145 return reg->VsrH(7 - n);
2148 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2150 reg->VsrH(7 - n) = val;
2153 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2157 for (i = 31; i > 0; i--) {
2158 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2159 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2160 if (unlikely(invalid)) {
2161 return 0; /* doesn't matter */
2162 } else if (dig_a > dig_b) {
2164 } else if (dig_a < dig_b) {
2172 static void bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2177 for (i = 1; i <= 31; i++) {
2178 uint8_t digit = bcd_get_digit(a, i, invalid) +
2179 bcd_get_digit(b, i, invalid) + carry;
2187 bcd_put_digit(t, digit, i);
2193 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2199 for (i = 1; i <= 31; i++) {
2200 uint8_t digit = bcd_get_digit(a, i, invalid) -
2201 bcd_get_digit(b, i, invalid) + carry;
2209 bcd_put_digit(t, digit, i);
2215 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2218 int sgna = bcd_get_sgn(a);
2219 int sgnb = bcd_get_sgn(b);
2220 int invalid = (sgna == 0) || (sgnb == 0);
2223 ppc_avr_t result = { .u64 = { 0, 0 } };
2227 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2228 bcd_add_mag(&result, a, b, &invalid, &overflow);
2229 cr = bcd_cmp_zero(&result);
2231 int magnitude = bcd_cmp_mag(a, b);
2232 if (magnitude > 0) {
2233 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2234 bcd_sub_mag(&result, a, b, &invalid, &overflow);
2235 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2236 } else if (magnitude < 0) {
2237 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
2238 bcd_sub_mag(&result, b, a, &invalid, &overflow);
2239 cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2241 result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
2247 if (unlikely(invalid)) {
2248 result.VsrD(0) = result.VsrD(1) = -1;
2250 } else if (overflow) {
2259 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2261 ppc_avr_t bcopy = *b;
2262 int sgnb = bcd_get_sgn(b);
2264 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2265 } else if (sgnb > 0) {
2266 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2268 /* else invalid ... defer to bcdadd code for proper handling */
2270 return helper_bcdadd(r, a, &bcopy, ps);
2273 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2277 uint16_t national = 0;
2278 uint16_t sgnb = get_national_digit(b, 0);
2279 ppc_avr_t ret = { .u64 = { 0, 0 } };
2280 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2282 for (i = 1; i < 8; i++) {
2283 national = get_national_digit(b, i);
2284 if (unlikely(national < 0x30 || national > 0x39)) {
2289 bcd_put_digit(&ret, national & 0xf, i);
2292 if (sgnb == NATIONAL_PLUS) {
2293 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2295 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2298 cr = bcd_cmp_zero(&ret);
2300 if (unlikely(invalid)) {
2309 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2313 int sgnb = bcd_get_sgn(b);
2314 int invalid = (sgnb == 0);
2315 ppc_avr_t ret = { .u64 = { 0, 0 } };
2317 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
2319 for (i = 1; i < 8; i++) {
2320 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2322 if (unlikely(invalid)) {
2326 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2328 cr = bcd_cmp_zero(b);
2334 if (unlikely(invalid)) {
2343 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2349 int zone_lead = ps ? 0xF : 0x3;
2351 ppc_avr_t ret = { .u64 = { 0, 0 } };
2352 int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;
2354 if (unlikely((sgnb < 0xA) && ps)) {
2358 for (i = 0; i < 16; i++) {
2359 zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
2360 digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
2361 if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2366 bcd_put_digit(&ret, digit, i + 1);
2369 if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2370 (!ps && (sgnb & 0x4))) {
2371 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2373 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2376 cr = bcd_cmp_zero(&ret);
2378 if (unlikely(invalid)) {
2387 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2392 int sgnb = bcd_get_sgn(b);
2393 int zone_lead = (ps) ? 0xF0 : 0x30;
2394 int invalid = (sgnb == 0);
2395 ppc_avr_t ret = { .u64 = { 0, 0 } };
2397 int ox_flag = ((b->VsrD(0) >> 4) != 0);
2399 for (i = 0; i < 16; i++) {
2400 digit = bcd_get_digit(b, i + 1, &invalid);
2402 if (unlikely(invalid)) {
2406 ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
2410 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2412 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2415 cr = bcd_cmp_zero(b);
2421 if (unlikely(invalid)) {
2430 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2436 ppc_avr_t ret = { .u64 = { 0, 0 } };
2438 if (b->VsrSD(0) < 0) {
2439 lo_value = -b->VsrSD(1);
2440 hi_value = ~b->VsrD(0) + !lo_value;
2441 bcd_put_digit(&ret, 0xD, 0);
2443 lo_value = b->VsrD(1);
2444 hi_value = b->VsrD(0);
2445 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
2448 if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
2449 lo_value > 9999999999999999ULL) {
2453 for (i = 1; i < 16; hi_value /= 10, i++) {
2454 bcd_put_digit(&ret, hi_value % 10, i);
2457 for (; i < 32; lo_value /= 10, i++) {
2458 bcd_put_digit(&ret, lo_value % 10, i);
2461 cr |= bcd_cmp_zero(&ret);
2468 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2475 uint64_t hi_value = 0;
2476 int sgnb = bcd_get_sgn(b);
2477 int invalid = (sgnb == 0);
2479 lo_value = bcd_get_digit(b, 31, &invalid);
2480 for (i = 30; i > 0; i--) {
2481 mulu64(&lo_value, &carry, lo_value, 10ULL);
2482 mulu64(&hi_value, &unused, hi_value, 10ULL);
2483 lo_value += bcd_get_digit(b, i, &invalid);
2486 if (unlikely(invalid)) {
2492 r->VsrSD(1) = -lo_value;
2493 r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
2495 r->VsrSD(1) = lo_value;
2496 r->VsrSD(0) = hi_value;
2499 cr = bcd_cmp_zero(b);
2501 if (unlikely(invalid)) {
2508 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2513 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
2518 bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);
2520 for (i = 1; i < 32; i++) {
2521 bcd_get_digit(a, i, &invalid);
2522 bcd_get_digit(b, i, &invalid);
2523 if (unlikely(invalid)) {
2528 return bcd_cmp_zero(r);
2531 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2533 int sgnb = bcd_get_sgn(b);
2536 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
2538 if (bcd_is_valid(b) == false) {
2542 return bcd_cmp_zero(r);
2545 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2548 int i = a->VsrSB(7);
2549 bool ox_flag = false;
2550 int sgnb = bcd_get_sgn(b);
2552 ret.VsrD(1) &= ~0xf;
2554 if (bcd_is_valid(b) == false) {
2558 if (unlikely(i > 31)) {
2560 } else if (unlikely(i < -31)) {
2565 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2567 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2569 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2573 cr = bcd_cmp_zero(r);
2581 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2586 bool ox_flag = false;
2589 for (i = 0; i < 32; i++) {
2590 bcd_get_digit(b, i, &invalid);
2592 if (unlikely(invalid)) {
2600 ret.VsrD(1) = ret.VsrD(0) = 0;
2601 } else if (i <= -32) {
2602 ret.VsrD(1) = ret.VsrD(0) = 0;
2604 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2606 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2610 cr = bcd_cmp_zero(r);
2618 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2623 bool ox_flag = false;
2624 int sgnb = bcd_get_sgn(b);
2626 ret.VsrD(1) &= ~0xf;
2628 int i = a->VsrSB(7);
2631 bcd_one.VsrD(0) = 0;
2632 bcd_one.VsrD(1) = 0x10;
2634 if (bcd_is_valid(b) == false) {
2638 if (unlikely(i > 31)) {
2640 } else if (unlikely(i < -31)) {
2645 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2647 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2649 if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
2650 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
2653 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2655 cr = bcd_cmp_zero(&ret);
2664 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2667 uint32_t ox_flag = 0;
2668 int i = a->VsrSH(3) + 1;
2671 if (bcd_is_valid(b) == false) {
2675 if (i > 16 && i < 32) {
2676 mask = (uint64_t)-1 >> (128 - i * 4);
2677 if (ret.VsrD(0) & ~mask) {
2681 ret.VsrD(0) &= mask;
2682 } else if (i >= 0 && i <= 16) {
2683 mask = (uint64_t)-1 >> (64 - i * 4);
2684 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2688 ret.VsrD(1) &= mask;
2691 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
2694 return bcd_cmp_zero(&ret) | ox_flag;
2697 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2701 uint32_t ox_flag = 0;
2705 for (i = 0; i < 32; i++) {
2706 bcd_get_digit(b, i, &invalid);
2708 if (unlikely(invalid)) {
2714 if (i > 16 && i < 33) {
2715 mask = (uint64_t)-1 >> (128 - i * 4);
2716 if (ret.VsrD(0) & ~mask) {
2720 ret.VsrD(0) &= mask;
2721 } else if (i > 0 && i <= 16) {
2722 mask = (uint64_t)-1 >> (64 - i * 4);
2723 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2727 ret.VsrD(1) &= mask;
2729 } else if (i == 0) {
2730 if (ret.VsrD(0) || ret.VsrD(1)) {
2733 ret.VsrD(0) = ret.VsrD(1) = 0;
2737 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
2738 return ox_flag | CRF_EQ;
2741 return ox_flag | CRF_GT;
2744 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2747 VECTOR_FOR_INORDER_I(i, u8) {
2748 r->u8[i] = AES_sbox[a->u8[i]];
2752 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2757 VECTOR_FOR_INORDER_I(i, u32) {
2758 result.VsrW(i) = b->VsrW(i) ^
2759 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
2760 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
2761 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
2762 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
2767 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2772 VECTOR_FOR_INORDER_I(i, u8) {
2773 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
2778 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
    /*
     * This differs from what is written in ISA V2.07.  The RTL is incorrect
     * and will be fixed in V2.07B.
     */
2785 VECTOR_FOR_INORDER_I(i, u8) {
2786 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
2789 VECTOR_FOR_INORDER_I(i, u32) {
2791 AES_imc[tmp.VsrB(4 * i + 0)][0] ^
2792 AES_imc[tmp.VsrB(4 * i + 1)][1] ^
2793 AES_imc[tmp.VsrB(4 * i + 2)][2] ^
2794 AES_imc[tmp.VsrB(4 * i + 3)][3];
2798 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2803 VECTOR_FOR_INORDER_I(i, u8) {
2804 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
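/*
 * vshasigmaw and vshasigmad below evaluate the SHA-256 and SHA-512 sigma
 * functions: st selects the upper-case Sigma variants, and each selected
 * bit of six picks sigma1/Sigma1 over sigma0/Sigma0 for that element.
 */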
2809 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2811 int st = (st_six & 0x10) != 0;
2812 int six = st_six & 0xF;
2815 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2817 if ((six & (0x8 >> i)) == 0) {
2818 r->VsrW(i) = ror32(a->VsrW(i), 7) ^
2819 ror32(a->VsrW(i), 18) ^
2821 } else { /* six.bit[i] == 1 */
2822 r->VsrW(i) = ror32(a->VsrW(i), 17) ^
2823 ror32(a->VsrW(i), 19) ^
2826 } else { /* st == 1 */
2827 if ((six & (0x8 >> i)) == 0) {
2828 r->VsrW(i) = ror32(a->VsrW(i), 2) ^
2829 ror32(a->VsrW(i), 13) ^
2830 ror32(a->VsrW(i), 22);
2831 } else { /* six.bit[i] == 1 */
2832 r->VsrW(i) = ror32(a->VsrW(i), 6) ^
2833 ror32(a->VsrW(i), 11) ^
2834 ror32(a->VsrW(i), 25);
2840 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
2842 int st = (st_six & 0x10) != 0;
2843 int six = st_six & 0xF;
2846 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2848 if ((six & (0x8 >> (2 * i))) == 0) {
2849 r->VsrD(i) = ror64(a->VsrD(i), 1) ^
2850 ror64(a->VsrD(i), 8) ^
2852 } else { /* six.bit[2*i] == 1 */
2853 r->VsrD(i) = ror64(a->VsrD(i), 19) ^
2854 ror64(a->VsrD(i), 61) ^
2857 } else { /* st == 1 */
2858 if ((six & (0x8 >> (2 * i))) == 0) {
2859 r->VsrD(i) = ror64(a->VsrD(i), 28) ^
2860 ror64(a->VsrD(i), 34) ^
2861 ror64(a->VsrD(i), 39);
2862 } else { /* six.bit[2*i] == 1 */
2863 r->VsrD(i) = ror64(a->VsrD(i), 14) ^
2864 ror64(a->VsrD(i), 18) ^
2865 ror64(a->VsrD(i), 41);
2871 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2876 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
2877 int indexA = c->VsrB(i) >> 4;
2878 int indexB = c->VsrB(i) & 0xF;
2880 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
2885 #undef VECTOR_FOR_INORDER_I
2887 /*****************************************************************************/
2888 /* SPE extension helpers */
2889 /* Use a table to make this quicker */
2890 static const uint8_t hbrev[16] = {
2891 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
2892 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
2895 static inline uint8_t byte_reverse(uint8_t val)
2897 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
2900 static inline uint32_t word_reverse(uint32_t val)
2902 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
2903 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
2906 #define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
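/*
 * brinc produces the next index for bit-reversed (FFT-style) addressing:
 * the bits of arg1 selected by the mask in arg2 are incremented as if
 * their bit order were reversed; ORing with ~b lets the carry ripple
 * past the unselected bit positions.
 */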
2907 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
2909 uint32_t a, b, d, mask;
2911 mask = UINT32_MAX >> (32 - MASKBITS);
2914 d = word_reverse(1 + word_reverse(a | ~b));
2915 return (arg1 & ~mask) | (d & b);
2918 uint32_t helper_cntlsw32(uint32_t val)
2920 if (val & 0x80000000) {
2927 uint32_t helper_cntlzw32(uint32_t val)
2933 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
2934 target_ulong low, uint32_t update_Rc)
2940 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2941 if ((high & mask) == 0) {
2949 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2950 if ((low & mask) == 0) {
2963 env->xer = (env->xer & ~0x7F) | i;
2965 env->crf[0] |= xer_so;