2 * PowerPC integer and vector emulation helpers for QEMU.
4 * Copyright (c) 2003-2007 Jocelyn Mayer
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
22 #include "qemu/host-utils.h"
23 #include "exec/helper-proto.h"
24 #include "crypto/aes.h"
25 #include "fpu/softfloat.h"
27 #include "helper_regs.h"
28 /*****************************************************************************/
29 /* Fixed point operations helpers */
31 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
34 env->so = env->ov = 1;
40 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
46 uint64_t dividend = (uint64_t)ra << 32;
47 uint64_t divisor = (uint32_t)rb;
49 if (unlikely(divisor == 0)) {
52 rt = dividend / divisor;
53 overflow = rt > UINT32_MAX;
56 if (unlikely(overflow)) {
57 rt = 0; /* Undefined */
61 helper_update_ov_legacy(env, overflow);
64 return (target_ulong)rt;
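/*
 * Worked example for divweu (illustrative, not from the original source):
 * with ra = 1 and rb = 2 the dividend is 1 << 32 = 0x1_0000_0000, so
 * rt = 0x8000_0000 and there is no overflow; with ra = 2 and rb = 2 the
 * quotient is 0x1_0000_0000, which no longer fits in 32 bits, so OV is
 * set and rt is undefined.
 */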
67 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
73 int64_t dividend = (int64_t)ra << 32;
74 int64_t divisor = (int64_t)((int32_t)rb);
76 if (unlikely((divisor == 0) ||
77 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
80 rt = dividend / divisor;
81 overflow = rt != (int32_t)rt;
84 if (unlikely(overflow)) {
85 rt = 0; /* Undefined */
89 helper_update_ov_legacy(env, overflow);
92 return (target_ulong)rt;
95 #if defined(TARGET_PPC64)
97 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
102 overflow = divu128(&rt, &ra, rb);
104 if (unlikely(overflow)) {
105 rt = 0; /* Undefined */
109 helper_update_ov_legacy(env, overflow);
115 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
118 int64_t ra = (int64_t)rau;
119 int64_t rb = (int64_t)rbu;
120 int overflow = divs128(&rt, &ra, rb);
122 if (unlikely(overflow)) {
123 rt = 0; /* Undefined */
127 helper_update_ov_legacy(env, overflow);
136 #if defined(TARGET_PPC64)
138 /* if x = 0xab, returns 0xabababababababab */
138 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
140 /* subtract 1 from each byte, AND with the inverse, then check if the MSB is set in each byte,
142 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
143 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
145 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
147 /* When you XOR the pattern and there is a match, that byte will be zero */
148 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
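/*
 * Worked example (illustrative): cmpeqb looks for the byte held in ra
 * anywhere in rb.  With ra = 0xab and rb = 0x00000000000000ab:
 *   rb ^ pattern(0xab) = 0xababababababab00   (the matching byte becomes zero)
 *   haszero(...)       = 0x0000000000000080   (non-zero, so a match was found)
 * and helper_cmpeqb() therefore returns CRF_GT.
 */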
150 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
152 return hasvalue(rb, ra) ? CRF_GT : 0;
159 /* Return invalid random number.
161  * FIXME: Add rng backend or other mechanism to get cryptographically
 * suitable random numbers.
 */
164 target_ulong helper_darn32(void)
169 target_ulong helper_darn64(void)
176 #if defined(TARGET_PPC64)
178 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
183 for (i = 0; i < 8; i++) {
184 int index = (rs >> (i*8)) & 0xFF;
186 if (rb & PPC_BIT(index)) {
196 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
198 target_ulong mask = 0xff;
202 for (i = 0; i < sizeof(target_ulong); i++) {
203 if ((rs & mask) == (rb & mask)) {
211 /* shift right arithmetic helper */
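/*
 * Illustrative example: sraw of value = 0xfffffff5 (-11) by shift = 2
 * yields -3 and sets CA, because the result is negative and non-zero bits
 * (0x1) were shifted out; shifting a positive value, or losing only zero
 * bits, leaves CA clear.
 */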
212 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
217 if (likely(!(shift & 0x20))) {
218 if (likely((uint32_t)shift != 0)) {
220 ret = (int32_t)value >> shift;
221 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
222 env->ca32 = env->ca = 0;
224 env->ca32 = env->ca = 1;
227 ret = (int32_t)value;
228 env->ca32 = env->ca = 0;
231 ret = (int32_t)value >> 31;
232 env->ca32 = env->ca = (ret != 0);
234 return (target_long)ret;
237 #if defined(TARGET_PPC64)
238 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
243 if (likely(!(shift & 0x40))) {
244 if (likely((uint64_t)shift != 0)) {
246 ret = (int64_t)value >> shift;
247 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
248 env->ca32 = env->ca = 0;
250 env->ca32 = env->ca = 1;
253 ret = (int64_t)value;
254 env->ca32 = env->ca = 0;
257 ret = (int64_t)value >> 63;
258 env->ca32 = env->ca = (ret != 0);
264 #if defined(TARGET_PPC64)
265 target_ulong helper_popcntb(target_ulong val)
267 /* Note that we don't fold past bytes */
268 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
269 0x5555555555555555ULL);
270 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
271 0x3333333333333333ULL);
272 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
273 0x0f0f0f0f0f0f0f0fULL);
277 target_ulong helper_popcntw(target_ulong val)
279 /* Note that we don't fold past words. */
280 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
281 0x5555555555555555ULL);
282 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
283 0x3333333333333333ULL);
284 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
285 0x0f0f0f0f0f0f0f0fULL);
286 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
287 0x00ff00ff00ff00ffULL);
288 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
289 0x0000ffff0000ffffULL);
293 target_ulong helper_popcntb(target_ulong val)
295 /* Note that we don't fold past bytes */
296 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
297 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
298 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
303 /*****************************************************************************/
304 /* PowerPC 601 specific instructions (POWER bridge) */
305 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
307 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
309 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
310 (int32_t)arg2 == 0) {
311 env->spr[SPR_MQ] = 0;
314 env->spr[SPR_MQ] = tmp % arg2;
315 return tmp / (int32_t)arg2;
319 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
322 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
324 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
325 (int32_t)arg2 == 0) {
326 env->so = env->ov = 1;
327 env->spr[SPR_MQ] = 0;
330 env->spr[SPR_MQ] = tmp % arg2;
331 tmp /= (int32_t)arg2;
332 if ((int32_t)tmp != tmp) {
333 env->so = env->ov = 1;
341 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
344 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
345 (int32_t)arg2 == 0) {
346 env->spr[SPR_MQ] = 0;
349 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
350 return (int32_t)arg1 / (int32_t)arg2;
354 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
357 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
358 (int32_t)arg2 == 0) {
359 env->so = env->ov = 1;
360 env->spr[SPR_MQ] = 0;
364 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
365 return (int32_t)arg1 / (int32_t)arg2;
369 /*****************************************************************************/
370 /* 602 specific instructions */
371 /* mfrom is the craziest instruction ever seen, imho! */
372 /* Real implementation uses a ROM table. Do the same */
373 /* Extremely decomposed:
375  * return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
 */
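/*
 * For example (illustrative), arg = 0 gives 256 * log10(2) + 0.5, roughly
 * 77.6, so the ROM table entry for 0 is expected to be about 77.
 */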
377 #if !defined(CONFIG_USER_ONLY)
378 target_ulong helper_602_mfrom(target_ulong arg)
380 if (likely(arg < 602)) {
381 #include "mfrom_table.inc.c"
382 return mfrom_ROM_table[arg];
389 /*****************************************************************************/
390 /* Altivec extension helpers */
391 #if defined(HOST_WORDS_BIGENDIAN)
392 #define VECTOR_FOR_INORDER_I(index, element) \
393 for (index = 0; index < ARRAY_SIZE(r->element); index++)
395 #define VECTOR_FOR_INORDER_I(index, element) \
396 for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
399 /* Saturating arithmetic helpers. */
400 #define SATCVT(from, to, from_type, to_type, min, max) \
401 static inline to_type cvt##from##to(from_type x, int *sat) \
405 if (x < (from_type)min) { \
408 } else if (x > (from_type)max) { \
416 #define SATCVTU(from, to, from_type, to_type, min, max) \
417 static inline to_type cvt##from##to(from_type x, int *sat) \
421 if (x > (from_type)max) { \
429 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
430 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
431 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
433 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
434 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
435 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
436 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
437 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
438 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
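/*
 * Illustrative example: cvtsdsw() clamps a 64-bit sum into a signed word,
 * so cvtsdsw(0x1_0000_0000, &sat) returns INT32_MAX and sets *sat, while
 * cvtsdsw(-5, &sat) returns -5 and leaves *sat untouched.
 */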
442 void helper_lvsl(ppc_avr_t *r, target_ulong sh)
444 int i, j = (sh & 0xf);
446 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
451 void helper_lvsr(ppc_avr_t *r, target_ulong sh)
453 int i, j = 0x10 - (sh & 0xf);
455 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
460 void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
462 env->vscr = vscr & ~(1u << VSCR_SAT);
463 /* Which bit we set is completely arbitrary, but clear the rest. */
464 env->vscr_sat.u64[0] = vscr & (1u << VSCR_SAT);
465 env->vscr_sat.u64[1] = 0;
466 set_flush_to_zero((vscr >> VSCR_NJ) & 1, &env->vec_status);
469 uint32_t helper_mfvscr(CPUPPCState *env)
471 uint32_t sat = (env->vscr_sat.u64[0] | env->vscr_sat.u64[1]) != 0;
472 return env->vscr | (sat << VSCR_SAT);
475 static inline void set_vscr_sat(CPUPPCState *env)
477 /* The choice of non-zero value is arbitrary. */
478 env->vscr_sat.u32[0] = 1;
481 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
485 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
486 r->u32[i] = ~a->u32[i] < b->u32[i];
491 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
494 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
495 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
502 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
505 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
506 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
514 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
516 uint64_t res = b->u64[0] ^ b->u64[1];
520 r->VsrD(1) = res & 1;
524 #define VARITH_DO(name, op, element) \
525 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
529 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
530 r->element[i] = a->element[i] op b->element[i]; \
533 VARITH_DO(muluwm, *, u32)
537 #define VARITHFP(suffix, func) \
538 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
543 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
544 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \
547 VARITHFP(addfp, float32_add)
548 VARITHFP(subfp, float32_sub)
549 VARITHFP(minfp, float32_min)
550 VARITHFP(maxfp, float32_max)
553 #define VARITHFPFMA(suffix, type) \
554 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
555 ppc_avr_t *b, ppc_avr_t *c) \
558 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
559 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
560 type, &env->vec_status); \
563 VARITHFPFMA(maddfp, 0);
564 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
567 #define VARITHSAT_CASE(type, op, cvt, element) \
569 type result = (type)a->element[i] op (type)b->element[i]; \
570 r->element[i] = cvt(result, &sat); \
573 #define VARITHSAT_DO(name, op, optype, cvt, element) \
574 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \
575 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
580 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
581 VARITHSAT_CASE(optype, op, cvt, element); \
584 vscr_sat->u32[0] = 1; \
587 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
588 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
589 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
590 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
591 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
592 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
593 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
594 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
595 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
596 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
597 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
598 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
599 #undef VARITHSAT_CASE
601 #undef VARITHSAT_SIGNED
602 #undef VARITHSAT_UNSIGNED
604 #define VAVG_DO(name, element, etype) \
605 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
609 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
610 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
611 r->element[i] = x >> 1; \
615 #define VAVG(type, signed_element, signed_type, unsigned_element, \
617 VAVG_DO(avgs##type, signed_element, signed_type) \
618 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
619 VAVG(b, s8, int16_t, u8, uint16_t)
620 VAVG(h, s16, int32_t, u16, uint32_t)
621 VAVG(w, s32, int64_t, u32, uint64_t)
625 #define VABSDU_DO(name, element) \
626 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
630 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
631 r->element[i] = (a->element[i] > b->element[i]) ? \
632 (a->element[i] - b->element[i]) : \
633 (b->element[i] - a->element[i]); \
637 /* VABSDU - Vector absolute difference unsigned
638 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word)
639 * element - element type to access from vector
641 #define VABSDU(type, element) \
642 VABSDU_DO(absdu##type, element)
649 #define VCF(suffix, cvt, element) \
650 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
651 ppc_avr_t *b, uint32_t uim) \
655 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
656 float32 t = cvt(b->element[i], &env->vec_status); \
657 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \
660 VCF(ux, uint32_to_float32, u32)
661 VCF(sx, int32_to_float32, s32)
664 #define VCMP_DO(suffix, compare, element, record) \
665 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
666 ppc_avr_t *a, ppc_avr_t *b) \
668 uint64_t ones = (uint64_t)-1; \
669 uint64_t all = ones; \
673 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
674 uint64_t result = (a->element[i] compare b->element[i] ? \
676 switch (sizeof(a->element[0])) { \
678 r->u64[i] = result; \
681 r->u32[i] = result; \
684 r->u16[i] = result; \
694 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
697 #define VCMP(suffix, compare, element) \
698 VCMP_DO(suffix, compare, element, 0) \
699 VCMP_DO(suffix##_dot, compare, element, 1)
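/*
 * Note on the CR6 encoding used by the dotted compare forms (illustrative):
 * bit 3 of crf[6] is set when the predicate held for every element and
 * bit 1 when it held for none, so e.g. vcmpequw. on two identical
 * registers yields crf[6] = 0x8, and on registers with no equal words
 * yields 0x2.
 */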
715 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \
716 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \
717 ppc_avr_t *a, ppc_avr_t *b) \
719 etype ones = (etype)-1; \
721 etype result, none = 0; \
724 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
726 result = ((a->element[i] == 0) \
727 || (b->element[i] == 0) \
728 || (a->element[i] != b->element[i]) ? \
731 result = (a->element[i] != b->element[i]) ? ones : 0x0; \
733 r->element[i] = result; \
738 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
742 /* VCMPNEZ - Vector compare not equal to zero
743 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word)
744 * element - element type to access from vector
746 #define VCMPNE(suffix, element, etype, cmpzero) \
747 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \
748 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
749 VCMPNE(zb, u8, uint8_t, 1)
750 VCMPNE(zh, u16, uint16_t, 1)
751 VCMPNE(zw, u32, uint32_t, 1)
752 VCMPNE(b, u8, uint8_t, 0)
753 VCMPNE(h, u16, uint16_t, 0)
754 VCMPNE(w, u32, uint32_t, 0)
758 #define VCMPFP_DO(suffix, compare, order, record) \
759 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
760 ppc_avr_t *a, ppc_avr_t *b) \
762 uint32_t ones = (uint32_t)-1; \
763 uint32_t all = ones; \
767 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
769 int rel = float32_compare_quiet(a->f32[i], b->f32[i], \
771 if (rel == float_relation_unordered) { \
773 } else if (rel compare order) { \
778 r->u32[i] = result; \
783 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
786 #define VCMPFP(suffix, compare, order) \
787 VCMPFP_DO(suffix, compare, order, 0) \
788 VCMPFP_DO(suffix##_dot, compare, order, 1)
789 VCMPFP(eqfp, ==, float_relation_equal)
790 VCMPFP(gefp, !=, float_relation_less)
791 VCMPFP(gtfp, ==, float_relation_greater)
795 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
796 ppc_avr_t *a, ppc_avr_t *b, int record)
801 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
802 int le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
804 if (le_rel == float_relation_unordered) {
805 r->u32[i] = 0xc0000000;
808 float32 bneg = float32_chs(b->f32[i]);
809 int ge_rel = float32_compare_quiet(a->f32[i], bneg,
811 int le = le_rel != float_relation_greater;
812 int ge = ge_rel != float_relation_less;
814 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
815 all_in |= (!le | !ge);
819 env->crf[6] = (all_in == 0) << 1;
823 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
825 vcmpbfp_internal(env, r, a, b, 0);
828 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
831 vcmpbfp_internal(env, r, a, b, 1);
834 #define VCT(suffix, satcvt, element) \
835 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
836 ppc_avr_t *b, uint32_t uim) \
840 float_status s = env->vec_status; \
842 set_float_rounding_mode(float_round_to_zero, &s); \
843 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
844 if (float32_is_any_nan(b->f32[i])) { \
847 float64 t = float32_to_float64(b->f32[i], &s); \
850 t = float64_scalbn(t, uim, &s); \
851 j = float64_to_int64(t, &s); \
852 r->element[i] = satcvt(j, &sat); \
859 VCT(uxs, cvtsduw, u32)
860 VCT(sxs, cvtsdsw, s32)
863 target_ulong helper_vclzlsbb(ppc_avr_t *r)
865 target_ulong count = 0;
867 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
868 if (r->VsrB(i) & 0x01) {
876 target_ulong helper_vctzlsbb(ppc_avr_t *r)
878 target_ulong count = 0;
880 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
881 if (r->VsrB(i) & 0x01) {
889 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
890 ppc_avr_t *b, ppc_avr_t *c)
895 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
896 int32_t prod = a->s16[i] * b->s16[i];
897 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
899 r->s16[i] = cvtswsh(t, &sat);
907 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
908 ppc_avr_t *b, ppc_avr_t *c)
913 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
914 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
915 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
916 r->s16[i] = cvtswsh(t, &sat);
924 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
928 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
929 int32_t prod = a->s16[i] * b->s16[i];
930 r->s16[i] = (int16_t) (prod + c->s16[i]);
934 #define VMRG_DO(name, element, access, ofs) \
935 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
938 int i, half = ARRAY_SIZE(r->element) / 2; \
940 for (i = 0; i < half; i++) { \
941 result.access(i * 2 + 0) = a->access(i + ofs); \
942 result.access(i * 2 + 1) = b->access(i + ofs); \
947 #define VMRG(suffix, element, access) \
948 VMRG_DO(mrgl##suffix, element, access, half) \
949 VMRG_DO(mrgh##suffix, element, access, 0)
956 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
957 ppc_avr_t *b, ppc_avr_t *c)
962 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
963 prod[i] = (int32_t)a->s8[i] * b->u8[i];
966 VECTOR_FOR_INORDER_I(i, s32) {
967 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
968 prod[4 * i + 2] + prod[4 * i + 3];
972 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
973 ppc_avr_t *b, ppc_avr_t *c)
978 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
979 prod[i] = a->s16[i] * b->s16[i];
982 VECTOR_FOR_INORDER_I(i, s32) {
983 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
987 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
988 ppc_avr_t *b, ppc_avr_t *c)
994 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
995 prod[i] = (int32_t)a->s16[i] * b->s16[i];
998 VECTOR_FOR_INORDER_I(i, s32) {
999 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1001 r->u32[i] = cvtsdsw(t, &sat);
1009 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1010 ppc_avr_t *b, ppc_avr_t *c)
1015 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1016 prod[i] = a->u8[i] * b->u8[i];
1019 VECTOR_FOR_INORDER_I(i, u32) {
1020 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1021 prod[4 * i + 2] + prod[4 * i + 3];
1025 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1026 ppc_avr_t *b, ppc_avr_t *c)
1031 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1032 prod[i] = a->u16[i] * b->u16[i];
1035 VECTOR_FOR_INORDER_I(i, u32) {
1036 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1040 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1041 ppc_avr_t *b, ppc_avr_t *c)
1047 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1048 prod[i] = a->u16[i] * b->u16[i];
1051 VECTOR_FOR_INORDER_I(i, s32) {
1052 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1054 r->u32[i] = cvtuduw(t, &sat);
1062 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \
1063 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1067 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1068 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \
1069 (cast)b->mul_access(i); \
1073 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \
1074 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1078 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1079 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \
1080 (cast)b->mul_access(i + 1); \
1084 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \
1085 VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \
1086 VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast)
1087 VMUL(sb, s8, VsrSB, VsrSH, int16_t)
1088 VMUL(sh, s16, VsrSH, VsrSW, int32_t)
1089 VMUL(sw, s32, VsrSW, VsrSD, int64_t)
1090 VMUL(ub, u8, VsrB, VsrH, uint16_t)
1091 VMUL(uh, u16, VsrH, VsrW, uint32_t)
1092 VMUL(uw, u32, VsrW, VsrD, uint64_t)
1097 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1103 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1104 int s = c->VsrB(i) & 0x1f;
1105 int index = s & 0xf;
1108 result.VsrB(i) = b->VsrB(index);
1110 result.VsrB(i) = a->VsrB(index);
1116 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1122 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1123 int s = c->VsrB(i) & 0x1f;
1124 int index = 15 - (s & 0xf);
1127 result.VsrB(i) = a->VsrB(index);
1129 result.VsrB(i) = b->VsrB(index);
1135 #if defined(HOST_WORDS_BIGENDIAN)
1136 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1137 #define VBPERMD_INDEX(i) (i)
1138 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1139 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
1141 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
1142 #define VBPERMD_INDEX(i) (1 - i)
1143 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1144 #define EXTRACT_BIT(avr, i, index) \
1145 (extract64((avr)->u64[1 - i], 63 - index, 1))
1148 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1151 ppc_avr_t result = { .u64 = { 0, 0 } };
1152 VECTOR_FOR_INORDER_I(i, u64) {
1153 for (j = 0; j < 8; j++) {
1154 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1155 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1156 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1163 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1168 VECTOR_FOR_INORDER_I(i, u8) {
1169 int index = VBPERMQ_INDEX(b, i);
1172 uint64_t mask = (1ull << (63-(index & 0x3F)));
1173 if (a->u64[VBPERMQ_DW(index)] & mask) {
1174 perm |= (0x8000 >> i);
1183 #undef VBPERMQ_INDEX
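/*
 * The table below drives vgbbd (Vector Gather Bits by Bytes by Doubleword).
 * VGBBD_MASKS[v] spreads the eight bits of the byte value v so that bit i
 * of v lands in the most significant bit of byte i (counting from the least
 * significant byte); helper_vgbbd() then shifts each source byte's mask
 * right by that byte's position and ORs them together, which amounts to
 * transposing the 8x8 bit matrix held in each doubleword.  For example,
 * VGBBD_MASKS[0x03] = 0x0000000000008080 because bits 0 and 1 are set.
 */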
1186 static const uint64_t VGBBD_MASKS[256] = {
1187 0x0000000000000000ull, /* 00 */
1188 0x0000000000000080ull, /* 01 */
1189 0x0000000000008000ull, /* 02 */
1190 0x0000000000008080ull, /* 03 */
1191 0x0000000000800000ull, /* 04 */
1192 0x0000000000800080ull, /* 05 */
1193 0x0000000000808000ull, /* 06 */
1194 0x0000000000808080ull, /* 07 */
1195 0x0000000080000000ull, /* 08 */
1196 0x0000000080000080ull, /* 09 */
1197 0x0000000080008000ull, /* 0A */
1198 0x0000000080008080ull, /* 0B */
1199 0x0000000080800000ull, /* 0C */
1200 0x0000000080800080ull, /* 0D */
1201 0x0000000080808000ull, /* 0E */
1202 0x0000000080808080ull, /* 0F */
1203 0x0000008000000000ull, /* 10 */
1204 0x0000008000000080ull, /* 11 */
1205 0x0000008000008000ull, /* 12 */
1206 0x0000008000008080ull, /* 13 */
1207 0x0000008000800000ull, /* 14 */
1208 0x0000008000800080ull, /* 15 */
1209 0x0000008000808000ull, /* 16 */
1210 0x0000008000808080ull, /* 17 */
1211 0x0000008080000000ull, /* 18 */
1212 0x0000008080000080ull, /* 19 */
1213 0x0000008080008000ull, /* 1A */
1214 0x0000008080008080ull, /* 1B */
1215 0x0000008080800000ull, /* 1C */
1216 0x0000008080800080ull, /* 1D */
1217 0x0000008080808000ull, /* 1E */
1218 0x0000008080808080ull, /* 1F */
1219 0x0000800000000000ull, /* 20 */
1220 0x0000800000000080ull, /* 21 */
1221 0x0000800000008000ull, /* 22 */
1222 0x0000800000008080ull, /* 23 */
1223 0x0000800000800000ull, /* 24 */
1224 0x0000800000800080ull, /* 25 */
1225 0x0000800000808000ull, /* 26 */
1226 0x0000800000808080ull, /* 27 */
1227 0x0000800080000000ull, /* 28 */
1228 0x0000800080000080ull, /* 29 */
1229 0x0000800080008000ull, /* 2A */
1230 0x0000800080008080ull, /* 2B */
1231 0x0000800080800000ull, /* 2C */
1232 0x0000800080800080ull, /* 2D */
1233 0x0000800080808000ull, /* 2E */
1234 0x0000800080808080ull, /* 2F */
1235 0x0000808000000000ull, /* 30 */
1236 0x0000808000000080ull, /* 31 */
1237 0x0000808000008000ull, /* 32 */
1238 0x0000808000008080ull, /* 33 */
1239 0x0000808000800000ull, /* 34 */
1240 0x0000808000800080ull, /* 35 */
1241 0x0000808000808000ull, /* 36 */
1242 0x0000808000808080ull, /* 37 */
1243 0x0000808080000000ull, /* 38 */
1244 0x0000808080000080ull, /* 39 */
1245 0x0000808080008000ull, /* 3A */
1246 0x0000808080008080ull, /* 3B */
1247 0x0000808080800000ull, /* 3C */
1248 0x0000808080800080ull, /* 3D */
1249 0x0000808080808000ull, /* 3E */
1250 0x0000808080808080ull, /* 3F */
1251 0x0080000000000000ull, /* 40 */
1252 0x0080000000000080ull, /* 41 */
1253 0x0080000000008000ull, /* 42 */
1254 0x0080000000008080ull, /* 43 */
1255 0x0080000000800000ull, /* 44 */
1256 0x0080000000800080ull, /* 45 */
1257 0x0080000000808000ull, /* 46 */
1258 0x0080000000808080ull, /* 47 */
1259 0x0080000080000000ull, /* 48 */
1260 0x0080000080000080ull, /* 49 */
1261 0x0080000080008000ull, /* 4A */
1262 0x0080000080008080ull, /* 4B */
1263 0x0080000080800000ull, /* 4C */
1264 0x0080000080800080ull, /* 4D */
1265 0x0080000080808000ull, /* 4E */
1266 0x0080000080808080ull, /* 4F */
1267 0x0080008000000000ull, /* 50 */
1268 0x0080008000000080ull, /* 51 */
1269 0x0080008000008000ull, /* 52 */
1270 0x0080008000008080ull, /* 53 */
1271 0x0080008000800000ull, /* 54 */
1272 0x0080008000800080ull, /* 55 */
1273 0x0080008000808000ull, /* 56 */
1274 0x0080008000808080ull, /* 57 */
1275 0x0080008080000000ull, /* 58 */
1276 0x0080008080000080ull, /* 59 */
1277 0x0080008080008000ull, /* 5A */
1278 0x0080008080008080ull, /* 5B */
1279 0x0080008080800000ull, /* 5C */
1280 0x0080008080800080ull, /* 5D */
1281 0x0080008080808000ull, /* 5E */
1282 0x0080008080808080ull, /* 5F */
1283 0x0080800000000000ull, /* 60 */
1284 0x0080800000000080ull, /* 61 */
1285 0x0080800000008000ull, /* 62 */
1286 0x0080800000008080ull, /* 63 */
1287 0x0080800000800000ull, /* 64 */
1288 0x0080800000800080ull, /* 65 */
1289 0x0080800000808000ull, /* 66 */
1290 0x0080800000808080ull, /* 67 */
1291 0x0080800080000000ull, /* 68 */
1292 0x0080800080000080ull, /* 69 */
1293 0x0080800080008000ull, /* 6A */
1294 0x0080800080008080ull, /* 6B */
1295 0x0080800080800000ull, /* 6C */
1296 0x0080800080800080ull, /* 6D */
1297 0x0080800080808000ull, /* 6E */
1298 0x0080800080808080ull, /* 6F */
1299 0x0080808000000000ull, /* 70 */
1300 0x0080808000000080ull, /* 71 */
1301 0x0080808000008000ull, /* 72 */
1302 0x0080808000008080ull, /* 73 */
1303 0x0080808000800000ull, /* 74 */
1304 0x0080808000800080ull, /* 75 */
1305 0x0080808000808000ull, /* 76 */
1306 0x0080808000808080ull, /* 77 */
1307 0x0080808080000000ull, /* 78 */
1308 0x0080808080000080ull, /* 79 */
1309 0x0080808080008000ull, /* 7A */
1310 0x0080808080008080ull, /* 7B */
1311 0x0080808080800000ull, /* 7C */
1312 0x0080808080800080ull, /* 7D */
1313 0x0080808080808000ull, /* 7E */
1314 0x0080808080808080ull, /* 7F */
1315 0x8000000000000000ull, /* 80 */
1316 0x8000000000000080ull, /* 81 */
1317 0x8000000000008000ull, /* 82 */
1318 0x8000000000008080ull, /* 83 */
1319 0x8000000000800000ull, /* 84 */
1320 0x8000000000800080ull, /* 85 */
1321 0x8000000000808000ull, /* 86 */
1322 0x8000000000808080ull, /* 87 */
1323 0x8000000080000000ull, /* 88 */
1324 0x8000000080000080ull, /* 89 */
1325 0x8000000080008000ull, /* 8A */
1326 0x8000000080008080ull, /* 8B */
1327 0x8000000080800000ull, /* 8C */
1328 0x8000000080800080ull, /* 8D */
1329 0x8000000080808000ull, /* 8E */
1330 0x8000000080808080ull, /* 8F */
1331 0x8000008000000000ull, /* 90 */
1332 0x8000008000000080ull, /* 91 */
1333 0x8000008000008000ull, /* 92 */
1334 0x8000008000008080ull, /* 93 */
1335 0x8000008000800000ull, /* 94 */
1336 0x8000008000800080ull, /* 95 */
1337 0x8000008000808000ull, /* 96 */
1338 0x8000008000808080ull, /* 97 */
1339 0x8000008080000000ull, /* 98 */
1340 0x8000008080000080ull, /* 99 */
1341 0x8000008080008000ull, /* 9A */
1342 0x8000008080008080ull, /* 9B */
1343 0x8000008080800000ull, /* 9C */
1344 0x8000008080800080ull, /* 9D */
1345 0x8000008080808000ull, /* 9E */
1346 0x8000008080808080ull, /* 9F */
1347 0x8000800000000000ull, /* A0 */
1348 0x8000800000000080ull, /* A1 */
1349 0x8000800000008000ull, /* A2 */
1350 0x8000800000008080ull, /* A3 */
1351 0x8000800000800000ull, /* A4 */
1352 0x8000800000800080ull, /* A5 */
1353 0x8000800000808000ull, /* A6 */
1354 0x8000800000808080ull, /* A7 */
1355 0x8000800080000000ull, /* A8 */
1356 0x8000800080000080ull, /* A9 */
1357 0x8000800080008000ull, /* AA */
1358 0x8000800080008080ull, /* AB */
1359 0x8000800080800000ull, /* AC */
1360 0x8000800080800080ull, /* AD */
1361 0x8000800080808000ull, /* AE */
1362 0x8000800080808080ull, /* AF */
1363 0x8000808000000000ull, /* B0 */
1364 0x8000808000000080ull, /* B1 */
1365 0x8000808000008000ull, /* B2 */
1366 0x8000808000008080ull, /* B3 */
1367 0x8000808000800000ull, /* B4 */
1368 0x8000808000800080ull, /* B5 */
1369 0x8000808000808000ull, /* B6 */
1370 0x8000808000808080ull, /* B7 */
1371 0x8000808080000000ull, /* B8 */
1372 0x8000808080000080ull, /* B9 */
1373 0x8000808080008000ull, /* BA */
1374 0x8000808080008080ull, /* BB */
1375 0x8000808080800000ull, /* BC */
1376 0x8000808080800080ull, /* BD */
1377 0x8000808080808000ull, /* BE */
1378 0x8000808080808080ull, /* BF */
1379 0x8080000000000000ull, /* C0 */
1380 0x8080000000000080ull, /* C1 */
1381 0x8080000000008000ull, /* C2 */
1382 0x8080000000008080ull, /* C3 */
1383 0x8080000000800000ull, /* C4 */
1384 0x8080000000800080ull, /* C5 */
1385 0x8080000000808000ull, /* C6 */
1386 0x8080000000808080ull, /* C7 */
1387 0x8080000080000000ull, /* C8 */
1388 0x8080000080000080ull, /* C9 */
1389 0x8080000080008000ull, /* CA */
1390 0x8080000080008080ull, /* CB */
1391 0x8080000080800000ull, /* CC */
1392 0x8080000080800080ull, /* CD */
1393 0x8080000080808000ull, /* CE */
1394 0x8080000080808080ull, /* CF */
1395 0x8080008000000000ull, /* D0 */
1396 0x8080008000000080ull, /* D1 */
1397 0x8080008000008000ull, /* D2 */
1398 0x8080008000008080ull, /* D3 */
1399 0x8080008000800000ull, /* D4 */
1400 0x8080008000800080ull, /* D5 */
1401 0x8080008000808000ull, /* D6 */
1402 0x8080008000808080ull, /* D7 */
1403 0x8080008080000000ull, /* D8 */
1404 0x8080008080000080ull, /* D9 */
1405 0x8080008080008000ull, /* DA */
1406 0x8080008080008080ull, /* DB */
1407 0x8080008080800000ull, /* DC */
1408 0x8080008080800080ull, /* DD */
1409 0x8080008080808000ull, /* DE */
1410 0x8080008080808080ull, /* DF */
1411 0x8080800000000000ull, /* E0 */
1412 0x8080800000000080ull, /* E1 */
1413 0x8080800000008000ull, /* E2 */
1414 0x8080800000008080ull, /* E3 */
1415 0x8080800000800000ull, /* E4 */
1416 0x8080800000800080ull, /* E5 */
1417 0x8080800000808000ull, /* E6 */
1418 0x8080800000808080ull, /* E7 */
1419 0x8080800080000000ull, /* E8 */
1420 0x8080800080000080ull, /* E9 */
1421 0x8080800080008000ull, /* EA */
1422 0x8080800080008080ull, /* EB */
1423 0x8080800080800000ull, /* EC */
1424 0x8080800080800080ull, /* ED */
1425 0x8080800080808000ull, /* EE */
1426 0x8080800080808080ull, /* EF */
1427 0x8080808000000000ull, /* F0 */
1428 0x8080808000000080ull, /* F1 */
1429 0x8080808000008000ull, /* F2 */
1430 0x8080808000008080ull, /* F3 */
1431 0x8080808000800000ull, /* F4 */
1432 0x8080808000800080ull, /* F5 */
1433 0x8080808000808000ull, /* F6 */
1434 0x8080808000808080ull, /* F7 */
1435 0x8080808080000000ull, /* F8 */
1436 0x8080808080000080ull, /* F9 */
1437 0x8080808080008000ull, /* FA */
1438 0x8080808080008080ull, /* FB */
1439 0x8080808080800000ull, /* FC */
1440 0x8080808080800080ull, /* FD */
1441 0x8080808080808000ull, /* FE */
1442 0x8080808080808080ull, /* FF */
1445 void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
1448 uint64_t t[2] = { 0, 0 };
1450 VECTOR_FOR_INORDER_I(i, u8) {
1451 #if defined(HOST_WORDS_BIGENDIAN)
1452 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
1454 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
1462 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1463 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1466 trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])]; \
1468 VECTOR_FOR_INORDER_I(i, srcfld) { \
1470 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1471 if (a->srcfld[i] & (1ull<<j)) { \
1472 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1477 VECTOR_FOR_INORDER_I(i, trgfld) { \
1478 r->trgfld[i] = prod[2*i] ^ prod[2*i+1]; \
1482 PMSUM(vpmsumb, u8, u16, uint16_t)
1483 PMSUM(vpmsumh, u16, u32, uint32_t)
1484 PMSUM(vpmsumw, u32, u64, uint64_t)
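/*
 * vpmsum* performs carry-less (polynomial, GF(2)) multiplication and XORs
 * adjacent products.  Illustrative example of the carry-less product built
 * above: 0b0011 multiplied carry-lessly by 0b0011 is 0b0101, not 0b1001,
 * because the partial products are combined with XOR instead of addition.
 */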
1486 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1489 #ifdef CONFIG_INT128
1491 __uint128_t prod[2];
1493 VECTOR_FOR_INORDER_I(i, u64) {
1495 for (j = 0; j < 64; j++) {
1496 if (a->u64[i] & (1ull<<j)) {
1497 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1502 r->u128 = prod[0] ^ prod[1];
1508 VECTOR_FOR_INORDER_I(i, u64) {
1509 prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
1510 for (j = 0; j < 64; j++) {
1511 if (a->u64[i] & (1ull<<j)) {
1515 bshift.VsrD(1) = b->u64[i];
1517 bshift.VsrD(0) = b->u64[i] >> (64 - j);
1518 bshift.VsrD(1) = b->u64[i] << j;
1520 prod[i].VsrD(1) ^= bshift.VsrD(1);
1521 prod[i].VsrD(0) ^= bshift.VsrD(0);
1526 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
1527 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
1532 #if defined(HOST_WORDS_BIGENDIAN)
1537 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1541 #if defined(HOST_WORDS_BIGENDIAN)
1542 const ppc_avr_t *x[2] = { a, b };
1544 const ppc_avr_t *x[2] = { b, a };
1547 VECTOR_FOR_INORDER_I(i, u64) {
1548 VECTOR_FOR_INORDER_I(j, u32) {
1549 uint32_t e = x[i]->u32[j];
1551 result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
1552 ((e >> 6) & 0x3e0) |
1559 #define VPK(suffix, from, to, cvt, dosat) \
1560 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1561 ppc_avr_t *a, ppc_avr_t *b) \
1566 ppc_avr_t *a0 = PKBIG ? a : b; \
1567 ppc_avr_t *a1 = PKBIG ? b : a; \
1569 VECTOR_FOR_INORDER_I(i, from) { \
1570 result.to[i] = cvt(a0->from[i], &sat); \
1571 result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \
1574 if (dosat && sat) { \
1575 set_vscr_sat(env); \
1579 VPK(shss, s16, s8, cvtshsb, 1)
1580 VPK(shus, s16, u8, cvtshub, 1)
1581 VPK(swss, s32, s16, cvtswsh, 1)
1582 VPK(swus, s32, u16, cvtswuh, 1)
1583 VPK(sdss, s64, s32, cvtsdsw, 1)
1584 VPK(sdus, s64, u32, cvtsduw, 1)
1585 VPK(uhus, u16, u8, cvtuhub, 1)
1586 VPK(uwus, u32, u16, cvtuwuh, 1)
1587 VPK(udus, u64, u32, cvtuduw, 1)
1588 VPK(uhum, u16, u8, I, 0)
1589 VPK(uwum, u32, u16, I, 0)
1590 VPK(udum, u64, u32, I, 0)
1595 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1599 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1600 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
1604 #define VRFI(suffix, rounding) \
1605 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1609 float_status s = env->vec_status; \
1611 set_float_rounding_mode(rounding, &s); \
1612 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
1613 r->f32[i] = float32_round_to_int (b->f32[i], &s); \
1616 VRFI(n, float_round_nearest_even)
1617 VRFI(m, float_round_down)
1618 VRFI(p, float_round_up)
1619 VRFI(z, float_round_to_zero)
1622 #define VROTATE(suffix, element, mask) \
1623 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1627 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1628 unsigned int shift = b->element[i] & mask; \
1629 r->element[i] = (a->element[i] << shift) | \
1630 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1634 VROTATE(h, u16, 0xF)
1635 VROTATE(w, u32, 0x1F)
1636 VROTATE(d, u64, 0x3F)
1639 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1643 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1644 float32 t = float32_sqrt(b->f32[i], &env->vec_status);
1646 r->f32[i] = float32_div(float32_one, t, &env->vec_status);
1650 #define VRLMI(name, size, element, insert) \
1651 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1654 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1655 uint##size##_t src1 = a->element[i]; \
1656 uint##size##_t src2 = b->element[i]; \
1657 uint##size##_t src3 = r->element[i]; \
1658 uint##size##_t begin, end, shift, mask, rot_val; \
1660 shift = extract##size(src2, 0, 6); \
1661 end = extract##size(src2, 8, 6); \
1662 begin = extract##size(src2, 16, 6); \
1663 rot_val = rol##size(src1, shift); \
1664 mask = mask_u##size(begin, end); \
1666 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1668 r->element[i] = (rot_val & mask); \
1673 VRLMI(vrldmi, 64, u64, 1);
1674 VRLMI(vrlwmi, 32, u32, 1);
1675 VRLMI(vrldnm, 64, u64, 0);
1676 VRLMI(vrlwnm, 32, u32, 0);
1678 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1681 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1682 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1685 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1689 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1690 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
1694 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1698 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1699 r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
1703 #if defined(HOST_WORDS_BIGENDIAN)
1704 #define VEXTU_X_DO(name, size, left) \
1705 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1709 index = (a & 0xf) * 8; \
1711 index = ((15 - (a & 0xf) + 1) * 8) - size; \
1713 return int128_getlo(int128_rshift(b->s128, index)) & \
1714 MAKE_64BIT_MASK(0, size); \
1717 #define VEXTU_X_DO(name, size, left) \
1718 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1722 index = ((15 - (a & 0xf) + 1) * 8) - size; \
1724 index = (a & 0xf) * 8; \
1726 return int128_getlo(int128_rshift(b->s128, index)) & \
1727 MAKE_64BIT_MASK(0, size); \
1731 VEXTU_X_DO(vextublx, 8, 1)
1732 VEXTU_X_DO(vextuhlx, 16, 1)
1733 VEXTU_X_DO(vextuwlx, 32, 1)
1734 VEXTU_X_DO(vextubrx, 8, 0)
1735 VEXTU_X_DO(vextuhrx, 16, 0)
1736 VEXTU_X_DO(vextuwrx, 32, 0)
1739 /* The specification says that the results are undefined if all of the
1740 * shift counts are not identical. We check to make sure that they are
1741 * to conform to what real hardware appears to do. */
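/*
 * Illustrative example for vsl: a shift count of 3 replicated in every byte
 * of b shifts the 128-bit value in a left by 3 bits; the top 3 bits of
 * a->VsrD(1) are carried into the low bits of the result's VsrD(0).
 */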
1742 #define VSHIFT(suffix, leftp) \
1743 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1745 int shift = b->VsrB(15) & 0x7; \
1749 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \
1750 doit = doit && ((b->u8[i] & 0x7) == shift); \
1755 } else if (leftp) { \
1756 uint64_t carry = a->VsrD(1) >> (64 - shift); \
1758 r->VsrD(0) = (a->VsrD(0) << shift) | carry; \
1759 r->VsrD(1) = a->VsrD(1) << shift; \
1761 uint64_t carry = a->VsrD(0) << (64 - shift); \
1763 r->VsrD(1) = (a->VsrD(1) >> shift) | carry; \
1764 r->VsrD(0) = a->VsrD(0) >> shift; \
1772 #define VSL(suffix, element, mask) \
1773 void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1777 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1778 unsigned int shift = b->element[i] & mask; \
1780 r->element[i] = a->element[i] << shift; \
1789 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1792 unsigned int shift, bytes, size;
1794 size = ARRAY_SIZE(r->u8);
1795 for (i = 0; i < size; i++) {
1796 shift = b->u8[i] & 0x7; /* extract shift value */
1797 bytes = (a->u8[i] << 8) + /* extract adjacent bytes */
1798 (((i + 1) < size) ? a->u8[i + 1] : 0);
1799 r->u8[i] = (bytes << shift) >> 8; /* shift and store result */
1803 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1806 unsigned int shift, bytes;
1808 /* Use reverse order, as destination and source register can be the same.
1809  * Since the register is modified in place (saving a temporary), the reverse
1810  * order guarantees that the computed result is not fed back.
 */
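/*
 * Illustrative example: if r and a alias, computing index 0 first would
 * overwrite a->u8[0], which index 1 still needs as its adjacent byte;
 * walking downwards from 15 avoids that.
 */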
1812 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1813 shift = b->u8[i] & 0x7; /* extract shift value */
1814 bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i];
1815 /* extract adjacent bytes */
1816 r->u8[i] = (bytes >> shift) & 0xFF; /* shift and store result */
1820 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1822 int sh = shift & 0xf;
1826 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1829 result.VsrB(i) = b->VsrB(index - 0x10);
1831 result.VsrB(i) = a->VsrB(index);
1837 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1839 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1841 #if defined(HOST_WORDS_BIGENDIAN)
1842 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1843 memset(&r->u8[16-sh], 0, sh);
1845 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1846 memset(&r->u8[0], 0, sh);
1850 #if defined(HOST_WORDS_BIGENDIAN)
1851 #define VINSERT(suffix, element) \
1852 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1854 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])], \
1855 sizeof(r->element[0])); \
1858 #define VINSERT(suffix, element) \
1859 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1861 uint32_t d = (16 - index) - sizeof(r->element[0]); \
1862 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \
1870 #if defined(HOST_WORDS_BIGENDIAN)
1871 #define VEXTRACT(suffix, element) \
1872 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1874 uint32_t es = sizeof(r->element[0]); \
1875 memmove(&r->u8[8 - es], &b->u8[index], es); \
1876 memset(&r->u8[8], 0, 8); \
1877 memset(&r->u8[0], 0, 8 - es); \
1880 #define VEXTRACT(suffix, element) \
1881 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1883 uint32_t es = sizeof(r->element[0]); \
1884 uint32_t s = (16 - index) - es; \
1885 memmove(&r->u8[8], &b->u8[s], es); \
1886 memset(&r->u8[0], 0, 8); \
1887 memset(&r->u8[8 + es], 0, 8 - es); \
1896 void helper_xxextractuw(CPUPPCState *env, target_ulong xtn,
1897 target_ulong xbn, uint32_t index)
1900 size_t es = sizeof(uint32_t);
1904 getVSR(xbn, &xb, env);
1905 memset(&xt, 0, sizeof(xt));
1908 for (i = 0; i < es; i++, ext_index++) {
1909 xt.VsrB(8 - es + i) = xb.VsrB(ext_index % 16);
1912 putVSR(xtn, &xt, env);
1915 void helper_xxinsertw(CPUPPCState *env, target_ulong xtn,
1916 target_ulong xbn, uint32_t index)
1919 size_t es = sizeof(uint32_t);
1920 int ins_index, i = 0;
1922 getVSR(xbn, &xb, env);
1923 getVSR(xtn, &xt, env);
1926 for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
1927 xt.VsrB(ins_index) = xb.VsrB(8 - es + i);
1930 putVSR(xtn, &xt, env);
1933 #define VEXT_SIGNED(name, element, cast) \
1934 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1937 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1938 r->element[i] = (cast)b->element[i]; \
1941 VEXT_SIGNED(vextsb2w, s32, int8_t)
1942 VEXT_SIGNED(vextsb2d, s64, int8_t)
1943 VEXT_SIGNED(vextsh2w, s32, int16_t)
1944 VEXT_SIGNED(vextsh2d, s64, int16_t)
1945 VEXT_SIGNED(vextsw2d, s64, int32_t)
1948 #define VNEG(name, element) \
1949 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1952 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1953 r->element[i] = -b->element[i]; \
1960 #define VSR(suffix, element, mask) \
1961 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1965 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1966 unsigned int shift = b->element[i] & mask; \
1967 r->element[i] = a->element[i] >> shift; \
1980 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1982 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1984 #if defined(HOST_WORDS_BIGENDIAN)
1985 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1986 memset(&r->u8[0], 0, sh);
1988 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1989 memset(&r->u8[16 - sh], 0, sh);
1993 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1997 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1998 r->u32[i] = a->u32[i] >= b->u32[i];
2002 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2009 upper = ARRAY_SIZE(r->s32) - 1;
2010 t = (int64_t)b->VsrSW(upper);
2011 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2013 result.VsrSW(i) = 0;
2015 result.VsrSW(upper) = cvtsdsw(t, &sat);
2023 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2030 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2031 int64_t t = (int64_t)b->VsrSW(upper + i * 2);
2034 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
2035 t += a->VsrSW(2 * i + j);
2037 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
2046 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2051 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2052 int64_t t = (int64_t)b->s32[i];
2054 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
2055 t += a->s8[4 * i + j];
2057 r->s32[i] = cvtsdsw(t, &sat);
2065 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2070 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2071 int64_t t = (int64_t)b->s32[i];
2073 t += a->s16[2 * i] + a->s16[2 * i + 1];
2074 r->s32[i] = cvtsdsw(t, &sat);
2082 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2087 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2088 uint64_t t = (uint64_t)b->u32[i];
2090 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
2091 t += a->u8[4 * i + j];
2093 r->u32[i] = cvtuduw(t, &sat);
2101 #if defined(HOST_WORDS_BIGENDIAN)
2108 #define VUPKPX(suffix, hi) \
2109 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2114 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
2115 uint16_t e = b->u16[hi ? i : i+4]; \
2116 uint8_t a = (e >> 15) ? 0xff : 0; \
2117 uint8_t r = (e >> 10) & 0x1f; \
2118 uint8_t g = (e >> 5) & 0x1f; \
2119 uint8_t b = e & 0x1f; \
2121 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
2129 #define VUPK(suffix, unpacked, packee, hi) \
2130 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2136 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
2137 result.unpacked[i] = b->packee[i]; \
2140 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
2142 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
2147 VUPK(hsb, s16, s8, UPKHI)
2148 VUPK(hsh, s32, s16, UPKHI)
2149 VUPK(hsw, s64, s32, UPKHI)
2150 VUPK(lsb, s16, s8, UPKLO)
2151 VUPK(lsh, s32, s16, UPKLO)
2152 VUPK(lsw, s64, s32, UPKLO)
2157 #define VGENERIC_DO(name, element) \
2158 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
2162 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
2163 r->element[i] = name(b->element[i]); \
2167 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
2168 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
2169 #define clzw(v) clz32((v))
2170 #define clzd(v) clz64((v))
2172 VGENERIC_DO(clzb, u8)
2173 VGENERIC_DO(clzh, u16)
2174 VGENERIC_DO(clzw, u32)
2175 VGENERIC_DO(clzd, u64)
2182 #define ctzb(v) ((v) ? ctz32(v) : 8)
2183 #define ctzh(v) ((v) ? ctz32(v) : 16)
2184 #define ctzw(v) ctz32((v))
2185 #define ctzd(v) ctz64((v))
2187 VGENERIC_DO(ctzb, u8)
2188 VGENERIC_DO(ctzh, u16)
2189 VGENERIC_DO(ctzw, u32)
2190 VGENERIC_DO(ctzd, u64)
2197 #define popcntb(v) ctpop8(v)
2198 #define popcnth(v) ctpop16(v)
2199 #define popcntw(v) ctpop32(v)
2200 #define popcntd(v) ctpop64(v)
2202 VGENERIC_DO(popcntb, u8)
2203 VGENERIC_DO(popcnth, u16)
2204 VGENERIC_DO(popcntw, u32)
2205 VGENERIC_DO(popcntd, u64)
2214 #if defined(HOST_WORDS_BIGENDIAN)
2215 #define QW_ONE { .u64 = { 0, 1 } }
2217 #define QW_ONE { .u64 = { 1, 0 } }
2220 #ifndef CONFIG_INT128
2222 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
2224 t->u64[0] = ~a.u64[0];
2225 t->u64[1] = ~a.u64[1];
2228 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
2230 if (a.VsrD(0) < b.VsrD(0)) {
2232 } else if (a.VsrD(0) > b.VsrD(0)) {
2234 } else if (a.VsrD(1) < b.VsrD(1)) {
2236 } else if (a.VsrD(1) > b.VsrD(1)) {
2243 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2245 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
2246 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
2247 (~a.VsrD(1) < b.VsrD(1));
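/*
 * The carry test above relies on the identity that a + b overflows a 64-bit
 * unsigned addition exactly when b > ~a, i.e. when b exceeds the headroom
 * left above a.  Illustrative example: a.VsrD(1) = 0xfffffffffffffffe and
 * b.VsrD(1) = 2 gives ~a.VsrD(1) = 1 < 2, so a carry of 1 is propagated
 * into VsrD(0).
 */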
2250 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2253 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
2254 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
2255 (~a.VsrD(1) < b.VsrD(1));
2256 avr_qw_not(&not_a, a);
2257 return avr_qw_cmpu(not_a, b) < 0;
2262 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2264 #ifdef CONFIG_INT128
2265 r->u128 = a->u128 + b->u128;
2267 avr_qw_add(r, *a, *b);
2271 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2273 #ifdef CONFIG_INT128
2274 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2277 if (c->VsrD(1) & 1) {
2281 tmp.VsrD(1) = c->VsrD(1) & 1;
2282 avr_qw_add(&tmp, *a, tmp);
2283 avr_qw_add(r, tmp, *b);
2285 avr_qw_add(r, *a, *b);
2290 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2292 #ifdef CONFIG_INT128
2293 r->u128 = (~a->u128 < b->u128);
2297 avr_qw_not(&not_a, *a);
2300 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
2304 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2306 #ifdef CONFIG_INT128
2307 int carry_out = (~a->u128 < b->u128);
2308 if (!carry_out && (c->u128 & 1)) {
2309 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2310 ((a->u128 != 0) || (b->u128 != 0));
2312 r->u128 = carry_out;
2315 int carry_in = c->VsrD(1) & 1;
2319 carry_out = avr_qw_addc(&tmp, *a, *b);
2321 if (!carry_out && carry_in) {
2322 ppc_avr_t one = QW_ONE;
2323 carry_out = avr_qw_addc(&tmp, tmp, one);
2326 r->VsrD(1) = carry_out;
2330 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2332 #ifdef CONFIG_INT128
2333 r->u128 = a->u128 - b->u128;
2336 ppc_avr_t one = QW_ONE;
2338 avr_qw_not(&tmp, *b);
2339 avr_qw_add(&tmp, *a, tmp);
2340 avr_qw_add(r, tmp, one);
2344 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2346 #ifdef CONFIG_INT128
2347 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2351 avr_qw_not(&tmp, *b);
2352 avr_qw_add(&sum, *a, tmp);
2355 tmp.VsrD(1) = c->VsrD(1) & 1;
2356 avr_qw_add(r, sum, tmp);
2360 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2362 #ifdef CONFIG_INT128
2363 r->u128 = (~a->u128 < ~b->u128) ||
2364 (a->u128 + ~b->u128 == (__uint128_t)-1);
2366 int carry = (avr_qw_cmpu(*a, *b) > 0);
2369 avr_qw_not(&tmp, *b);
2370 avr_qw_add(&tmp, *a, tmp);
2371 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
2378 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2380 #ifdef CONFIG_INT128
2382 (~a->u128 < ~b->u128) ||
2383 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2385 int carry_in = c->VsrD(1) & 1;
2386 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2387 if (!carry_out && carry_in) {
2389 avr_qw_not(&tmp, *b);
2390 avr_qw_add(&tmp, *a, tmp);
2391 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
2395 r->VsrD(1) = carry_out;
2399 #define BCD_PLUS_PREF_1 0xC
2400 #define BCD_PLUS_PREF_2 0xF
2401 #define BCD_PLUS_ALT_1 0xA
2402 #define BCD_NEG_PREF 0xD
2403 #define BCD_NEG_ALT 0xB
2404 #define BCD_PLUS_ALT_2 0xE
2405 #define NATIONAL_PLUS 0x2B
2406 #define NATIONAL_NEG 0x2D
2408 #if defined(HOST_WORDS_BIGENDIAN)
2409 #define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2411 #define BCD_DIG_BYTE(n) ((n) / 2)
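/*
 * Illustrative layout note: in the packed-BCD helpers below, digit 0 is the
 * sign nibble in the least significant byte and digits 1..31 grow toward the
 * most significant byte, so on a big-endian host digit 5 lives in
 * u8[BCD_DIG_BYTE(5)] = u8[13], in the high nibble because 5 is odd.
 */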
2414 static int bcd_get_sgn(ppc_avr_t *bcd)
2416 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2417 case BCD_PLUS_PREF_1:
2418 case BCD_PLUS_PREF_2:
2419 case BCD_PLUS_ALT_1:
2420 case BCD_PLUS_ALT_2:
2438 static int bcd_preferred_sgn(int sgn, int ps)
2441 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2443 return BCD_NEG_PREF;
2447 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2451 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2453 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2456 if (unlikely(result > 9)) {
2462 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2465 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2466 bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
2468 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2469 bcd->u8[BCD_DIG_BYTE(n)] |= digit;
2473 static bool bcd_is_valid(ppc_avr_t *bcd)
2478 if (bcd_get_sgn(bcd) == 0) {
2482 for (i = 1; i < 32; i++) {
2483 bcd_get_digit(bcd, i, &invalid);
2484 if (unlikely(invalid)) {
2491 static int bcd_cmp_zero(ppc_avr_t *bcd)
2493 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
2496 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2500 static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2502 return reg->VsrH(7 - n);
2505 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2507 reg->VsrH(7 - n) = val;
2510 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2514 for (i = 31; i > 0; i--) {
2515 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2516 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2517 if (unlikely(invalid)) {
2518 return 0; /* doesn't matter */
2519 } else if (dig_a > dig_b) {
2521 } else if (dig_a < dig_b) {
2529 static void bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2534 for (i = 1; i <= 31; i++) {
2535 uint8_t digit = bcd_get_digit(a, i, invalid) +
2536 bcd_get_digit(b, i, invalid) + carry;
2544 bcd_put_digit(t, digit, i);
2550 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2556 for (i = 1; i <= 31; i++) {
2557 uint8_t digit = bcd_get_digit(a, i, invalid) -
2558 bcd_get_digit(b, i, invalid) + carry;
2566 bcd_put_digit(t, digit, i);
2572 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2575 int sgna = bcd_get_sgn(a);
2576 int sgnb = bcd_get_sgn(b);
2577 int invalid = (sgna == 0) || (sgnb == 0);
2580 ppc_avr_t result = { .u64 = { 0, 0 } };
2584 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2585 bcd_add_mag(&result, a, b, &invalid, &overflow);
2586 cr = bcd_cmp_zero(&result);
2588 int magnitude = bcd_cmp_mag(a, b);
2589 if (magnitude > 0) {
2590 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2591 bcd_sub_mag(&result, a, b, &invalid, &overflow);
2592 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2593 } else if (magnitude < 0) {
2594 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
2595 bcd_sub_mag(&result, b, a, &invalid, &overflow);
2596 cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2598 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(0, ps);
2604 if (unlikely(invalid)) {
2605 result.VsrD(0) = result.VsrD(1) = -1;
2607 } else if (overflow) {
2616 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2618 ppc_avr_t bcopy = *b;
2619 int sgnb = bcd_get_sgn(b);
2621 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2622 } else if (sgnb > 0) {
2623 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2625 /* else invalid ... defer to bcdadd code for proper handling */
2627 return helper_bcdadd(r, a, &bcopy, ps);
uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint16_t national = 0;
    uint16_t sgnb = get_national_digit(b, 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);

    for (i = 1; i < 8; i++) {
        national = get_national_digit(b, i);
        if (unlikely(national < 0x30 || national > 0x39)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, national & 0xf, i);
    }

    if (sgnb == NATIONAL_PLUS) {
        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
    } else {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}
uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);

    for (i = 1; i < 8; i++) {
        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);

        if (unlikely(invalid)) {
            break;
        }
    }
    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}
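/*
 * bcdcfz. converts from zoned decimal format: sixteen bytes, each holding
 * a zone nibble and a digit nibble.  With PS=0 the expected zone is 0x3
 * (ASCII digits), with PS=1 it is 0xF; in both modes the zone nibble of
 * the low-order byte carries the sign and is excluded from the zone
 * check.
 */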
uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int invalid = 0;
    int zone_digit = 0;
    int zone_lead = ps ? 0xF : 0x3;
    int digit = 0;
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4;

    if (unlikely((sgnb < 0xA) && ps)) {
        invalid = 1;
    }

    for (i = 0; i < 16; i++) {
        zone_digit = i ? b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead;
        digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF;
        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, digit, i + 1);
    }

    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
            (!ps && (sgnb & 0x4))) {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    } else {
        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}
2744 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2749 int sgnb = bcd_get_sgn(b);
2750 int zone_lead = (ps) ? 0xF0 : 0x30;
2751 int invalid = (sgnb == 0);
2752 ppc_avr_t ret = { .u64 = { 0, 0 } };
2754 int ox_flag = ((b->VsrD(0) >> 4) != 0);
2756 for (i = 0; i < 16; i++) {
2757 digit = bcd_get_digit(b, i + 1, &invalid);
2759 if (unlikely(invalid)) {
2763 ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit;
2767 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2769 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2772 cr = bcd_cmp_zero(b);
2778 if (unlikely(invalid)) {
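/*
 * bcdcfsq. converts a signed 128-bit binary integer to BCD.  divu128()
 * splits the magnitude by 10^15 (quotient in lo_value, remainder in
 * hi_value); the remainder then supplies digits 1..15 and the quotient
 * digits 16..31, each peeled off by repeated division by 10.  CR.SO
 * reports a value that does not fit in 31 digits.
 */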
uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint64_t lo_value;
    uint64_t hi_value;
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    if (b->VsrSD(0) < 0) {
        lo_value = -b->VsrSD(1);
        hi_value = ~b->VsrD(0) + !lo_value;
        bcd_put_digit(&ret, 0xD, 0);
    } else {
        lo_value = b->VsrD(1);
        hi_value = b->VsrD(0);
        bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
    }

    if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
            lo_value > 9999999999999999ULL) {
        cr = CRF_SO;
    }

    for (i = 1; i < 16; hi_value /= 10, i++) {
        bcd_put_digit(&ret, hi_value % 10, i);
    }

    for (; i < 32; lo_value /= 10, i++) {
        bcd_put_digit(&ret, lo_value % 10, i);
    }

    cr |= bcd_cmp_zero(&ret);

    *r = ret;

    return cr;
}
uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr;
    uint64_t carry;
    uint64_t unused;
    uint64_t lo_value;
    uint64_t hi_value = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);

    lo_value = bcd_get_digit(b, 31, &invalid);
    for (i = 30; i > 0; i--) {
        mulu64(&lo_value, &carry, lo_value, 10ULL);
        mulu64(&hi_value, &unused, hi_value, 10ULL);
        lo_value += bcd_get_digit(b, i, &invalid);
        hi_value += carry;

        if (unlikely(invalid)) {
            break;
        }
    }

    if (sgnb == -1) {
        r->VsrSD(1) = -lo_value;
        r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
    } else {
        r->VsrSD(1) = lo_value;
        r->VsrSD(0) = hi_value;
    }

    cr = bcd_cmp_zero(b);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    return cr;
}
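/*
 * bcdcpsgn. copies the magnitude of a and the sign nibble of b into the
 * result, provided both operands hold valid BCD digits and signs.
 */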
uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
        return CRF_SO;
    }

    *r = *a;
    bcd_put_digit(r, b->u8[BCD_DIG_BYTE(0)] & 0xF, 0);

    for (i = 1; i < 32; i++) {
        bcd_get_digit(a, i, &invalid);
        bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    return bcd_cmp_zero(r);
}
uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int sgnb = bcd_get_sgn(b);

    *r = *b;
    bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    return bcd_cmp_zero(r);
}
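/*
 * Decimal shift helpers: bcds, bcdus and bcdsr take a signed shift count
 * from a byte of ra and shift by that many 4-bit digits.  bcds preserves
 * the sign nibble, bcdus shifts the whole unsigned operand, and bcdsr
 * rounds on right shifts by adding one when the most significant
 * discarded digit is 5 or greater.
 */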
uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
#if defined(HOST_WORDS_BIGENDIAN)
    int i = a->s8[7];
#else
    int i = a->s8[8];
#endif
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}
uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i;
    int invalid = 0;
    bool ox_flag = false;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

#if defined(HOST_WORDS_BIGENDIAN)
    i = a->s8[7];
#else
    i = a->s8[8];
#endif
    if (i >= 32) {
        ox_flag = true;
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i <= -32) {
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}
uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int unused = 0;
    int invalid = 0;
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    int i = a->s8[7];
    ppc_avr_t bcd_one = { .u64 = { 0, 0x10 } };
#else
    int i = a->s8[8];
    ppc_avr_t bcd_one = { .u64 = { 0x10, 0 } };
#endif

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);

        if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
            bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
        }
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    cr = bcd_cmp_zero(&ret);
    if (ox_flag) {
        cr |= CRF_SO;
    }
    *r = ret;

    return cr;
}
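/*
 * Truncate helpers: ra supplies the number of digits to keep and any
 * digit above that is cleared, with ox_flag (CR.SO) recording that
 * nonzero digits were dropped.  In the signed form the low nibble is the
 * sign, so the count is biased by one before the nibble masks are built.
 */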
uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    uint64_t mask;
    uint32_t ox_flag = 0;
#if defined(HOST_WORDS_BIGENDIAN)
    int i = a->s16[3] + 1;
#else
    int i = a->s16[4] + 1;
#endif
    ppc_avr_t ret = *b;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (i > 16 && i < 32) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i >= 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
    *r = ret;

    return bcd_cmp_zero(&ret) | ox_flag;
}
uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    uint64_t mask;
    uint32_t ox_flag = 0;
    int invalid = 0;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

#if defined(HOST_WORDS_BIGENDIAN)
    i = a->s16[3];
#else
    i = a->s16[4];
#endif
    if (i > 16 && i < 33) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i > 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    } else if (i == 0) {
        if (ret.VsrD(0) || ret.VsrD(1)) {
            ox_flag = CRF_SO;
        }
        ret.VsrD(0) = ret.VsrD(1) = 0;
    }

    *r = ret;
    if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
        return ox_flag | CRF_EQ;
    }

    return ox_flag | CRF_GT;
}
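/*
 * AES acceleration helpers.  vsbox applies the AES S-box to each byte;
 * vcipher is one full encryption round (SubBytes, ShiftRows and
 * MixColumns folded into the AES_Te* tables, with the XOR against b
 * acting as AddRoundKey); vcipherlast is the final round without
 * MixColumns; vncipher and vncipherlast are the matching inverse rounds.
 */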
void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = AES_sbox[a->u8[i]];
    }
}
void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        result.VsrW(i) = b->VsrW(i) ^
            (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
             AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
             AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
             AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
    }
    *r = result;
}
void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
    }
    *r = result;
}
void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /* This differs from what is written in ISA V2.07.  The RTL is */
    /* incorrect and will be fixed in V2.07B.                      */
    int i;
    ppc_avr_t tmp;

    VECTOR_FOR_INORDER_I(i, u8) {
        tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->VsrW(i) =
            AES_imc[tmp.VsrB(4 * i + 0)][0] ^
            AES_imc[tmp.VsrB(4 * i + 1)][1] ^
            AES_imc[tmp.VsrB(4 * i + 2)][2] ^
            AES_imc[tmp.VsrB(4 * i + 3)][3];
    }
}
void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
    }
    *r = result;
}
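/*
 * vshasigmaw and vshasigmad evaluate the SHA-256 and SHA-512 sigma
 * functions.  The st bit of st_six selects the lower-case sigma functions
 * (two rotates and a shift, used in the message schedule) or the
 * upper-case Sigma functions (three rotates, used in the compression
 * rounds); the six bits pick sigma0 or sigma1 per element.  The rotation
 * and shift amounts are the FIPS 180-4 constants.
 */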
void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 7) ^
                             ror32(a->VsrW(i), 18) ^
                             (a->VsrW(i) >> 3);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 17) ^
                             ror32(a->VsrW(i), 19) ^
                             (a->VsrW(i) >> 10);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 2) ^
                             ror32(a->VsrW(i), 13) ^
                             ror32(a->VsrW(i), 22);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 6) ^
                             ror32(a->VsrW(i), 11) ^
                             ror32(a->VsrW(i), 25);
            }
        }
    }
}
void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        if (st == 0) {
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 1) ^
                             ror64(a->VsrD(i), 8) ^
                             (a->VsrD(i) >> 7);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 19) ^
                             ror64(a->VsrD(i), 61) ^
                             (a->VsrD(i) >> 6);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 28) ^
                             ror64(a->VsrD(i), 34) ^
                             ror64(a->VsrD(i), 39);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 14) ^
                             ror64(a->VsrD(i), 18) ^
                             ror64(a->VsrD(i), 41);
            }
        }
    }
}
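/*
 * vpermxor. each control byte in c selects one byte of a with its high
 * nibble and one byte of b with its low nibble and XORs the two; a
 * control byte of 0x4A, for instance, yields a->VsrB(4) ^ b->VsrB(10).
 */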
void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int indexA = c->VsrB(i) >> 4;
        int indexB = c->VsrB(i) & 0xF;

        result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
    }
    *r = result;
}

#undef VECTOR_FOR_INORDER_I
/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
        (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}
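/*
 * brinc. bit-reversed increment, used for FFT-style addressing: the low
 * MASKBITS bits of arg1 are incremented in bit-reversed order within the
 * mask supplied by arg2, which word_reverse() reduces to an ordinary
 * reverse, add one, reverse back sequence.
 */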
#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}
uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}
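/*
 * dlmzb. "determine leftmost zero byte": scans the eight bytes of
 * high:low from the most significant byte down, counts the bytes that
 * precede the first zero byte, stores the count in the low bits of XER
 * and, when update_Rc is set, uses CR0 to report whether the zero byte
 * was found in the high word, the low word, or not at all.
 */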
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}