2 * PowerPC integer and vector emulation helpers for QEMU.
4 * Copyright (c) 2003-2007 Jocelyn Mayer
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
22 #include "qemu/host-utils.h"
23 #include "exec/helper-proto.h"
24 #include "crypto/aes.h"
25 #include "fpu/softfloat.h"
26 #include "qapi/error.h"
27 #include "qemu/guest-random.h"
29 #include "helper_regs.h"
30 /*****************************************************************************/
31 /* Fixed point operations helpers */
33 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
36 env->so = env->ov = 1;
42 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
48 uint64_t dividend = (uint64_t)ra << 32;
49 uint64_t divisor = (uint32_t)rb;
51 if (unlikely(divisor == 0)) {
54 rt = dividend / divisor;
55 overflow = rt > UINT32_MAX;
58 if (unlikely(overflow)) {
59 rt = 0; /* Undefined */
63 helper_update_ov_legacy(env, overflow);
66 return (target_ulong)rt;
69 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
75 int64_t dividend = (int64_t)ra << 32;
76 int64_t divisor = (int64_t)((int32_t)rb);
78 if (unlikely((divisor == 0) ||
79 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
82 rt = dividend / divisor;
83 overflow = rt != (int32_t)rt;
86 if (unlikely(overflow)) {
87 rt = 0; /* Undefined */
91 helper_update_ov_legacy(env, overflow);
94 return (target_ulong)rt;
97 #if defined(TARGET_PPC64)
99 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
104 overflow = divu128(&rt, &ra, rb);
106 if (unlikely(overflow)) {
107 rt = 0; /* Undefined */
111 helper_update_ov_legacy(env, overflow);
117 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
120 int64_t ra = (int64_t)rau;
121 int64_t rb = (int64_t)rbu;
122 int overflow = divs128(&rt, &ra, rb);
124 if (unlikely(overflow)) {
125 rt = 0; /* Undefined */
129 helper_update_ov_legacy(env, overflow);
138 #if defined(TARGET_PPC64)
139 /* if x = 0xab, returns 0xabababababababab */
140 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
143  * subtract 1 from each byte, AND with the inverse, and check whether the MSB is set in each byte,
145 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
146 * (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
148 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
150 /* When you XOR with the pattern and there is a match, that byte will be zero */
151 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
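/*
 * Worked example (illustrative values): searching for the byte 0x2a in
 * rb = 0x112233442a556677 uses pattern(0x2a) = 0x2a2a2a2a2a2a2a2a, and
 *     rb ^ pattern(0x2a) = 0x3b08196e007f4c5d
 * has a zero byte exactly where the match is. haszero() then reports it:
 * subtracting pattern(0x01) leaves the MSB of that zero byte set, ~v keeps
 * it, so the result is non-zero iff some byte of rb equals ra.
 */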
153 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
155 return hasvalue(rb, ra) ? CRF_GT : 0;
163 * Return a random number.
165 uint64_t helper_darn32(void)
170 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
171 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
172 error_get_pretty(err));
180 uint64_t helper_darn64(void)
185 if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
186 qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
187 error_get_pretty(err));
195 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
200 for (i = 0; i < 8; i++) {
201 int index = (rs >> (i * 8)) & 0xFF;
203 if (rb & PPC_BIT(index)) {
213 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
215 target_ulong mask = 0xff;
219 for (i = 0; i < sizeof(target_ulong); i++) {
220 if ((rs & mask) == (rb & mask)) {
228 /* shift right arithmetic helper */
229 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
234 if (likely(!(shift & 0x20))) {
235 if (likely((uint32_t)shift != 0)) {
237 ret = (int32_t)value >> shift;
238 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
239 env->ca32 = env->ca = 0;
241 env->ca32 = env->ca = 1;
244 ret = (int32_t)value;
245 env->ca32 = env->ca = 0;
248 ret = (int32_t)value >> 31;
249 env->ca32 = env->ca = (ret != 0);
251 return (target_long)ret;
254 #if defined(TARGET_PPC64)
255 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
260 if (likely(!(shift & 0x40))) {
261 if (likely((uint64_t)shift != 0)) {
263 ret = (int64_t)value >> shift;
264 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
265 env->ca32 = env->ca = 0;
267 env->ca32 = env->ca = 1;
270 ret = (int64_t)value;
271 env->ca32 = env->ca = 0;
274 ret = (int64_t)value >> 63;
275 env->ca32 = env->ca = (ret != 0);
281 #if defined(TARGET_PPC64)
282 target_ulong helper_popcntb(target_ulong val)
284 /* Note that we don't fold past bytes */
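/*
 * Illustrative: val = 0x00000000000000ff folds down to 0x0000000000000008,
 * i.e. each byte ends up holding its own population count.
 */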
285 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
286 0x5555555555555555ULL);
287 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
288 0x3333333333333333ULL);
289 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
290 0x0f0f0f0f0f0f0f0fULL);
294 target_ulong helper_popcntw(target_ulong val)
296 /* Note that we don't fold past words. */
297 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
298 0x5555555555555555ULL);
299 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
300 0x3333333333333333ULL);
301 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
302 0x0f0f0f0f0f0f0f0fULL);
303 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
304 0x00ff00ff00ff00ffULL);
305 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
306 0x0000ffff0000ffffULL);
310 target_ulong helper_popcntb(target_ulong val)
312 /* Note that we don't fold past bytes */
313 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
314 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
315 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
320 /*****************************************************************************/
321 /* PowerPC 601 specific instructions (POWER bridge) */
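/*
 * The POWER-bridge divide operates on the 64-bit value (RA || MQ), leaving
 * the quotient in the target register and the remainder in MQ.
 * Illustrative numbers: arg1 = 0, MQ = 100, arg2 = 7 gives a quotient of 14
 * and MQ = 2.
 */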
322 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
324 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
326 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
327 (int32_t)arg2 == 0) {
328 env->spr[SPR_MQ] = 0;
331 env->spr[SPR_MQ] = tmp % arg2;
332 return tmp / (int32_t)arg2;
336 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
339 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
341 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
342 (int32_t)arg2 == 0) {
343 env->so = env->ov = 1;
344 env->spr[SPR_MQ] = 0;
347 env->spr[SPR_MQ] = tmp % arg2;
348 tmp /= (int32_t)arg2;
349 if ((int32_t)tmp != tmp) {
350 env->so = env->ov = 1;
358 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
361 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
362 (int32_t)arg2 == 0) {
363 env->spr[SPR_MQ] = 0;
366 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
367 return (int32_t)arg1 / (int32_t)arg2;
371 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
374 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
375 (int32_t)arg2 == 0) {
376 env->so = env->ov = 1;
377 env->spr[SPR_MQ] = 0;
381 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
382 return (int32_t)arg1 / (int32_t)arg2;
386 /*****************************************************************************/
387 /* 602 specific instructions */
388 /* mfrom is the craziest instruction ever seen, imho! */
389 /* The real implementation uses a ROM table; do the same. */
391 * Extremely decomposed:
393  * return 256 * log10(pow(10, -arg / 256.0) + 1.0) + 0.5
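 * For instance, arg = 0 gives 256 * log10(2) + 0.5, roughly 77.6, so the
 * first table entry is about 77.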
395 #if !defined(CONFIG_USER_ONLY)
396 target_ulong helper_602_mfrom(target_ulong arg)
398 if (likely(arg < 602)) {
399 #include "mfrom_table.inc.c"
400 return mfrom_ROM_table[arg];
407 /*****************************************************************************/
408 /* Altivec extension helpers */
409 #if defined(HOST_WORDS_BIGENDIAN)
410 #define VECTOR_FOR_INORDER_I(index, element) \
411 for (index = 0; index < ARRAY_SIZE(r->element); index++)
413 #define VECTOR_FOR_INORDER_I(index, element) \
414 for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
417 /* Saturating arithmetic helpers. */
418 #define SATCVT(from, to, from_type, to_type, min, max) \
419 static inline to_type cvt##from##to(from_type x, int *sat) \
423 if (x < (from_type)min) { \
426 } else if (x > (from_type)max) { \
434 #define SATCVTU(from, to, from_type, to_type, min, max) \
435 static inline to_type cvt##from##to(from_type x, int *sat) \
439 if (x > (from_type)max) { \
447 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
448 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
449 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
451 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
452 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
453 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
454 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
455 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
456 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
460 void helper_lvsl(ppc_avr_t *r, target_ulong sh)
462 int i, j = (sh & 0xf);
464 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
469 void helper_lvsr(ppc_avr_t *r, target_ulong sh)
471 int i, j = 0x10 - (sh & 0xf);
473 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
478 void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
480 env->vscr = vscr & ~(1u << VSCR_SAT);
481 /* Which bit we set is completely arbitrary, but clear the rest. */
482 env->vscr_sat.u64[0] = vscr & (1u << VSCR_SAT);
483 env->vscr_sat.u64[1] = 0;
484 set_flush_to_zero((vscr >> VSCR_NJ) & 1, &env->vec_status);
487 uint32_t helper_mfvscr(CPUPPCState *env)
489 uint32_t sat = (env->vscr_sat.u64[0] | env->vscr_sat.u64[1]) != 0;
490 return env->vscr | (sat << VSCR_SAT);
493 static inline void set_vscr_sat(CPUPPCState *env)
495 /* The choice of non-zero value is arbitrary. */
496 env->vscr_sat.u32[0] = 1;
499 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
503 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
504 r->u32[i] = ~a->u32[i] < b->u32[i];
509 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
512 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
513 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
520 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
523 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
524 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
532 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
534 uint64_t res = b->u64[0] ^ b->u64[1];
538 r->VsrD(1) = res & 1;
542 #define VARITH_DO(name, op, element) \
543 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
547 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
548 r->element[i] = a->element[i] op b->element[i]; \
551 VARITH_DO(muluwm, *, u32)
555 #define VARITHFP(suffix, func) \
556 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
561 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
562 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \
565 VARITHFP(addfp, float32_add)
566 VARITHFP(subfp, float32_sub)
567 VARITHFP(minfp, float32_min)
568 VARITHFP(maxfp, float32_max)
571 #define VARITHFPFMA(suffix, type) \
572 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
573 ppc_avr_t *b, ppc_avr_t *c) \
576 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
577 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
578 type, &env->vec_status); \
581 VARITHFPFMA(maddfp, 0);
582 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
585 #define VARITHSAT_CASE(type, op, cvt, element) \
587 type result = (type)a->element[i] op (type)b->element[i]; \
588 r->element[i] = cvt(result, &sat); \
591 #define VARITHSAT_DO(name, op, optype, cvt, element) \
592 void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat, \
593 ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
598 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
599 VARITHSAT_CASE(optype, op, cvt, element); \
602 vscr_sat->u32[0] = 1; \
605 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
606 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
607 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
608 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
609 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
610 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
611 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
612 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
613 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
614 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
615 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
616 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
617 #undef VARITHSAT_CASE
619 #undef VARITHSAT_SIGNED
620 #undef VARITHSAT_UNSIGNED
622 #define VAVG_DO(name, element, etype) \
623 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
627 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
628 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
629 r->element[i] = x >> 1; \
633 #define VAVG(type, signed_element, signed_type, unsigned_element, \
635 VAVG_DO(avgs##type, signed_element, signed_type) \
636 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
637 VAVG(b, s8, int16_t, u8, uint16_t)
638 VAVG(h, s16, int32_t, u16, uint32_t)
639 VAVG(w, s32, int64_t, u32, uint64_t)
643 #define VABSDU_DO(name, element) \
644 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
648 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
649 r->element[i] = (a->element[i] > b->element[i]) ? \
650 (a->element[i] - b->element[i]) : \
651 (b->element[i] - a->element[i]); \
656 * VABSDU - Vector absolute difference unsigned
657 * name - instruction mnemonic suffix (b: byte, h: halfword, w: word)
658 * element - element type to access from vector
660 #define VABSDU(type, element) \
661 VABSDU_DO(absdu##type, element)
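/*
 * For example, vabsdub on byte lanes holding 0x10 and 0xf0 yields 0xe0 in
 * either operand order: the smaller unsigned value is subtracted from the
 * larger one.
 */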
668 #define VCF(suffix, cvt, element) \
669 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
670 ppc_avr_t *b, uint32_t uim) \
674 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
675 float32 t = cvt(b->element[i], &env->vec_status); \
676 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \
679 VCF(ux, uint32_to_float32, u32)
680 VCF(sx, int32_to_float32, s32)
683 #define VCMP_DO(suffix, compare, element, record) \
684 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
685 ppc_avr_t *a, ppc_avr_t *b) \
687 uint64_t ones = (uint64_t)-1; \
688 uint64_t all = ones; \
692 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
693 uint64_t result = (a->element[i] compare b->element[i] ? \
695 switch (sizeof(a->element[0])) { \
697 r->u64[i] = result; \
700 r->u32[i] = result; \
703 r->u16[i] = result; \
713 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
716 #define VCMP(suffix, compare, element) \
717 VCMP_DO(suffix, compare, element, 0) \
718 VCMP_DO(suffix##_dot, compare, element, 1)
734 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \
735 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \
736 ppc_avr_t *a, ppc_avr_t *b) \
738 etype ones = (etype)-1; \
740 etype result, none = 0; \
743 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
745 result = ((a->element[i] == 0) \
746 || (b->element[i] == 0) \
747 || (a->element[i] != b->element[i]) ? \
750 result = (a->element[i] != b->element[i]) ? ones : 0x0; \
752 r->element[i] = result; \
757 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
762 * VCMPNEZ - Vector compare not equal to zero
763 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word)
764 * element - element type to access from vector
766 #define VCMPNE(suffix, element, etype, cmpzero) \
767 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \
768 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
769 VCMPNE(zb, u8, uint8_t, 1)
770 VCMPNE(zh, u16, uint16_t, 1)
771 VCMPNE(zw, u32, uint32_t, 1)
772 VCMPNE(b, u8, uint8_t, 0)
773 VCMPNE(h, u16, uint16_t, 0)
774 VCMPNE(w, u32, uint32_t, 0)
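/*
 * For example, vcmpnezb sets a byte lane to all ones when the two input
 * bytes differ or when either of them is zero: lanes (0x12, 0x12) give 0x00,
 * while (0x12, 0x00) give 0xff.
 */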
778 #define VCMPFP_DO(suffix, compare, order, record) \
779 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
780 ppc_avr_t *a, ppc_avr_t *b) \
782 uint32_t ones = (uint32_t)-1; \
783 uint32_t all = ones; \
787 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
789 int rel = float32_compare_quiet(a->f32[i], b->f32[i], \
791 if (rel == float_relation_unordered) { \
793 } else if (rel compare order) { \
798 r->u32[i] = result; \
803 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
806 #define VCMPFP(suffix, compare, order) \
807 VCMPFP_DO(suffix, compare, order, 0) \
808 VCMPFP_DO(suffix##_dot, compare, order, 1)
809 VCMPFP(eqfp, ==, float_relation_equal)
810 VCMPFP(gefp, !=, float_relation_less)
811 VCMPFP(gtfp, ==, float_relation_greater)
815 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
816 ppc_avr_t *a, ppc_avr_t *b, int record)
821 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
822 int le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
824 if (le_rel == float_relation_unordered) {
825 r->u32[i] = 0xc0000000;
828 float32 bneg = float32_chs(b->f32[i]);
829 int ge_rel = float32_compare_quiet(a->f32[i], bneg,
831 int le = le_rel != float_relation_greater;
832 int ge = ge_rel != float_relation_less;
834 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
835 all_in |= (!le | !ge);
839 env->crf[6] = (all_in == 0) << 1;
843 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
845 vcmpbfp_internal(env, r, a, b, 0);
848 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
851 vcmpbfp_internal(env, r, a, b, 1);
854 #define VCT(suffix, satcvt, element) \
855 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
856 ppc_avr_t *b, uint32_t uim) \
860 float_status s = env->vec_status; \
862 set_float_rounding_mode(float_round_to_zero, &s); \
863 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
864 if (float32_is_any_nan(b->f32[i])) { \
867 float64 t = float32_to_float64(b->f32[i], &s); \
870 t = float64_scalbn(t, uim, &s); \
871 j = float64_to_int64(t, &s); \
872 r->element[i] = satcvt(j, &sat); \
879 VCT(uxs, cvtsduw, u32)
880 VCT(sxs, cvtsdsw, s32)
883 target_ulong helper_vclzlsbb(ppc_avr_t *r)
885 target_ulong count = 0;
887 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
888 if (r->VsrB(i) & 0x01) {
896 target_ulong helper_vctzlsbb(ppc_avr_t *r)
898 target_ulong count = 0;
900 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
901 if (r->VsrB(i) & 0x01) {
909 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
910 ppc_avr_t *b, ppc_avr_t *c)
915 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
916 int32_t prod = a->s16[i] * b->s16[i];
917 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
919 r->s16[i] = cvtswsh(t, &sat);
927 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
928 ppc_avr_t *b, ppc_avr_t *c)
933 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
934 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
935 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
936 r->s16[i] = cvtswsh(t, &sat);
944 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
948 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
949 int32_t prod = a->s16[i] * b->s16[i];
950 r->s16[i] = (int16_t) (prod + c->s16[i]);
954 #define VMRG_DO(name, element, access, ofs) \
955 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
958 int i, half = ARRAY_SIZE(r->element) / 2; \
960 for (i = 0; i < half; i++) { \
961 result.access(i * 2 + 0) = a->access(i + ofs); \
962 result.access(i * 2 + 1) = b->access(i + ofs); \
967 #define VMRG(suffix, element, access) \
968 VMRG_DO(mrgl##suffix, element, access, half) \
969 VMRG_DO(mrgh##suffix, element, access, 0)
976 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
977 ppc_avr_t *b, ppc_avr_t *c)
982 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
983 prod[i] = (int32_t)a->s8[i] * b->u8[i];
986 VECTOR_FOR_INORDER_I(i, s32) {
987 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
988 prod[4 * i + 2] + prod[4 * i + 3];
992 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
993 ppc_avr_t *b, ppc_avr_t *c)
998 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
999 prod[i] = a->s16[i] * b->s16[i];
1002 VECTOR_FOR_INORDER_I(i, s32) {
1003 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1007 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1008 ppc_avr_t *b, ppc_avr_t *c)
1014 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1015 prod[i] = (int32_t)a->s16[i] * b->s16[i];
1018 VECTOR_FOR_INORDER_I(i, s32) {
1019 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1021 r->u32[i] = cvtsdsw(t, &sat);
1029 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1030 ppc_avr_t *b, ppc_avr_t *c)
1035 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1036 prod[i] = a->u8[i] * b->u8[i];
1039 VECTOR_FOR_INORDER_I(i, u32) {
1040 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1041 prod[4 * i + 2] + prod[4 * i + 3];
1045 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1046 ppc_avr_t *b, ppc_avr_t *c)
1051 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1052 prod[i] = a->u16[i] * b->u16[i];
1055 VECTOR_FOR_INORDER_I(i, u32) {
1056 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1060 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1061 ppc_avr_t *b, ppc_avr_t *c)
1067 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1068 prod[i] = a->u16[i] * b->u16[i];
1071 VECTOR_FOR_INORDER_I(i, s32) {
1072 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1074 r->u32[i] = cvtuduw(t, &sat);
1082 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \
1083 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1087 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1088 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \
1089 (cast)b->mul_access(i); \
1093 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \
1094 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1098 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1099 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \
1100 (cast)b->mul_access(i + 1); \
1104 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \
1105 VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \
1106 VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast)
1107 VMUL(sb, s8, VsrSB, VsrSH, int16_t)
1108 VMUL(sh, s16, VsrSH, VsrSW, int32_t)
1109 VMUL(sw, s32, VsrSW, VsrSD, int64_t)
1110 VMUL(ub, u8, VsrB, VsrH, uint16_t)
1111 VMUL(uh, u16, VsrH, VsrW, uint32_t)
1112 VMUL(uw, u32, VsrW, VsrD, uint64_t)
1117 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1123 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1124 int s = c->VsrB(i) & 0x1f;
1125 int index = s & 0xf;
1128 result.VsrB(i) = b->VsrB(index);
1130 result.VsrB(i) = a->VsrB(index);
1136 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1142 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1143 int s = c->VsrB(i) & 0x1f;
1144 int index = 15 - (s & 0xf);
1147 result.VsrB(i) = a->VsrB(index);
1149 result.VsrB(i) = b->VsrB(index);
1155 #if defined(HOST_WORDS_BIGENDIAN)
1156 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1157 #define VBPERMD_INDEX(i) (i)
1158 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1159 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
1161 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
1162 #define VBPERMD_INDEX(i) (1 - i)
1163 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1164 #define EXTRACT_BIT(avr, i, index) \
1165 (extract64((avr)->u64[1 - i], 63 - index, 1))
1168 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1171 ppc_avr_t result = { .u64 = { 0, 0 } };
1172 VECTOR_FOR_INORDER_I(i, u64) {
1173 for (j = 0; j < 8; j++) {
1174 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1175 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1176 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1183 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1188 VECTOR_FOR_INORDER_I(i, u8) {
1189 int index = VBPERMQ_INDEX(b, i);
1192 uint64_t mask = (1ull << (63 - (index & 0x3F)));
1193 if (a->u64[VBPERMQ_DW(index)] & mask) {
1194 perm |= (0x8000 >> i);
1203 #undef VBPERMQ_INDEX
1206 static const uint64_t VGBBD_MASKS[256] = {
1207 0x0000000000000000ull, /* 00 */
1208 0x0000000000000080ull, /* 01 */
1209 0x0000000000008000ull, /* 02 */
1210 0x0000000000008080ull, /* 03 */
1211 0x0000000000800000ull, /* 04 */
1212 0x0000000000800080ull, /* 05 */
1213 0x0000000000808000ull, /* 06 */
1214 0x0000000000808080ull, /* 07 */
1215 0x0000000080000000ull, /* 08 */
1216 0x0000000080000080ull, /* 09 */
1217 0x0000000080008000ull, /* 0A */
1218 0x0000000080008080ull, /* 0B */
1219 0x0000000080800000ull, /* 0C */
1220 0x0000000080800080ull, /* 0D */
1221 0x0000000080808000ull, /* 0E */
1222 0x0000000080808080ull, /* 0F */
1223 0x0000008000000000ull, /* 10 */
1224 0x0000008000000080ull, /* 11 */
1225 0x0000008000008000ull, /* 12 */
1226 0x0000008000008080ull, /* 13 */
1227 0x0000008000800000ull, /* 14 */
1228 0x0000008000800080ull, /* 15 */
1229 0x0000008000808000ull, /* 16 */
1230 0x0000008000808080ull, /* 17 */
1231 0x0000008080000000ull, /* 18 */
1232 0x0000008080000080ull, /* 19 */
1233 0x0000008080008000ull, /* 1A */
1234 0x0000008080008080ull, /* 1B */
1235 0x0000008080800000ull, /* 1C */
1236 0x0000008080800080ull, /* 1D */
1237 0x0000008080808000ull, /* 1E */
1238 0x0000008080808080ull, /* 1F */
1239 0x0000800000000000ull, /* 20 */
1240 0x0000800000000080ull, /* 21 */
1241 0x0000800000008000ull, /* 22 */
1242 0x0000800000008080ull, /* 23 */
1243 0x0000800000800000ull, /* 24 */
1244 0x0000800000800080ull, /* 25 */
1245 0x0000800000808000ull, /* 26 */
1246 0x0000800000808080ull, /* 27 */
1247 0x0000800080000000ull, /* 28 */
1248 0x0000800080000080ull, /* 29 */
1249 0x0000800080008000ull, /* 2A */
1250 0x0000800080008080ull, /* 2B */
1251 0x0000800080800000ull, /* 2C */
1252 0x0000800080800080ull, /* 2D */
1253 0x0000800080808000ull, /* 2E */
1254 0x0000800080808080ull, /* 2F */
1255 0x0000808000000000ull, /* 30 */
1256 0x0000808000000080ull, /* 31 */
1257 0x0000808000008000ull, /* 32 */
1258 0x0000808000008080ull, /* 33 */
1259 0x0000808000800000ull, /* 34 */
1260 0x0000808000800080ull, /* 35 */
1261 0x0000808000808000ull, /* 36 */
1262 0x0000808000808080ull, /* 37 */
1263 0x0000808080000000ull, /* 38 */
1264 0x0000808080000080ull, /* 39 */
1265 0x0000808080008000ull, /* 3A */
1266 0x0000808080008080ull, /* 3B */
1267 0x0000808080800000ull, /* 3C */
1268 0x0000808080800080ull, /* 3D */
1269 0x0000808080808000ull, /* 3E */
1270 0x0000808080808080ull, /* 3F */
1271 0x0080000000000000ull, /* 40 */
1272 0x0080000000000080ull, /* 41 */
1273 0x0080000000008000ull, /* 42 */
1274 0x0080000000008080ull, /* 43 */
1275 0x0080000000800000ull, /* 44 */
1276 0x0080000000800080ull, /* 45 */
1277 0x0080000000808000ull, /* 46 */
1278 0x0080000000808080ull, /* 47 */
1279 0x0080000080000000ull, /* 48 */
1280 0x0080000080000080ull, /* 49 */
1281 0x0080000080008000ull, /* 4A */
1282 0x0080000080008080ull, /* 4B */
1283 0x0080000080800000ull, /* 4C */
1284 0x0080000080800080ull, /* 4D */
1285 0x0080000080808000ull, /* 4E */
1286 0x0080000080808080ull, /* 4F */
1287 0x0080008000000000ull, /* 50 */
1288 0x0080008000000080ull, /* 51 */
1289 0x0080008000008000ull, /* 52 */
1290 0x0080008000008080ull, /* 53 */
1291 0x0080008000800000ull, /* 54 */
1292 0x0080008000800080ull, /* 55 */
1293 0x0080008000808000ull, /* 56 */
1294 0x0080008000808080ull, /* 57 */
1295 0x0080008080000000ull, /* 58 */
1296 0x0080008080000080ull, /* 59 */
1297 0x0080008080008000ull, /* 5A */
1298 0x0080008080008080ull, /* 5B */
1299 0x0080008080800000ull, /* 5C */
1300 0x0080008080800080ull, /* 5D */
1301 0x0080008080808000ull, /* 5E */
1302 0x0080008080808080ull, /* 5F */
1303 0x0080800000000000ull, /* 60 */
1304 0x0080800000000080ull, /* 61 */
1305 0x0080800000008000ull, /* 62 */
1306 0x0080800000008080ull, /* 63 */
1307 0x0080800000800000ull, /* 64 */
1308 0x0080800000800080ull, /* 65 */
1309 0x0080800000808000ull, /* 66 */
1310 0x0080800000808080ull, /* 67 */
1311 0x0080800080000000ull, /* 68 */
1312 0x0080800080000080ull, /* 69 */
1313 0x0080800080008000ull, /* 6A */
1314 0x0080800080008080ull, /* 6B */
1315 0x0080800080800000ull, /* 6C */
1316 0x0080800080800080ull, /* 6D */
1317 0x0080800080808000ull, /* 6E */
1318 0x0080800080808080ull, /* 6F */
1319 0x0080808000000000ull, /* 70 */
1320 0x0080808000000080ull, /* 71 */
1321 0x0080808000008000ull, /* 72 */
1322 0x0080808000008080ull, /* 73 */
1323 0x0080808000800000ull, /* 74 */
1324 0x0080808000800080ull, /* 75 */
1325 0x0080808000808000ull, /* 76 */
1326 0x0080808000808080ull, /* 77 */
1327 0x0080808080000000ull, /* 78 */
1328 0x0080808080000080ull, /* 79 */
1329 0x0080808080008000ull, /* 7A */
1330 0x0080808080008080ull, /* 7B */
1331 0x0080808080800000ull, /* 7C */
1332 0x0080808080800080ull, /* 7D */
1333 0x0080808080808000ull, /* 7E */
1334 0x0080808080808080ull, /* 7F */
1335 0x8000000000000000ull, /* 80 */
1336 0x8000000000000080ull, /* 81 */
1337 0x8000000000008000ull, /* 82 */
1338 0x8000000000008080ull, /* 83 */
1339 0x8000000000800000ull, /* 84 */
1340 0x8000000000800080ull, /* 85 */
1341 0x8000000000808000ull, /* 86 */
1342 0x8000000000808080ull, /* 87 */
1343 0x8000000080000000ull, /* 88 */
1344 0x8000000080000080ull, /* 89 */
1345 0x8000000080008000ull, /* 8A */
1346 0x8000000080008080ull, /* 8B */
1347 0x8000000080800000ull, /* 8C */
1348 0x8000000080800080ull, /* 8D */
1349 0x8000000080808000ull, /* 8E */
1350 0x8000000080808080ull, /* 8F */
1351 0x8000008000000000ull, /* 90 */
1352 0x8000008000000080ull, /* 91 */
1353 0x8000008000008000ull, /* 92 */
1354 0x8000008000008080ull, /* 93 */
1355 0x8000008000800000ull, /* 94 */
1356 0x8000008000800080ull, /* 95 */
1357 0x8000008000808000ull, /* 96 */
1358 0x8000008000808080ull, /* 97 */
1359 0x8000008080000000ull, /* 98 */
1360 0x8000008080000080ull, /* 99 */
1361 0x8000008080008000ull, /* 9A */
1362 0x8000008080008080ull, /* 9B */
1363 0x8000008080800000ull, /* 9C */
1364 0x8000008080800080ull, /* 9D */
1365 0x8000008080808000ull, /* 9E */
1366 0x8000008080808080ull, /* 9F */
1367 0x8000800000000000ull, /* A0 */
1368 0x8000800000000080ull, /* A1 */
1369 0x8000800000008000ull, /* A2 */
1370 0x8000800000008080ull, /* A3 */
1371 0x8000800000800000ull, /* A4 */
1372 0x8000800000800080ull, /* A5 */
1373 0x8000800000808000ull, /* A6 */
1374 0x8000800000808080ull, /* A7 */
1375 0x8000800080000000ull, /* A8 */
1376 0x8000800080000080ull, /* A9 */
1377 0x8000800080008000ull, /* AA */
1378 0x8000800080008080ull, /* AB */
1379 0x8000800080800000ull, /* AC */
1380 0x8000800080800080ull, /* AD */
1381 0x8000800080808000ull, /* AE */
1382 0x8000800080808080ull, /* AF */
1383 0x8000808000000000ull, /* B0 */
1384 0x8000808000000080ull, /* B1 */
1385 0x8000808000008000ull, /* B2 */
1386 0x8000808000008080ull, /* B3 */
1387 0x8000808000800000ull, /* B4 */
1388 0x8000808000800080ull, /* B5 */
1389 0x8000808000808000ull, /* B6 */
1390 0x8000808000808080ull, /* B7 */
1391 0x8000808080000000ull, /* B8 */
1392 0x8000808080000080ull, /* B9 */
1393 0x8000808080008000ull, /* BA */
1394 0x8000808080008080ull, /* BB */
1395 0x8000808080800000ull, /* BC */
1396 0x8000808080800080ull, /* BD */
1397 0x8000808080808000ull, /* BE */
1398 0x8000808080808080ull, /* BF */
1399 0x8080000000000000ull, /* C0 */
1400 0x8080000000000080ull, /* C1 */
1401 0x8080000000008000ull, /* C2 */
1402 0x8080000000008080ull, /* C3 */
1403 0x8080000000800000ull, /* C4 */
1404 0x8080000000800080ull, /* C5 */
1405 0x8080000000808000ull, /* C6 */
1406 0x8080000000808080ull, /* C7 */
1407 0x8080000080000000ull, /* C8 */
1408 0x8080000080000080ull, /* C9 */
1409 0x8080000080008000ull, /* CA */
1410 0x8080000080008080ull, /* CB */
1411 0x8080000080800000ull, /* CC */
1412 0x8080000080800080ull, /* CD */
1413 0x8080000080808000ull, /* CE */
1414 0x8080000080808080ull, /* CF */
1415 0x8080008000000000ull, /* D0 */
1416 0x8080008000000080ull, /* D1 */
1417 0x8080008000008000ull, /* D2 */
1418 0x8080008000008080ull, /* D3 */
1419 0x8080008000800000ull, /* D4 */
1420 0x8080008000800080ull, /* D5 */
1421 0x8080008000808000ull, /* D6 */
1422 0x8080008000808080ull, /* D7 */
1423 0x8080008080000000ull, /* D8 */
1424 0x8080008080000080ull, /* D9 */
1425 0x8080008080008000ull, /* DA */
1426 0x8080008080008080ull, /* DB */
1427 0x8080008080800000ull, /* DC */
1428 0x8080008080800080ull, /* DD */
1429 0x8080008080808000ull, /* DE */
1430 0x8080008080808080ull, /* DF */
1431 0x8080800000000000ull, /* E0 */
1432 0x8080800000000080ull, /* E1 */
1433 0x8080800000008000ull, /* E2 */
1434 0x8080800000008080ull, /* E3 */
1435 0x8080800000800000ull, /* E4 */
1436 0x8080800000800080ull, /* E5 */
1437 0x8080800000808000ull, /* E6 */
1438 0x8080800000808080ull, /* E7 */
1439 0x8080800080000000ull, /* E8 */
1440 0x8080800080000080ull, /* E9 */
1441 0x8080800080008000ull, /* EA */
1442 0x8080800080008080ull, /* EB */
1443 0x8080800080800000ull, /* EC */
1444 0x8080800080800080ull, /* ED */
1445 0x8080800080808000ull, /* EE */
1446 0x8080800080808080ull, /* EF */
1447 0x8080808000000000ull, /* F0 */
1448 0x8080808000000080ull, /* F1 */
1449 0x8080808000008000ull, /* F2 */
1450 0x8080808000008080ull, /* F3 */
1451 0x8080808000800000ull, /* F4 */
1452 0x8080808000800080ull, /* F5 */
1453 0x8080808000808000ull, /* F6 */
1454 0x8080808000808080ull, /* F7 */
1455 0x8080808080000000ull, /* F8 */
1456 0x8080808080000080ull, /* F9 */
1457 0x8080808080008000ull, /* FA */
1458 0x8080808080008080ull, /* FB */
1459 0x8080808080800000ull, /* FC */
1460 0x8080808080800080ull, /* FD */
1461 0x8080808080808000ull, /* FE */
1462 0x8080808080808080ull, /* FF */
1465 void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
1468 uint64_t t[2] = { 0, 0 };
1470 VECTOR_FOR_INORDER_I(i, u8) {
1471 #if defined(HOST_WORDS_BIGENDIAN)
1472 t[i >> 3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
1474 t[i >> 3] |= VGBBD_MASKS[b->u8[i]] >> (7 - (i & 7));
1482 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1483 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1486 trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])]; \
1488 VECTOR_FOR_INORDER_I(i, srcfld) { \
1490 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1491 if (a->srcfld[i] & (1ull << j)) { \
1492 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1497 VECTOR_FOR_INORDER_I(i, trgfld) { \
1498 r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1]; \
1502 PMSUM(vpmsumb, u8, u16, uint16_t)
1503 PMSUM(vpmsumh, u16, u32, uint32_t)
1504 PMSUM(vpmsumw, u32, u64, uint64_t)
1506 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1509 #ifdef CONFIG_INT128
1511 __uint128_t prod[2];
1513 VECTOR_FOR_INORDER_I(i, u64) {
1515 for (j = 0; j < 64; j++) {
1516 if (a->u64[i] & (1ull << j)) {
1517 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1522 r->u128 = prod[0] ^ prod[1];
1528 VECTOR_FOR_INORDER_I(i, u64) {
1529 prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
1530 for (j = 0; j < 64; j++) {
1531 if (a->u64[i] & (1ull << j)) {
1535 bshift.VsrD(1) = b->u64[i];
1537 bshift.VsrD(0) = b->u64[i] >> (64 - j);
1538 bshift.VsrD(1) = b->u64[i] << j;
1540 prod[i].VsrD(1) ^= bshift.VsrD(1);
1541 prod[i].VsrD(0) ^= bshift.VsrD(0);
1546 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
1547 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
1552 #if defined(HOST_WORDS_BIGENDIAN)
1557 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1561 #if defined(HOST_WORDS_BIGENDIAN)
1562 const ppc_avr_t *x[2] = { a, b };
1564 const ppc_avr_t *x[2] = { b, a };
1567 VECTOR_FOR_INORDER_I(i, u64) {
1568 VECTOR_FOR_INORDER_I(j, u32) {
1569 uint32_t e = x[i]->u32[j];
1571 result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
1572 ((e >> 6) & 0x3e0) |
1579 #define VPK(suffix, from, to, cvt, dosat) \
1580 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1581 ppc_avr_t *a, ppc_avr_t *b) \
1586 ppc_avr_t *a0 = PKBIG ? a : b; \
1587 ppc_avr_t *a1 = PKBIG ? b : a; \
1589 VECTOR_FOR_INORDER_I(i, from) { \
1590 result.to[i] = cvt(a0->from[i], &sat); \
1591 result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
1594 if (dosat && sat) { \
1595 set_vscr_sat(env); \
1599 VPK(shss, s16, s8, cvtshsb, 1)
1600 VPK(shus, s16, u8, cvtshub, 1)
1601 VPK(swss, s32, s16, cvtswsh, 1)
1602 VPK(swus, s32, u16, cvtswuh, 1)
1603 VPK(sdss, s64, s32, cvtsdsw, 1)
1604 VPK(sdus, s64, u32, cvtsduw, 1)
1605 VPK(uhus, u16, u8, cvtuhub, 1)
1606 VPK(uwus, u32, u16, cvtuwuh, 1)
1607 VPK(udus, u64, u32, cvtuduw, 1)
1608 VPK(uhum, u16, u8, I, 0)
1609 VPK(uwum, u32, u16, I, 0)
1610 VPK(udum, u64, u32, I, 0)
1615 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1619 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1620 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
1624 #define VRFI(suffix, rounding) \
1625 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1629 float_status s = env->vec_status; \
1631 set_float_rounding_mode(rounding, &s); \
1632 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
1633 r->f32[i] = float32_round_to_int (b->f32[i], &s); \
1636 VRFI(n, float_round_nearest_even)
1637 VRFI(m, float_round_down)
1638 VRFI(p, float_round_up)
1639 VRFI(z, float_round_to_zero)
1642 #define VROTATE(suffix, element, mask) \
1643 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1647 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1648 unsigned int shift = b->element[i] & mask; \
1649 r->element[i] = (a->element[i] << shift) | \
1650 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1654 VROTATE(h, u16, 0xF)
1655 VROTATE(w, u32, 0x1F)
1656 VROTATE(d, u64, 0x3F)
1659 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1663 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1664 float32 t = float32_sqrt(b->f32[i], &env->vec_status);
1666 r->f32[i] = float32_div(float32_one, t, &env->vec_status);
1670 #define VRLMI(name, size, element, insert) \
1671 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1674 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1675 uint##size##_t src1 = a->element[i]; \
1676 uint##size##_t src2 = b->element[i]; \
1677 uint##size##_t src3 = r->element[i]; \
1678 uint##size##_t begin, end, shift, mask, rot_val; \
1680 shift = extract##size(src2, 0, 6); \
1681 end = extract##size(src2, 8, 6); \
1682 begin = extract##size(src2, 16, 6); \
1683 rot_val = rol##size(src1, shift); \
1684 mask = mask_u##size(begin, end); \
1686 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1688 r->element[i] = (rot_val & mask); \
1693 VRLMI(vrldmi, 64, u64, 1);
1694 VRLMI(vrlwmi, 32, u32, 1);
1695 VRLMI(vrldnm, 64, u64, 0);
1696 VRLMI(vrlwnm, 32, u32, 0);
1698 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1701 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1702 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1705 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1709 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1710 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
1714 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1718 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1719 r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
1723 #if defined(HOST_WORDS_BIGENDIAN)
1724 #define VEXTU_X_DO(name, size, left) \
1725 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1729 index = (a & 0xf) * 8; \
1731 index = ((15 - (a & 0xf) + 1) * 8) - size; \
1733 return int128_getlo(int128_rshift(b->s128, index)) & \
1734 MAKE_64BIT_MASK(0, size); \
1737 #define VEXTU_X_DO(name, size, left) \
1738 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1742 index = ((15 - (a & 0xf) + 1) * 8) - size; \
1744 index = (a & 0xf) * 8; \
1746 return int128_getlo(int128_rshift(b->s128, index)) & \
1747 MAKE_64BIT_MASK(0, size); \
1751 VEXTU_X_DO(vextublx, 8, 1)
1752 VEXTU_X_DO(vextuhlx, 16, 1)
1753 VEXTU_X_DO(vextuwlx, 32, 1)
1754 VEXTU_X_DO(vextubrx, 8, 0)
1755 VEXTU_X_DO(vextuhrx, 16, 0)
1756 VEXTU_X_DO(vextuwrx, 32, 0)
1760 * The specification says that the results are undefined if all of the
1761  * shift counts are not identical. We check that they are in fact
1762  * identical, to conform to what real hardware appears to do.
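 * For example, if every byte of b has 0b011 in its low three bits, vsl
 * shifts the whole 128-bit value in a left by 3, carrying bits out of
 * VsrD(1) into VsrD(0); vsr shifts right correspondingly.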
1764 #define VSHIFT(suffix, leftp) \
1765 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1767 int shift = b->VsrB(15) & 0x7; \
1771 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \
1772 doit = doit && ((b->u8[i] & 0x7) == shift); \
1777 } else if (leftp) { \
1778 uint64_t carry = a->VsrD(1) >> (64 - shift); \
1780 r->VsrD(0) = (a->VsrD(0) << shift) | carry; \
1781 r->VsrD(1) = a->VsrD(1) << shift; \
1783 uint64_t carry = a->VsrD(0) << (64 - shift); \
1785 r->VsrD(1) = (a->VsrD(1) >> shift) | carry; \
1786 r->VsrD(0) = a->VsrD(0) >> shift; \
1794 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1797 unsigned int shift, bytes, size;
1799 size = ARRAY_SIZE(r->u8);
1800 for (i = 0; i < size; i++) {
1801 shift = b->VsrB(i) & 0x7; /* extract shift value */
1802 bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */
1803 (((i + 1) < size) ? a->VsrB(i + 1) : 0);
1804 r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */
1808 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1811 unsigned int shift, bytes;
1814  * Use reverse order, as the destination and source registers can be the
1815  * same. The register is modified in place (saving a temporary), so reverse
1816  * order guarantees that a computed result is not fed back.
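 * For example, with r == a, handling i = 15 first reads only a->VsrB(14)
 * and a->VsrB(15), neither of which has been overwritten yet.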
1818 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1819 shift = b->VsrB(i) & 0x7; /* extract shift value */
1820 bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
1821 /* extract adjacent bytes */
1822 r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */
1826 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1828 int sh = shift & 0xf;
1832 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1835 result.VsrB(i) = b->VsrB(index - 0x10);
1837 result.VsrB(i) = a->VsrB(index);
1843 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1845 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1847 #if defined(HOST_WORDS_BIGENDIAN)
1848 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1849 memset(&r->u8[16 - sh], 0, sh);
1851 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1852 memset(&r->u8[0], 0, sh);
1856 #if defined(HOST_WORDS_BIGENDIAN)
1857 #define VINSERT(suffix, element) \
1858 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1860 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])], \
1861 sizeof(r->element[0])); \
1864 #define VINSERT(suffix, element) \
1865 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1867 uint32_t d = (16 - index) - sizeof(r->element[0]); \
1868 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \
1876 #if defined(HOST_WORDS_BIGENDIAN)
1877 #define VEXTRACT(suffix, element) \
1878 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1880 uint32_t es = sizeof(r->element[0]); \
1881 memmove(&r->u8[8 - es], &b->u8[index], es); \
1882 memset(&r->u8[8], 0, 8); \
1883 memset(&r->u8[0], 0, 8 - es); \
1886 #define VEXTRACT(suffix, element) \
1887 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1889 uint32_t es = sizeof(r->element[0]); \
1890 uint32_t s = (16 - index) - es; \
1891 memmove(&r->u8[8], &b->u8[s], es); \
1892 memset(&r->u8[0], 0, 8); \
1893 memset(&r->u8[8 + es], 0, 8 - es); \
1902 void helper_xxextractuw(CPUPPCState *env, target_ulong xtn,
1903 target_ulong xbn, uint32_t index)
1906 size_t es = sizeof(uint32_t);
1910 getVSR(xbn, &xb, env);
1911 memset(&xt, 0, sizeof(xt));
1914 for (i = 0; i < es; i++, ext_index++) {
1915 xt.VsrB(8 - es + i) = xb.VsrB(ext_index % 16);
1918 putVSR(xtn, &xt, env);
1921 void helper_xxinsertw(CPUPPCState *env, target_ulong xtn,
1922 target_ulong xbn, uint32_t index)
1925 size_t es = sizeof(uint32_t);
1926 int ins_index, i = 0;
1928 getVSR(xbn, &xb, env);
1929 getVSR(xtn, &xt, env);
1932 for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
1933 xt.VsrB(ins_index) = xb.VsrB(8 - es + i);
1936 putVSR(xtn, &xt, env);
1939 #define VEXT_SIGNED(name, element, cast) \
1940 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1943 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1944 r->element[i] = (cast)b->element[i]; \
1947 VEXT_SIGNED(vextsb2w, s32, int8_t)
1948 VEXT_SIGNED(vextsb2d, s64, int8_t)
1949 VEXT_SIGNED(vextsh2w, s32, int16_t)
1950 VEXT_SIGNED(vextsh2d, s64, int16_t)
1951 VEXT_SIGNED(vextsw2d, s64, int32_t)
1954 #define VNEG(name, element) \
1955 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1958 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1959 r->element[i] = -b->element[i]; \
1966 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1968 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1970 #if defined(HOST_WORDS_BIGENDIAN)
1971 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1972 memset(&r->u8[0], 0, sh);
1974 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1975 memset(&r->u8[16 - sh], 0, sh);
1979 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1983 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1984 r->u32[i] = a->u32[i] >= b->u32[i];
1988 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1995 upper = ARRAY_SIZE(r->s32) - 1;
1996 t = (int64_t)b->VsrSW(upper);
1997 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1999 result.VsrSW(i) = 0;
2001 result.VsrSW(upper) = cvtsdsw(t, &sat);
2009 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2016 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2017 int64_t t = (int64_t)b->VsrSW(upper + i * 2);
2020 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
2021 t += a->VsrSW(2 * i + j);
2023 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
2032 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2037 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2038 int64_t t = (int64_t)b->s32[i];
2040 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
2041 t += a->s8[4 * i + j];
2043 r->s32[i] = cvtsdsw(t, &sat);
2051 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2056 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2057 int64_t t = (int64_t)b->s32[i];
2059 t += a->s16[2 * i] + a->s16[2 * i + 1];
2060 r->s32[i] = cvtsdsw(t, &sat);
2068 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2073 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2074 uint64_t t = (uint64_t)b->u32[i];
2076 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
2077 t += a->u8[4 * i + j];
2079 r->u32[i] = cvtuduw(t, &sat);
2087 #if defined(HOST_WORDS_BIGENDIAN)
2094 #define VUPKPX(suffix, hi) \
2095 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2100 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
2101 uint16_t e = b->u16[hi ? i : i + 4]; \
2102 uint8_t a = (e >> 15) ? 0xff : 0; \
2103 uint8_t r = (e >> 10) & 0x1f; \
2104 uint8_t g = (e >> 5) & 0x1f; \
2105 uint8_t b = e & 0x1f; \
2107 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
2115 #define VUPK(suffix, unpacked, packee, hi) \
2116 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2122 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
2123 result.unpacked[i] = b->packee[i]; \
2126 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
2128 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
2133 VUPK(hsb, s16, s8, UPKHI)
2134 VUPK(hsh, s32, s16, UPKHI)
2135 VUPK(hsw, s64, s32, UPKHI)
2136 VUPK(lsb, s16, s8, UPKLO)
2137 VUPK(lsh, s32, s16, UPKLO)
2138 VUPK(lsw, s64, s32, UPKLO)
2143 #define VGENERIC_DO(name, element) \
2144 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
2148 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
2149 r->element[i] = name(b->element[i]); \
2153 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
2154 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
2155 #define clzw(v) clz32((v))
2156 #define clzd(v) clz64((v))
2158 VGENERIC_DO(clzb, u8)
2159 VGENERIC_DO(clzh, u16)
2160 VGENERIC_DO(clzw, u32)
2161 VGENERIC_DO(clzd, u64)
2168 #define ctzb(v) ((v) ? ctz32(v) : 8)
2169 #define ctzh(v) ((v) ? ctz32(v) : 16)
2170 #define ctzw(v) ctz32((v))
2171 #define ctzd(v) ctz64((v))
2173 VGENERIC_DO(ctzb, u8)
2174 VGENERIC_DO(ctzh, u16)
2175 VGENERIC_DO(ctzw, u32)
2176 VGENERIC_DO(ctzd, u64)
2183 #define popcntb(v) ctpop8(v)
2184 #define popcnth(v) ctpop16(v)
2185 #define popcntw(v) ctpop32(v)
2186 #define popcntd(v) ctpop64(v)
2188 VGENERIC_DO(popcntb, u8)
2189 VGENERIC_DO(popcnth, u16)
2190 VGENERIC_DO(popcntw, u32)
2191 VGENERIC_DO(popcntd, u64)
2200 #if defined(HOST_WORDS_BIGENDIAN)
2201 #define QW_ONE { .u64 = { 0, 1 } }
2203 #define QW_ONE { .u64 = { 1, 0 } }
2206 #ifndef CONFIG_INT128
2208 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
2210 t->u64[0] = ~a.u64[0];
2211 t->u64[1] = ~a.u64[1];
2214 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
2216 if (a.VsrD(0) < b.VsrD(0)) {
2218 } else if (a.VsrD(0) > b.VsrD(0)) {
2220 } else if (a.VsrD(1) < b.VsrD(1)) {
2222 } else if (a.VsrD(1) > b.VsrD(1)) {
2229 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2231 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
2232 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
2233 (~a.VsrD(1) < b.VsrD(1));
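    /*
     * The test above sets the carry because a + b overflows 64 bits exactly
     * when b > ~a; e.g. a.VsrD(1) = UINT64_MAX, b.VsrD(1) = 1 gives
     * ~a.VsrD(1) = 0 < 1, so a carry propagates into VsrD(0).
     */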
2236 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2239 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
2240 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
2241 (~a.VsrD(1) < b.VsrD(1));
2242 avr_qw_not(¬_a, a);
2243 return avr_qw_cmpu(not_a, b) < 0;
2248 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2250 #ifdef CONFIG_INT128
2251 r->u128 = a->u128 + b->u128;
2253 avr_qw_add(r, *a, *b);
2257 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2259 #ifdef CONFIG_INT128
2260 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2263 if (c->VsrD(1) & 1) {
2267 tmp.VsrD(1) = c->VsrD(1) & 1;
2268 avr_qw_add(&tmp, *a, tmp);
2269 avr_qw_add(r, tmp, *b);
2271 avr_qw_add(r, *a, *b);
2276 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2278 #ifdef CONFIG_INT128
2279 r->u128 = (~a->u128 < b->u128);
2283 avr_qw_not(¬_a, *a);
2286 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
2290 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2292 #ifdef CONFIG_INT128
2293 int carry_out = (~a->u128 < b->u128);
2294 if (!carry_out && (c->u128 & 1)) {
2295 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2296 ((a->u128 != 0) || (b->u128 != 0));
2298 r->u128 = carry_out;
2301 int carry_in = c->VsrD(1) & 1;
2305 carry_out = avr_qw_addc(&tmp, *a, *b);
2307 if (!carry_out && carry_in) {
2308 ppc_avr_t one = QW_ONE;
2309 carry_out = avr_qw_addc(&tmp, tmp, one);
2312 r->VsrD(1) = carry_out;
2316 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2318 #ifdef CONFIG_INT128
2319 r->u128 = a->u128 - b->u128;
2322 ppc_avr_t one = QW_ONE;
2324 avr_qw_not(&tmp, *b);
2325 avr_qw_add(&tmp, *a, tmp);
2326 avr_qw_add(r, tmp, one);
2330 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2332 #ifdef CONFIG_INT128
2333 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2337 avr_qw_not(&tmp, *b);
2338 avr_qw_add(&sum, *a, tmp);
2341 tmp.VsrD(1) = c->VsrD(1) & 1;
2342 avr_qw_add(r, sum, tmp);
2346 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2348 #ifdef CONFIG_INT128
2349 r->u128 = (~a->u128 < ~b->u128) ||
2350 (a->u128 + ~b->u128 == (__uint128_t)-1);
2352 int carry = (avr_qw_cmpu(*a, *b) > 0);
2355 avr_qw_not(&tmp, *b);
2356 avr_qw_add(&tmp, *a, tmp);
2357 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
2364 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2366 #ifdef CONFIG_INT128
2368 (~a->u128 < ~b->u128) ||
2369 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2371 int carry_in = c->VsrD(1) & 1;
2372 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2373 if (!carry_out && carry_in) {
2375 avr_qw_not(&tmp, *b);
2376 avr_qw_add(&tmp, *a, tmp);
2377 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
2381 r->VsrD(1) = carry_out;
2385 #define BCD_PLUS_PREF_1 0xC
2386 #define BCD_PLUS_PREF_2 0xF
2387 #define BCD_PLUS_ALT_1 0xA
2388 #define BCD_NEG_PREF 0xD
2389 #define BCD_NEG_ALT 0xB
2390 #define BCD_PLUS_ALT_2 0xE
2391 #define NATIONAL_PLUS 0x2B
2392 #define NATIONAL_NEG 0x2D
2394 #if defined(HOST_WORDS_BIGENDIAN)
2395 #define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2397 #define BCD_DIG_BYTE(n) ((n) / 2)
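/*
 * Digit n of the packed BCD value lives in byte BCD_DIG_BYTE(n): even digits
 * in the low nibble, odd digits in the high nibble. Digit 0 is the sign
 * nibble, so for example digit 1 shares a byte with the sign and digit 31 is
 * the most significant decimal digit.
 */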
2400 static int bcd_get_sgn(ppc_avr_t *bcd)
2402 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2403 case BCD_PLUS_PREF_1:
2404 case BCD_PLUS_PREF_2:
2405 case BCD_PLUS_ALT_1:
2406 case BCD_PLUS_ALT_2:
2424 static int bcd_preferred_sgn(int sgn, int ps)
2427 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2429 return BCD_NEG_PREF;
2433 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2437 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2439 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2442 if (unlikely(result > 9)) {
2448 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2451 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2452 bcd->u8[BCD_DIG_BYTE(n)] |= (digit << 4);
2454 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2455 bcd->u8[BCD_DIG_BYTE(n)] |= digit;
2459 static bool bcd_is_valid(ppc_avr_t *bcd)
2464 if (bcd_get_sgn(bcd) == 0) {
2468 for (i = 1; i < 32; i++) {
2469 bcd_get_digit(bcd, i, &invalid);
2470 if (unlikely(invalid)) {
2477 static int bcd_cmp_zero(ppc_avr_t *bcd)
2479 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
2482 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2486 static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2488 return reg->VsrH(7 - n);
2491 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2493 reg->VsrH(7 - n) = val;
2496 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2500 for (i = 31; i > 0; i--) {
2501 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2502 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2503 if (unlikely(invalid)) {
2504 return 0; /* doesn't matter */
2505 } else if (dig_a > dig_b) {
2507 } else if (dig_a < dig_b) {
2515 static void bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2520 for (i = 1; i <= 31; i++) {
2521 uint8_t digit = bcd_get_digit(a, i, invalid) +
2522 bcd_get_digit(b, i, invalid) + carry;
2530 bcd_put_digit(t, digit, i);
2536 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2542 for (i = 1; i <= 31; i++) {
2543 uint8_t digit = bcd_get_digit(a, i, invalid) -
2544 bcd_get_digit(b, i, invalid) + carry;
2552 bcd_put_digit(t, digit, i);
2558 uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2561 int sgna = bcd_get_sgn(a);
2562 int sgnb = bcd_get_sgn(b);
2563 int invalid = (sgna == 0) || (sgnb == 0);
2566 ppc_avr_t result = { .u64 = { 0, 0 } };
2570 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2571 bcd_add_mag(&result, a, b, &invalid, &overflow);
2572 cr = bcd_cmp_zero(&result);
2574 int magnitude = bcd_cmp_mag(a, b);
2575 if (magnitude > 0) {
2576 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2577 bcd_sub_mag(&result, a, b, &invalid, &overflow);
2578 cr = (sgna > 0) ? CRF_GT : CRF_LT;
2579 } else if (magnitude < 0) {
2580 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
2581 bcd_sub_mag(&result, b, a, &invalid, &overflow);
2582 cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2584 result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(0, ps);
2590 if (unlikely(invalid)) {
2591 result.VsrD(0) = result.VsrD(1) = -1;
2593 } else if (overflow) {
2602 uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2604 ppc_avr_t bcopy = *b;
2605 int sgnb = bcd_get_sgn(b);
2607 bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2608 } else if (sgnb > 0) {
2609 bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2611 /* else invalid ... defer to bcdadd code for proper handling */
2613 return helper_bcdadd(r, a, &bcopy, ps);
2616 uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2620 uint16_t national = 0;
2621 uint16_t sgnb = get_national_digit(b, 0);
2622 ppc_avr_t ret = { .u64 = { 0, 0 } };
2623 int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2625 for (i = 1; i < 8; i++) {
2626 national = get_national_digit(b, i);
2627 if (unlikely(national < 0x30 || national > 0x39)) {
2632 bcd_put_digit(&ret, national & 0xf, i);
2635 if (sgnb == NATIONAL_PLUS) {
2636 bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2638 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2641 cr = bcd_cmp_zero(&ret);
2643 if (unlikely(invalid)) {
2652 uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2656 int sgnb = bcd_get_sgn(b);
2657 int invalid = (sgnb == 0);
2658 ppc_avr_t ret = { .u64 = { 0, 0 } };
2660 int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
2662 for (i = 1; i < 8; i++) {
2663 set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2665 if (unlikely(invalid)) {
2669 set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2671 cr = bcd_cmp_zero(b);
2677 if (unlikely(invalid)) {
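/*
 * bcdcfz. - convert from zoned format: 16 bytes, each made of a zone
 * nibble (0x3 when PS=0, 0xF when PS=1) and a digit nibble, with the
 * sign encoded in the zone nibble of the least significant byte.  A
 * wrong zone or a digit above 9 makes the result invalid (CRF_SO).
 */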
2686 uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2692 int zone_lead = ps ? 0xF : 0x3;
2694 ppc_avr_t ret = { .u64 = { 0, 0 } };
2695 int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4;
2697 if (unlikely((sgnb < 0xA) && ps)) {
2701 for (i = 0; i < 16; i++) {
2702 zone_digit = i ? b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead;
2703 digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF;
2704 if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2709 bcd_put_digit(&ret, digit, i + 1);
2712 if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2713 (!ps && (sgnb & 0x4))) {
2714 bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2716 bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2719 cr = bcd_cmp_zero(&ret);
2721 if (unlikely(invalid)) {
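/*
 * bcdctz. - convert to zoned format.  Only 16 digits fit, so ox_flag
 * is raised when any of digits 17..31 is non-zero.  Each digit gets
 * the leading zone nibble prepended, and the sign is folded into the
 * zone of the least significant byte (0xC/0xD when PS=1, 0x3/0x7 when
 * PS=0).
 */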
2730 uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2735 int sgnb = bcd_get_sgn(b);
2736 int zone_lead = (ps) ? 0xF0 : 0x30;
2737 int invalid = (sgnb == 0);
2738 ppc_avr_t ret = { .u64 = { 0, 0 } };
2740 int ox_flag = ((b->VsrD(0) >> 4) != 0);
2742 for (i = 0; i < 16; i++) {
2743 digit = bcd_get_digit(b, i + 1, &invalid);
2745 if (unlikely(invalid)) {
2749 ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit;
2753 bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2755 bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2758 cr = bcd_cmp_zero(b);
2764 if (unlikely(invalid)) {
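/*
 * bcdcfsq. - convert a signed 128-bit binary quadword to BCD.  The
 * magnitude is split with divu128() by 10^15: the quotient (the upper
 * 16 decimal digits) lands in lo_value and the remainder (the lower 15
 * digits) in hi_value, which the two loops below peel off one digit at
 * a time.  A magnitude of more than 31 digits reports CRF_SO.
 */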
2773 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2779 ppc_avr_t ret = { .u64 = { 0, 0 } };
2781 if (b->VsrSD(0) < 0) {
2782 lo_value = -b->VsrSD(1);
2783 hi_value = ~b->VsrD(0) + !lo_value;
2784 bcd_put_digit(&ret, 0xD, 0);
2786 lo_value = b->VsrD(1);
2787 hi_value = b->VsrD(0);
2788 bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
2791 if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
2792 lo_value > 9999999999999999ULL) {
2796 for (i = 1; i < 16; hi_value /= 10, i++) {
2797 bcd_put_digit(&ret, hi_value % 10, i);
2800 for (; i < 32; lo_value /= 10, i++) {
2801 bcd_put_digit(&ret, lo_value % 10, i);
2804 cr |= bcd_cmp_zero(&ret);
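/*
 * bcdctsq. - convert BCD to a signed 128-bit binary quadword.  The
 * digits are accumulated most significant first with a 128-bit Horner
 * scheme: multiply the accumulator by ten (mulu64() supplies the carry
 * from the low half into the high half) and add the next digit, then
 * negate the result when the sign is negative.
 */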
2811 uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2818 uint64_t hi_value = 0;
2819 int sgnb = bcd_get_sgn(b);
2820 int invalid = (sgnb == 0);
2822 lo_value = bcd_get_digit(b, 31, &invalid);
2823 for (i = 30; i > 0; i--) {
2824 mulu64(&lo_value, &carry, lo_value, 10ULL);
2825 mulu64(&hi_value, &unused, hi_value, 10ULL);
2826 lo_value += bcd_get_digit(b, i, &invalid);
2829 if (unlikely(invalid)) {
2835 r->VsrSD(1) = -lo_value;
2836 r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
2838 r->VsrSD(1) = lo_value;
2839 r->VsrSD(0) = hi_value;
2842 cr = bcd_cmp_zero(b);
2844 if (unlikely(invalid)) {
2851 uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2856 if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
2861 bcd_put_digit(r, b->u8[BCD_DIG_BYTE(0)] & 0xF, 0);
2863 for (i = 1; i < 32; i++) {
2864 bcd_get_digit(a, i, &invalid);
2865 bcd_get_digit(b, i, &invalid);
2866 if (unlikely(invalid)) {
2871 return bcd_cmp_zero(r);
2874 uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2876 int sgnb = bcd_get_sgn(b);
2879 bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
2881 if (bcd_is_valid(b) == false) {
2885 return bcd_cmp_zero(r);
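/*
 * bcds. - decimal shift.  The signed shift count comes from a byte of
 * a (positive means towards the most significant digit); every digit
 * is one nibble, so the count is scaled by four bits.  Counts beyond
 * +/-31 clear all digits, and an in-range left shift that drops
 * non-zero digits sets CRF_SO.  helper_bcdus() below is the unsigned
 * variant, where all 32 nibbles are digits and no sign is preserved.
 */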
2888 uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2891 #if defined(HOST_WORDS_BIGENDIAN)
2896 bool ox_flag = false;
2897 int sgnb = bcd_get_sgn(b);
2899 ret.VsrD(1) &= ~0xf;
2901 if (bcd_is_valid(b) == false) {
2905 if (unlikely(i > 31)) {
2907 } else if (unlikely(i < -31)) {
2912 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2914 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2916 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2920 cr = bcd_cmp_zero(r);
2928 uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2933 bool ox_flag = false;
2936 for (i = 0; i < 32; i++) {
2937 bcd_get_digit(b, i, &invalid);
2939 if (unlikely(invalid)) {
2944 #if defined(HOST_WORDS_BIGENDIAN)
2951 ret.VsrD(1) = ret.VsrD(0) = 0;
2952 } else if (i <= -32) {
2953 ret.VsrD(1) = ret.VsrD(0) = 0;
2955 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2957 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2961 cr = bcd_cmp_zero(r);
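/*
 * bcdsr. - decimal shift and round.  Like bcds., except that on a
 * right shift the most significant digit shifted out (which lands in
 * the sign-nibble slot of the intermediate value) rounds the result
 * half-up by adding bcd_one.
 */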
2969 uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2974 bool ox_flag = false;
2975 int sgnb = bcd_get_sgn(b);
2977 ret.VsrD(1) &= ~0xf;
2979 #if defined(HOST_WORDS_BIGENDIAN)
2981 ppc_avr_t bcd_one = { .u64 = { 0, 0x10 } };
2984 ppc_avr_t bcd_one = { .u64 = { 0x10, 0 } };
2987 if (bcd_is_valid(b) == false) {
2991 if (unlikely(i > 31)) {
2993 } else if (unlikely(i < -31)) {
2998 ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
3000 urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
3002 if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
3003 bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
3006 bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
3008 cr = bcd_cmp_zero(&ret);
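/*
 * bcdtrunc./bcdutrunc. - truncate to the number of digits given in a
 * halfword of a: everything above that many low-order digits is
 * cleared, and dropping a non-zero digit reports overflow (CRF_SO) in
 * the returned CR field.  The unsigned form treats all 32 nibbles as
 * digits.
 */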
3017 uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
3020 uint32_t ox_flag = 0;
3021 #if defined(HOST_WORDS_BIGENDIAN)
3022 int i = a->s16[3] + 1;
3024 int i = a->s16[4] + 1;
3028 if (bcd_is_valid(b) == false) {
3032 if (i > 16 && i < 32) {
3033 mask = (uint64_t)-1 >> (128 - i * 4);
3034 if (ret.VsrD(0) & ~mask) {
3038 ret.VsrD(0) &= mask;
3039 } else if (i >= 0 && i <= 16) {
3040 mask = (uint64_t)-1 >> (64 - i * 4);
3041 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
3045 ret.VsrD(1) &= mask;
3048 bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
3051 return bcd_cmp_zero(&ret) | ox_flag;
3054 uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
3058 uint32_t ox_flag = 0;
3062 for (i = 0; i < 32; i++) {
3063 bcd_get_digit(b, i, &invalid);
3065 if (unlikely(invalid)) {
3070 #if defined(HOST_WORDS_BIGENDIAN)
3075 if (i > 16 && i < 33) {
3076 mask = (uint64_t)-1 >> (128 - i * 4);
3077 if (ret.VsrD(0) & ~mask) {
3081 ret.VsrD(0) &= mask;
3082 } else if (i > 0 && i <= 16) {
3083 mask = (uint64_t)-1 >> (64 - i * 4);
3084 if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
3088 ret.VsrD(1) &= mask;
3090 } else if (i == 0) {
3091 if (ret.VsrD(0) || ret.VsrD(1)) {
3094 ret.VsrD(0) = ret.VsrD(1) = 0;
3098 if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
3099 return ox_flag | CRF_EQ;
3102 return ox_flag | CRF_GT;
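/*
 * AES acceleration helpers (VSBOX, VCIPHER, VCIPHERLAST, VNCIPHER,
 * VNCIPHERLAST).  vsbox applies the AES S-box to every byte; vcipher
 * performs one full encryption round (SubBytes, ShiftRows, MixColumns
 * via the combined AES_Te tables) and XORs in b as the round key;
 * vcipherlast is the final round without MixColumns.  The vncipher*
 * helpers are the corresponding inverse (decryption) rounds.
 */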
3105 void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
3108 VECTOR_FOR_INORDER_I(i, u8) {
3109 r->u8[i] = AES_sbox[a->u8[i]];
3113 void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3118 VECTOR_FOR_INORDER_I(i, u32) {
3119 result.VsrW(i) = b->VsrW(i) ^
3120 (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
3121 AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
3122 AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
3123 AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
3128 void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3133 VECTOR_FOR_INORDER_I(i, u8) {
3134 result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
3139 void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3141 /* This differs from what is written in ISA V2.07. The RTL is */
3142 /* incorrect and will be fixed in V2.07B. */
3146 VECTOR_FOR_INORDER_I(i, u8) {
3147 tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
3150 VECTOR_FOR_INORDER_I(i, u32) {
3152 AES_imc[tmp.VsrB(4 * i + 0)][0] ^
3153 AES_imc[tmp.VsrB(4 * i + 1)][1] ^
3154 AES_imc[tmp.VsrB(4 * i + 2)][2] ^
3155 AES_imc[tmp.VsrB(4 * i + 3)][3];
3159 void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3164 VECTOR_FOR_INORDER_I(i, u8) {
3165 result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
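/*
 * VSHASIGMAW/VSHASIGMAD - SHA-256 and SHA-512 sigma functions.  The
 * st_six immediate packs an "st" bit and a four-bit "six" field: st
 * selects between the lower-case message-schedule sigmas and the
 * upper-case compression sigmas, and the relevant bit of "six" (every
 * other bit for the doubleword form) picks the 0 or 1 variant for each
 * element.
 */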
3170 void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
3172 int st = (st_six & 0x10) != 0;
3173 int six = st_six & 0xF;
3176 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
3178 if ((six & (0x8 >> i)) == 0) {
3179 r->VsrW(i) = ror32(a->VsrW(i), 7) ^
3180 ror32(a->VsrW(i), 18) ^
3182 } else { /* six.bit[i] == 1 */
3183 r->VsrW(i) = ror32(a->VsrW(i), 17) ^
3184 ror32(a->VsrW(i), 19) ^
3187 } else { /* st == 1 */
3188 if ((six & (0x8 >> i)) == 0) {
3189 r->VsrW(i) = ror32(a->VsrW(i), 2) ^
3190 ror32(a->VsrW(i), 13) ^
3191 ror32(a->VsrW(i), 22);
3192 } else { /* six.bit[i] == 1 */
3193 r->VsrW(i) = ror32(a->VsrW(i), 6) ^
3194 ror32(a->VsrW(i), 11) ^
3195 ror32(a->VsrW(i), 25);
3201 void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
3203 int st = (st_six & 0x10) != 0;
3204 int six = st_six & 0xF;
3207 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
3209 if ((six & (0x8 >> (2 * i))) == 0) {
3210 r->VsrD(i) = ror64(a->VsrD(i), 1) ^
3211 ror64(a->VsrD(i), 8) ^
3213 } else { /* six.bit[2*i] == 1 */
3214 r->VsrD(i) = ror64(a->VsrD(i), 19) ^
3215 ror64(a->VsrD(i), 61) ^
3218 } else { /* st == 1 */
3219 if ((six & (0x8 >> (2 * i))) == 0) {
3220 r->VsrD(i) = ror64(a->VsrD(i), 28) ^
3221 ror64(a->VsrD(i), 34) ^
3222 ror64(a->VsrD(i), 39);
3223 } else { /* six.bit[2*i] == 1 */
3224 r->VsrD(i) = ror64(a->VsrD(i), 14) ^
3225 ror64(a->VsrD(i), 18) ^
3226 ror64(a->VsrD(i), 41);
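/*
 * VPERMXOR: for each byte, the control byte from c supplies two 4-bit
 * indices; the result byte is a[high nibble] XOR b[low nibble].
 */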
3232 void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
3237 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
3238 int indexA = c->VsrB(i) >> 4;
3239 int indexB = c->VsrB(i) & 0xF;
3241 result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
3246 #undef VECTOR_FOR_INORDER_I
3248 /*****************************************************************************/
3249 /* SPE extension helpers */
3250 /* Table of bit-reversed nibbles, to speed up byte/word reversal */
3251 static const uint8_t hbrev[16] = {
3252 0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
3253 0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
3256 static inline uint8_t byte_reverse(uint8_t val)
3258 return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
3261 static inline uint32_t word_reverse(uint32_t val)
3263 return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
3264 (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
3267 #define MASKBITS 16 /* Arbitrary choice; the real width is implementation dependent */
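/*
 * brinc - SPE bit-reversed increment, used for FFT-style bit-reversed
 * addressing: the bits of arg1 selected by the arg2 mask are reversed,
 * incremented by one, and reversed back (e.g. with a 3-bit mask, 0b100
 * (the reverse of 1) steps to 0b010 (the reverse of 2)).  Setting the
 * bits outside the mask with "a | ~b" lets the increment's carry
 * propagate across the unselected bit positions.
 */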
3268 target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
3270 uint32_t a, b, d, mask;
3272 mask = UINT32_MAX >> (32 - MASKBITS);
3275 d = word_reverse(1 + word_reverse(a | ~b));
3276 return (arg1 & ~mask) | (d & b);
3279 uint32_t helper_cntlsw32(uint32_t val)
3281 if (val & 0x80000000) {
3288 uint32_t helper_cntlzw32(uint32_t val)
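/*
 * dlmzb - determine leftmost zero byte (embedded PowerPC string
 * instruction): scan the eight-byte string formed by high:low for a
 * zero byte, record the resulting byte count in the low bits of XER
 * and, when Rc is set, reflect the outcome in CR0.
 */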
3294 target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
3295 target_ulong low, uint32_t update_Rc)
3301 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3302 if ((high & mask) == 0) {
3310 for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3311 if ((low & mask) == 0) {
3324 env->xer = (env->xer & ~0x7F) | i;
3326 env->crf[0] |= xer_so;