2 * PowerPC integer and vector emulation helpers for QEMU.
4 * Copyright (c) 2003-2007 Jocelyn Mayer
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
22 #include "qemu/host-utils.h"
23 #include "exec/helper-proto.h"
24 #include "crypto/aes.h"
25 #include "fpu/softfloat.h"
27 #include "helper_regs.h"
28 /*****************************************************************************/
29 /* Fixed point operations helpers */
31 static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
34 env->so = env->ov = 1;
40 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
46 uint64_t dividend = (uint64_t)ra << 32;
47 uint64_t divisor = (uint32_t)rb;
49 if (unlikely(divisor == 0)) {
52 rt = dividend / divisor;
53 overflow = rt > UINT32_MAX;
56 if (unlikely(overflow)) {
57 rt = 0; /* Undefined */
61 helper_update_ov_legacy(env, overflow);
64 return (target_ulong)rt;
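
/*
 * Example (illustrative): divweu divides the 64-bit value (RA || 32 zero
 * bits) by RB, e.g. ra = 1, rb = 0x10 gives 0x100000000 / 0x10 = 0x10000000.
 * Whenever ra >= rb the 32-bit quotient would overflow, so the result is
 * treated as undefined (0 here) and OV is reported through
 * helper_update_ov_legacy().
 */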
67 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
73 int64_t dividend = (int64_t)ra << 32;
74 int64_t divisor = (int64_t)((int32_t)rb);
76 if (unlikely((divisor == 0) ||
77 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
80 rt = dividend / divisor;
81 overflow = rt != (int32_t)rt;
84 if (unlikely(overflow)) {
85 rt = 0; /* Undefined */
89 helper_update_ov_legacy(env, overflow);
92 return (target_ulong)rt;
95 #if defined(TARGET_PPC64)
97 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
102 overflow = divu128(&rt, &ra, rb);
104 if (unlikely(overflow)) {
105 rt = 0; /* Undefined */
109 helper_update_ov_legacy(env, overflow);
115 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
118 int64_t ra = (int64_t)rau;
119 int64_t rb = (int64_t)rbu;
120 int overflow = divs128(&rt, &ra, rb);
122 if (unlikely(overflow)) {
123 rt = 0; /* Undefined */
127 helper_update_ov_legacy(env, overflow);
136 #if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
138 #define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
/* Subtract 1 from each byte, and AND with the inverse, to check whether the
 * MSB ends up set in each byte, i.e.:
 *   ((0x00 - 0x01) & ~(0x00)) & 0x80
 *   (0xFF & 0xFF) & 0x80 = 0x80 (zero found) */
145 #define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
147 /* When you XOR the pattern and there is a match, that byte will be zero */
148 #define hasvalue(x, n) (haszero((x) ^ pattern(n)))
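
/*
 * Worked example (illustrative, not from the original source): for cmpeqb
 * below, hasvalue(rb, ra) asks "does any byte of rb equal the low byte of
 * ra?".  With ra = 0x42 and rb = 0x1100420000000000, rb ^ pattern(0x42) has
 * a zero byte exactly where rb contained 0x42, and haszero() turns that zero
 * byte into a set MSB via (v - 0x0101..01) & ~v & 0x8080..80, so cmpeqb
 * returns CRF_GT.
 */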
150 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
152 return hasvalue(rb, ra) ? CRF_GT : 0;
/* Return an invalid random number.
 * FIXME: Add an rng backend or other mechanism to get cryptographically
 * suitable random numbers. */
164 target_ulong helper_darn32(void)
169 target_ulong helper_darn64(void)
176 #if defined(TARGET_PPC64)
178 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
183 for (i = 0; i < 8; i++) {
184 int index = (rs >> (i*8)) & 0xFF;
186 if (rb & PPC_BIT(index)) {
196 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
198 target_ulong mask = 0xff;
202 for (i = 0; i < sizeof(target_ulong); i++) {
203 if ((rs & mask) == (rb & mask)) {
211 /* shift right arithmetic helper */
212 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
217 if (likely(!(shift & 0x20))) {
218 if (likely((uint32_t)shift != 0)) {
220 ret = (int32_t)value >> shift;
221 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
222 env->ca32 = env->ca = 0;
224 env->ca32 = env->ca = 1;
227 ret = (int32_t)value;
228 env->ca32 = env->ca = 0;
231 ret = (int32_t)value >> 31;
232 env->ca32 = env->ca = (ret != 0);
234 return (target_long)ret;
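
/*
 * Example (illustrative): sraw of 0xfffffff5 (-11) by 2 gives -3
 * (0xfffffffd) and sets CA/CA32, because the two bits shifted out (0b01) are
 * non-zero and the result is negative; shifting a non-negative value, or
 * shifting out only zero bits, leaves CA/CA32 clear.
 */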
237 #if defined(TARGET_PPC64)
238 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
243 if (likely(!(shift & 0x40))) {
244 if (likely((uint64_t)shift != 0)) {
246 ret = (int64_t)value >> shift;
247 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
248 env->ca32 = env->ca = 0;
250 env->ca32 = env->ca = 1;
253 ret = (int64_t)value;
254 env->ca32 = env->ca = 0;
257 ret = (int64_t)value >> 63;
258 env->ca32 = env->ca = (ret != 0);
264 #if defined(TARGET_PPC64)
265 target_ulong helper_popcntb(target_ulong val)
267 /* Note that we don't fold past bytes */
268 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
269 0x5555555555555555ULL);
270 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
271 0x3333333333333333ULL);
272 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
273 0x0f0f0f0f0f0f0f0fULL);
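
/*
 * Illustration (not in the original): after the three folds above, each byte
 * of val holds the population count of the corresponding input byte, e.g. an
 * input byte of 0xb3 (0b10110011, five bits set) ends up as 0x05 in that
 * byte position, which is exactly what popcntb returns.
 */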
277 target_ulong helper_popcntw(target_ulong val)
279 /* Note that we don't fold past words. */
280 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
281 0x5555555555555555ULL);
282 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
283 0x3333333333333333ULL);
284 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
285 0x0f0f0f0f0f0f0f0fULL);
286 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
287 0x00ff00ff00ff00ffULL);
288 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
289 0x0000ffff0000ffffULL);
293 target_ulong helper_popcntb(target_ulong val)
295 /* Note that we don't fold past bytes */
296 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
297 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
298 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
303 /*****************************************************************************/
304 /* PowerPC 601 specific instructions (POWER bridge) */
305 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
307 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
309 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
310 (int32_t)arg2 == 0) {
311 env->spr[SPR_MQ] = 0;
314 env->spr[SPR_MQ] = tmp % arg2;
315 return tmp / (int32_t)arg2;
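
/*
 * Example (illustrative): the POWER-bridge "div" divides the 64-bit value
 * (arg1 || MQ) by arg2.  With arg1 = 0, MQ = 100 and arg2 = 7 the quotient
 * returned is 14 and MQ is left holding the remainder 2.
 */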
319 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
322 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
324 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
325 (int32_t)arg2 == 0) {
326 env->so = env->ov = 1;
327 env->spr[SPR_MQ] = 0;
330 env->spr[SPR_MQ] = tmp % arg2;
331 tmp /= (int32_t)arg2;
332 if ((int32_t)tmp != tmp) {
333 env->so = env->ov = 1;
341 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
344 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
345 (int32_t)arg2 == 0) {
346 env->spr[SPR_MQ] = 0;
349 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
350 return (int32_t)arg1 / (int32_t)arg2;
354 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
357 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
358 (int32_t)arg2 == 0) {
359 env->so = env->ov = 1;
360 env->spr[SPR_MQ] = 0;
364 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
365 return (int32_t)arg1 / (int32_t)arg2;
369 /*****************************************************************************/
370 /* 602 specific instructions */
/* mfrom is the craziest instruction ever seen, imho! */
/* The real implementation uses a ROM table; do the same. */
/* Extremely decomposed:
 * return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
 */
377 #if !defined(CONFIG_USER_ONLY)
378 target_ulong helper_602_mfrom(target_ulong arg)
380 if (likely(arg < 602)) {
381 #include "mfrom_table.inc.c"
382 return mfrom_ROM_table[arg];
389 /*****************************************************************************/
390 /* Altivec extension helpers */
391 #if defined(HOST_WORDS_BIGENDIAN)
394 #define AVRB(i) u8[i]
395 #define AVRW(i) u32[i]
399 #define AVRB(i) u8[15-(i)]
400 #define AVRW(i) u32[3-(i)]
403 #if defined(HOST_WORDS_BIGENDIAN)
404 #define VECTOR_FOR_INORDER_I(index, element) \
405 for (index = 0; index < ARRAY_SIZE(r->element); index++)
407 #define VECTOR_FOR_INORDER_I(index, element) \
408 for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
411 /* Saturating arithmetic helpers. */
412 #define SATCVT(from, to, from_type, to_type, min, max) \
413 static inline to_type cvt##from##to(from_type x, int *sat) \
417 if (x < (from_type)min) { \
420 } else if (x > (from_type)max) { \
428 #define SATCVTU(from, to, from_type, to_type, min, max) \
429 static inline to_type cvt##from##to(from_type x, int *sat) \
433 if (x > (from_type)max) { \
441 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
442 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
443 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
445 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
446 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
447 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
448 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
449 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
450 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
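
/*
 * Example (illustrative): cvtsdsw(0x123456789, &sat) saturates to INT32_MAX
 * (0x7fffffff) and sets *sat, while cvtsdsw(-5, &sat) returns -5 and leaves
 * *sat untouched.
 */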
454 void helper_lvsl(ppc_avr_t *r, target_ulong sh)
456 int i, j = (sh & 0xf);
458 VECTOR_FOR_INORDER_I(i, u8) {
463 void helper_lvsr(ppc_avr_t *r, target_ulong sh)
465 int i, j = 0x10 - (sh & 0xf);
467 VECTOR_FOR_INORDER_I(i, u8) {
472 void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
474 #if defined(HOST_WORDS_BIGENDIAN)
475 env->vscr = r->u32[3];
477 env->vscr = r->u32[0];
479 set_flush_to_zero(vscr_nj, &env->vec_status);
482 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
486 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
487 r->u32[i] = ~a->u32[i] < b->u32[i];
492 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
495 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
496 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
503 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
506 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
507 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
515 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
517 uint64_t res = b->u64[0] ^ b->u64[1];
521 r->u64[LO_IDX] = res & 1;
525 #define VARITH_DO(name, op, element) \
526 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
530 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
531 r->element[i] = a->element[i] op b->element[i]; \
534 #define VARITH(suffix, element) \
535 VARITH_DO(add##suffix, +, element) \
536 VARITH_DO(sub##suffix, -, element)
541 VARITH_DO(muluwm, *, u32)
545 #define VARITHFP(suffix, func) \
546 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
551 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
552 r->f[i] = func(a->f[i], b->f[i], &env->vec_status); \
555 VARITHFP(addfp, float32_add)
556 VARITHFP(subfp, float32_sub)
557 VARITHFP(minfp, float32_min)
558 VARITHFP(maxfp, float32_max)
561 #define VARITHFPFMA(suffix, type) \
562 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
563 ppc_avr_t *b, ppc_avr_t *c) \
566 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
567 r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i], \
568 type, &env->vec_status); \
571 VARITHFPFMA(maddfp, 0);
572 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
575 #define VARITHSAT_CASE(type, op, cvt, element) \
577 type result = (type)a->element[i] op (type)b->element[i]; \
578 r->element[i] = cvt(result, &sat); \
581 #define VARITHSAT_DO(name, op, optype, cvt, element) \
582 void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
588 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
589 switch (sizeof(r->element[0])) { \
591 VARITHSAT_CASE(optype, op, cvt, element); \
594 VARITHSAT_CASE(optype, op, cvt, element); \
597 VARITHSAT_CASE(optype, op, cvt, element); \
602 env->vscr |= (1 << VSCR_SAT); \
605 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
606 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
607 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
608 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
609 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
610 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
611 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
612 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
613 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
614 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
615 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
616 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
617 #undef VARITHSAT_CASE
619 #undef VARITHSAT_SIGNED
620 #undef VARITHSAT_UNSIGNED
622 #define VAVG_DO(name, element, etype) \
623 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
627 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
628 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
629 r->element[i] = x >> 1; \
633 #define VAVG(type, signed_element, signed_type, unsigned_element, \
635 VAVG_DO(avgs##type, signed_element, signed_type) \
636 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
637 VAVG(b, s8, int16_t, u8, uint16_t)
638 VAVG(h, s16, int32_t, u16, uint32_t)
639 VAVG(w, s32, int64_t, u32, uint64_t)
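
/*
 * Example (illustrative): vavgub averages with rounding up, so a lane with
 * a = 1 and b = 2 yields (1 + 2 + 1) >> 1 = 2; the wider etype keeps the
 * intermediate sum from overflowing the element type.
 */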
643 #define VABSDU_DO(name, element) \
644 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
648 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
649 r->element[i] = (a->element[i] > b->element[i]) ? \
650 (a->element[i] - b->element[i]) : \
651 (b->element[i] - a->element[i]); \
/* VABSDU - Vector absolute difference unsigned
 * type - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 * element - element type to access from vector
 */
659 #define VABSDU(type, element) \
660 VABSDU_DO(absdu##type, element)
667 #define VCF(suffix, cvt, element) \
668 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
669 ppc_avr_t *b, uint32_t uim) \
673 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
674 float32 t = cvt(b->element[i], &env->vec_status); \
675 r->f[i] = float32_scalbn(t, -uim, &env->vec_status); \
678 VCF(ux, uint32_to_float32, u32)
679 VCF(sx, int32_to_float32, s32)
682 #define VCMP_DO(suffix, compare, element, record) \
683 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
684 ppc_avr_t *a, ppc_avr_t *b) \
686 uint64_t ones = (uint64_t)-1; \
687 uint64_t all = ones; \
691 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
692 uint64_t result = (a->element[i] compare b->element[i] ? \
694 switch (sizeof(a->element[0])) { \
696 r->u64[i] = result; \
699 r->u32[i] = result; \
702 r->u16[i] = result; \
712 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
715 #define VCMP(suffix, compare, element) \
716 VCMP_DO(suffix, compare, element, 0) \
717 VCMP_DO(suffix##_dot, compare, element, 1)
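
/*
 * Reading aid (illustrative): in the dot forms, CR field 6 becomes 0b1000
 * when every lane compared true ("all"), 0b0010 when no lane compared true
 * ("none"), and 0b0000 for a mix, matching the
 * ((all != 0) << 3) | ((none == 0) << 1) expression above.
 */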
733 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \
734 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \
735 ppc_avr_t *a, ppc_avr_t *b) \
737 etype ones = (etype)-1; \
739 etype result, none = 0; \
742 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
744 result = ((a->element[i] == 0) \
745 || (b->element[i] == 0) \
746 || (a->element[i] != b->element[i]) ? \
749 result = (a->element[i] != b->element[i]) ? ones : 0x0; \
751 r->element[i] = result; \
756 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
760 /* VCMPNEZ - Vector compare not equal to zero
761 * suffix - instruction mnemonic suffix (b: byte, h: halfword, w: word)
762 * element - element type to access from vector
764 #define VCMPNE(suffix, element, etype, cmpzero) \
765 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \
766 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
767 VCMPNE(zb, u8, uint8_t, 1)
768 VCMPNE(zh, u16, uint16_t, 1)
769 VCMPNE(zw, u32, uint32_t, 1)
770 VCMPNE(b, u8, uint8_t, 0)
771 VCMPNE(h, u16, uint16_t, 0)
772 VCMPNE(w, u32, uint32_t, 0)
776 #define VCMPFP_DO(suffix, compare, order, record) \
777 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
778 ppc_avr_t *a, ppc_avr_t *b) \
780 uint32_t ones = (uint32_t)-1; \
781 uint32_t all = ones; \
785 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
787 int rel = float32_compare_quiet(a->f[i], b->f[i], \
789 if (rel == float_relation_unordered) { \
791 } else if (rel compare order) { \
796 r->u32[i] = result; \
801 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
804 #define VCMPFP(suffix, compare, order) \
805 VCMPFP_DO(suffix, compare, order, 0) \
806 VCMPFP_DO(suffix##_dot, compare, order, 1)
807 VCMPFP(eqfp, ==, float_relation_equal)
808 VCMPFP(gefp, !=, float_relation_less)
809 VCMPFP(gtfp, ==, float_relation_greater)
813 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
814 ppc_avr_t *a, ppc_avr_t *b, int record)
819 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
820 int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
821 if (le_rel == float_relation_unordered) {
822 r->u32[i] = 0xc0000000;
825 float32 bneg = float32_chs(b->f[i]);
826 int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
827 int le = le_rel != float_relation_greater;
828 int ge = ge_rel != float_relation_less;
830 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
831 all_in |= (!le | !ge);
835 env->crf[6] = (all_in == 0) << 1;
839 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
841 vcmpbfp_internal(env, r, a, b, 0);
844 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
847 vcmpbfp_internal(env, r, a, b, 1);
850 #define VCT(suffix, satcvt, element) \
851 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
852 ppc_avr_t *b, uint32_t uim) \
856 float_status s = env->vec_status; \
858 set_float_rounding_mode(float_round_to_zero, &s); \
859 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
860 if (float32_is_any_nan(b->f[i])) { \
863 float64 t = float32_to_float64(b->f[i], &s); \
866 t = float64_scalbn(t, uim, &s); \
867 j = float64_to_int64(t, &s); \
868 r->element[i] = satcvt(j, &sat); \
872 env->vscr |= (1 << VSCR_SAT); \
875 VCT(uxs, cvtsduw, u32)
876 VCT(sxs, cvtsdsw, s32)
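
/*
 * Example (illustrative): vctsxs with uim = 4 converts each float lane to a
 * signed 32-bit fixed-point value with 4 fraction bits, so an input of 2.5f
 * becomes 2.5 * 2^4 = 40; values outside the s32 range saturate and set
 * VSCR[SAT].
 */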
879 target_ulong helper_vclzlsbb(ppc_avr_t *r)
881 target_ulong count = 0;
883 VECTOR_FOR_INORDER_I(i, u8) {
884 if (r->u8[i] & 0x01) {
892 target_ulong helper_vctzlsbb(ppc_avr_t *r)
894 target_ulong count = 0;
896 #if defined(HOST_WORDS_BIGENDIAN)
897 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
899 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
901 if (r->u8[i] & 0x01) {
909 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
910 ppc_avr_t *b, ppc_avr_t *c)
915 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
916 int32_t prod = a->s16[i] * b->s16[i];
917 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
919 r->s16[i] = cvtswsh(t, &sat);
923 env->vscr |= (1 << VSCR_SAT);
927 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
928 ppc_avr_t *b, ppc_avr_t *c)
933 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
934 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
935 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
936 r->s16[i] = cvtswsh(t, &sat);
940 env->vscr |= (1 << VSCR_SAT);
944 #define VMINMAX_DO(name, compare, element) \
945 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
949 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
950 if (a->element[i] compare b->element[i]) { \
951 r->element[i] = b->element[i]; \
953 r->element[i] = a->element[i]; \
957 #define VMINMAX(suffix, element) \
958 VMINMAX_DO(min##suffix, >, element) \
959 VMINMAX_DO(max##suffix, <, element)
971 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
975 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
976 int32_t prod = a->s16[i] * b->s16[i];
977 r->s16[i] = (int16_t) (prod + c->s16[i]);
981 #define VMRG_DO(name, element, highp) \
982 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
986 size_t n_elems = ARRAY_SIZE(r->element); \
988 for (i = 0; i < n_elems / 2; i++) { \
990 result.element[i*2+HI_IDX] = a->element[i]; \
991 result.element[i*2+LO_IDX] = b->element[i]; \
993 result.element[n_elems - i * 2 - (1 + HI_IDX)] = \
994 b->element[n_elems - i - 1]; \
995 result.element[n_elems - i * 2 - (1 + LO_IDX)] = \
996 a->element[n_elems - i - 1]; \
1001 #if defined(HOST_WORDS_BIGENDIAN)
1008 #define VMRG(suffix, element) \
1009 VMRG_DO(mrgl##suffix, element, MRGHI) \
1010 VMRG_DO(mrgh##suffix, element, MRGLO)
1019 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1020 ppc_avr_t *b, ppc_avr_t *c)
1025 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
1026 prod[i] = (int32_t)a->s8[i] * b->u8[i];
1029 VECTOR_FOR_INORDER_I(i, s32) {
1030 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
1031 prod[4 * i + 2] + prod[4 * i + 3];
1035 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1036 ppc_avr_t *b, ppc_avr_t *c)
1041 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1042 prod[i] = a->s16[i] * b->s16[i];
1045 VECTOR_FOR_INORDER_I(i, s32) {
1046 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1050 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1051 ppc_avr_t *b, ppc_avr_t *c)
1057 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1058 prod[i] = (int32_t)a->s16[i] * b->s16[i];
1061 VECTOR_FOR_INORDER_I(i, s32) {
1062 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1064 r->u32[i] = cvtsdsw(t, &sat);
1068 env->vscr |= (1 << VSCR_SAT);
1072 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1073 ppc_avr_t *b, ppc_avr_t *c)
1078 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1079 prod[i] = a->u8[i] * b->u8[i];
1082 VECTOR_FOR_INORDER_I(i, u32) {
1083 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1084 prod[4 * i + 2] + prod[4 * i + 3];
1088 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1089 ppc_avr_t *b, ppc_avr_t *c)
1094 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1095 prod[i] = a->u16[i] * b->u16[i];
1098 VECTOR_FOR_INORDER_I(i, u32) {
1099 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1103 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1104 ppc_avr_t *b, ppc_avr_t *c)
1110 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1111 prod[i] = a->u16[i] * b->u16[i];
1114 VECTOR_FOR_INORDER_I(i, s32) {
1115 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1117 r->u32[i] = cvtuduw(t, &sat);
1121 env->vscr |= (1 << VSCR_SAT);
1125 #define VMUL_DO(name, mul_element, prod_element, cast, evenp) \
1126 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1130 VECTOR_FOR_INORDER_I(i, prod_element) { \
1132 r->prod_element[i] = \
1133 (cast)a->mul_element[i * 2 + HI_IDX] * \
1134 (cast)b->mul_element[i * 2 + HI_IDX]; \
1136 r->prod_element[i] = \
1137 (cast)a->mul_element[i * 2 + LO_IDX] * \
1138 (cast)b->mul_element[i * 2 + LO_IDX]; \
1142 #define VMUL(suffix, mul_element, prod_element, cast) \
1143 VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1) \
1144 VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
1145 VMUL(sb, s8, s16, int16_t)
1146 VMUL(sh, s16, s32, int32_t)
1147 VMUL(sw, s32, s64, int64_t)
1148 VMUL(ub, u8, u16, uint16_t)
1149 VMUL(uh, u16, u32, uint32_t)
1150 VMUL(uw, u32, u64, uint64_t)
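
/*
 * For reference (illustrative): vmuleub multiplies the even-numbered byte
 * lanes of a and b (in PowerPC element numbering) into u16 results and
 * vmuloub the odd-numbered ones; widening means the worst case
 * 0xff * 0xff = 0xfe01 still fits the result lane without truncation.
 */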
1154 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1160 VECTOR_FOR_INORDER_I(i, u8) {
1161 int s = c->u8[i] & 0x1f;
1162 #if defined(HOST_WORDS_BIGENDIAN)
1163 int index = s & 0xf;
1165 int index = 15 - (s & 0xf);
1169 result.u8[i] = b->u8[index];
1171 result.u8[i] = a->u8[index];
1177 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1183 VECTOR_FOR_INORDER_I(i, u8) {
1184 int s = c->u8[i] & 0x1f;
1185 #if defined(HOST_WORDS_BIGENDIAN)
1186 int index = 15 - (s & 0xf);
1188 int index = s & 0xf;
1192 result.u8[i] = a->u8[index];
1194 result.u8[i] = b->u8[index];
1200 #if defined(HOST_WORDS_BIGENDIAN)
1201 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1202 #define VBPERMD_INDEX(i) (i)
1203 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1204 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
1206 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
1207 #define VBPERMD_INDEX(i) (1 - i)
1208 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1209 #define EXTRACT_BIT(avr, i, index) \
1210 (extract64((avr)->u64[1 - i], 63 - index, 1))
1213 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1216 ppc_avr_t result = { .u64 = { 0, 0 } };
1217 VECTOR_FOR_INORDER_I(i, u64) {
1218 for (j = 0; j < 8; j++) {
1219 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1220 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1221 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1228 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1233 VECTOR_FOR_INORDER_I(i, u8) {
1234 int index = VBPERMQ_INDEX(b, i);
1237 uint64_t mask = (1ull << (63-(index & 0x3F)));
1238 if (a->u64[VBPERMQ_DW(index)] & mask) {
1239 perm |= (0x8000 >> i);
1244 r->u64[HI_IDX] = perm;
1248 #undef VBPERMQ_INDEX
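
/*
 * Example (illustrative): vbpermq gathers 16 selected bits of a.  If
 * b->u8[0] = 0, the bit copied into bit 15 of perm is PowerPC bit 0 of a
 * (the most-significant bit of the quadword); selector values of 128 or more
 * contribute a zero bit.
 */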
1251 static const uint64_t VGBBD_MASKS[256] = {
1252 0x0000000000000000ull, /* 00 */
1253 0x0000000000000080ull, /* 01 */
1254 0x0000000000008000ull, /* 02 */
1255 0x0000000000008080ull, /* 03 */
1256 0x0000000000800000ull, /* 04 */
1257 0x0000000000800080ull, /* 05 */
1258 0x0000000000808000ull, /* 06 */
1259 0x0000000000808080ull, /* 07 */
1260 0x0000000080000000ull, /* 08 */
1261 0x0000000080000080ull, /* 09 */
1262 0x0000000080008000ull, /* 0A */
1263 0x0000000080008080ull, /* 0B */
1264 0x0000000080800000ull, /* 0C */
1265 0x0000000080800080ull, /* 0D */
1266 0x0000000080808000ull, /* 0E */
1267 0x0000000080808080ull, /* 0F */
1268 0x0000008000000000ull, /* 10 */
1269 0x0000008000000080ull, /* 11 */
1270 0x0000008000008000ull, /* 12 */
1271 0x0000008000008080ull, /* 13 */
1272 0x0000008000800000ull, /* 14 */
1273 0x0000008000800080ull, /* 15 */
1274 0x0000008000808000ull, /* 16 */
1275 0x0000008000808080ull, /* 17 */
1276 0x0000008080000000ull, /* 18 */
1277 0x0000008080000080ull, /* 19 */
1278 0x0000008080008000ull, /* 1A */
1279 0x0000008080008080ull, /* 1B */
1280 0x0000008080800000ull, /* 1C */
1281 0x0000008080800080ull, /* 1D */
1282 0x0000008080808000ull, /* 1E */
1283 0x0000008080808080ull, /* 1F */
1284 0x0000800000000000ull, /* 20 */
1285 0x0000800000000080ull, /* 21 */
1286 0x0000800000008000ull, /* 22 */
1287 0x0000800000008080ull, /* 23 */
1288 0x0000800000800000ull, /* 24 */
1289 0x0000800000800080ull, /* 25 */
1290 0x0000800000808000ull, /* 26 */
1291 0x0000800000808080ull, /* 27 */
1292 0x0000800080000000ull, /* 28 */
1293 0x0000800080000080ull, /* 29 */
1294 0x0000800080008000ull, /* 2A */
1295 0x0000800080008080ull, /* 2B */
1296 0x0000800080800000ull, /* 2C */
1297 0x0000800080800080ull, /* 2D */
1298 0x0000800080808000ull, /* 2E */
1299 0x0000800080808080ull, /* 2F */
1300 0x0000808000000000ull, /* 30 */
1301 0x0000808000000080ull, /* 31 */
1302 0x0000808000008000ull, /* 32 */
1303 0x0000808000008080ull, /* 33 */
1304 0x0000808000800000ull, /* 34 */
1305 0x0000808000800080ull, /* 35 */
1306 0x0000808000808000ull, /* 36 */
1307 0x0000808000808080ull, /* 37 */
1308 0x0000808080000000ull, /* 38 */
1309 0x0000808080000080ull, /* 39 */
1310 0x0000808080008000ull, /* 3A */
1311 0x0000808080008080ull, /* 3B */
1312 0x0000808080800000ull, /* 3C */
1313 0x0000808080800080ull, /* 3D */
1314 0x0000808080808000ull, /* 3E */
1315 0x0000808080808080ull, /* 3F */
1316 0x0080000000000000ull, /* 40 */
1317 0x0080000000000080ull, /* 41 */
1318 0x0080000000008000ull, /* 42 */
1319 0x0080000000008080ull, /* 43 */
1320 0x0080000000800000ull, /* 44 */
1321 0x0080000000800080ull, /* 45 */
1322 0x0080000000808000ull, /* 46 */
1323 0x0080000000808080ull, /* 47 */
1324 0x0080000080000000ull, /* 48 */
1325 0x0080000080000080ull, /* 49 */
1326 0x0080000080008000ull, /* 4A */
1327 0x0080000080008080ull, /* 4B */
1328 0x0080000080800000ull, /* 4C */
1329 0x0080000080800080ull, /* 4D */
1330 0x0080000080808000ull, /* 4E */
1331 0x0080000080808080ull, /* 4F */
1332 0x0080008000000000ull, /* 50 */
1333 0x0080008000000080ull, /* 51 */
1334 0x0080008000008000ull, /* 52 */
1335 0x0080008000008080ull, /* 53 */
1336 0x0080008000800000ull, /* 54 */
1337 0x0080008000800080ull, /* 55 */
1338 0x0080008000808000ull, /* 56 */
1339 0x0080008000808080ull, /* 57 */
1340 0x0080008080000000ull, /* 58 */
1341 0x0080008080000080ull, /* 59 */
1342 0x0080008080008000ull, /* 5A */
1343 0x0080008080008080ull, /* 5B */
1344 0x0080008080800000ull, /* 5C */
1345 0x0080008080800080ull, /* 5D */
1346 0x0080008080808000ull, /* 5E */
1347 0x0080008080808080ull, /* 5F */
1348 0x0080800000000000ull, /* 60 */
1349 0x0080800000000080ull, /* 61 */
1350 0x0080800000008000ull, /* 62 */
1351 0x0080800000008080ull, /* 63 */
1352 0x0080800000800000ull, /* 64 */
1353 0x0080800000800080ull, /* 65 */
1354 0x0080800000808000ull, /* 66 */
1355 0x0080800000808080ull, /* 67 */
1356 0x0080800080000000ull, /* 68 */
1357 0x0080800080000080ull, /* 69 */
1358 0x0080800080008000ull, /* 6A */
1359 0x0080800080008080ull, /* 6B */
1360 0x0080800080800000ull, /* 6C */
1361 0x0080800080800080ull, /* 6D */
1362 0x0080800080808000ull, /* 6E */
1363 0x0080800080808080ull, /* 6F */
1364 0x0080808000000000ull, /* 70 */
1365 0x0080808000000080ull, /* 71 */
1366 0x0080808000008000ull, /* 72 */
1367 0x0080808000008080ull, /* 73 */
1368 0x0080808000800000ull, /* 74 */
1369 0x0080808000800080ull, /* 75 */
1370 0x0080808000808000ull, /* 76 */
1371 0x0080808000808080ull, /* 77 */
1372 0x0080808080000000ull, /* 78 */
1373 0x0080808080000080ull, /* 79 */
1374 0x0080808080008000ull, /* 7A */
1375 0x0080808080008080ull, /* 7B */
1376 0x0080808080800000ull, /* 7C */
1377 0x0080808080800080ull, /* 7D */
1378 0x0080808080808000ull, /* 7E */
1379 0x0080808080808080ull, /* 7F */
1380 0x8000000000000000ull, /* 80 */
1381 0x8000000000000080ull, /* 81 */
1382 0x8000000000008000ull, /* 82 */
1383 0x8000000000008080ull, /* 83 */
1384 0x8000000000800000ull, /* 84 */
1385 0x8000000000800080ull, /* 85 */
1386 0x8000000000808000ull, /* 86 */
1387 0x8000000000808080ull, /* 87 */
1388 0x8000000080000000ull, /* 88 */
1389 0x8000000080000080ull, /* 89 */
1390 0x8000000080008000ull, /* 8A */
1391 0x8000000080008080ull, /* 8B */
1392 0x8000000080800000ull, /* 8C */
1393 0x8000000080800080ull, /* 8D */
1394 0x8000000080808000ull, /* 8E */
1395 0x8000000080808080ull, /* 8F */
1396 0x8000008000000000ull, /* 90 */
1397 0x8000008000000080ull, /* 91 */
1398 0x8000008000008000ull, /* 92 */
1399 0x8000008000008080ull, /* 93 */
1400 0x8000008000800000ull, /* 94 */
1401 0x8000008000800080ull, /* 95 */
1402 0x8000008000808000ull, /* 96 */
1403 0x8000008000808080ull, /* 97 */
1404 0x8000008080000000ull, /* 98 */
1405 0x8000008080000080ull, /* 99 */
1406 0x8000008080008000ull, /* 9A */
1407 0x8000008080008080ull, /* 9B */
1408 0x8000008080800000ull, /* 9C */
1409 0x8000008080800080ull, /* 9D */
1410 0x8000008080808000ull, /* 9E */
1411 0x8000008080808080ull, /* 9F */
1412 0x8000800000000000ull, /* A0 */
1413 0x8000800000000080ull, /* A1 */
1414 0x8000800000008000ull, /* A2 */
1415 0x8000800000008080ull, /* A3 */
1416 0x8000800000800000ull, /* A4 */
1417 0x8000800000800080ull, /* A5 */
1418 0x8000800000808000ull, /* A6 */
1419 0x8000800000808080ull, /* A7 */
1420 0x8000800080000000ull, /* A8 */
1421 0x8000800080000080ull, /* A9 */
1422 0x8000800080008000ull, /* AA */
1423 0x8000800080008080ull, /* AB */
1424 0x8000800080800000ull, /* AC */
1425 0x8000800080800080ull, /* AD */
1426 0x8000800080808000ull, /* AE */
1427 0x8000800080808080ull, /* AF */
1428 0x8000808000000000ull, /* B0 */
1429 0x8000808000000080ull, /* B1 */
1430 0x8000808000008000ull, /* B2 */
1431 0x8000808000008080ull, /* B3 */
1432 0x8000808000800000ull, /* B4 */
1433 0x8000808000800080ull, /* B5 */
1434 0x8000808000808000ull, /* B6 */
1435 0x8000808000808080ull, /* B7 */
1436 0x8000808080000000ull, /* B8 */
1437 0x8000808080000080ull, /* B9 */
1438 0x8000808080008000ull, /* BA */
1439 0x8000808080008080ull, /* BB */
1440 0x8000808080800000ull, /* BC */
1441 0x8000808080800080ull, /* BD */
1442 0x8000808080808000ull, /* BE */
1443 0x8000808080808080ull, /* BF */
1444 0x8080000000000000ull, /* C0 */
1445 0x8080000000000080ull, /* C1 */
1446 0x8080000000008000ull, /* C2 */
1447 0x8080000000008080ull, /* C3 */
1448 0x8080000000800000ull, /* C4 */
1449 0x8080000000800080ull, /* C5 */
1450 0x8080000000808000ull, /* C6 */
1451 0x8080000000808080ull, /* C7 */
1452 0x8080000080000000ull, /* C8 */
1453 0x8080000080000080ull, /* C9 */
1454 0x8080000080008000ull, /* CA */
1455 0x8080000080008080ull, /* CB */
1456 0x8080000080800000ull, /* CC */
1457 0x8080000080800080ull, /* CD */
1458 0x8080000080808000ull, /* CE */
1459 0x8080000080808080ull, /* CF */
1460 0x8080008000000000ull, /* D0 */
1461 0x8080008000000080ull, /* D1 */
1462 0x8080008000008000ull, /* D2 */
1463 0x8080008000008080ull, /* D3 */
1464 0x8080008000800000ull, /* D4 */
1465 0x8080008000800080ull, /* D5 */
1466 0x8080008000808000ull, /* D6 */
1467 0x8080008000808080ull, /* D7 */
1468 0x8080008080000000ull, /* D8 */
1469 0x8080008080000080ull, /* D9 */
1470 0x8080008080008000ull, /* DA */
1471 0x8080008080008080ull, /* DB */
1472 0x8080008080800000ull, /* DC */
1473 0x8080008080800080ull, /* DD */
1474 0x8080008080808000ull, /* DE */
1475 0x8080008080808080ull, /* DF */
1476 0x8080800000000000ull, /* E0 */
1477 0x8080800000000080ull, /* E1 */
1478 0x8080800000008000ull, /* E2 */
1479 0x8080800000008080ull, /* E3 */
1480 0x8080800000800000ull, /* E4 */
1481 0x8080800000800080ull, /* E5 */
1482 0x8080800000808000ull, /* E6 */
1483 0x8080800000808080ull, /* E7 */
1484 0x8080800080000000ull, /* E8 */
1485 0x8080800080000080ull, /* E9 */
1486 0x8080800080008000ull, /* EA */
1487 0x8080800080008080ull, /* EB */
1488 0x8080800080800000ull, /* EC */
1489 0x8080800080800080ull, /* ED */
1490 0x8080800080808000ull, /* EE */
1491 0x8080800080808080ull, /* EF */
1492 0x8080808000000000ull, /* F0 */
1493 0x8080808000000080ull, /* F1 */
1494 0x8080808000008000ull, /* F2 */
1495 0x8080808000008080ull, /* F3 */
1496 0x8080808000800000ull, /* F4 */
1497 0x8080808000800080ull, /* F5 */
1498 0x8080808000808000ull, /* F6 */
1499 0x8080808000808080ull, /* F7 */
1500 0x8080808080000000ull, /* F8 */
1501 0x8080808080000080ull, /* F9 */
1502 0x8080808080008000ull, /* FA */
1503 0x8080808080008080ull, /* FB */
1504 0x8080808080800000ull, /* FC */
1505 0x8080808080800080ull, /* FD */
1506 0x8080808080808000ull, /* FE */
1507 0x8080808080808080ull, /* FF */
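
/*
 * Illustrative note (not in the original): each entry above maps a byte to a
 * 64-bit mask in which set bit k of the index (k = 0 for the LSB) becomes
 * the MSB of byte k, e.g. VGBBD_MASKS[0xAB] = 0x8000800080008080ull.
 * helper_vgbbd() below ORs these masks shifted by each byte's position,
 * which amounts to transposing the 8x8 bit matrix of every doubleword.
 */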
1510 void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
1513 uint64_t t[2] = { 0, 0 };
1515 VECTOR_FOR_INORDER_I(i, u8) {
1516 #if defined(HOST_WORDS_BIGENDIAN)
1517 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
1519 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
1527 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1528 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1531 trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])]; \
1533 VECTOR_FOR_INORDER_I(i, srcfld) { \
1535 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1536 if (a->srcfld[i] & (1ull<<j)) { \
1537 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1542 VECTOR_FOR_INORDER_I(i, trgfld) { \
1543 r->trgfld[i] = prod[2*i] ^ prod[2*i+1]; \
1547 PMSUM(vpmsumb, u8, u16, uint16_t)
1548 PMSUM(vpmsumh, u16, u32, uint32_t)
1549 PMSUM(vpmsumw, u32, u64, uint64_t)
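
/*
 * Example (illustrative): the PMSUM loops implement carry-less (GF(2)[x])
 * multiplication, e.g. for vpmsumb a lane product of 0x03 * 0x03 is 0x05
 * (since (x + 1)^2 = x^2 + 1 over GF(2)), and adjacent even/odd products are
 * XORed into each widened result lane.
 */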
1551 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1554 #ifdef CONFIG_INT128
1556 __uint128_t prod[2];
1558 VECTOR_FOR_INORDER_I(i, u64) {
1560 for (j = 0; j < 64; j++) {
1561 if (a->u64[i] & (1ull<<j)) {
1562 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1567 r->u128 = prod[0] ^ prod[1];
1573 VECTOR_FOR_INORDER_I(i, u64) {
1574 prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
1575 for (j = 0; j < 64; j++) {
1576 if (a->u64[i] & (1ull<<j)) {
1579 bshift.u64[HI_IDX] = 0;
1580 bshift.u64[LO_IDX] = b->u64[i];
1582 bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
1583 bshift.u64[LO_IDX] = b->u64[i] << j;
1585 prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
1586 prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
1591 r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
1592 r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
1597 #if defined(HOST_WORDS_BIGENDIAN)
1602 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1606 #if defined(HOST_WORDS_BIGENDIAN)
1607 const ppc_avr_t *x[2] = { a, b };
1609 const ppc_avr_t *x[2] = { b, a };
1612 VECTOR_FOR_INORDER_I(i, u64) {
1613 VECTOR_FOR_INORDER_I(j, u32) {
1614 uint32_t e = x[i]->u32[j];
1616 result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
1617 ((e >> 6) & 0x3e0) |
1624 #define VPK(suffix, from, to, cvt, dosat) \
1625 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1626 ppc_avr_t *a, ppc_avr_t *b) \
1631 ppc_avr_t *a0 = PKBIG ? a : b; \
1632 ppc_avr_t *a1 = PKBIG ? b : a; \
1634 VECTOR_FOR_INORDER_I(i, from) { \
1635 result.to[i] = cvt(a0->from[i], &sat); \
1636 result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \
1639 if (dosat && sat) { \
1640 env->vscr |= (1 << VSCR_SAT); \
1644 VPK(shss, s16, s8, cvtshsb, 1)
1645 VPK(shus, s16, u8, cvtshub, 1)
1646 VPK(swss, s32, s16, cvtswsh, 1)
1647 VPK(swus, s32, u16, cvtswuh, 1)
1648 VPK(sdss, s64, s32, cvtsdsw, 1)
1649 VPK(sdus, s64, u32, cvtsduw, 1)
1650 VPK(uhus, u16, u8, cvtuhub, 1)
1651 VPK(uwus, u32, u16, cvtuwuh, 1)
1652 VPK(udus, u64, u32, cvtuduw, 1)
1653 VPK(uhum, u16, u8, I, 0)
1654 VPK(uwum, u32, u16, I, 0)
1655 VPK(udum, u64, u32, I, 0)
1660 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1664 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1665 r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
1669 #define VRFI(suffix, rounding) \
1670 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1674 float_status s = env->vec_status; \
1676 set_float_rounding_mode(rounding, &s); \
1677 for (i = 0; i < ARRAY_SIZE(r->f); i++) { \
1678 r->f[i] = float32_round_to_int (b->f[i], &s); \
1681 VRFI(n, float_round_nearest_even)
1682 VRFI(m, float_round_down)
1683 VRFI(p, float_round_up)
1684 VRFI(z, float_round_to_zero)
1687 #define VROTATE(suffix, element, mask) \
1688 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1692 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1693 unsigned int shift = b->element[i] & mask; \
1694 r->element[i] = (a->element[i] << shift) | \
1695 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1699 VROTATE(h, u16, 0xF)
1700 VROTATE(w, u32, 0x1F)
1701 VROTATE(d, u64, 0x3F)
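
/*
 * Example (illustrative): vrlw with a lane rotate count of 8 turns
 * 0x12345678 into 0x34567812; the mask keeps only the low bits of the count,
 * so a count of 40 also rotates a word by 8.
 */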
1704 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1708 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1709 float32 t = float32_sqrt(b->f[i], &env->vec_status);
1711 r->f[i] = float32_div(float32_one, t, &env->vec_status);
1715 #define VRLMI(name, size, element, insert) \
1716 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1719 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1720 uint##size##_t src1 = a->element[i]; \
1721 uint##size##_t src2 = b->element[i]; \
1722 uint##size##_t src3 = r->element[i]; \
1723 uint##size##_t begin, end, shift, mask, rot_val; \
1725 shift = extract##size(src2, 0, 6); \
1726 end = extract##size(src2, 8, 6); \
1727 begin = extract##size(src2, 16, 6); \
1728 rot_val = rol##size(src1, shift); \
1729 mask = mask_u##size(begin, end); \
1731 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1733 r->element[i] = (rot_val & mask); \
1738 VRLMI(vrldmi, 64, u64, 1);
1739 VRLMI(vrlwmi, 32, u32, 1);
1740 VRLMI(vrldnm, 64, u64, 0);
1741 VRLMI(vrlwnm, 32, u32, 0);
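
/*
 * Reading aid (illustrative): for vrlwmi each lane of b packs begin/end/shift
 * fields in bits 16..21, 8..13 and 0..5; the source lane is rotated left by
 * "shift", masked to the begin..end bit range, and, for the "mi" (insert)
 * forms, merged with the previous target contents.
 */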
1743 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1746 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1747 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1750 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1754 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1755 r->f[i] = float32_exp2(b->f[i], &env->vec_status);
1759 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1763 for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1764 r->f[i] = float32_log2(b->f[i], &env->vec_status);
1768 #if defined(HOST_WORDS_BIGENDIAN)
1769 #define VEXTU_X_DO(name, size, left) \
1770 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1774 index = (a & 0xf) * 8; \
1776 index = ((15 - (a & 0xf) + 1) * 8) - size; \
1778 return int128_getlo(int128_rshift(b->s128, index)) & \
1779 MAKE_64BIT_MASK(0, size); \
1782 #define VEXTU_X_DO(name, size, left) \
1783 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1787 index = ((15 - (a & 0xf) + 1) * 8) - size; \
1789 index = (a & 0xf) * 8; \
1791 return int128_getlo(int128_rshift(b->s128, index)) & \
1792 MAKE_64BIT_MASK(0, size); \
1796 VEXTU_X_DO(vextublx, 8, 1)
1797 VEXTU_X_DO(vextuhlx, 16, 1)
1798 VEXTU_X_DO(vextuwlx, 32, 1)
1799 VEXTU_X_DO(vextubrx, 8, 0)
1800 VEXTU_X_DO(vextuhrx, 16, 0)
1801 VEXTU_X_DO(vextuwrx, 32, 0)
/* The specification says that the results are undefined if the shift counts
 * are not all identical. We check that they are, to match what real hardware
 * appears to do. */
1807 #define VSHIFT(suffix, leftp) \
1808 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1810 int shift = b->u8[LO_IDX*15] & 0x7; \
1814 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \
1815 doit = doit && ((b->u8[i] & 0x7) == shift); \
1820 } else if (leftp) { \
1821 uint64_t carry = a->u64[LO_IDX] >> (64 - shift); \
1823 r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry; \
1824 r->u64[LO_IDX] = a->u64[LO_IDX] << shift; \
1826 uint64_t carry = a->u64[HI_IDX] << (64 - shift); \
1828 r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry; \
1829 r->u64[HI_IDX] = a->u64[HI_IDX] >> shift; \
1837 #define VSL(suffix, element, mask) \
1838 void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1842 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1843 unsigned int shift = b->element[i] & mask; \
1845 r->element[i] = a->element[i] << shift; \
1854 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1857 unsigned int shift, bytes, size;
1859 size = ARRAY_SIZE(r->u8);
1860 for (i = 0; i < size; i++) {
1861 shift = b->u8[i] & 0x7; /* extract shift value */
1862 bytes = (a->u8[i] << 8) + /* extract adjacent bytes */
1863 (((i + 1) < size) ? a->u8[i + 1] : 0);
1864 r->u8[i] = (bytes << shift) >> 8; /* shift and store result */
1868 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1871 unsigned int shift, bytes;
/* Use reverse order, as the destination and source registers can be the
 * same. The register is modified in place, saving a temporary; processing in
 * reverse order guarantees that the computed result is not fed back.
 */
1877 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1878 shift = b->u8[i] & 0x7; /* extract shift value */
1879 bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i];
1880 /* extract adjacent bytes */
1881 r->u8[i] = (bytes >> shift) & 0xFF; /* shift and store result */
1885 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1887 int sh = shift & 0xf;
1891 #if defined(HOST_WORDS_BIGENDIAN)
1892 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1895 result.u8[i] = b->u8[index - 0x10];
1897 result.u8[i] = a->u8[index];
1901 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1902 int index = (16 - sh) + i;
1904 result.u8[i] = a->u8[index - 0x10];
1906 result.u8[i] = b->u8[index];
1913 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1915 int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;
1917 #if defined(HOST_WORDS_BIGENDIAN)
1918 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1919 memset(&r->u8[16-sh], 0, sh);
1921 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1922 memset(&r->u8[0], 0, sh);
1926 /* Experimental testing shows that hardware masks the immediate. */
1927 #define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
1928 #if defined(HOST_WORDS_BIGENDIAN)
1929 #define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
1931 #define SPLAT_ELEMENT(element) \
1932 (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
1934 #define VSPLT(suffix, element) \
1935 void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
1937 uint32_t s = b->element[SPLAT_ELEMENT(element)]; \
1940 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1941 r->element[i] = s; \
1948 #undef SPLAT_ELEMENT
1949 #undef _SPLAT_MASKED
1950 #if defined(HOST_WORDS_BIGENDIAN)
1951 #define VINSERT(suffix, element) \
1952 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1954 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])], \
1955 sizeof(r->element[0])); \
1958 #define VINSERT(suffix, element) \
1959 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1961 uint32_t d = (16 - index) - sizeof(r->element[0]); \
1962 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \
1970 #if defined(HOST_WORDS_BIGENDIAN)
1971 #define VEXTRACT(suffix, element) \
1972 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1974 uint32_t es = sizeof(r->element[0]); \
1975 memmove(&r->u8[8 - es], &b->u8[index], es); \
1976 memset(&r->u8[8], 0, 8); \
1977 memset(&r->u8[0], 0, 8 - es); \
1980 #define VEXTRACT(suffix, element) \
1981 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1983 uint32_t es = sizeof(r->element[0]); \
1984 uint32_t s = (16 - index) - es; \
1985 memmove(&r->u8[8], &b->u8[s], es); \
1986 memset(&r->u8[0], 0, 8); \
1987 memset(&r->u8[8 + es], 0, 8 - es); \
1996 void helper_xxextractuw(CPUPPCState *env, target_ulong xtn,
1997 target_ulong xbn, uint32_t index)
2000 size_t es = sizeof(uint32_t);
2004 getVSR(xbn, &xb, env);
2005 memset(&xt, 0, sizeof(xt));
2007 #if defined(HOST_WORDS_BIGENDIAN)
2009 for (i = 0; i < es; i++, ext_index++) {
2010 xt.u8[8 - es + i] = xb.u8[ext_index % 16];
2013 ext_index = 15 - index;
2014 for (i = es - 1; i >= 0; i--, ext_index--) {
2015 xt.u8[8 + i] = xb.u8[ext_index % 16];
2019 putVSR(xtn, &xt, env);
2022 void helper_xxinsertw(CPUPPCState *env, target_ulong xtn,
2023 target_ulong xbn, uint32_t index)
2026 size_t es = sizeof(uint32_t);
2027 int ins_index, i = 0;
2029 getVSR(xbn, &xb, env);
2030 getVSR(xtn, &xt, env);
2032 #if defined(HOST_WORDS_BIGENDIAN)
2034 for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
2035 xt.u8[ins_index] = xb.u8[8 - es + i];
2038 ins_index = 15 - index;
2039 for (i = es - 1; i >= 0 && ins_index >= 0; i--, ins_index--) {
2040 xt.u8[ins_index] = xb.u8[8 + i];
2044 putVSR(xtn, &xt, env);
2047 #define VEXT_SIGNED(name, element, mask, cast, recast) \
2048 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
2051 VECTOR_FOR_INORDER_I(i, element) { \
2052 r->element[i] = (recast)((cast)(b->element[i] & mask)); \
2055 VEXT_SIGNED(vextsb2w, s32, UINT8_MAX, int8_t, int32_t)
2056 VEXT_SIGNED(vextsb2d, s64, UINT8_MAX, int8_t, int64_t)
2057 VEXT_SIGNED(vextsh2w, s32, UINT16_MAX, int16_t, int32_t)
2058 VEXT_SIGNED(vextsh2d, s64, UINT16_MAX, int16_t, int64_t)
2059 VEXT_SIGNED(vextsw2d, s64, UINT32_MAX, int32_t, int64_t)
2062 #define VNEG(name, element) \
2063 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
2066 VECTOR_FOR_INORDER_I(i, element) { \
2067 r->element[i] = -b->element[i]; \
2074 #define VSPLTI(suffix, element, splat_type) \
2075 void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat) \
2077 splat_type x = (int8_t)(splat << 3) >> 3; \
2080 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
2081 r->element[i] = x; \
2084 VSPLTI(b, s8, int8_t)
2085 VSPLTI(h, s16, int16_t)
2086 VSPLTI(w, s32, int32_t)
2089 #define VSR(suffix, element, mask) \
2090 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
2094 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
2095 unsigned int shift = b->element[i] & mask; \
2096 r->element[i] = a->element[i] >> shift; \
2109 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2111 int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;
2113 #if defined(HOST_WORDS_BIGENDIAN)
2114 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
2115 memset(&r->u8[0], 0, sh);
2117 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
2118 memset(&r->u8[16 - sh], 0, sh);
2122 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2126 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2127 r->u32[i] = a->u32[i] >= b->u32[i];
2131 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2138 #if defined(HOST_WORDS_BIGENDIAN)
2139 upper = ARRAY_SIZE(r->s32)-1;
2143 t = (int64_t)b->s32[upper];
2144 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2148 result.s32[upper] = cvtsdsw(t, &sat);
2152 env->vscr |= (1 << VSCR_SAT);
2156 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2162 #if defined(HOST_WORDS_BIGENDIAN)
2167 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2168 int64_t t = (int64_t)b->s32[upper + i * 2];
2171 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
2172 t += a->s32[2 * i + j];
2174 result.s32[upper + i * 2] = cvtsdsw(t, &sat);
2179 env->vscr |= (1 << VSCR_SAT);
2183 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2188 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2189 int64_t t = (int64_t)b->s32[i];
2191 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
2192 t += a->s8[4 * i + j];
2194 r->s32[i] = cvtsdsw(t, &sat);
2198 env->vscr |= (1 << VSCR_SAT);
2202 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2207 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2208 int64_t t = (int64_t)b->s32[i];
2210 t += a->s16[2 * i] + a->s16[2 * i + 1];
2211 r->s32[i] = cvtsdsw(t, &sat);
2215 env->vscr |= (1 << VSCR_SAT);
2219 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2224 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2225 uint64_t t = (uint64_t)b->u32[i];
2227 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
2228 t += a->u8[4 * i + j];
2230 r->u32[i] = cvtuduw(t, &sat);
2234 env->vscr |= (1 << VSCR_SAT);
2238 #if defined(HOST_WORDS_BIGENDIAN)
2245 #define VUPKPX(suffix, hi) \
2246 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2251 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
2252 uint16_t e = b->u16[hi ? i : i+4]; \
2253 uint8_t a = (e >> 15) ? 0xff : 0; \
2254 uint8_t r = (e >> 10) & 0x1f; \
2255 uint8_t g = (e >> 5) & 0x1f; \
2256 uint8_t b = e & 0x1f; \
2258 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
2266 #define VUPK(suffix, unpacked, packee, hi) \
2267 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2273 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
2274 result.unpacked[i] = b->packee[i]; \
2277 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
2279 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
2284 VUPK(hsb, s16, s8, UPKHI)
2285 VUPK(hsh, s32, s16, UPKHI)
2286 VUPK(hsw, s64, s32, UPKHI)
2287 VUPK(lsb, s16, s8, UPKLO)
2288 VUPK(lsh, s32, s16, UPKLO)
2289 VUPK(lsw, s64, s32, UPKLO)
2294 #define VGENERIC_DO(name, element) \
2295 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
2299 VECTOR_FOR_INORDER_I(i, element) { \
2300 r->element[i] = name(b->element[i]); \
2304 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
2305 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
2306 #define clzw(v) clz32((v))
2307 #define clzd(v) clz64((v))
2309 VGENERIC_DO(clzb, u8)
2310 VGENERIC_DO(clzh, u16)
2311 VGENERIC_DO(clzw, u32)
2312 VGENERIC_DO(clzd, u64)
2319 #define ctzb(v) ((v) ? ctz32(v) : 8)
2320 #define ctzh(v) ((v) ? ctz32(v) : 16)
2321 #define ctzw(v) ctz32((v))
2322 #define ctzd(v) ctz64((v))
2324 VGENERIC_DO(ctzb, u8)
2325 VGENERIC_DO(ctzh, u16)
2326 VGENERIC_DO(ctzw, u32)
2327 VGENERIC_DO(ctzd, u64)
2334 #define popcntb(v) ctpop8(v)
2335 #define popcnth(v) ctpop16(v)
2336 #define popcntw(v) ctpop32(v)
2337 #define popcntd(v) ctpop64(v)
2339 VGENERIC_DO(popcntb, u8)
2340 VGENERIC_DO(popcnth, u16)
2341 VGENERIC_DO(popcntw, u32)
2342 VGENERIC_DO(popcntd, u64)
2351 #if defined(HOST_WORDS_BIGENDIAN)
2352 #define QW_ONE { .u64 = { 0, 1 } }
2354 #define QW_ONE { .u64 = { 1, 0 } }
2357 #ifndef CONFIG_INT128
2359 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
2361 t->u64[0] = ~a.u64[0];
2362 t->u64[1] = ~a.u64[1];
2365 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
2367 if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
2369 } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
2371 } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
2373 } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
2380 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2382 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
2383 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
2384 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
2387 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2390 t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
2391 t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
2392 (~a.u64[LO_IDX] < b.u64[LO_IDX]);
2393 avr_qw_not(¬_a, a);
2394 return avr_qw_cmpu(not_a, b) < 0;
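
/*
 * Illustrative note: avr_qw_addc() reports a carry out of the 128-bit add by
 * checking ~a < b, the same trick used per word in helper_vaddcuw() above;
 * e.g. a = 2^128 - 1 and b = 1 gives ~a = 0 < 1, so the carry is 1.
 */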
2399 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2401 #ifdef CONFIG_INT128
2402 r->u128 = a->u128 + b->u128;
2404 avr_qw_add(r, *a, *b);
2408 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2410 #ifdef CONFIG_INT128
2411 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2414 if (c->u64[LO_IDX] & 1) {
2417 tmp.u64[HI_IDX] = 0;
2418 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2419 avr_qw_add(&tmp, *a, tmp);
2420 avr_qw_add(r, tmp, *b);
2422 avr_qw_add(r, *a, *b);
2427 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2429 #ifdef CONFIG_INT128
2430 r->u128 = (~a->u128 < b->u128);
2434 avr_qw_not(¬_a, *a);
2437 r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
2441 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2443 #ifdef CONFIG_INT128
2444 int carry_out = (~a->u128 < b->u128);
2445 if (!carry_out && (c->u128 & 1)) {
2446 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2447 ((a->u128 != 0) || (b->u128 != 0));
2449 r->u128 = carry_out;
2452 int carry_in = c->u64[LO_IDX] & 1;
2456 carry_out = avr_qw_addc(&tmp, *a, *b);
2458 if (!carry_out && carry_in) {
2459 ppc_avr_t one = QW_ONE;
2460 carry_out = avr_qw_addc(&tmp, tmp, one);
2463 r->u64[LO_IDX] = carry_out;
2467 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2469 #ifdef CONFIG_INT128
2470 r->u128 = a->u128 - b->u128;
2473 ppc_avr_t one = QW_ONE;
2475 avr_qw_not(&tmp, *b);
2476 avr_qw_add(&tmp, *a, tmp);
2477 avr_qw_add(r, tmp, one);
2481 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2483 #ifdef CONFIG_INT128
2484 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2488 avr_qw_not(&tmp, *b);
2489 avr_qw_add(&sum, *a, tmp);
2491 tmp.u64[HI_IDX] = 0;
2492 tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2493 avr_qw_add(r, sum, tmp);
2497 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2499 #ifdef CONFIG_INT128
2500 r->u128 = (~a->u128 < ~b->u128) ||
2501 (a->u128 + ~b->u128 == (__uint128_t)-1);
2503 int carry = (avr_qw_cmpu(*a, *b) > 0);
2506 avr_qw_not(&tmp, *b);
2507 avr_qw_add(&tmp, *a, tmp);
2508 carry = ((tmp.s64[HI_IDX] == -1ull) && (tmp.s64[LO_IDX] == -1ull));
2511 r->u64[LO_IDX] = carry;
2515 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2517 #ifdef CONFIG_INT128
2519 (~a->u128 < ~b->u128) ||
2520 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2522 int carry_in = c->u64[LO_IDX] & 1;
2523 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2524 if (!carry_out && carry_in) {
2526 avr_qw_not(&tmp, *b);
2527 avr_qw_add(&tmp, *a, tmp);
2528 carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2532 r->u64[LO_IDX] = carry_out;
2536 #define BCD_PLUS_PREF_1 0xC
2537 #define BCD_PLUS_PREF_2 0xF
2538 #define BCD_PLUS_ALT_1 0xA
2539 #define BCD_NEG_PREF 0xD
2540 #define BCD_NEG_ALT 0xB
2541 #define BCD_PLUS_ALT_2 0xE
2542 #define NATIONAL_PLUS 0x2B
2543 #define NATIONAL_NEG 0x2D
2545 #if defined(HOST_WORDS_BIGENDIAN)
2546 #define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2548 #define BCD_DIG_BYTE(n) ((n) / 2)
2551 static int bcd_get_sgn(ppc_avr_t *bcd)
2553 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2554 case BCD_PLUS_PREF_1:
2555 case BCD_PLUS_PREF_2:
2556 case BCD_PLUS_ALT_1:
2557 case BCD_PLUS_ALT_2:
2575 static int bcd_preferred_sgn(int sgn, int ps)
2578 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2580 return BCD_NEG_PREF;
2584 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2588 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2590 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2593 if (unlikely(result > 9)) {
2599 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2602 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2603 bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
2605 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2606 bcd->u8[BCD_DIG_BYTE(n)] |= digit;
2610 static bool bcd_is_valid(ppc_avr_t *bcd)
2615 if (bcd_get_sgn(bcd) == 0) {
2619 for (i = 1; i < 32; i++) {
2620 bcd_get_digit(bcd, i, &invalid);
2621 if (unlikely(invalid)) {
2628 static int bcd_cmp_zero(ppc_avr_t *bcd)
2630 if (bcd->u64[HI_IDX] == 0 && (bcd->u64[LO_IDX] >> 4) == 0) {
2633 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
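
/*
 * Layout note (illustrative): the packed-decimal value keeps digit 0 as the
 * sign nibble in the low-order byte and digits 1..31 in the remaining
 * nibbles, so e.g. +123 is ... 0x00 0x12 0x3C with BCD_PLUS_PREF_1 (0xC) as
 * the sign code; bcd_get_digit()/bcd_put_digit() above index that layout.
 */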
static uint16_t get_national_digit(ppc_avr_t *reg, int n)
{
#if defined(HOST_WORDS_BIGENDIAN)
    return reg->u16[7 - n];
#else
    return reg->u16[n];
#endif
}
static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
{
#if defined(HOST_WORDS_BIGENDIAN)
    reg->u16[7 - n] = val;
#else
    reg->u16[n] = val;
#endif
}
static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    int invalid = 0;

    for (i = 31; i > 0; i--) {
        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return 0; /* doesn't matter */
        } else if (dig_a > dig_b) {
            return 1;
        } else if (dig_a < dig_b) {
            return -1;
        }
    }

    return 0;
}
static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                       int *overflow)
{
    int carry = 0;
    int i;
    int is_zero = 1;

    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) +
                        bcd_get_digit(b, i, invalid) + carry;
        is_zero &= (digit == 0);
        if (digit > 9) {
            carry = 1;
            digit -= 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);

        if (unlikely(*invalid)) {
            return -1;
        }
    }

    *overflow = carry;
    return is_zero;
}
static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
                       int *overflow)
{
    int carry = 0;
    int i;
    int is_zero = 1;

    for (i = 1; i <= 31; i++) {
        uint8_t digit = bcd_get_digit(a, i, invalid) -
                        bcd_get_digit(b, i, invalid) + carry;
        is_zero &= (digit == 0);
        if (digit & 0x80) {
            carry = -1;
            digit += 10;
        } else {
            carry = 0;
        }

        bcd_put_digit(t, digit, i);

        if (unlikely(*invalid)) {
            return -1;
        }
    }

    *overflow = carry;
    return is_zero;
}
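/*
 * bcdadd below uses sign/magnitude arithmetic: when the signs match, the
 * magnitudes are added; otherwise the smaller magnitude is subtracted from
 * the larger one and the result takes the sign of the larger operand.
 * For example, (+123) + (-45): the signs differ and |123| > |45|, so the
 * result is bcd_sub_mag(123, 45) = 78 with the sign of the first operand,
 * i.e. +78.
 */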
uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int sgna = bcd_get_sgn(a);
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgna == 0) || (sgnb == 0);
    int overflow = 0;
    int zero = 0;
    uint32_t cr = 0;
    ppc_avr_t result = { .u64 = { 0, 0 } };

    if (!invalid) {
        if (sgna == sgnb) {
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
            zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
            cr = (sgna > 0) ? CRF_GT : CRF_LT;
        } else if (bcd_cmp_mag(a, b) > 0) {
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
            zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
            cr = (sgna > 0) ? CRF_GT : CRF_LT;
        } else if (bcd_cmp_mag(a, b) == 0) {
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(0, ps);
            zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
        } else {
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
            zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
            cr = (sgnb > 0) ? CRF_GT : CRF_LT;
        }
    }

    if (unlikely(invalid)) {
        result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
        cr = CRF_SO;
    } else if (overflow) {
        cr |= CRF_SO;
    }

    if (zero) {
        cr |= CRF_EQ;
    }

    *r = result;

    return cr;
}
uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    ppc_avr_t bcopy = *b;
    int sgnb = bcd_get_sgn(b);
    if (sgnb < 0) {
        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
    } else if (sgnb > 0) {
        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
    }
    /* else invalid ... defer to bcdadd code for proper handling */

    return helper_bcdadd(r, a, &bcopy, ps);
}
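/*
 * The "national" decimal format handled below stores one digit per 16-bit
 * element as the characters '0'..'9' (0x0030..0x0039), with the sign kept
 * in element 0 as '+' (NATIONAL_PLUS) or '-' (NATIONAL_NEG).
 */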
uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint16_t national = 0;
    uint16_t sgnb = get_national_digit(b, 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);

    for (i = 1; i < 8; i++) {
        national = get_national_digit(b, i);
        if (unlikely(national < 0x30 || national > 0x39)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, national & 0xf, i);
    }

    if (sgnb == NATIONAL_PLUS) {
        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
    } else {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}
uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = (b->u64[HI_IDX] != 0) || ((b->u64[LO_IDX] >> 32) != 0);

    for (i = 1; i < 8; i++) {
        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);

        if (unlikely(invalid)) {
            break;
        }
    }
    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}
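/*
 * The "zoned" decimal format handled below stores one digit per byte: the
 * low nibble is the digit and the high ("zone") nibble is 0x3 in ASCII
 * mode (PS=0) or 0xF in EBCDIC mode (PS=1), with the sign encoded in the
 * zone nibble of the least significant byte.
 */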
uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int invalid = 0;
    int zone_digit = 0;
    int zone_lead = ps ? 0xF : 0x3;
    int digit = 0;
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4;

    if (unlikely((sgnb < 0xA) && ps)) {
        invalid = 1;
    }

    for (i = 0; i < 16; i++) {
        zone_digit = i ? b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead;
        digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF;
        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, digit, i + 1);
    }

    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
        (!ps && (sgnb & 0x4))) {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    } else {
        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}
uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint8_t digit = 0;
    int sgnb = bcd_get_sgn(b);
    int zone_lead = (ps) ? 0xF0 : 0x30;
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = ((b->u64[HI_IDX] >> 4) != 0);

    for (i = 0; i < 16; i++) {
        digit = bcd_get_digit(b, i + 1, &invalid);

        if (unlikely(invalid)) {
            break;
        }

        ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit;
    }

    if (ps) {
        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
    } else {
        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
    }

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}
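/*
 * bcdcfsq below converts a signed 128-bit integer to packed BCD.  The
 * magnitude is split with divu128() by 10^15, leaving the quotient
 * (digits 16..31) in lo_value and the remainder (digits 1..15) in
 * hi_value; anything that needs more than 31 decimal digits is reported
 * as overflow.
 */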
uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint64_t lo_value;
    uint64_t hi_value;
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    if (b->s64[HI_IDX] < 0) {
        lo_value = -b->s64[LO_IDX];
        hi_value = ~b->u64[HI_IDX] + !lo_value;
        bcd_put_digit(&ret, 0xD, 0);
    } else {
        lo_value = b->u64[LO_IDX];
        hi_value = b->u64[HI_IDX];
        bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
    }

    if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
        lo_value > 9999999999999999ULL) {
        cr = CRF_SO;
    }

    for (i = 1; i < 16; hi_value /= 10, i++) {
        bcd_put_digit(&ret, hi_value % 10, i);
    }

    for (; i < 32; lo_value /= 10, i++) {
        bcd_put_digit(&ret, lo_value % 10, i);
    }

    cr |= bcd_cmp_zero(&ret);

    *r = ret;

    return cr;
}
uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr;
    uint64_t carry;
    uint64_t unused;
    uint64_t lo_value;
    uint64_t hi_value = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);

    lo_value = bcd_get_digit(b, 31, &invalid);
    for (i = 30; i > 0; i--) {
        mulu64(&lo_value, &carry, lo_value, 10ULL);
        mulu64(&hi_value, &unused, hi_value, 10ULL);
        lo_value += bcd_get_digit(b, i, &invalid);
        hi_value += carry;

        if (unlikely(invalid)) {
            break;
        }
    }

    if (sgnb == -1) {
        r->s64[LO_IDX] = -lo_value;
        r->s64[HI_IDX] = ~hi_value + !r->s64[LO_IDX];
    } else {
        r->s64[LO_IDX] = lo_value;
        r->s64[HI_IDX] = hi_value;
    }

    cr = bcd_cmp_zero(b);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    return cr;
}
uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
        return CRF_SO;
    }

    *r = *a;
    bcd_put_digit(r, b->u8[BCD_DIG_BYTE(0)] & 0xF, 0);

    for (i = 1; i < 32; i++) {
        bcd_get_digit(a, i, &invalid);
        bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    return bcd_cmp_zero(r);
}
uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int sgnb = bcd_get_sgn(b);

    *r = *b;
    bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    return bcd_cmp_zero(r);
}
uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
#if defined(HOST_WORDS_BIGENDIAN)
    int i = a->s8[7];
#else
    int i = a->s8[8];
#endif
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.u64[LO_IDX] &= ~0xf;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], i * 4, &ox_flag);
    } else {
        urshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], -i * 4);
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}
uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i;
    int invalid = 0;
    bool ox_flag = false;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

#if defined(HOST_WORDS_BIGENDIAN)
    i = a->s8[7];
#else
    i = a->s8[8];
#endif
    if (i >= 32) {
        ox_flag = true;
        ret.u64[LO_IDX] = ret.u64[HI_IDX] = 0;
    } else if (i <= -32) {
        ret.u64[LO_IDX] = ret.u64[HI_IDX] = 0;
    } else if (i > 0) {
        ulshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], i * 4, &ox_flag);
    } else {
        urshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], -i * 4);
    }
    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}
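/*
 * bcdsr below is a signed BCD shift with rounding: on a right shift, if
 * the last digit shifted out (now sitting in the sign-nibble position) is
 * 5 or greater, one is added to the least significant remaining digit
 * before the preferred sign code is written back.
 */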
uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int unused = 0;
    int invalid = 0;
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.u64[LO_IDX] &= ~0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    int i = a->s8[7];
    ppc_avr_t bcd_one = { .u64 = { 0, 0x10 } };
#else
    int i = a->s8[8];
    ppc_avr_t bcd_one = { .u64 = { 0x10, 0 } };
#endif

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], i * 4, &ox_flag);
    } else {
        urshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], -i * 4);

        if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
            bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
        }
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    cr = bcd_cmp_zero(&ret);
    if (ox_flag) {
        cr |= CRF_SO;
    }
    *r = ret;

    return cr;
}
uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    uint64_t mask;
    uint32_t ox_flag = 0;
#if defined(HOST_WORDS_BIGENDIAN)
    int i = a->s16[3] + 1;
#else
    int i = a->s16[4] + 1;
#endif
    ppc_avr_t ret = *b;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (i > 16 && i < 32) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.u64[HI_IDX] & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.u64[HI_IDX] &= mask;
    } else if (i >= 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.u64[HI_IDX] || (ret.u64[LO_IDX] & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.u64[LO_IDX] &= mask;
        ret.u64[HI_IDX] = 0;
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
    *r = ret;

    return bcd_cmp_zero(&ret) | ox_flag;
}
uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int invalid = 0;
    uint64_t mask;
    uint32_t ox_flag = 0;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

#if defined(HOST_WORDS_BIGENDIAN)
    i = a->s16[3];
#else
    i = a->s16[4];
#endif
    if (i > 16 && i < 33) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.u64[HI_IDX] & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.u64[HI_IDX] &= mask;
    } else if (i > 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.u64[HI_IDX] || (ret.u64[LO_IDX] & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.u64[LO_IDX] &= mask;
        ret.u64[HI_IDX] = 0;
    } else if (i == 0) {
        if (ret.u64[HI_IDX] || ret.u64[LO_IDX]) {
            ox_flag = CRF_SO;
        }
        ret.u64[HI_IDX] = ret.u64[LO_IDX] = 0;
    }

    *r = ret;
    if (r->u64[HI_IDX] == 0 && r->u64[LO_IDX] == 0) {
        return ox_flag | CRF_EQ;
    }

    return ox_flag | CRF_GT;
}
void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = AES_sbox[a->u8[i]];
    }
}
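/*
 * vcipher/vcipherlast below use the combined AES lookup tables from
 * crypto/aes.h: AES_Te0..AES_Te3 fold SubBytes and MixColumns together,
 * AES_shifts applies ShiftRows, and the final XOR with b adds the round
 * key.  vcipherlast and the vncipher* helpers use the plain (inverse)
 * S-box and, for vncipher, the InvMixColumns table AES_imc.
 */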
void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        result.AVRW(i) = b->AVRW(i) ^
            (AES_Te0[a->AVRB(AES_shifts[4 * i + 0])] ^
             AES_Te1[a->AVRB(AES_shifts[4 * i + 1])] ^
             AES_Te2[a->AVRB(AES_shifts[4 * i + 2])] ^
             AES_Te3[a->AVRB(AES_shifts[4 * i + 3])]);
    }
    *r = result;
}
void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]);
    }
    *r = result;
}
void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /* This differs from what is written in ISA V2.07.  The RTL is */
    /* incorrect and will be fixed in V2.07B.                      */
    int i;
    ppc_avr_t tmp;

    VECTOR_FOR_INORDER_I(i, u8) {
        tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->AVRW(i) =
            AES_imc[tmp.AVRB(4 * i + 0)][0] ^
            AES_imc[tmp.AVRB(4 * i + 1)][1] ^
            AES_imc[tmp.AVRB(4 * i + 2)][2] ^
            AES_imc[tmp.AVRB(4 * i + 3)][3];
    }
}
void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]);
    }
    *r = result;
}
#define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32 - (n))))
#if defined(HOST_WORDS_BIGENDIAN)
#define EL_IDX(i) (i)
#else
#define EL_IDX(i) (3 - (i))
#endif
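/*
 * vshasigmaw below computes the four SHA-256 sigma functions from
 * FIPS 180-4, selected per word by the ST bit and the SIX field:
 *   sigma0(x) = ROTR7(x)  ^ ROTR18(x) ^ SHR3(x)
 *   sigma1(x) = ROTR17(x) ^ ROTR19(x) ^ SHR10(x)
 *   Sigma0(x) = ROTR2(x)  ^ ROTR13(x) ^ ROTR22(x)
 *   Sigma1(x) = ROTR6(x)  ^ ROTR11(x) ^ ROTR25(x)
 */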
void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 18) ^
                                    (a->u32[EL_IDX(i)] >> 3);
            } else { /* six.bit[i] == 1 */
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 19) ^
                                    (a->u32[EL_IDX(i)] >> 10);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> i)) == 0) {
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 13) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 22);
            } else { /* six.bit[i] == 1 */
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 11) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 25);
            }
        }
    }
}
#undef ROTRu32
#undef EL_IDX

#define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64 - (n))))
#if defined(HOST_WORDS_BIGENDIAN)
#define EL_IDX(i) (i)
#else
#define EL_IDX(i) (1 - (i))
#endif
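/*
 * vshasigmad below computes the four SHA-512 sigma functions from
 * FIPS 180-4, selected per doubleword by the ST bit and the SIX field:
 *   sigma0(x) = ROTR1(x)  ^ ROTR8(x)  ^ SHR7(x)
 *   sigma1(x) = ROTR19(x) ^ ROTR61(x) ^ SHR6(x)
 *   Sigma0(x) = ROTR28(x) ^ ROTR34(x) ^ ROTR39(x)
 *   Sigma1(x) = ROTR14(x) ^ ROTR18(x) ^ ROTR41(x)
 */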
void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    VECTOR_FOR_INORDER_I(i, u64) {
        if (st == 0) {
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 8) ^
                                    (a->u64[EL_IDX(i)] >> 7);
            } else { /* six.bit[2*i] == 1 */
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 61) ^
                                    (a->u64[EL_IDX(i)] >> 6);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 34) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 39);
            } else { /* six.bit[2*i] == 1 */
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 18) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 41);
            }
        }
    }
}

#undef ROTRu64
#undef EL_IDX
void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int indexA = c->u8[i] >> 4;
        int indexB = c->u8[i] & 0xF;
#if defined(HOST_WORDS_BIGENDIAN)
        result.u8[i] = a->u8[indexA] ^ b->u8[indexB];
#else
        result.u8[i] = a->u8[15 - indexA] ^ b->u8[15 - indexB];
#endif
    }
    *r = result;
}
#undef VECTOR_FOR_INORDER_I
#undef HI_IDX
#undef LO_IDX
/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}
static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
        (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}
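/*
 * brinc implements the SPE "bit reversed increment": within the window
 * selected by the mask in arg2, the index in arg1 is incremented as if its
 * bits were reversed, which is the address pattern typically used for
 * in-place FFT reordering.
 */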
#define MASKBITS 16 /* Arbitrary value - to be fixed (implementation dependent) */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}
uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;