/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "qemu/host-utils.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"
#include "fpu/softfloat.h"

#include "helper_regs.h"

/*****************************************************************************/
/* Fixed point operations helpers */
static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
{
    if (ov) {
        env->so = env->ov = 1;
    } else {
        env->ov = 0;
    }
}
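/*
 * divweu: divide extended word unsigned.  The dividend is (RA << 32), i.e.
 * RA concatenated with 32 zero bits, divided by the low 32 bits of RB.
 * The result is undefined (forced to 0 here) when the divisor is zero or
 * when the quotient does not fit in 32 bits; with OE set, OV/SO are updated
 * via helper_update_ov_legacy().
 */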
40 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
46 uint64_t dividend = (uint64_t)ra << 32;
47 uint64_t divisor = (uint32_t)rb;
49 if (unlikely(divisor == 0)) {
52 rt = dividend / divisor;
53 overflow = rt > UINT32_MAX;
56 if (unlikely(overflow)) {
57 rt = 0; /* Undefined */
61 helper_update_ov_legacy(env, overflow);
64 return (target_ulong)rt;
67 target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
73 int64_t dividend = (int64_t)ra << 32;
74 int64_t divisor = (int64_t)((int32_t)rb);
76 if (unlikely((divisor == 0) ||
77 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
80 rt = dividend / divisor;
81 overflow = rt != (int32_t)rt;
84 if (unlikely(overflow)) {
85 rt = 0; /* Undefined */
89 helper_update_ov_legacy(env, overflow);
92 return (target_ulong)rt;
95 #if defined(TARGET_PPC64)
97 uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
102 overflow = divu128(&rt, &ra, rb);
104 if (unlikely(overflow)) {
105 rt = 0; /* Undefined */
109 helper_update_ov_legacy(env, overflow);
115 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
118 int64_t ra = (int64_t)rau;
119 int64_t rb = (int64_t)rbu;
120 int overflow = divs128(&rt, &ra, rb);
122 if (unlikely(overflow)) {
123 rt = 0; /* Undefined */
127 helper_update_ov_legacy(env, overflow);
136 #if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
/*
 * subtract 1 from each byte, AND with the inverse, and check if the MSB is
 * set at each byte.
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 */
#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))

/* When you XOR the pattern and there is a match, that byte will be zero */
#define hasvalue(x, n)  (haszero((x) ^ pattern(n)))
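/*
 * Worked example for cmpeqb: with ra = 0x44 and rb = 0x1122334455667788,
 * rb ^ pattern(0x44) = 0x55667700112233cc.  The matching byte has become
 * 0x00, haszero() flags it, so hasvalue() is non-zero and CRF_GT is
 * returned.
 */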
150 uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
152 return hasvalue(rb, ra) ? CRF_GT : 0;
/*
 * Return an invalid random number.
 *
 * FIXME: Add rng backend or other mechanism to get cryptographically suitable
 * random numbers.
 */
164 target_ulong helper_darn32(void)
169 target_ulong helper_darn64(void)
176 #if defined(TARGET_PPC64)
178 uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
183 for (i = 0; i < 8; i++) {
184 int index = (rs >> (i*8)) & 0xFF;
186 if (rb & PPC_BIT(index)) {
196 target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
198 target_ulong mask = 0xff;
202 for (i = 0; i < sizeof(target_ulong); i++) {
203 if ((rs & mask) == (rb & mask)) {
211 /* shift right arithmetic helper */
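/*
 * CA (and CA32) is set when the shifted value is negative and any 1 bits
 * were shifted out, i.e. when the truncated result is not exact.
 */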
212 target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
217 if (likely(!(shift & 0x20))) {
218 if (likely((uint32_t)shift != 0)) {
220 ret = (int32_t)value >> shift;
221 if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
222 env->ca32 = env->ca = 0;
224 env->ca32 = env->ca = 1;
227 ret = (int32_t)value;
228 env->ca32 = env->ca = 0;
231 ret = (int32_t)value >> 31;
232 env->ca32 = env->ca = (ret != 0);
234 return (target_long)ret;
237 #if defined(TARGET_PPC64)
238 target_ulong helper_srad(CPUPPCState *env, target_ulong value,
243 if (likely(!(shift & 0x40))) {
244 if (likely((uint64_t)shift != 0)) {
246 ret = (int64_t)value >> shift;
247 if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
248 env->ca32 = env->ca = 0;
250 env->ca32 = env->ca = 1;
253 ret = (int64_t)value;
254 env->ca32 = env->ca = 0;
257 ret = (int64_t)value >> 63;
258 env->ca32 = env->ca = (ret != 0);
264 #if defined(TARGET_PPC64)
265 target_ulong helper_popcntb(target_ulong val)
267 /* Note that we don't fold past bytes */
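/*
 * Classic SWAR reduction: adjacent 1-bit, 2-bit and 4-bit fields are added
 * in parallel, so after three steps each byte holds its own population
 * count (0..8).  E.g. val = 0xff00ff00ff00ff00 gives 0x0800080008000800.
 */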
268 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
269 0x5555555555555555ULL);
270 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
271 0x3333333333333333ULL);
272 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
273 0x0f0f0f0f0f0f0f0fULL);
277 target_ulong helper_popcntw(target_ulong val)
279 /* Note that we don't fold past words. */
280 val = (val & 0x5555555555555555ULL) + ((val >> 1) &
281 0x5555555555555555ULL);
282 val = (val & 0x3333333333333333ULL) + ((val >> 2) &
283 0x3333333333333333ULL);
284 val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
285 0x0f0f0f0f0f0f0f0fULL);
286 val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
287 0x00ff00ff00ff00ffULL);
288 val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
289 0x0000ffff0000ffffULL);
293 target_ulong helper_popcntb(target_ulong val)
295 /* Note that we don't fold past bytes */
296 val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
297 val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
298 val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
303 /*****************************************************************************/
304 /* PowerPC 601 specific instructions (POWER bridge) */
305 target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
307 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
309 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
310 (int32_t)arg2 == 0) {
311 env->spr[SPR_MQ] = 0;
314 env->spr[SPR_MQ] = tmp % arg2;
315 return tmp / (int32_t)arg2;
319 target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
322 uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
324 if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
325 (int32_t)arg2 == 0) {
326 env->so = env->ov = 1;
327 env->spr[SPR_MQ] = 0;
330 env->spr[SPR_MQ] = tmp % arg2;
331 tmp /= (int32_t)arg2;
332 if ((int32_t)tmp != tmp) {
333 env->so = env->ov = 1;
341 target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
344 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
345 (int32_t)arg2 == 0) {
346 env->spr[SPR_MQ] = 0;
349 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
350 return (int32_t)arg1 / (int32_t)arg2;
354 target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
357 if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
358 (int32_t)arg2 == 0) {
359 env->so = env->ov = 1;
360 env->spr[SPR_MQ] = 0;
364 env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
365 return (int32_t)arg1 / (int32_t)arg2;
369 /*****************************************************************************/
370 /* 602 specific instructions */
/* mfrom is the craziest instruction ever seen, imho ! */
/* Real implementation uses a ROM table. Do the same */
/*
 * Extremely decomposed:
 *     return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
 */
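/*
 * For example, arg = 0 gives 256 * log10(2) + 0.5 ~= 77.6 and arg = 256
 * gives 256 * log10(1.1) + 0.5 ~= 11.1, before truncation to an integer
 * table entry.
 */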
377 #if !defined(CONFIG_USER_ONLY)
378 target_ulong helper_602_mfrom(target_ulong arg)
380 if (likely(arg < 602)) {
381 #include "mfrom_table.inc.c"
382 return mfrom_ROM_table[arg];
389 /*****************************************************************************/
390 /* Altivec extension helpers */
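/*
 * VECTOR_FOR_INORDER_I iterates over the elements of a vector in their
 * architectural (big-endian) order regardless of host endianness: forward
 * on big-endian hosts and backward on little-endian hosts.
 */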
391 #if defined(HOST_WORDS_BIGENDIAN)
392 #define VECTOR_FOR_INORDER_I(index, element) \
393 for (index = 0; index < ARRAY_SIZE(r->element); index++)
395 #define VECTOR_FOR_INORDER_I(index, element) \
396 for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
399 /* Saturating arithmetic helpers. */
400 #define SATCVT(from, to, from_type, to_type, min, max) \
401 static inline to_type cvt##from##to(from_type x, int *sat) \
405 if (x < (from_type)min) { \
408 } else if (x > (from_type)max) { \
416 #define SATCVTU(from, to, from_type, to_type, min, max) \
417 static inline to_type cvt##from##to(from_type x, int *sat) \
421 if (x > (from_type)max) { \
429 SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
430 SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
431 SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
433 SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
434 SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
435 SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
436 SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
437 SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
438 SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
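/*
 * Example: cvtsdsw() narrows a signed 64-bit value to 32 bits with
 * saturation, so cvtsdsw(0x123456789, &sat) returns INT32_MAX and sets
 * *sat, while in-range values pass through unchanged.
 */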
442 void helper_lvsl(ppc_avr_t *r, target_ulong sh)
444 int i, j = (sh & 0xf);
446 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
451 void helper_lvsr(ppc_avr_t *r, target_ulong sh)
453 int i, j = 0x10 - (sh & 0xf);
455 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
460 void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
462 env->vscr = r->VsrW(3);
463 set_flush_to_zero(vscr_nj, &env->vec_status);
466 void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
470 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
471 r->u32[i] = ~a->u32[i] < b->u32[i];
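/*
 * The comparison ~a < b above is the carry out of the 32-bit addition
 * a + b: it is true exactly when a + b > UINT32_MAX, without needing a
 * wider intermediate type.
 */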
476 void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
479 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
480 uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
487 void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
490 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
491 uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
499 void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
501 uint64_t res = b->u64[0] ^ b->u64[1];
505 r->VsrD(1) = res & 1;
509 #define VARITH_DO(name, op, element) \
510 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
514 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
515 r->element[i] = a->element[i] op b->element[i]; \
518 #define VARITH(suffix, element) \
519 VARITH_DO(add##suffix, +, element) \
520 VARITH_DO(sub##suffix, -, element)
525 VARITH_DO(muluwm, *, u32)
529 #define VARITHFP(suffix, func) \
530 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
535 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
536 r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status); \
539 VARITHFP(addfp, float32_add)
540 VARITHFP(subfp, float32_sub)
541 VARITHFP(minfp, float32_min)
542 VARITHFP(maxfp, float32_max)
545 #define VARITHFPFMA(suffix, type) \
546 void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
547 ppc_avr_t *b, ppc_avr_t *c) \
550 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
551 r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
552 type, &env->vec_status); \
555 VARITHFPFMA(maddfp, 0);
556 VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
559 #define VARITHSAT_CASE(type, op, cvt, element) \
561 type result = (type)a->element[i] op (type)b->element[i]; \
562 r->element[i] = cvt(result, &sat); \
565 #define VARITHSAT_DO(name, op, optype, cvt, element) \
566 void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
572 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
573 switch (sizeof(r->element[0])) { \
575 VARITHSAT_CASE(optype, op, cvt, element); \
578 VARITHSAT_CASE(optype, op, cvt, element); \
581 VARITHSAT_CASE(optype, op, cvt, element); \
586 env->vscr |= (1 << VSCR_SAT); \
589 #define VARITHSAT_SIGNED(suffix, element, optype, cvt) \
590 VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \
591 VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
592 #define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \
593 VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \
594 VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
595 VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
596 VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
597 VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
598 VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
599 VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
600 VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
601 #undef VARITHSAT_CASE
603 #undef VARITHSAT_SIGNED
604 #undef VARITHSAT_UNSIGNED
606 #define VAVG_DO(name, element, etype) \
607 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
611 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
612 etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \
613 r->element[i] = x >> 1; \
617 #define VAVG(type, signed_element, signed_type, unsigned_element, \
619 VAVG_DO(avgs##type, signed_element, signed_type) \
620 VAVG_DO(avgu##type, unsigned_element, unsigned_type)
621 VAVG(b, s8, int16_t, u8, uint16_t)
622 VAVG(h, s16, int32_t, u16, uint32_t)
623 VAVG(w, s32, int64_t, u32, uint64_t)
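/*
 * The +1 before the shift rounds the average up on .5, e.g. vavgub of 1
 * and 2 yields (1 + 2 + 1) >> 1 = 2; the wider etype keeps the
 * intermediate sum from overflowing.
 */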
627 #define VABSDU_DO(name, element) \
628 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
632 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
633 r->element[i] = (a->element[i] > b->element[i]) ? \
634 (a->element[i] - b->element[i]) : \
635 (b->element[i] - a->element[i]); \
/*
 * VABSDU - Vector absolute difference unsigned
 *   name    - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
643 #define VABSDU(type, element) \
644 VABSDU_DO(absdu##type, element)
651 #define VCF(suffix, cvt, element) \
652 void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r, \
653 ppc_avr_t *b, uint32_t uim) \
657 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
658 float32 t = cvt(b->element[i], &env->vec_status); \
659 r->f32[i] = float32_scalbn(t, -uim, &env->vec_status); \
662 VCF(ux, uint32_to_float32, u32)
663 VCF(sx, int32_to_float32, s32)
666 #define VCMP_DO(suffix, compare, element, record) \
667 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
668 ppc_avr_t *a, ppc_avr_t *b) \
670 uint64_t ones = (uint64_t)-1; \
671 uint64_t all = ones; \
675 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
676 uint64_t result = (a->element[i] compare b->element[i] ? \
678 switch (sizeof(a->element[0])) { \
680 r->u64[i] = result; \
683 r->u32[i] = result; \
686 r->u16[i] = result; \
696 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
699 #define VCMP(suffix, compare, element) \
700 VCMP_DO(suffix, compare, element, 0) \
701 VCMP_DO(suffix##_dot, compare, element, 1)
717 #define VCMPNE_DO(suffix, element, etype, cmpzero, record) \
718 void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r, \
719 ppc_avr_t *a, ppc_avr_t *b) \
721 etype ones = (etype)-1; \
723 etype result, none = 0; \
726 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
728 result = ((a->element[i] == 0) \
729 || (b->element[i] == 0) \
730 || (a->element[i] != b->element[i]) ? \
733 result = (a->element[i] != b->element[i]) ? ones : 0x0; \
735 r->element[i] = result; \
740 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
/*
 * VCMPNEZ - Vector compare not equal to zero
 *   suffix  - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
748 #define VCMPNE(suffix, element, etype, cmpzero) \
749 VCMPNE_DO(suffix, element, etype, cmpzero, 0) \
750 VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
751 VCMPNE(zb, u8, uint8_t, 1)
752 VCMPNE(zh, u16, uint16_t, 1)
753 VCMPNE(zw, u32, uint32_t, 1)
754 VCMPNE(b, u8, uint8_t, 0)
755 VCMPNE(h, u16, uint16_t, 0)
756 VCMPNE(w, u32, uint32_t, 0)
760 #define VCMPFP_DO(suffix, compare, order, record) \
761 void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r, \
762 ppc_avr_t *a, ppc_avr_t *b) \
764 uint32_t ones = (uint32_t)-1; \
765 uint32_t all = ones; \
769 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
771 int rel = float32_compare_quiet(a->f32[i], b->f32[i], \
773 if (rel == float_relation_unordered) { \
775 } else if (rel compare order) { \
780 r->u32[i] = result; \
785 env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \
788 #define VCMPFP(suffix, compare, order) \
789 VCMPFP_DO(suffix, compare, order, 0) \
790 VCMPFP_DO(suffix##_dot, compare, order, 1)
791 VCMPFP(eqfp, ==, float_relation_equal)
792 VCMPFP(gefp, !=, float_relation_less)
793 VCMPFP(gtfp, ==, float_relation_greater)
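/*
 * Note that vcmpgefp is expressed as "relation is not less-than": unordered
 * operands are rejected explicitly in the macro, so the remaining equal and
 * greater cases are exactly the >= result.
 */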
797 static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
798 ppc_avr_t *a, ppc_avr_t *b, int record)
803 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
804 int le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
806 if (le_rel == float_relation_unordered) {
807 r->u32[i] = 0xc0000000;
810 float32 bneg = float32_chs(b->f32[i]);
811 int ge_rel = float32_compare_quiet(a->f32[i], bneg,
813 int le = le_rel != float_relation_greater;
814 int ge = ge_rel != float_relation_less;
816 r->u32[i] = ((!le) << 31) | ((!ge) << 30);
817 all_in |= (!le | !ge);
821 env->crf[6] = (all_in == 0) << 1;
825 void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
827 vcmpbfp_internal(env, r, a, b, 0);
830 void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
833 vcmpbfp_internal(env, r, a, b, 1);
836 #define VCT(suffix, satcvt, element) \
837 void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r, \
838 ppc_avr_t *b, uint32_t uim) \
842 float_status s = env->vec_status; \
844 set_float_rounding_mode(float_round_to_zero, &s); \
845 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
846 if (float32_is_any_nan(b->f32[i])) { \
849 float64 t = float32_to_float64(b->f32[i], &s); \
852 t = float64_scalbn(t, uim, &s); \
853 j = float64_to_int64(t, &s); \
854 r->element[i] = satcvt(j, &sat); \
858 env->vscr |= (1 << VSCR_SAT); \
861 VCT(uxs, cvtsduw, u32)
862 VCT(sxs, cvtsdsw, s32)
865 target_ulong helper_vclzlsbb(ppc_avr_t *r)
867 target_ulong count = 0;
869 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
870 if (r->VsrB(i) & 0x01) {
878 target_ulong helper_vctzlsbb(ppc_avr_t *r)
880 target_ulong count = 0;
882 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
883 if (r->VsrB(i) & 0x01) {
891 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
892 ppc_avr_t *b, ppc_avr_t *c)
897 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
898 int32_t prod = a->s16[i] * b->s16[i];
899 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
901 r->s16[i] = cvtswsh(t, &sat);
905 env->vscr |= (1 << VSCR_SAT);
909 void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
910 ppc_avr_t *b, ppc_avr_t *c)
915 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
916 int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
917 int32_t t = (int32_t)c->s16[i] + (prod >> 15);
918 r->s16[i] = cvtswsh(t, &sat);
922 env->vscr |= (1 << VSCR_SAT);
926 #define VMINMAX_DO(name, compare, element) \
927 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
931 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
932 if (a->element[i] compare b->element[i]) { \
933 r->element[i] = b->element[i]; \
935 r->element[i] = a->element[i]; \
939 #define VMINMAX(suffix, element) \
940 VMINMAX_DO(min##suffix, >, element) \
941 VMINMAX_DO(max##suffix, <, element)
953 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
957 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
958 int32_t prod = a->s16[i] * b->s16[i];
959 r->s16[i] = (int16_t) (prod + c->s16[i]);
963 #define VMRG_DO(name, element, access, ofs) \
964 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
967 int i, half = ARRAY_SIZE(r->element) / 2; \
969 for (i = 0; i < half; i++) { \
970 result.access(i * 2 + 0) = a->access(i + ofs); \
971 result.access(i * 2 + 1) = b->access(i + ofs); \
976 #define VMRG(suffix, element, access) \
977 VMRG_DO(mrgl##suffix, element, access, half) \
978 VMRG_DO(mrgh##suffix, element, access, 0)
985 void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
986 ppc_avr_t *b, ppc_avr_t *c)
991 for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
992 prod[i] = (int32_t)a->s8[i] * b->u8[i];
995 VECTOR_FOR_INORDER_I(i, s32) {
996 r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
997 prod[4 * i + 2] + prod[4 * i + 3];
1001 void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1002 ppc_avr_t *b, ppc_avr_t *c)
1007 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1008 prod[i] = a->s16[i] * b->s16[i];
1011 VECTOR_FOR_INORDER_I(i, s32) {
1012 r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1016 void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1017 ppc_avr_t *b, ppc_avr_t *c)
1023 for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1024 prod[i] = (int32_t)a->s16[i] * b->s16[i];
1027 VECTOR_FOR_INORDER_I(i, s32) {
1028 int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1030 r->u32[i] = cvtsdsw(t, &sat);
1034 env->vscr |= (1 << VSCR_SAT);
1038 void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1039 ppc_avr_t *b, ppc_avr_t *c)
1044 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1045 prod[i] = a->u8[i] * b->u8[i];
1048 VECTOR_FOR_INORDER_I(i, u32) {
1049 r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1050 prod[4 * i + 2] + prod[4 * i + 3];
1054 void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1055 ppc_avr_t *b, ppc_avr_t *c)
1060 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1061 prod[i] = a->u16[i] * b->u16[i];
1064 VECTOR_FOR_INORDER_I(i, u32) {
1065 r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1069 void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1070 ppc_avr_t *b, ppc_avr_t *c)
1076 for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1077 prod[i] = a->u16[i] * b->u16[i];
1080 VECTOR_FOR_INORDER_I(i, s32) {
1081 uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1083 r->u32[i] = cvtuduw(t, &sat);
1087 env->vscr |= (1 << VSCR_SAT);
1091 #define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast) \
1092 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1096 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1097 r->prod_access(i >> 1) = (cast)a->mul_access(i) * \
1098 (cast)b->mul_access(i); \
1102 #define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast) \
1103 void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1107 for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) { \
1108 r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) * \
1109 (cast)b->mul_access(i + 1); \
1113 #define VMUL(suffix, mul_element, mul_access, prod_access, cast) \
1114 VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast) \
1115 VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast)
1116 VMUL(sb, s8, VsrSB, VsrSH, int16_t)
1117 VMUL(sh, s16, VsrSH, VsrSW, int32_t)
1118 VMUL(sw, s32, VsrSW, VsrSD, int64_t)
1119 VMUL(ub, u8, VsrB, VsrH, uint16_t)
1120 VMUL(uh, u16, VsrH, VsrW, uint32_t)
1121 VMUL(uw, u32, VsrW, VsrD, uint64_t)
1126 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1132 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1133 int s = c->VsrB(i) & 0x1f;
1134 int index = s & 0xf;
1137 result.VsrB(i) = b->VsrB(index);
1139 result.VsrB(i) = a->VsrB(index);
1145 void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1151 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1152 int s = c->VsrB(i) & 0x1f;
1153 int index = 15 - (s & 0xf);
1156 result.VsrB(i) = a->VsrB(index);
1158 result.VsrB(i) = b->VsrB(index);
1164 #if defined(HOST_WORDS_BIGENDIAN)
1165 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1166 #define VBPERMD_INDEX(i) (i)
1167 #define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1168 #define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
1170 #define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
1171 #define VBPERMD_INDEX(i) (1 - i)
1172 #define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1173 #define EXTRACT_BIT(avr, i, index) \
1174 (extract64((avr)->u64[1 - i], 63 - index, 1))
1177 void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1180 ppc_avr_t result = { .u64 = { 0, 0 } };
1181 VECTOR_FOR_INORDER_I(i, u64) {
1182 for (j = 0; j < 8; j++) {
1183 int index = VBPERMQ_INDEX(b, (i * 8) + j);
1184 if (index < 64 && EXTRACT_BIT(a, i, index)) {
1185 result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1192 void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1197 VECTOR_FOR_INORDER_I(i, u8) {
1198 int index = VBPERMQ_INDEX(b, i);
1201 uint64_t mask = (1ull << (63-(index & 0x3F)));
1202 if (a->u64[VBPERMQ_DW(index)] & mask) {
1203 perm |= (0x8000 >> i);
1212 #undef VBPERMQ_INDEX
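/*
 * VGBBD_MASKS[n] scatters the 8 bits of n so that bit j of n lands in the
 * most-significant bit of byte j of a doubleword (e.g. 0x03 becomes
 * 0x0000000000008080).  helper_vgbbd() shifts each mask right by the source
 * byte's index, which amounts to transposing each doubleword viewed as an
 * 8x8 bit matrix.
 */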
1215 static const uint64_t VGBBD_MASKS[256] = {
1216 0x0000000000000000ull, /* 00 */
1217 0x0000000000000080ull, /* 01 */
1218 0x0000000000008000ull, /* 02 */
1219 0x0000000000008080ull, /* 03 */
1220 0x0000000000800000ull, /* 04 */
1221 0x0000000000800080ull, /* 05 */
1222 0x0000000000808000ull, /* 06 */
1223 0x0000000000808080ull, /* 07 */
1224 0x0000000080000000ull, /* 08 */
1225 0x0000000080000080ull, /* 09 */
1226 0x0000000080008000ull, /* 0A */
1227 0x0000000080008080ull, /* 0B */
1228 0x0000000080800000ull, /* 0C */
1229 0x0000000080800080ull, /* 0D */
1230 0x0000000080808000ull, /* 0E */
1231 0x0000000080808080ull, /* 0F */
1232 0x0000008000000000ull, /* 10 */
1233 0x0000008000000080ull, /* 11 */
1234 0x0000008000008000ull, /* 12 */
1235 0x0000008000008080ull, /* 13 */
1236 0x0000008000800000ull, /* 14 */
1237 0x0000008000800080ull, /* 15 */
1238 0x0000008000808000ull, /* 16 */
1239 0x0000008000808080ull, /* 17 */
1240 0x0000008080000000ull, /* 18 */
1241 0x0000008080000080ull, /* 19 */
1242 0x0000008080008000ull, /* 1A */
1243 0x0000008080008080ull, /* 1B */
1244 0x0000008080800000ull, /* 1C */
1245 0x0000008080800080ull, /* 1D */
1246 0x0000008080808000ull, /* 1E */
1247 0x0000008080808080ull, /* 1F */
1248 0x0000800000000000ull, /* 20 */
1249 0x0000800000000080ull, /* 21 */
1250 0x0000800000008000ull, /* 22 */
1251 0x0000800000008080ull, /* 23 */
1252 0x0000800000800000ull, /* 24 */
1253 0x0000800000800080ull, /* 25 */
1254 0x0000800000808000ull, /* 26 */
1255 0x0000800000808080ull, /* 27 */
1256 0x0000800080000000ull, /* 28 */
1257 0x0000800080000080ull, /* 29 */
1258 0x0000800080008000ull, /* 2A */
1259 0x0000800080008080ull, /* 2B */
1260 0x0000800080800000ull, /* 2C */
1261 0x0000800080800080ull, /* 2D */
1262 0x0000800080808000ull, /* 2E */
1263 0x0000800080808080ull, /* 2F */
1264 0x0000808000000000ull, /* 30 */
1265 0x0000808000000080ull, /* 31 */
1266 0x0000808000008000ull, /* 32 */
1267 0x0000808000008080ull, /* 33 */
1268 0x0000808000800000ull, /* 34 */
1269 0x0000808000800080ull, /* 35 */
1270 0x0000808000808000ull, /* 36 */
1271 0x0000808000808080ull, /* 37 */
1272 0x0000808080000000ull, /* 38 */
1273 0x0000808080000080ull, /* 39 */
1274 0x0000808080008000ull, /* 3A */
1275 0x0000808080008080ull, /* 3B */
1276 0x0000808080800000ull, /* 3C */
1277 0x0000808080800080ull, /* 3D */
1278 0x0000808080808000ull, /* 3E */
1279 0x0000808080808080ull, /* 3F */
1280 0x0080000000000000ull, /* 40 */
1281 0x0080000000000080ull, /* 41 */
1282 0x0080000000008000ull, /* 42 */
1283 0x0080000000008080ull, /* 43 */
1284 0x0080000000800000ull, /* 44 */
1285 0x0080000000800080ull, /* 45 */
1286 0x0080000000808000ull, /* 46 */
1287 0x0080000000808080ull, /* 47 */
1288 0x0080000080000000ull, /* 48 */
1289 0x0080000080000080ull, /* 49 */
1290 0x0080000080008000ull, /* 4A */
1291 0x0080000080008080ull, /* 4B */
1292 0x0080000080800000ull, /* 4C */
1293 0x0080000080800080ull, /* 4D */
1294 0x0080000080808000ull, /* 4E */
1295 0x0080000080808080ull, /* 4F */
1296 0x0080008000000000ull, /* 50 */
1297 0x0080008000000080ull, /* 51 */
1298 0x0080008000008000ull, /* 52 */
1299 0x0080008000008080ull, /* 53 */
1300 0x0080008000800000ull, /* 54 */
1301 0x0080008000800080ull, /* 55 */
1302 0x0080008000808000ull, /* 56 */
1303 0x0080008000808080ull, /* 57 */
1304 0x0080008080000000ull, /* 58 */
1305 0x0080008080000080ull, /* 59 */
1306 0x0080008080008000ull, /* 5A */
1307 0x0080008080008080ull, /* 5B */
1308 0x0080008080800000ull, /* 5C */
1309 0x0080008080800080ull, /* 5D */
1310 0x0080008080808000ull, /* 5E */
1311 0x0080008080808080ull, /* 5F */
1312 0x0080800000000000ull, /* 60 */
1313 0x0080800000000080ull, /* 61 */
1314 0x0080800000008000ull, /* 62 */
1315 0x0080800000008080ull, /* 63 */
1316 0x0080800000800000ull, /* 64 */
1317 0x0080800000800080ull, /* 65 */
1318 0x0080800000808000ull, /* 66 */
1319 0x0080800000808080ull, /* 67 */
1320 0x0080800080000000ull, /* 68 */
1321 0x0080800080000080ull, /* 69 */
1322 0x0080800080008000ull, /* 6A */
1323 0x0080800080008080ull, /* 6B */
1324 0x0080800080800000ull, /* 6C */
1325 0x0080800080800080ull, /* 6D */
1326 0x0080800080808000ull, /* 6E */
1327 0x0080800080808080ull, /* 6F */
1328 0x0080808000000000ull, /* 70 */
1329 0x0080808000000080ull, /* 71 */
1330 0x0080808000008000ull, /* 72 */
1331 0x0080808000008080ull, /* 73 */
1332 0x0080808000800000ull, /* 74 */
1333 0x0080808000800080ull, /* 75 */
1334 0x0080808000808000ull, /* 76 */
1335 0x0080808000808080ull, /* 77 */
1336 0x0080808080000000ull, /* 78 */
1337 0x0080808080000080ull, /* 79 */
1338 0x0080808080008000ull, /* 7A */
1339 0x0080808080008080ull, /* 7B */
1340 0x0080808080800000ull, /* 7C */
1341 0x0080808080800080ull, /* 7D */
1342 0x0080808080808000ull, /* 7E */
1343 0x0080808080808080ull, /* 7F */
1344 0x8000000000000000ull, /* 80 */
1345 0x8000000000000080ull, /* 81 */
1346 0x8000000000008000ull, /* 82 */
1347 0x8000000000008080ull, /* 83 */
1348 0x8000000000800000ull, /* 84 */
1349 0x8000000000800080ull, /* 85 */
1350 0x8000000000808000ull, /* 86 */
1351 0x8000000000808080ull, /* 87 */
1352 0x8000000080000000ull, /* 88 */
1353 0x8000000080000080ull, /* 89 */
1354 0x8000000080008000ull, /* 8A */
1355 0x8000000080008080ull, /* 8B */
1356 0x8000000080800000ull, /* 8C */
1357 0x8000000080800080ull, /* 8D */
1358 0x8000000080808000ull, /* 8E */
1359 0x8000000080808080ull, /* 8F */
1360 0x8000008000000000ull, /* 90 */
1361 0x8000008000000080ull, /* 91 */
1362 0x8000008000008000ull, /* 92 */
1363 0x8000008000008080ull, /* 93 */
1364 0x8000008000800000ull, /* 94 */
1365 0x8000008000800080ull, /* 95 */
1366 0x8000008000808000ull, /* 96 */
1367 0x8000008000808080ull, /* 97 */
1368 0x8000008080000000ull, /* 98 */
1369 0x8000008080000080ull, /* 99 */
1370 0x8000008080008000ull, /* 9A */
1371 0x8000008080008080ull, /* 9B */
1372 0x8000008080800000ull, /* 9C */
1373 0x8000008080800080ull, /* 9D */
1374 0x8000008080808000ull, /* 9E */
1375 0x8000008080808080ull, /* 9F */
1376 0x8000800000000000ull, /* A0 */
1377 0x8000800000000080ull, /* A1 */
1378 0x8000800000008000ull, /* A2 */
1379 0x8000800000008080ull, /* A3 */
1380 0x8000800000800000ull, /* A4 */
1381 0x8000800000800080ull, /* A5 */
1382 0x8000800000808000ull, /* A6 */
1383 0x8000800000808080ull, /* A7 */
1384 0x8000800080000000ull, /* A8 */
1385 0x8000800080000080ull, /* A9 */
1386 0x8000800080008000ull, /* AA */
1387 0x8000800080008080ull, /* AB */
1388 0x8000800080800000ull, /* AC */
1389 0x8000800080800080ull, /* AD */
1390 0x8000800080808000ull, /* AE */
1391 0x8000800080808080ull, /* AF */
1392 0x8000808000000000ull, /* B0 */
1393 0x8000808000000080ull, /* B1 */
1394 0x8000808000008000ull, /* B2 */
1395 0x8000808000008080ull, /* B3 */
1396 0x8000808000800000ull, /* B4 */
1397 0x8000808000800080ull, /* B5 */
1398 0x8000808000808000ull, /* B6 */
1399 0x8000808000808080ull, /* B7 */
1400 0x8000808080000000ull, /* B8 */
1401 0x8000808080000080ull, /* B9 */
1402 0x8000808080008000ull, /* BA */
1403 0x8000808080008080ull, /* BB */
1404 0x8000808080800000ull, /* BC */
1405 0x8000808080800080ull, /* BD */
1406 0x8000808080808000ull, /* BE */
1407 0x8000808080808080ull, /* BF */
1408 0x8080000000000000ull, /* C0 */
1409 0x8080000000000080ull, /* C1 */
1410 0x8080000000008000ull, /* C2 */
1411 0x8080000000008080ull, /* C3 */
1412 0x8080000000800000ull, /* C4 */
1413 0x8080000000800080ull, /* C5 */
1414 0x8080000000808000ull, /* C6 */
1415 0x8080000000808080ull, /* C7 */
1416 0x8080000080000000ull, /* C8 */
1417 0x8080000080000080ull, /* C9 */
1418 0x8080000080008000ull, /* CA */
1419 0x8080000080008080ull, /* CB */
1420 0x8080000080800000ull, /* CC */
1421 0x8080000080800080ull, /* CD */
1422 0x8080000080808000ull, /* CE */
1423 0x8080000080808080ull, /* CF */
1424 0x8080008000000000ull, /* D0 */
1425 0x8080008000000080ull, /* D1 */
1426 0x8080008000008000ull, /* D2 */
1427 0x8080008000008080ull, /* D3 */
1428 0x8080008000800000ull, /* D4 */
1429 0x8080008000800080ull, /* D5 */
1430 0x8080008000808000ull, /* D6 */
1431 0x8080008000808080ull, /* D7 */
1432 0x8080008080000000ull, /* D8 */
1433 0x8080008080000080ull, /* D9 */
1434 0x8080008080008000ull, /* DA */
1435 0x8080008080008080ull, /* DB */
1436 0x8080008080800000ull, /* DC */
1437 0x8080008080800080ull, /* DD */
1438 0x8080008080808000ull, /* DE */
1439 0x8080008080808080ull, /* DF */
1440 0x8080800000000000ull, /* E0 */
1441 0x8080800000000080ull, /* E1 */
1442 0x8080800000008000ull, /* E2 */
1443 0x8080800000008080ull, /* E3 */
1444 0x8080800000800000ull, /* E4 */
1445 0x8080800000800080ull, /* E5 */
1446 0x8080800000808000ull, /* E6 */
1447 0x8080800000808080ull, /* E7 */
1448 0x8080800080000000ull, /* E8 */
1449 0x8080800080000080ull, /* E9 */
1450 0x8080800080008000ull, /* EA */
1451 0x8080800080008080ull, /* EB */
1452 0x8080800080800000ull, /* EC */
1453 0x8080800080800080ull, /* ED */
1454 0x8080800080808000ull, /* EE */
1455 0x8080800080808080ull, /* EF */
1456 0x8080808000000000ull, /* F0 */
1457 0x8080808000000080ull, /* F1 */
1458 0x8080808000008000ull, /* F2 */
1459 0x8080808000008080ull, /* F3 */
1460 0x8080808000800000ull, /* F4 */
1461 0x8080808000800080ull, /* F5 */
1462 0x8080808000808000ull, /* F6 */
1463 0x8080808000808080ull, /* F7 */
1464 0x8080808080000000ull, /* F8 */
1465 0x8080808080000080ull, /* F9 */
1466 0x8080808080008000ull, /* FA */
1467 0x8080808080008080ull, /* FB */
1468 0x8080808080800000ull, /* FC */
1469 0x8080808080800080ull, /* FD */
1470 0x8080808080808000ull, /* FE */
1471 0x8080808080808080ull, /* FF */
1474 void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
1477 uint64_t t[2] = { 0, 0 };
1479 VECTOR_FOR_INORDER_I(i, u8) {
1480 #if defined(HOST_WORDS_BIGENDIAN)
1481 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
1483 t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
1491 #define PMSUM(name, srcfld, trgfld, trgtyp) \
1492 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1495 trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])]; \
1497 VECTOR_FOR_INORDER_I(i, srcfld) { \
1499 for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) { \
1500 if (a->srcfld[i] & (1ull<<j)) { \
1501 prod[i] ^= ((trgtyp)b->srcfld[i] << j); \
1506 VECTOR_FOR_INORDER_I(i, trgfld) { \
1507 r->trgfld[i] = prod[2*i] ^ prod[2*i+1]; \
1511 PMSUM(vpmsumb, u8, u16, uint16_t)
1512 PMSUM(vpmsumh, u16, u32, uint32_t)
1513 PMSUM(vpmsumw, u32, u64, uint64_t)
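/*
 * The PMSUM helpers implement carry-less (polynomial, GF(2)) multiplication:
 * partial products are combined with XOR rather than addition, and each pair
 * of even/odd products is XORed into one element of the wider target type.
 */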
1515 void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1518 #ifdef CONFIG_INT128
1520 __uint128_t prod[2];
1522 VECTOR_FOR_INORDER_I(i, u64) {
1524 for (j = 0; j < 64; j++) {
1525 if (a->u64[i] & (1ull<<j)) {
1526 prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1531 r->u128 = prod[0] ^ prod[1];
1537 VECTOR_FOR_INORDER_I(i, u64) {
1538 prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
1539 for (j = 0; j < 64; j++) {
1540 if (a->u64[i] & (1ull<<j)) {
1544 bshift.VsrD(1) = b->u64[i];
1546 bshift.VsrD(0) = b->u64[i] >> (64 - j);
1547 bshift.VsrD(1) = b->u64[i] << j;
1549 prod[i].VsrD(1) ^= bshift.VsrD(1);
1550 prod[i].VsrD(0) ^= bshift.VsrD(0);
1555 r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
1556 r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
1561 #if defined(HOST_WORDS_BIGENDIAN)
1566 void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1570 #if defined(HOST_WORDS_BIGENDIAN)
1571 const ppc_avr_t *x[2] = { a, b };
1573 const ppc_avr_t *x[2] = { b, a };
1576 VECTOR_FOR_INORDER_I(i, u64) {
1577 VECTOR_FOR_INORDER_I(j, u32) {
1578 uint32_t e = x[i]->u32[j];
1580 result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
1581 ((e >> 6) & 0x3e0) |
1588 #define VPK(suffix, from, to, cvt, dosat) \
1589 void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r, \
1590 ppc_avr_t *a, ppc_avr_t *b) \
1595 ppc_avr_t *a0 = PKBIG ? a : b; \
1596 ppc_avr_t *a1 = PKBIG ? b : a; \
1598 VECTOR_FOR_INORDER_I(i, from) { \
1599 result.to[i] = cvt(a0->from[i], &sat); \
1600 result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \
1603 if (dosat && sat) { \
1604 env->vscr |= (1 << VSCR_SAT); \
1608 VPK(shss, s16, s8, cvtshsb, 1)
1609 VPK(shus, s16, u8, cvtshub, 1)
1610 VPK(swss, s32, s16, cvtswsh, 1)
1611 VPK(swus, s32, u16, cvtswuh, 1)
1612 VPK(sdss, s64, s32, cvtsdsw, 1)
1613 VPK(sdus, s64, u32, cvtsduw, 1)
1614 VPK(uhus, u16, u8, cvtuhub, 1)
1615 VPK(uwus, u32, u16, cvtuwuh, 1)
1616 VPK(udus, u64, u32, cvtuduw, 1)
1617 VPK(uhum, u16, u8, I, 0)
1618 VPK(uwum, u32, u16, I, 0)
1619 VPK(udum, u64, u32, I, 0)
1624 void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1628 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1629 r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
1633 #define VRFI(suffix, rounding) \
1634 void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r, \
1638 float_status s = env->vec_status; \
1640 set_float_rounding_mode(rounding, &s); \
1641 for (i = 0; i < ARRAY_SIZE(r->f32); i++) { \
1642 r->f32[i] = float32_round_to_int (b->f32[i], &s); \
1645 VRFI(n, float_round_nearest_even)
1646 VRFI(m, float_round_down)
1647 VRFI(p, float_round_up)
1648 VRFI(z, float_round_to_zero)
1651 #define VROTATE(suffix, element, mask) \
1652 void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1656 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1657 unsigned int shift = b->element[i] & mask; \
1658 r->element[i] = (a->element[i] << shift) | \
1659 (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1663 VROTATE(h, u16, 0xF)
1664 VROTATE(w, u32, 0x1F)
1665 VROTATE(d, u64, 0x3F)
1668 void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1672 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1673 float32 t = float32_sqrt(b->f32[i], &env->vec_status);
1675 r->f32[i] = float32_div(float32_one, t, &env->vec_status);
1679 #define VRLMI(name, size, element, insert) \
1680 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1683 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1684 uint##size##_t src1 = a->element[i]; \
1685 uint##size##_t src2 = b->element[i]; \
1686 uint##size##_t src3 = r->element[i]; \
1687 uint##size##_t begin, end, shift, mask, rot_val; \
1689 shift = extract##size(src2, 0, 6); \
1690 end = extract##size(src2, 8, 6); \
1691 begin = extract##size(src2, 16, 6); \
1692 rot_val = rol##size(src1, shift); \
1693 mask = mask_u##size(begin, end); \
1695 r->element[i] = (rot_val & mask) | (src3 & ~mask); \
1697 r->element[i] = (rot_val & mask); \
1702 VRLMI(vrldmi, 64, u64, 1);
1703 VRLMI(vrlwmi, 32, u32, 1);
1704 VRLMI(vrldnm, 64, u64, 0);
1705 VRLMI(vrlwnm, 32, u32, 0);
1707 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1710 r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1711 r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1714 void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1718 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1719 r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
1723 void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1727 for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1728 r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
1732 #if defined(HOST_WORDS_BIGENDIAN)
1733 #define VEXTU_X_DO(name, size, left) \
1734 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1738 index = (a & 0xf) * 8; \
1740 index = ((15 - (a & 0xf) + 1) * 8) - size; \
1742 return int128_getlo(int128_rshift(b->s128, index)) & \
1743 MAKE_64BIT_MASK(0, size); \
1746 #define VEXTU_X_DO(name, size, left) \
1747 target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b) \
1751 index = ((15 - (a & 0xf) + 1) * 8) - size; \
1753 index = (a & 0xf) * 8; \
1755 return int128_getlo(int128_rshift(b->s128, index)) & \
1756 MAKE_64BIT_MASK(0, size); \
1760 VEXTU_X_DO(vextublx, 8, 1)
1761 VEXTU_X_DO(vextuhlx, 16, 1)
1762 VEXTU_X_DO(vextuwlx, 32, 1)
1763 VEXTU_X_DO(vextubrx, 8, 0)
1764 VEXTU_X_DO(vextuhrx, 16, 0)
1765 VEXTU_X_DO(vextuwrx, 32, 0)
/*
 * The specification says that the results are undefined if all of the
 * shift counts are not identical.  We check that they are, to conform to
 * what real hardware appears to do.
 */
1771 #define VSHIFT(suffix, leftp) \
1772 void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1774 int shift = b->VsrB(15) & 0x7; \
1778 for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \
1779 doit = doit && ((b->u8[i] & 0x7) == shift); \
1784 } else if (leftp) { \
1785 uint64_t carry = a->VsrD(1) >> (64 - shift); \
1787 r->VsrD(0) = (a->VsrD(0) << shift) | carry; \
1788 r->VsrD(1) = a->VsrD(1) << shift; \
1790 uint64_t carry = a->VsrD(0) << (64 - shift); \
1792 r->VsrD(1) = (a->VsrD(1) >> shift) | carry; \
1793 r->VsrD(0) = a->VsrD(0) >> shift; \
1801 #define VSL(suffix, element, mask) \
1802 void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
1806 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1807 unsigned int shift = b->element[i] & mask; \
1809 r->element[i] = a->element[i] << shift; \
1818 void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1821 unsigned int shift, bytes, size;
1823 size = ARRAY_SIZE(r->u8);
1824 for (i = 0; i < size; i++) {
1825 shift = b->u8[i] & 0x7; /* extract shift value */
1826 bytes = (a->u8[i] << 8) + /* extract adjacent bytes */
1827 (((i + 1) < size) ? a->u8[i + 1] : 0);
1828 r->u8[i] = (bytes << shift) >> 8; /* shift and store result */
1832 void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1835 unsigned int shift, bytes;
/*
 * Use reverse order, as the destination and source registers can be the
 * same.  Since the register is modified in place (saving a temporary),
 * iterating in reverse guarantees that a computed result is not fed back
 * into a later step.
 */
1841 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1842 shift = b->u8[i] & 0x7; /* extract shift value */
1843 bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i];
1844 /* extract adjacent bytes */
1845 r->u8[i] = (bytes >> shift) & 0xFF; /* shift and store result */
1849 void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1851 int sh = shift & 0xf;
1855 for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1858 result.VsrB(i) = b->VsrB(index - 0x10);
1860 result.VsrB(i) = a->VsrB(index);
1866 void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1868 int sh = (b->VsrB(0xf) >> 3) & 0xf;
1870 #if defined(HOST_WORDS_BIGENDIAN)
1871 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1872 memset(&r->u8[16-sh], 0, sh);
1874 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1875 memset(&r->u8[0], 0, sh);
1879 /* Experimental testing shows that hardware masks the immediate. */
1880 #define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
1881 #define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
1882 #define VSPLT(suffix, element, access) \
1883 void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
1885 uint32_t s = b->access(SPLAT_ELEMENT(element)); \
1888 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1896 #undef SPLAT_ELEMENT
1897 #undef _SPLAT_MASKED
1898 #if defined(HOST_WORDS_BIGENDIAN)
1899 #define VINSERT(suffix, element) \
1900 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1902 memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])], \
1903 sizeof(r->element[0])); \
1906 #define VINSERT(suffix, element) \
1907 void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1909 uint32_t d = (16 - index) - sizeof(r->element[0]); \
1910 memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0])); \
1918 #if defined(HOST_WORDS_BIGENDIAN)
1919 #define VEXTRACT(suffix, element) \
1920 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1922 uint32_t es = sizeof(r->element[0]); \
1923 memmove(&r->u8[8 - es], &b->u8[index], es); \
1924 memset(&r->u8[8], 0, 8); \
1925 memset(&r->u8[0], 0, 8 - es); \
1928 #define VEXTRACT(suffix, element) \
1929 void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1931 uint32_t es = sizeof(r->element[0]); \
1932 uint32_t s = (16 - index) - es; \
1933 memmove(&r->u8[8], &b->u8[s], es); \
1934 memset(&r->u8[0], 0, 8); \
1935 memset(&r->u8[8 + es], 0, 8 - es); \
1944 void helper_xxextractuw(CPUPPCState *env, target_ulong xtn,
1945 target_ulong xbn, uint32_t index)
1948 size_t es = sizeof(uint32_t);
1952 getVSR(xbn, &xb, env);
1953 memset(&xt, 0, sizeof(xt));
1956 for (i = 0; i < es; i++, ext_index++) {
1957 xt.VsrB(8 - es + i) = xb.VsrB(ext_index % 16);
1960 putVSR(xtn, &xt, env);
1963 void helper_xxinsertw(CPUPPCState *env, target_ulong xtn,
1964 target_ulong xbn, uint32_t index)
1967 size_t es = sizeof(uint32_t);
1968 int ins_index, i = 0;
1970 getVSR(xbn, &xb, env);
1971 getVSR(xtn, &xt, env);
1974 for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
1975 xt.VsrB(ins_index) = xb.VsrB(8 - es + i);
1978 putVSR(xtn, &xt, env);
1981 #define VEXT_SIGNED(name, element, cast) \
1982 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
1985 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
1986 r->element[i] = (cast)b->element[i]; \
1989 VEXT_SIGNED(vextsb2w, s32, int8_t)
1990 VEXT_SIGNED(vextsb2d, s64, int8_t)
1991 VEXT_SIGNED(vextsh2w, s32, int16_t)
1992 VEXT_SIGNED(vextsh2d, s64, int16_t)
1993 VEXT_SIGNED(vextsw2d, s64, int32_t)
1996 #define VNEG(name, element) \
1997 void helper_##name(ppc_avr_t *r, ppc_avr_t *b) \
2000 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
2001 r->element[i] = -b->element[i]; \
2008 #define VSPLTI(suffix, element, splat_type) \
2009 void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat) \
2011 splat_type x = (int8_t)(splat << 3) >> 3; \
2014 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
2015 r->element[i] = x; \
2018 VSPLTI(b, s8, int8_t)
2019 VSPLTI(h, s16, int16_t)
2020 VSPLTI(w, s32, int32_t)
2023 #define VSR(suffix, element, mask) \
2024 void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \
2028 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
2029 unsigned int shift = b->element[i] & mask; \
2030 r->element[i] = a->element[i] >> shift; \
2043 void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2045 int sh = (b->VsrB(0xf) >> 3) & 0xf;
2047 #if defined(HOST_WORDS_BIGENDIAN)
2048 memmove(&r->u8[sh], &a->u8[0], 16 - sh);
2049 memset(&r->u8[0], 0, sh);
2051 memmove(&r->u8[0], &a->u8[sh], 16 - sh);
2052 memset(&r->u8[16 - sh], 0, sh);
2056 void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2060 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2061 r->u32[i] = a->u32[i] >= b->u32[i];
2065 void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2072 upper = ARRAY_SIZE(r->s32) - 1;
2073 t = (int64_t)b->VsrSW(upper);
2074 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2076 result.VsrSW(i) = 0;
2078 result.VsrSW(upper) = cvtsdsw(t, &sat);
2082 env->vscr |= (1 << VSCR_SAT);
2086 void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2093 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2094 int64_t t = (int64_t)b->VsrSW(upper + i * 2);
2097 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
2098 t += a->VsrSW(2 * i + j);
2100 result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
2105 env->vscr |= (1 << VSCR_SAT);
2109 void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2114 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2115 int64_t t = (int64_t)b->s32[i];
2117 for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
2118 t += a->s8[4 * i + j];
2120 r->s32[i] = cvtsdsw(t, &sat);
2124 env->vscr |= (1 << VSCR_SAT);
2128 void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2133 for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2134 int64_t t = (int64_t)b->s32[i];
2136 t += a->s16[2 * i] + a->s16[2 * i + 1];
2137 r->s32[i] = cvtsdsw(t, &sat);
2141 env->vscr |= (1 << VSCR_SAT);
2145 void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2150 for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2151 uint64_t t = (uint64_t)b->u32[i];
2153 for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
2154 t += a->u8[4 * i + j];
2156 r->u32[i] = cvtuduw(t, &sat);
2160 env->vscr |= (1 << VSCR_SAT);
2164 #if defined(HOST_WORDS_BIGENDIAN)
2171 #define VUPKPX(suffix, hi) \
2172 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2177 for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
2178 uint16_t e = b->u16[hi ? i : i+4]; \
2179 uint8_t a = (e >> 15) ? 0xff : 0; \
2180 uint8_t r = (e >> 10) & 0x1f; \
2181 uint8_t g = (e >> 5) & 0x1f; \
2182 uint8_t b = e & 0x1f; \
2184 result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
2192 #define VUPK(suffix, unpacked, packee, hi) \
2193 void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b) \
2199 for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
2200 result.unpacked[i] = b->packee[i]; \
2203 for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
2205 result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
2210 VUPK(hsb, s16, s8, UPKHI)
2211 VUPK(hsh, s32, s16, UPKHI)
2212 VUPK(hsw, s64, s32, UPKHI)
2213 VUPK(lsb, s16, s8, UPKLO)
2214 VUPK(lsh, s32, s16, UPKLO)
2215 VUPK(lsw, s64, s32, UPKLO)
2220 #define VGENERIC_DO(name, element) \
2221 void helper_v##name(ppc_avr_t *r, ppc_avr_t *b) \
2225 for (i = 0; i < ARRAY_SIZE(r->element); i++) { \
2226 r->element[i] = name(b->element[i]); \
2230 #define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
2231 #define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
2232 #define clzw(v) clz32((v))
2233 #define clzd(v) clz64((v))
2235 VGENERIC_DO(clzb, u8)
2236 VGENERIC_DO(clzh, u16)
2237 VGENERIC_DO(clzw, u32)
2238 VGENERIC_DO(clzd, u64)
2245 #define ctzb(v) ((v) ? ctz32(v) : 8)
2246 #define ctzh(v) ((v) ? ctz32(v) : 16)
2247 #define ctzw(v) ctz32((v))
2248 #define ctzd(v) ctz64((v))
2250 VGENERIC_DO(ctzb, u8)
2251 VGENERIC_DO(ctzh, u16)
2252 VGENERIC_DO(ctzw, u32)
2253 VGENERIC_DO(ctzd, u64)
2260 #define popcntb(v) ctpop8(v)
2261 #define popcnth(v) ctpop16(v)
2262 #define popcntw(v) ctpop32(v)
2263 #define popcntd(v) ctpop64(v)
2265 VGENERIC_DO(popcntb, u8)
2266 VGENERIC_DO(popcnth, u16)
2267 VGENERIC_DO(popcntw, u32)
2268 VGENERIC_DO(popcntd, u64)
2277 #if defined(HOST_WORDS_BIGENDIAN)
2278 #define QW_ONE { .u64 = { 0, 1 } }
2280 #define QW_ONE { .u64 = { 1, 0 } }
2283 #ifndef CONFIG_INT128
2285 static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
2287 t->u64[0] = ~a.u64[0];
2288 t->u64[1] = ~a.u64[1];
2291 static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
2293 if (a.VsrD(0) < b.VsrD(0)) {
2295 } else if (a.VsrD(0) > b.VsrD(0)) {
2297 } else if (a.VsrD(1) < b.VsrD(1)) {
2299 } else if (a.VsrD(1) > b.VsrD(1)) {
2306 static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2308 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
2309 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
2310 (~a.VsrD(1) < b.VsrD(1));
2313 static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2316 t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
2317 t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
2318 (~a.VsrD(1) < b.VsrD(1));
2319 avr_qw_not(¬_a, a);
2320 return avr_qw_cmpu(not_a, b) < 0;
2325 void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2327 #ifdef CONFIG_INT128
2328 r->u128 = a->u128 + b->u128;
2330 avr_qw_add(r, *a, *b);
2334 void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2336 #ifdef CONFIG_INT128
2337 r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2340 if (c->VsrD(1) & 1) {
2344 tmp.VsrD(1) = c->VsrD(1) & 1;
2345 avr_qw_add(&tmp, *a, tmp);
2346 avr_qw_add(r, tmp, *b);
2348 avr_qw_add(r, *a, *b);
2353 void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2355 #ifdef CONFIG_INT128
2356 r->u128 = (~a->u128 < b->u128);
2360 avr_qw_not(¬_a, *a);
2363 r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
2367 void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2369 #ifdef CONFIG_INT128
2370 int carry_out = (~a->u128 < b->u128);
2371 if (!carry_out && (c->u128 & 1)) {
2372 carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2373 ((a->u128 != 0) || (b->u128 != 0));
2375 r->u128 = carry_out;
2378 int carry_in = c->VsrD(1) & 1;
2382 carry_out = avr_qw_addc(&tmp, *a, *b);
2384 if (!carry_out && carry_in) {
2385 ppc_avr_t one = QW_ONE;
2386 carry_out = avr_qw_addc(&tmp, tmp, one);
2389 r->VsrD(1) = carry_out;
2393 void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2395 #ifdef CONFIG_INT128
2396 r->u128 = a->u128 - b->u128;
2399 ppc_avr_t one = QW_ONE;
2401 avr_qw_not(&tmp, *b);
2402 avr_qw_add(&tmp, *a, tmp);
2403 avr_qw_add(r, tmp, one);
2407 void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2409 #ifdef CONFIG_INT128
2410 r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2414 avr_qw_not(&tmp, *b);
2415 avr_qw_add(&sum, *a, tmp);
2418 tmp.VsrD(1) = c->VsrD(1) & 1;
2419 avr_qw_add(r, sum, tmp);
2423 void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2425 #ifdef CONFIG_INT128
2426 r->u128 = (~a->u128 < ~b->u128) ||
2427 (a->u128 + ~b->u128 == (__uint128_t)-1);
2429 int carry = (avr_qw_cmpu(*a, *b) > 0);
2432 avr_qw_not(&tmp, *b);
2433 avr_qw_add(&tmp, *a, tmp);
2434 carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
2441 void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2443 #ifdef CONFIG_INT128
2445 (~a->u128 < ~b->u128) ||
2446 ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2448 int carry_in = c->VsrD(1) & 1;
2449 int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2450 if (!carry_out && carry_in) {
2452 avr_qw_not(&tmp, *b);
2453 avr_qw_add(&tmp, *a, tmp);
2454 carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
2458 r->VsrD(1) = carry_out;
2462 #define BCD_PLUS_PREF_1 0xC
2463 #define BCD_PLUS_PREF_2 0xF
2464 #define BCD_PLUS_ALT_1 0xA
2465 #define BCD_NEG_PREF 0xD
2466 #define BCD_NEG_ALT 0xB
2467 #define BCD_PLUS_ALT_2 0xE
2468 #define NATIONAL_PLUS 0x2B
2469 #define NATIONAL_NEG 0x2D
2471 #if defined(HOST_WORDS_BIGENDIAN)
2472 #define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2474 #define BCD_DIG_BYTE(n) ((n) / 2)
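/*
 * BCD digits are numbered from the least-significant end: digit 0 is the
 * sign nibble and digits 1..31 hold the value.  BCD_DIG_BYTE(n) maps a
 * digit number to the byte of the ppc_avr_t that contains it for the host
 * endianness; even digits sit in the low nibble, odd digits in the high one.
 */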
2477 static int bcd_get_sgn(ppc_avr_t *bcd)
2479 switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2480 case BCD_PLUS_PREF_1:
2481 case BCD_PLUS_PREF_2:
2482 case BCD_PLUS_ALT_1:
2483 case BCD_PLUS_ALT_2:
2501 static int bcd_preferred_sgn(int sgn, int ps)
2504 return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2506 return BCD_NEG_PREF;
2510 static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2514 result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2516 result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2519 if (unlikely(result > 9)) {
2525 static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2528 bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2529 bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
2531 bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2532 bcd->u8[BCD_DIG_BYTE(n)] |= digit;
2536 static bool bcd_is_valid(ppc_avr_t *bcd)
2541 if (bcd_get_sgn(bcd) == 0) {
2545 for (i = 1; i < 32; i++) {
2546 bcd_get_digit(bcd, i, &invalid);
2547 if (unlikely(invalid)) {
2554 static int bcd_cmp_zero(ppc_avr_t *bcd)
2556 if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
2559 return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2563 static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2565 return reg->VsrH(7 - n);
2568 static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2570 reg->VsrH(7 - n) = val;
2573 static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2577 for (i = 31; i > 0; i--) {
2578 uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2579 uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2580 if (unlikely(invalid)) {
2581 return 0; /* doesn't matter */
2582 } else if (dig_a > dig_b) {
2584 } else if (dig_a < dig_b) {
2592 static void bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2597 for (i = 1; i <= 31; i++) {
2598 uint8_t digit = bcd_get_digit(a, i, invalid) +
2599 bcd_get_digit(b, i, invalid) + carry;
2607 bcd_put_digit(t, digit, i);
2613 static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2619 for (i = 1; i <= 31; i++) {
2620 uint8_t digit = bcd_get_digit(a, i, invalid) -
2621 bcd_get_digit(b, i, invalid) + carry;
2629 bcd_put_digit(t, digit, i);
uint32_t helper_bcdadd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int sgna = bcd_get_sgn(a);
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgna == 0) || (sgnb == 0);
    int overflow = 0;
    uint32_t cr = 0;
    ppc_avr_t result = { .u64 = { 0, 0 } };

    if (!invalid) {
        if (sgna == sgnb) {
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
            bcd_add_mag(&result, a, b, &invalid, &overflow);
            cr = bcd_cmp_zero(&result);
        } else {
            int magnitude = bcd_cmp_mag(a, b);
            if (magnitude > 0) {
                result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
                bcd_sub_mag(&result, a, b, &invalid, &overflow);
                cr = (sgna > 0) ? CRF_GT : CRF_LT;
            } else if (magnitude < 0) {
                result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
                bcd_sub_mag(&result, b, a, &invalid, &overflow);
                cr = (sgnb > 0) ? CRF_GT : CRF_LT;
            } else {
                result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(0, ps);
                cr = CRF_EQ;
            }
        }
    }

    if (unlikely(invalid)) {
        result.VsrD(0) = result.VsrD(1) = -1;
        cr = CRF_SO;
    } else if (overflow) {
        cr |= CRF_SO;
    }

    *r = result;

    return cr;
}

uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    ppc_avr_t bcopy = *b;
    int sgnb = bcd_get_sgn(b);

    if (sgnb < 0) {
        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
    } else if (sgnb > 0) {
        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
    }
    /* else invalid ... defer to bcdadd code for proper handling */

    return helper_bcdadd(r, a, &bcopy, ps);
}

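/*
 * Added note: helper_bcdsub above implements subtraction by flipping the
 * sign nibble of the second operand and reusing the addition path, e.g.
 * +5 - +3 is evaluated as bcdadd(+5, -3).
 */
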
uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint16_t national = 0;
    uint16_t sgnb = get_national_digit(b, 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);

    for (i = 1; i < 8; i++) {
        national = get_national_digit(b, i);
        if (unlikely(national < 0x30 || national > 0x39)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, national & 0xf, i);
    }

    if (sgnb == NATIONAL_PLUS) {
        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
    } else {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);

    for (i = 1; i < 8; i++) {
        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);

        if (unlikely(invalid)) {
            break;
        }
    }
    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int invalid = 0;
    int zone_digit = 0;
    int zone_lead = ps ? 0xF : 0x3;
    int digit = 0;
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4;

    if (unlikely((sgnb < 0xA) && ps)) {
        invalid = 1;
    }

    for (i = 0; i < 16; i++) {
        zone_digit = i ? b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead;
        digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF;
        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, digit, i + 1);
    }

    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
            (!ps && (sgnb & 0x4))) {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    } else {
        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint8_t digit = 0;
    int sgnb = bcd_get_sgn(b);
    int zone_lead = (ps) ? 0xF0 : 0x30;
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    int ox_flag = ((b->VsrD(0) >> 4) != 0);

    for (i = 0; i < 16; i++) {
        digit = bcd_get_digit(b, i + 1, &invalid);

        if (unlikely(invalid)) {
            break;
        }

        ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit;
    }

    if (ps) {
        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
    } else {
        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
    }

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}

uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint64_t lo_value;
    uint64_t hi_value;
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    if (b->VsrSD(0) < 0) {
        lo_value = -b->VsrSD(1);
        hi_value = ~b->VsrD(0) + !lo_value;
        bcd_put_digit(&ret, 0xD, 0);
    } else {
        lo_value = b->VsrD(1);
        hi_value = b->VsrD(0);
        bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
    }

    if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
            lo_value > 9999999999999999ULL) {
        cr = CRF_SO;
    }

    for (i = 1; i < 16; hi_value /= 10, i++) {
        bcd_put_digit(&ret, hi_value % 10, i);
    }

    for (; i < 32; lo_value /= 10, i++) {
        bcd_put_digit(&ret, lo_value % 10, i);
    }

    cr |= bcd_cmp_zero(&ret);

    *r = ret;

    return cr;
}

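/*
 * Added note: helper_bcdcfsq above emits the magnitude one decimal digit
 * at a time, filling digits 1..15 first and then digits 16..31; the
 * divu128() / 9999999999999999ULL guard flags source values whose
 * magnitude does not fit in the 31 digits available.
 */
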
uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    uint8_t i;
    int cr;
    uint64_t carry;
    uint64_t unused;
    uint64_t lo_value;
    uint64_t hi_value = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);

    lo_value = bcd_get_digit(b, 31, &invalid);
    for (i = 30; i > 0; i--) {
        mulu64(&lo_value, &carry, lo_value, 10ULL);
        mulu64(&hi_value, &unused, hi_value, 10ULL);
        lo_value += bcd_get_digit(b, i, &invalid);
        hi_value += carry;

        if (unlikely(invalid)) {
            break;
        }
    }

    if (sgnb == -1) {
        r->VsrSD(1) = -lo_value;
        r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
    } else {
        r->VsrSD(1) = lo_value;
        r->VsrSD(0) = hi_value;
    }

    cr = bcd_cmp_zero(b);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    return cr;
}

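/*
 * Added note: the digit loop in helper_bcdctsq above keeps the 128-bit
 * accumulator as the hi_value:lo_value pair.  mulu64() produces the full
 * 128-bit product of lo_value * 10, so its high half ("carry") is folded
 * into hi_value, which is itself multiplied by 10, and the next BCD digit
 * is then added to the low half.
 */
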
uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int invalid = 0;

    if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
        return CRF_SO;
    }

    *r = *a;
    bcd_put_digit(r, b->u8[BCD_DIG_BYTE(0)] & 0xF, 0);

    for (i = 1; i < 32; i++) {
        bcd_get_digit(a, i, &invalid);
        bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    return bcd_cmp_zero(r);
}

uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int sgnb = bcd_get_sgn(b);

    *r = *b;
    bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    return bcd_cmp_zero(r);
}

uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
#if defined(HOST_WORDS_BIGENDIAN)
    int i = a->s8[7];
#else
    int i = a->s8[8];
#endif
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i;
    int invalid = 0;
    bool ox_flag = false;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

#if defined(HOST_WORDS_BIGENDIAN)
    i = a->s8[7];
#else
    i = a->s8[8];
#endif
    if (i >= 32) {
        ox_flag = true;
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i <= -32) {
        ret.VsrD(1) = ret.VsrD(0) = 0;
    } else if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
    }
    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}

uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int unused = 0;
    int invalid = 0;
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    ret.VsrD(1) &= ~0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    int i = a->s8[7];
    ppc_avr_t bcd_one = { .u64 = { 0, 0x10 } };
#else
    int i = a->s8[8];
    ppc_avr_t bcd_one = { .u64 = { 0x10, 0 } };
#endif

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
    } else {
        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);

        if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
            bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
        }
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    cr = bcd_cmp_zero(&ret);
    if (ox_flag) {
        cr |= CRF_SO;
    }
    *r = ret;

    return cr;
}

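/*
 * Added note: in helper_bcdsr above, a right shift leaves the most
 * significant discarded digit in the (previously cleared) sign-nibble
 * slot, so checking digit 0 against 5 and conditionally adding bcd_one
 * (the value 1 in digit position 1) implements round-to-nearest on the
 * digits that were shifted out.
 */
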
uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    uint64_t mask;
    uint32_t ox_flag = 0;
#if defined(HOST_WORDS_BIGENDIAN)
    int i = a->s16[3] + 1;
#else
    int i = a->s16[4] + 1;
#endif
    ppc_avr_t ret = *b;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (i > 16 && i < 32) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i >= 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
    *r = ret;

    return bcd_cmp_zero(&ret) | ox_flag;
}

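/*
 * Added note: each BCD digit occupies one nibble and the sign occupies
 * nibble 0, so "i" in helper_bcdtrunc above counts nibbles including the
 * sign (hence the "+ 1").  Keeping the low i * 4 bits of the quadword
 * therefore retains the sign plus the i - 1 least significant digits and
 * flags CRF_SO if any discarded digit was non-zero.  The unsigned variant
 * below counts digits directly, since it has no sign nibble.
 */
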
uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    uint64_t mask;
    uint32_t ox_flag = 0;
    int invalid = 0;
    ppc_avr_t ret = *b;

    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

#if defined(HOST_WORDS_BIGENDIAN)
    i = a->s16[3];
#else
    i = a->s16[4];
#endif
    if (i > 16 && i < 33) {
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.VsrD(0) & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(0) &= mask;
    } else if (i > 0 && i <= 16) {
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.VsrD(1) &= mask;
        ret.VsrD(0) = 0;
    } else if (i == 0) {
        if (ret.VsrD(0) || ret.VsrD(1)) {
            ox_flag = CRF_SO;
        }
        ret.VsrD(0) = ret.VsrD(1) = 0;
    }

    *r = ret;
    if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
        return ox_flag | CRF_EQ;
    }

    return ox_flag | CRF_GT;
}

void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = AES_sbox[a->u8[i]];
    }
}

void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        result.VsrW(i) = b->VsrW(i) ^
            (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
             AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
             AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
             AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
    }
    *r = result;
}

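/*
 * Added note: helper_vcipher above is the classic T-table formulation of
 * one forward AES round.  Each AES_Te table folds SubBytes and MixColumns
 * into a single 32-bit lookup, the AES_shifts indexing applies ShiftRows,
 * and the XOR with b supplies AddRoundKey.
 */
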
void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
    }
    *r = result;
}

void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /* This differs from what is written in ISA V2.07.  The RTL is */
    /* incorrect and will be fixed in V2.07B.                      */
    int i;
    ppc_avr_t tmp;

    VECTOR_FOR_INORDER_I(i, u8) {
        tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->VsrW(i) =
            AES_imc[tmp.VsrB(4 * i + 0)][0] ^
            AES_imc[tmp.VsrB(4 * i + 1)][1] ^
            AES_imc[tmp.VsrB(4 * i + 2)][2] ^
            AES_imc[tmp.VsrB(4 * i + 3)][3];
    }
}

void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
    }
    *r = result;
}

void helper_vshasigmaw(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 7) ^
                             ror32(a->VsrW(i), 18) ^
                             (a->VsrW(i) >> 3);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 17) ^
                             ror32(a->VsrW(i), 19) ^
                             (a->VsrW(i) >> 10);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> i)) == 0) {
                r->VsrW(i) = ror32(a->VsrW(i), 2) ^
                             ror32(a->VsrW(i), 13) ^
                             ror32(a->VsrW(i), 22);
            } else { /* six.bit[i] == 1 */
                r->VsrW(i) = ror32(a->VsrW(i), 6) ^
                             ror32(a->VsrW(i), 11) ^
                             ror32(a->VsrW(i), 25);
            }
        }
    }
}

void helper_vshasigmad(ppc_avr_t *r, ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        if (st == 0) {
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 1) ^
                             ror64(a->VsrD(i), 8) ^
                             (a->VsrD(i) >> 7);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 19) ^
                             ror64(a->VsrD(i), 61) ^
                             (a->VsrD(i) >> 6);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> (2 * i))) == 0) {
                r->VsrD(i) = ror64(a->VsrD(i), 28) ^
                             ror64(a->VsrD(i), 34) ^
                             ror64(a->VsrD(i), 39);
            } else { /* six.bit[2*i] == 1 */
                r->VsrD(i) = ror64(a->VsrD(i), 14) ^
                             ror64(a->VsrD(i), 18) ^
                             ror64(a->VsrD(i), 41);
            }
        }
    }
}

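/*
 * Added note: the two helpers above compute the SHA-2 sigma functions.
 * With st == 0 they produce the message-schedule functions (sigma0 for a
 * clear "six" bit, sigma1 for a set bit); with st == 1 they produce the
 * compression-round functions Sigma0 and Sigma1, using the SHA-256
 * constants in vshasigmaw and the SHA-512 constants in vshasigmad.
 */
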
void helper_vpermxor(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int indexA = c->VsrB(i) >> 4;
        int indexB = c->VsrB(i) & 0xF;

        result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
    }
    *r = result;
}

#undef VECTOR_FOR_INORDER_I

/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
        (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}

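/*
 * Added example: hbrev maps a nibble to its bit reversal, so
 * byte_reverse(0x12) == 0x48 and word_reverse(0x12345678) == 0x1E6A2C48,
 * i.e. word_reverse() reverses all 32 bits of its argument.
 */
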
#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}

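/*
 * Added example: helper_brinc above increments in bit-reversed order over
 * the bits selected by arg2.  With b == 0xF and a == 0x4 (bit-reversed
 * index 2), a | ~b == 0xFFFFFFF4, whose bit reversal is 0x2FFFFFFF;
 * adding 1 and reversing again yields d == 0xC, i.e. bit-reversed index
 * 3, so the low bits step from 0x4 to 0xC.
 */
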
uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

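/*
 * Added note: the helper below implements the embedded-PowerPC dlmzb
 * instruction.  It scans the eight bytes formed by high:low for the
 * leftmost zero byte, leaves the resulting byte count in the low bits of
 * XER (and in the returned value), and optionally updates CR0.
 */
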
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}