/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "cpu.h"
#include "qemu/host-utils.h"
#include "helper.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */
#if defined(TARGET_PPC64)

uint64_t helper_mulldo(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
{
    int64_t th;
    uint64_t tl;

    muls64(&tl, (uint64_t *)&th, arg1, arg2);
    /* If th is not the sign extension of tl, then we had an overflow */
    if (likely(th == ((int64_t)tl >> 63))) {
        env->ov = 0;
    } else {
        env->so = env->ov = 1;
    }
    return (int64_t)tl;
}
#endif
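
/*
 * Worked example (editor's sketch, not upstream code): mulld overflows
 * exactly when the 128-bit product is not the sign extension of its low
 * 64 bits. Assuming a GCC/Clang host with __int128, the same test can
 * be written standalone as:
 *
 *     static inline int mulld_overflows(int64_t a, int64_t b)
 *     {
 *         __int128 p = (__int128)a * b;
 *         int64_t th = (int64_t)(p >> 64);
 *         int64_t tl = (int64_t)p;
 *         return th != (tl >> 63);  // e.g. true for a = b = 1LL << 32
 *     }
 */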
target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return (target_ulong)rt;
}
target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return (target_ulong)rt;
}
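
/*
 * Illustrative usage (editor's sketch, hypothetical values): divweu
 * computes (RA || 32 zero bits) / RB, a 64/32 -> 32 unsigned divide,
 * overflowing when the quotient does not fit in 32 bits:
 *
 *     CPUPPCState env = { 0 };
 *     // 0x2_00000000 / 4 == 0x80000000, fits:
 *     target_ulong q = helper_divweu(&env, 2, 4, 1);    // q == 0x80000000
 *     // 0x4_00000000 / 2 == 0x2_00000000, does not fit:
 *     target_ulong u = helper_divweu(&env, 4, 2, 1);    // u == 0, env.ov == 1
 */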
#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    overflow = divu128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return rt;
}
uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    int64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = divs128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return rt;
}

#endif
target_ulong helper_cntlzw(target_ulong t)
{
    return clz32(t);
}

#if defined(TARGET_PPC64)
target_ulong helper_cntlzd(target_ulong t)
{
    return clz64(t);
}
#endif

#if defined(TARGET_PPC64)
uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i*8)) & 0xFF;
        if (index < 64) {
            if (rb & (1ull << (63-index))) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}

#endif
target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}
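
/*
 * Example (editor's illustration): cmpb writes 0xff into each result
 * byte whose operand bytes compare equal and 0x00 elsewhere, e.g. on a
 * 64-bit target:
 *
 *     helper_cmpb(0x1122334455667788, 0x1122AABB55667788)
 *         == 0xffff0000ffffffff
 */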
/* shift right arithmetic helper */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->ca = 0;
            } else {
                env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca = 0;
        }
    } else {
        ret = (int32_t)value >> 31;
        env->ca = (ret != 0);
    }
    return (target_long)ret;
}
#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca = 0;
            } else {
                env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca = (ret != 0);
    }
    return ret;
}
#endif
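
/*
 * Worked example (editor's note): sraw/srad set CA only when the source
 * is negative and nonzero bits are shifted out, which is exactly when
 * "shift right, then add CA" reproduces a truncating signed divide:
 *
 *     (int32_t)-5 >> 1 == -3, CA = 1 (bit 0 of 0xFFFFFFFB was lost)
 *     -3 + CA == -2 == -5 / 2 truncated toward zero
 */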
#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >>  8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}

target_ulong helper_popcntd(target_ulong val)
{
    return ctpop64(val);
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    val = (val & 0x55555555) + ((val >>  1) & 0x55555555);
    val = (val & 0x33333333) + ((val >>  2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >>  4) & 0x0f0f0f0f);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    val = (val & 0x55555555) + ((val >>  1) & 0x55555555);
    val = (val & 0x33333333) + ((val >>  2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >>  4) & 0x0f0f0f0f);
    val = (val & 0x00ff00ff) + ((val >>  8) & 0x00ff00ff);
    val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
    return val;
}
#endif
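
/*
 * Editor's sketch of the SWAR technique above (not upstream code): each
 * mask-and-add step sums adjacent fields in parallel, doubling the
 * field width, so after the 4-bit step every byte holds its own
 * population count (which is what popcntb returns per byte), after the
 * 16-bit step every halfword does, and so on. Tracing 8 bits:
 *
 *     v       = 0b10110110    (5 bits set)
 *     pairs   : 01 10 01 01   (1, 2, 1, 1)
 *     nibbles : 0011 0010     (3, 2)
 *     byte    : 00000101      (5)
 */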
/*****************************************************************************/
/* PowerPC 601 specific instructions (POWER bridge) */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return tmp / (int32_t)arg2;
    }
}
target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        if ((int32_t)tmp != tmp) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
        return tmp;
    }
}
target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}
target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
                          target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->ov = 0;
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}
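
/*
 * Usage note (editor's illustration, hypothetical values): the
 * POWER-era div takes its 64-bit dividend from RA concatenated with
 * the MQ register and leaves the remainder in MQ:
 *
 *     env->spr[SPR_MQ] = 1;                      // low word of dividend
 *     target_ulong q = helper_div(env, 0, 10);   // 0x0_00000001 / 10
 *     // q == 0, env->spr[SPR_MQ] == 1 (remainder)
 */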
/*****************************************************************************/
/* 602 specific instructions */
/* mfrom is the most crazy instruction ever seen, imho ! */
/* Real implementation uses a ROM table. Do the same */
/*
 * Extremely decomposed:
 * return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif
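
/*
 * Editor's sketch (not upstream code): a table of this shape can be
 * generated offline with ordinary libm calls; QEMU ships a generator
 * along these lines. Only mfrom_ROM_table is taken from the #include
 * above; everything else here is illustrative:
 *
 *     #include <math.h>
 *     #include <stdio.h>
 *     int main(void)
 *     {
 *         printf("static const uint8_t mfrom_ROM_table[602] = {\n");
 *         for (int n = 0; n < 602; n++) {
 *             double d = 256.0 * log10(pow(10.0, -n / 256.0) + 1.0) + 0.5;
 *             printf("    %3d,\n", (int)d);
 *         }
 *         printf("};\n");
 *         return 0;
 *     }
 */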
/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define HI_IDX 0
#define LO_IDX 1
#else
#define HI_IDX 1
#define LO_IDX 0
#endif

#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
#endif
/* Saturating arithmetic helpers.  */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU
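
/*
 * Example (editor's illustration): the generated converters clamp and
 * record saturation, e.g. cvtsdsw (int64_t -> int32_t):
 *
 *     int sat = 0;
 *     int32_t x = cvtsdsw(0x123456789LL, &sat);  // x == INT32_MAX, sat == 1
 *     int32_t y = cvtsdsw(-42, &sat);            // y == -42, sat unchanged
 */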
void helper_lvsl(ppc_avr_t *r, target_ulong sh)
{
    int i, j = (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

void helper_lvsr(ppc_avr_t *r, target_ulong sh)
{
    int i, j = 0x10 - (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}
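
/*
 * Editor's note: lvsl/lvsr build the classic permute-control vectors
 * for unaligned loads. For sh == 3, lvsl fills r (in PowerPC element
 * order) with {0x03, 0x04, ..., 0x12}; fed to vperm, that selects a
 * misaligned 16-byte window out of two aligned loads.
 */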
void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
{
#if defined(HOST_WORDS_BIGENDIAN)
    env->vscr = r->u32[3];
#else
    env->vscr = r->u32[0];
#endif
    set_flush_to_zero(vscr_nj, &env->vec_status);
}
void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}
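
/*
 * Editor's note on the idiom above: unsigned a + b carries out of 32
 * bits exactly when b > 0xffffffff - a, i.e. when ~a < b, so the
 * comparison yields the carry bit directly without widening:
 *
 *     ~0xffffffffu < 1u   // 0 < 1: 0xffffffff + 1 carries, result 1
 *     ~0x7fffffffu < 1u   // 0x80000000 < 1 is false: no carry, 0
 */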
#define VARITH_DO(name, op, element)                                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = a->element[i] op b->element[i];             \
        }                                                               \
    }
#define VARITH(suffix, element)                 \
    VARITH_DO(add##suffix, +, element)          \
    VARITH_DO(sub##suffix, -, element)
VARITH(ubm, u8)
VARITH(uhm, u16)
VARITH(uwm, u32)
VARITH_DO(muluwm, *, u32)
#undef VARITH_DO
#undef VARITH
#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            r->f[i] = func(a->f[i], b->f[i], &env->vec_status);         \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP
#define VARITHFPFMA(suffix, type)                                       \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b, ppc_avr_t *c)                   \
    {                                                                   \
        int i;                                                          \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i],         \
                                     type, &env->vec_status);           \
        }                                                               \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
#undef VARITHFPFMA
#define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
        r->element[i] = cvt(result, &sat);                              \
    }

#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
    void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,   \
                        ppc_avr_t *b)                                   \
    {                                                                   \
        int sat = 0;                                                    \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            switch (sizeof(r->element[0])) {                            \
            case 1:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 2:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 4:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED
#define VAVG_DO(name, element, etype)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
            r->element[i] = x >> 1;                                     \
        }                                                               \
    }

#define VAVG(type, signed_element, signed_type, unsigned_element,       \
             unsigned_type)                                             \
    VAVG_DO(avgs##type, signed_element, signed_type)                    \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
#undef VAVG
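
/*
 * Example (editor's illustration): the widened etype keeps the +1
 * rounding add from overflowing, e.g. for vavgub on 8-bit lanes:
 *
 *     (uint16_t)250 + 251 + 1 == 502, and 502 >> 1 == 251   // correct
 *     in uint8_t, 250 + 251 + 1 would wrap to 246 first     // why etype
 */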
#define VCF(suffix, cvt, element)                                       \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            float32 t = cvt(b->element[i], &env->vec_status);           \
            r->f[i] = float32_scalbn(t, -uim, &env->vec_status);        \
        }                                                               \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF
#define VCMP_DO(suffix, compare, element, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            uint32_t result = (a->element[i] compare b->element[i] ?    \
                               ones : 0x0);                             \
            switch (sizeof(a->element[0])) {                            \
            case 4:                                                     \
                r->u32[i] = result;                                     \
                break;                                                  \
            case 2:                                                     \
                r->u16[i] = result;                                     \
                break;                                                  \
            case 1:                                                     \
                r->u8[i] = result;                                      \
                break;                                                  \
            }                                                           \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMP(suffix, compare, element)          \
    VCMP_DO(suffix, compare, element, 0)        \
    VCMP_DO(suffix##_dot, compare, element, 1)
VCMP(equb, ==, u8)
VCMP(equh, ==, u16)
VCMP(equw, ==, u32)
VCMP(gtub, >, u8)
VCMP(gtuh, >, u16)
VCMP(gtuw, >, u32)
VCMP(gtsb, >, s8)
VCMP(gtsh, >, s16)
VCMP(gtsw, >, s32)
#undef VCMP_DO
#undef VCMP
#define VCMPFP_DO(suffix, compare, order, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            uint32_t result;                                            \
            int rel = float32_compare_quiet(a->f[i], b->f[i],           \
                                            &env->vec_status);          \
            if (rel == float_relation_unordered) {                      \
                result = 0;                                             \
            } else if (rel compare order) {                             \
                result = ones;                                          \
            } else {                                                    \
                result = 0;                                             \
            }                                                           \
            r->u32[i] = result;                                         \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMPFP(suffix, compare, order)          \
    VCMPFP_DO(suffix, compare, order, 0)        \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP
static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
                                    ppc_avr_t *a, ppc_avr_t *b, int record)
{
    int i;
    int all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
        if (le_rel == float_relation_unordered) {
            r->u32[i] = 0xc0000000;
            /* ALL_IN does not need to be updated here.  */
        } else {
            float32 bneg = float32_chs(b->f[i]);
            int ge_rel = float32_compare_quiet(a->f[i], bneg,
                                               &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}

void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}

void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}
#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            if (float32_is_any_nan(b->f[i])) {                          \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f[i], &s);            \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT
void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                      ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);

        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}
#define VMINMAX_DO(name, compare, element)                              \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            if (a->element[i] compare b->element[i]) {                  \
                r->element[i] = b->element[i];                          \
            } else {                                                    \
                r->element[i] = a->element[i];                          \
            }                                                           \
        }                                                               \
    }
#define VMINMAX(suffix, element)                \
    VMINMAX_DO(min##suffix, >, element)         \
    VMINMAX_DO(max##suffix, <, element)
VMINMAX(sb, s8)
VMINMAX(sh, s16)
VMINMAX(sw, s32)
VMINMAX(ub, u8)
VMINMAX(uh, u16)
VMINMAX(uw, u32)
#undef VMINMAX_DO
#undef VMINMAX
void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        r->s16[i] = (int16_t) (prod + c->s16[i]);
    }
}
#define VMRG_DO(name, element, highp)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        ppc_avr_t result;                                               \
        int i;                                                          \
        size_t n_elems = ARRAY_SIZE(r->element);                        \
                                                                        \
        for (i = 0; i < n_elems / 2; i++) {                             \
            if (highp) {                                                \
                result.element[i*2+HI_IDX] = a->element[i];             \
                result.element[i*2+LO_IDX] = b->element[i];             \
            } else {                                                    \
                result.element[n_elems - i * 2 - (1 + HI_IDX)] =        \
                    b->element[n_elems - i - 1];                        \
                result.element[n_elems - i * 2 - (1 + LO_IDX)] =        \
                    a->element[n_elems - i - 1];                        \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
#if defined(HOST_WORDS_BIGENDIAN)
#define MRGHI 0
#define MRGLO 1
#else
#define MRGHI 1
#define MRGLO 0
#endif
#define VMRG(suffix, element)                   \
    VMRG_DO(mrgl##suffix, element, MRGHI)       \
    VMRG_DO(mrgh##suffix, element, MRGLO)
VMRG(b, u8)
VMRG(h, u16)
VMRG(w, u32)
#undef VMRG_DO
#undef VMRG
#undef MRGHI
#undef MRGLO
void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
        prod[i] = (int32_t)a->s8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = (int32_t)a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint16_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        prod[i] = a->u8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}
#define VMUL_DO(name, mul_element, prod_element, cast, evenp)           \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        VECTOR_FOR_INORDER_I(i, prod_element) {                         \
            if (evenp) {                                                \
                r->prod_element[i] =                                    \
                    (cast)a->mul_element[i * 2 + HI_IDX] *              \
                    (cast)b->mul_element[i * 2 + HI_IDX];               \
            } else {                                                    \
                r->prod_element[i] =                                    \
                    (cast)a->mul_element[i * 2 + LO_IDX] *              \
                    (cast)b->mul_element[i * 2 + LO_IDX];               \
            }                                                           \
        }                                                               \
    }
#define VMUL(suffix, mul_element, prod_element, cast)           \
    VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1)   \
    VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
VMUL(sb, s8, s16, int16_t)
VMUL(sh, s16, s32, int32_t)
VMUL(sw, s32, s64, int64_t)
VMUL(ub, u8, u16, uint16_t)
VMUL(uh, u16, u32, uint32_t)
VMUL(uw, u32, u64, uint64_t)
#undef VMUL_DO
#undef VMUL
void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int s = c->u8[i] & 0x1f;
#if defined(HOST_WORDS_BIGENDIAN)
        int index = s & 0xf;
#else
        int index = 15 - (s & 0xf);
#endif

        if (s & 0x10) {
            result.u8[i] = b->u8[index];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
    *r = result;
}
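
/*
 * Example (editor's illustration): each control byte selects one byte
 * of the 32-byte concatenation a||b; bit 4 of the selector chooses the
 * source vector and the low four bits the byte within it. In PowerPC
 * (big-endian) element numbering, c filled with 0x10 replicates b's
 * byte 0 into every lane, while c = {0, 1, ..., 15} just copies a.
 */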
#if defined(HOST_WORDS_BIGENDIAN)
#define PKBIG 1
#else
#define PKBIG 0
#endif
void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result;
#if defined(HOST_WORDS_BIGENDIAN)
    const ppc_avr_t *x[2] = { a, b };
#else
    const ppc_avr_t *x[2] = { b, a };
#endif

    VECTOR_FOR_INORDER_I(i, u64) {
        VECTOR_FOR_INORDER_I(j, u32) {
            uint32_t e = x[i]->u32[j];

            result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
                                 ((e >> 6) & 0x3e0) |
                                 ((e >> 3) & 0x1f));
        }
    }
    *r = result;
}
#define VPK(suffix, from, to, cvt, dosat)                               \
    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *a, ppc_avr_t *b)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        ppc_avr_t result;                                               \
        ppc_avr_t *a0 = PKBIG ? a : b;                                  \
        ppc_avr_t *a1 = PKBIG ? b : a;                                  \
                                                                        \
        VECTOR_FOR_INORDER_I(i, from) {                                 \
            result.to[i] = cvt(a0->from[i], &sat);                      \
            result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);  \
        }                                                               \
        *r = result;                                                    \
        if (dosat && sat) {                                             \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define I(x, y) (x)
VPK(shss, s16, s8, cvtshsb, 1)
VPK(shus, s16, u8, cvtshub, 1)
VPK(swss, s32, s16, cvtswsh, 1)
VPK(swus, s32, u16, cvtswuh, 1)
VPK(uhus, u16, u8, cvtuhub, 1)
VPK(uwus, u32, u16, cvtuwuh, 1)
VPK(uhum, u16, u8, I, 0)
VPK(uwum, u32, u16, I, 0)
#undef I
#undef VPK
#undef PKBIG
void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
    }
}
#define VRFI(suffix, rounding)                                  \
    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,    \
                             ppc_avr_t *b)                      \
    {                                                           \
        int i;                                                  \
        float_status s = env->vec_status;                       \
                                                                \
        set_float_rounding_mode(rounding, &s);                  \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                \
            r->f[i] = float32_round_to_int(b->f[i], &s);        \
        }                                                       \
    }
VRFI(n, float_round_nearest_even)
VRFI(m, float_round_down)
VRFI(p, float_round_up)
VRFI(z, float_round_to_zero)
#undef VRFI
#define VROTATE(suffix, element)                                        \
    void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int mask = ((1 <<                                  \
                                  (3 + (sizeof(a->element[0]) >> 1)))   \
                                 - 1);                                  \
            unsigned int shift = b->element[i] & mask;                  \
            r->element[i] = (a->element[i] << shift) |                  \
                (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
        }                                                               \
    }
VROTATE(b, u8)
VROTATE(h, u16)
VROTATE(w, u32)
#undef VROTATE
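
/*
 * Editor's note on the mask expression above: for element sizes of 1,
 * 2 and 4 bytes it evaluates to exactly the valid shift/rotate range
 * for 8-, 16- and 32-bit lanes:
 *
 *     size 1: (1 << (3 + 0)) - 1 == 0x07
 *     size 2: (1 << (3 + 1)) - 1 == 0x0f
 *     size 4: (1 << (3 + 2)) - 1 == 0x1f
 */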
void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        float32 t = float32_sqrt(b->f[i], &env->vec_status);

        r->f[i] = float32_div(float32_one, t, &env->vec_status);
    }
}
void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                 ppc_avr_t *c)
{
    r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
    r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
}
void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        r->f[i] = float32_exp2(b->f[i], &env->vec_status);
    }
}

void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        r->f[i] = float32_log2(b->f[i], &env->vec_status);
    }
}
#if defined(HOST_WORDS_BIGENDIAN)
#define LEFT 0
#define RIGHT 1
#else
#define LEFT 1
#define RIGHT 0
#endif
/* The specification says that the results are undefined if all of the
 * shift counts are not identical.  We check to make sure that they are
 * to conform to what real hardware appears to do.  */
#define VSHIFT(suffix, leftp)                                           \
    void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)    \
    {                                                                   \
        int shift = b->u8[LO_IDX*15] & 0x7;                             \
        int doit = 1;                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u8); i++) {                       \
            doit = doit && ((b->u8[i] & 0x7) == shift);                 \
        }                                                               \
        if (doit) {                                                     \
            if (shift == 0) {                                           \
                *r = *a;                                                \
            } else if (leftp) {                                         \
                uint64_t carry = a->u64[LO_IDX] >> (64 - shift);        \
                                                                        \
                r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry;     \
                r->u64[LO_IDX] = a->u64[LO_IDX] << shift;               \
            } else {                                                    \
                uint64_t carry = a->u64[HI_IDX] << (64 - shift);        \
                                                                        \
                r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry;     \
                r->u64[HI_IDX] = a->u64[HI_IDX] >> shift;               \
            }                                                           \
        }                                                               \
    }
VSHIFT(l, LEFT)
VSHIFT(r, RIGHT)
#undef VSHIFT
#undef LEFT
#undef RIGHT
#define VSL(suffix, element)                                            \
    void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int mask = ((1 <<                                  \
                                  (3 + (sizeof(a->element[0]) >> 1)))   \
                                 - 1);                                  \
            unsigned int shift = b->element[i] & mask;                  \
                                                                        \
            r->element[i] = a->element[i] << shift;                     \
        }                                                               \
    }
VSL(b, u8)
VSL(h, u16)
VSL(w, u32)
#undef VSL
void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
{
    int sh = shift & 0xf;
    int i;
    ppc_avr_t result;

#if defined(HOST_WORDS_BIGENDIAN)
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = sh + i;
        if (index > 0xf) {
            result.u8[i] = b->u8[index - 0x10];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
#else
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = (16 - sh) + i;
        if (index > 0xf) {
            result.u8[i] = a->u8[index - 0x10];
        } else {
            result.u8[i] = b->u8[index];
        }
    }
#endif
    *r = result;
}
void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16-sh], 0, sh);
#else
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#endif
}
/* Experimental testing shows that hardware masks the immediate.  */
#define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
#if defined(HOST_WORDS_BIGENDIAN)
#define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
#else
#define SPLAT_ELEMENT(element)                                  \
    (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
#endif
#define VSPLT(suffix, element)                                          \
    void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
    {                                                                   \
        uint32_t s = b->element[SPLAT_ELEMENT(element)];                \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = s;                                          \
        }                                                               \
    }
VSPLT(b, u8)
VSPLT(h, u16)
VSPLT(w, u32)
#undef VSPLT
#undef SPLAT_ELEMENT
#undef _SPLAT_MASKED
#define VSPLTI(suffix, element, splat_type)                     \
    void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat)   \
    {                                                           \
        splat_type x = (int8_t)(splat << 3) >> 3;               \
        int i;                                                  \
                                                                \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {          \
            r->element[i] = x;                                  \
        }                                                       \
    }
VSPLTI(b, s8, int8_t)
VSPLTI(h, s16, int16_t)
VSPLTI(w, s32, int32_t)
#undef VSPLTI
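
/*
 * Editor's note: (int8_t)(splat << 3) >> 3 sign-extends the 5-bit SIMM
 * field by shifting it to the top of a byte and shifting back
 * arithmetically, e.g.
 *
 *     splat = 0x1f: (int8_t)0xf8 >> 3 == -1
 *     splat = 0x0f: (int8_t)0x78 >> 3 == 15
 */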
#define VSR(suffix, element)                                            \
    void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int mask = ((1 <<                                  \
                                  (3 + (sizeof(a->element[0]) >> 1)))   \
                                 - 1);                                  \
            unsigned int shift = b->element[i] & mask;                  \
                                                                        \
            r->element[i] = a->element[i] >> shift;                     \
        }                                                               \
    }
VSR(ab, s8)
VSR(ah, s16)
VSR(aw, s32)
VSR(b, u8)
VSR(h, u16)
VSR(w, u32)
#undef VSR
void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#else
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#endif
}
void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = a->u32[i] >= b->u32[i];
    }
}
void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int64_t t;
    int i, upper;
    ppc_avr_t result;
    int sat = 0;

#if defined(HOST_WORDS_BIGENDIAN)
    upper = ARRAY_SIZE(r->s32)-1;
#else
    upper = 0;
#endif
    t = (int64_t)b->s32[upper];
    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        t += a->s32[i];
        result.s32[i] = 0;
    }
    result.s32[upper] = cvtsdsw(t, &sat);
    *r = result;

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}
void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j, upper;
    ppc_avr_t result;
    int sat = 0;

#if defined(HOST_WORDS_BIGENDIAN)
    upper = 1;
#else
    upper = 0;
#endif
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        int64_t t = (int64_t)b->s32[upper + i * 2];

        result.u64[i] = 0;
        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
            t += a->s32[2 * i + j];
        }
        result.s32[upper + i * 2] = cvtsdsw(t, &sat);
    }

    *r = result;
    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}
void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
            t += a->s8[4 * i + j];
        }
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        t += a->s16[2 * i] + a->s16[2 * i + 1];
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t t = (uint64_t)b->u32[i];

        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
            t += a->u8[4 * i + j];
        }
        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}
#if defined(HOST_WORDS_BIGENDIAN)
#define UPKHI 1
#define UPKLO 0
#else
#define UPKHI 0
#define UPKLO 1
#endif
#define VUPKPX(suffix, hi)                                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u32); i++) {                      \
            uint16_t e = b->u16[hi ? i : i+4];                          \
            uint8_t a = (e >> 15) ? 0xff : 0;                           \
            uint8_t r = (e >> 10) & 0x1f;                               \
            uint8_t g = (e >> 5) & 0x1f;                                \
            uint8_t b = e & 0x1f;                                       \
                                                                        \
            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b;       \
        }                                                               \
        *r = result;                                                    \
    }
VUPKPX(lpx, UPKLO)
VUPKPX(hpx, UPKHI)
#undef VUPKPX

#define VUPK(suffix, unpacked, packee, hi)                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        if (hi) {                                                       \
            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
                result.unpacked[i] = b->packee[i];                      \
            }                                                           \
        } else {                                                        \
            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
                 i++) {                                                 \
                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
VUPK(hsb, s16, s8, UPKHI)
VUPK(hsh, s32, s16, UPKHI)
VUPK(lsb, s16, s8, UPKLO)
VUPK(lsh, s32, s16, UPKLO)
#undef VUPK
#undef UPKHI
#undef UPKLO
#define VGENERIC_DO(name, element)                                      \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *b)                     \
    {                                                                   \
        int i;                                                          \
                                                                        \
        VECTOR_FOR_INORDER_I(i, element) {                              \
            r->element[i] = name(b->element[i]);                        \
        }                                                               \
    }

#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
#define clzw(v) clz32((v))
#define clzd(v) clz64((v))

VGENERIC_DO(clzb, u8)
VGENERIC_DO(clzh, u16)
VGENERIC_DO(clzw, u32)
VGENERIC_DO(clzd, u64)

#undef clzb
#undef clzh
#undef clzw
#undef clzd
#undef VGENERIC_DO

#undef VECTOR_FOR_INORDER_I
#undef HI_IDX
#undef LO_IDX
/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
        (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}
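
/*
 * Example (editor's illustration): hbrev mirrors a nibble by table
 * lookup, so byte_reverse mirrors a byte by reversing and swapping its
 * two nibbles:
 *
 *     byte_reverse(0xA1) == hbrev[0xA] | (hbrev[0x1] << 4)
 *                        == 0x5 | (0x8 << 4) == 0x85
 *
 * (0b10100001 mirrored is 0b10000101); word_reverse applies the same
 * trick per byte while also swapping byte order.
 */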
#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}
uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}
/* 440 specific */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}