/*
 * PowerPC integer and vector emulation helpers for QEMU.
 *
 * Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "cpu.h"
#include "qemu/host-utils.h"
#include "helper.h"

#include "helper_regs.h"

/*****************************************************************************/
/* Fixed point operations helpers */
#if defined(TARGET_PPC64)

uint64_t helper_mulldo(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
{
    int64_t th;
    uint64_t tl;

    muls64(&tl, (uint64_t *)&th, arg1, arg2);
    /* If th != 0 && th != -1, then we had an overflow */
    if (likely((uint64_t)(th + 1) <= 1)) {
        env->ov = 0;
    } else {
        env->so = env->ov = 1;
    }
    return (int64_t)tl;
}
#endif

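/*
 * The overflow test in helper_mulldo() above folds the two acceptable
 * high-word values into one unsigned comparison: per the comment, only
 * th == 0 and th == -1 count as "no overflow".  Adding 1 maps {0, -1}
 * onto {1, 0}, so "(uint64_t)(th + 1) <= 1" accepts exactly those two
 * cases.  Illustrative values (not from the original source):
 *
 *   th == 0   ->  (uint64_t)(th + 1) == 1  ->  OV cleared
 *   th == -1  ->  (uint64_t)(th + 1) == 0  ->  OV cleared
 *   th == 5   ->  (uint64_t)(th + 1) == 6  ->  OV and SO set
 */
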
target_ulong helper_cntlzw(target_ulong t)
{
    return clz32(t);
}

#if defined(TARGET_PPC64)
target_ulong helper_cntlzd(target_ulong t)
{
    return clz64(t);
}
#endif

target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}

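/*
 * cmpb worked example (illustrative values): rs = 0x11223344 and
 * rb = 0x11AA33BB agree in the 0x11 and 0x33 byte lanes, so ra comes
 * back as 0xFF00FF00: each matching byte lane is filled with ones,
 * each differing lane with zeros.
 */
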
/* shift right arithmetic helper */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->ca = 0;
            } else {
                env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca = 0;
        }
    } else {
        ret = (int32_t)value >> 31;
        env->ca = (ret != 0);
    }
    return (target_long)ret;
}

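/*
 * XER[CA] after sraw means "a one bit was shifted out of a negative
 * value", which is exactly the case where the arithmetic shift differs
 * from a divide that rounds toward zero.  Illustrative values: shifting
 * value = 0xFFFFFFF5 (-11) right by 1 gives -6 and drops a set bit 0,
 * so CA = 1; shifting 0xFFFFFFF4 (-12) drops a zero bit, so CA = 0.
 */
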
#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca = 0;
            } else {
                env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca = (ret != 0);
    }
    return ret;
}
#endif

#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    val = (val & 0x5555555555555555ULL) + ((val >> 1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >> 2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >> 4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >> 8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}

target_ulong helper_popcntd(target_ulong val)
{
    return ctpop64(val);
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    val = (val & 0x55555555) + ((val >> 1) & 0x55555555);
    val = (val & 0x33333333) + ((val >> 2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >> 4) & 0x0f0f0f0f);
    val = (val & 0x00ff00ff) + ((val >> 8) & 0x00ff00ff);
    val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
    return val;
}
#endif

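/*
 * The masked adds above are the classic SWAR population-count ladder:
 * the first step sums adjacent bits into 2-bit fields, the second into
 * 4-bit fields, the third into bytes.  popcntb intentionally stops at
 * the byte step because the instruction returns one count per byte,
 * e.g. (illustrative) popcntb(0xff) == 0x08.  popcntw folds two more
 * times to get one count per 32-bit word, and the 64-bit build's
 * popcntd simply collapses the whole doubleword with ctpop64().
 */
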
/*****************************************************************************/
/* PowerPC 601 specific instructions (POWER bridge) */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return tmp / (int32_t)arg2;
    }
}

target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        if ((int32_t)tmp != tmp) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
        return tmp;
    }
}

target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
                          target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->ov = 0;
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

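/*
 * The POWER bridge divides treat MQ as a 32-bit extension of the
 * dividend: the quotient actually computed is (RA || MQ) / RB, with
 * the remainder left behind in MQ.  Illustrative values: arg1 = 0,
 * MQ = 100, arg2 = 7 gives tmp = 100, returns quotient 14 and leaves
 * MQ = 2.
 */
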
/*****************************************************************************/
/* 602 specific instructions */
/* mfrom is the most crazy instruction ever seen, imho ! */
/* Real implementation uses a ROM table. Do the same */
/*
 * Extremely decomposed:
 *                      -arg / 256
 * return 256 * log10(10          + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif

/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define HI_IDX 0
#define LO_IDX 1
#else
#define HI_IDX 1
#define LO_IDX 0
#endif

#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
#endif

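/*
 * VECTOR_FOR_INORDER_I iterates in PowerPC element order (element 0
 * first) on either host endianness: ascending indices on a big-endian
 * host, descending on a little-endian one.  Note that it hard-codes
 * the destination operand name "r".  A typical use (sketch, mirroring
 * helper_lvsl() below):
 *
 *     VECTOR_FOR_INORDER_I(i, u8) {
 *         r->u8[i] = j++;
 *     }
 */
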
/* Saturating arithmetic helpers.  */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU

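/*
 * Usage sketch for the generated converters (illustrative values):
 *
 *     int sat = 0;
 *     int8_t v = cvtshsb(300, &sat);   // clamps to 127, sets sat = 1
 *     v = cvtshsb(-5, &sat);           // in range: v = -5, sat stays 1
 *
 * sat is only ever set, never cleared, so it accumulates like the
 * sticky VSCR[SAT] bit it eventually feeds.
 */
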
void helper_lvsl(ppc_avr_t *r, target_ulong sh)
{
    int i, j = (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

void helper_lvsr(ppc_avr_t *r, target_ulong sh)
{
    int i, j = 0x10 - (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
{
#if defined(HOST_WORDS_BIGENDIAN)
    env->vscr = r->u32[3];
#else
    env->vscr = r->u32[0];
#endif
    set_flush_to_zero(vscr_nj, &env->vec_status);
}

void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}

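/*
 * The carry-out test above avoids widening: a + b overflows 32 bits
 * exactly when b > 0xffffffff - a, and for unsigned arithmetic
 * 0xffffffff - a == ~a, so "~a < b" is 1 precisely when the addition
 * would carry.  Illustrative values: a = 0xffffffff, b = 1 gives
 * ~a = 0 < 1, i.e. a carry of 1.
 */
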
#define VARITH_DO(name, op, element)                                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = a->element[i] op b->element[i];             \
        }                                                               \
    }
#define VARITH(suffix, element)                 \
    VARITH_DO(add##suffix, +, element)          \
    VARITH_DO(sub##suffix, -, element)
VARITH(ubm, u8)
VARITH(uhm, u16)
VARITH(uwm, u32)
#undef VARITH_DO
#undef VARITH

#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            r->f[i] = func(a->f[i], b->f[i], &env->vec_status);         \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP

#define VARITHFPFMA(suffix, type)                                       \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b, ppc_avr_t *c)                   \
    {                                                                   \
        int i;                                                          \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i],         \
                                     type, &env->vec_status);           \
        }                                                               \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
#undef VARITHFPFMA

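/*
 * Operand order above follows the AltiVec definition: vmaddfp computes
 * (vA * vC) + vB, so the middle register vB is the addend and is passed
 * third to float32_muladd().  For vnmsubfp, negating both the addend
 * and the result yields -((vA * vC) - vB) in one fused step.
 */
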
#define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
        r->element[i] = cvt(result, &sat);                              \
    }

#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
    void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,   \
                        ppc_avr_t *b)                                   \
    {                                                                   \
        int sat = 0;                                                    \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            switch (sizeof(r->element[0])) {                            \
            case 1:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 2:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 4:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED

#define VAVG_DO(name, element, etype)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
            r->element[i] = x >> 1;                                     \
        }                                                               \
    }

#define VAVG(type, signed_element, signed_type, unsigned_element,       \
             unsigned_type)                                             \
    VAVG_DO(avgs##type, signed_element, signed_type)                    \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
#undef VAVG

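/*
 * The widened sum plus 1 implements round-half-up averaging without
 * losing the carry bit.  Illustrative values for vavgub: 1 and 2 give
 * x = 4 and a result of 2 (1.5 rounds up); 255 and 255 give x = 511 in
 * the uint16_t intermediate and a result of 255, with no wraparound.
 */
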
#define VCF(suffix, cvt, element)                                       \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            float32 t = cvt(b->element[i], &env->vec_status);           \
            r->f[i] = float32_scalbn(t, -uim, &env->vec_status);        \
        }                                                               \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF

#define VCMP_DO(suffix, compare, element, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            uint32_t result = (a->element[i] compare b->element[i] ?    \
                               ones : 0x0);                             \
            switch (sizeof(a->element[0])) {                            \
            case 4:                                                     \
                r->u32[i] = result;                                     \
                break;                                                  \
            case 2:                                                     \
                r->u16[i] = result;                                     \
                break;                                                  \
            case 1:                                                     \
                r->u8[i] = result;                                      \
                break;                                                  \
            }                                                           \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMP(suffix, compare, element)          \
    VCMP_DO(suffix, compare, element, 0)        \
    VCMP_DO(suffix##_dot, compare, element, 1)
VCMP(equb, ==, u8)
VCMP(equh, ==, u16)
VCMP(equw, ==, u32)
VCMP(gtub, >, u8)
VCMP(gtuh, >, u16)
VCMP(gtuw, >, u32)
VCMP(gtsb, >, s8)
VCMP(gtsh, >, s16)
VCMP(gtsw, >, s32)
#undef VCMP_DO
#undef VCMP

#define VCMPFP_DO(suffix, compare, order, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            uint32_t result;                                            \
            int rel = float32_compare_quiet(a->f[i], b->f[i],           \
                                            &env->vec_status);          \
            if (rel == float_relation_unordered) {                      \
                result = 0;                                             \
            } else if (rel compare order) {                             \
                result = ones;                                          \
            } else {                                                    \
                result = 0;                                             \
            }                                                           \
            r->u32[i] = result;                                         \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMPFP(suffix, compare, order)          \
    VCMPFP_DO(suffix, compare, order, 0)        \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP

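/*
 * The vcmpgefp instantiation reads oddly at first sight: because the
 * unordered case has already been filtered out, "greater or equal" is
 * equivalent to "rel != float_relation_less", so gefp is generated
 * with the (!=, float_relation_less) pair while eqfp and gtfp test rel
 * for equality with their relation.
 */
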
static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
                                    ppc_avr_t *a, ppc_avr_t *b, int record)
{
    int i;
    int all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
        if (le_rel == float_relation_unordered) {
            r->u32[i] = 0xc0000000;
            /* ALL_IN does not need to be updated here.  */
        } else {
            float32 bneg = float32_chs(b->f[i]);
            int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}

void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}

void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}

#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            if (float32_is_any_nan(b->f[i])) {                          \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f[i], &s);            \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT

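/*
 * Widening to float64 before the scalbn appears deliberate: uim can be
 * as large as 31, and scaling a float32 by 2**uim may overflow float32
 * range for inputs that should merely saturate.  Every float32 value
 * is exactly representable in float64, so the only inexactness left is
 * the final round-toward-zero conversion, followed by saturation.
 */
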
void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                      ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);

        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#define VMINMAX_DO(name, compare, element)                              \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            if (a->element[i] compare b->element[i]) {                  \
                r->element[i] = b->element[i];                          \
            } else {                                                    \
                r->element[i] = a->element[i];                          \
            }                                                           \
        }                                                               \
    }
#define VMINMAX(suffix, element)                \
    VMINMAX_DO(min##suffix, >, element)         \
    VMINMAX_DO(max##suffix, <, element)
VMINMAX(sb, s8)
VMINMAX(sh, s16)
VMINMAX(sw, s32)
VMINMAX(ub, u8)
VMINMAX(uh, u16)
VMINMAX(uw, u32)
#undef VMINMAX_DO
#undef VMINMAX

void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        r->s16[i] = (int16_t) (prod + c->s16[i]);
    }
}

#define VMRG_DO(name, element, highp)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        ppc_avr_t result;                                               \
        int i;                                                          \
        size_t n_elems = ARRAY_SIZE(r->element);                        \
                                                                        \
        for (i = 0; i < n_elems / 2; i++) {                             \
            if (highp) {                                                \
                result.element[i*2+HI_IDX] = a->element[i];             \
                result.element[i*2+LO_IDX] = b->element[i];             \
            } else {                                                    \
                result.element[n_elems - i * 2 - (1 + HI_IDX)] =        \
                    b->element[n_elems - i - 1];                        \
                result.element[n_elems - i * 2 - (1 + LO_IDX)] =        \
                    a->element[n_elems - i - 1];                        \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
#if defined(HOST_WORDS_BIGENDIAN)
#define MRGHI 0
#define MRGLO 1
#else
#define MRGHI 1
#define MRGLO 0
#endif
#define VMRG(suffix, element)                   \
    VMRG_DO(mrgl##suffix, element, MRGHI)       \
    VMRG_DO(mrgh##suffix, element, MRGLO)
VMRG(b, u8)
VMRG(h, u16)
VMRG(w, u32)
#undef VMRG_DO
#undef VMRG
#undef MRGHI
#undef MRGLO

void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
        prod[i] = (int32_t)a->s8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = (int32_t)a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint16_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        prod[i] = a->u8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#define VMUL_DO(name, mul_element, prod_element, evenp)                 \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        VECTOR_FOR_INORDER_I(i, prod_element) {                         \
            if (evenp) {                                                \
                r->prod_element[i] = a->mul_element[i * 2 + HI_IDX] *   \
                    b->mul_element[i * 2 + HI_IDX];                     \
            } else {                                                    \
                r->prod_element[i] = a->mul_element[i * 2 + LO_IDX] *   \
                    b->mul_element[i * 2 + LO_IDX];                     \
            }                                                           \
        }                                                               \
    }
#define VMUL(suffix, mul_element, prod_element)         \
    VMUL_DO(mule##suffix, mul_element, prod_element, 1) \
    VMUL_DO(mulo##suffix, mul_element, prod_element, 0)
VMUL(sb, s8, s16)
VMUL(sh, s16, s32)
VMUL(ub, u8, u16)
VMUL(uh, u16, u32)
#undef VMUL_DO
#undef VMUL

void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int s = c->u8[i] & 0x1f;
#if defined(HOST_WORDS_BIGENDIAN)
        int index = s & 0xf;
#else
        int index = 15 - (s & 0xf);
#endif

        if (s & 0x10) {
            result.u8[i] = b->u8[index];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
    *r = result;
}

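/*
 * Each permute control byte picks one of the 32 source bytes: bit 4
 * selects the source register (0 = vA, 1 = vB) and the low four bits
 * select the byte within it, in PowerPC byte numbering.  Illustrative
 * value: c->u8[0] = 0x13 places vB's byte 3 into result byte 0.
 */
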
#if defined(HOST_WORDS_BIGENDIAN)
#define PKBIG 1
#else
#define PKBIG 0
#endif
void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result;
#if defined(HOST_WORDS_BIGENDIAN)
    const ppc_avr_t *x[2] = { a, b };
#else
    const ppc_avr_t *x[2] = { b, a };
#endif

    VECTOR_FOR_INORDER_I(i, u64) {
        VECTOR_FOR_INORDER_I(j, u32) {
            uint32_t e = x[i]->u32[j];

            result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
                                 ((e >> 6) & 0x3e0) |
                                 ((e >> 3) & 0x1f));
        }
    }
    *r = result;
}

#define VPK(suffix, from, to, cvt, dosat)                               \
    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *a, ppc_avr_t *b)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        ppc_avr_t result;                                               \
        ppc_avr_t *a0 = PKBIG ? a : b;                                  \
        ppc_avr_t *a1 = PKBIG ? b : a;                                  \
                                                                        \
        VECTOR_FOR_INORDER_I(i, from) {                                 \
            result.to[i] = cvt(a0->from[i], &sat);                      \
            result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);  \
        }                                                               \
        *r = result;                                                    \
        if (dosat && sat) {                                             \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define I(x, y) (x)
VPK(shss, s16, s8, cvtshsb, 1)
VPK(shus, s16, u8, cvtshub, 1)
VPK(swss, s32, s16, cvtswsh, 1)
VPK(swus, s32, u16, cvtswuh, 1)
VPK(uhus, u16, u8, cvtuhub, 1)
VPK(uwus, u32, u16, cvtuwuh, 1)
VPK(uhum, u16, u8, I, 0)
VPK(uwum, u32, u16, I, 0)
#undef I
#undef VPK
#undef PKBIG

void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
    }
}

#define VRFI(suffix, rounding)                                  \
    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,    \
                             ppc_avr_t *b)                      \
    {                                                           \
        int i;                                                  \
        float_status s = env->vec_status;                       \
                                                                \
        set_float_rounding_mode(rounding, &s);                  \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                \
            r->f[i] = float32_round_to_int (b->f[i], &s);       \
        }                                                       \
    }
VRFI(n, float_round_nearest_even)
VRFI(m, float_round_down)
VRFI(p, float_round_up)
VRFI(z, float_round_to_zero)
#undef VRFI

#define VROTATE(suffix, element)                                        \
    void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int mask = ((1 <<                                  \
                                  (3 + (sizeof(a->element[0]) >> 1)))   \
                                 - 1);                                  \
            unsigned int shift = b->element[i] & mask;                  \
            r->element[i] = (a->element[i] << shift) |                  \
                (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
        }                                                               \
    }
VROTATE(b, u8)
VROTATE(h, u16)
VROTATE(w, u32)
#undef VROTATE

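/*
 * The mask expression derives the rotate-count width from the element
 * size: sizeof() is 1, 2 or 4, so (sizeof >> 1) is 0, 1 or 2 and the
 * mask evaluates to (1 << 3) - 1 = 7, (1 << 4) - 1 = 15 or
 * (1 << 5) - 1 = 31, just enough bits to rotate within one element.
 * The VSL and VSR macros below reuse the same trick for their shift
 * counts.
 */
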
void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        float32 t = float32_sqrt(b->f[i], &env->vec_status);

        r->f[i] = float32_div(float32_one, t, &env->vec_status);
    }
}

void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                 ppc_avr_t *c)
{
    r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
    r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
}

void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        r->f[i] = float32_exp2(b->f[i], &env->vec_status);
    }
}

void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        r->f[i] = float32_log2(b->f[i], &env->vec_status);
    }
}

#if defined(HOST_WORDS_BIGENDIAN)
#define LEFT 0
#define RIGHT 1
#else
#define LEFT 1
#define RIGHT 0
#endif
/* The specification says that the results are undefined if all of the
 * shift counts are not identical.  We check to make sure that they are
 * to conform to what real hardware appears to do.  */
#define VSHIFT(suffix, leftp)                                           \
    void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)    \
    {                                                                   \
        int shift = b->u8[LO_IDX*15] & 0x7;                             \
        int doit = 1;                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u8); i++) {                       \
            doit = doit && ((b->u8[i] & 0x7) == shift);                 \
        }                                                               \
        if (doit) {                                                     \
            if (shift == 0) {                                           \
                *r = *a;                                                \
            } else if (leftp) {                                         \
                uint64_t carry = a->u64[LO_IDX] >> (64 - shift);        \
                                                                        \
                r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry;     \
                r->u64[LO_IDX] = a->u64[LO_IDX] << shift;               \
            } else {                                                    \
                uint64_t carry = a->u64[HI_IDX] << (64 - shift);        \
                                                                        \
                r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry;     \
                r->u64[HI_IDX] = a->u64[HI_IDX] >> shift;               \
            }                                                           \
        }                                                               \
    }

VSHIFT(l, LEFT)
VSHIFT(r, RIGHT)
#undef VSHIFT
#undef LEFT
#undef RIGHT

#define VSL(suffix, element)                                            \
    void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int mask = ((1 <<                                  \
                                  (3 + (sizeof(a->element[0]) >> 1)))   \
                                 - 1);                                  \
            unsigned int shift = b->element[i] & mask;                  \
                                                                        \
            r->element[i] = a->element[i] << shift;                     \
        }                                                               \
    }
VSL(b, u8)
VSL(h, u16)
VSL(w, u32)
#undef VSL

void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
{
    int sh = shift & 0xf;
    int i;
    ppc_avr_t result;

#if defined(HOST_WORDS_BIGENDIAN)
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = sh + i;
        if (index > 0xf) {
            result.u8[i] = b->u8[index - 0x10];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
#else
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = (16 - sh) + i;
        if (index > 0xf) {
            result.u8[i] = a->u8[index - 0x10];
        } else {
            result.u8[i] = b->u8[index];
        }
    }
#endif
    *r = result;
}

void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16-sh], 0, sh);
#else
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#endif
}

/* Experimental testing shows that hardware masks the immediate.  */
#define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
#if defined(HOST_WORDS_BIGENDIAN)
#define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
#else
#define SPLAT_ELEMENT(element)                                  \
    (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
#endif
#define VSPLT(suffix, element)                                          \
    void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
    {                                                                   \
        uint32_t s = b->element[SPLAT_ELEMENT(element)];                \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = s;                                          \
        }                                                               \
    }
VSPLT(b, u8)
VSPLT(h, u16)
VSPLT(w, u32)
#undef VSPLT
#undef SPLAT_ELEMENT
#undef _SPLAT_MASKED

#define VSPLTI(suffix, element, splat_type)                     \
    void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat)   \
    {                                                           \
        splat_type x = (int8_t)(splat << 3) >> 3;               \
        int i;                                                  \
                                                                \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {          \
            r->element[i] = x;                                  \
        }                                                       \
    }
VSPLTI(b, s8, int8_t)
VSPLTI(h, s16, int16_t)
VSPLTI(w, s32, int32_t)
#undef VSPLTI

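/*
 * (int8_t)(splat << 3) >> 3 sign-extends the 5-bit SIMM field: the
 * left shift parks the immediate's sign bit in bit 7 of an int8_t and
 * the arithmetic right shift drags it back.  Illustrative value:
 * splat = 0x1f yields x = -1, so vspltisb with SIMM = -1 fills every
 * byte of the result with 0xff.
 */
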
#define VSR(suffix, element)                                            \
    void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int mask = ((1 <<                                  \
                                  (3 + (sizeof(a->element[0]) >> 1)))   \
                                 - 1);                                  \
            unsigned int shift = b->element[i] & mask;                  \
                                                                        \
            r->element[i] = a->element[i] >> shift;                     \
        }                                                               \
    }
VSR(ab, s8)
VSR(ah, s16)
VSR(aw, s32)
VSR(b, u8)
VSR(h, u16)
VSR(w, u32)
#undef VSR

void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#else
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#endif
}

void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = a->u32[i] >= b->u32[i];
    }
}

void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int64_t t;
    int i, upper;
    ppc_avr_t result;
    int sat = 0;

#if defined(HOST_WORDS_BIGENDIAN)
    upper = ARRAY_SIZE(r->s32)-1;
#else
    upper = 0;
#endif
    t = (int64_t)b->s32[upper];
    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        t += a->s32[i];
        result.s32[i] = 0;
    }
    result.s32[upper] = cvtsdsw(t, &sat);
    *r = result;

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j, upper;
    ppc_avr_t result;
    int sat = 0;

#if defined(HOST_WORDS_BIGENDIAN)
    upper = 1;
#else
    upper = 0;
#endif
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        int64_t t = (int64_t)b->s32[upper + i * 2];

        result.u64[i] = 0;
        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
            t += a->s32[2 * i + j];
        }
        result.s32[upper + i * 2] = cvtsdsw(t, &sat);
    }

    *r = result;
    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
            t += a->s8[4 * i + j];
        }
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        t += a->s16[2 * i] + a->s16[2 * i + 1];
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t t = (uint64_t)b->u32[i];

        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
            t += a->u8[4 * i + j];
        }
        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#if defined(HOST_WORDS_BIGENDIAN)
#define UPKHI 1
#define UPKLO 0
#else
#define UPKHI 0
#define UPKLO 1
#endif
#define VUPKPX(suffix, hi)                                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u32); i++) {                      \
            uint16_t e = b->u16[hi ? i : i+4];                          \
            uint8_t a = (e >> 15) ? 0xff : 0;                           \
            uint8_t r = (e >> 10) & 0x1f;                               \
            uint8_t g = (e >> 5) & 0x1f;                                \
            uint8_t b = e & 0x1f;                                       \
                                                                        \
            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b;       \
        }                                                               \
        *r = result;                                                    \
    }
VUPKPX(lpx, UPKLO)
VUPKPX(hpx, UPKHI)
#undef VUPKPX

#define VUPK(suffix, unpacked, packee, hi)                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        if (hi) {                                                       \
            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
                result.unpacked[i] = b->packee[i];                      \
            }                                                           \
        } else {                                                        \
            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
                 i++) {                                                 \
                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
VUPK(hsb, s16, s8, UPKHI)
VUPK(hsh, s32, s16, UPKHI)
VUPK(lsb, s16, s8, UPKLO)
VUPK(lsh, s32, s16, UPKLO)
#undef VUPK
#undef UPKHI
#undef UPKLO

#undef VECTOR_FOR_INORDER_I
#undef HI_IDX
#undef LO_IDX

/*****************************************************************************/
/* SPE extension helpers */
/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

static inline uint32_t word_reverse(uint32_t val)
{
    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
        (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
}

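/*
 * hbrev[] mirrors a nibble, so byte_reverse() swaps the two mirrored
 * nibbles into each other's positions.  Illustrative value:
 * byte_reverse(0x12) = hbrev[0x1] | (hbrev[0x2] << 4)
 *                    = 0x8 | 0x40 = 0x48,
 * i.e. 00010010b mirrored into 01001000b.
 */
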
#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
{
    uint32_t a, b, d, mask;

    mask = UINT32_MAX >> (32 - MASKBITS);
    a = arg1 & mask;
    b = arg2 & mask;
    d = word_reverse(1 + word_reverse(a | ~b));
    return (arg1 & ~mask) | (d & b);
}

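/*
 * brinc is the bit-reversed increment used for FFT-style addressing:
 * conceptually it mirrors the masked low bits of arg1, adds one, and
 * mirrors them back, so successive calls step through a buffer in
 * bit-reversed order while (arg1 & ~mask) preserves the untouched
 * high bits of the address.
 */
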
uint32_t helper_cntlsw32(uint32_t val)
{
    if (val & 0x80000000) {
        return clz32(~val);
    } else {
        return clz32(val);
    }
}

uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}

/* 440 specific */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}