DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
+#ifndef IN_HELPER_PROTO
+/*
+ * Pass calls to memset directly to libc, without a thunk in qemu.
+ * Do not re-declare memset, especially since we fudge the type here;
+ * we assume sizeof(void *) == sizeof(size_t), which is true for
+ * all supported hosts.
+ */
+#define helper_memset memset
+DEF_HELPER_FLAGS_3(memset, TCG_CALL_NO_RWG, ptr, ptr, int, ptr)
+#endif /* IN_HELPER_PROTO */
+
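For illustration only (not part of the patch): the assumption spelled out in the comment above can be written as a hypothetical compile-time check.

    #include <stddef.h>

    /* The fudged "ptr" size argument is only interchangeable with libc
     * memset's size_t parameter when the two types have the same width. */
    _Static_assert(sizeof(void *) == sizeof(size_t),
                   "helper_memset relies on pointer-sized size_t");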
#ifdef CONFIG_SOFTMMU
DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
dh_ctype(t4), dh_ctype(t5), dh_ctype(t6), \
dh_ctype(t7));
+#define IN_HELPER_PROTO
+
#include "helper.h"
#include "trace/generated-helpers.h"
#include "tcg-runtime.h"
#include "plugin-helpers.h"
+#undef IN_HELPER_PROTO
+
#undef DEF_HELPER_FLAGS_0
#undef DEF_HELPER_FLAGS_1
#undef DEF_HELPER_FLAGS_2
#define tcg_gen_bswap16_tl tcg_gen_bswap16_i64
#define tcg_gen_bswap32_tl tcg_gen_bswap32_i64
#define tcg_gen_bswap64_tl tcg_gen_bswap64_i64
+#define tcg_gen_bswap_tl tcg_gen_bswap64_i64
#define tcg_gen_concat_tl_i64 tcg_gen_concat32_i64
#define tcg_gen_extr_i64_tl tcg_gen_extr32_i64
#define tcg_gen_andc_tl tcg_gen_andc_i64
#define tcg_gen_ext32s_tl tcg_gen_mov_i32
#define tcg_gen_bswap16_tl tcg_gen_bswap16_i32
#define tcg_gen_bswap32_tl tcg_gen_bswap32_i32
+#define tcg_gen_bswap_tl tcg_gen_bswap32_i32
#define tcg_gen_concat_tl_i64 tcg_gen_concat_i32_i64
#define tcg_gen_extr_i64_tl tcg_gen_extr_i64_i32
#define tcg_gen_andc_tl tcg_gen_andc_i32
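As a usage illustration (a hypothetical translator snippet, not from the patch), the new alias lets frontend code byte-swap a target-long value without testing TARGET_LONG_BITS:

    /* Resolves to tcg_gen_bswap32_i32 on 32-bit targets and to
     * tcg_gen_bswap64_i64 on 64-bit targets. */
    static void gen_bswap_reg(TCGv dst, TCGv src)
    {
        tcg_gen_bswap_tl(dst, src);
    }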
case INDEX_op_shl_i32:
if (c2) {
- tcg_out_opc_imm(s, OPC_SLLIW, a0, a1, a2);
+ tcg_out_opc_imm(s, OPC_SLLIW, a0, a1, a2 & 0x1f);
} else {
tcg_out_opc_reg(s, OPC_SLLW, a0, a1, a2);
}
break;
case INDEX_op_shl_i64:
if (c2) {
- tcg_out_opc_imm(s, OPC_SLLI, a0, a1, a2);
+ tcg_out_opc_imm(s, OPC_SLLI, a0, a1, a2 & 0x3f);
} else {
tcg_out_opc_reg(s, OPC_SLL, a0, a1, a2);
}
break;
case INDEX_op_shr_i32:
if (c2) {
- tcg_out_opc_imm(s, OPC_SRLIW, a0, a1, a2);
+ tcg_out_opc_imm(s, OPC_SRLIW, a0, a1, a2 & 0x1f);
} else {
tcg_out_opc_reg(s, OPC_SRLW, a0, a1, a2);
}
break;
case INDEX_op_shr_i64:
if (c2) {
- tcg_out_opc_imm(s, OPC_SRLI, a0, a1, a2);
+ tcg_out_opc_imm(s, OPC_SRLI, a0, a1, a2 & 0x3f);
} else {
tcg_out_opc_reg(s, OPC_SRL, a0, a1, a2);
}
break;
case INDEX_op_sar_i32:
if (c2) {
- tcg_out_opc_imm(s, OPC_SRAIW, a0, a1, a2);
+ tcg_out_opc_imm(s, OPC_SRAIW, a0, a1, a2 & 0x1f);
} else {
tcg_out_opc_reg(s, OPC_SRAW, a0, a1, a2);
}
break;
case INDEX_op_sar_i64:
if (c2) {
- tcg_out_opc_imm(s, OPC_SRAI, a0, a1, a2);
+ tcg_out_opc_imm(s, OPC_SRAI, a0, a1, a2 & 0x3f);
} else {
tcg_out_opc_reg(s, OPC_SRA, a0, a1, a2);
}
break;
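The masking above reflects a RISC-V encoding constraint (rationale assumed, not spelled out in the patch): the shamt immediate field holds only 5 bits in the *W forms and 6 bits in the 64-bit forms, so a constant count is reduced modulo the operand width before being encoded.

    #include <stdbool.h>

    /* Hypothetical helper showing the reduction applied to constant shift
     * counts before they go into a SLLIW/SRLIW/SRAIW (5-bit) or
     * SLLI/SRLI/SRAI (6-bit) shamt field. */
    static inline int riscv_shamt(bool is_64, int count)
    {
        return count & (is_64 ? 0x3f : 0x1f);
    }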
in_c = dup_const(vece, in_c);
if (in_c == 0) {
oprsz = maxsz;
+ vece = MO_8;
+ } else if (in_c == dup_const(MO_8, in_c)) {
+ vece = MO_8;
}
}
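The MO_8 comparison above detects constants that replicate a single byte; a standalone sketch of that test (hypothetical names, mirroring dup_const's MO_8 expansion) is:

    #include <stdint.h>
    #include <stdio.h>

    /* True iff the 64-bit constant equals its own low byte replicated across
     * all eight lanes, i.e. it can be materialized by memset. */
    static int is_byte_splat(uint64_t c)
    {
        return c == (c & 0xff) * 0x0101010101010101ull;
    }

    int main(void)
    {
        printf("%d %d\n",
               is_byte_splat(0x4242424242424242ull),  /* 1 */
               is_byte_splat(0x0100010001000100ull)); /* 0 */
        return 0;
    }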
/* Otherwise implement out of line. */
t_ptr = tcg_temp_new_ptr();
tcg_gen_addi_ptr(t_ptr, cpu_env, dofs);
+
+ /*
+ * This may be expand_clr for the tail of an operation, e.g.
+ * oprsz == 8 && maxsz == 64. The size of the clear is misaligned
+ * wrt simd_desc and will assert. Simply pass all replicated byte
+ * stores through to memset.
+ */
+ if (oprsz == maxsz && vece == MO_8) {
+ TCGv_ptr t_size = tcg_const_ptr(oprsz);
+ TCGv_i32 t_val;
+
+ if (in_32) {
+ t_val = in_32;
+ } else if (in_64) {
+ t_val = tcg_temp_new_i32();
+ tcg_gen_extrl_i64_i32(t_val, in_64);
+ } else {
+ t_val = tcg_const_i32(in_c);
+ }
+ gen_helper_memset(t_ptr, t_ptr, t_val, t_size);
+
+ if (!in_32) {
+ tcg_temp_free_i32(t_val);
+ }
+ tcg_temp_free_ptr(t_size);
+ tcg_temp_free_ptr(t_ptr);
+ return;
+ }
+
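In host terms, the branch above makes the generated code perform a single libc memset over the in-env vector storage; a hypothetical equivalent (names assumed) of the emitted helper call is:

    #include <stdint.h>
    #include <string.h>

    /* gen_helper_memset(t_ptr, t_ptr, t_val, t_size), with t_ptr = env + dofs,
     * t_val supplying the splat byte and t_size = oprsz, amounts to: */
    static void dup_via_memset(void *env, uint32_t dofs, uint32_t oprsz, int val)
    {
        memset((char *)env + dofs, val, oprsz);
    }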
t_desc = tcg_const_i32(simd_desc(oprsz, maxsz, 0));
if (vece == MO_64) {