#include "tcg-op.h"
#include "qemu/log.h"
#include "qemu/bitops.h"
+#include "arm_ldst.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
IWMMXT_OP_ENV(avgw0)
IWMMXT_OP_ENV(avgw1)
-IWMMXT_OP(msadb)
-
IWMMXT_OP_ENV(packuw)
IWMMXT_OP_ENV(packul)
IWMMXT_OP_ENV(packuq)
#define NEON_3R_VPMIN 21
#define NEON_3R_VQDMULH_VQRDMULH 22
#define NEON_3R_VPADD 23
+#define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
#define NEON_3R_VFM 25 /* VFMA, VFMS : float fused multiply-add */
#define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
#define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
[NEON_3R_VPMIN] = 0x7,
[NEON_3R_VQDMULH_VQRDMULH] = 0x6,
[NEON_3R_VPADD] = 0x7,
+ [NEON_3R_SHA] = 0xf, /* size field encodes op type */
[NEON_3R_VFM] = 0x5, /* size bit 1 encodes op */
[NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
[NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
#define NEON_2RM_VCEQ0 18
#define NEON_2RM_VCLE0 19
#define NEON_2RM_VCLT0 20
+#define NEON_2RM_SHA1H 21
#define NEON_2RM_VABS 22
#define NEON_2RM_VNEG 23
#define NEON_2RM_VCGT0_F 24
#define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
#define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
#define NEON_2RM_VSHLL 38
+#define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
#define NEON_2RM_VRINTN 40
#define NEON_2RM_VRINTX 41
#define NEON_2RM_VRINTA 42
[NEON_2RM_VCEQ0] = 0x7,
[NEON_2RM_VCLE0] = 0x7,
[NEON_2RM_VCLT0] = 0x7,
+ [NEON_2RM_SHA1H] = 0x4,
[NEON_2RM_VABS] = 0x7,
[NEON_2RM_VNEG] = 0x7,
[NEON_2RM_VCGT0_F] = 0x4,
[NEON_2RM_VMOVN] = 0x7,
[NEON_2RM_VQMOVN] = 0x7,
[NEON_2RM_VSHLL] = 0x7,
+ [NEON_2RM_SHA1SU1] = 0x4,
[NEON_2RM_VRINTN] = 0x4,
[NEON_2RM_VRINTX] = 0x4,
[NEON_2RM_VRINTA] = 0x4,
if (q && ((rd | rn | rm) & 1)) {
return 1;
}
+ /*
+ * The SHA-1/SHA-256 3-register instructions require special treatment
+ * here, as their size field is overloaded as an op type selector, and
+ * they all consume their input in a single pass.
+ */
+ if (op == NEON_3R_SHA) {
+ if (!q) {
+ return 1;
+ }
+ if (!u) { /* SHA-1 */
+ if (!arm_feature(env, ARM_FEATURE_V8_SHA1)) {
+ return 1;
+ }
+ tmp = tcg_const_i32(rd);
+ tmp2 = tcg_const_i32(rn);
+ tmp3 = tcg_const_i32(rm);
+ tmp4 = tcg_const_i32(size);
+ gen_helper_crypto_sha1_3reg(cpu_env, tmp, tmp2, tmp3, tmp4);
+ tcg_temp_free_i32(tmp4);
+ } else { /* SHA-256 */
+ if (!arm_feature(env, ARM_FEATURE_V8_SHA256) || size == 3) {
+ return 1;
+ }
+ tmp = tcg_const_i32(rd);
+ tmp2 = tcg_const_i32(rn);
+ tmp3 = tcg_const_i32(rm);
+ switch (size) {
+ case 0:
+ gen_helper_crypto_sha256h(cpu_env, tmp, tmp2, tmp3);
+ break;
+ case 1:
+ gen_helper_crypto_sha256h2(cpu_env, tmp, tmp2, tmp3);
+ break;
+ case 2:
+ gen_helper_crypto_sha256su1(cpu_env, tmp, tmp2, tmp3);
+ break;
+ }
+ }
+ tcg_temp_free_i32(tmp);
+ tcg_temp_free_i32(tmp2);
+ tcg_temp_free_i32(tmp3);
+ return 0;
+ }
if (size == 3 && op != NEON_3R_LOGIC) {
/* 64-bit element instructions. */
for (pass = 0; pass < (q ? 2 : 1); pass++) {
int src1_wide;
int src2_wide;
int prewiden;
- /* undefreq: bit 0 : UNDEF if size != 0
- * bit 1 : UNDEF if size == 0
- * bit 2 : UNDEF if U == 1
- * Note that [1:0] set implies 'always UNDEF'
+ /* undefreq: bit 0 : UNDEF if size == 0
+ * bit 1 : UNDEF if size == 1
+ * bit 2 : UNDEF if size == 2
+ * bit 3 : UNDEF if U == 1
+ * Note that bits [2:0] all set implies 'always UNDEF'
*/
int undefreq;
/* prewiden, src1_wide, src2_wide, undefreq */
{0, 1, 1, 0}, /* VSUBHN */
{0, 0, 0, 0}, /* VABDL */
{0, 0, 0, 0}, /* VMLAL */
- {0, 0, 0, 6}, /* VQDMLAL */
+ {0, 0, 0, 9}, /* VQDMLAL */
{0, 0, 0, 0}, /* VMLSL */
- {0, 0, 0, 6}, /* VQDMLSL */
+ {0, 0, 0, 9}, /* VQDMLSL */
{0, 0, 0, 0}, /* Integer VMULL */
- {0, 0, 0, 2}, /* VQDMULL */
- {0, 0, 0, 5}, /* Polynomial VMULL */
- {0, 0, 0, 3}, /* Reserved: always UNDEF */
+ {0, 0, 0, 1}, /* VQDMULL */
+ {0, 0, 0, 0xa}, /* Polynomial VMULL */
+ {0, 0, 0, 7}, /* Reserved: always UNDEF */
};
prewiden = neon_3reg_wide[op][0];
src2_wide = neon_3reg_wide[op][2];
undefreq = neon_3reg_wide[op][3];
- if (((undefreq & 1) && (size != 0)) ||
- ((undefreq & 2) && (size == 0)) ||
- ((undefreq & 4) && u)) {
+ if ((undefreq & (1 << size)) ||
+ ((undefreq & 8) && u)) {
return 1;
}
if ((src1_wide && (rn & 1)) ||
return 1;
}
+ /* Handle VMULL.P64 (Polynomial 64x64 to 128 bit multiply)
+ * outside the loop below as it only performs a single pass.
+ */
+ if (op == 14 && size == 2) {
+ TCGv_i64 tcg_rn, tcg_rm, tcg_rd;
+
+ if (!arm_feature(env, ARM_FEATURE_V8_PMULL)) {
+ return 1;
+ }
+ tcg_rn = tcg_temp_new_i64();
+ tcg_rm = tcg_temp_new_i64();
+ tcg_rd = tcg_temp_new_i64();
+ neon_load_reg64(tcg_rn, rn);
+ neon_load_reg64(tcg_rm, rm);
+ gen_helper_neon_pmull_64_lo(tcg_rd, tcg_rn, tcg_rm);
+ neon_store_reg64(tcg_rd, rd);
+ gen_helper_neon_pmull_64_hi(tcg_rd, tcg_rn, tcg_rm);
+ neon_store_reg64(tcg_rd, rd + 1);
+ tcg_temp_free_i64(tcg_rn);
+ tcg_temp_free_i64(tcg_rm);
+ tcg_temp_free_i64(tcg_rd);
+ return 0;
+ }
+
/* Avoid overlapping operands. Wide source operands are
always aligned so will never overlap with wide
destinations in problematic ways. */
tcg_temp_free_i32(tmp2);
tcg_temp_free_i32(tmp3);
break;
+ case NEON_2RM_SHA1H:
+ if (!arm_feature(env, ARM_FEATURE_V8_SHA1)
+ || ((rm | rd) & 1)) {
+ return 1;
+ }
+ tmp = tcg_const_i32(rd);
+ tmp2 = tcg_const_i32(rm);
+
+ gen_helper_crypto_sha1h(cpu_env, tmp, tmp2);
+
+ tcg_temp_free_i32(tmp);
+ tcg_temp_free_i32(tmp2);
+ break;
+ case NEON_2RM_SHA1SU1:
+ if ((rm | rd) & 1) {
+ return 1;
+ }
+ /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
+ if (q) {
+ if (!arm_feature(env, ARM_FEATURE_V8_SHA256)) {
+ return 1;
+ }
+ } else if (!arm_feature(env, ARM_FEATURE_V8_SHA1)) {
+ return 1;
+ }
+ tmp = tcg_const_i32(rd);
+ tmp2 = tcg_const_i32(rm);
+ if (q) {
+ gen_helper_crypto_sha256su0(cpu_env, tmp, tmp2);
+ } else {
+ gen_helper_crypto_sha1su1(cpu_env, tmp, tmp2);
+ }
+ tcg_temp_free_i32(tmp);
+ tcg_temp_free_i32(tmp2);
+ break;
default:
elementwise:
for (pass = 0; pass < (q ? 4 : 2); pass++) {
tmp = load_reg(s, rn);
tmp2 = load_reg(s, rm);
+ if (op1 == 0) {
+ tcg_gen_andi_i32(tmp2, tmp2, 0xff);
+ } else if (op1 == 1) {
+ tcg_gen_andi_i32(tmp2, tmp2, 0xffff);
+ }
tmp3 = tcg_const_i32(1 << op1);
if (c & 0x2) {
gen_helper_crc32c(tmp, tmp, tmp2, tmp3);
}
tmp2 = load_reg(s, rm);
+ if (sz == 0) {
+ tcg_gen_andi_i32(tmp2, tmp2, 0xff);
+ } else if (sz == 1) {
+ tcg_gen_andi_i32(tmp2, tmp2, 0xffff);
+ }
tmp3 = tcg_const_i32(1 << sz);
if (c) {
gen_helper_crc32c(tmp, tmp, tmp2, tmp3);