target-arm/translate.c: Don't pass CPUARMState around in the decoder
/*
 *  ARM translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *  Copyright (c) 2005-2007 CodeSourcery
 *  Copyright (c) 2007 OpenedHand, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>

#include "cpu.h"
#include "internals.h"
#include "disas/disas.h"
#include "tcg-op.h"
#include "qemu/log.h"
#include "qemu/bitops.h"
#include "arm_ldst.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"

#include "trace-tcg.h"


#define ENABLE_ARCH_4T    arm_dc_feature(s, ARM_FEATURE_V4T)
#define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
/* currently all emulated v5 cores are also v5TE, so don't bother */
#define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
#define ENABLE_ARCH_5J    0
#define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
#define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
#define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
#define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
#define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)

#define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while (0)

#include "translate.h"
static uint32_t gen_opc_condexec_bits[OPC_BUF_SIZE];

#if defined(CONFIG_USER_ONLY)
#define IS_USER(s) 1
#else
#define IS_USER(s) (s->user)
#endif

TCGv_ptr cpu_env;
/* We reuse the same 64-bit temporaries for efficiency.  */
static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
static TCGv_i32 cpu_R[16];
static TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
static TCGv_i64 cpu_exclusive_addr;
static TCGv_i64 cpu_exclusive_val;
#ifdef CONFIG_USER_ONLY
static TCGv_i64 cpu_exclusive_test;
static TCGv_i32 cpu_exclusive_info;
#endif

/* FIXME:  These should be removed.  */
static TCGv_i32 cpu_F0s, cpu_F1s;
static TCGv_i64 cpu_F0d, cpu_F1d;

#include "exec/gen-icount.h"

static const char *regnames[] =
    { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
      "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };

/* initialize TCG globals.  */
void arm_translate_init(void)
{
    int i;

    cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");

    for (i = 0; i < 16; i++) {
        cpu_R[i] = tcg_global_mem_new_i32(TCG_AREG0,
                                          offsetof(CPUARMState, regs[i]),
                                          regnames[i]);
    }
    cpu_CF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, CF), "CF");
    cpu_NF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, NF), "NF");
    cpu_VF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, VF), "VF");
    cpu_ZF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, ZF), "ZF");

    cpu_exclusive_addr = tcg_global_mem_new_i64(TCG_AREG0,
        offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
    cpu_exclusive_val = tcg_global_mem_new_i64(TCG_AREG0,
        offsetof(CPUARMState, exclusive_val), "exclusive_val");
#ifdef CONFIG_USER_ONLY
    cpu_exclusive_test = tcg_global_mem_new_i64(TCG_AREG0,
        offsetof(CPUARMState, exclusive_test), "exclusive_test");
    cpu_exclusive_info = tcg_global_mem_new_i32(TCG_AREG0,
        offsetof(CPUARMState, exclusive_info), "exclusive_info");
#endif

    a64_translate_init();
}

static inline TCGv_i32 load_cpu_offset(int offset)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_ld_i32(tmp, cpu_env, offset);
    return tmp;
}

#define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))

static inline void store_cpu_offset(TCGv_i32 var, int offset)
{
    tcg_gen_st_i32(var, cpu_env, offset);
    tcg_temp_free_i32(var);
}

#define store_cpu_field(var, name) \
    store_cpu_offset(var, offsetof(CPUARMState, name))

/* Set a variable to the value of a CPU register.  */
static void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
{
    if (reg == 15) {
        uint32_t addr;
        /* s->pc has already been advanced past this insn, so the
         * architectural PC value (insn address + 8 for ARM, + 4 for
         * Thumb) is only one insn-width further on.
         */
        if (s->thumb)
            addr = (long)s->pc + 2;
        else
            addr = (long)s->pc + 4;
        tcg_gen_movi_i32(var, addr);
    } else {
        tcg_gen_mov_i32(var, cpu_R[reg]);
    }
}

/* Create a new temporary and set it to the value of a CPU register.  */
static inline TCGv_i32 load_reg(DisasContext *s, int reg)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    load_reg_var(s, tmp, reg);
    return tmp;
}

/* Set a CPU register.  The source must be a temporary and will be
   marked as dead.  */
static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
{
    if (reg == 15) {
        tcg_gen_andi_i32(var, var, ~1);
        s->is_jmp = DISAS_JUMP;
    }
    tcg_gen_mov_i32(cpu_R[reg], var);
    tcg_temp_free_i32(var);
}
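
/* A usage sketch (illustrative, not part of the original file): load_reg()
 * returns a fresh temporary owned by the caller, and store_reg() consumes
 * (frees) the temporary passed to it:
 *
 *     TCGv_i32 tmp = load_reg(s, rn);
 *     tcg_gen_addi_i32(tmp, tmp, 4);
 *     store_reg(s, rd, tmp);   -- tmp must not be used after this call
 */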

/* Value extensions.  */
#define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
#define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
#define gen_sxth(var) tcg_gen_ext16s_i32(var, var)

#define gen_sxtb16(var) gen_helper_sxtb16(var, var)
#define gen_uxtb16(var) gen_helper_uxtb16(var, var)


static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
{
    TCGv_i32 tmp_mask = tcg_const_i32(mask);
    gen_helper_cpsr_write(cpu_env, var, tmp_mask);
    tcg_temp_free_i32(tmp_mask);
}
/* Set NZCV flags from the high 4 bits of var.  */
#define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)

static void gen_exception_internal(int excp)
{
    TCGv_i32 tcg_excp = tcg_const_i32(excp);

    assert(excp_is_internal(excp));
    gen_helper_exception_internal(cpu_env, tcg_excp);
    tcg_temp_free_i32(tcg_excp);
}

static void gen_exception(int excp, uint32_t syndrome)
{
    TCGv_i32 tcg_excp = tcg_const_i32(excp);
    TCGv_i32 tcg_syn = tcg_const_i32(syndrome);

    gen_helper_exception_with_syndrome(cpu_env, tcg_excp, tcg_syn);
    tcg_temp_free_i32(tcg_syn);
    tcg_temp_free_i32(tcg_excp);
}

static void gen_ss_advance(DisasContext *s)
{
    /* If the singlestep state is Active-not-pending, advance to
     * Active-pending.
     */
    if (s->ss_active) {
        s->pstate_ss = 0;
        gen_helper_clear_pstate_ss(cpu_env);
    }
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex));
    s->is_jmp = DISAS_EXC;
}

static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 tmp1 = tcg_temp_new_i32();
    TCGv_i32 tmp2 = tcg_temp_new_i32();
    tcg_gen_ext16s_i32(tmp1, a);
    tcg_gen_ext16s_i32(tmp2, b);
    tcg_gen_mul_i32(tmp1, tmp1, tmp2);
    tcg_temp_free_i32(tmp2);
    tcg_gen_sari_i32(a, a, 16);
    tcg_gen_sari_i32(b, b, 16);
    tcg_gen_mul_i32(b, b, a);
    tcg_gen_mov_i32(a, tmp1);
    tcg_temp_free_i32(tmp1);
}

/* Byteswap each halfword.  */
static void gen_rev16(TCGv_i32 var)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_shri_i32(tmp, var, 8);
    tcg_gen_andi_i32(tmp, tmp, 0x00ff00ff);
    tcg_gen_shli_i32(var, var, 8);
    tcg_gen_andi_i32(var, var, 0xff00ff00);
    tcg_gen_or_i32(var, var, tmp);
    tcg_temp_free_i32(tmp);
}
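
/* Example: gen_rev16 turns 0xAABBCCDD into 0xBBAADDCC -- each 16-bit
 * halfword is byteswapped in place, matching the ARM REV16 semantics.
 */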

/* Byteswap low halfword and sign extend.  */
static void gen_revsh(TCGv_i32 var)
{
    tcg_gen_ext16u_i32(var, var);
    tcg_gen_bswap16_i32(var, var);
    tcg_gen_ext16s_i32(var, var);
}

/* Unsigned bitfield extract.  */
static void gen_ubfx(TCGv_i32 var, int shift, uint32_t mask)
{
    if (shift)
        tcg_gen_shri_i32(var, var, shift);
    tcg_gen_andi_i32(var, var, mask);
}

/* Signed bitfield extract.  */
static void gen_sbfx(TCGv_i32 var, int shift, int width)
{
    uint32_t signbit;

    if (shift)
        tcg_gen_sari_i32(var, var, shift);
    if (shift + width < 32) {
        signbit = 1u << (width - 1);
        tcg_gen_andi_i32(var, var, (1u << width) - 1);
        tcg_gen_xori_i32(var, var, signbit);
        tcg_gen_subi_i32(var, var, signbit);
    }
}
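
/* The xor/sub pair above is the standard branch-free sign extension of a
 * width-bit field: (x ^ signbit) - signbit propagates the sign bit.  For
 * example, with width = 4 and x = 0b1010 (10): (10 ^ 8) - 8 = -6, which
 * is 0b1010 read as a signed 4-bit value.
 */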

/* Return (b << 32) + a. Mark inputs as dead */
static TCGv_i64 gen_addq_msw(TCGv_i64 a, TCGv_i32 b)
{
    TCGv_i64 tmp64 = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp64, b);
    tcg_temp_free_i32(b);
    tcg_gen_shli_i64(tmp64, tmp64, 32);
    tcg_gen_add_i64(a, tmp64, a);

    tcg_temp_free_i64(tmp64);
    return a;
}

/* Return (b << 32) - a. Mark inputs as dead. */
static TCGv_i64 gen_subq_msw(TCGv_i64 a, TCGv_i32 b)
{
    TCGv_i64 tmp64 = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp64, b);
    tcg_temp_free_i32(b);
    tcg_gen_shli_i64(tmp64, tmp64, 32);
    tcg_gen_sub_i64(a, tmp64, a);

    tcg_temp_free_i64(tmp64);
    return a;
}

/* 32x32->64 multiply.  Marks inputs as dead.  */
static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 lo = tcg_temp_new_i32();
    TCGv_i32 hi = tcg_temp_new_i32();
    TCGv_i64 ret;

    tcg_gen_mulu2_i32(lo, hi, a, b);
    tcg_temp_free_i32(a);
    tcg_temp_free_i32(b);

    ret = tcg_temp_new_i64();
    tcg_gen_concat_i32_i64(ret, lo, hi);
    tcg_temp_free_i32(lo);
    tcg_temp_free_i32(hi);

    return ret;
}

static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 lo = tcg_temp_new_i32();
    TCGv_i32 hi = tcg_temp_new_i32();
    TCGv_i64 ret;

    tcg_gen_muls2_i32(lo, hi, a, b);
    tcg_temp_free_i32(a);
    tcg_temp_free_i32(b);

    ret = tcg_temp_new_i64();
    tcg_gen_concat_i32_i64(ret, lo, hi);
    tcg_temp_free_i32(lo);
    tcg_temp_free_i32(hi);

    return ret;
}

/* Swap low and high halfwords.  */
static void gen_swap_half(TCGv_i32 var)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_shri_i32(tmp, var, 16);
    tcg_gen_shli_i32(var, var, 16);
    tcg_gen_or_i32(var, var, tmp);
    tcg_temp_free_i32(tmp);
}

/* Dual 16-bit add.  Result placed in t0 and t1 is marked as dead.
    tmp = (t0 ^ t1) & 0x8000;
    t0 &= ~0x8000;
    t1 &= ~0x8000;
    t0 = (t0 + t1) ^ tmp;
 */

static void gen_add16(TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_andi_i32(tmp, tmp, 0x8000);
    tcg_gen_andi_i32(t0, t0, ~0x8000);
    tcg_gen_andi_i32(t1, t1, ~0x8000);
    tcg_gen_add_i32(t0, t0, t1);
    tcg_gen_xor_i32(t0, t0, tmp);
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(t1);
}
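
/* Worked example of the masking trick: clearing bit 15 of both inputs
 * guarantees the low-halfword add cannot carry into the high halfword,
 * and the final xor with tmp reinstates the correct bit 15.  For
 * t0 = 0x00008000, t1 = 0x00000001: tmp = 0x8000, masked sum = 0x0001,
 * result = 0x8001, with no carry leaking into the upper halfword.
 */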

/* Set CF to the top bit of var.  */
static void gen_set_CF_bit31(TCGv_i32 var)
{
    tcg_gen_shri_i32(cpu_CF, var, 31);
}

/* Set N and Z flags from var.  */
static inline void gen_logic_CC(TCGv_i32 var)
{
    tcg_gen_mov_i32(cpu_NF, var);
    tcg_gen_mov_i32(cpu_ZF, var);
}

/* T0 += T1 + CF.  */
static void gen_adc(TCGv_i32 t0, TCGv_i32 t1)
{
    tcg_gen_add_i32(t0, t0, t1);
    tcg_gen_add_i32(t0, t0, cpu_CF);
}

/* dest = T0 + T1 + CF. */
static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    tcg_gen_add_i32(dest, t0, t1);
    tcg_gen_add_i32(dest, dest, cpu_CF);
}

/* dest = T0 - T1 + CF - 1.  */
static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    tcg_gen_sub_i32(dest, t0, t1);
    tcg_gen_add_i32(dest, dest, cpu_CF);
    tcg_gen_subi_i32(dest, dest, 1);
}

/* dest = T0 + T1. Compute C, N, V and Z flags */
static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_movi_i32(tmp, 0);
    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    tcg_temp_free_i32(tmp);
    tcg_gen_mov_i32(dest, cpu_NF);
}

/* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    if (TCG_TARGET_HAS_add2_i32) {
        tcg_gen_movi_i32(tmp, 0);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
    } else {
        TCGv_i64 q0 = tcg_temp_new_i64();
        TCGv_i64 q1 = tcg_temp_new_i64();
        tcg_gen_extu_i32_i64(q0, t0);
        tcg_gen_extu_i32_i64(q1, t1);
        tcg_gen_add_i64(q0, q0, q1);
        tcg_gen_extu_i32_i64(q1, cpu_CF);
        tcg_gen_add_i64(q0, q0, q1);
        tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
        tcg_temp_free_i64(q0);
        tcg_temp_free_i64(q1);
    }
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    tcg_temp_free_i32(tmp);
    tcg_gen_mov_i32(dest, cpu_NF);
}
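
/* On hosts with add2, the two chained add2 ops above compute the full
 * 33-bit sum t0 + t1 + CF across the {cpu_NF, cpu_CF} pair: the first
 * adds t0 and the incoming carry, the second adds t1 and accumulates
 * the carry-out, leaving the final carry bit in cpu_CF.
 */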

/* dest = T0 - T1. Compute C, N, V and Z flags */
static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp;
    tcg_gen_sub_i32(cpu_NF, t0, t1);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
    tmp = tcg_temp_new_i32();
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
    tcg_temp_free_i32(tmp);
    tcg_gen_mov_i32(dest, cpu_NF);
}

/* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_not_i32(tmp, t1);
    gen_adc_CC(dest, t0, tmp);
    tcg_temp_free_i32(tmp);
}

#define GEN_SHIFT(name)                                               \
static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
{                                                                     \
    TCGv_i32 tmp1, tmp2, tmp3;                                        \
    tmp1 = tcg_temp_new_i32();                                        \
    tcg_gen_andi_i32(tmp1, t1, 0xff);                                 \
    tmp2 = tcg_const_i32(0);                                          \
    tmp3 = tcg_const_i32(0x1f);                                       \
    tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0);    \
    tcg_temp_free_i32(tmp3);                                          \
    tcg_gen_andi_i32(tmp1, tmp1, 0x1f);                               \
    tcg_gen_##name##_i32(dest, tmp2, tmp1);                           \
    tcg_temp_free_i32(tmp2);                                          \
    tcg_temp_free_i32(tmp1);                                          \
}
GEN_SHIFT(shl)
GEN_SHIFT(shr)
#undef GEN_SHIFT

static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp1, tmp2;
    tmp1 = tcg_temp_new_i32();
    tcg_gen_andi_i32(tmp1, t1, 0xff);
    tmp2 = tcg_const_i32(0x1f);
    tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
    tcg_temp_free_i32(tmp2);
    tcg_gen_sar_i32(dest, t0, tmp1);
    tcg_temp_free_i32(tmp1);
}
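
/* These variable-shift helpers encode the ARM shift-by-register rules:
 * only the bottom byte of the shift register is significant, and an
 * amount over 31 yields 0 for LSL/LSR (the movcond in GEN_SHIFT swaps in
 * a zero source), while ASR clamps the amount to 31, i.e. fills the
 * result with the sign bit.
 */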

static void tcg_gen_abs_i32(TCGv_i32 dest, TCGv_i32 src)
{
    TCGv_i32 c0 = tcg_const_i32(0);
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_neg_i32(tmp, src);
    tcg_gen_movcond_i32(TCG_COND_GT, dest, src, c0, src, tmp);
    tcg_temp_free_i32(c0);
    tcg_temp_free_i32(tmp);
}

static void shifter_out_im(TCGv_i32 var, int shift)
{
    if (shift == 0) {
        tcg_gen_andi_i32(cpu_CF, var, 1);
    } else {
        tcg_gen_shri_i32(cpu_CF, var, shift);
        if (shift != 31) {
            tcg_gen_andi_i32(cpu_CF, cpu_CF, 1);
        }
    }
}

/* Shift by immediate.  Includes special handling for shift == 0.  */
static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
                                    int shift, int flags)
{
    switch (shiftop) {
    case 0: /* LSL */
        if (shift != 0) {
            if (flags)
                shifter_out_im(var, 32 - shift);
            tcg_gen_shli_i32(var, var, shift);
        }
        break;
    case 1: /* LSR */
        if (shift == 0) {
            if (flags) {
                tcg_gen_shri_i32(cpu_CF, var, 31);
            }
            tcg_gen_movi_i32(var, 0);
        } else {
            if (flags)
                shifter_out_im(var, shift - 1);
            tcg_gen_shri_i32(var, var, shift);
        }
        break;
    case 2: /* ASR */
        if (shift == 0)
            shift = 32;
        if (flags)
            shifter_out_im(var, shift - 1);
        if (shift == 32)
          shift = 31;
        tcg_gen_sari_i32(var, var, shift);
        break;
    case 3: /* ROR/RRX */
        if (shift != 0) {
            if (flags)
                shifter_out_im(var, shift - 1);
            tcg_gen_rotri_i32(var, var, shift);
        } else {
            TCGv_i32 tmp = tcg_temp_new_i32();
            tcg_gen_shli_i32(tmp, cpu_CF, 31);
            if (flags)
                shifter_out_im(var, 0);
            tcg_gen_shri_i32(var, var, 1);
            tcg_gen_or_i32(var, var, tmp);
            tcg_temp_free_i32(tmp);
        }
        break;
    }
}
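
/* Note the shift == 0 arm of ROR above: ARM encodes RRX there, so the
 * generated code rotates {CF, var} right by one -- the result is
 * (CF << 31) | (var >> 1) -- and, when flags are requested, the old
 * bit 0 becomes the new carry.
 */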

static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
                                     TCGv_i32 shift, int flags)
{
    if (flags) {
        switch (shiftop) {
        case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
        case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
        case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
        case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
        }
    } else {
        switch (shiftop) {
        case 0:
            gen_shl(var, var, shift);
            break;
        case 1:
            gen_shr(var, var, shift);
            break;
        case 2:
            gen_sar(var, var, shift);
            break;
        case 3:
            tcg_gen_andi_i32(shift, shift, 0x1f);
            tcg_gen_rotr_i32(var, var, shift);
            break;
        }
    }
    tcg_temp_free_i32(shift);
}

#define PAS_OP(pfx) \
    switch (op2) {  \
    case 0: gen_pas_helper(glue(pfx,add16)); break; \
    case 1: gen_pas_helper(glue(pfx,addsubx)); break; \
    case 2: gen_pas_helper(glue(pfx,subaddx)); break; \
    case 3: gen_pas_helper(glue(pfx,sub16)); break; \
    case 4: gen_pas_helper(glue(pfx,add8)); break; \
    case 7: gen_pas_helper(glue(pfx,sub8)); break; \
    }
static void gen_arm_parallel_addsub(int op1, int op2, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_ptr tmp;

    switch (op1) {
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
    case 1:
        tmp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
        PAS_OP(s)
        tcg_temp_free_ptr(tmp);
        break;
    case 5:
        tmp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
        PAS_OP(u)
        tcg_temp_free_ptr(tmp);
        break;
#undef gen_pas_helper
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
    case 2:
        PAS_OP(q);
        break;
    case 3:
        PAS_OP(sh);
        break;
    case 6:
        PAS_OP(uq);
        break;
    case 7:
        PAS_OP(uh);
        break;
#undef gen_pas_helper
    }
}
#undef PAS_OP

/* For unknown reasons Arm and Thumb-2 use arbitrarily different encodings.  */
#define PAS_OP(pfx) \
    switch (op1) {  \
    case 0: gen_pas_helper(glue(pfx,add8)); break; \
    case 1: gen_pas_helper(glue(pfx,add16)); break; \
    case 2: gen_pas_helper(glue(pfx,addsubx)); break; \
    case 4: gen_pas_helper(glue(pfx,sub8)); break; \
    case 5: gen_pas_helper(glue(pfx,sub16)); break; \
    case 6: gen_pas_helper(glue(pfx,subaddx)); break; \
    }
static void gen_thumb2_parallel_addsub(int op1, int op2, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_ptr tmp;

    switch (op2) {
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
    case 0:
        tmp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
        PAS_OP(s)
        tcg_temp_free_ptr(tmp);
        break;
    case 4:
        tmp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
        PAS_OP(u)
        tcg_temp_free_ptr(tmp);
        break;
#undef gen_pas_helper
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
    case 1:
        PAS_OP(q);
        break;
    case 2:
        PAS_OP(sh);
        break;
    case 5:
        PAS_OP(uq);
        break;
    case 6:
        PAS_OP(uh);
        break;
#undef gen_pas_helper
    }
}
#undef PAS_OP

/*
 * Generate a conditional branch based on ARM condition code cc.
 * This is common between ARM and AArch64 targets.
 */
void arm_gen_test_cc(int cc, int label)
{
    TCGv_i32 tmp;
    int inv;

    switch (cc) {
    case 0: /* eq: Z */
        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, label);
        break;
    case 1: /* ne: !Z */
        tcg_gen_brcondi_i32(TCG_COND_NE, cpu_ZF, 0, label);
        break;
    case 2: /* cs: C */
        tcg_gen_brcondi_i32(TCG_COND_NE, cpu_CF, 0, label);
        break;
    case 3: /* cc: !C */
        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_CF, 0, label);
        break;
    case 4: /* mi: N */
        tcg_gen_brcondi_i32(TCG_COND_LT, cpu_NF, 0, label);
        break;
    case 5: /* pl: !N */
        tcg_gen_brcondi_i32(TCG_COND_GE, cpu_NF, 0, label);
        break;
    case 6: /* vs: V */
        tcg_gen_brcondi_i32(TCG_COND_LT, cpu_VF, 0, label);
        break;
    case 7: /* vc: !V */
        tcg_gen_brcondi_i32(TCG_COND_GE, cpu_VF, 0, label);
        break;
    case 8: /* hi: C && !Z */
        inv = gen_new_label();
        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_CF, 0, inv);
        tcg_gen_brcondi_i32(TCG_COND_NE, cpu_ZF, 0, label);
        gen_set_label(inv);
        break;
    case 9: /* ls: !C || Z */
        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_CF, 0, label);
        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, label);
        break;
    case 10: /* ge: N == V -> N ^ V == 0 */
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
        tcg_temp_free_i32(tmp);
        break;
    case 11: /* lt: N != V -> N ^ V != 0 */
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
        tcg_temp_free_i32(tmp);
        break;
    case 12: /* gt: !Z && N == V */
        inv = gen_new_label();
        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, inv);
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
        tcg_temp_free_i32(tmp);
        gen_set_label(inv);
        break;
    case 13: /* le: Z || N != V */
        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, label);
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
        tcg_temp_free_i32(tmp);
        break;
    default:
        fprintf(stderr, "Bad condition code 0x%x\n", cc);
        abort();
    }
}

static const uint8_t table_logic_cc[16] = {
    1, /* and */
    1, /* xor */
    0, /* sub */
    0, /* rsb */
    0, /* add */
    0, /* adc */
    0, /* sbc */
    0, /* rsc */
    1, /* andl */
    1, /* xorl */
    0, /* cmp */
    0, /* cmn */
    1, /* orr */
    1, /* mov */
    1, /* bic */
    1, /* mvn */
};
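
/* Indexed by the data-processing opcode field: a 1 marks the logical
 * ops (AND/EOR/TST/TEQ/ORR/MOV/BIC/MVN), which set N and Z from the
 * result and take C from the shifter rather than from an arithmetic
 * carry-out.
 */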

/* Set PC and Thumb state from an immediate address.  */
static inline void gen_bx_im(DisasContext *s, uint32_t addr)
{
    TCGv_i32 tmp;

    s->is_jmp = DISAS_UPDATE;
    if (s->thumb != (addr & 1)) {
        tmp = tcg_temp_new_i32();
        tcg_gen_movi_i32(tmp, addr & 1);
        tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUARMState, thumb));
        tcg_temp_free_i32(tmp);
    }
    tcg_gen_movi_i32(cpu_R[15], addr & ~1);
}

/* Set PC and Thumb state from var.  var is marked as dead.  */
static inline void gen_bx(DisasContext *s, TCGv_i32 var)
{
    s->is_jmp = DISAS_UPDATE;
    tcg_gen_andi_i32(cpu_R[15], var, ~1);
    tcg_gen_andi_i32(var, var, 1);
    store_cpu_field(var, thumb);
}

/* Variant of store_reg which uses branch&exchange logic when storing
   to r15 in ARM architecture v7 and above. The source must be a temporary
   and will be marked as dead. */
static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
{
    if (reg == 15 && ENABLE_ARCH_7) {
        gen_bx(s, var);
    } else {
        store_reg(s, reg, var);
    }
}

/* Variant of store_reg which uses branch&exchange logic when storing
 * to r15 in ARM architecture v5T and above. This is used for storing
 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
{
    if (reg == 15 && ENABLE_ARCH_5) {
        gen_bx(s, var);
    } else {
        store_reg(s, reg, var);
    }
}

/* Abstractions of "generate code to do a guest load/store for
 * AArch32", where a vaddr is always 32 bits (and is zero
 * extended if we're a 64 bit core) and data is also
 * 32 bits unless specifically doing a 64 bit access.
 * These functions work like tcg_gen_qemu_{ld,st}* except
 * that the address argument is TCGv_i32 rather than TCGv.
 */
#if TARGET_LONG_BITS == 32

#define DO_GEN_LD(SUFF, OPC)                                             \
static inline void gen_aa32_ld##SUFF(TCGv_i32 val, TCGv_i32 addr, int index) \
{                                                                        \
    tcg_gen_qemu_ld_i32(val, addr, index, OPC);                          \
}

#define DO_GEN_ST(SUFF, OPC)                                             \
static inline void gen_aa32_st##SUFF(TCGv_i32 val, TCGv_i32 addr, int index) \
{                                                                        \
    tcg_gen_qemu_st_i32(val, addr, index, OPC);                          \
}

static inline void gen_aa32_ld64(TCGv_i64 val, TCGv_i32 addr, int index)
{
    tcg_gen_qemu_ld_i64(val, addr, index, MO_TEQ);
}

static inline void gen_aa32_st64(TCGv_i64 val, TCGv_i32 addr, int index)
{
    tcg_gen_qemu_st_i64(val, addr, index, MO_TEQ);
}

#else

#define DO_GEN_LD(SUFF, OPC)                                             \
static inline void gen_aa32_ld##SUFF(TCGv_i32 val, TCGv_i32 addr, int index) \
{                                                                        \
    TCGv addr64 = tcg_temp_new();                                        \
    tcg_gen_extu_i32_i64(addr64, addr);                                  \
    tcg_gen_qemu_ld_i32(val, addr64, index, OPC);                        \
    tcg_temp_free(addr64);                                               \
}

#define DO_GEN_ST(SUFF, OPC)                                             \
static inline void gen_aa32_st##SUFF(TCGv_i32 val, TCGv_i32 addr, int index) \
{                                                                        \
    TCGv addr64 = tcg_temp_new();                                        \
    tcg_gen_extu_i32_i64(addr64, addr);                                  \
    tcg_gen_qemu_st_i32(val, addr64, index, OPC);                        \
    tcg_temp_free(addr64);                                               \
}

static inline void gen_aa32_ld64(TCGv_i64 val, TCGv_i32 addr, int index)
{
    TCGv addr64 = tcg_temp_new();
    tcg_gen_extu_i32_i64(addr64, addr);
    tcg_gen_qemu_ld_i64(val, addr64, index, MO_TEQ);
    tcg_temp_free(addr64);
}

static inline void gen_aa32_st64(TCGv_i64 val, TCGv_i32 addr, int index)
{
    TCGv addr64 = tcg_temp_new();
    tcg_gen_extu_i32_i64(addr64, addr);
    tcg_gen_qemu_st_i64(val, addr64, index, MO_TEQ);
    tcg_temp_free(addr64);
}

#endif

DO_GEN_LD(8s, MO_SB)
DO_GEN_LD(8u, MO_UB)
DO_GEN_LD(16s, MO_TESW)
DO_GEN_LD(16u, MO_TEUW)
DO_GEN_LD(32u, MO_TEUL)
DO_GEN_ST(8, MO_UB)
DO_GEN_ST(16, MO_TEUW)
DO_GEN_ST(32, MO_TEUL)
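
/* Usage sketch (illustrative, mirroring the decoder code later in this
 * file): a 32-bit AArch32 load is emitted as
 *
 *     TCGv_i32 tmp = tcg_temp_new_i32();
 *     gen_aa32_ld32u(tmp, addr, get_mem_index(s));
 *
 * with the TARGET_LONG_BITS == 64 variants above hiding the zero
 * extension of the 32-bit virtual address to the TCG address type.
 */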

static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
{
    tcg_gen_movi_i32(cpu_R[15], val);
}

static inline void gen_hvc(DisasContext *s, int imm16)
{
    /* The pre-HVC helper handles cases when HVC gets trapped
     * as an undefined insn by runtime configuration (i.e. before
     * the insn really executes).
     */
    gen_set_pc_im(s, s->pc - 4);
    gen_helper_pre_hvc(cpu_env);
    /* Otherwise we will treat this as a real exception which
     * happens after execution of the insn. (The distinction matters
     * for the PC value reported to the exception handler and also
     * for single stepping.)
     */
    s->svc_imm = imm16;
    gen_set_pc_im(s, s->pc);
    s->is_jmp = DISAS_HVC;
}

static inline void gen_smc(DisasContext *s)
{
    /* As with HVC, we may take an exception either before or after
     * the insn executes.
     */
    TCGv_i32 tmp;

    gen_set_pc_im(s, s->pc - 4);
    tmp = tcg_const_i32(syn_aa32_smc());
    gen_helper_pre_smc(cpu_env, tmp);
    tcg_temp_free_i32(tmp);
    gen_set_pc_im(s, s->pc);
    s->is_jmp = DISAS_SMC;
}

static inline void
gen_set_condexec (DisasContext *s)
{
    if (s->condexec_mask) {
        uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
        TCGv_i32 tmp = tcg_temp_new_i32();
        tcg_gen_movi_i32(tmp, val);
        store_cpu_field(tmp, condexec_bits);
    }
}

static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
{
    gen_set_condexec(s);
    gen_set_pc_im(s, s->pc - offset);
    gen_exception_internal(excp);
    s->is_jmp = DISAS_JUMP;
}

static void gen_exception_insn(DisasContext *s, int offset, int excp, int syn)
{
    gen_set_condexec(s);
    gen_set_pc_im(s, s->pc - offset);
    gen_exception(excp, syn);
    s->is_jmp = DISAS_JUMP;
}

/* Force a TB lookup after an instruction that changes the CPU state.  */
static inline void gen_lookup_tb(DisasContext *s)
{
    tcg_gen_movi_i32(cpu_R[15], s->pc & ~1);
    s->is_jmp = DISAS_UPDATE;
}

static inline void gen_add_data_offset(DisasContext *s, unsigned int insn,
                                       TCGv_i32 var)
{
    int val, rm, shift, shiftop;
    TCGv_i32 offset;

    if (!(insn & (1 << 25))) {
        /* immediate */
        val = insn & 0xfff;
        if (!(insn & (1 << 23)))
            val = -val;
        if (val != 0)
            tcg_gen_addi_i32(var, var, val);
    } else {
        /* shift/register */
        rm = (insn) & 0xf;
        shift = (insn >> 7) & 0x1f;
        shiftop = (insn >> 5) & 3;
        offset = load_reg(s, rm);
        gen_arm_shift_im(offset, shiftop, shift, 0);
        if (!(insn & (1 << 23)))
            tcg_gen_sub_i32(var, var, offset);
        else
            tcg_gen_add_i32(var, var, offset);
        tcg_temp_free_i32(offset);
    }
}

static inline void gen_add_datah_offset(DisasContext *s, unsigned int insn,
                                        int extra, TCGv_i32 var)
{
    int val, rm;
    TCGv_i32 offset;

    if (insn & (1 << 22)) {
        /* immediate */
        val = (insn & 0xf) | ((insn >> 4) & 0xf0);
        if (!(insn & (1 << 23)))
            val = -val;
        val += extra;
        if (val != 0)
            tcg_gen_addi_i32(var, var, val);
    } else {
        /* register */
        if (extra)
            tcg_gen_addi_i32(var, var, extra);
        rm = (insn) & 0xf;
        offset = load_reg(s, rm);
        if (!(insn & (1 << 23)))
            tcg_gen_sub_i32(var, var, offset);
        else
            tcg_gen_add_i32(var, var, offset);
        tcg_temp_free_i32(offset);
    }
}

static TCGv_ptr get_fpstatus_ptr(int neon)
{
    TCGv_ptr statusptr = tcg_temp_new_ptr();
    int offset;
    if (neon) {
        offset = offsetof(CPUARMState, vfp.standard_fp_status);
    } else {
        offset = offsetof(CPUARMState, vfp.fp_status);
    }
    tcg_gen_addi_ptr(statusptr, cpu_env, offset);
    return statusptr;
}

#define VFP_OP2(name)                                                 \
static inline void gen_vfp_##name(int dp)                             \
{                                                                     \
    TCGv_ptr fpst = get_fpstatus_ptr(0);                              \
    if (dp) {                                                         \
        gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, cpu_F1d, fpst);    \
    } else {                                                          \
        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, cpu_F1s, fpst);    \
    }                                                                 \
    tcg_temp_free_ptr(fpst);                                          \
}

VFP_OP2(add)
VFP_OP2(sub)
VFP_OP2(mul)
VFP_OP2(div)

#undef VFP_OP2

static inline void gen_vfp_F1_mul(int dp)
{
    /* Like gen_vfp_mul() but put result in F1 */
    TCGv_ptr fpst = get_fpstatus_ptr(0);
    if (dp) {
        gen_helper_vfp_muld(cpu_F1d, cpu_F0d, cpu_F1d, fpst);
    } else {
        gen_helper_vfp_muls(cpu_F1s, cpu_F0s, cpu_F1s, fpst);
    }
    tcg_temp_free_ptr(fpst);
}

static inline void gen_vfp_F1_neg(int dp)
{
    /* Like gen_vfp_neg() but put result in F1 */
    if (dp) {
        gen_helper_vfp_negd(cpu_F1d, cpu_F0d);
    } else {
        gen_helper_vfp_negs(cpu_F1s, cpu_F0s);
    }
}

static inline void gen_vfp_abs(int dp)
{
    if (dp)
        gen_helper_vfp_absd(cpu_F0d, cpu_F0d);
    else
        gen_helper_vfp_abss(cpu_F0s, cpu_F0s);
}

static inline void gen_vfp_neg(int dp)
{
    if (dp)
        gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
    else
        gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
}

static inline void gen_vfp_sqrt(int dp)
{
    if (dp)
        gen_helper_vfp_sqrtd(cpu_F0d, cpu_F0d, cpu_env);
    else
        gen_helper_vfp_sqrts(cpu_F0s, cpu_F0s, cpu_env);
}

static inline void gen_vfp_cmp(int dp)
{
    if (dp)
        gen_helper_vfp_cmpd(cpu_F0d, cpu_F1d, cpu_env);
    else
        gen_helper_vfp_cmps(cpu_F0s, cpu_F1s, cpu_env);
}

static inline void gen_vfp_cmpe(int dp)
{
    if (dp)
        gen_helper_vfp_cmped(cpu_F0d, cpu_F1d, cpu_env);
    else
        gen_helper_vfp_cmpes(cpu_F0s, cpu_F1s, cpu_env);
}

static inline void gen_vfp_F1_ld0(int dp)
{
    if (dp)
        tcg_gen_movi_i64(cpu_F1d, 0);
    else
        tcg_gen_movi_i32(cpu_F1s, 0);
}

#define VFP_GEN_ITOF(name) \
static inline void gen_vfp_##name(int dp, int neon) \
{ \
    TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
    if (dp) { \
        gen_helper_vfp_##name##d(cpu_F0d, cpu_F0s, statusptr); \
    } else { \
        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
    } \
    tcg_temp_free_ptr(statusptr); \
}

VFP_GEN_ITOF(uito)
VFP_GEN_ITOF(sito)
#undef VFP_GEN_ITOF

#define VFP_GEN_FTOI(name) \
static inline void gen_vfp_##name(int dp, int neon) \
{ \
    TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
    if (dp) { \
        gen_helper_vfp_##name##d(cpu_F0s, cpu_F0d, statusptr); \
    } else { \
        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
    } \
    tcg_temp_free_ptr(statusptr); \
}

VFP_GEN_FTOI(toui)
VFP_GEN_FTOI(touiz)
VFP_GEN_FTOI(tosi)
VFP_GEN_FTOI(tosiz)
#undef VFP_GEN_FTOI

#define VFP_GEN_FIX(name, round) \
static inline void gen_vfp_##name(int dp, int shift, int neon) \
{ \
    TCGv_i32 tmp_shift = tcg_const_i32(shift); \
    TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
    if (dp) { \
        gen_helper_vfp_##name##d##round(cpu_F0d, cpu_F0d, tmp_shift, \
                                        statusptr); \
    } else { \
        gen_helper_vfp_##name##s##round(cpu_F0s, cpu_F0s, tmp_shift, \
                                        statusptr); \
    } \
    tcg_temp_free_i32(tmp_shift); \
    tcg_temp_free_ptr(statusptr); \
}
VFP_GEN_FIX(tosh, _round_to_zero)
VFP_GEN_FIX(tosl, _round_to_zero)
VFP_GEN_FIX(touh, _round_to_zero)
VFP_GEN_FIX(toul, _round_to_zero)
VFP_GEN_FIX(shto, )
VFP_GEN_FIX(slto, )
VFP_GEN_FIX(uhto, )
VFP_GEN_FIX(ulto, )
#undef VFP_GEN_FIX

static inline void gen_vfp_ld(DisasContext *s, int dp, TCGv_i32 addr)
{
    if (dp) {
        gen_aa32_ld64(cpu_F0d, addr, get_mem_index(s));
    } else {
        gen_aa32_ld32u(cpu_F0s, addr, get_mem_index(s));
    }
}

static inline void gen_vfp_st(DisasContext *s, int dp, TCGv_i32 addr)
{
    if (dp) {
        gen_aa32_st64(cpu_F0d, addr, get_mem_index(s));
    } else {
        gen_aa32_st32(cpu_F0s, addr, get_mem_index(s));
    }
}

static inline long
vfp_reg_offset (int dp, int reg)
{
    if (dp)
        return offsetof(CPUARMState, vfp.regs[reg]);
    else if (reg & 1) {
        return offsetof(CPUARMState, vfp.regs[reg >> 1])
          + offsetof(CPU_DoubleU, l.upper);
    } else {
        return offsetof(CPUARMState, vfp.regs[reg >> 1])
          + offsetof(CPU_DoubleU, l.lower);
    }
}
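
/* This encodes the VFP register banking: single-precision registers
 * S<2n> and S<2n+1> occupy the lower and upper 32 bits of double
 * register D<n>, so a single-precision offset is the containing D
 * register's offset plus the offset of the relevant CPU_DoubleU word.
 */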

/* Return the offset of a 32-bit piece of a NEON register.
   zero is the least significant end of the register.  */
static inline long
neon_reg_offset (int reg, int n)
{
    int sreg;
    sreg = reg * 2 + n;
    return vfp_reg_offset(0, sreg);
}

static TCGv_i32 neon_load_reg(int reg, int pass)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
    return tmp;
}

static void neon_store_reg(int reg, int pass, TCGv_i32 var)
{
    tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
    tcg_temp_free_i32(var);
}

static inline void neon_load_reg64(TCGv_i64 var, int reg)
{
    tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
}

static inline void neon_store_reg64(TCGv_i64 var, int reg)
{
    tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
}

#define tcg_gen_ld_f32 tcg_gen_ld_i32
#define tcg_gen_ld_f64 tcg_gen_ld_i64
#define tcg_gen_st_f32 tcg_gen_st_i32
#define tcg_gen_st_f64 tcg_gen_st_i64

static inline void gen_mov_F0_vreg(int dp, int reg)
{
    if (dp)
        tcg_gen_ld_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
    else
        tcg_gen_ld_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
}

static inline void gen_mov_F1_vreg(int dp, int reg)
{
    if (dp)
        tcg_gen_ld_f64(cpu_F1d, cpu_env, vfp_reg_offset(dp, reg));
    else
        tcg_gen_ld_f32(cpu_F1s, cpu_env, vfp_reg_offset(dp, reg));
}

static inline void gen_mov_vreg_F0(int dp, int reg)
{
    if (dp)
        tcg_gen_st_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
    else
        tcg_gen_st_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
}

#define ARM_CP_RW_BIT   (1 << 20)

static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
{
    tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
}

static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
{
    tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
}

static inline TCGv_i32 iwmmxt_load_creg(int reg)
{
    TCGv_i32 var = tcg_temp_new_i32();
    tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
    return var;
}

static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
{
    tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
    tcg_temp_free_i32(var);
}

static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
{
    iwmmxt_store_reg(cpu_M0, rn);
}

static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_M0, rn);
}

static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
}

static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
}

static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
}

#define IWMMXT_OP(name) \
static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
{ \
    iwmmxt_load_reg(cpu_V1, rn); \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
}

#define IWMMXT_OP_ENV(name) \
static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
{ \
    iwmmxt_load_reg(cpu_V1, rn); \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
}

#define IWMMXT_OP_ENV_SIZE(name) \
IWMMXT_OP_ENV(name##b) \
IWMMXT_OP_ENV(name##w) \
IWMMXT_OP_ENV(name##l)

#define IWMMXT_OP_ENV1(name) \
static inline void gen_op_iwmmxt_##name##_M0(void) \
{ \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
}

IWMMXT_OP(maddsq)
IWMMXT_OP(madduq)
IWMMXT_OP(sadb)
IWMMXT_OP(sadw)
IWMMXT_OP(mulslw)
IWMMXT_OP(mulshw)
IWMMXT_OP(mululw)
IWMMXT_OP(muluhw)
IWMMXT_OP(macsw)
IWMMXT_OP(macuw)

IWMMXT_OP_ENV_SIZE(unpackl)
IWMMXT_OP_ENV_SIZE(unpackh)

IWMMXT_OP_ENV1(unpacklub)
IWMMXT_OP_ENV1(unpackluw)
IWMMXT_OP_ENV1(unpacklul)
IWMMXT_OP_ENV1(unpackhub)
IWMMXT_OP_ENV1(unpackhuw)
IWMMXT_OP_ENV1(unpackhul)
IWMMXT_OP_ENV1(unpacklsb)
IWMMXT_OP_ENV1(unpacklsw)
IWMMXT_OP_ENV1(unpacklsl)
IWMMXT_OP_ENV1(unpackhsb)
IWMMXT_OP_ENV1(unpackhsw)
IWMMXT_OP_ENV1(unpackhsl)

IWMMXT_OP_ENV_SIZE(cmpeq)
IWMMXT_OP_ENV_SIZE(cmpgtu)
IWMMXT_OP_ENV_SIZE(cmpgts)

IWMMXT_OP_ENV_SIZE(mins)
IWMMXT_OP_ENV_SIZE(minu)
IWMMXT_OP_ENV_SIZE(maxs)
IWMMXT_OP_ENV_SIZE(maxu)

IWMMXT_OP_ENV_SIZE(subn)
IWMMXT_OP_ENV_SIZE(addn)
IWMMXT_OP_ENV_SIZE(subu)
IWMMXT_OP_ENV_SIZE(addu)
IWMMXT_OP_ENV_SIZE(subs)
IWMMXT_OP_ENV_SIZE(adds)

IWMMXT_OP_ENV(avgb0)
IWMMXT_OP_ENV(avgb1)
IWMMXT_OP_ENV(avgw0)
IWMMXT_OP_ENV(avgw1)

IWMMXT_OP_ENV(packuw)
IWMMXT_OP_ENV(packul)
IWMMXT_OP_ENV(packuq)
IWMMXT_OP_ENV(packsw)
IWMMXT_OP_ENV(packsl)
IWMMXT_OP_ENV(packsq)

static void gen_op_iwmmxt_set_mup(void)
{
    TCGv_i32 tmp;
    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
    tcg_gen_ori_i32(tmp, tmp, 2);
    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
}

static void gen_op_iwmmxt_set_cup(void)
{
    TCGv_i32 tmp;
    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
    tcg_gen_ori_i32(tmp, tmp, 1);
    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
}

static void gen_op_iwmmxt_setpsr_nz(void)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
}

static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
    tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
}

static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
                                     TCGv_i32 dest)
{
    int rd;
    uint32_t offset;
    TCGv_i32 tmp;

    rd = (insn >> 16) & 0xf;
    tmp = load_reg(s, rd);

    offset = (insn & 0xff) << ((insn >> 7) & 2);
    if (insn & (1 << 24)) {
        /* Pre indexed */
        if (insn & (1 << 23))
            tcg_gen_addi_i32(tmp, tmp, offset);
        else
            tcg_gen_addi_i32(tmp, tmp, -offset);
        tcg_gen_mov_i32(dest, tmp);
        if (insn & (1 << 21))
            store_reg(s, rd, tmp);
        else
            tcg_temp_free_i32(tmp);
    } else if (insn & (1 << 21)) {
        /* Post indexed */
        tcg_gen_mov_i32(dest, tmp);
        if (insn & (1 << 23))
            tcg_gen_addi_i32(tmp, tmp, offset);
        else
            tcg_gen_addi_i32(tmp, tmp, -offset);
        store_reg(s, rd, tmp);
    } else if (!(insn & (1 << 23)))
        return 1;
    return 0;
}
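
/* Addressing-mode summary for the above, derived from the bit tests:
 * the 8-bit immediate is scaled by 4 when insn bit 8 is set (that is
 * the "(insn >> 7) & 2" shift), bit 24 selects pre- vs post-indexing,
 * bit 23 gives the offset sign, and bit 21 requests base writeback;
 * an encoding with neither pre-indexing nor writeback is only accepted
 * when bit 23 is set, hence the "return 1" path.
 */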

static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
{
    int rd = (insn >> 0) & 0xf;
    TCGv_i32 tmp;

    if (insn & (1 << 8)) {
        if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
            return 1;
        } else {
            tmp = iwmmxt_load_creg(rd);
        }
    } else {
        tmp = tcg_temp_new_i32();
        iwmmxt_load_reg(cpu_V0, rd);
        tcg_gen_trunc_i64_i32(tmp, cpu_V0);
    }
    tcg_gen_andi_i32(tmp, tmp, mask);
    tcg_gen_mov_i32(dest, tmp);
    tcg_temp_free_i32(tmp);
    return 0;
}

/* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
   (i.e. an undefined instruction).  */
static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
{
    int rd, wrd;
    int rdhi, rdlo, rd0, rd1, i;
    TCGv_i32 addr;
    TCGv_i32 tmp, tmp2, tmp3;

    if ((insn & 0x0e000e00) == 0x0c000000) {
        if ((insn & 0x0fe00ff0) == 0x0c400000) {
            wrd = insn & 0xf;
            rdlo = (insn >> 12) & 0xf;
            rdhi = (insn >> 16) & 0xf;
            if (insn & ARM_CP_RW_BIT) {                 /* TMRRC */
                iwmmxt_load_reg(cpu_V0, wrd);
                tcg_gen_trunc_i64_i32(cpu_R[rdlo], cpu_V0);
                tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
                tcg_gen_trunc_i64_i32(cpu_R[rdhi], cpu_V0);
            } else {                                    /* TMCRR */
                tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
                iwmmxt_store_reg(cpu_V0, wrd);
                gen_op_iwmmxt_set_mup();
            }
            return 0;
        }

        wrd = (insn >> 12) & 0xf;
        addr = tcg_temp_new_i32();
        if (gen_iwmmxt_address(s, insn, addr)) {
            tcg_temp_free_i32(addr);
            return 1;
        }
        if (insn & ARM_CP_RW_BIT) {
            if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
                tmp = tcg_temp_new_i32();
                gen_aa32_ld32u(tmp, addr, get_mem_index(s));
                iwmmxt_store_creg(wrd, tmp);
            } else {
                i = 1;
                if (insn & (1 << 8)) {
                    if (insn & (1 << 22)) {             /* WLDRD */
                        gen_aa32_ld64(cpu_M0, addr, get_mem_index(s));
                        i = 0;
                    } else {                            /* WLDRW wRd */
                        tmp = tcg_temp_new_i32();
                        gen_aa32_ld32u(tmp, addr, get_mem_index(s));
                    }
                } else {
                    tmp = tcg_temp_new_i32();
                    if (insn & (1 << 22)) {             /* WLDRH */
                        gen_aa32_ld16u(tmp, addr, get_mem_index(s));
                    } else {                            /* WLDRB */
                        gen_aa32_ld8u(tmp, addr, get_mem_index(s));
                    }
                }
                if (i) {
                    tcg_gen_extu_i32_i64(cpu_M0, tmp);
                    tcg_temp_free_i32(tmp);
                }
                gen_op_iwmmxt_movq_wRn_M0(wrd);
            }
        } else {
            if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
                tmp = iwmmxt_load_creg(wrd);
                gen_aa32_st32(tmp, addr, get_mem_index(s));
            } else {
                gen_op_iwmmxt_movq_M0_wRn(wrd);
                tmp = tcg_temp_new_i32();
                if (insn & (1 << 8)) {
                    if (insn & (1 << 22)) {             /* WSTRD */
                        gen_aa32_st64(cpu_M0, addr, get_mem_index(s));
                    } else {                            /* WSTRW wRd */
                        tcg_gen_trunc_i64_i32(tmp, cpu_M0);
                        gen_aa32_st32(tmp, addr, get_mem_index(s));
                    }
                } else {
                    if (insn & (1 << 22)) {             /* WSTRH */
                        tcg_gen_trunc_i64_i32(tmp, cpu_M0);
                        gen_aa32_st16(tmp, addr, get_mem_index(s));
                    } else {                            /* WSTRB */
                        tcg_gen_trunc_i64_i32(tmp, cpu_M0);
                        gen_aa32_st8(tmp, addr, get_mem_index(s));
                    }
                }
            }
            tcg_temp_free_i32(tmp);
        }
        tcg_temp_free_i32(addr);
        return 0;
    }

    if ((insn & 0x0f000000) != 0x0e000000)
        return 1;

    switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
    case 0x000:                                         /* WOR */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 0) & 0xf;
        rd1 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        gen_op_iwmmxt_orq_M0_wRn(rd1);
        gen_op_iwmmxt_setpsr_nz();
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x011:                                         /* TMCR */
        if (insn & 0xf)
            return 1;
        rd = (insn >> 12) & 0xf;
        wrd = (insn >> 16) & 0xf;
        switch (wrd) {
        case ARM_IWMMXT_wCID:
        case ARM_IWMMXT_wCASF:
            break;
        case ARM_IWMMXT_wCon:
            gen_op_iwmmxt_set_cup();
            /* Fall through.  */
        case ARM_IWMMXT_wCSSF:
            tmp = iwmmxt_load_creg(wrd);
            tmp2 = load_reg(s, rd);
            tcg_gen_andc_i32(tmp, tmp, tmp2);
            tcg_temp_free_i32(tmp2);
            iwmmxt_store_creg(wrd, tmp);
            break;
        case ARM_IWMMXT_wCGR0:
        case ARM_IWMMXT_wCGR1:
        case ARM_IWMMXT_wCGR2:
        case ARM_IWMMXT_wCGR3:
            gen_op_iwmmxt_set_cup();
            tmp = load_reg(s, rd);
            iwmmxt_store_creg(wrd, tmp);
            break;
        default:
            return 1;
        }
        break;
    case 0x100:                                         /* WXOR */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 0) & 0xf;
        rd1 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
        gen_op_iwmmxt_xorq_M0_wRn(rd1);
        gen_op_iwmmxt_setpsr_nz();
        gen_op_iwmmxt_movq_wRn_M0(wrd);
        gen_op_iwmmxt_set_mup();
        gen_op_iwmmxt_set_cup();
        break;
    case 0x111:                                         /* TMRC */
        if (insn & 0xf)
            return 1;
        rd = (insn >> 12) & 0xf;
        wrd = (insn >> 16) & 0xf;
        tmp = iwmmxt_load_creg(wrd);
        store_reg(s, rd, tmp);
        break;
    case 0x300:                                         /* WANDN */
        wrd = (insn >> 12) & 0xf;
        rd0 = (insn >> 0) & 0xf;
        rd1 = (insn >> 16) & 0xf;
        gen_op_iwmmxt_movq_M0_wRn(rd0);
1702         tcg_gen_not_i64(cpu_M0, cpu_M0);    /* bitwise NOT: WANDN is wRn & ~wRm */
1703         gen_op_iwmmxt_andq_M0_wRn(rd1);
1704         gen_op_iwmmxt_setpsr_nz();
1705         gen_op_iwmmxt_movq_wRn_M0(wrd);
1706         gen_op_iwmmxt_set_mup();
1707         gen_op_iwmmxt_set_cup();
1708         break;
1709     case 0x200:                                         /* WAND */
1710         wrd = (insn >> 12) & 0xf;
1711         rd0 = (insn >> 0) & 0xf;
1712         rd1 = (insn >> 16) & 0xf;
1713         gen_op_iwmmxt_movq_M0_wRn(rd0);
1714         gen_op_iwmmxt_andq_M0_wRn(rd1);
1715         gen_op_iwmmxt_setpsr_nz();
1716         gen_op_iwmmxt_movq_wRn_M0(wrd);
1717         gen_op_iwmmxt_set_mup();
1718         gen_op_iwmmxt_set_cup();
1719         break;
1720     case 0x810: case 0xa10:                             /* WMADD */
1721         wrd = (insn >> 12) & 0xf;
1722         rd0 = (insn >> 0) & 0xf;
1723         rd1 = (insn >> 16) & 0xf;
1724         gen_op_iwmmxt_movq_M0_wRn(rd0);
1725         if (insn & (1 << 21))
1726             gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1727         else
1728             gen_op_iwmmxt_madduq_M0_wRn(rd1);
1729         gen_op_iwmmxt_movq_wRn_M0(wrd);
1730         gen_op_iwmmxt_set_mup();
1731         break;
1732     case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
1733         wrd = (insn >> 12) & 0xf;
1734         rd0 = (insn >> 16) & 0xf;
1735         rd1 = (insn >> 0) & 0xf;
1736         gen_op_iwmmxt_movq_M0_wRn(rd0);
1737         switch ((insn >> 22) & 3) {
1738         case 0:
1739             gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1740             break;
1741         case 1:
1742             gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1743             break;
1744         case 2:
1745             gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1746             break;
1747         case 3:
1748             return 1;
1749         }
1750         gen_op_iwmmxt_movq_wRn_M0(wrd);
1751         gen_op_iwmmxt_set_mup();
1752         gen_op_iwmmxt_set_cup();
1753         break;
1754     case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
1755         wrd = (insn >> 12) & 0xf;
1756         rd0 = (insn >> 16) & 0xf;
1757         rd1 = (insn >> 0) & 0xf;
1758         gen_op_iwmmxt_movq_M0_wRn(rd0);
1759         switch ((insn >> 22) & 3) {
1760         case 0:
1761             gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1762             break;
1763         case 1:
1764             gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1765             break;
1766         case 2:
1767             gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1768             break;
1769         case 3:
1770             return 1;
1771         }
1772         gen_op_iwmmxt_movq_wRn_M0(wrd);
1773         gen_op_iwmmxt_set_mup();
1774         gen_op_iwmmxt_set_cup();
1775         break;
1776     case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
1777         wrd = (insn >> 12) & 0xf;
1778         rd0 = (insn >> 16) & 0xf;
1779         rd1 = (insn >> 0) & 0xf;
1780         gen_op_iwmmxt_movq_M0_wRn(rd0);
1781         if (insn & (1 << 22))
1782             gen_op_iwmmxt_sadw_M0_wRn(rd1);
1783         else
1784             gen_op_iwmmxt_sadb_M0_wRn(rd1);
1785         if (!(insn & (1 << 20)))
1786             gen_op_iwmmxt_addl_M0_wRn(wrd);
1787         gen_op_iwmmxt_movq_wRn_M0(wrd);
1788         gen_op_iwmmxt_set_mup();
1789         break;
1790     case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
1791         wrd = (insn >> 12) & 0xf;
1792         rd0 = (insn >> 16) & 0xf;
1793         rd1 = (insn >> 0) & 0xf;
1794         gen_op_iwmmxt_movq_M0_wRn(rd0);
1795         if (insn & (1 << 21)) {
1796             if (insn & (1 << 20))
1797                 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1798             else
1799                 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1800         } else {
1801             if (insn & (1 << 20))
1802                 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1803             else
1804                 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1805         }
1806         gen_op_iwmmxt_movq_wRn_M0(wrd);
1807         gen_op_iwmmxt_set_mup();
1808         break;
1809     case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
1810         wrd = (insn >> 12) & 0xf;
1811         rd0 = (insn >> 16) & 0xf;
1812         rd1 = (insn >> 0) & 0xf;
1813         gen_op_iwmmxt_movq_M0_wRn(rd0);
1814         if (insn & (1 << 21))
1815             gen_op_iwmmxt_macsw_M0_wRn(rd1);
1816         else
1817             gen_op_iwmmxt_macuw_M0_wRn(rd1);
1818         if (!(insn & (1 << 20))) {
1819             iwmmxt_load_reg(cpu_V1, wrd);
1820             tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1821         }
1822         gen_op_iwmmxt_movq_wRn_M0(wrd);
1823         gen_op_iwmmxt_set_mup();
1824         break;
1825     case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
1826         wrd = (insn >> 12) & 0xf;
1827         rd0 = (insn >> 16) & 0xf;
1828         rd1 = (insn >> 0) & 0xf;
1829         gen_op_iwmmxt_movq_M0_wRn(rd0);
1830         switch ((insn >> 22) & 3) {
1831         case 0:
1832             gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1833             break;
1834         case 1:
1835             gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1836             break;
1837         case 2:
1838             gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1839             break;
1840         case 3:
1841             return 1;
1842         }
1843         gen_op_iwmmxt_movq_wRn_M0(wrd);
1844         gen_op_iwmmxt_set_mup();
1845         gen_op_iwmmxt_set_cup();
1846         break;
1847     case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
1848         wrd = (insn >> 12) & 0xf;
1849         rd0 = (insn >> 16) & 0xf;
1850         rd1 = (insn >> 0) & 0xf;
1851         gen_op_iwmmxt_movq_M0_wRn(rd0);
1852         if (insn & (1 << 22)) {
1853             if (insn & (1 << 20))
1854                 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1855             else
1856                 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1857         } else {
1858             if (insn & (1 << 20))
1859                 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1860             else
1861                 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1862         }
1863         gen_op_iwmmxt_movq_wRn_M0(wrd);
1864         gen_op_iwmmxt_set_mup();
1865         gen_op_iwmmxt_set_cup();
1866         break;
1867     case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
1868         wrd = (insn >> 12) & 0xf;
1869         rd0 = (insn >> 16) & 0xf;
1870         rd1 = (insn >> 0) & 0xf;
1871         gen_op_iwmmxt_movq_M0_wRn(rd0);
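             /* The byte rotation comes from wCGR0..wCGR3, selected by
                insn bits [21:20]; only the low three bits are used.  */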
1872         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1873         tcg_gen_andi_i32(tmp, tmp, 7);
1874         iwmmxt_load_reg(cpu_V1, rd1);
1875         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1876         tcg_temp_free_i32(tmp);
1877         gen_op_iwmmxt_movq_wRn_M0(wrd);
1878         gen_op_iwmmxt_set_mup();
1879         break;
1880     case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
1881         if (((insn >> 6) & 3) == 3)
1882             return 1;
1883         rd = (insn >> 12) & 0xf;
1884         wrd = (insn >> 16) & 0xf;
1885         tmp = load_reg(s, rd);
1886         gen_op_iwmmxt_movq_M0_wRn(wrd);
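             /* tmp2 is the lane mask and tmp3 the bit offset of the byte,
                halfword or word lane selected by the low insn bits.  */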
1887         switch ((insn >> 6) & 3) {
1888         case 0:
1889             tmp2 = tcg_const_i32(0xff);
1890             tmp3 = tcg_const_i32((insn & 7) << 3);
1891             break;
1892         case 1:
1893             tmp2 = tcg_const_i32(0xffff);
1894             tmp3 = tcg_const_i32((insn & 3) << 4);
1895             break;
1896         case 2:
1897             tmp2 = tcg_const_i32(0xffffffff);
1898             tmp3 = tcg_const_i32((insn & 1) << 5);
1899             break;
1900         default:
1901             TCGV_UNUSED_I32(tmp2);
1902             TCGV_UNUSED_I32(tmp3);
1903         }
1904         gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1905         tcg_temp_free_i32(tmp3);
1906         tcg_temp_free_i32(tmp2);
1907         tcg_temp_free_i32(tmp);
1908         gen_op_iwmmxt_movq_wRn_M0(wrd);
1909         gen_op_iwmmxt_set_mup();
1910         break;
1911     case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
1912         rd = (insn >> 12) & 0xf;
1913         wrd = (insn >> 16) & 0xf;
1914         if (rd == 15 || ((insn >> 22) & 3) == 3)
1915             return 1;
1916         gen_op_iwmmxt_movq_M0_wRn(wrd);
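             /* Shift the selected lane down to bits [31:0]; for byte and
                halfword lanes, insn bit 3 selects sign extension versus
                zero extension of the result.  */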
1917         tmp = tcg_temp_new_i32();
1918         switch ((insn >> 22) & 3) {
1919         case 0:
1920             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1921             tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1922             if (insn & 8) {
1923                 tcg_gen_ext8s_i32(tmp, tmp);
1924             } else {
1925                 tcg_gen_andi_i32(tmp, tmp, 0xff);
1926             }
1927             break;
1928         case 1:
1929             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1930             tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1931             if (insn & 8) {
1932                 tcg_gen_ext16s_i32(tmp, tmp);
1933             } else {
1934                 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1935             }
1936             break;
1937         case 2:
1938             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1939             tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1940             break;
1941         }
1942         store_reg(s, rd, tmp);
1943         break;
1944     case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
1945         if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1946             return 1;
1947         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
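             /* Extract the SIMD flag nibble for the chosen lane from
                wCASF and move it into the ARM NZCV flags.  */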
1948         switch ((insn >> 22) & 3) {
1949         case 0:
1950             tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1951             break;
1952         case 1:
1953             tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1954             break;
1955         case 2:
1956             tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1957             break;
1958         }
1959         tcg_gen_shli_i32(tmp, tmp, 28);
1960         gen_set_nzcv(tmp);
1961         tcg_temp_free_i32(tmp);
1962         break;
1963     case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
1964         if (((insn >> 6) & 3) == 3)
1965             return 1;
1966         rd = (insn >> 12) & 0xf;
1967         wrd = (insn >> 16) & 0xf;
1968         tmp = load_reg(s, rd);
1969         switch ((insn >> 6) & 3) {
1970         case 0:
1971             gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1972             break;
1973         case 1:
1974             gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1975             break;
1976         case 2:
1977             gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1978             break;
1979         }
1980         tcg_temp_free_i32(tmp);
1981         gen_op_iwmmxt_movq_wRn_M0(wrd);
1982         gen_op_iwmmxt_set_mup();
1983         break;
1984     case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
1985         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1986             return 1;
1987         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1988         tmp2 = tcg_temp_new_i32();
1989         tcg_gen_mov_i32(tmp2, tmp);
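             /* AND together the per-lane flag fields by folding shifted
                copies of wCASF into the top nibble, then set NZCV.  */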
1990         switch ((insn >> 22) & 3) {
1991         case 0:
1992             for (i = 0; i < 7; i++) {
1993                 tcg_gen_shli_i32(tmp2, tmp2, 4);
1994                 tcg_gen_and_i32(tmp, tmp, tmp2);
1995             }
1996             break;
1997         case 1:
1998             for (i = 0; i < 3; i++) {
1999                 tcg_gen_shli_i32(tmp2, tmp2, 8);
2000                 tcg_gen_and_i32(tmp, tmp, tmp2);
2001             }
2002             break;
2003         case 2:
2004             tcg_gen_shli_i32(tmp2, tmp2, 16);
2005             tcg_gen_and_i32(tmp, tmp, tmp2);
2006             break;
2007         }
2008         gen_set_nzcv(tmp);
2009         tcg_temp_free_i32(tmp2);
2010         tcg_temp_free_i32(tmp);
2011         break;
2012     case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
2013         wrd = (insn >> 12) & 0xf;
2014         rd0 = (insn >> 16) & 0xf;
2015         gen_op_iwmmxt_movq_M0_wRn(rd0);
2016         switch ((insn >> 22) & 3) {
2017         case 0:
2018             gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
2019             break;
2020         case 1:
2021             gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
2022             break;
2023         case 2:
2024             gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2025             break;
2026         case 3:
2027             return 1;
2028         }
2029         gen_op_iwmmxt_movq_wRn_M0(wrd);
2030         gen_op_iwmmxt_set_mup();
2031         break;
2032     case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
2033         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2034             return 1;
2035         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2036         tmp2 = tcg_temp_new_i32();
2037         tcg_gen_mov_i32(tmp2, tmp);
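             /* Same folding trick as TANDC above, but ORing the per-lane
                flag fields together before setting NZCV.  */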
2038         switch ((insn >> 22) & 3) {
2039         case 0:
2040             for (i = 0; i < 7; i++) {
2041                 tcg_gen_shli_i32(tmp2, tmp2, 4);
2042                 tcg_gen_or_i32(tmp, tmp, tmp2);
2043             }
2044             break;
2045         case 1:
2046             for (i = 0; i < 3; i++) {
2047                 tcg_gen_shli_i32(tmp2, tmp2, 8);
2048                 tcg_gen_or_i32(tmp, tmp, tmp2);
2049             }
2050             break;
2051         case 2:
2052             tcg_gen_shli_i32(tmp2, tmp2, 16);
2053             tcg_gen_or_i32(tmp, tmp, tmp2);
2054             break;
2055         }
2056         gen_set_nzcv(tmp);
2057         tcg_temp_free_i32(tmp2);
2058         tcg_temp_free_i32(tmp);
2059         break;
2060     case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
2061         rd = (insn >> 12) & 0xf;
2062         rd0 = (insn >> 16) & 0xf;
2063         if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2064             return 1;
2065         gen_op_iwmmxt_movq_M0_wRn(rd0);
2066         tmp = tcg_temp_new_i32();
2067         switch ((insn >> 22) & 3) {
2068         case 0:
2069             gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2070             break;
2071         case 1:
2072             gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2073             break;
2074         case 2:
2075             gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2076             break;
2077         }
2078         store_reg(s, rd, tmp);
2079         break;
2080     case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
2081     case 0x906: case 0xb06: case 0xd06: case 0xf06:
2082         wrd = (insn >> 12) & 0xf;
2083         rd0 = (insn >> 16) & 0xf;
2084         rd1 = (insn >> 0) & 0xf;
2085         gen_op_iwmmxt_movq_M0_wRn(rd0);
2086         switch ((insn >> 22) & 3) {
2087         case 0:
2088             if (insn & (1 << 21))
2089                 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2090             else
2091                 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2092             break;
2093         case 1:
2094             if (insn & (1 << 21))
2095                 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2096             else
2097                 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2098             break;
2099         case 2:
2100             if (insn & (1 << 21))
2101                 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2102             else
2103                 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2104             break;
2105         case 3:
2106             return 1;
2107         }
2108         gen_op_iwmmxt_movq_wRn_M0(wrd);
2109         gen_op_iwmmxt_set_mup();
2110         gen_op_iwmmxt_set_cup();
2111         break;
2112     case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
2113     case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2114         wrd = (insn >> 12) & 0xf;
2115         rd0 = (insn >> 16) & 0xf;
2116         gen_op_iwmmxt_movq_M0_wRn(rd0);
2117         switch ((insn >> 22) & 3) {
2118         case 0:
2119             if (insn & (1 << 21))
2120                 gen_op_iwmmxt_unpacklsb_M0();
2121             else
2122                 gen_op_iwmmxt_unpacklub_M0();
2123             break;
2124         case 1:
2125             if (insn & (1 << 21))
2126                 gen_op_iwmmxt_unpacklsw_M0();
2127             else
2128                 gen_op_iwmmxt_unpackluw_M0();
2129             break;
2130         case 2:
2131             if (insn & (1 << 21))
2132                 gen_op_iwmmxt_unpacklsl_M0();
2133             else
2134                 gen_op_iwmmxt_unpacklul_M0();
2135             break;
2136         case 3:
2137             return 1;
2138         }
2139         gen_op_iwmmxt_movq_wRn_M0(wrd);
2140         gen_op_iwmmxt_set_mup();
2141         gen_op_iwmmxt_set_cup();
2142         break;
2143     case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
2144     case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2145         wrd = (insn >> 12) & 0xf;
2146         rd0 = (insn >> 16) & 0xf;
2147         gen_op_iwmmxt_movq_M0_wRn(rd0);
2148         switch ((insn >> 22) & 3) {
2149         case 0:
2150             if (insn & (1 << 21))
2151                 gen_op_iwmmxt_unpackhsb_M0();
2152             else
2153                 gen_op_iwmmxt_unpackhub_M0();
2154             break;
2155         case 1:
2156             if (insn & (1 << 21))
2157                 gen_op_iwmmxt_unpackhsw_M0();
2158             else
2159                 gen_op_iwmmxt_unpackhuw_M0();
2160             break;
2161         case 2:
2162             if (insn & (1 << 21))
2163                 gen_op_iwmmxt_unpackhsl_M0();
2164             else
2165                 gen_op_iwmmxt_unpackhul_M0();
2166             break;
2167         case 3:
2168             return 1;
2169         }
2170         gen_op_iwmmxt_movq_wRn_M0(wrd);
2171         gen_op_iwmmxt_set_mup();
2172         gen_op_iwmmxt_set_cup();
2173         break;
2174     case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
2175     case 0x214: case 0x614: case 0xa14: case 0xe14:
2176         if (((insn >> 22) & 3) == 0)
2177             return 1;
2178         wrd = (insn >> 12) & 0xf;
2179         rd0 = (insn >> 16) & 0xf;
2180         gen_op_iwmmxt_movq_M0_wRn(rd0);
2181         tmp = tcg_temp_new_i32();
2182         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2183             tcg_temp_free_i32(tmp);
2184             return 1;
2185         }
2186         switch ((insn >> 22) & 3) {
2187         case 1:
2188             gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2189             break;
2190         case 2:
2191             gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2192             break;
2193         case 3:
2194             gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2195             break;
2196         }
2197         tcg_temp_free_i32(tmp);
2198         gen_op_iwmmxt_movq_wRn_M0(wrd);
2199         gen_op_iwmmxt_set_mup();
2200         gen_op_iwmmxt_set_cup();
2201         break;
2202     case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
2203     case 0x014: case 0x414: case 0x814: case 0xc14:
2204         if (((insn >> 22) & 3) == 0)
2205             return 1;
2206         wrd = (insn >> 12) & 0xf;
2207         rd0 = (insn >> 16) & 0xf;
2208         gen_op_iwmmxt_movq_M0_wRn(rd0);
2209         tmp = tcg_temp_new_i32();
2210         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2211             tcg_temp_free_i32(tmp);
2212             return 1;
2213         }
2214         switch ((insn >> 22) & 3) {
2215         case 1:
2216             gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2217             break;
2218         case 2:
2219             gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2220             break;
2221         case 3:
2222             gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2223             break;
2224         }
2225         tcg_temp_free_i32(tmp);
2226         gen_op_iwmmxt_movq_wRn_M0(wrd);
2227         gen_op_iwmmxt_set_mup();
2228         gen_op_iwmmxt_set_cup();
2229         break;
2230     case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
2231     case 0x114: case 0x514: case 0x914: case 0xd14:
2232         if (((insn >> 22) & 3) == 0)
2233             return 1;
2234         wrd = (insn >> 12) & 0xf;
2235         rd0 = (insn >> 16) & 0xf;
2236         gen_op_iwmmxt_movq_M0_wRn(rd0);
2237         tmp = tcg_temp_new_i32();
2238         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2239             tcg_temp_free_i32(tmp);
2240             return 1;
2241         }
2242         switch ((insn >> 22) & 3) {
2243         case 1:
2244             gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2245             break;
2246         case 2:
2247             gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2248             break;
2249         case 3:
2250             gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2251             break;
2252         }
2253         tcg_temp_free_i32(tmp);
2254         gen_op_iwmmxt_movq_wRn_M0(wrd);
2255         gen_op_iwmmxt_set_mup();
2256         gen_op_iwmmxt_set_cup();
2257         break;
2258     case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
2259     case 0x314: case 0x714: case 0xb14: case 0xf14:
2260         if (((insn >> 22) & 3) == 0)
2261             return 1;
2262         wrd = (insn >> 12) & 0xf;
2263         rd0 = (insn >> 16) & 0xf;
2264         gen_op_iwmmxt_movq_M0_wRn(rd0);
2265         tmp = tcg_temp_new_i32();
2266         switch ((insn >> 22) & 3) {
2267         case 1:
2268             if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2269                 tcg_temp_free_i32(tmp);
2270                 return 1;
2271             }
2272             gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2273             break;
2274         case 2:
2275             if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2276                 tcg_temp_free_i32(tmp);
2277                 return 1;
2278             }
2279             gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2280             break;
2281         case 3:
2282             if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2283                 tcg_temp_free_i32(tmp);
2284                 return 1;
2285             }
2286             gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2287             break;
2288         }
2289         tcg_temp_free_i32(tmp);
2290         gen_op_iwmmxt_movq_wRn_M0(wrd);
2291         gen_op_iwmmxt_set_mup();
2292         gen_op_iwmmxt_set_cup();
2293         break;
2294     case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
2295     case 0x916: case 0xb16: case 0xd16: case 0xf16:
2296         wrd = (insn >> 12) & 0xf;
2297         rd0 = (insn >> 16) & 0xf;
2298         rd1 = (insn >> 0) & 0xf;
2299         gen_op_iwmmxt_movq_M0_wRn(rd0);
2300         switch ((insn >> 22) & 3) {
2301         case 0:
2302             if (insn & (1 << 21))
2303                 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2304             else
2305                 gen_op_iwmmxt_minub_M0_wRn(rd1);
2306             break;
2307         case 1:
2308             if (insn & (1 << 21))
2309                 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2310             else
2311                 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2312             break;
2313         case 2:
2314             if (insn & (1 << 21))
2315                 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2316             else
2317                 gen_op_iwmmxt_minul_M0_wRn(rd1);
2318             break;
2319         case 3:
2320             return 1;
2321         }
2322         gen_op_iwmmxt_movq_wRn_M0(wrd);
2323         gen_op_iwmmxt_set_mup();
2324         break;
2325     case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
2326     case 0x816: case 0xa16: case 0xc16: case 0xe16:
2327         wrd = (insn >> 12) & 0xf;
2328         rd0 = (insn >> 16) & 0xf;
2329         rd1 = (insn >> 0) & 0xf;
2330         gen_op_iwmmxt_movq_M0_wRn(rd0);
2331         switch ((insn >> 22) & 3) {
2332         case 0:
2333             if (insn & (1 << 21))
2334                 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2335             else
2336                 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2337             break;
2338         case 1:
2339             if (insn & (1 << 21))
2340                 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2341             else
2342                 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2343             break;
2344         case 2:
2345             if (insn & (1 << 21))
2346                 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2347             else
2348                 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2349             break;
2350         case 3:
2351             return 1;
2352         }
2353         gen_op_iwmmxt_movq_wRn_M0(wrd);
2354         gen_op_iwmmxt_set_mup();
2355         break;
2356     case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
2357     case 0x402: case 0x502: case 0x602: case 0x702:
2358         wrd = (insn >> 12) & 0xf;
2359         rd0 = (insn >> 16) & 0xf;
2360         rd1 = (insn >> 0) & 0xf;
2361         gen_op_iwmmxt_movq_M0_wRn(rd0);
2362         tmp = tcg_const_i32((insn >> 20) & 3);
2363         iwmmxt_load_reg(cpu_V1, rd1);
2364         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2365         tcg_temp_free_i32(tmp);
2366         gen_op_iwmmxt_movq_wRn_M0(wrd);
2367         gen_op_iwmmxt_set_mup();
2368         break;
2369     case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
2370     case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2371     case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2372     case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2373         wrd = (insn >> 12) & 0xf;
2374         rd0 = (insn >> 16) & 0xf;
2375         rd1 = (insn >> 0) & 0xf;
2376         gen_op_iwmmxt_movq_M0_wRn(rd0);
2377         switch ((insn >> 20) & 0xf) {
2378         case 0x0:
2379             gen_op_iwmmxt_subnb_M0_wRn(rd1);
2380             break;
2381         case 0x1:
2382             gen_op_iwmmxt_subub_M0_wRn(rd1);
2383             break;
2384         case 0x3:
2385             gen_op_iwmmxt_subsb_M0_wRn(rd1);
2386             break;
2387         case 0x4:
2388             gen_op_iwmmxt_subnw_M0_wRn(rd1);
2389             break;
2390         case 0x5:
2391             gen_op_iwmmxt_subuw_M0_wRn(rd1);
2392             break;
2393         case 0x7:
2394             gen_op_iwmmxt_subsw_M0_wRn(rd1);
2395             break;
2396         case 0x8:
2397             gen_op_iwmmxt_subnl_M0_wRn(rd1);
2398             break;
2399         case 0x9:
2400             gen_op_iwmmxt_subul_M0_wRn(rd1);
2401             break;
2402         case 0xb:
2403             gen_op_iwmmxt_subsl_M0_wRn(rd1);
2404             break;
2405         default:
2406             return 1;
2407         }
2408         gen_op_iwmmxt_movq_wRn_M0(wrd);
2409         gen_op_iwmmxt_set_mup();
2410         gen_op_iwmmxt_set_cup();
2411         break;
2412     case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
2413     case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2414     case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2415     case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2416         wrd = (insn >> 12) & 0xf;
2417         rd0 = (insn >> 16) & 0xf;
2418         gen_op_iwmmxt_movq_M0_wRn(rd0);
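             /* The 8-bit shuffle immediate is split across insn[23:20]
                (high nibble) and insn[3:0] (low nibble).  */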
2419         tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2420         gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2421         tcg_temp_free_i32(tmp);
2422         gen_op_iwmmxt_movq_wRn_M0(wrd);
2423         gen_op_iwmmxt_set_mup();
2424         gen_op_iwmmxt_set_cup();
2425         break;
2426     case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
2427     case 0x418: case 0x518: case 0x618: case 0x718:
2428     case 0x818: case 0x918: case 0xa18: case 0xb18:
2429     case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2430         wrd = (insn >> 12) & 0xf;
2431         rd0 = (insn >> 16) & 0xf;
2432         rd1 = (insn >> 0) & 0xf;
2433         gen_op_iwmmxt_movq_M0_wRn(rd0);
2434         switch ((insn >> 20) & 0xf) {
2435         case 0x0:
2436             gen_op_iwmmxt_addnb_M0_wRn(rd1);
2437             break;
2438         case 0x1:
2439             gen_op_iwmmxt_addub_M0_wRn(rd1);
2440             break;
2441         case 0x3:
2442             gen_op_iwmmxt_addsb_M0_wRn(rd1);
2443             break;
2444         case 0x4:
2445             gen_op_iwmmxt_addnw_M0_wRn(rd1);
2446             break;
2447         case 0x5:
2448             gen_op_iwmmxt_adduw_M0_wRn(rd1);
2449             break;
2450         case 0x7:
2451             gen_op_iwmmxt_addsw_M0_wRn(rd1);
2452             break;
2453         case 0x8:
2454             gen_op_iwmmxt_addnl_M0_wRn(rd1);
2455             break;
2456         case 0x9:
2457             gen_op_iwmmxt_addul_M0_wRn(rd1);
2458             break;
2459         case 0xb:
2460             gen_op_iwmmxt_addsl_M0_wRn(rd1);
2461             break;
2462         default:
2463             return 1;
2464         }
2465         gen_op_iwmmxt_movq_wRn_M0(wrd);
2466         gen_op_iwmmxt_set_mup();
2467         gen_op_iwmmxt_set_cup();
2468         break;
2469     case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
2470     case 0x408: case 0x508: case 0x608: case 0x708:
2471     case 0x808: case 0x908: case 0xa08: case 0xb08:
2472     case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2473         if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2474             return 1;
2475         wrd = (insn >> 12) & 0xf;
2476         rd0 = (insn >> 16) & 0xf;
2477         rd1 = (insn >> 0) & 0xf;
2478         gen_op_iwmmxt_movq_M0_wRn(rd0);
2479         switch ((insn >> 22) & 3) {
2480         case 1:
2481             if (insn & (1 << 21))
2482                 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2483             else
2484                 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2485             break;
2486         case 2:
2487             if (insn & (1 << 21))
2488                 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2489             else
2490                 gen_op_iwmmxt_packul_M0_wRn(rd1);
2491             break;
2492         case 3:
2493             if (insn & (1 << 21))
2494                 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2495             else
2496                 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2497             break;
2498         }
2499         gen_op_iwmmxt_movq_wRn_M0(wrd);
2500         gen_op_iwmmxt_set_mup();
2501         gen_op_iwmmxt_set_cup();
2502         break;
2503     case 0x201: case 0x203: case 0x205: case 0x207:
2504     case 0x209: case 0x20b: case 0x20d: case 0x20f:
2505     case 0x211: case 0x213: case 0x215: case 0x217:
2506     case 0x219: case 0x21b: case 0x21d: case 0x21f:
2507         wrd = (insn >> 5) & 0xf;
2508         rd0 = (insn >> 12) & 0xf;
2509         rd1 = (insn >> 0) & 0xf;
2510         if (rd0 == 0xf || rd1 == 0xf)
2511             return 1;
2512         gen_op_iwmmxt_movq_M0_wRn(wrd);
2513         tmp = load_reg(s, rd0);
2514         tmp2 = load_reg(s, rd1);
2515         switch ((insn >> 16) & 0xf) {
2516         case 0x0:                                       /* TMIA */
2517             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2518             break;
2519         case 0x8:                                       /* TMIAPH */
2520             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2521             break;
2522         case 0xc: case 0xd: case 0xe: case 0xf:         /* TMIAxy */
2523             if (insn & (1 << 16))
2524                 tcg_gen_shri_i32(tmp, tmp, 16);
2525             if (insn & (1 << 17))
2526                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2527             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2528             break;
2529         default:
2530             tcg_temp_free_i32(tmp2);
2531             tcg_temp_free_i32(tmp);
2532             return 1;
2533         }
2534         tcg_temp_free_i32(tmp2);
2535         tcg_temp_free_i32(tmp);
2536         gen_op_iwmmxt_movq_wRn_M0(wrd);
2537         gen_op_iwmmxt_set_mup();
2538         break;
2539     default:
2540         return 1;
2541     }
2542
2543     return 0;
2544 }
2545
2546 /* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2547    (i.e. an undefined instruction).  */
2548 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2549 {
2550     int acc, rd0, rd1, rdhi, rdlo;
2551     TCGv_i32 tmp, tmp2;
2552
2553     if ((insn & 0x0ff00f10) == 0x0e200010) {
2554         /* Multiply with Internal Accumulate Format */
2555         rd0 = (insn >> 12) & 0xf;
2556         rd1 = insn & 0xf;
2557         acc = (insn >> 5) & 7;
2558
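             /* Only accumulator acc0 is implemented; anything else UNDEFs.  */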
2559         if (acc != 0)
2560             return 1;
2561
2562         tmp = load_reg(s, rd0);
2563         tmp2 = load_reg(s, rd1);
2564         switch ((insn >> 16) & 0xf) {
2565         case 0x0:                                       /* MIA */
2566             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2567             break;
2568         case 0x8:                                       /* MIAPH */
2569             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2570             break;
2571         case 0xc:                                       /* MIABB */
2572         case 0xd:                                       /* MIABT */
2573         case 0xe:                                       /* MIATB */
2574         case 0xf:                                       /* MIATT */
2575             if (insn & (1 << 16))
2576                 tcg_gen_shri_i32(tmp, tmp, 16);
2577             if (insn & (1 << 17))
2578                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2579             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2580             break;
2581         default:
2582             return 1;
2583         }
2584         tcg_temp_free_i32(tmp2);
2585         tcg_temp_free_i32(tmp);
2586
2587         gen_op_iwmmxt_movq_wRn_M0(acc);
2588         return 0;
2589     }
2590
2591     if ((insn & 0x0fe00ff8) == 0x0c400000) {
2592         /* Internal Accumulator Access Format */
2593         rdhi = (insn >> 16) & 0xf;
2594         rdlo = (insn >> 12) & 0xf;
2595         acc = insn & 7;
2596
2597         if (acc != 0)
2598             return 1;
2599
2600         if (insn & ARM_CP_RW_BIT) {                     /* MRA */
2601             iwmmxt_load_reg(cpu_V0, acc);
2602             tcg_gen_trunc_i64_i32(cpu_R[rdlo], cpu_V0);
2603             tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
2604             tcg_gen_trunc_i64_i32(cpu_R[rdhi], cpu_V0);
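                 /* The accumulator is only 40 bits wide, so just bits
                    [39:32] survive in rdhi.  */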
2605             tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2606         } else {                                        /* MAR */
2607             tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2608             iwmmxt_store_reg(cpu_V0, acc);
2609         }
2610         return 0;
2611     }
2612
2613     return 1;
2614 }
2615
2616 #define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
2617 #define VFP_SREG(insn, bigbit, smallbit) \
2618   ((VFP_REG_SHR(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1))
2619 #define VFP_DREG(reg, insn, bigbit, smallbit) do { \
2620     if (arm_dc_feature(s, ARM_FEATURE_VFP3)) { \
2621         reg = (((insn) >> (bigbit)) & 0x0f) \
2622               | (((insn) >> ((smallbit) - 4)) & 0x10); \
2623     } else { \
2624         if (insn & (1 << (smallbit))) \
2625             return 1; \
2626         reg = ((insn) >> (bigbit)) & 0x0f; \
2627     }} while (0)
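     /* An S register's 5-bit number keeps its low bit in the "small"
        field; a D register's keeps its top bit there instead.  Pre-VFP3
        cores have only 16 D registers, so for them that bit must be
        zero.  */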
2628
2629 #define VFP_SREG_D(insn) VFP_SREG(insn, 12, 22)
2630 #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
2631 #define VFP_SREG_N(insn) VFP_SREG(insn, 16,  7)
2632 #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16,  7)
2633 #define VFP_SREG_M(insn) VFP_SREG(insn,  0,  5)
2634 #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn,  0,  5)
2635
2636 /* Move between integer and VFP cores.  */
2637 static TCGv_i32 gen_vfp_mrs(void)
2638 {
2639     TCGv_i32 tmp = tcg_temp_new_i32();
2640     tcg_gen_mov_i32(tmp, cpu_F0s);
2641     return tmp;
2642 }
2643
2644 static void gen_vfp_msr(TCGv_i32 tmp)
2645 {
2646     tcg_gen_mov_i32(cpu_F0s, tmp);
2647     tcg_temp_free_i32(tmp);
2648 }
2649
2650 static void gen_neon_dup_u8(TCGv_i32 var, int shift)
2651 {
2652     TCGv_i32 tmp = tcg_temp_new_i32();
2653     if (shift)
2654         tcg_gen_shri_i32(var, var, shift);
2655     tcg_gen_ext8u_i32(var, var);
2656     tcg_gen_shli_i32(tmp, var, 8);
2657     tcg_gen_or_i32(var, var, tmp);
2658     tcg_gen_shli_i32(tmp, var, 16);
2659     tcg_gen_or_i32(var, var, tmp);
2660     tcg_temp_free_i32(tmp);
2661 }
2662
2663 static void gen_neon_dup_low16(TCGv_i32 var)
2664 {
2665     TCGv_i32 tmp = tcg_temp_new_i32();
2666     tcg_gen_ext16u_i32(var, var);
2667     tcg_gen_shli_i32(tmp, var, 16);
2668     tcg_gen_or_i32(var, var, tmp);
2669     tcg_temp_free_i32(tmp);
2670 }
2671
2672 static void gen_neon_dup_high16(TCGv_i32 var)
2673 {
2674     TCGv_i32 tmp = tcg_temp_new_i32();
2675     tcg_gen_andi_i32(var, var, 0xffff0000);
2676     tcg_gen_shri_i32(tmp, var, 16);
2677     tcg_gen_or_i32(var, var, tmp);
2678     tcg_temp_free_i32(tmp);
2679 }
2680
2681 static TCGv_i32 gen_load_and_replicate(DisasContext *s, TCGv_i32 addr, int size)
2682 {
2683     /* Load a single Neon element and replicate into a 32 bit TCG reg */
2684     TCGv_i32 tmp = tcg_temp_new_i32();
2685     switch (size) {
2686     case 0:
2687         gen_aa32_ld8u(tmp, addr, get_mem_index(s));
2688         gen_neon_dup_u8(tmp, 0);
2689         break;
2690     case 1:
2691         gen_aa32_ld16u(tmp, addr, get_mem_index(s));
2692         gen_neon_dup_low16(tmp);
2693         break;
2694     case 2:
2695         gen_aa32_ld32u(tmp, addr, get_mem_index(s));
2696         break;
2697     default: /* Avoid compiler warnings.  */
2698         abort();
2699     }
2700     return tmp;
2701 }
2702
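     /* VSEL: copy frn or frm to rd according to the condition encoded in
        insn bits [21:20] (EQ, VS, GE or GT), evaluated without branching
        by applying movcond to the cached NZCV flag variables.  */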
2703 static int handle_vsel(uint32_t insn, uint32_t rd, uint32_t rn, uint32_t rm,
2704                        uint32_t dp)
2705 {
2706     uint32_t cc = extract32(insn, 20, 2);
2707
2708     if (dp) {
2709         TCGv_i64 frn, frm, dest;
2710         TCGv_i64 tmp, zero, zf, nf, vf;
2711
2712         zero = tcg_const_i64(0);
2713
2714         frn = tcg_temp_new_i64();
2715         frm = tcg_temp_new_i64();
2716         dest = tcg_temp_new_i64();
2717
2718         zf = tcg_temp_new_i64();
2719         nf = tcg_temp_new_i64();
2720         vf = tcg_temp_new_i64();
2721
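             /* ZF is only ever compared against zero, so zero extension
                is fine; NF and VF feed signed (LT/GE) comparisons and
                must be sign-extended.  */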
2722         tcg_gen_extu_i32_i64(zf, cpu_ZF);
2723         tcg_gen_ext_i32_i64(nf, cpu_NF);
2724         tcg_gen_ext_i32_i64(vf, cpu_VF);
2725
2726         tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn));
2727         tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm));
2728         switch (cc) {
2729         case 0: /* eq: Z */
2730             tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
2731                                 frn, frm);
2732             break;
2733         case 1: /* vs: V */
2734             tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero,
2735                                 frn, frm);
2736             break;
2737         case 2: /* ge: N == V -> N ^ V == 0 */
2738             tmp = tcg_temp_new_i64();
2739             tcg_gen_xor_i64(tmp, vf, nf);
2740             tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
2741                                 frn, frm);
2742             tcg_temp_free_i64(tmp);
2743             break;
2744         case 3: /* gt: !Z && N == V */
2745             tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero,
2746                                 frn, frm);
2747             tmp = tcg_temp_new_i64();
2748             tcg_gen_xor_i64(tmp, vf, nf);
2749             tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
2750                                 dest, frm);
2751             tcg_temp_free_i64(tmp);
2752             break;
2753         }
2754         tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd));
2755         tcg_temp_free_i64(frn);
2756         tcg_temp_free_i64(frm);
2757         tcg_temp_free_i64(dest);
2758
2759         tcg_temp_free_i64(zf);
2760         tcg_temp_free_i64(nf);
2761         tcg_temp_free_i64(vf);
2762
2763         tcg_temp_free_i64(zero);
2764     } else {
2765         TCGv_i32 frn, frm, dest;
2766         TCGv_i32 tmp, zero;
2767
2768         zero = tcg_const_i32(0);
2769
2770         frn = tcg_temp_new_i32();
2771         frm = tcg_temp_new_i32();
2772         dest = tcg_temp_new_i32();
2773         tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn));
2774         tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm));
2775         switch (cc) {
2776         case 0: /* eq: Z */
2777             tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
2778                                 frn, frm);
2779             break;
2780         case 1: /* vs: V */
2781             tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero,
2782                                 frn, frm);
2783             break;
2784         case 2: /* ge: N == V -> N ^ V == 0 */
2785             tmp = tcg_temp_new_i32();
2786             tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
2787             tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
2788                                 frn, frm);
2789             tcg_temp_free_i32(tmp);
2790             break;
2791         case 3: /* gt: !Z && N == V */
2792             tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero,
2793                                 frn, frm);
2794             tmp = tcg_temp_new_i32();
2795             tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
2796             tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
2797                                 dest, frm);
2798             tcg_temp_free_i32(tmp);
2799             break;
2800         }
2801         tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd));
2802         tcg_temp_free_i32(frn);
2803         tcg_temp_free_i32(frm);
2804         tcg_temp_free_i32(dest);
2805
2806         tcg_temp_free_i32(zero);
2807     }
2808
2809     return 0;
2810 }
2811
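     /* VMAXNM/VMINNM: the maxNum/minNum operations from IEEE 754-2008,
        which return the numeric operand when exactly one input is a
        quiet NaN.  */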
2812 static int handle_vminmaxnm(uint32_t insn, uint32_t rd, uint32_t rn,
2813                             uint32_t rm, uint32_t dp)
2814 {
2815     uint32_t vmin = extract32(insn, 6, 1);
2816     TCGv_ptr fpst = get_fpstatus_ptr(0);
2817
2818     if (dp) {
2819         TCGv_i64 frn, frm, dest;
2820
2821         frn = tcg_temp_new_i64();
2822         frm = tcg_temp_new_i64();
2823         dest = tcg_temp_new_i64();
2824
2825         tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn));
2826         tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm));
2827         if (vmin) {
2828             gen_helper_vfp_minnumd(dest, frn, frm, fpst);
2829         } else {
2830             gen_helper_vfp_maxnumd(dest, frn, frm, fpst);
2831         }
2832         tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd));
2833         tcg_temp_free_i64(frn);
2834         tcg_temp_free_i64(frm);
2835         tcg_temp_free_i64(dest);
2836     } else {
2837         TCGv_i32 frn, frm, dest;
2838
2839         frn = tcg_temp_new_i32();
2840         frm = tcg_temp_new_i32();
2841         dest = tcg_temp_new_i32();
2842
2843         tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn));
2844         tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm));
2845         if (vmin) {
2846             gen_helper_vfp_minnums(dest, frn, frm, fpst);
2847         } else {
2848             gen_helper_vfp_maxnums(dest, frn, frm, fpst);
2849         }
2850         tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd));
2851         tcg_temp_free_i32(frn);
2852         tcg_temp_free_i32(frm);
2853         tcg_temp_free_i32(dest);
2854     }
2855
2856     tcg_temp_free_ptr(fpst);
2857     return 0;
2858 }
2859
2860 static int handle_vrint(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
2861                         int rounding)
2862 {
2863     TCGv_ptr fpst = get_fpstatus_ptr(0);
2864     TCGv_i32 tcg_rmode;
2865
2866     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
2867     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
2868
2869     if (dp) {
2870         TCGv_i64 tcg_op;
2871         TCGv_i64 tcg_res;
2872         tcg_op = tcg_temp_new_i64();
2873         tcg_res = tcg_temp_new_i64();
2874         tcg_gen_ld_f64(tcg_op, cpu_env, vfp_reg_offset(dp, rm));
2875         gen_helper_rintd(tcg_res, tcg_op, fpst);
2876         tcg_gen_st_f64(tcg_res, cpu_env, vfp_reg_offset(dp, rd));
2877         tcg_temp_free_i64(tcg_op);
2878         tcg_temp_free_i64(tcg_res);
2879     } else {
2880         TCGv_i32 tcg_op;
2881         TCGv_i32 tcg_res;
2882         tcg_op = tcg_temp_new_i32();
2883         tcg_res = tcg_temp_new_i32();
2884         tcg_gen_ld_f32(tcg_op, cpu_env, vfp_reg_offset(dp, rm));
2885         gen_helper_rints(tcg_res, tcg_op, fpst);
2886         tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(dp, rd));
2887         tcg_temp_free_i32(tcg_op);
2888         tcg_temp_free_i32(tcg_res);
2889     }
2890
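         /* set_rmode returned the previous rounding mode in tcg_rmode,
            so this second call restores it.  */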
2891     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
2892     tcg_temp_free_i32(tcg_rmode);
2893
2894     tcg_temp_free_ptr(fpst);
2895     return 0;
2896 }
2897
2898 static int handle_vcvt(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
2899                        int rounding)
2900 {
2901     bool is_signed = extract32(insn, 7, 1);
2902     TCGv_ptr fpst = get_fpstatus_ptr(0);
2903     TCGv_i32 tcg_rmode, tcg_shift;
2904
2905     tcg_shift = tcg_const_i32(0);
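         /* A shift of zero means no fixed-point fraction bits: this is a
            plain float-to-integer conversion.  */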
2906
2907     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
2908     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
2909
2910     if (dp) {
2911         TCGv_i64 tcg_double, tcg_res;
2912         TCGv_i32 tcg_tmp;
2913         /* Rd is encoded as a single precision register even when the source
2914          * is double precision.
2915          */
2916         rd = ((rd << 1) & 0x1e) | ((rd >> 4) & 0x1);
2917         tcg_double = tcg_temp_new_i64();
2918         tcg_res = tcg_temp_new_i64();
2919         tcg_tmp = tcg_temp_new_i32();
2920         tcg_gen_ld_f64(tcg_double, cpu_env, vfp_reg_offset(1, rm));
2921         if (is_signed) {
2922             gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
2923         } else {
2924             gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
2925         }
2926         tcg_gen_trunc_i64_i32(tcg_tmp, tcg_res);
2927         tcg_gen_st_f32(tcg_tmp, cpu_env, vfp_reg_offset(0, rd));
2928         tcg_temp_free_i32(tcg_tmp);
2929         tcg_temp_free_i64(tcg_res);
2930         tcg_temp_free_i64(tcg_double);
2931     } else {
2932         TCGv_i32 tcg_single, tcg_res;
2933         tcg_single = tcg_temp_new_i32();
2934         tcg_res = tcg_temp_new_i32();
2935         tcg_gen_ld_f32(tcg_single, cpu_env, vfp_reg_offset(0, rm));
2936         if (is_signed) {
2937             gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
2938         } else {
2939             gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
2940         }
2941         tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(0, rd));
2942         tcg_temp_free_i32(tcg_res);
2943         tcg_temp_free_i32(tcg_single);
2944     }
2945
2946     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
2947     tcg_temp_free_i32(tcg_rmode);
2948
2949     tcg_temp_free_i32(tcg_shift);
2950
2951     tcg_temp_free_ptr(fpst);
2952
2953     return 0;
2954 }
2955
2956 /* Table for converting the most common AArch32 encoding of
2957  * rounding mode to arm_fprounding order (which matches the
2958  * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
2959  */
2960 static const uint8_t fp_decode_rm[] = {
2961     FPROUNDING_TIEAWAY,
2962     FPROUNDING_TIEEVEN,
2963     FPROUNDING_POSINF,
2964     FPROUNDING_NEGINF,
2965 };
2966
2967 static int disas_vfp_v8_insn(DisasContext *s, uint32_t insn)
2968 {
2969     uint32_t rd, rn, rm, dp = extract32(insn, 8, 1);
2970
2971     if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
2972         return 1;
2973     }
2974
2975     if (dp) {
2976         VFP_DREG_D(rd, insn);
2977         VFP_DREG_N(rn, insn);
2978         VFP_DREG_M(rm, insn);
2979     } else {
2980         rd = VFP_SREG_D(insn);
2981         rn = VFP_SREG_N(insn);
2982         rm = VFP_SREG_M(insn);
2983     }
2984
2985     if ((insn & 0x0f800e50) == 0x0e000a00) {
2986         return handle_vsel(insn, rd, rn, rm, dp);
2987     } else if ((insn & 0x0fb00e10) == 0x0e800a00) {
2988         return handle_vminmaxnm(insn, rd, rn, rm, dp);
2989     } else if ((insn & 0x0fbc0ed0) == 0x0eb80a40) {
2990         /* VRINTA, VRINTN, VRINTP, VRINTM */
2991         int rounding = fp_decode_rm[extract32(insn, 16, 2)];
2992         return handle_vrint(insn, rd, rm, dp, rounding);
2993     } else if ((insn & 0x0fbc0e50) == 0x0ebc0a40) {
2994         /* VCVTA, VCVTN, VCVTP, VCVTM */
2995         int rounding = fp_decode_rm[extract32(insn, 16, 2)];
2996         return handle_vcvt(insn, rd, rm, dp, rounding);
2997     }
2998     return 1;
2999 }
3000
3001 /* Disassemble a VFP instruction.  Returns nonzero if an error occurred
3002    (i.e. an undefined instruction).  */
3003 static int disas_vfp_insn(DisasContext *s, uint32_t insn)
3004 {
3005     uint32_t rd, rn, rm, op, i, n, offset, delta_d, delta_m, bank_mask;
3006     int dp, veclen;
3007     TCGv_i32 addr;
3008     TCGv_i32 tmp;
3009     TCGv_i32 tmp2;
3010
3011     if (!arm_dc_feature(s, ARM_FEATURE_VFP)) {
3012         return 1;
3013     }
3014
3015     /* FIXME: this access check should not take precedence over UNDEF
3016      * for invalid encodings; we will generate incorrect syndrome information
3017      * for attempts to execute invalid vfp/neon encodings with FP disabled.
3018      */
3019     if (!s->cpacr_fpen) {
3020         gen_exception_insn(s, 4, EXCP_UDEF,
3021                            syn_fp_access_trap(1, 0xe, s->thumb));
3022         return 0;
3023     }
3024
3025     if (!s->vfp_enabled) {
3026         /* VFP disabled.  Only allow fmxr/fmrx to/from some control regs.  */
3027         if ((insn & 0x0fe00fff) != 0x0ee00a10)
3028             return 1;
3029         rn = (insn >> 16) & 0xf;
3030         if (rn != ARM_VFP_FPSID && rn != ARM_VFP_FPEXC && rn != ARM_VFP_MVFR2
3031             && rn != ARM_VFP_MVFR1 && rn != ARM_VFP_MVFR0) {
3032             return 1;
3033         }
3034     }
3035
3036     if (extract32(insn, 28, 4) == 0xf) {
3037         /* Encodings with T=1 (Thumb) or unconditional (ARM):
3038          * only used in v8 and above.
3039          */
3040         return disas_vfp_v8_insn(s, insn);
3041     }
3042
3043     dp = ((insn & 0xf00) == 0xb00);
3044     switch ((insn >> 24) & 0xf) {
3045     case 0xe:
3046         if (insn & (1 << 4)) {
3047             /* single register transfer */
3048             rd = (insn >> 12) & 0xf;
3049             if (dp) {
3050                 int size;
3051                 int pass;
3052
3053                 VFP_DREG_N(rn, insn);
3054                 if (insn & 0xf)
3055                     return 1;
3056                 if (insn & 0x00c00060
3057                     && !arm_dc_feature(s, ARM_FEATURE_NEON)) {
3058                     return 1;
3059                 }
3060
3061                 pass = (insn >> 21) & 1;
3062                 if (insn & (1 << 22)) {
3063                     size = 0;
3064                     offset = ((insn >> 5) & 3) * 8;
3065                 } else if (insn & (1 << 5)) {
3066                     size = 1;
3067                     offset = (insn & (1 << 6)) ? 16 : 0;
3068                 } else {
3069                     size = 2;
3070                     offset = 0;
3071                 }
3072                 if (insn & ARM_CP_RW_BIT) {
3073                     /* vfp->arm */
3074                     tmp = neon_load_reg(rn, pass);
3075                     switch (size) {
3076                     case 0:
3077                         if (offset)
3078                             tcg_gen_shri_i32(tmp, tmp, offset);
3079                         if (insn & (1 << 23))
3080                             gen_uxtb(tmp);
3081                         else
3082                             gen_sxtb(tmp);
3083                         break;
3084                     case 1:
3085                         if (insn & (1 << 23)) {
3086                             if (offset) {
3087                                 tcg_gen_shri_i32(tmp, tmp, 16);
3088                             } else {
3089                                 gen_uxth(tmp);
3090                             }
3091                         } else {
3092                             if (offset) {
3093                                 tcg_gen_sari_i32(tmp, tmp, 16);
3094                             } else {
3095                                 gen_sxth(tmp);
3096                             }
3097                         }
3098                         break;
3099                     case 2:
3100                         break;
3101                     }
3102                     store_reg(s, rd, tmp);
3103                 } else {
3104                     /* arm->vfp */
3105                     tmp = load_reg(s, rd);
3106                     if (insn & (1 << 23)) {
3107                         /* VDUP */
3108                         if (size == 0) {
3109                             gen_neon_dup_u8(tmp, 0);
3110                         } else if (size == 1) {
3111                             gen_neon_dup_low16(tmp);
3112                         }
3113                         for (n = 0; n <= pass * 2; n++) {
3114                             tmp2 = tcg_temp_new_i32();
3115                             tcg_gen_mov_i32(tmp2, tmp);
3116                             neon_store_reg(rn, n, tmp2);
3117                         }
3118                         neon_store_reg(rn, n, tmp);
3119                     } else {
3120                         /* VMOV */
3121                         switch (size) {
3122                         case 0:
3123                             tmp2 = neon_load_reg(rn, pass);
3124                             tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 8);
3125                             tcg_temp_free_i32(tmp2);
3126                             break;
3127                         case 1:
3128                             tmp2 = neon_load_reg(rn, pass);
3129                             tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 16);
3130                             tcg_temp_free_i32(tmp2);
3131                             break;
3132                         case 2:
3133                             break;
3134                         }
3135                         neon_store_reg(rn, pass, tmp);
3136                     }
3137                 }
3138             } else { /* !dp */
3139                 if ((insn & 0x6f) != 0x00)
3140                     return 1;
3141                 rn = VFP_SREG_N(insn);
3142                 if (insn & ARM_CP_RW_BIT) {
3143                     /* vfp->arm */
3144                     if (insn & (1 << 21)) {
3145                         /* system register */
3146                         rn >>= 1;
3147
3148                         switch (rn) {
3149                         case ARM_VFP_FPSID:
3150                             /* VFP2 allows access to FPSID from userspace.
3151                                VFP3 restricts all ID registers to privileged
3152                                accesses.  */
3153                             if (IS_USER(s)
3154                                 && arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3155                                 return 1;
3156                             }
3157                             tmp = load_cpu_field(vfp.xregs[rn]);
3158                             break;
3159                         case ARM_VFP_FPEXC:
3160                             if (IS_USER(s))
3161                                 return 1;
3162                             tmp = load_cpu_field(vfp.xregs[rn]);
3163                             break;
3164                         case ARM_VFP_FPINST:
3165                         case ARM_VFP_FPINST2:
3166                             /* Not present in VFP3.  */
3167                             if (IS_USER(s)
3168                                 || arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3169                                 return 1;
3170                             }
3171                             tmp = load_cpu_field(vfp.xregs[rn]);
3172                             break;
3173                         case ARM_VFP_FPSCR:
3174                             if (rd == 15) {
3175                                 tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
3176                                 tcg_gen_andi_i32(tmp, tmp, 0xf0000000);
3177                             } else {
3178                                 tmp = tcg_temp_new_i32();
3179                                 gen_helper_vfp_get_fpscr(tmp, cpu_env);
3180                             }
3181                             break;
3182                         case ARM_VFP_MVFR2:
3183                             if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
3184                                 return 1;
3185                             }
3186                             /* fall through */
3187                         case ARM_VFP_MVFR0:
3188                         case ARM_VFP_MVFR1:
3189                             if (IS_USER(s)
3190                                 || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
3191                                 return 1;
3192                             }
3193                             tmp = load_cpu_field(vfp.xregs[rn]);
3194                             break;
3195                         default:
3196                             return 1;
3197                         }
3198                     } else {
3199                         gen_mov_F0_vreg(0, rn);
3200                         tmp = gen_vfp_mrs();
3201                     }
3202                     if (rd == 15) {
3203                         /* Set the 4 flag bits in the CPSR.  */
3204                         gen_set_nzcv(tmp);
3205                         tcg_temp_free_i32(tmp);
3206                     } else {
3207                         store_reg(s, rd, tmp);
3208                     }
3209                 } else {
3210                     /* arm->vfp */
3211                     if (insn & (1 << 21)) {
3212                         rn >>= 1;
3213                         /* system register */
3214                         switch (rn) {
3215                         case ARM_VFP_FPSID:
3216                         case ARM_VFP_MVFR0:
3217                         case ARM_VFP_MVFR1:
3218                             /* Writes are ignored.  */
3219                             break;
3220                         case ARM_VFP_FPSCR:
3221                             tmp = load_reg(s, rd);
3222                             gen_helper_vfp_set_fpscr(cpu_env, tmp);
3223                             tcg_temp_free_i32(tmp);
3224                             gen_lookup_tb(s);
3225                             break;
3226                         case ARM_VFP_FPEXC:
3227                             if (IS_USER(s))
3228                                 return 1;
3229                             /* TODO: VFP subarchitecture support.
3230                              * For now, keep only the EN bit.  */
3231                             tmp = load_reg(s, rd);
3232                             tcg_gen_andi_i32(tmp, tmp, 1 << 30);
3233                             store_cpu_field(tmp, vfp.xregs[rn]);
3234                             gen_lookup_tb(s);
3235                             break;
3236                         case ARM_VFP_FPINST:
3237                         case ARM_VFP_FPINST2:
3238                             if (IS_USER(s)) {
3239                                 return 1;
3240                             }
3241                             tmp = load_reg(s, rd);
3242                             store_cpu_field(tmp, vfp.xregs[rn]);
3243                             break;
3244                         default:
3245                             return 1;
3246                         }
3247                     } else {
3248                         tmp = load_reg(s, rd);
3249                         gen_vfp_msr(tmp);
3250                         gen_mov_vreg_F0(0, rn);
3251                     }
3252                 }
3253             }
3254         } else {
3255             /* data processing */
3256             /* The opcode is in bits 23, 21, 20 and 6.  */
3257             op = ((insn >> 20) & 8) | ((insn >> 19) & 6) | ((insn >> 6) & 1);
3258             if (dp) {
3259                 if (op == 15) {
3260                     /* rn is opcode */
3261                     rn = ((insn >> 15) & 0x1e) | ((insn >> 7) & 1);
3262                 } else {
3263                     /* rn is register number */
3264                     VFP_DREG_N(rn, insn);
3265                 }
3266
3267                 if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18) ||
3268                                  ((rn & 0x1e) == 0x6))) {
3269                     /* Integer or single/half precision destination.  */
3270                     rd = VFP_SREG_D(insn);
3271                 } else {
3272                     VFP_DREG_D(rd, insn);
3273                 }
3274                 if (op == 15 &&
3275                     (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14) ||
3276                      ((rn & 0x1e) == 0x4))) {
3277                     /* VCVT from int or half precision is always from S reg
3278                      * regardless of the dp bit. VCVT with immediate frac_bits
3279                      * has the same format as SREG_M.
3280                      */
3281                     rm = VFP_SREG_M(insn);
3282                 } else {
3283                     VFP_DREG_M(rm, insn);
3284                 }
3285             } else {
3286                 rn = VFP_SREG_N(insn);
3287                 if (op == 15 && rn == 15) {
3288                     /* Double precision destination.  */
3289                     VFP_DREG_D(rd, insn);
3290                 } else {
3291                     rd = VFP_SREG_D(insn);
3292                 }
3293                 /* NB that we implicitly rely on the encoding for the frac_bits
3294                  * in VCVT of fixed to float being the same as that of an SREG_M.
3295                  */
3296                 rm = VFP_SREG_M(insn);
3297             }
3298
3299             veclen = s->vec_len;
3300             if (op == 15 && rn > 3)
3301                 veclen = 0;
3302
3303             /* Shut up compiler warnings.  */
3304             delta_m = 0;
3305             delta_d = 0;
3306             bank_mask = 0;
3307
3308             if (veclen > 0) {
3309                 if (dp)
3310                     bank_mask = 0xc;
3311                 else
3312                     bank_mask = 0x18;
3313
3314                 /* Figure out what type of vector operation this is.  */
3315                 if ((rd & bank_mask) == 0) {
3316                     /* scalar */
3317                     veclen = 0;
3318                 } else {
3319                     if (dp)
3320                         delta_d = (s->vec_stride >> 1) + 1;
3321                     else
3322                         delta_d = s->vec_stride + 1;
3323
3324                     if ((rm & bank_mask) == 0) {
3325                         /* mixed scalar/vector */
3326                         delta_m = 0;
3327                     } else {
3328                         /* vector */
3329                         delta_m = delta_d;
3330                     }
3331                 }
3332             }
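            /* Worked example for the bank logic above (informal, single
             * precision, so bank_mask == 0x18 and the registers form banks
             * of eight): a destination in s0-s7 (bank 0) makes the whole
             * operation scalar.  With s->vec_stride == 0 we get delta_d ==
             * 1, so a vector op with rd == 8 steps through s8, s9, s10, ...
             * within its bank; an operand register in bank 0 is a scalar
             * operand (delta_m == 0) that is reused on every iteration of
             * the loop below.
             */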
3333
3334             /* Load the initial operands.  */
3335             if (op == 15) {
3336                 switch (rn) {
3337                 case 16:
3338                 case 17:
3339                     /* Integer source */
3340                     gen_mov_F0_vreg(0, rm);
3341                     break;
3342                 case 8:
3343                 case 9:
3344                     /* Compare */
3345                     gen_mov_F0_vreg(dp, rd);
3346                     gen_mov_F1_vreg(dp, rm);
3347                     break;
3348                 case 10:
3349                 case 11:
3350                     /* Compare with zero */
3351                     gen_mov_F0_vreg(dp, rd);
3352                     gen_vfp_F1_ld0(dp);
3353                     break;
3354                 case 20:
3355                 case 21:
3356                 case 22:
3357                 case 23:
3358                 case 28:
3359                 case 29:
3360                 case 30:
3361                 case 31:
3362                     /* Source and destination the same.  */
3363                     gen_mov_F0_vreg(dp, rd);
3364                     break;
3365                 case 4:
3366                 case 5:
3367                 case 6:
3368                 case 7:
3369                     /* VCVTB, VCVTT: only present with the halfprec extension;
3370                      * UNPREDICTABLE if bit 8 is set prior to ARMv8
3371                      * (we choose to UNDEF).
3372                      */
3373                     if ((dp && !arm_dc_feature(s, ARM_FEATURE_V8)) ||
3374                         !arm_dc_feature(s, ARM_FEATURE_VFP_FP16)) {
3375                         return 1;
3376                     }
3377                     if (!extract32(rn, 1, 1)) {
3378                         /* Half precision source.  */
3379                         gen_mov_F0_vreg(0, rm);
3380                         break;
3381                     }
3382                     /* Otherwise fall through */
3383                 default:
3384                     /* One source operand.  */
3385                     gen_mov_F0_vreg(dp, rm);
3386                     break;
3387                 }
3388             } else {
3389                 /* Two source operands.  */
3390                 gen_mov_F0_vreg(dp, rn);
3391                 gen_mov_F1_vreg(dp, rm);
3392             }
3393
3394             for (;;) {
3395                 /* Perform the calculation.  */
3396                 switch (op) {
3397                 case 0: /* VMLA: fd + (fn * fm) */
3398                     /* Note that order of inputs to the add matters for NaNs */
3399                     gen_vfp_F1_mul(dp);
3400                     gen_mov_F0_vreg(dp, rd);
3401                     gen_vfp_add(dp);
3402                     break;
3403                 case 1: /* VMLS: fd + -(fn * fm) */
3404                     gen_vfp_mul(dp);
3405                     gen_vfp_F1_neg(dp);
3406                     gen_mov_F0_vreg(dp, rd);
3407                     gen_vfp_add(dp);
3408                     break;
3409                 case 2: /* VNMLS: -fd + (fn * fm) */
3410                     /* Note that it isn't valid to replace (-A + B) with (B - A)
3411                      * or similar plausible looking simplifications
3412                      * because this will give wrong results for NaNs.
3413                      */
3414                     gen_vfp_F1_mul(dp);
3415                     gen_mov_F0_vreg(dp, rd);
3416                     gen_vfp_neg(dp);
3417                     gen_vfp_add(dp);
3418                     break;
3419                 case 3: /* VNMLA: -fd + -(fn * fm) */
3420                     gen_vfp_mul(dp);
3421                     gen_vfp_F1_neg(dp);
3422                     gen_mov_F0_vreg(dp, rd);
3423                     gen_vfp_neg(dp);
3424                     gen_vfp_add(dp);
3425                     break;
3426                 case 4: /* mul: fn * fm */
3427                     gen_vfp_mul(dp);
3428                     break;
3429                 case 5: /* nmul: -(fn * fm) */
3430                     gen_vfp_mul(dp);
3431                     gen_vfp_neg(dp);
3432                     break;
3433                 case 6: /* add: fn + fm */
3434                     gen_vfp_add(dp);
3435                     break;
3436                 case 7: /* sub: fn - fm */
3437                     gen_vfp_sub(dp);
3438                     break;
3439                 case 8: /* div: fn / fm */
3440                     gen_vfp_div(dp);
3441                     break;
3442                 case 10: /* VFNMA : fd = muladd(-fd,  fn, fm) */
3443                 case 11: /* VFNMS : fd = muladd(-fd, -fn, fm) */
3444                 case 12: /* VFMA  : fd = muladd( fd,  fn, fm) */
3445                 case 13: /* VFMS  : fd = muladd( fd, -fn, fm) */
3446                     /* These are fused multiply-add, and must be done as one
3447                      * floating point operation with no rounding between the
3448                      * multiplication and addition steps.
3449                      * NB that doing the negations here as separate steps is
3450                      * correct: an input NaN should come out with its sign bit
3451                      * flipped if it is a negated input.
3452                      */
3453                     if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) {
3454                         return 1;
3455                     }
3456                     if (dp) {
3457                         TCGv_ptr fpst;
3458                         TCGv_i64 frd;
3459                         if (op & 1) {
3460                             /* VFNMS, VFMS */
3461                             gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
3462                         }
3463                         frd = tcg_temp_new_i64();
3464                         tcg_gen_ld_f64(frd, cpu_env, vfp_reg_offset(dp, rd));
3465                         if (op & 2) {
3466                             /* VFNMA, VFNMS */
3467                             gen_helper_vfp_negd(frd, frd);
3468                         }
3469                         fpst = get_fpstatus_ptr(0);
3470                         gen_helper_vfp_muladdd(cpu_F0d, cpu_F0d,
3471                                                cpu_F1d, frd, fpst);
3472                         tcg_temp_free_ptr(fpst);
3473                         tcg_temp_free_i64(frd);
3474                     } else {
3475                         TCGv_ptr fpst;
3476                         TCGv_i32 frd;
3477                         if (op & 1) {
3478                             /* VFNMS, VFMS */
3479                             gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
3480                         }
3481                         frd = tcg_temp_new_i32();
3482                         tcg_gen_ld_f32(frd, cpu_env, vfp_reg_offset(dp, rd));
3483                         if (op & 2) {
3484                             gen_helper_vfp_negs(frd, frd);
3485                         }
3486                         fpst = get_fpstatus_ptr(0);
3487                         gen_helper_vfp_muladds(cpu_F0s, cpu_F0s,
3488                                                cpu_F1s, frd, fpst);
3489                         tcg_temp_free_ptr(fpst);
3490                         tcg_temp_free_i32(frd);
3491                     }
3492                     break;
3493                 case 14: /* fconst */
3494                     if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3495                         return 1;
3496                     }
3497
3498                     n = (insn << 12) & 0x80000000;
3499                     i = ((insn >> 12) & 0x70) | (insn & 0xf);
3500                     if (dp) {
3501                         if (i & 0x40)
3502                             i |= 0x3f80;
3503                         else
3504                             i |= 0x4000;
3505                         n |= i << 16;
3506                         tcg_gen_movi_i64(cpu_F0d, ((uint64_t)n) << 32);
3507                     } else {
3508                         if (i & 0x40)
3509                             i |= 0x780;
3510                         else
3511                             i |= 0x800;
3512                         n |= i << 19;
3513                         tcg_gen_movi_i32(cpu_F0s, n);
3514                     }
3515                     break;
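                    /* Sketch of the expansion above: the 8-bit immediate
                     * abcdefgh (a from insn bit 19 via n; bcd and efgh in i)
                     * becomes sign = a, exponent = NOT(b) followed by
                     * replicated b then cd, fraction = efgh padded with
                     * zeros.  E.g. imm8 == 0x70 has b == 1, so for single
                     * precision i |= 0x780 yields 0x7f0, and n |= i << 19
                     * gives 0x3f800000, i.e. 1.0f.
                     */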
3516                 case 15: /* extension space */
3517                     switch (rn) {
3518                     case 0: /* cpy */
3519                         /* no-op */
3520                         break;
3521                     case 1: /* abs */
3522                         gen_vfp_abs(dp);
3523                         break;
3524                     case 2: /* neg */
3525                         gen_vfp_neg(dp);
3526                         break;
3527                     case 3: /* sqrt */
3528                         gen_vfp_sqrt(dp);
3529                         break;
3530                     case 4: /* vcvtb.f32.f16, vcvtb.f64.f16 */
3531                         tmp = gen_vfp_mrs();
3532                         tcg_gen_ext16u_i32(tmp, tmp);
3533                         if (dp) {
3534                             gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp,
3535                                                            cpu_env);
3536                         } else {
3537                             gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp,
3538                                                            cpu_env);
3539                         }
3540                         tcg_temp_free_i32(tmp);
3541                         break;
3542                     case 5: /* vcvtt.f32.f16, vcvtt.f64.f16 */
3543                         tmp = gen_vfp_mrs();
3544                         tcg_gen_shri_i32(tmp, tmp, 16);
3545                         if (dp) {
3546                             gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp,
3547                                                            cpu_env);
3548                         } else {
3549                             gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp,
3550                                                            cpu_env);
3551                         }
3552                         tcg_temp_free_i32(tmp);
3553                         break;
3554                     case 6: /* vcvtb.f16.f32, vcvtb.f16.f64 */
3555                         tmp = tcg_temp_new_i32();
3556                         if (dp) {
3557                             gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
3558                                                            cpu_env);
3559                         } else {
3560                             gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
3561                                                            cpu_env);
3562                         }
3563                         gen_mov_F0_vreg(0, rd);
3564                         tmp2 = gen_vfp_mrs();
3565                         tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
3566                         tcg_gen_or_i32(tmp, tmp, tmp2);
3567                         tcg_temp_free_i32(tmp2);
3568                         gen_vfp_msr(tmp);
3569                         break;
3570                     case 7: /* vcvtt.f16.f32, vcvtt.f16.f64 */
3571                         tmp = tcg_temp_new_i32();
3572                         if (dp) {
3573                             gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
3574                                                            cpu_env);
3575                         } else {
3576                             gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
3577                                                            cpu_env);
3578                         }
3579                         tcg_gen_shli_i32(tmp, tmp, 16);
3580                         gen_mov_F0_vreg(0, rd);
3581                         tmp2 = gen_vfp_mrs();
3582                         tcg_gen_ext16u_i32(tmp2, tmp2);
3583                         tcg_gen_or_i32(tmp, tmp, tmp2);
3584                         tcg_temp_free_i32(tmp2);
3585                         gen_vfp_msr(tmp);
3586                         break;
3587                     case 8: /* cmp */
3588                         gen_vfp_cmp(dp);
3589                         break;
3590                     case 9: /* cmpe */
3591                         gen_vfp_cmpe(dp);
3592                         break;
3593                     case 10: /* cmpz */
3594                         gen_vfp_cmp(dp);
3595                         break;
3596                     case 11: /* cmpez */
3597                         gen_vfp_F1_ld0(dp);
3598                         gen_vfp_cmpe(dp);
3599                         break;
3600                     case 12: /* vrintr */
3601                     {
3602                         TCGv_ptr fpst = get_fpstatus_ptr(0);
3603                         if (dp) {
3604                             gen_helper_rintd(cpu_F0d, cpu_F0d, fpst);
3605                         } else {
3606                             gen_helper_rints(cpu_F0s, cpu_F0s, fpst);
3607                         }
3608                         tcg_temp_free_ptr(fpst);
3609                         break;
3610                     }
3611                     case 13: /* vrintz */
3612                     {
3613                         TCGv_ptr fpst = get_fpstatus_ptr(0);
3614                         TCGv_i32 tcg_rmode;
3615                         tcg_rmode = tcg_const_i32(float_round_to_zero);
3616                         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
3617                         if (dp) {
3618                             gen_helper_rintd(cpu_F0d, cpu_F0d, fpst);
3619                         } else {
3620                             gen_helper_rints(cpu_F0s, cpu_F0s, fpst);
3621                         }
3622                         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
3623                         tcg_temp_free_i32(tcg_rmode);
3624                         tcg_temp_free_ptr(fpst);
3625                         break;
3626                     }
3627                     case 14: /* vrintx */
3628                     {
3629                         TCGv_ptr fpst = get_fpstatus_ptr(0);
3630                         if (dp) {
3631                             gen_helper_rintd_exact(cpu_F0d, cpu_F0d, fpst);
3632                         } else {
3633                             gen_helper_rints_exact(cpu_F0s, cpu_F0s, fpst);
3634                         }
3635                         tcg_temp_free_ptr(fpst);
3636                         break;
3637                     }
3638                     case 15: /* single<->double conversion */
3639                         if (dp)
3640                             gen_helper_vfp_fcvtsd(cpu_F0s, cpu_F0d, cpu_env);
3641                         else
3642                             gen_helper_vfp_fcvtds(cpu_F0d, cpu_F0s, cpu_env);
3643                         break;
3644                     case 16: /* fuito */
3645                         gen_vfp_uito(dp, 0);
3646                         break;
3647                     case 17: /* fsito */
3648                         gen_vfp_sito(dp, 0);
3649                         break;
3650                     case 20: /* fshto */
3651                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3652                             return 1;
3653                         }
3654                         gen_vfp_shto(dp, 16 - rm, 0);
3655                         break;
3656                     case 21: /* fslto */
3657                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3658                             return 1;
3659                         }
3660                         gen_vfp_slto(dp, 32 - rm, 0);
3661                         break;
3662                     case 22: /* fuhto */
3663                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3664                             return 1;
3665                         }
3666                         gen_vfp_uhto(dp, 16 - rm, 0);
3667                         break;
3668                     case 23: /* fulto */
3669                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3670                             return 1;
3671                         }
3672                         gen_vfp_ulto(dp, 32 - rm, 0);
3673                         break;
3674                     case 24: /* ftoui */
3675                         gen_vfp_toui(dp, 0);
3676                         break;
3677                     case 25: /* ftouiz */
3678                         gen_vfp_touiz(dp, 0);
3679                         break;
3680                     case 26: /* ftosi */
3681                         gen_vfp_tosi(dp, 0);
3682                         break;
3683                     case 27: /* ftosiz */
3684                         gen_vfp_tosiz(dp, 0);
3685                         break;
3686                     case 28: /* ftosh */
3687                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3688                             return 1;
3689                         }
3690                         gen_vfp_tosh(dp, 16 - rm, 0);
3691                         break;
3692                     case 29: /* ftosl */
3693                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3694                             return 1;
3695                         }
3696                         gen_vfp_tosl(dp, 32 - rm, 0);
3697                         break;
3698                     case 30: /* ftouh */
3699                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3700                             return 1;
3701                         }
3702                         gen_vfp_touh(dp, 16 - rm, 0);
3703                         break;
3704                     case 31: /* ftoul */
3705                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3706                             return 1;
3707                         }
3708                         gen_vfp_toul(dp, 32 - rm, 0);
3709                         break;
3710                     default: /* undefined */
3711                         return 1;
3712                     }
3713                     break;
3714                 default: /* undefined */
3715                     return 1;
3716                 }
3717
3718                 /* Write back the result.  */
3719                 if (op == 15 && (rn >= 8 && rn <= 11)) {
3720                     /* Comparison, do nothing.  */
3721                 } else if (op == 15 && dp && ((rn & 0x1c) == 0x18 ||
3722                                               (rn & 0x1e) == 0x6)) {
3723                     /* VCVT double to int: always integer result.
3724                      * VCVT double to half precision is always a single
3725                      * precision result.
3726                      */
3727                     gen_mov_vreg_F0(0, rd);
3728                 } else if (op == 15 && rn == 15) {
3729                     /* conversion */
3730                     gen_mov_vreg_F0(!dp, rd);
3731                 } else {
3732                     gen_mov_vreg_F0(dp, rd);
3733                 }
3734
3735                 /* break out of the loop if we have finished  */
3736                 if (veclen == 0)
3737                     break;
3738
3739                 if (op == 15 && delta_m == 0) {
3740                     /* single source, one-to-many */
3741                     while (veclen--) {
3742                         rd = ((rd + delta_d) & (bank_mask - 1))
3743                              | (rd & bank_mask);
3744                         gen_mov_vreg_F0(dp, rd);
3745                     }
3746                     break;
3747                 }
3748                 /* Setup the next operands.  */
3749                 veclen--;
3750                 rd = ((rd + delta_d) & (bank_mask - 1))
3751                      | (rd & bank_mask);
3752
3753                 if (op == 15) {
3754                     /* One source operand.  */
3755                     rm = ((rm + delta_m) & (bank_mask - 1))
3756                          | (rm & bank_mask);
3757                     gen_mov_F0_vreg(dp, rm);
3758                 } else {
3759                     /* Two source operands.  */
3760                     rn = ((rn + delta_d) & (bank_mask - 1))
3761                          | (rn & bank_mask);
3762                     gen_mov_F0_vreg(dp, rn);
3763                     if (delta_m) {
3764                         rm = ((rm + delta_m) & (bank_mask - 1))
3765                              | (rm & bank_mask);
3766                         gen_mov_F1_vreg(dp, rm);
3767                     }
3768                 }
3769             }
3770         }
3771         break;
3772     case 0xc:
3773     case 0xd:
3774         if ((insn & 0x03e00000) == 0x00400000) {
3775             /* two-register transfer */
3776             rn = (insn >> 16) & 0xf;
3777             rd = (insn >> 12) & 0xf;
3778             if (dp) {
3779                 VFP_DREG_M(rm, insn);
3780             } else {
3781                 rm = VFP_SREG_M(insn);
3782             }
3783
3784             if (insn & ARM_CP_RW_BIT) {
3785                 /* vfp->arm */
3786                 if (dp) {
3787                     gen_mov_F0_vreg(0, rm * 2);
3788                     tmp = gen_vfp_mrs();
3789                     store_reg(s, rd, tmp);
3790                     gen_mov_F0_vreg(0, rm * 2 + 1);
3791                     tmp = gen_vfp_mrs();
3792                     store_reg(s, rn, tmp);
3793                 } else {
3794                     gen_mov_F0_vreg(0, rm);
3795                     tmp = gen_vfp_mrs();
3796                     store_reg(s, rd, tmp);
3797                     gen_mov_F0_vreg(0, rm + 1);
3798                     tmp = gen_vfp_mrs();
3799                     store_reg(s, rn, tmp);
3800                 }
3801             } else {
3802                 /* arm->vfp */
3803                 if (dp) {
3804                     tmp = load_reg(s, rd);
3805                     gen_vfp_msr(tmp);
3806                     gen_mov_vreg_F0(0, rm * 2);
3807                     tmp = load_reg(s, rn);
3808                     gen_vfp_msr(tmp);
3809                     gen_mov_vreg_F0(0, rm * 2 + 1);
3810                 } else {
3811                     tmp = load_reg(s, rd);
3812                     gen_vfp_msr(tmp);
3813                     gen_mov_vreg_F0(0, rm);
3814                     tmp = load_reg(s, rn);
3815                     gen_vfp_msr(tmp);
3816                     gen_mov_vreg_F0(0, rm + 1);
3817                 }
3818             }
3819         } else {
3820             /* Load/store */
3821             rn = (insn >> 16) & 0xf;
3822             if (dp)
3823                 VFP_DREG_D(rd, insn);
3824             else
3825                 rd = VFP_SREG_D(insn);
3826             if ((insn & 0x01200000) == 0x01000000) {
3827                 /* Single load/store */
3828                 offset = (insn & 0xff) << 2;
3829                 if ((insn & (1 << 23)) == 0)
3830                     offset = -offset;
3831                 if (s->thumb && rn == 15) {
3832                     /* This is actually UNPREDICTABLE */
3833                     addr = tcg_temp_new_i32();
3834                     tcg_gen_movi_i32(addr, s->pc & ~2);
3835                 } else {
3836                     addr = load_reg(s, rn);
3837                 }
3838                 tcg_gen_addi_i32(addr, addr, offset);
3839                 if (insn & (1 << 20)) {
3840                     gen_vfp_ld(s, dp, addr);
3841                     gen_mov_vreg_F0(dp, rd);
3842                 } else {
3843                     gen_mov_F0_vreg(dp, rd);
3844                     gen_vfp_st(s, dp, addr);
3845                 }
3846                 tcg_temp_free_i32(addr);
3847             } else {
3848                 /* load/store multiple */
3849                 int w = insn & (1 << 21);
3850                 if (dp)
3851                     n = (insn >> 1) & 0x7f;
3852                 else
3853                     n = insn & 0xff;
3854
3855                 if (w && !(((insn >> 23) ^ (insn >> 24)) & 1)) {
3856                     /* P == U, W == 1 => UNDEF */
3857                     return 1;
3858                 }
3859                 if (n == 0 || (rd + n) > 32 || (dp && n > 16)) {
3860                     /* UNPREDICTABLE cases for bad immediates: we choose to
3861                      * UNDEF to avoid generating huge numbers of TCG ops
3862                      */
3863                     return 1;
3864                 }
3865                 if (rn == 15 && w) {
3866                     /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
3867                     return 1;
3868                 }
3869
3870                 if (s->thumb && rn == 15) {
3871                     /* This is actually UNPREDICTABLE */
3872                     addr = tcg_temp_new_i32();
3873                     tcg_gen_movi_i32(addr, s->pc & ~2);
3874                 } else {
3875                     addr = load_reg(s, rn);
3876                 }
3877                 if (insn & (1 << 24)) /* pre-decrement */
3878                     tcg_gen_addi_i32(addr, addr, -((insn & 0xff) << 2));
3879
3880                 if (dp)
3881                     offset = 8;
3882                 else
3883                     offset = 4;
3884                 for (i = 0; i < n; i++) {
3885                     if (insn & ARM_CP_RW_BIT) {
3886                         /* load */
3887                         gen_vfp_ld(s, dp, addr);
3888                         gen_mov_vreg_F0(dp, rd + i);
3889                     } else {
3890                         /* store */
3891                         gen_mov_F0_vreg(dp, rd + i);
3892                         gen_vfp_st(s, dp, addr);
3893                     }
3894                     tcg_gen_addi_i32(addr, addr, offset);
3895                 }
3896                 if (w) {
3897                     /* writeback */
3898                     if (insn & (1 << 24))
3899                         offset = -offset * n;
3900                     else if (dp && (insn & 1))
3901                         offset = 4;
3902                     else
3903                         offset = 0;
3904
3905                     if (offset != 0)
3906                         tcg_gen_addi_i32(addr, addr, offset);
3907                     store_reg(s, rn, addr);
3908                 } else {
3909                     tcg_temp_free_i32(addr);
3910                 }
3911             }
3912         }
3913         break;
3914     default:
3915         /* Should never happen.  */
3916         return 1;
3917     }
3918     return 0;
3919 }
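/* As with disas_neon_ls_insn() below, a nonzero return from the VFP decoder
 * above tells the caller that the instruction is invalid and should UNDEF.
 */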
3920
3921 static inline void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
3922 {
3923     TranslationBlock *tb;
3924
3925     tb = s->tb;
3926     if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK)) {
3927         tcg_gen_goto_tb(n);
3928         gen_set_pc_im(s, dest);
3929         tcg_gen_exit_tb((uintptr_t)tb + n);
3930     } else {
3931         gen_set_pc_im(s, dest);
3932         tcg_gen_exit_tb(0);
3933     }
3934 }
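/* Usage sketch (informal) for gen_goto_tb(): direct block chaining via
 * goto_tb/exit_tb is only safe when the destination lies in the same guest
 * page as the TB being translated, since the mapping could otherwise change
 * under us.  The (uintptr_t)tb + n value returned through exit_tb tells the
 * execution loop which of this TB's (at most two) jump slots to patch.
 */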
3935
3936 static inline void gen_jmp(DisasContext *s, uint32_t dest)
3937 {
3938     if (unlikely(s->singlestep_enabled || s->ss_active)) {
3939         /* An indirect jump so that we still trigger the debug exception.  */
3940         if (s->thumb)
3941             dest |= 1;
3942         gen_bx_im(s, dest);
3943     } else {
3944         gen_goto_tb(s, 0, dest);
3945         s->is_jmp = DISAS_TB_JUMP;
3946     }
3947 }
3948
3949 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
3950 {
3951     if (x)
3952         tcg_gen_sari_i32(t0, t0, 16);
3953     else
3954         gen_sxth(t0);
3955     if (y)
3956         tcg_gen_sari_i32(t1, t1, 16);
3957     else
3958         gen_sxth(t1);
3959     tcg_gen_mul_i32(t0, t0, t1);
3960 }
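/* Worked example: for SMULTB-style operands x == 1, y == 0, the code above
 * arithmetically shifts t0 right by 16 (taking its top halfword) and
 * sign-extends the bottom halfword of t1, then forms the 32x32->32 product
 * of the two 16-bit values.
 */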
3961
3962 /* Return the mask of PSR bits set by a MSR instruction.  */
3963 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
3964 {
3965     uint32_t mask;
3966
3967     mask = 0;
3968     if (flags & (1 << 0))
3969         mask |= 0xff;
3970     if (flags & (1 << 1))
3971         mask |= 0xff00;
3972     if (flags & (1 << 2))
3973         mask |= 0xff0000;
3974     if (flags & (1 << 3))
3975         mask |= 0xff000000;
3976
3977     /* Mask out undefined bits.  */
3978     mask &= ~CPSR_RESERVED;
3979     if (!arm_dc_feature(s, ARM_FEATURE_V4T)) {
3980         mask &= ~CPSR_T;
3981     }
3982     if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
3983         mask &= ~CPSR_Q; /* V5TE in reality */
3984     }
3985     if (!arm_dc_feature(s, ARM_FEATURE_V6)) {
3986         mask &= ~(CPSR_E | CPSR_GE);
3987     }
3988     if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
3989         mask &= ~CPSR_IT;
3990     }
3991     /* Mask out execution state and reserved bits.  */
3992     if (!spsr) {
3993         mask &= ~(CPSR_EXEC | CPSR_RESERVED);
3994     }
3995     /* Mask out privileged bits.  */
3996     if (IS_USER(s))
3997         mask &= CPSR_USER;
3998     return mask;
3999 }
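/* Worked example: an MSR targeting the "fc" fields (flags bits 3 and 0 set)
 * starts with mask == 0xff0000ff; the feature tests above then clear any
 * bits the guest CPU does not implement, and for a CPSR write from user
 * mode the final mask is reduced to CPSR_USER, the user-writable flag bits.
 */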
4000
4001 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
4002 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
4003 {
4004     TCGv_i32 tmp;
4005     if (spsr) {
4006         /* ??? This is also undefined in system mode.  */
4007         if (IS_USER(s))
4008             return 1;
4009
4010         tmp = load_cpu_field(spsr);
4011         tcg_gen_andi_i32(tmp, tmp, ~mask);
4012         tcg_gen_andi_i32(t0, t0, mask);
4013         tcg_gen_or_i32(tmp, tmp, t0);
4014         store_cpu_field(tmp, spsr);
4015     } else {
4016         gen_set_cpsr(t0, mask);
4017     }
4018     tcg_temp_free_i32(t0);
4019     gen_lookup_tb(s);
4020     return 0;
4021 }
4022
4023 /* Returns nonzero if access to the PSR is not permitted.  */
4024 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
4025 {
4026     TCGv_i32 tmp;
4027     tmp = tcg_temp_new_i32();
4028     tcg_gen_movi_i32(tmp, val);
4029     return gen_set_psr(s, mask, spsr, tmp);
4030 }
4031
4032 /* Generate an old-style exception return. Marks pc as dead. */
4033 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
4034 {
4035     TCGv_i32 tmp;
4036     store_reg(s, 15, pc);
4037     tmp = load_cpu_field(spsr);
4038     gen_set_cpsr(tmp, CPSR_ERET_MASK);
4039     tcg_temp_free_i32(tmp);
4040     s->is_jmp = DISAS_UPDATE;
4041 }
4042
4043 /* Generate a v6 exception return.  Marks both values as dead.  */
4044 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
4045 {
4046     gen_set_cpsr(cpsr, CPSR_ERET_MASK);
4047     tcg_temp_free_i32(cpsr);
4048     store_reg(s, 15, pc);
4049     s->is_jmp = DISAS_UPDATE;
4050 }
4051
4052 static void gen_nop_hint(DisasContext *s, int val)
4053 {
4054     switch (val) {
4055     case 3: /* wfi */
4056         gen_set_pc_im(s, s->pc);
4057         s->is_jmp = DISAS_WFI;
4058         break;
4059     case 2: /* wfe */
4060         gen_set_pc_im(s, s->pc);
4061         s->is_jmp = DISAS_WFE;
4062         break;
4063     case 4: /* sev */
4064     case 5: /* sevl */
4065         /* TODO: Implement SEV, SEVL and WFE.  May help SMP performance.  */
4066     default: /* nop */
4067         break;
4068     }
4069 }
4070
4071 #define CPU_V001 cpu_V0, cpu_V0, cpu_V1
4072
4073 static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
4074 {
4075     switch (size) {
4076     case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
4077     case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
4078     case 2: tcg_gen_add_i32(t0, t0, t1); break;
4079     default: abort();
4080     }
4081 }
4082
4083 static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
4084 {
4085     switch (size) {
4086     case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
4087     case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
4088     case 2: tcg_gen_sub_i32(t0, t1, t0); break;
4089     default: return;
4090     }
4091 }
4092
4093 /* 32-bit pairwise ops end up the same as the elementwise versions.  */
4094 #define gen_helper_neon_pmax_s32  gen_helper_neon_max_s32
4095 #define gen_helper_neon_pmax_u32  gen_helper_neon_max_u32
4096 #define gen_helper_neon_pmin_s32  gen_helper_neon_min_s32
4097 #define gen_helper_neon_pmin_u32  gen_helper_neon_min_u32
4098
4099 #define GEN_NEON_INTEGER_OP_ENV(name) do { \
4100     switch ((size << 1) | u) { \
4101     case 0: \
4102         gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \
4103         break; \
4104     case 1: \
4105         gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \
4106         break; \
4107     case 2: \
4108         gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \
4109         break; \
4110     case 3: \
4111         gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \
4112         break; \
4113     case 4: \
4114         gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \
4115         break; \
4116     case 5: \
4117         gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \
4118         break; \
4119     default: return 1; \
4120     }} while (0)
4121
4122 #define GEN_NEON_INTEGER_OP(name) do { \
4123     switch ((size << 1) | u) { \
4124     case 0: \
4125         gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \
4126         break; \
4127     case 1: \
4128         gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \
4129         break; \
4130     case 2: \
4131         gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \
4132         break; \
4133     case 3: \
4134         gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \
4135         break; \
4136     case 4: \
4137         gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \
4138         break; \
4139     case 5: \
4140         gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \
4141         break; \
4142     default: return 1; \
4143     }} while (0)
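/* Usage sketch: with local variables size, u, tmp and tmp2 in scope, a
 * decoder case can expand e.g. GEN_NEON_INTEGER_OP(hadd) to dispatch to
 * gen_helper_neon_hadd_{s8,u8,s16,u16,s32,u32} according to the element
 * size and the unsigned bit; the "default: return 1" arm makes malformed
 * size/u combinations UNDEF in the enclosing decoder function.  The _ENV
 * variant additionally passes cpu_env, for helpers that also need CPU
 * state (e.g. to set the saturation flag).
 */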
4144
4145 static TCGv_i32 neon_load_scratch(int scratch)
4146 {
4147     TCGv_i32 tmp = tcg_temp_new_i32();
4148     tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
4149     return tmp;
4150 }
4151
4152 static void neon_store_scratch(int scratch, TCGv_i32 var)
4153 {
4154     tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
4155     tcg_temp_free_i32(var);
4156 }
4157
4158 static inline TCGv_i32 neon_get_scalar(int size, int reg)
4159 {
4160     TCGv_i32 tmp;
4161     if (size == 1) {
4162         tmp = neon_load_reg(reg & 7, reg >> 4);
4163         if (reg & 8) {
4164             gen_neon_dup_high16(tmp);
4165         } else {
4166             gen_neon_dup_low16(tmp);
4167         }
4168     } else {
4169         tmp = neon_load_reg(reg & 15, reg >> 4);
4170     }
4171     return tmp;
4172 }
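/* Note on the scalar encoding above: for 16-bit scalars the caller packs
 * the D register number into bits [2:0] of 'reg', the pass (32-bit word
 * within the register) into bit 4, and the halfword within that word into
 * bit 3; 32-bit scalars use bits [3:0] for the register and bit 4 for the
 * pass.
 */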
4173
4174 static int gen_neon_unzip(int rd, int rm, int size, int q)
4175 {
4176     TCGv_i32 tmp, tmp2;
4177     if (!q && size == 2) {
4178         return 1;
4179     }
4180     tmp = tcg_const_i32(rd);
4181     tmp2 = tcg_const_i32(rm);
4182     if (q) {
4183         switch (size) {
4184         case 0:
4185             gen_helper_neon_qunzip8(cpu_env, tmp, tmp2);
4186             break;
4187         case 1:
4188             gen_helper_neon_qunzip16(cpu_env, tmp, tmp2);
4189             break;
4190         case 2:
4191             gen_helper_neon_qunzip32(cpu_env, tmp, tmp2);
4192             break;
4193         default:
4194             abort();
4195         }
4196     } else {
4197         switch (size) {
4198         case 0:
4199             gen_helper_neon_unzip8(cpu_env, tmp, tmp2);
4200             break;
4201         case 1:
4202             gen_helper_neon_unzip16(cpu_env, tmp, tmp2);
4203             break;
4204         default:
4205             abort();
4206         }
4207     }
4208     tcg_temp_free_i32(tmp);
4209     tcg_temp_free_i32(tmp2);
4210     return 0;
4211 }
4212
4213 static int gen_neon_zip(int rd, int rm, int size, int q)
4214 {
4215     TCGv_i32 tmp, tmp2;
4216     if (!q && size == 2) {
4217         return 1;
4218     }
4219     tmp = tcg_const_i32(rd);
4220     tmp2 = tcg_const_i32(rm);
4221     if (q) {
4222         switch (size) {
4223         case 0:
4224             gen_helper_neon_qzip8(cpu_env, tmp, tmp2);
4225             break;
4226         case 1:
4227             gen_helper_neon_qzip16(cpu_env, tmp, tmp2);
4228             break;
4229         case 2:
4230             gen_helper_neon_qzip32(cpu_env, tmp, tmp2);
4231             break;
4232         default:
4233             abort();
4234         }
4235     } else {
4236         switch (size) {
4237         case 0:
4238             gen_helper_neon_zip8(cpu_env, tmp, tmp2);
4239             break;
4240         case 1:
4241             gen_helper_neon_zip16(cpu_env, tmp, tmp2);
4242             break;
4243         default:
4244             abort();
4245         }
4246     }
4247     tcg_temp_free_i32(tmp);
4248     tcg_temp_free_i32(tmp2);
4249     return 0;
4250 }
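/* Illustrative semantics (informal): VZIP interleaves the elements of two
 * registers, e.g. for byte vectors {a3,a2,a1,a0} and {b3,b2,b1,b0} the
 * helpers produce {b1,a1,b0,a0} and {b3,a3,b2,a2}; VUZP is the inverse,
 * separating even- and odd-numbered elements.  The helpers operate on whole
 * registers in CPU state, which is why only register indices are passed in.
 */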
4251
4252 static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
4253 {
4254     TCGv_i32 rd, tmp;
4255
4256     rd = tcg_temp_new_i32();
4257     tmp = tcg_temp_new_i32();
4258
4259     tcg_gen_shli_i32(rd, t0, 8);
4260     tcg_gen_andi_i32(rd, rd, 0xff00ff00);
4261     tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
4262     tcg_gen_or_i32(rd, rd, tmp);
4263
4264     tcg_gen_shri_i32(t1, t1, 8);
4265     tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
4266     tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
4267     tcg_gen_or_i32(t1, t1, tmp);
4268     tcg_gen_mov_i32(t0, rd);
4269
4270     tcg_temp_free_i32(tmp);
4271     tcg_temp_free_i32(rd);
4272 }
4273
4274 static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
4275 {
4276     TCGv_i32 rd, tmp;
4277
4278     rd = tcg_temp_new_i32();
4279     tmp = tcg_temp_new_i32();
4280
4281     tcg_gen_shli_i32(rd, t0, 16);
4282     tcg_gen_andi_i32(tmp, t1, 0xffff);
4283     tcg_gen_or_i32(rd, rd, tmp);
4284     tcg_gen_shri_i32(t1, t1, 16);
4285     tcg_gen_andi_i32(tmp, t0, 0xffff0000);
4286     tcg_gen_or_i32(t1, t1, tmp);
4287     tcg_gen_mov_i32(t0, rd);
4288
4289     tcg_temp_free_i32(tmp);
4290     tcg_temp_free_i32(rd);
4291 }
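/* Worked example for gen_neon_trn_u16(): writing each 32-bit input as
 * {hi:lo} halfwords, t0 == {a_hi:a_lo} and t1 == {b_hi:b_lo} become
 * t0' == {a_lo:b_lo} and t1' == {a_hi:b_hi}, i.e. a 2x2 transpose of
 * halfword elements across the two registers.
 */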
4292
4293
4294 static struct {
4295     int nregs;
4296     int interleave;
4297     int spacing;
4298 } neon_ls_element_type[11] = {
4299     {4, 4, 1},
4300     {4, 4, 2},
4301     {4, 1, 1},
4302     {4, 2, 1},
4303     {3, 3, 1},
4304     {3, 3, 2},
4305     {3, 1, 1},
4306     {1, 1, 1},
4307     {2, 2, 1},
4308     {2, 2, 2},
4309     {2, 1, 1}
4310 };
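/* Example: op == 7 (VLD1/VST1, one register) selects {1, 1, 1}: a single
 * D register, no element interleaving, and consecutive registers one
 * apart; op == 0 (VLD4/VST4) selects {4, 4, 1}: four registers with 4-way
 * element interleaving.  The op index comes from insn bits [11:8] below.
 */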
4311
4312 /* Translate a NEON load/store element instruction.  Return nonzero if the
4313    instruction is invalid.  */
4314 static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
4315 {
4316     int rd, rn, rm;
4317     int op;
4318     int nregs;
4319     int interleave;
4320     int spacing;
4321     int stride;
4322     int size;
4323     int reg;
4324     int pass;
4325     int load;
4326     int shift;
4327     int n;
4328     TCGv_i32 addr;
4329     TCGv_i32 tmp;
4330     TCGv_i32 tmp2;
4331     TCGv_i64 tmp64;
4332
4333     /* FIXME: this access check should not take precedence over UNDEF
4334      * for invalid encodings; we will generate incorrect syndrome information
4335      * for attempts to execute invalid vfp/neon encodings with FP disabled.
4336      */
4337     if (!s->cpacr_fpen) {
4338         gen_exception_insn(s, 4, EXCP_UDEF,
4339                            syn_fp_access_trap(1, 0xe, s->thumb));
4340         return 0;
4341     }
4342
4343     if (!s->vfp_enabled)
4344       return 1;
4345     VFP_DREG_D(rd, insn);
4346     rn = (insn >> 16) & 0xf;
4347     rm = insn & 0xf;
4348     load = (insn & (1 << 21)) != 0;
4349     if ((insn & (1 << 23)) == 0) {
4350         /* Load store all elements.  */
4351         op = (insn >> 8) & 0xf;
4352         size = (insn >> 6) & 3;
4353         if (op > 10)
4354             return 1;
4355         /* Catch UNDEF cases for bad values of align field */
4356         switch (op & 0xc) {
4357         case 4:
4358             if (((insn >> 5) & 1) == 1) {
4359                 return 1;
4360             }
4361             break;
4362         case 8:
4363             if (((insn >> 4) & 3) == 3) {
4364                 return 1;
4365             }
4366             break;
4367         default:
4368             break;
4369         }
4370         nregs = neon_ls_element_type[op].nregs;
4371         interleave = neon_ls_element_type[op].interleave;
4372         spacing = neon_ls_element_type[op].spacing;
4373         if (size == 3 && (interleave | spacing) != 1)
4374             return 1;
4375         addr = tcg_temp_new_i32();
4376         load_reg_var(s, addr, rn);
4377         stride = (1 << size) * interleave;
4378         for (reg = 0; reg < nregs; reg++) {
4379             if (interleave > 2 || (interleave == 2 && nregs == 2)) {
4380                 load_reg_var(s, addr, rn);
4381                 tcg_gen_addi_i32(addr, addr, (1 << size) * reg);
4382             } else if (interleave == 2 && nregs == 4 && reg == 2) {
4383                 load_reg_var(s, addr, rn);
4384                 tcg_gen_addi_i32(addr, addr, 1 << size);
4385             }
4386             if (size == 3) {
4387                 tmp64 = tcg_temp_new_i64();
4388                 if (load) {
4389                     gen_aa32_ld64(tmp64, addr, get_mem_index(s));
4390                     neon_store_reg64(tmp64, rd);
4391                 } else {
4392                     neon_load_reg64(tmp64, rd);
4393                     gen_aa32_st64(tmp64, addr, get_mem_index(s));
4394                 }
4395                 tcg_temp_free_i64(tmp64);
4396                 tcg_gen_addi_i32(addr, addr, stride);
4397             } else {
4398                 for (pass = 0; pass < 2; pass++) {
4399                     if (size == 2) {
4400                         if (load) {
4401                             tmp = tcg_temp_new_i32();
4402                             gen_aa32_ld32u(tmp, addr, get_mem_index(s));
4403                             neon_store_reg(rd, pass, tmp);
4404                         } else {
4405                             tmp = neon_load_reg(rd, pass);
4406                             gen_aa32_st32(tmp, addr, get_mem_index(s));
4407                             tcg_temp_free_i32(tmp);
4408                         }
4409                         tcg_gen_addi_i32(addr, addr, stride);
4410                     } else if (size == 1) {
4411                         if (load) {
4412                             tmp = tcg_temp_new_i32();
4413                             gen_aa32_ld16u(tmp, addr, get_mem_index(s));
4414                             tcg_gen_addi_i32(addr, addr, stride);
4415                             tmp2 = tcg_temp_new_i32();
4416                             gen_aa32_ld16u(tmp2, addr, get_mem_index(s));
4417                             tcg_gen_addi_i32(addr, addr, stride);
4418                             tcg_gen_shli_i32(tmp2, tmp2, 16);
4419                             tcg_gen_or_i32(tmp, tmp, tmp2);
4420                             tcg_temp_free_i32(tmp2);
4421                             neon_store_reg(rd, pass, tmp);
4422                         } else {
4423                             tmp = neon_load_reg(rd, pass);
4424                             tmp2 = tcg_temp_new_i32();
4425                             tcg_gen_shri_i32(tmp2, tmp, 16);
4426                             gen_aa32_st16(tmp, addr, get_mem_index(s));
4427                             tcg_temp_free_i32(tmp);
4428                             tcg_gen_addi_i32(addr, addr, stride);
4429                             gen_aa32_st16(tmp2, addr, get_mem_index(s));
4430                             tcg_temp_free_i32(tmp2);
4431                             tcg_gen_addi_i32(addr, addr, stride);
4432                         }
4433                     } else /* size == 0 */ {
4434                         if (load) {
4435                             TCGV_UNUSED_I32(tmp2);
4436                             for (n = 0; n < 4; n++) {
4437                                 tmp = tcg_temp_new_i32();
4438                                 gen_aa32_ld8u(tmp, addr, get_mem_index(s));
4439                                 tcg_gen_addi_i32(addr, addr, stride);
4440                                 if (n == 0) {
4441                                     tmp2 = tmp;
4442                                 } else {
4443                                     tcg_gen_shli_i32(tmp, tmp, n * 8);
4444                                     tcg_gen_or_i32(tmp2, tmp2, tmp);
4445                                     tcg_temp_free_i32(tmp);
4446                                 }
4447                             }
4448                             neon_store_reg(rd, pass, tmp2);
4449                         } else {
4450                             tmp2 = neon_load_reg(rd, pass);
4451                             for (n = 0; n < 4; n++) {
4452                                 tmp = tcg_temp_new_i32();
4453                                 if (n == 0) {
4454                                     tcg_gen_mov_i32(tmp, tmp2);
4455                                 } else {
4456                                     tcg_gen_shri_i32(tmp, tmp2, n * 8);
4457                                 }
4458                                 gen_aa32_st8(tmp, addr, get_mem_index(s));
4459                                 tcg_temp_free_i32(tmp);
4460                                 tcg_gen_addi_i32(addr, addr, stride);
4461                             }
4462                             tcg_temp_free_i32(tmp2);
4463                         }
4464                     }
4465                 }
4466             }
4467             rd += spacing;
4468         }
4469         tcg_temp_free_i32(addr);
4470         stride = nregs * 8;
4471     } else {
4472         size = (insn >> 10) & 3;
4473         if (size == 3) {
4474             /* Load single element to all lanes.  */
4475             int a = (insn >> 4) & 1;
4476             if (!load) {
4477                 return 1;
4478             }
4479             size = (insn >> 6) & 3;
4480             nregs = ((insn >> 8) & 3) + 1;
4481
4482             if (size == 3) {
4483                 if (nregs != 4 || a == 0) {
4484                     return 1;
4485                 }
4486                 /* For VLD4 size==3 a == 1 means 32 bits at 16 byte alignment */
4487                 size = 2;
4488             }
4489             if (nregs == 1 && a == 1 && size == 0) {
4490                 return 1;
4491             }
4492             if (nregs == 3 && a == 1) {
4493                 return 1;
4494             }
4495             addr = tcg_temp_new_i32();
4496             load_reg_var(s, addr, rn);
4497             if (nregs == 1) {
4498                 /* VLD1 to all lanes: bit 5 indicates how many Dregs to write */
4499                 tmp = gen_load_and_replicate(s, addr, size);
4500                 tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0));
4501                 tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1));
4502                 if (insn & (1 << 5)) {
4503                     tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 0));
4504                     tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 1));
4505                 }
4506                 tcg_temp_free_i32(tmp);
4507             } else {
4508                 /* VLD2/3/4 to all lanes: bit 5 indicates register stride */
4509                 stride = (insn & (1 << 5)) ? 2 : 1;
4510                 for (reg = 0; reg < nregs; reg++) {
4511                     tmp = gen_load_and_replicate(s, addr, size);
4512                     tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0));
4513                     tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1));
4514                     tcg_temp_free_i32(tmp);
4515                     tcg_gen_addi_i32(addr, addr, 1 << size);
4516                     rd += stride;
4517                 }
4518             }
4519             tcg_temp_free_i32(addr);
4520             stride = (1 << size) * nregs;
4521         } else {
4522             /* Single element.  */
4523             int idx = (insn >> 4) & 0xf;
4524             pass = (insn >> 7) & 1;
4525             switch (size) {
4526             case 0:
4527                 shift = ((insn >> 5) & 3) * 8;
4528                 stride = 1;
4529                 break;
4530             case 1:
4531                 shift = ((insn >> 6) & 1) * 16;
4532                 stride = (insn & (1 << 5)) ? 2 : 1;
4533                 break;
4534             case 2:
4535                 shift = 0;
4536                 stride = (insn & (1 << 6)) ? 2 : 1;
4537                 break;
4538             default:
4539                 abort();
4540             }
4541             nregs = ((insn >> 8) & 3) + 1;
4542             /* Catch the UNDEF cases. This is unavoidably a bit messy. */
4543             switch (nregs) {
4544             case 1:
4545                 if (((idx & (1 << size)) != 0) ||
4546                     (size == 2 && ((idx & 3) == 1 || (idx & 3) == 2))) {
4547                     return 1;
4548                 }
4549                 break;
4550             case 3:
4551                 if ((idx & 1) != 0) {
4552                     return 1;
4553                 }
4554                 /* fall through */
4555             case 2:
4556                 if (size == 2 && (idx & 2) != 0) {
4557                     return 1;
4558                 }
4559                 break;
4560             case 4:
4561                 if ((size == 2) && ((idx & 3) == 3)) {
4562                     return 1;
4563                 }
4564                 break;
4565             default:
4566                 abort();
4567             }
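                 /* Example: for a single-register transfer of bytes
                  * (nregs == 1, size == 0) the check above rejects any
                  * index_align value with bit 0 set, since those are
                  * UNDEF alignment encodings.
                  */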
4568             if ((rd + stride * (nregs - 1)) > 31) {
4569                 /* Attempts to write off the end of the register file
4570                  * are UNPREDICTABLE; we choose to UNDEF because otherwise
4571                  * the neon_load_reg() would write off the end of the array.
4572                  */
4573                 return 1;
4574             }
4575             addr = tcg_temp_new_i32();
4576             load_reg_var(s, addr, rn);
4577             for (reg = 0; reg < nregs; reg++) {
4578                 if (load) {
4579                     tmp = tcg_temp_new_i32();
4580                     switch (size) {
4581                     case 0:
4582                         gen_aa32_ld8u(tmp, addr, get_mem_index(s));
4583                         break;
4584                     case 1:
4585                         gen_aa32_ld16u(tmp, addr, get_mem_index(s));
4586                         break;
4587                     case 2:
4588                         gen_aa32_ld32u(tmp, addr, get_mem_index(s));
4589                         break;
4590                     default: /* Avoid compiler warnings.  */
4591                         abort();
4592                     }
4593                     if (size != 2) {
4594                         tmp2 = neon_load_reg(rd, pass);
4595                         tcg_gen_deposit_i32(tmp, tmp2, tmp,
4596                                             shift, size ? 16 : 8);
4597                         tcg_temp_free_i32(tmp2);
4598                     }
4599                     neon_store_reg(rd, pass, tmp);
4600                 } else { /* Store */
4601                     tmp = neon_load_reg(rd, pass);
4602                     if (shift)
4603                         tcg_gen_shri_i32(tmp, tmp, shift);
4604                     switch (size) {
4605                     case 0:
4606                         gen_aa32_st8(tmp, addr, get_mem_index(s));
4607                         break;
4608                     case 1:
4609                         gen_aa32_st16(tmp, addr, get_mem_index(s));
4610                         break;
4611                     case 2:
4612                         gen_aa32_st32(tmp, addr, get_mem_index(s));
4613                         break;
4614                     }
4615                     tcg_temp_free_i32(tmp);
4616                 }
4617                 rd += stride;
4618                 tcg_gen_addi_i32(addr, addr, 1 << size);
4619             }
4620             tcg_temp_free_i32(addr);
4621             stride = nregs * (1 << size);
4622         }
4623     }
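         /* Writeback: rm == 15 means no writeback; rm == 13 means
          * post-increment rn by the number of bytes transferred; any
          * other rm means post-increment rn by the value of that
          * register.
          */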
4624     if (rm != 15) {
4625         TCGv_i32 base;
4626
4627         base = load_reg(s, rn);
4628         if (rm == 13) {
4629             tcg_gen_addi_i32(base, base, stride);
4630         } else {
4631             TCGv_i32 index;
4632             index = load_reg(s, rm);
4633             tcg_gen_add_i32(base, base, index);
4634             tcg_temp_free_i32(index);
4635         }
4636         store_reg(s, rn, base);
4637     }
4638     return 0;
4639 }
4640
4641 /* Bitwise select.  dest = c ? t : f.  Clobbers t and f.  */
4642 static void gen_neon_bsl(TCGv_i32 dest, TCGv_i32 t, TCGv_i32 f, TCGv_i32 c)
4643 {
4644     tcg_gen_and_i32(t, t, c);
4645     tcg_gen_andc_i32(f, f, c);
4646     tcg_gen_or_i32(dest, t, f);
4647 }
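     /* Example of the select above: with c == 0xf0, t == 0xaa and f == 0x55
      * the result is (0xaa & 0xf0) | (0x55 & ~0xf0) == 0xa0 | 0x05 == 0xa5;
      * each result bit is taken from t where the corresponding c bit is
      * set, else from f.
      */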
4648
4649 static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
4650 {
4651     switch (size) {
4652     case 0: gen_helper_neon_narrow_u8(dest, src); break;
4653     case 1: gen_helper_neon_narrow_u16(dest, src); break;
4654     case 2: tcg_gen_trunc_i64_i32(dest, src); break;
4655     default: abort();
4656     }
4657 }
4658
4659 static inline void gen_neon_narrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
4660 {
4661     switch (size) {
4662     case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
4663     case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
4664     case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
4665     default: abort();
4666     }
4667 }
4668
4669 static inline void gen_neon_narrow_satu(int size, TCGv_i32 dest, TCGv_i64 src)
4670 {
4671     switch (size) {
4672     case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
4673     case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
4674     case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
4675     default: abort();
4676     }
4677 }
4678
4679 static inline void gen_neon_unarrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
4680 {
4681     switch (size) {
4682     case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
4683     case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
4684     case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
4685     default: abort();
4686     }
4687 }
4688
4689 static inline void gen_neon_shift_narrow(int size, TCGv_i32 var, TCGv_i32 shift,
4690                                          int q, int u)
4691 {
4692     if (q) {
4693         if (u) {
4694             switch (size) {
4695             case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
4696             case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
4697             default: abort();
4698             }
4699         } else {
4700             switch (size) {
4701             case 1: gen_helper_neon_rshl_s16(var, var, shift); break;
4702             case 2: gen_helper_neon_rshl_s32(var, var, shift); break;
4703             default: abort();
4704             }
4705         }
4706     } else {
4707         if (u) {
4708             switch (size) {
4709             case 1: gen_helper_neon_shl_u16(var, var, shift); break;
4710             case 2: gen_helper_neon_shl_u32(var, var, shift); break;
4711             default: abort();
4712             }
4713         } else {
4714             switch (size) {
4715             case 1: gen_helper_neon_shl_s16(var, var, shift); break;
4716             case 2: gen_helper_neon_shl_s32(var, var, shift); break;
4717             default: abort();
4718             }
4719         }
4720     }
4721 }
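     /* Only the 16- and 32-bit cases exist in the helper above: its
      * callers in the narrowing path have already incremented size, and
      * size == 3 (64-bit inputs) is handled separately via the i64
      * helpers, so size == 0 never reaches it.
      */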
4722
4723 static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u)
4724 {
4725     if (u) {
4726         switch (size) {
4727         case 0: gen_helper_neon_widen_u8(dest, src); break;
4728         case 1: gen_helper_neon_widen_u16(dest, src); break;
4729         case 2: tcg_gen_extu_i32_i64(dest, src); break;
4730         default: abort();
4731         }
4732     } else {
4733         switch (size) {
4734         case 0: gen_helper_neon_widen_s8(dest, src); break;
4735         case 1: gen_helper_neon_widen_s16(dest, src); break;
4736         case 2: tcg_gen_ext_i32_i64(dest, src); break;
4737         default: abort();
4738         }
4739     }
4740     tcg_temp_free_i32(src);
4741 }
4742
4743 static inline void gen_neon_addl(int size)
4744 {
4745     switch (size) {
4746     case 0: gen_helper_neon_addl_u16(CPU_V001); break;
4747     case 1: gen_helper_neon_addl_u32(CPU_V001); break;
4748     case 2: tcg_gen_add_i64(CPU_V001); break;
4749     default: abort();
4750     }
4751 }
4752
4753 static inline void gen_neon_subl(int size)
4754 {
4755     switch (size) {
4756     case 0: gen_helper_neon_subl_u16(CPU_V001); break;
4757     case 1: gen_helper_neon_subl_u32(CPU_V001); break;
4758     case 2: tcg_gen_sub_i64(CPU_V001); break;
4759     default: abort();
4760     }
4761 }
4762
4763 static inline void gen_neon_negl(TCGv_i64 var, int size)
4764 {
4765     switch (size) {
4766     case 0: gen_helper_neon_negl_u16(var, var); break;
4767     case 1: gen_helper_neon_negl_u32(var, var); break;
4768     case 2:
4769         tcg_gen_neg_i64(var, var);
4770         break;
4771     default: abort();
4772     }
4773 }
4774
4775 static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
4776 {
4777     switch (size) {
4778     case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
4779     case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
4780     default: abort();
4781     }
4782 }
4783
4784 static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
4785                                  int size, int u)
4786 {
4787     TCGv_i64 tmp;
4788
4789     switch ((size << 1) | u) {
4790     case 0: gen_helper_neon_mull_s8(dest, a, b); break;
4791     case 1: gen_helper_neon_mull_u8(dest, a, b); break;
4792     case 2: gen_helper_neon_mull_s16(dest, a, b); break;
4793     case 3: gen_helper_neon_mull_u16(dest, a, b); break;
4794     case 4:
4795         tmp = gen_muls_i64_i32(a, b);
4796         tcg_gen_mov_i64(dest, tmp);
4797         tcg_temp_free_i64(tmp);
4798         break;
4799     case 5:
4800         tmp = gen_mulu_i64_i32(a, b);
4801         tcg_gen_mov_i64(dest, tmp);
4802         tcg_temp_free_i64(tmp);
4803         break;
4804     default: abort();
4805     }
4806
4807     /* gen_helper_neon_mull_[su]{8|16} do not free their parameters,
4808        so we must free them here.  */
4809     if (size < 2) {
4810         tcg_temp_free_i32(a);
4811         tcg_temp_free_i32(b);
4812     }
4813 }
4814
4815 static void gen_neon_narrow_op(int op, int u, int size,
4816                                TCGv_i32 dest, TCGv_i64 src)
4817 {
4818     if (op) {
4819         if (u) {
4820             gen_neon_unarrow_sats(size, dest, src);
4821         } else {
4822             gen_neon_narrow(size, dest, src);
4823         }
4824     } else {
4825         if (u) {
4826             gen_neon_narrow_satu(size, dest, src);
4827         } else {
4828             gen_neon_narrow_sats(size, dest, src);
4829         }
4830     }
4831 }
4832
4833 /* Symbolic constants for op fields for Neon 3-register same-length.
4834  * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
4835  * table A7-9.
4836  */
4837 #define NEON_3R_VHADD 0
4838 #define NEON_3R_VQADD 1
4839 #define NEON_3R_VRHADD 2
4840 #define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
4841 #define NEON_3R_VHSUB 4
4842 #define NEON_3R_VQSUB 5
4843 #define NEON_3R_VCGT 6
4844 #define NEON_3R_VCGE 7
4845 #define NEON_3R_VSHL 8
4846 #define NEON_3R_VQSHL 9
4847 #define NEON_3R_VRSHL 10
4848 #define NEON_3R_VQRSHL 11
4849 #define NEON_3R_VMAX 12
4850 #define NEON_3R_VMIN 13
4851 #define NEON_3R_VABD 14
4852 #define NEON_3R_VABA 15
4853 #define NEON_3R_VADD_VSUB 16
4854 #define NEON_3R_VTST_VCEQ 17
4855 #define NEON_3R_VML 18 /* VMLA, VMLAL, VMLS, VMLSL */
4856 #define NEON_3R_VMUL 19
4857 #define NEON_3R_VPMAX 20
4858 #define NEON_3R_VPMIN 21
4859 #define NEON_3R_VQDMULH_VQRDMULH 22
4860 #define NEON_3R_VPADD 23
4861 #define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
4862 #define NEON_3R_VFM 25 /* VFMA, VFMS : float fused multiply-add */
4863 #define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
4864 #define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
4865 #define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
4866 #define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
4867 #define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
4868 #define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */
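     /* For example, VADD has bits [11:8] == 0b1000 and bit [4] == 0 in
      * table A7-9, giving op == 0b10000 == 16 == NEON_3R_VADD_VSUB; the
      * U bit then distinguishes VADD from VSUB in the decoder below.
      */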
4869
4870 static const uint8_t neon_3r_sizes[] = {
4871     [NEON_3R_VHADD] = 0x7,
4872     [NEON_3R_VQADD] = 0xf,
4873     [NEON_3R_VRHADD] = 0x7,
4874     [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
4875     [NEON_3R_VHSUB] = 0x7,
4876     [NEON_3R_VQSUB] = 0xf,
4877     [NEON_3R_VCGT] = 0x7,
4878     [NEON_3R_VCGE] = 0x7,
4879     [NEON_3R_VSHL] = 0xf,
4880     [NEON_3R_VQSHL] = 0xf,
4881     [NEON_3R_VRSHL] = 0xf,
4882     [NEON_3R_VQRSHL] = 0xf,
4883     [NEON_3R_VMAX] = 0x7,
4884     [NEON_3R_VMIN] = 0x7,
4885     [NEON_3R_VABD] = 0x7,
4886     [NEON_3R_VABA] = 0x7,
4887     [NEON_3R_VADD_VSUB] = 0xf,
4888     [NEON_3R_VTST_VCEQ] = 0x7,
4889     [NEON_3R_VML] = 0x7,
4890     [NEON_3R_VMUL] = 0x7,
4891     [NEON_3R_VPMAX] = 0x7,
4892     [NEON_3R_VPMIN] = 0x7,
4893     [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
4894     [NEON_3R_VPADD] = 0x7,
4895     [NEON_3R_SHA] = 0xf, /* size field encodes op type */
4896     [NEON_3R_VFM] = 0x5, /* size bit 1 encodes op */
4897     [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
4898     [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
4899     [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
4900     [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
4901     [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
4902     [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
4903 };
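     /* As for neon_2rm_sizes below, each entry has bit n set if the insn
      * accepts size value n.  For example, 0x6 for VQDMULH/VQRDMULH
      * allows only sizes 1 and 2 (16- and 32-bit elements), matching the
      * lack of an 8-bit form of those insns.
      */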
4904
4905 /* Symbolic constants for op fields for Neon 2-register miscellaneous.
4906  * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
4907  * table A7-13.
4908  */
4909 #define NEON_2RM_VREV64 0
4910 #define NEON_2RM_VREV32 1
4911 #define NEON_2RM_VREV16 2
4912 #define NEON_2RM_VPADDL 4
4913 #define NEON_2RM_VPADDL_U 5
4914 #define NEON_2RM_AESE 6 /* Includes AESD */
4915 #define NEON_2RM_AESMC 7 /* Includes AESIMC */
4916 #define NEON_2RM_VCLS 8
4917 #define NEON_2RM_VCLZ 9
4918 #define NEON_2RM_VCNT 10
4919 #define NEON_2RM_VMVN 11
4920 #define NEON_2RM_VPADAL 12
4921 #define NEON_2RM_VPADAL_U 13
4922 #define NEON_2RM_VQABS 14
4923 #define NEON_2RM_VQNEG 15
4924 #define NEON_2RM_VCGT0 16
4925 #define NEON_2RM_VCGE0 17
4926 #define NEON_2RM_VCEQ0 18
4927 #define NEON_2RM_VCLE0 19
4928 #define NEON_2RM_VCLT0 20
4929 #define NEON_2RM_SHA1H 21
4930 #define NEON_2RM_VABS 22
4931 #define NEON_2RM_VNEG 23
4932 #define NEON_2RM_VCGT0_F 24
4933 #define NEON_2RM_VCGE0_F 25
4934 #define NEON_2RM_VCEQ0_F 26
4935 #define NEON_2RM_VCLE0_F 27
4936 #define NEON_2RM_VCLT0_F 28
4937 #define NEON_2RM_VABS_F 30
4938 #define NEON_2RM_VNEG_F 31
4939 #define NEON_2RM_VSWP 32
4940 #define NEON_2RM_VTRN 33
4941 #define NEON_2RM_VUZP 34
4942 #define NEON_2RM_VZIP 35
4943 #define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
4944 #define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
4945 #define NEON_2RM_VSHLL 38
4946 #define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
4947 #define NEON_2RM_VRINTN 40
4948 #define NEON_2RM_VRINTX 41
4949 #define NEON_2RM_VRINTA 42
4950 #define NEON_2RM_VRINTZ 43
4951 #define NEON_2RM_VCVT_F16_F32 44
4952 #define NEON_2RM_VRINTM 45
4953 #define NEON_2RM_VCVT_F32_F16 46
4954 #define NEON_2RM_VRINTP 47
4955 #define NEON_2RM_VCVTAU 48
4956 #define NEON_2RM_VCVTAS 49
4957 #define NEON_2RM_VCVTNU 50
4958 #define NEON_2RM_VCVTNS 51
4959 #define NEON_2RM_VCVTPU 52
4960 #define NEON_2RM_VCVTPS 53
4961 #define NEON_2RM_VCVTMU 54
4962 #define NEON_2RM_VCVTMS 55
4963 #define NEON_2RM_VRECPE 56
4964 #define NEON_2RM_VRSQRTE 57
4965 #define NEON_2RM_VRECPE_F 58
4966 #define NEON_2RM_VRSQRTE_F 59
4967 #define NEON_2RM_VCVT_FS 60
4968 #define NEON_2RM_VCVT_FU 61
4969 #define NEON_2RM_VCVT_SF 62
4970 #define NEON_2RM_VCVT_UF 63
4971
4972 static int neon_2rm_is_float_op(int op)
4973 {
4974     /* Return true if this neon 2reg-misc op is float-to-float */
4975     return (op == NEON_2RM_VABS_F || op == NEON_2RM_VNEG_F ||
4976             (op >= NEON_2RM_VRINTN && op <= NEON_2RM_VRINTZ) ||
4977             op == NEON_2RM_VRINTM ||
4978             (op >= NEON_2RM_VRINTP && op <= NEON_2RM_VCVTMS) ||
4979             op >= NEON_2RM_VRECPE_F);
4980 }
4981
4982 /* Each entry in this array has bit n set if the insn allows
4983  * size value n (otherwise it will UNDEF). Since unallocated
4984  * op values will have no bits set they always UNDEF.
4985  */
4986 static const uint8_t neon_2rm_sizes[] = {
4987     [NEON_2RM_VREV64] = 0x7,
4988     [NEON_2RM_VREV32] = 0x3,
4989     [NEON_2RM_VREV16] = 0x1,
4990     [NEON_2RM_VPADDL] = 0x7,
4991     [NEON_2RM_VPADDL_U] = 0x7,
4992     [NEON_2RM_AESE] = 0x1,
4993     [NEON_2RM_AESMC] = 0x1,
4994     [NEON_2RM_VCLS] = 0x7,
4995     [NEON_2RM_VCLZ] = 0x7,
4996     [NEON_2RM_VCNT] = 0x1,
4997     [NEON_2RM_VMVN] = 0x1,
4998     [NEON_2RM_VPADAL] = 0x7,
4999     [NEON_2RM_VPADAL_U] = 0x7,
5000     [NEON_2RM_VQABS] = 0x7,
5001     [NEON_2RM_VQNEG] = 0x7,
5002     [NEON_2RM_VCGT0] = 0x7,
5003     [NEON_2RM_VCGE0] = 0x7,
5004     [NEON_2RM_VCEQ0] = 0x7,
5005     [NEON_2RM_VCLE0] = 0x7,
5006     [NEON_2RM_VCLT0] = 0x7,
5007     [NEON_2RM_SHA1H] = 0x4,
5008     [NEON_2RM_VABS] = 0x7,
5009     [NEON_2RM_VNEG] = 0x7,
5010     [NEON_2RM_VCGT0_F] = 0x4,
5011     [NEON_2RM_VCGE0_F] = 0x4,
5012     [NEON_2RM_VCEQ0_F] = 0x4,
5013     [NEON_2RM_VCLE0_F] = 0x4,
5014     [NEON_2RM_VCLT0_F] = 0x4,
5015     [NEON_2RM_VABS_F] = 0x4,
5016     [NEON_2RM_VNEG_F] = 0x4,
5017     [NEON_2RM_VSWP] = 0x1,
5018     [NEON_2RM_VTRN] = 0x7,
5019     [NEON_2RM_VUZP] = 0x7,
5020     [NEON_2RM_VZIP] = 0x7,
5021     [NEON_2RM_VMOVN] = 0x7,
5022     [NEON_2RM_VQMOVN] = 0x7,
5023     [NEON_2RM_VSHLL] = 0x7,
5024     [NEON_2RM_SHA1SU1] = 0x4,
5025     [NEON_2RM_VRINTN] = 0x4,
5026     [NEON_2RM_VRINTX] = 0x4,
5027     [NEON_2RM_VRINTA] = 0x4,
5028     [NEON_2RM_VRINTZ] = 0x4,
5029     [NEON_2RM_VCVT_F16_F32] = 0x2,
5030     [NEON_2RM_VRINTM] = 0x4,
5031     [NEON_2RM_VCVT_F32_F16] = 0x2,
5032     [NEON_2RM_VRINTP] = 0x4,
5033     [NEON_2RM_VCVTAU] = 0x4,
5034     [NEON_2RM_VCVTAS] = 0x4,
5035     [NEON_2RM_VCVTNU] = 0x4,
5036     [NEON_2RM_VCVTNS] = 0x4,
5037     [NEON_2RM_VCVTPU] = 0x4,
5038     [NEON_2RM_VCVTPS] = 0x4,
5039     [NEON_2RM_VCVTMU] = 0x4,
5040     [NEON_2RM_VCVTMS] = 0x4,
5041     [NEON_2RM_VRECPE] = 0x4,
5042     [NEON_2RM_VRSQRTE] = 0x4,
5043     [NEON_2RM_VRECPE_F] = 0x4,
5044     [NEON_2RM_VRSQRTE_F] = 0x4,
5045     [NEON_2RM_VCVT_FS] = 0x4,
5046     [NEON_2RM_VCVT_FU] = 0x4,
5047     [NEON_2RM_VCVT_SF] = 0x4,
5048     [NEON_2RM_VCVT_UF] = 0x4,
5049 };
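     /* For example, [NEON_2RM_VCVT_F16_F32] == 0x2 permits only size 1,
      * and unallocated op values such as 3 and 29 have no initializer
      * here, so their bitmaps are zero and they UNDEF for every size.
      */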
5050
5051 /* Translate a NEON data processing instruction.  Return nonzero if the
5052    instruction is invalid.
5053    We process data in a mixture of 32-bit and 64-bit chunks.
5054    Mostly we use 32-bit chunks so we can use normal scalar instructions.  */
5055
5056 static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
5057 {
5058     int op;
5059     int q;
5060     int rd, rn, rm;
5061     int size;
5062     int shift;
5063     int pass;
5064     int count;
5065     int pairwise;
5066     int u;
5067     uint32_t imm, mask;
5068     TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
5069     TCGv_i64 tmp64;
5070
5071     /* FIXME: this access check should not take precedence over UNDEF
5072      * for invalid encodings; we will generate incorrect syndrome information
5073      * for attempts to execute invalid vfp/neon encodings with FP disabled.
5074      */
5075     if (!s->cpacr_fpen) {
5076         gen_exception_insn(s, 4, EXCP_UDEF,
5077                            syn_fp_access_trap(1, 0xe, s->thumb));
5078         return 0;
5079     }
5080
5081     if (!s->vfp_enabled)
5082       return 1;
5083     q = (insn & (1 << 6)) != 0;
5084     u = (insn >> 24) & 1;
5085     VFP_DREG_D(rd, insn);
5086     VFP_DREG_N(rn, insn);
5087     VFP_DREG_M(rm, insn);
5088     size = (insn >> 20) & 3;
5089     if ((insn & (1 << 23)) == 0) {
5090         /* Three register same length.  */
5091         op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
5092         /* Catch invalid op and bad size combinations: UNDEF */
5093         if ((neon_3r_sizes[op] & (1 << size)) == 0) {
5094             return 1;
5095         }
5096         /* All insns of this form UNDEF for either this condition or the
5097          * superset of cases "Q==1"; we catch the latter later.
5098          */
5099         if (q && ((rd | rn | rm) & 1)) {
5100             return 1;
5101         }
5102         /*
5103          * The SHA-1/SHA-256 3-register instructions require special treatment
5104          * here, as their size field is overloaded as an op type selector, and
5105          * they all consume their input in a single pass.
5106          */
5107         if (op == NEON_3R_SHA) {
5108             if (!q) {
5109                 return 1;
5110             }
5111             if (!u) { /* SHA-1 */
5112                 if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)) {
5113                     return 1;
5114                 }
5115                 tmp = tcg_const_i32(rd);
5116                 tmp2 = tcg_const_i32(rn);
5117                 tmp3 = tcg_const_i32(rm);
5118                 tmp4 = tcg_const_i32(size);
5119                 gen_helper_crypto_sha1_3reg(cpu_env, tmp, tmp2, tmp3, tmp4);
5120                 tcg_temp_free_i32(tmp4);
5121             } else { /* SHA-256 */
5122                 if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA256) || size == 3) {
5123                     return 1;
5124                 }
5125                 tmp = tcg_const_i32(rd);
5126                 tmp2 = tcg_const_i32(rn);
5127                 tmp3 = tcg_const_i32(rm);
5128                 switch (size) {
5129                 case 0:
5130                     gen_helper_crypto_sha256h(cpu_env, tmp, tmp2, tmp3);
5131                     break;
5132                 case 1:
5133                     gen_helper_crypto_sha256h2(cpu_env, tmp, tmp2, tmp3);
5134                     break;
5135                 case 2:
5136                     gen_helper_crypto_sha256su1(cpu_env, tmp, tmp2, tmp3);
5137                     break;
5138                 }
5139             }
5140             tcg_temp_free_i32(tmp);
5141             tcg_temp_free_i32(tmp2);
5142             tcg_temp_free_i32(tmp3);
5143             return 0;
5144         }
5145         if (size == 3 && op != NEON_3R_LOGIC) {
5146             /* 64-bit element instructions. */
5147             for (pass = 0; pass < (q ? 2 : 1); pass++) {
5148                 neon_load_reg64(cpu_V0, rn + pass);
5149                 neon_load_reg64(cpu_V1, rm + pass);
5150                 switch (op) {
5151                 case NEON_3R_VQADD:
5152                     if (u) {
5153                         gen_helper_neon_qadd_u64(cpu_V0, cpu_env,
5154                                                  cpu_V0, cpu_V1);
5155                     } else {
5156                         gen_helper_neon_qadd_s64(cpu_V0, cpu_env,
5157                                                  cpu_V0, cpu_V1);
5158                     }
5159                     break;
5160                 case NEON_3R_VQSUB:
5161                     if (u) {
5162                         gen_helper_neon_qsub_u64(cpu_V0, cpu_env,
5163                                                  cpu_V0, cpu_V1);
5164                     } else {
5165                         gen_helper_neon_qsub_s64(cpu_V0, cpu_env,
5166                                                  cpu_V0, cpu_V1);
5167                     }
5168                     break;
5169                 case NEON_3R_VSHL:
5170                     if (u) {
5171                         gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
5172                     } else {
5173                         gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0);
5174                     }
5175                     break;
5176                 case NEON_3R_VQSHL:
5177                     if (u) {
5178                         gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5179                                                  cpu_V1, cpu_V0);
5180                     } else {
5181                         gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5182                                                  cpu_V1, cpu_V0);
5183                     }
5184                     break;
5185                 case NEON_3R_VRSHL:
5186                     if (u) {
5187                         gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
5188                     } else {
5189                         gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
5190                     }
5191                     break;
5192                 case NEON_3R_VQRSHL:
5193                     if (u) {
5194                         gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
5195                                                   cpu_V1, cpu_V0);
5196                     } else {
5197                         gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
5198                                                   cpu_V1, cpu_V0);
5199                     }
5200                     break;
5201                 case NEON_3R_VADD_VSUB:
5202                     if (u) {
5203                         tcg_gen_sub_i64(CPU_V001);
5204                     } else {
5205                         tcg_gen_add_i64(CPU_V001);
5206                     }
5207                     break;
5208                 default:
5209                     abort();
5210                 }
5211                 neon_store_reg64(cpu_V0, rd + pass);
5212             }
5213             return 0;
5214         }
5215         pairwise = 0;
5216         switch (op) {
5217         case NEON_3R_VSHL:
5218         case NEON_3R_VQSHL:
5219         case NEON_3R_VRSHL:
5220         case NEON_3R_VQRSHL:
5221             {
5222                 int rtmp;
5223                 /* Shift operands are reversed: data in Vm, shift counts in Vn; swap.  */
5224                 rtmp = rn;
5225                 rn = rm;
5226                 rm = rtmp;
5227             }
5228             break;
5229         case NEON_3R_VPADD:
5230             if (u) {
5231                 return 1;
5232             }
5233             /* Fall through */
5234         case NEON_3R_VPMAX:
5235         case NEON_3R_VPMIN:
5236             pairwise = 1;
5237             break;
5238         case NEON_3R_FLOAT_ARITH:
5239             pairwise = (u && size < 2); /* if VPADD (float) */
5240             break;
5241         case NEON_3R_FLOAT_MINMAX:
5242             pairwise = u; /* if VPMIN/VPMAX (float) */
5243             break;
5244         case NEON_3R_FLOAT_CMP:
5245             if (!u && size) {
5246                 /* no encoding for U=0 C=1x */
5247                 return 1;
5248             }
5249             break;
5250         case NEON_3R_FLOAT_ACMP:
5251             if (!u) {
5252                 return 1;
5253             }
5254             break;
5255         case NEON_3R_FLOAT_MISC:
5256             /* VMAXNM/VMINNM in ARMv8 */
5257             if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) {
5258                 return 1;
5259             }
5260             break;
5261         case NEON_3R_VMUL:
5262             if (u && (size != 0)) {
5263                 /* UNDEF on invalid size for polynomial subcase */
5264                 return 1;
5265             }
5266             break;
5267         case NEON_3R_VFM:
5268             if (!arm_dc_feature(s, ARM_FEATURE_VFP4) || u) {
5269                 return 1;
5270             }
5271             break;
5272         default:
5273             break;
5274         }
5275
5276         if (pairwise && q) {
5277             /* All the pairwise insns UNDEF if Q is set */
5278             return 1;
5279         }
5280
5281         for (pass = 0; pass < (q ? 4 : 2); pass++) {
5282
5283         if (pairwise) {
5284             /* Pairwise.  */
5285             if (pass < 1) {
5286                 tmp = neon_load_reg(rn, 0);
5287                 tmp2 = neon_load_reg(rn, 1);
5288             } else {
5289                 tmp = neon_load_reg(rm, 0);
5290                 tmp2 = neon_load_reg(rm, 1);
5291             }
5292         } else {
5293             /* Elementwise.  */
5294             tmp = neon_load_reg(rn, pass);
5295             tmp2 = neon_load_reg(rm, pass);
5296         }
5297         switch (op) {
5298         case NEON_3R_VHADD:
5299             GEN_NEON_INTEGER_OP(hadd);
5300             break;
5301         case NEON_3R_VQADD:
5302             GEN_NEON_INTEGER_OP_ENV(qadd);
5303             break;
5304         case NEON_3R_VRHADD:
5305             GEN_NEON_INTEGER_OP(rhadd);
5306             break;
5307         case NEON_3R_LOGIC: /* Logic ops.  */
5308             switch ((u << 2) | size) {
5309             case 0: /* VAND */
5310                 tcg_gen_and_i32(tmp, tmp, tmp2);
5311                 break;
5312             case 1: /* BIC */
5313                 tcg_gen_andc_i32(tmp, tmp, tmp2);
5314                 break;
5315             case 2: /* VORR */
5316                 tcg_gen_or_i32(tmp, tmp, tmp2);
5317                 break;
5318             case 3: /* VORN */
5319                 tcg_gen_orc_i32(tmp, tmp, tmp2);
5320                 break;
5321             case 4: /* VEOR */
5322                 tcg_gen_xor_i32(tmp, tmp, tmp2);
5323                 break;
5324             case 5: /* VBSL */
5325                 tmp3 = neon_load_reg(rd, pass);
5326                 gen_neon_bsl(tmp, tmp, tmp2, tmp3);
5327                 tcg_temp_free_i32(tmp3);
5328                 break;
5329             case 6: /* VBIT */
5330                 tmp3 = neon_load_reg(rd, pass);
5331                 gen_neon_bsl(tmp, tmp, tmp3, tmp2);
5332                 tcg_temp_free_i32(tmp3);
5333                 break;
5334             case 7: /* VBIF */
5335                 tmp3 = neon_load_reg(rd, pass);
5336                 gen_neon_bsl(tmp, tmp3, tmp, tmp2);
5337                 tcg_temp_free_i32(tmp3);
5338                 break;
5339             }
5340             break;
5341         case NEON_3R_VHSUB:
5342             GEN_NEON_INTEGER_OP(hsub);
5343             break;
5344         case NEON_3R_VQSUB:
5345             GEN_NEON_INTEGER_OP_ENV(qsub);
5346             break;
5347         case NEON_3R_VCGT:
5348             GEN_NEON_INTEGER_OP(cgt);
5349             break;
5350         case NEON_3R_VCGE:
5351             GEN_NEON_INTEGER_OP(cge);
5352             break;
5353         case NEON_3R_VSHL:
5354             GEN_NEON_INTEGER_OP(shl);
5355             break;
5356         case NEON_3R_VQSHL:
5357             GEN_NEON_INTEGER_OP_ENV(qshl);
5358             break;
5359         case NEON_3R_VRSHL:
5360             GEN_NEON_INTEGER_OP(rshl);
5361             break;
5362         case NEON_3R_VQRSHL:
5363             GEN_NEON_INTEGER_OP_ENV(qrshl);
5364             break;
5365         case NEON_3R_VMAX:
5366             GEN_NEON_INTEGER_OP(max);
5367             break;
5368         case NEON_3R_VMIN:
5369             GEN_NEON_INTEGER_OP(min);
5370             break;
5371         case NEON_3R_VABD:
5372             GEN_NEON_INTEGER_OP(abd);
5373             break;
5374         case NEON_3R_VABA:
5375             GEN_NEON_INTEGER_OP(abd);
5376             tcg_temp_free_i32(tmp2);
5377             tmp2 = neon_load_reg(rd, pass);
5378             gen_neon_add(size, tmp, tmp2);
5379             break;
5380         case NEON_3R_VADD_VSUB:
5381             if (!u) { /* VADD */
5382                 gen_neon_add(size, tmp, tmp2);
5383             } else { /* VSUB */
5384                 switch (size) {
5385                 case 0: gen_helper_neon_sub_u8(tmp, tmp, tmp2); break;
5386                 case 1: gen_helper_neon_sub_u16(tmp, tmp, tmp2); break;
5387                 case 2: tcg_gen_sub_i32(tmp, tmp, tmp2); break;
5388                 default: abort();
5389                 }
5390             }
5391             break;
5392         case NEON_3R_VTST_VCEQ:
5393             if (!u) { /* VTST */
5394                 switch (size) {
5395                 case 0: gen_helper_neon_tst_u8(tmp, tmp, tmp2); break;
5396                 case 1: gen_helper_neon_tst_u16(tmp, tmp, tmp2); break;
5397                 case 2: gen_helper_neon_tst_u32(tmp, tmp, tmp2); break;
5398                 default: abort();
5399                 }
5400             } else { /* VCEQ */
5401                 switch (size) {
5402                 case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
5403                 case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
5404                 case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
5405                 default: abort();
5406                 }
5407             }
5408             break;
5409         case NEON_3R_VML: /* VMLA, VMLAL, VMLS, VMLSL */
5410             switch (size) {
5411             case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
5412             case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
5413             case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
5414             default: abort();
5415             }
5416             tcg_temp_free_i32(tmp2);
5417             tmp2 = neon_load_reg(rd, pass);
5418             if (u) { /* VMLS */
5419                 gen_neon_rsb(size, tmp, tmp2);
5420             } else { /* VMLA */
5421                 gen_neon_add(size, tmp, tmp2);
5422             }
5423             break;
5424         case NEON_3R_VMUL:
5425             if (u) { /* polynomial */
5426                 gen_helper_neon_mul_p8(tmp, tmp, tmp2);
5427             } else { /* Integer */
5428                 switch (size) {
5429                 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
5430                 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
5431                 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
5432                 default: abort();
5433                 }
5434             }
5435             break;
5436         case NEON_3R_VPMAX:
5437             GEN_NEON_INTEGER_OP(pmax);
5438             break;
5439         case NEON_3R_VPMIN:
5440             GEN_NEON_INTEGER_OP(pmin);
5441             break;
5442         case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high.  */
5443             if (!u) { /* VQDMULH */
5444                 switch (size) {
5445                 case 1:
5446                     gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
5447                     break;
5448                 case 2:
5449                     gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
5450                     break;
5451                 default: abort();
5452                 }
5453             } else { /* VQRDMULH */
5454                 switch (size) {
5455                 case 1:
5456                     gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
5457                     break;
5458                 case 2:
5459                     gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
5460                     break;
5461                 default: abort();
5462                 }
5463             }
5464             break;
5465         case NEON_3R_VPADD:
5466             switch (size) {
5467             case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
5468             case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
5469             case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
5470             default: abort();
5471             }
5472             break;
5473         case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
5474         {
5475             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5476             switch ((u << 2) | size) {
5477             case 0: /* VADD */
5478             case 4: /* VPADD */
5479                 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5480                 break;
5481             case 2: /* VSUB */
5482                 gen_helper_vfp_subs(tmp, tmp, tmp2, fpstatus);
5483                 break;
5484             case 6: /* VABD */
5485                 gen_helper_neon_abd_f32(tmp, tmp, tmp2, fpstatus);
5486                 break;
5487             default:
5488                 abort();
5489             }
5490             tcg_temp_free_ptr(fpstatus);
5491             break;
5492         }
5493         case NEON_3R_FLOAT_MULTIPLY:
5494         {
5495             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5496             gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
5497             if (!u) {
5498                 tcg_temp_free_i32(tmp2);
5499                 tmp2 = neon_load_reg(rd, pass);
5500                 if (size == 0) {
5501                     gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5502                 } else {
5503                     gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
5504                 }
5505             }
5506             tcg_temp_free_ptr(fpstatus);
5507             break;
5508         }
5509         case NEON_3R_FLOAT_CMP:
5510         {
5511             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5512             if (!u) {
5513                 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
5514             } else {
5515                 if (size == 0) {
5516                     gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
5517                 } else {
5518                     gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
5519                 }
5520             }
5521             tcg_temp_free_ptr(fpstatus);
5522             break;
5523         }
5524         case NEON_3R_FLOAT_ACMP:
5525         {
5526             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5527             if (size == 0) {
5528                 gen_helper_neon_acge_f32(tmp, tmp, tmp2, fpstatus);
5529             } else {
5530                 gen_helper_neon_acgt_f32(tmp, tmp, tmp2, fpstatus);
5531             }
5532             tcg_temp_free_ptr(fpstatus);
5533             break;
5534         }
5535         case NEON_3R_FLOAT_MINMAX:
5536         {
5537             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5538             if (size == 0) {
5539                 gen_helper_vfp_maxs(tmp, tmp, tmp2, fpstatus);
5540             } else {
5541                 gen_helper_vfp_mins(tmp, tmp, tmp2, fpstatus);
5542             }
5543             tcg_temp_free_ptr(fpstatus);
5544             break;
5545         }
5546         case NEON_3R_FLOAT_MISC:
5547             if (u) {
5548                 /* VMAXNM/VMINNM */
5549                 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5550                 if (size == 0) {
5551                     gen_helper_vfp_maxnums(tmp, tmp, tmp2, fpstatus);
5552                 } else {
5553                     gen_helper_vfp_minnums(tmp, tmp, tmp2, fpstatus);
5554                 }
5555                 tcg_temp_free_ptr(fpstatus);
5556             } else {
5557                 if (size == 0) {
5558                     gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
5559                 } else {
5560                     gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
5561                 }
5562             }
5563             break;
5564         case NEON_3R_VFM:
5565         {
5566             /* VFMA, VFMS: fused multiply-add */
5567             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5568             TCGv_i32 tmp3 = neon_load_reg(rd, pass);
5569             if (size) {
5570                 /* VFMS */
5571                 gen_helper_vfp_negs(tmp, tmp);
5572             }
5573             gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus);
5574             tcg_temp_free_i32(tmp3);
5575             tcg_temp_free_ptr(fpstatus);
5576             break;
5577         }
5578         default:
5579             abort();
5580         }
5581         tcg_temp_free_i32(tmp2);
5582
5583         /* Save the result.  For elementwise operations we can put it
5584            straight into the destination register.  For pairwise operations
5585            we have to be careful to avoid clobbering the source operands.  */
5586         if (pairwise && rd == rm) {
5587             neon_store_scratch(pass, tmp);
5588         } else {
5589             neon_store_reg(rd, pass, tmp);
5590         }
5591
5592         } /* for pass */
5593         if (pairwise && rd == rm) {
5594             for (pass = 0; pass < (q ? 4 : 2); pass++) {
5595                 tmp = neon_load_scratch(pass);
5596                 neon_store_reg(rd, pass, tmp);
5597             }
5598         }
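             /* Example of the rd == rm hazard handled above: for
              * VPADD d0, d1, d0, pass 0 would overwrite d0 before pass 1
              * reads it, so results are staged in the scratch buffer.
              * rd == rn is safe because rn is fully consumed in pass 0
              * before anything is stored.
              */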
5599         /* End of 3 register same size operations.  */
5600     } else if (insn & (1 << 4)) {
5601         if ((insn & 0x00380080) != 0) {
5602             /* Two registers and shift.  */
5603             op = (insn >> 8) & 0xf;
5604             if (insn & (1 << 7)) {
5605                 /* 64-bit shift. */
5606                 if (op > 7) {
5607                     return 1;
5608                 }
5609                 size = 3;
5610             } else {
5611                 size = 2;
5612                 while ((insn & (1 << (size + 19))) == 0)
5613                     size--;
5614             }
5615             shift = (insn >> 16) & ((1 << (3 + size)) - 1);
5616             /* To avoid excessive duplication of ops we implement shift
5617                by immediate using the variable shift operations.  */
5618             if (op < 8) {
5619                 /* Shift by immediate:
5620                    VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU.  */
5621                 if (q && ((rd | rm) & 1)) {
5622                     return 1;
5623                 }
5624                 if (!u && (op == 4 || op == 6)) {
5625                     return 1;
5626                 }
5627                 /* Right shifts are encoded as N - shift, where N is the
5628                    element size in bits.  */
5629                 if (op <= 4)
5630                     shift = shift - (1 << (size + 3));
5631                 if (size == 3) {
5632                     count = q + 1;
5633                 } else {
5634                     count = q ? 4 : 2;
5635                 }
5636                 switch (size) {
5637                 case 0:
5638                     imm = (uint8_t) shift;
5639                     imm |= imm << 8;
5640                     imm |= imm << 16;
5641                     break;
5642                 case 1:
5643                     imm = (uint16_t) shift;
5644                     imm |= imm << 16;
5645                     break;
5646                 case 2:
5647                 case 3:
5648                     imm = shift;
5649                     break;
5650                 default:
5651                     abort();
5652                 }
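                     /* Worked example: VSHR.U8 #3 encodes imm6 == 13, so
                      * the field extracted above is 13 & 7 == 5 and shift
                      * becomes 5 - 8 == -3.  Replication gives imm ==
                      * 0xfdfdfdfd, i.e. a per-byte count of -3; the
                      * variable-shift helpers take a signed count from the
                      * bottom byte of each element, so a negative count
                      * shifts right.
                      */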
5653
5654                 for (pass = 0; pass < count; pass++) {
5655                     if (size == 3) {
5656                         neon_load_reg64(cpu_V0, rm + pass);
5657                         tcg_gen_movi_i64(cpu_V1, imm);
5658                         switch (op) {
5659                         case 0:  /* VSHR */
5660                         case 1:  /* VSRA */
5661                             if (u)
5662                                 gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
5663                             else
5664                                 gen_helper_neon_shl_s64(cpu_V0, cpu_V0, cpu_V1);
5665                             break;
5666                         case 2: /* VRSHR */
5667                         case 3: /* VRSRA */
5668                             if (u)
5669                                 gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, cpu_V1);
5670                             else
5671                                 gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1);
5672                             break;
5673                         case 4: /* VSRI */
5674                         case 5: /* VSHL, VSLI */
5675                             gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
5676                             break;
5677                         case 6: /* VQSHLU */
5678                             gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
5679                                                       cpu_V0, cpu_V1);
5680                             break;
5681                         case 7: /* VQSHL */
5682                             if (u) {
5683                                 gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5684                                                          cpu_V0, cpu_V1);
5685                             } else {
5686                                 gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5687                                                          cpu_V0, cpu_V1);
5688                             }
5689                             break;
5690                         }
5691                         if (op == 1 || op == 3) {
5692                             /* Accumulate.  */
5693                             neon_load_reg64(cpu_V1, rd + pass);
5694                             tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
5695                         } else if (op == 4 || (op == 5 && u)) {
5696                             /* Insert */
5697                             uint64_t mask;
5698                             neon_load_reg64(cpu_V1, rd + pass);
5699                             if (shift < -63 || shift > 63) {
5700                                 mask = 0;
5701                             } else {
5702                                 if (op == 4) {
5703                                     mask = 0xffffffffffffffffull >> -shift;
5704                                 } else {
5705                                     mask = 0xffffffffffffffffull << shift;
5706                                 }
5707                             }
5708                             tcg_gen_andi_i64(cpu_V1, cpu_V1, ~mask);
5709                             tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
5710                         }
5711                         neon_store_reg64(cpu_V0, rd + pass);
5712                     } else { /* size < 3 */
5713                         /* Operands: the element in tmp, the shift count in tmp2.  */
5714                         tmp = neon_load_reg(rm, pass);
5715                         tmp2 = tcg_temp_new_i32();
5716                         tcg_gen_movi_i32(tmp2, imm);
5717                         switch (op) {
5718                         case 0:  /* VSHR */
5719                         case 1:  /* VSRA */
5720                             GEN_NEON_INTEGER_OP(shl);
5721                             break;
5722                         case 2: /* VRSHR */
5723                         case 3: /* VRSRA */
5724                             GEN_NEON_INTEGER_OP(rshl);
5725                             break;
5726                         case 4: /* VSRI */
5727                         case 5: /* VSHL, VSLI */
5728                             switch (size) {
5729                             case 0: gen_helper_neon_shl_u8(tmp, tmp, tmp2); break;
5730                             case 1: gen_helper_neon_shl_u16(tmp, tmp, tmp2); break;
5731                             case 2: gen_helper_neon_shl_u32(tmp, tmp, tmp2); break;
5732                             default: abort();
5733                             }
5734                             break;
5735                         case 6: /* VQSHLU */
5736                             switch (size) {
5737                             case 0:
5738                                 gen_helper_neon_qshlu_s8(tmp, cpu_env,
5739                                                          tmp, tmp2);
5740                                 break;
5741                             case 1:
5742                                 gen_helper_neon_qshlu_s16(tmp, cpu_env,
5743                                                           tmp, tmp2);
5744                                 break;
5745                             case 2:
5746                                 gen_helper_neon_qshlu_s32(tmp, cpu_env,
5747                                                           tmp, tmp2);
5748                                 break;
5749                             default:
5750                                 abort();
5751                             }
5752                             break;
5753                         case 7: /* VQSHL */
5754                             GEN_NEON_INTEGER_OP_ENV(qshl);
5755                             break;
5756                         }
5757                         tcg_temp_free_i32(tmp2);
5758
5759                         if (op == 1 || op == 3) {
5760                             /* Accumulate.  */
5761                             tmp2 = neon_load_reg(rd, pass);
5762                             gen_neon_add(size, tmp, tmp2);
5763                             tcg_temp_free_i32(tmp2);
5764                         } else if (op == 4 || (op == 5 && u)) {
5765                             /* Insert */
5766                             switch (size) {
5767                             case 0:
5768                                 if (op == 4)
5769                                     mask = 0xff >> -shift;
5770                                 else
5771                                     mask = (uint8_t)(0xff << shift);
5772                                 mask |= mask << 8;
5773                                 mask |= mask << 16;
5774                                 break;
5775                             case 1:
5776                                 if (op == 4)
5777                                     mask = 0xffff >> -shift;
5778                                 else
5779                                     mask = (uint16_t)(0xffff << shift);
5780                                 mask |= mask << 16;
5781                                 break;
5782                             case 2:
5783                                 if (shift < -31 || shift > 31) {
5784                                     mask = 0;
5785                                 } else {
5786                                     if (op == 4)
5787                                         mask = 0xffffffffu >> -shift;
5788                                     else
5789                                         mask = 0xffffffffu << shift;
5790                                 }
5791                                 break;
5792                             default:
5793                                 abort();
5794                             }
5795                             tmp2 = neon_load_reg(rd, pass);
5796                             tcg_gen_andi_i32(tmp, tmp, mask);
5797                             tcg_gen_andi_i32(tmp2, tmp2, ~mask);
5798                             tcg_gen_or_i32(tmp, tmp, tmp2);
5799                             tcg_temp_free_i32(tmp2);
5800                         }
5801                         neon_store_reg(rd, pass, tmp);
5802                     }
5803                 } /* for pass */
5804             } else if (op < 10) {
5805                 /* Shift by immediate and narrow:
5806                    VSHRN, VRSHRN, VQSHRN, VQRSHRN.  */
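                     /* When op == 8 and U is set this is VQSHRUN/VQRSHRUN
                      * (signed input, unsigned saturating result), so the
                      * wide value must be shifted as signed even though
                      * U == 1; for plain VSHRN/VRSHRN the signedness of
                      * the wide shift cannot affect the truncated result.
                      */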
5807                 int input_unsigned = (op == 8) ? !u : u;
5808                 if (rm & 1) {
5809                     return 1;
5810                 }
5811                 shift = shift - (1 << (size + 3));
5812                 size++;
5813                 if (size == 3) {
5814                     tmp64 = tcg_const_i64(shift);
5815                     neon_load_reg64(cpu_V0, rm);
5816                     neon_load_reg64(cpu_V1, rm + 1);
5817                     for (pass = 0; pass < 2; pass++) {
5818                         TCGv_i64 in;
5819                         if (pass == 0) {
5820                             in = cpu_V0;
5821                         } else {
5822                             in = cpu_V1;
5823                         }
5824                         if (q) {
5825                             if (input_unsigned) {
5826                                 gen_helper_neon_rshl_u64(cpu_V0, in, tmp64);
5827                             } else {
5828                                 gen_helper_neon_rshl_s64(cpu_V0, in, tmp64);
5829                             }
5830                         } else {
5831                             if (input_unsigned) {
5832                                 gen_helper_neon_shl_u64(cpu_V0, in, tmp64);
5833                             } else {
5834                                 gen_helper_neon_shl_s64(cpu_V0, in, tmp64);
5835                             }
5836                         }
5837                         tmp = tcg_temp_new_i32();
5838                         gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5839                         neon_store_reg(rd, pass, tmp);
5840                     } /* for pass */
5841                     tcg_temp_free_i64(tmp64);
5842                 } else {
5843                     if (size == 1) {
5844                         imm = (uint16_t)shift;
5845                         imm |= imm << 16;
5846                     } else {
5847                         /* size == 2 */
5848                         imm = (uint32_t)shift;
5849                     }
5850                     tmp2 = tcg_const_i32(imm);
5851                     tmp4 = neon_load_reg(rm + 1, 0);
5852                     tmp5 = neon_load_reg(rm + 1, 1);
5853                     for (pass = 0; pass < 2; pass++) {
5854                         if (pass == 0) {
5855                             tmp = neon_load_reg(rm, 0);
5856                         } else {
5857                             tmp = tmp4;
5858                         }
5859                         gen_neon_shift_narrow(size, tmp, tmp2, q,
5860                                               input_unsigned);
5861                         if (pass == 0) {
5862                             tmp3 = neon_load_reg(rm, 1);
5863                         } else {
5864                             tmp3 = tmp5;
5865                         }
5866                         gen_neon_shift_narrow(size, tmp3, tmp2, q,
5867                                               input_unsigned);
5868                         tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
5869                         tcg_temp_free_i32(tmp);
5870                         tcg_temp_free_i32(tmp3);
5871                         tmp = tcg_temp_new_i32();
5872                         gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5873                         neon_store_reg(rd, pass, tmp);
5874                     } /* for pass */
5875                     tcg_temp_free_i32(tmp2);
5876                 }
5877             } else if (op == 10) {
5878                 /* VSHLL, VMOVL */
5879                 if (q || (rd & 1)) {
5880                     return 1;
5881                 }
5882                 tmp = neon_load_reg(rm, 0);
5883                 tmp2 = neon_load_reg(rm, 1);
5884                 for (pass = 0; pass < 2; pass++) {
5885                     if (pass == 1)
5886                         tmp = tmp2;
5887
5888                     gen_neon_widen(cpu_V0, tmp, size, u);
5889
5890                     if (shift != 0) {
5891                         /* The shift is less than the width of the source
5892                            type, so we can just shift the whole register.  */
5893                         tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
5894                         /* Mask the shifted result: we must clear the
5895                          * stray bits where the high (left) bits of one
5896                          * narrow input have been shifted into the low
5897                          * (right) bits of the widened lane above it.
5898                          */
5899                         if (size < 2 || !u) {
5900                             uint64_t imm64;
5901                             if (size == 0) {
5902                                 imm = (0xffu >> (8 - shift));
5903                                 imm |= imm << 16;
5904                             } else if (size == 1) {
5905                                 imm = 0xffff >> (16 - shift);
5906                             } else {
5907                                 /* size == 2 */
5908                                 imm = 0xffffffff >> (32 - shift);
5909                             }
5910                             if (size < 2) {
5911                                 imm64 = imm | (((uint64_t)imm) << 32);
5912                             } else {
5913                                 imm64 = imm;
5914                             }
5915                             tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
5916                         }
5917                     }
5918                     neon_store_reg64(cpu_V0, rd + pass);
5919                 }
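                     /* Worked example of the masking: for VSHLL.S8 #4 with
                      * all input bytes 0xf0, widening gives 16-bit lanes of
                      * 0xfff0 and the 64-bit shift yields
                      * 0xff0fff0fff0fff00, where each low nibble 0xf has
                      * leaked in from the lane below; imm64 is
                      * 0x000f000f000f000f, and clearing those bits restores
                      * the correct per-lane result 0xff00 (-16 << 4).
                      */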
5920             } else if (op >= 14) {
5921                 /* VCVT fixed-point.  */
5922                 if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
5923                     return 1;
5924                 }
5925                 /* We have already masked out the must-be-1 top bit of imm6,
5926                  * hence this 32-shift where the ARM ARM has 64-imm6.
5927                  */
5928                 shift = 32 - shift;
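                 /* e.g. imm6 == 40 is extracted as 8 (bit 5 masked off),
                  * so shift becomes 32 - 8 == 24 fraction bits, which is
                  * 64 - imm6 as the ARM ARM specifies.
                  */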
5929                 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5930                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass));
5931                     if (!(op & 1)) {
5932                         if (u)
5933                             gen_vfp_ulto(0, shift, 1);
5934                         else
5935                             gen_vfp_slto(0, shift, 1);
5936                     } else {
5937                         if (u)
5938                             gen_vfp_toul(0, shift, 1);
5939                         else
5940                             gen_vfp_tosl(0, shift, 1);
5941                     }
5942                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, pass));
5943                 }
5944             } else {
5945                 return 1;
5946             }
5947         } else { /* (insn & 0x00380080) == 0 */
5948             int invert;
5949             if (q && (rd & 1)) {
5950                 return 1;
5951             }
5952
5953             op = (insn >> 8) & 0xf;
5954             /* One register and immediate.  */
5955             imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
5956             invert = (insn & (1 << 5)) != 0;
5957             /* Note that op = 2,3,4,5,6,7,10,11,12,13 with imm == 0 is
5958              * UNPREDICTABLE.  We choose not to special-case this and
5959              * behave as if a valid constant encoding of 0 had been given.
5960              */
5961             switch (op) {
5962             case 0: case 1:
5963                 /* no-op */
5964                 break;
5965             case 2: case 3:
5966                 imm <<= 8;
5967                 break;
5968             case 4: case 5:
5969                 imm <<= 16;
5970                 break;
5971             case 6: case 7:
5972                 imm <<= 24;
5973                 break;
5974             case 8: case 9:
5975                 imm |= imm << 16;
5976                 break;
5977             case 10: case 11:
5978                 imm = (imm << 8) | (imm << 24);
5979                 break;
5980             case 12:
5981                 imm = (imm << 8) | 0xff;
5982                 break;
5983             case 13:
5984                 imm = (imm << 16) | 0xffff;
5985                 break;
5986             case 14:
5987                 imm |= (imm << 8) | (imm << 16) | (imm << 24);
5988                 if (invert)
5989                     imm = ~imm;
5990                 break;
5991             case 15:
5992                 if (invert) {
5993                     return 1;
5994                 }
5995                 imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
5996                       | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
5997                 break;
5998             }
5999             if (invert)
6000                 imm = ~imm;
6001
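            /* Worked example: op == 12 with imm8 == 0xab expands above to
             * imm == 0xabff, i.e. the 0x0000XXFF pattern that is written
             * to every 32-bit element below; op == 14 with invert set is
             * the per-bit byte-mask form and is expanded per pass instead.
             */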
6002             for (pass = 0; pass < (q ? 4 : 2); pass++) {
6003                 if (op & 1 && op < 12) {
6004                     tmp = neon_load_reg(rd, pass);
6005                     if (invert) {
6006                         /* The immediate value has already been inverted, so
6007                            BIC becomes AND.  */
6008                         tcg_gen_andi_i32(tmp, tmp, imm);
6009                     } else {
6010                         tcg_gen_ori_i32(tmp, tmp, imm);
6011                     }
6012                 } else {
6013                     /* VMOV, VMVN.  */
6014                     tmp = tcg_temp_new_i32();
6015                     if (op == 14 && invert) {
6016                         int n;
6017                         uint32_t val;
6018                         val = 0;
6019                         for (n = 0; n < 4; n++) {
6020                             if (imm & (1 << (n + (pass & 1) * 4)))
6021                                 val |= 0xff << (n * 8);
6022                         }
6023                         tcg_gen_movi_i32(tmp, val);
6024                     } else {
6025                         tcg_gen_movi_i32(tmp, imm);
6026                     }
6027                 }
6028                 neon_store_reg(rd, pass, tmp);
6029             }
6030         }
6031     } else { /* (insn & 0x00800010) == 0x00800000 */
6032         if (size != 3) {
6033             op = (insn >> 8) & 0xf;
6034             if ((insn & (1 << 6)) == 0) {
6035                 /* Three registers of different lengths.  */
6036                 int src1_wide;
6037                 int src2_wide;
6038                 int prewiden;
6039                 /* undefreq: bit 0 : UNDEF if size == 0
6040                  *           bit 1 : UNDEF if size == 1
6041                  *           bit 2 : UNDEF if size == 2
6042                  *           bit 3 : UNDEF if U == 1
6043                  * Note that [2:0] set implies 'always UNDEF'
6044                  */
6045                 int undefreq;
6046                 /* prewiden, src1_wide, src2_wide, undefreq */
6047                 static const int neon_3reg_wide[16][4] = {
6048                     {1, 0, 0, 0}, /* VADDL */
6049                     {1, 1, 0, 0}, /* VADDW */
6050                     {1, 0, 0, 0}, /* VSUBL */
6051                     {1, 1, 0, 0}, /* VSUBW */
6052                     {0, 1, 1, 0}, /* VADDHN */
6053                     {0, 0, 0, 0}, /* VABAL */
6054                     {0, 1, 1, 0}, /* VSUBHN */
6055                     {0, 0, 0, 0}, /* VABDL */
6056                     {0, 0, 0, 0}, /* VMLAL */
6057                     {0, 0, 0, 9}, /* VQDMLAL */
6058                     {0, 0, 0, 0}, /* VMLSL */
6059                     {0, 0, 0, 9}, /* VQDMLSL */
6060                     {0, 0, 0, 0}, /* Integer VMULL */
6061                     {0, 0, 0, 1}, /* VQDMULL */
6062                     {0, 0, 0, 0xa}, /* Polynomial VMULL */
6063                     {0, 0, 0, 7}, /* Reserved: always UNDEF */
6064                 };
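                /* For example VQDMLAL's undefreq of 9 (binary 1001) means
                 * UNDEF if size == 0 or U == 1, and polynomial VMULL's 0xa
                 * (binary 1010) means UNDEF if size == 1 or U == 1; the
                 * reserved entry's 7 sets all three size bits, i.e. always
                 * UNDEF.
                 */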
6065
6066                 prewiden = neon_3reg_wide[op][0];
6067                 src1_wide = neon_3reg_wide[op][1];
6068                 src2_wide = neon_3reg_wide[op][2];
6069                 undefreq = neon_3reg_wide[op][3];
6070
6071                 if ((undefreq & (1 << size)) ||
6072                     ((undefreq & 8) && u)) {
6073                     return 1;
6074                 }
6075                 if ((src1_wide && (rn & 1)) ||
6076                     (src2_wide && (rm & 1)) ||
6077                     (!src2_wide && (rd & 1))) {
6078                     return 1;
6079                 }
6080
6081                 /* Handle VMULL.P64 (Polynomial 64x64 to 128 bit multiply)
6082                  * outside the loop below as it only performs a single pass.
6083                  */
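                /* A polynomial multiply is a carry-less multiply: partial
                 * products are combined with XOR rather than addition, so
                 * e.g. 0b11 * 0b11 = 0b101 ((x+1)^2 == x^2 + 1 over GF(2)).
                 */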
6084                 if (op == 14 && size == 2) {
6085                     TCGv_i64 tcg_rn, tcg_rm, tcg_rd;
6086
6087                     if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
6088                         return 1;
6089                     }
6090                     tcg_rn = tcg_temp_new_i64();
6091                     tcg_rm = tcg_temp_new_i64();
6092                     tcg_rd = tcg_temp_new_i64();
6093                     neon_load_reg64(tcg_rn, rn);
6094                     neon_load_reg64(tcg_rm, rm);
6095                     gen_helper_neon_pmull_64_lo(tcg_rd, tcg_rn, tcg_rm);
6096                     neon_store_reg64(tcg_rd, rd);
6097                     gen_helper_neon_pmull_64_hi(tcg_rd, tcg_rn, tcg_rm);
6098                     neon_store_reg64(tcg_rd, rd + 1);
6099                     tcg_temp_free_i64(tcg_rn);
6100                     tcg_temp_free_i64(tcg_rm);
6101                     tcg_temp_free_i64(tcg_rd);
6102                     return 0;
6103                 }
6104
6105                 /* Avoid overlapping operands.  Wide source operands are
6106                    always aligned so will never overlap with wide
6107                    destinations in problematic ways.  */
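                /* For example a long multiply with rd == rn reuses the
                 * source D register as the low half of the destination Q
                 * register: pass 0's 64-bit store would clobber the half
                 * of rn still needed in pass 1, so that half is stashed
                 * in a scratch slot first.
                 */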
6108                 if (rd == rm && !src2_wide) {
6109                     tmp = neon_load_reg(rm, 1);
6110                     neon_store_scratch(2, tmp);
6111                 } else if (rd == rn && !src1_wide) {
6112                     tmp = neon_load_reg(rn, 1);
6113                     neon_store_scratch(2, tmp);
6114                 }
6115                 TCGV_UNUSED_I32(tmp3);
6116                 for (pass = 0; pass < 2; pass++) {
6117                     if (src1_wide) {
6118                         neon_load_reg64(cpu_V0, rn + pass);
6119                         TCGV_UNUSED_I32(tmp);
6120                     } else {
6121                         if (pass == 1 && rd == rn) {
6122                             tmp = neon_load_scratch(2);
6123                         } else {
6124                             tmp = neon_load_reg(rn, pass);
6125                         }
6126                         if (prewiden) {
6127                             gen_neon_widen(cpu_V0, tmp, size, u);
6128                         }
6129                     }
6130                     if (src2_wide) {
6131                         neon_load_reg64(cpu_V1, rm + pass);
6132                         TCGV_UNUSED_I32(tmp2);
6133                     } else {
6134                         if (pass == 1 && rd == rm) {
6135                             tmp2 = neon_load_scratch(2);
6136                         } else {
6137                             tmp2 = neon_load_reg(rm, pass);
6138                         }
6139                         if (prewiden) {
6140                             gen_neon_widen(cpu_V1, tmp2, size, u);
6141                         }
6142                     }
6143                     switch (op) {
6144                     case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
6145                         gen_neon_addl(size);
6146                         break;
6147                     case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
6148                         gen_neon_subl(size);
6149                         break;
6150                     case 5: case 7: /* VABAL, VABDL */
6151                         switch ((size << 1) | u) {
6152                         case 0:
6153                             gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
6154                             break;
6155                         case 1:
6156                             gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
6157                             break;
6158                         case 2:
6159                             gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
6160                             break;
6161                         case 3:
6162                             gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
6163                             break;
6164                         case 4:
6165                             gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
6166                             break;
6167                         case 5:
6168                             gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
6169                             break;
6170                         default: abort();
6171                         }
6172                         tcg_temp_free_i32(tmp2);
6173                         tcg_temp_free_i32(tmp);
6174                         break;
6175                     case 8: case 9: case 10: case 11: case 12: case 13:
6176                         /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
6177                         gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
6178                         break;
6179                     case 14: /* Polynomial VMULL */
6180                         gen_helper_neon_mull_p8(cpu_V0, tmp, tmp2);
6181                         tcg_temp_free_i32(tmp2);
6182                         tcg_temp_free_i32(tmp);
6183                         break;
6184                     default: /* 15 is RESERVED: caught earlier  */
6185                         abort();
6186                     }
6187                     if (op == 13) {
6188                         /* VQDMULL */
6189                         gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6190                         neon_store_reg64(cpu_V0, rd + pass);
6191                     } else if (op == 5 || (op >= 8 && op <= 11)) {
6192                         /* Accumulate.  */
6193                         neon_load_reg64(cpu_V1, rd + pass);
6194                         switch (op) {
6195                         case 10: /* VMLSL */
6196                             gen_neon_negl(cpu_V0, size);
6197                             /* Fall through */
6198                         case 5: case 8: /* VABAL, VMLAL */
6199                             gen_neon_addl(size);
6200                             break;
6201                         case 9: case 11: /* VQDMLAL, VQDMLSL */
6202                             gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6203                             if (op == 11) {
6204                                 gen_neon_negl(cpu_V0, size);
6205                             }
6206                             gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
6207                             break;
6208                         default:
6209                             abort();
6210                         }
6211                         neon_store_reg64(cpu_V0, rd + pass);
6212                     } else if (op == 4 || op == 6) {
6213                         /* Narrowing operation.  */
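                        /* E.g. VADDHN.I32 keeps bits [63:32] of each 64-bit
                         * sum (shift right 32 then truncate below), while the
                         * rounded VRADDHN variant (u set) first adds the
                         * rounding constant 1 << 31.
                         */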
6214                         tmp = tcg_temp_new_i32();
6215                         if (!u) {
6216                             switch (size) {
6217                             case 0:
6218                                 gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
6219                                 break;
6220                             case 1:
6221                                 gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
6222                                 break;
6223                             case 2:
6224                                 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
6225                                 tcg_gen_trunc_i64_i32(tmp, cpu_V0);
6226                                 break;
6227                             default: abort();
6228                             }
6229                         } else {
6230                             switch (size) {
6231                             case 0:
6232                                 gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
6233                                 break;
6234                             case 1:
6235                                 gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
6236                                 break;
6237                             case 2:
6238                                 tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
6239                                 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
6240                                 tcg_gen_trunc_i64_i32(tmp, cpu_V0);
6241                                 break;
6242                             default: abort();
6243                             }
6244                         }
6245                         if (pass == 0) {
6246                             tmp3 = tmp;
6247                         } else {
6248                             neon_store_reg(rd, 0, tmp3);
6249                             neon_store_reg(rd, 1, tmp);
6250                         }
6251                     } else {
6252                         /* Write back the result.  */
6253                         neon_store_reg64(cpu_V0, rd + pass);
6254                     }
6255                 }
6256             } else {
6257                 /* Two registers and a scalar. NB that for ops of this form
6258                  * the ARM ARM labels bit 24 as Q, but it is in our variable
6259                  * 'u', not 'q'.
6260                  */
6261                 if (size == 0) {
6262                     return 1;
6263                 }
6264                 switch (op) {
6265                 case 1: /* Floating point VMLA scalar */
6266                 case 5: /* Floating point VMLS scalar */
6267                 case 9: /* Floating point VMUL scalar */
6268                     if (size == 1) {
6269                         return 1;
6270                     }
6271                     /* fall through */
6272                 case 0: /* Integer VMLA scalar */
6273                 case 4: /* Integer VMLS scalar */
6274                 case 8: /* Integer VMUL scalar */
6275                 case 12: /* VQDMULH scalar */
6276                 case 13: /* VQRDMULH scalar */
6277                     if (u && ((rd | rn) & 1)) {
6278                         return 1;
6279                     }
6280                     tmp = neon_get_scalar(size, rm);
6281                     neon_store_scratch(0, tmp);
6282                     for (pass = 0; pass < (u ? 4 : 2); pass++) {
6283                         tmp = neon_load_scratch(0);
6284                         tmp2 = neon_load_reg(rn, pass);
6285                         if (op == 12) {
6286                             if (size == 1) {
6287                                 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
6288                             } else {
6289                                 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
6290                             }
6291                         } else if (op == 13) {
6292                             if (size == 1) {
6293                                 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
6294                             } else {
6295                                 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
6296                             }
6297                         } else if (op & 1) {
6298                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6299                             gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
6300                             tcg_temp_free_ptr(fpstatus);
6301                         } else {
6302                             switch (size) {
6303                             case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
6304                             case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
6305                             case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
6306                             default: abort();
6307                             }
6308                         }
6309                         tcg_temp_free_i32(tmp2);
6310                         if (op < 8) {
6311                             /* Accumulate.  */
6312                             tmp2 = neon_load_reg(rd, pass);
6313                             switch (op) {
6314                             case 0:
6315                                 gen_neon_add(size, tmp, tmp2);
6316                                 break;
6317                             case 1:
6318                             {
6319                                 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6320                                 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
6321                                 tcg_temp_free_ptr(fpstatus);
6322                                 break;
6323                             }
6324                             case 4:
6325                                 gen_neon_rsb(size, tmp, tmp2);
6326                                 break;
6327                             case 5:
6328                             {
6329                                 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6330                                 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
6331                                 tcg_temp_free_ptr(fpstatus);
6332                                 break;
6333                             }
6334                             default:
6335                                 abort();
6336                             }
6337                             tcg_temp_free_i32(tmp2);
6338                         }
6339                         neon_store_reg(rd, pass, tmp);
6340                     }
6341                     break;
6342                 case 3: /* VQDMLAL scalar */
6343                 case 7: /* VQDMLSL scalar */
6344                 case 11: /* VQDMULL scalar */
6345                     if (u == 1) {
6346                         return 1;
6347                     }
6348                     /* fall through */
6349                 case 2: /* VMLAL scalar */
6350                 case 6: /* VMLSL scalar */
6351                 case 10: /* VMULL scalar */
6352                     if (rd & 1) {
6353                         return 1;
6354                     }
6355                     tmp2 = neon_get_scalar(size, rm);
6356                     /* We need a copy of tmp2 because gen_neon_mull
6357                      * frees it during pass 0.  */
6358                     tmp4 = tcg_temp_new_i32();
6359                     tcg_gen_mov_i32(tmp4, tmp2);
6360                     tmp3 = neon_load_reg(rn, 1);
6361
6362                     for (pass = 0; pass < 2; pass++) {
6363                         if (pass == 0) {
6364                             tmp = neon_load_reg(rn, 0);
6365                         } else {
6366                             tmp = tmp3;
6367                             tmp2 = tmp4;
6368                         }
6369                         gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
6370                         if (op != 11) {
6371                             neon_load_reg64(cpu_V1, rd + pass);
6372                         }
6373                         switch (op) {
6374                         case 6:
6375                             gen_neon_negl(cpu_V0, size);
6376                             /* Fall through */
6377                         case 2:
6378                             gen_neon_addl(size);
6379                             break;
6380                         case 3: case 7:
6381                             gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6382                             if (op == 7) {
6383                                 gen_neon_negl(cpu_V0, size);
6384                             }
6385                             gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
6386                             break;
6387                         case 10:
6388                             /* no-op */
6389                             break;
6390                         case 11:
6391                             gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6392                             break;
6393                         default:
6394                             abort();
6395                         }
6396                         neon_store_reg64(cpu_V0, rd + pass);
6397                     }
6398
6399
6400                     break;
6401                 default: /* 14 and 15 are RESERVED */
6402                     return 1;
6403                 }
6404             }
6405         } else { /* size == 3 */
6406             if (!u) {
6407                 /* Extract.  */
6408                 imm = (insn >> 8) & 0xf;
6409
6410                 if (imm > 7 && !q)
6411                     return 1;
6412
6413                 if (q && ((rd | rn | rm) & 1)) {
6414                     return 1;
6415                 }
6416
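                /* VEXT extracts a contiguous run of bytes starting at byte
                 * <imm> of the Vm:Vn concatenation (Vn supplying the least
                 * significant bytes), e.g. VEXT.8 d0, d1, d2, #3 yields
                 * bytes {d2[2:0], d1[7:3]}.  The general cases below build
                 * this from a right shift of the low part OR'd with a left
                 * shift of the high part.
                 */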
6417                 if (imm == 0) {
6418                     neon_load_reg64(cpu_V0, rn);
6419                     if (q) {
6420                         neon_load_reg64(cpu_V1, rn + 1);
6421                     }
6422                 } else if (imm == 8) {
6423                     neon_load_reg64(cpu_V0, rn + 1);
6424                     if (q) {
6425                         neon_load_reg64(cpu_V1, rm);
6426                     }
6427                 } else if (q) {
6428                     tmp64 = tcg_temp_new_i64();
6429                     if (imm < 8) {
6430                         neon_load_reg64(cpu_V0, rn);
6431                         neon_load_reg64(tmp64, rn + 1);
6432                     } else {
6433                         neon_load_reg64(cpu_V0, rn + 1);
6434                         neon_load_reg64(tmp64, rm);
6435                     }
6436                     tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
6437                     tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
6438                     tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6439                     if (imm < 8) {
6440                         neon_load_reg64(cpu_V1, rm);
6441                     } else {
6442                         neon_load_reg64(cpu_V1, rm + 1);
6443                         imm -= 8;
6444                     }
6445                     tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6446                     tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
6447                     tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
6448                     tcg_temp_free_i64(tmp64);
6449                 } else {
6450                     /* 64-bit VEXT: 0 < imm < 8, single D-register result.  */
6451                     neon_load_reg64(cpu_V0, rn);
6452                     tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
6453                     neon_load_reg64(cpu_V1, rm);
6454                     tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6455                     tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6456                 }
6457                 neon_store_reg64(cpu_V0, rd);
6458                 if (q) {
6459                     neon_store_reg64(cpu_V1, rd + 1);
6460                 }
6461             } else if ((insn & (1 << 11)) == 0) {
6462                 /* Two register misc.  */
6463                 op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
6464                 size = (insn >> 18) & 3;
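                /* op is a 6-bit opcode assembled from insn[17:16] (the
                 * group, in bits [5:4]) and insn[10:7] (bits [3:0]).
                 */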
6465                 /* UNDEF for unknown op values and bad op-size combinations */
6466                 if ((neon_2rm_sizes[op] & (1 << size)) == 0) {
6467                     return 1;
6468                 }
6469                 if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) &&
6470                     q && ((rm | rd) & 1)) {
6471                     return 1;
6472                 }
6473                 switch (op) {
6474                 case NEON_2RM_VREV64:
6475                     for (pass = 0; pass < (q ? 2 : 1); pass++) {
6476                         tmp = neon_load_reg(rm, pass * 2);
6477                         tmp2 = neon_load_reg(rm, pass * 2 + 1);
6478                         switch (size) {
6479                         case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6480                         case 1: gen_swap_half(tmp); break;
6481                         case 2: /* no-op */ break;
6482                         default: abort();
6483                         }
6484                         neon_store_reg(rd, pass * 2 + 1, tmp);
6485                         if (size == 2) {
6486                             neon_store_reg(rd, pass * 2, tmp2);
6487                         } else {
6488                             switch (size) {
6489                             case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break;
6490                             case 1: gen_swap_half(tmp2); break;
6491                             default: abort();
6492                             }
6493                             neon_store_reg(rd, pass * 2, tmp2);
6494                         }
6495                     }
6496                     break;
6497                 case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
6498                 case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
6499                     for (pass = 0; pass < q + 1; pass++) {
6500                         tmp = neon_load_reg(rm, pass * 2);
6501                         gen_neon_widen(cpu_V0, tmp, size, op & 1);
6502                         tmp = neon_load_reg(rm, pass * 2 + 1);
6503                         gen_neon_widen(cpu_V1, tmp, size, op & 1);
6504                         switch (size) {
6505                         case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
6506                         case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
6507                         case 2: tcg_gen_add_i64(CPU_V001); break;
6508                         default: abort();
6509                         }
6510                         if (op >= NEON_2RM_VPADAL) {
6511                             /* Accumulate.  */
6512                             neon_load_reg64(cpu_V1, rd + pass);
6513                             gen_neon_addl(size);
6514                         }
6515                         neon_store_reg64(cpu_V0, rd + pass);
6516                     }
6517                     break;
6518                 case NEON_2RM_VTRN:
6519                     if (size == 2) {
6520                         int n;
6521                         for (n = 0; n < (q ? 4 : 2); n += 2) {
6522                             tmp = neon_load_reg(rm, n);
6523                             tmp2 = neon_load_reg(rd, n + 1);
6524                             neon_store_reg(rm, n, tmp2);
6525                             neon_store_reg(rd, n + 1, tmp);
6526                         }
6527                     } else {
6528                         goto elementwise;
6529                     }
6530                     break;
6531                 case NEON_2RM_VUZP:
6532                     if (gen_neon_unzip(rd, rm, size, q)) {
6533                         return 1;
6534                     }
6535                     break;
6536                 case NEON_2RM_VZIP:
6537                     if (gen_neon_zip(rd, rm, size, q)) {
6538                         return 1;
6539                     }
6540                     break;
6541                 case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
6542                     /* also VQMOVUN; op field and mnemonics don't line up */
6543                     if (rm & 1) {
6544                         return 1;
6545                     }
6546                     TCGV_UNUSED_I32(tmp2);
6547                     for (pass = 0; pass < 2; pass++) {
6548                         neon_load_reg64(cpu_V0, rm + pass);
6549                         tmp = tcg_temp_new_i32();
6550                         gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size,
6551                                            tmp, cpu_V0);
6552                         if (pass == 0) {
6553                             tmp2 = tmp;
6554                         } else {
6555                             neon_store_reg(rd, 0, tmp2);
6556                             neon_store_reg(rd, 1, tmp);
6557                         }
6558                     }
6559                     break;
6560                 case NEON_2RM_VSHLL:
6561                     if (q || (rd & 1)) {
6562                         return 1;
6563                     }
6564                     tmp = neon_load_reg(rm, 0);
6565                     tmp2 = neon_load_reg(rm, 1);
6566                     for (pass = 0; pass < 2; pass++) {
6567                         if (pass == 1)
6568                             tmp = tmp2;
6569                         gen_neon_widen(cpu_V0, tmp, size, 1);
6570                         tcg_gen_shli_i64(cpu_V0, cpu_V0, 8 << size);
6571                         neon_store_reg64(cpu_V0, rd + pass);
6572                     }
6573                     break;
6574                 case NEON_2RM_VCVT_F16_F32:
6575                     if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) ||
6576                         q || (rm & 1)) {
6577                         return 1;
6578                     }
6579                     tmp = tcg_temp_new_i32();
6580                     tmp2 = tcg_temp_new_i32();
6581                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0));
6582                     gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
6583                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 1));
6584                     gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
6585                     tcg_gen_shli_i32(tmp2, tmp2, 16);
6586                     tcg_gen_or_i32(tmp2, tmp2, tmp);
6587                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 2));
6588                     gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
6589                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 3));
6590                     neon_store_reg(rd, 0, tmp2);
6591                     tmp2 = tcg_temp_new_i32();
6592                     gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
6593                     tcg_gen_shli_i32(tmp2, tmp2, 16);
6594                     tcg_gen_or_i32(tmp2, tmp2, tmp);
6595                     neon_store_reg(rd, 1, tmp2);
6596                     tcg_temp_free_i32(tmp);
6597                     break;
6598                 case NEON_2RM_VCVT_F32_F16:
6599                     if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) ||
6600                         q || (rd & 1)) {
6601                         return 1;
6602                     }
6603                     tmp3 = tcg_temp_new_i32();
6604                     tmp = neon_load_reg(rm, 0);
6605                     tmp2 = neon_load_reg(rm, 1);
6606                     tcg_gen_ext16u_i32(tmp3, tmp);
6607                     gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
6608                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 0));
6609                     tcg_gen_shri_i32(tmp3, tmp, 16);
6610                     gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
6611                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 1));
6612                     tcg_temp_free_i32(tmp);
6613                     tcg_gen_ext16u_i32(tmp3, tmp2);
6614                     gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
6615                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 2));
6616                     tcg_gen_shri_i32(tmp3, tmp2, 16);
6617                     gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
6618                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 3));
6619                     tcg_temp_free_i32(tmp2);
6620                     tcg_temp_free_i32(tmp3);
6621                     break;
6622                 case NEON_2RM_AESE: case NEON_2RM_AESMC:
6623                     if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
6624                         || ((rm | rd) & 1)) {
6625                         return 1;
6626                     }
6627                     tmp = tcg_const_i32(rd);
6628                     tmp2 = tcg_const_i32(rm);
6629
6630                     /* Bit 6 is the lowest opcode bit; it distinguishes between
6631                      * encryption (AESE/AESMC) and decryption (AESD/AESIMC).
6632                      */
6633                     tmp3 = tcg_const_i32(extract32(insn, 6, 1));
6634
6635                     if (op == NEON_2RM_AESE) {
6636                         gen_helper_crypto_aese(cpu_env, tmp, tmp2, tmp3);
6637                     } else {
6638                         gen_helper_crypto_aesmc(cpu_env, tmp, tmp2, tmp3);
6639                     }
6640                     tcg_temp_free_i32(tmp);
6641                     tcg_temp_free_i32(tmp2);
6642                     tcg_temp_free_i32(tmp3);
6643                     break;
6644                 case NEON_2RM_SHA1H:
6645                     if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)
6646                         || ((rm | rd) & 1)) {
6647                         return 1;
6648                     }
6649                     tmp = tcg_const_i32(rd);
6650                     tmp2 = tcg_const_i32(rm);
6651
6652                     gen_helper_crypto_sha1h(cpu_env, tmp, tmp2);
6653
6654                     tcg_temp_free_i32(tmp);
6655                     tcg_temp_free_i32(tmp2);
6656                     break;
6657                 case NEON_2RM_SHA1SU1:
6658                     if ((rm | rd) & 1) {
6659                     return 1;
6660                     }
6661                     /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
6662                     if (q) {
6663                         if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA256)) {
6664                             return 1;
6665                         }
6666                     } else if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)) {
6667                         return 1;
6668                     }
6669                     tmp = tcg_const_i32(rd);
6670                     tmp2 = tcg_const_i32(rm);
6671                     if (q) {
6672                         gen_helper_crypto_sha256su0(cpu_env, tmp, tmp2);
6673                     } else {
6674                         gen_helper_crypto_sha1su1(cpu_env, tmp, tmp2);
6675                     }
6676                     tcg_temp_free_i32(tmp);
6677                     tcg_temp_free_i32(tmp2);
6678                     break;
6679                 default:
6680                 elementwise:
6681                     for (pass = 0; pass < (q ? 4 : 2); pass++) {
6682                         if (neon_2rm_is_float_op(op)) {
6683                             tcg_gen_ld_f32(cpu_F0s, cpu_env,
6684                                            neon_reg_offset(rm, pass));
6685                             TCGV_UNUSED_I32(tmp);
6686                         } else {
6687                             tmp = neon_load_reg(rm, pass);
6688                         }
6689                         switch (op) {
6690                         case NEON_2RM_VREV32:
6691                             switch (size) {
6692                             case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6693                             case 1: gen_swap_half(tmp); break;
6694                             default: abort();
6695                             }
6696                             break;
6697                         case NEON_2RM_VREV16:
6698                             gen_rev16(tmp);
6699                             break;
6700                         case NEON_2RM_VCLS:
6701                             switch (size) {
6702                             case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
6703                             case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
6704                             case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
6705                             default: abort();
6706                             }
6707                             break;
6708                         case NEON_2RM_VCLZ:
6709                             switch (size) {
6710                             case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
6711                             case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
6712                             case 2: gen_helper_clz(tmp, tmp); break;
6713                             default: abort();
6714                             }
6715                             break;
6716                         case NEON_2RM_VCNT:
6717                             gen_helper_neon_cnt_u8(tmp, tmp);
6718                             break;
6719                         case NEON_2RM_VMVN:
6720                             tcg_gen_not_i32(tmp, tmp);
6721                             break;
6722                         case NEON_2RM_VQABS:
6723                             switch (size) {
6724                             case 0:
6725                                 gen_helper_neon_qabs_s8(tmp, cpu_env, tmp);
6726                                 break;
6727                             case 1:
6728                                 gen_helper_neon_qabs_s16(tmp, cpu_env, tmp);
6729                                 break;
6730                             case 2:
6731                                 gen_helper_neon_qabs_s32(tmp, cpu_env, tmp);
6732                                 break;
6733                             default: abort();
6734                             }
6735                             break;
6736                         case NEON_2RM_VQNEG:
6737                             switch (size) {
6738                             case 0:
6739                                 gen_helper_neon_qneg_s8(tmp, cpu_env, tmp);
6740                                 break;
6741                             case 1:
6742                                 gen_helper_neon_qneg_s16(tmp, cpu_env, tmp);
6743                                 break;
6744                             case 2:
6745                                 gen_helper_neon_qneg_s32(tmp, cpu_env, tmp);
6746                                 break;
6747                             default: abort();
6748                             }
6749                             break;
6750                         case NEON_2RM_VCGT0: case NEON_2RM_VCLE0:
6751                             tmp2 = tcg_const_i32(0);
6752                             switch(size) {
6753                             case 0: gen_helper_neon_cgt_s8(tmp, tmp, tmp2); break;
6754                             case 1: gen_helper_neon_cgt_s16(tmp, tmp, tmp2); break;
6755                             case 2: gen_helper_neon_cgt_s32(tmp, tmp, tmp2); break;
6756                             default: abort();
6757                             }
6758                             tcg_temp_free_i32(tmp2);
6759                             if (op == NEON_2RM_VCLE0) {
6760                                 tcg_gen_not_i32(tmp, tmp);
6761                             }
6762                             break;
6763                         case NEON_2RM_VCGE0: case NEON_2RM_VCLT0:
6764                             tmp2 = tcg_const_i32(0);
6765                             switch(size) {
6766                             case 0: gen_helper_neon_cge_s8(tmp, tmp, tmp2); break;
6767                             case 1: gen_helper_neon_cge_s16(tmp, tmp, tmp2); break;
6768                             case 2: gen_helper_neon_cge_s32(tmp, tmp, tmp2); break;
6769                             default: abort();
6770                             }
6771                             tcg_temp_free_i32(tmp2);
6772                             if (op == NEON_2RM_VCLT0) {
6773                                 tcg_gen_not_i32(tmp, tmp);
6774                             }
6775                             break;
6776                         case NEON_2RM_VCEQ0:
6777                             tmp2 = tcg_const_i32(0);
6778                             switch(size) {
6779                             case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
6780                             case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
6781                             case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
6782                             default: abort();
6783                             }
6784                             tcg_temp_free_i32(tmp2);
6785                             break;
6786                         case NEON_2RM_VABS:
6787                             switch(size) {
6788                             case 0: gen_helper_neon_abs_s8(tmp, tmp); break;
6789                             case 1: gen_helper_neon_abs_s16(tmp, tmp); break;
6790                             case 2: tcg_gen_abs_i32(tmp, tmp); break;
6791                             default: abort();
6792                             }
6793                             break;
6794                         case NEON_2RM_VNEG:
6795                             tmp2 = tcg_const_i32(0);
6796                             gen_neon_rsb(size, tmp, tmp2);
6797                             tcg_temp_free_i32(tmp2);
6798                             break;
6799                         case NEON_2RM_VCGT0_F:
6800                         {
6801                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6802                             tmp2 = tcg_const_i32(0);
6803                             gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
6804                             tcg_temp_free_i32(tmp2);
6805                             tcg_temp_free_ptr(fpstatus);
6806                             break;
6807                         }
6808                         case NEON_2RM_VCGE0_F:
6809                         {
6810                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6811                             tmp2 = tcg_const_i32(0);
6812                             gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
6813                             tcg_temp_free_i32(tmp2);
6814                             tcg_temp_free_ptr(fpstatus);
6815                             break;
6816                         }
6817                         case NEON_2RM_VCEQ0_F:
6818                         {
6819                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6820                             tmp2 = tcg_const_i32(0);
6821                             gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
6822                             tcg_temp_free_i32(tmp2);
6823                             tcg_temp_free_ptr(fpstatus);
6824                             break;
6825                         }
6826                         case NEON_2RM_VCLE0_F:
6827                         {
6828                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6829                             tmp2 = tcg_const_i32(0);
6830                             gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus);
6831                             tcg_temp_free_i32(tmp2);
6832                             tcg_temp_free_ptr(fpstatus);
6833                             break;
6834                         }
6835                         case NEON_2RM_VCLT0_F:
6836                         {
6837                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6838                             tmp2 = tcg_const_i32(0);
6839                             gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus);
6840                             tcg_temp_free_i32(tmp2);
6841                             tcg_temp_free_ptr(fpstatus);
6842                             break;
6843                         }
6844                         case NEON_2RM_VABS_F:
6845                             gen_vfp_abs(0);
6846                             break;
6847                         case NEON_2RM_VNEG_F:
6848                             gen_vfp_neg(0);
6849                             break;
6850                         case NEON_2RM_VSWP:
6851                             tmp2 = neon_load_reg(rd, pass);
6852                             neon_store_reg(rm, pass, tmp2);
6853                             break;
6854                         case NEON_2RM_VTRN:
6855                             tmp2 = neon_load_reg(rd, pass);
6856                             switch (size) {
6857                             case 0: gen_neon_trn_u8(tmp, tmp2); break;
6858                             case 1: gen_neon_trn_u16(tmp, tmp2); break;
6859                             default: abort();
6860                             }
6861                             neon_store_reg(rm, pass, tmp2);
6862                             break;
6863                         case NEON_2RM_VRINTN:
6864                         case NEON_2RM_VRINTA:
6865                         case NEON_2RM_VRINTM:
6866                         case NEON_2RM_VRINTP:
6867                         case NEON_2RM_VRINTZ:
6868                         {
6869                             TCGv_i32 tcg_rmode;
6870                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6871                             int rmode;
6872
6873                             if (op == NEON_2RM_VRINTZ) {
6874                                 rmode = FPROUNDING_ZERO;
6875                             } else {
6876                                 rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1];
6877                             }
6878
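                            /* op bits [2:1] index fp_decode_rm[]; the XOR
                             * with 1 swaps adjacent entries so that (with
                             * the table as defined earlier in this file)
                             * VRINTN maps to ties-to-even, VRINTA to
                             * ties-away, VRINTM to -inf and VRINTP to +inf.
                             */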
6879                             tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6880                             gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6881                                                       cpu_env);
6882                             gen_helper_rints(cpu_F0s, cpu_F0s, fpstatus);
6883                             gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6884                                                       cpu_env);
6885                             tcg_temp_free_ptr(fpstatus);
6886                             tcg_temp_free_i32(tcg_rmode);
6887                             break;
6888                         }
6889                         case NEON_2RM_VRINTX:
6890                         {
6891                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6892                             gen_helper_rints_exact(cpu_F0s, cpu_F0s, fpstatus);
6893                             tcg_temp_free_ptr(fpstatus);
6894                             break;
6895                         }
6896                         case NEON_2RM_VCVTAU:
6897                         case NEON_2RM_VCVTAS:
6898                         case NEON_2RM_VCVTNU:
6899                         case NEON_2RM_VCVTNS:
6900                         case NEON_2RM_VCVTPU:
6901                         case NEON_2RM_VCVTPS:
6902                         case NEON_2RM_VCVTMU:
6903                         case NEON_2RM_VCVTMS:
6904                         {
6905                             bool is_signed = !extract32(insn, 7, 1);
6906                             TCGv_ptr fpst = get_fpstatus_ptr(1);
6907                             TCGv_i32 tcg_rmode, tcg_shift;
6908                             int rmode = fp_decode_rm[extract32(insn, 8, 2)];
6909
6910                             tcg_shift = tcg_const_i32(0);
6911                             tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6912                             gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6913                                                       cpu_env);
6914
6915                             if (is_signed) {
6916                                 gen_helper_vfp_tosls(cpu_F0s, cpu_F0s,
6917                                                      tcg_shift, fpst);
6918                             } else {
6919                                 gen_helper_vfp_touls(cpu_F0s, cpu_F0s,
6920                                                      tcg_shift, fpst);
6921                             }
6922
6923                             gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6924                                                       cpu_env);
6925                             tcg_temp_free_i32(tcg_rmode);
6926                             tcg_temp_free_i32(tcg_shift);
6927                             tcg_temp_free_ptr(fpst);
6928                             break;
6929                         }
6930                         case NEON_2RM_VRECPE:
6931                         {
6932                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6933                             gen_helper_recpe_u32(tmp, tmp, fpstatus);
6934                             tcg_temp_free_ptr(fpstatus);
6935                             break;
6936                         }
6937                         case NEON_2RM_VRSQRTE:
6938                         {
6939                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6940                             gen_helper_rsqrte_u32(tmp, tmp, fpstatus);
6941                             tcg_temp_free_ptr(fpstatus);
6942                             break;
6943                         }
6944                         case NEON_2RM_VRECPE_F:
6945                         {
6946                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6947                             gen_helper_recpe_f32(cpu_F0s, cpu_F0s, fpstatus);
6948                             tcg_temp_free_ptr(fpstatus);
6949                             break;
6950                         }
6951                         case NEON_2RM_VRSQRTE_F:
6952                         {
6953                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6954                             gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, fpstatus);
6955                             tcg_temp_free_ptr(fpstatus);
6956                             break;
6957                         }
6958                         case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
6959                             gen_vfp_sito(0, 1);
6960                             break;
6961                         case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
6962                             gen_vfp_uito(0, 1);
6963                             break;
6964                         case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
6965                             gen_vfp_tosiz(0, 1);
6966                             break;
6967                         case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
6968                             gen_vfp_touiz(0, 1);
6969                             break;
6970                         default:
6971                             /* Reserved op values were caught by the
6972                              * neon_2rm_sizes[] check earlier.
6973                              */
6974                             abort();
6975                         }
6976                         if (neon_2rm_is_float_op(op)) {
6977                             tcg_gen_st_f32(cpu_F0s, cpu_env,
6978                                            neon_reg_offset(rd, pass));
6979                         } else {
6980                             neon_store_reg(rd, pass, tmp);
6981                         }
6982                     }
6983                     break;
6984                 }
6985             } else if ((insn & (1 << 10)) == 0) {
6986                 /* VTBL, VTBX.  */
6987                 int n = ((insn >> 8) & 3) + 1;
6988                 if ((rn + n) > 32) {
6989                     /* This is UNPREDICTABLE; we choose to UNDEF to avoid the
6990                      * helper function running off the end of the register file.
6991                      */
6992                     return 1;
6993                 }
6994                 n <<= 3;
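                /* n is now the table length in bytes (8..32).  Out-of-range
                 * indices give 0 for VTBL; for VTBX (bit 6 set) they leave
                 * the destination byte unchanged, which is why tmp is
                 * preloaded from rd below rather than zeroed.
                 */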
6995                 if (insn & (1 << 6)) {
6996                     tmp = neon_load_reg(rd, 0);
6997                 } else {
6998                     tmp = tcg_temp_new_i32();
6999                     tcg_gen_movi_i32(tmp, 0);
7000                 }
7001                 tmp2 = neon_load_reg(rm, 0);
7002                 tmp4 = tcg_const_i32(rn);
7003                 tmp5 = tcg_const_i32(n);
7004                 gen_helper_neon_tbl(tmp2, cpu_env, tmp2, tmp, tmp4, tmp5);
7005                 tcg_temp_free_i32(tmp);
7006                 if (insn & (1 << 6)) {
7007                     tmp = neon_load_reg(rd, 1);
7008                 } else {
7009                     tmp = tcg_temp_new_i32();
7010                     tcg_gen_movi_i32(tmp, 0);
7011                 }
7012                 tmp3 = neon_load_reg(rm, 1);
7013                 gen_helper_neon_tbl(tmp3, cpu_env, tmp3, tmp, tmp4, tmp5);
7014                 tcg_temp_free_i32(tmp5);
7015                 tcg_temp_free_i32(tmp4);
7016                 neon_store_reg(rd, 0, tmp2);
7017                 neon_store_reg(rd, 1, tmp3);
7018                 tcg_temp_free_i32(tmp);
7019             } else if ((insn & 0x380) == 0) {
7020                 /* VDUP */
7021                 if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
7022                     return 1;
7023                 }
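                /* The scalar index comes from insn[19:16]: a trailing 1 bit
                 * selects byte elements (index in bits [19:17]), 10 selects
                 * halfwords (index in bits [19:18]) and 100 selects words
                 * (index in bit 19).  E.g. VDUP.8 d0, d1[5] has bit 16 set,
                 * bit 19 selecting the high word and bits [18:17] == 01
                 * picking byte 1 of it, i.e. byte 5 overall.
                 */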
7024                 if (insn & (1 << 19)) {
7025                     tmp = neon_load_reg(rm, 1);
7026                 } else {
7027                     tmp = neon_load_reg(rm, 0);
7028                 }
7029                 if (insn & (1 << 16)) {
7030                     gen_neon_dup_u8(tmp, ((insn >> 17) & 3) * 8);
7031                 } else if (insn & (1 << 17)) {
7032                     if ((insn >> 18) & 1)
7033                         gen_neon_dup_high16(tmp);
7034                     else
7035                         gen_neon_dup_low16(tmp);
7036                 }
7037                 for (pass = 0; pass < (q ? 4 : 2); pass++) {
7038                     tmp2 = tcg_temp_new_i32();
7039                     tcg_gen_mov_i32(tmp2, tmp);
7040                     neon_store_reg(rd, pass, tmp2);
7041                 }
7042                 tcg_temp_free_i32(tmp);
7043             } else {
7044                 return 1;
7045             }
7046         }
7047     }
7048     return 0;
7049 }
7050
7051 static int disas_coproc_insn(DisasContext *s, uint32_t insn)
7052 {
7053     int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
7054     const ARMCPRegInfo *ri;
7055
7056     cpnum = (insn >> 8) & 0xf;
7057
7058     /* First check for coprocessor space used for XScale/iwMMXt insns */
7059     if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cpnum < 2)) {
7060         if (extract32(s->c15_cpar, cpnum, 1) == 0) {
7061             return 1;
7062         }
7063         if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
7064             return disas_iwmmxt_insn(s, insn);
7065         } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
7066             return disas_dsp_insn(s, insn);
7067         }
7068         return 1;
7069     }
7070
7071     /* Otherwise treat as a generic register access */
7072     is64 = (insn & (1 << 25)) == 0;
7073     if (!is64 && ((insn & (1 << 4)) == 0)) {
7074         /* cdp */
7075         return 1;
7076     }
7077
7078     crm = insn & 0xf;
7079     if (is64) {
7080         crn = 0;
7081         opc1 = (insn >> 4) & 0xf;
7082         opc2 = 0;
7083         rt2 = (insn >> 16) & 0xf;
7084     } else {
7085         crn = (insn >> 16) & 0xf;
7086         opc1 = (insn >> 21) & 7;
7087         opc2 = (insn >> 5) & 7;
7088         rt2 = 0;
7089     }
7090     isread = (insn >> 20) & 1;
7091     rt = (insn >> 12) & 0xf;
7092
7093     ri = get_arm_cp_reginfo(s->cp_regs,
7094                             ENCODE_CP_REG(cpnum, is64, crn, crm, opc1, opc2));
7095     if (ri) {
7096         /* Check access permissions */
7097         if (!cp_access_ok(s->current_el, ri, isread)) {
7098             return 1;
7099         }
7100
7101         if (ri->accessfn ||
7102             (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
7103             /* Emit code to perform further access permissions checks at
7104              * runtime; this may result in an exception.
7105              * Note that on XScale all cp0..cp13 registers do an access check
7106              * call in order to handle c15_cpar.
7107              */
7108             TCGv_ptr tmpptr;
7109             TCGv_i32 tcg_syn;
7110             uint32_t syndrome;
7111
7112             /* Note that since we are an implementation which takes an
7113              * exception on a trapped conditional instruction only if the
7114              * instruction passes its condition code check, we can take
7115              * advantage of the clause in the ARM ARM that allows us to set
7116              * the COND field in the instruction to 0xE in all cases.
7117              * We could fish the actual condition out of the insn (ARM)
7118              * or the condexec bits (Thumb) but it isn't necessary.
7119              */
7120             switch (cpnum) {
7121             case 14:
7122                 if (is64) {
7123                     syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
7124                                                  isread, s->thumb);
7125                 } else {
7126                     syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
7127                                                 rt, isread, s->thumb);
7128                 }
7129                 break;
7130             case 15:
7131                 if (is64) {
7132                     syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
7133                                                  isread, s->thumb);
7134                 } else {
7135                     syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
7136                                                 rt, isread, s->thumb);
7137                 }
7138                 break;
7139             default:
7140                 /* ARMv8 defines that only coprocessors 14 and 15 exist,
7141                  * so this can only happen if this is an ARMv7 or earlier CPU,
7142                  * in which case the syndrome information won't actually be
7143                  * guest visible.
7144                  */
7145                 assert(!arm_dc_feature(s, ARM_FEATURE_V8));
7146                 syndrome = syn_uncategorized();
7147                 break;
7148             }
7149
7150             gen_set_pc_im(s, s->pc);
7151             tmpptr = tcg_const_ptr(ri);
7152             tcg_syn = tcg_const_i32(syndrome);
7153             gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn);
7154             tcg_temp_free_ptr(tmpptr);
7155             tcg_temp_free_i32(tcg_syn);
7156         }
7157
7158         /* Handle special cases first */
7159         switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
7160         case ARM_CP_NOP:
7161             return 0;
7162         case ARM_CP_WFI:
7163             if (isread) {
7164                 return 1;
7165             }
7166             gen_set_pc_im(s, s->pc);
7167             s->is_jmp = DISAS_WFI;
7168             return 0;
7169         default:
7170             break;
7171         }
7172
7173         if (use_icount && (ri->type & ARM_CP_IO)) {
7174             gen_io_start();
7175         }
7176
7177         if (isread) {
7178             /* Read */
7179             if (is64) {
7180                 TCGv_i64 tmp64;
7181                 TCGv_i32 tmp;
7182                 if (ri->type & ARM_CP_CONST) {
7183                     tmp64 = tcg_const_i64(ri->resetvalue);
7184                 } else if (ri->readfn) {
7185                     TCGv_ptr tmpptr;
7186                     tmp64 = tcg_temp_new_i64();
7187                     tmpptr = tcg_const_ptr(ri);
7188                     gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
7189                     tcg_temp_free_ptr(tmpptr);
7190                 } else {
7191                     tmp64 = tcg_temp_new_i64();
7192                     tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
7193                 }
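                     /* Split the 64-bit value: low word to Rt, high word to Rt2. */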
7194                 tmp = tcg_temp_new_i32();
7195                 tcg_gen_trunc_i64_i32(tmp, tmp64);
7196                 store_reg(s, rt, tmp);
7197                 tcg_gen_shri_i64(tmp64, tmp64, 32);
7198                 tmp = tcg_temp_new_i32();
7199                 tcg_gen_trunc_i64_i32(tmp, tmp64);
7200                 tcg_temp_free_i64(tmp64);
7201                 store_reg(s, rt2, tmp);
7202             } else {
7203                 TCGv_i32 tmp;
7204                 if (ri->type & ARM_CP_CONST) {
7205                     tmp = tcg_const_i32(ri->resetvalue);
7206                 } else if (ri->readfn) {
7207                     TCGv_ptr tmpptr;
7208                     tmp = tcg_temp_new_i32();
7209                     tmpptr = tcg_const_ptr(ri);
7210                     gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
7211                     tcg_temp_free_ptr(tmpptr);
7212                 } else {
7213                     tmp = load_cpu_offset(ri->fieldoffset);
7214                 }
7215                 if (rt == 15) {
7216                     /* A destination register of r15 for 32-bit loads sets
7217                      * the condition codes from the high 4 bits of the value.
7218                      */
7219                     gen_set_nzcv(tmp);
7220                     tcg_temp_free_i32(tmp);
7221                 } else {
7222                     store_reg(s, rt, tmp);
7223                 }
7224             }
7225         } else {
7226             /* Write */
7227             if (ri->type & ARM_CP_CONST) {
7228                 /* If not forbidden by access permissions, treat as WI */
7229                 return 0;
7230             }
7231
7232             if (is64) {
7233                 TCGv_i32 tmplo, tmphi;
7234                 TCGv_i64 tmp64 = tcg_temp_new_i64();
7235                 tmplo = load_reg(s, rt);
7236                 tmphi = load_reg(s, rt2);
7237                 tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
7238                 tcg_temp_free_i32(tmplo);
7239                 tcg_temp_free_i32(tmphi);
7240                 if (ri->writefn) {
7241                     TCGv_ptr tmpptr = tcg_const_ptr(ri);
7242                     gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
7243                     tcg_temp_free_ptr(tmpptr);
7244                 } else {
7245                     tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
7246                 }
7247                 tcg_temp_free_i64(tmp64);
7248             } else {
7249                 if (ri->writefn) {
7250                     TCGv_i32 tmp;
7251                     TCGv_ptr tmpptr;
7252                     tmp = load_reg(s, rt);
7253                     tmpptr = tcg_const_ptr(ri);
7254                     gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
7255                     tcg_temp_free_ptr(tmpptr);
7256                     tcg_temp_free_i32(tmp);
7257                 } else {
7258                     TCGv_i32 tmp = load_reg(s, rt);
7259                     store_cpu_offset(tmp, ri->fieldoffset);
7260                 }
7261             }
7262         }
7263
7264         if (use_icount && (ri->type & ARM_CP_IO)) {
7265             /* I/O operations must end the TB here (whether read or write) */
7266             gen_io_end();
7267             gen_lookup_tb(s);
7268         } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
7269             /* We default to ending the TB on a coprocessor register write,
7270              * but allow this to be suppressed by the register definition
7271              * (usually only necessary to work around guest bugs).
7272              */
7273             gen_lookup_tb(s);
7274         }
7275
7276         return 0;
7277     }
7278
7279     /* Unknown register; this might be a guest error or a QEMU
7280      * unimplemented feature.
7281      */
7282     if (is64) {
7283         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7284                       "64-bit system register cp:%d opc1:%d crm:%d\n",
7285                       isread ? "read" : "write", cpnum, opc1, crm);
7286     } else {
7287         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7288                       "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d\n",
7289                       isread ? "read" : "write", cpnum, opc1, crn, crm, opc2);
7290     }
7291
7292     return 1;
7293 }
7294
7295
7296 /* Store a 64-bit value to a register pair.  Clobbers val.  */
7297 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
7298 {
7299     TCGv_i32 tmp;
7300     tmp = tcg_temp_new_i32();
7301     tcg_gen_trunc_i64_i32(tmp, val);
7302     store_reg(s, rlow, tmp);
7303     tmp = tcg_temp_new_i32();
7304     tcg_gen_shri_i64(val, val, 32);
7305     tcg_gen_trunc_i64_i32(tmp, val);
7306     store_reg(s, rhigh, tmp);
7307 }
7308
7309 /* Load a 32-bit value from a register and perform a 64-bit accumulate.  */
7310 static void gen_addq_lo(DisasContext *s, TCGv_i64 val, int rlow)
7311 {
7312     TCGv_i64 tmp;
7313     TCGv_i32 tmp2;
7314
7315     /* Load value and extend to 64 bits.  */
7316     tmp = tcg_temp_new_i64();
7317     tmp2 = load_reg(s, rlow);
7318     tcg_gen_extu_i32_i64(tmp, tmp2);
7319     tcg_temp_free_i32(tmp2);
7320     tcg_gen_add_i64(val, val, tmp);
7321     tcg_temp_free_i64(tmp);
7322 }
7323
7324 /* Load and add a 64-bit value from a register pair.  */
7325 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
7326 {
7327     TCGv_i64 tmp;
7328     TCGv_i32 tmpl;
7329     TCGv_i32 tmph;
7330
7331     /* Load 64-bit value rd:rn.  */
7332     tmpl = load_reg(s, rlow);
7333     tmph = load_reg(s, rhigh);
7334     tmp = tcg_temp_new_i64();
7335     tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
7336     tcg_temp_free_i32(tmpl);
7337     tcg_temp_free_i32(tmph);
7338     tcg_gen_add_i64(val, val, tmp);
7339     tcg_temp_free_i64(tmp);
7340 }
7341
7342 /* Set N and Z flags from hi|lo.  */
7343 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
7344 {
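         /* cpu_NF holds N in bit 31; cpu_ZF holds a value which is zero
          * iff Z is set, so OR-ing the two halves gives the 64-bit zero
          * test directly.
          */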
7345     tcg_gen_mov_i32(cpu_NF, hi);
7346     tcg_gen_or_i32(cpu_ZF, lo, hi);
7347 }
7348
7349 /* Load/Store exclusive instructions are implemented by remembering
7350    the value/address loaded, and seeing if these are the same
7351    when the store is performed. This should be sufficient to implement
7352    the architecturally mandated semantics, and avoids having to monitor
7353    regular stores.
7354
7355    In system emulation mode only one CPU will be running at once, so
7356    this sequence is effectively atomic.  In user emulation mode we
7357    throw an exception and handle the atomic operation elsewhere.  */
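     /* For example, a typical guest atomic-increment loop looks like:
      *    retry: ldrex r0, [r1]
      *           add   r0, r0, #1
      *           strex r2, r0, [r1]
      *           cmp   r2, #0
      *           bne   retry
      * gen_load_exclusive() latches the address and loaded value;
      * gen_store_exclusive() commits the store (and writes 0 to the
      * status register) only if both still match.
      */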
7358 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
7359                                TCGv_i32 addr, int size)
7360 {
7361     TCGv_i32 tmp = tcg_temp_new_i32();
7362
7363     s->is_ldex = true;
7364
7365     switch (size) {
7366     case 0:
7367         gen_aa32_ld8u(tmp, addr, get_mem_index(s));
7368         break;
7369     case 1:
7370         gen_aa32_ld16u(tmp, addr, get_mem_index(s));
7371         break;
7372     case 2:
7373     case 3:
7374         gen_aa32_ld32u(tmp, addr, get_mem_index(s));
7375         break;
7376     default:
7377         abort();
7378     }
7379
7380     if (size == 3) {
7381         TCGv_i32 tmp2 = tcg_temp_new_i32();
7382         TCGv_i32 tmp3 = tcg_temp_new_i32();
7383
7384         tcg_gen_addi_i32(tmp2, addr, 4);
7385         gen_aa32_ld32u(tmp3, tmp2, get_mem_index(s));
7386         tcg_temp_free_i32(tmp2);
7387         tcg_gen_concat_i32_i64(cpu_exclusive_val, tmp, tmp3);
7388         store_reg(s, rt2, tmp3);
7389     } else {
7390         tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
7391     }
7392
7393     store_reg(s, rt, tmp);
7394     tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
7395 }
7396
7397 static void gen_clrex(DisasContext *s)
7398 {
7399     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
7400 }
7401
7402 #ifdef CONFIG_USER_ONLY
7403 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
7404                                 TCGv_i32 addr, int size)
7405 {
7406     tcg_gen_extu_i32_i64(cpu_exclusive_test, addr);
7407     tcg_gen_movi_i32(cpu_exclusive_info,
7408                      size | (rd << 4) | (rt << 8) | (rt2 << 12));
7409     gen_exception_internal_insn(s, 4, EXCP_STREX);
7410 }
7411 #else
7412 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
7413                                 TCGv_i32 addr, int size)
7414 {
7415     TCGv_i32 tmp;
7416     TCGv_i64 val64, extaddr;
7417     int done_label;
7418     int fail_label;
7419
7420     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
7421          [addr] = {Rt};
7422          {Rd} = 0;
7423        } else {
7424          {Rd} = 1;
7425        } */
7426     fail_label = gen_new_label();
7427     done_label = gen_new_label();
7428     extaddr = tcg_temp_new_i64();
7429     tcg_gen_extu_i32_i64(extaddr, addr);
7430     tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
7431     tcg_temp_free_i64(extaddr);
7432
7433     tmp = tcg_temp_new_i32();
7434     switch (size) {
7435     case 0:
7436         gen_aa32_ld8u(tmp, addr, get_mem_index(s));
7437         break;
7438     case 1:
7439         gen_aa32_ld16u(tmp, addr, get_mem_index(s));
7440         break;
7441     case 2:
7442     case 3:
7443         gen_aa32_ld32u(tmp, addr, get_mem_index(s));
7444         break;
7445     default:
7446         abort();
7447     }
7448
7449     val64 = tcg_temp_new_i64();
7450     if (size == 3) {
7451         TCGv_i32 tmp2 = tcg_temp_new_i32();
7452         TCGv_i32 tmp3 = tcg_temp_new_i32();
7453         tcg_gen_addi_i32(tmp2, addr, 4);
7454         gen_aa32_ld32u(tmp3, tmp2, get_mem_index(s));
7455         tcg_temp_free_i32(tmp2);
7456         tcg_gen_concat_i32_i64(val64, tmp, tmp3);
7457         tcg_temp_free_i32(tmp3);
7458     } else {
7459         tcg_gen_extu_i32_i64(val64, tmp);
7460     }
7461     tcg_temp_free_i32(tmp);
7462
7463     tcg_gen_brcond_i64(TCG_COND_NE, val64, cpu_exclusive_val, fail_label);
7464     tcg_temp_free_i64(val64);
7465
7466     tmp = load_reg(s, rt);
7467     switch (size) {
7468     case 0:
7469         gen_aa32_st8(tmp, addr, get_mem_index(s));
7470         break;
7471     case 1:
7472         gen_aa32_st16(tmp, addr, get_mem_index(s));
7473         break;
7474     case 2:
7475     case 3:
7476         gen_aa32_st32(tmp, addr, get_mem_index(s));
7477         break;
7478     default:
7479         abort();
7480     }
7481     tcg_temp_free_i32(tmp);
7482     if (size == 3) {
7483         tcg_gen_addi_i32(addr, addr, 4);
7484         tmp = load_reg(s, rt2);
7485         gen_aa32_st32(tmp, addr, get_mem_index(s));
7486         tcg_temp_free_i32(tmp);
7487     }
7488     tcg_gen_movi_i32(cpu_R[rd], 0);
7489     tcg_gen_br(done_label);
7490     gen_set_label(fail_label);
7491     tcg_gen_movi_i32(cpu_R[rd], 1);
7492     gen_set_label(done_label);
7493     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
7494 }
7495 #endif
7496
7497 /* gen_srs:
7499  * @s: DisasContext
7500  * @mode: mode field from insn (which stack to store to)
7501  * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
7502  * @writeback: true if writeback bit set
7503  *
7504  * Generate code for the SRS (Store Return State) insn.
7505  */
7506 static void gen_srs(DisasContext *s,
7507                     uint32_t mode, uint32_t amode, bool writeback)
7508 {
7509     int32_t offset;
7510     TCGv_i32 addr = tcg_temp_new_i32();
7511     TCGv_i32 tmp = tcg_const_i32(mode);
7512     gen_helper_get_r13_banked(addr, cpu_env, tmp);
7513     tcg_temp_free_i32(tmp);
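         /* Offset from the banked SP to the address where LR is stored
          * (SPSR goes 4 bytes above it); e.g. DB stores at SP-8 and SP-4.
          */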
7514     switch (amode) {
7515     case 0: /* DA */
7516         offset = -4;
7517         break;
7518     case 1: /* IA */
7519         offset = 0;
7520         break;
7521     case 2: /* DB */
7522         offset = -8;
7523         break;
7524     case 3: /* IB */
7525         offset = 4;
7526         break;
7527     default:
7528         abort();
7529     }
7530     tcg_gen_addi_i32(addr, addr, offset);
7531     tmp = load_reg(s, 14);
7532     gen_aa32_st32(tmp, addr, get_mem_index(s));
7533     tcg_temp_free_i32(tmp);
7534     tmp = load_cpu_field(spsr);
7535     tcg_gen_addi_i32(addr, addr, 4);
7536     gen_aa32_st32(tmp, addr, get_mem_index(s));
7537     tcg_temp_free_i32(tmp);
7538     if (writeback) {
7539         switch (amode) {
7540         case 0:
7541             offset = -8;
7542             break;
7543         case 1:
7544             offset = 4;
7545             break;
7546         case 2:
7547             offset = -4;
7548             break;
7549         case 3:
7550             offset = 0;
7551             break;
7552         default:
7553             abort();
7554         }
7555         tcg_gen_addi_i32(addr, addr, offset);
7556         tmp = tcg_const_i32(mode);
7557         gen_helper_set_r13_banked(cpu_env, tmp, addr);
7558         tcg_temp_free_i32(tmp);
7559     }
7560     tcg_temp_free_i32(addr);
7561 }
7562
7563 static void disas_arm_insn(CPUARMState *env, DisasContext *s)
7564 {
7565     unsigned int cond, insn, val, op1, i, shift, rm, rs, rn, rd, sh;
7566     TCGv_i32 tmp;
7567     TCGv_i32 tmp2;
7568     TCGv_i32 tmp3;
7569     TCGv_i32 addr;
7570     TCGv_i64 tmp64;
7571
7572     insn = arm_ldl_code(env, s->pc, s->bswap_code);
7573     s->pc += 4;
7574
7575     /* M variants do not implement ARM mode.  */
7576     if (arm_dc_feature(s, ARM_FEATURE_M)) {
7577         goto illegal_op;
7578     }
7579     cond = insn >> 28;
7580     if (cond == 0xf) {
7581         /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
7582          * choose to UNDEF. In ARMv5 and above the space is used
7583          * for miscellaneous unconditional instructions.
7584          */
7585         ARCH(5);
7586
7587         /* Unconditional instructions.  */
7588         if (((insn >> 25) & 7) == 1) {
7589             /* NEON Data processing.  */
7590             if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
7591                 goto illegal_op;
7592             }
7593
7594             if (disas_neon_data_insn(s, insn)) {
7595                 goto illegal_op;
7596             }
7597             return;
7598         }
7599         if ((insn & 0x0f100000) == 0x04000000) {
7600             /* NEON load/store.  */
7601             if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
7602                 goto illegal_op;
7603             }
7604
7605             if (disas_neon_ls_insn(s, insn)) {
7606                 goto illegal_op;
7607             }
7608             return;
7609         }
7610         if ((insn & 0x0f000e10) == 0x0e000a00) {
7611             /* VFP.  */
7612             if (disas_vfp_insn(s, insn)) {
7613                 goto illegal_op;
7614             }
7615             return;
7616         }
7617         if (((insn & 0x0f30f000) == 0x0510f000) ||
7618             ((insn & 0x0f30f010) == 0x0710f000)) {
7619             if ((insn & (1 << 22)) == 0) {
7620                 /* PLDW; v7MP */
7621                 if (!arm_dc_feature(s, ARM_FEATURE_V7MP)) {
7622                     goto illegal_op;
7623                 }
7624             }
7625             /* Otherwise PLD; v5TE+ */
7626             ARCH(5TE);
7627             return;
7628         }
7629         if (((insn & 0x0f70f000) == 0x0450f000) ||
7630             ((insn & 0x0f70f010) == 0x0650f000)) {
7631             ARCH(7);
7632             return; /* PLI; V7 */
7633         }
7634         if (((insn & 0x0f700000) == 0x04100000) ||
7635             ((insn & 0x0f700010) == 0x06100000)) {
7636             if (!arm_dc_feature(s, ARM_FEATURE_V7MP)) {
7637                 goto illegal_op;
7638             }
7639             return; /* v7MP: Unallocated memory hint: must NOP */
7640         }
7641
7642         if ((insn & 0x0ffffdff) == 0x01010000) {
7643             ARCH(6);
7644             /* setend */
7645             if (((insn >> 9) & 1) != s->bswap_code) {
7646                 /* Dynamic endianness switching not implemented. */
7647                 qemu_log_mask(LOG_UNIMP, "arm: unimplemented setend\n");
7648                 goto illegal_op;
7649             }
7650             return;
7651         } else if ((insn & 0x0fffff00) == 0x057ff000) {
7652             switch ((insn >> 4) & 0xf) {
7653             case 1: /* clrex */
7654                 ARCH(6K);
7655                 gen_clrex(s);
7656                 return;
7657             case 4: /* dsb */
7658             case 5: /* dmb */
7659             case 6: /* isb */
7660                 ARCH(7);
7661                 /* We don't emulate caches, so these are no-ops.  */
7662                 return;
7663             default:
7664                 goto illegal_op;
7665             }
7666         } else if ((insn & 0x0e5fffe0) == 0x084d0500) {
7667             /* srs */
7668             if (IS_USER(s)) {
7669                 goto illegal_op;
7670             }
7671             ARCH(6);
7672             gen_srs(s, (insn & 0x1f), (insn >> 23) & 3, insn & (1 << 21));
7673             return;
7674         } else if ((insn & 0x0e50ffe0) == 0x08100a00) {
7675             /* rfe */
7676             int32_t offset;
7677             if (IS_USER(s))
7678                 goto illegal_op;
7679             ARCH(6);
7680             rn = (insn >> 16) & 0xf;
7681             addr = load_reg(s, rn);
7682             i = (insn >> 23) & 3;
7683             switch (i) {
7684             case 0: offset = -4; break; /* DA */
7685             case 1: offset = 0; break; /* IA */
7686             case 2: offset = -8; break; /* DB */
7687             case 3: offset = 4; break; /* IB */
7688             default: abort();
7689             }
7690             if (offset)
7691                 tcg_gen_addi_i32(addr, addr, offset);
7692             /* Load PC into tmp and CPSR into tmp2.  */
7693             tmp = tcg_temp_new_i32();
7694             gen_aa32_ld32u(tmp, addr, get_mem_index(s));
7695             tcg_gen_addi_i32(addr, addr, 4);
7696             tmp2 = tcg_temp_new_i32();
7697             gen_aa32_ld32u(tmp2, addr, get_mem_index(s));
7698             if (insn & (1 << 21)) {
7699                 /* Base writeback.  */
7700                 switch (i) {
7701                 case 0: offset = -8; break;
7702                 case 1: offset = 4; break;
7703                 case 2: offset = -4; break;
7704                 case 3: offset = 0; break;
7705                 default: abort();
7706                 }
7707                 if (offset)
7708                     tcg_gen_addi_i32(addr, addr, offset);
7709                 store_reg(s, rn, addr);
7710             } else {
7711                 tcg_temp_free_i32(addr);
7712             }
7713             gen_rfe(s, tmp, tmp2);
7714             return;
7715         } else if ((insn & 0x0e000000) == 0x0a000000) {
7716             /* branch link and change to thumb (blx <offset>) */
7717             int32_t offset;
7718
7719             val = (uint32_t)s->pc;
7720             tmp = tcg_temp_new_i32();
7721             tcg_gen_movi_i32(tmp, val);
7722             store_reg(s, 14, tmp);
7723             /* Sign-extend the 24-bit offset */
7724             offset = (((int32_t)insn) << 8) >> 8;
7725             /* offset * 4 + bit24 * 2 + (thumb bit) */
7726             val += (offset << 2) | ((insn >> 23) & 2) | 1;
7727             /* pipeline offset */
7728             val += 4;
7729             /* protected by the ARCH(5) check above, near the start of the uncond block */
7730             gen_bx_im(s, val);
7731             return;
7732         } else if ((insn & 0x0e000f00) == 0x0c000100) {
7733             if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
7734                 /* iWMMXt register transfer.  */
7735                 if (extract32(s->c15_cpar, 1, 1)) {
7736                     if (!disas_iwmmxt_insn(s, insn)) {
7737                         return;
7738                     }
7739                 }
7740             }
7741         } else if ((insn & 0x0fe00000) == 0x0c400000) {
7742             /* Coprocessor double register transfer.  */
7743             ARCH(5TE);
7744         } else if ((insn & 0x0f000010) == 0x0e000010) {
7745             /* Additional coprocessor register transfer.  */
7746         } else if ((insn & 0x0ff10020) == 0x01000000) {
7747             uint32_t mask;
7748             uint32_t val;
7749             /* cps (privileged) */
7750             if (IS_USER(s))
7751                 return;
7752             mask = val = 0;
7753             if (insn & (1 << 19)) {
7754                 if (insn & (1 << 8))
7755                     mask |= CPSR_A;
7756                 if (insn & (1 << 7))
7757                     mask |= CPSR_I;
7758                 if (insn & (1 << 6))
7759                     mask |= CPSR_F;
7760                 if (insn & (1 << 18))
7761                     val |= mask;
7762             }
7763             if (insn & (1 << 17)) {
7764                 mask |= CPSR_M;
7765                 val |= (insn & 0x1f);
7766             }
7767             if (mask) {
7768                 gen_set_psr_im(s, mask, 0, val);
7769             }
7770             return;
7771         }
7772         goto illegal_op;
7773     }
7774     if (cond != 0xe) {
7775         /* If not always executed, generate a conditional jump to
7776            the next instruction.  */
7777         s->condlabel = gen_new_label();
7778         arm_gen_test_cc(cond ^ 1, s->condlabel);
7779         s->condjmp = 1;
7780     }
7781     if ((insn & 0x0f900000) == 0x03000000) {
7782         if ((insn & (1 << 21)) == 0) {
7783             ARCH(6T2);
7784             rd = (insn >> 12) & 0xf;
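             /* The 16-bit immediate is split: imm4 in insn[19:16], imm12 in insn[11:0]. */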
7785             val = ((insn >> 4) & 0xf000) | (insn & 0xfff);
7786             if ((insn & (1 << 22)) == 0) {
7787                 /* MOVW */
7788                 tmp = tcg_temp_new_i32();
7789                 tcg_gen_movi_i32(tmp, val);
7790             } else {
7791                 /* MOVT */
7792                 tmp = load_reg(s, rd);
7793                 tcg_gen_ext16u_i32(tmp, tmp);
7794                 tcg_gen_ori_i32(tmp, tmp, val << 16);
7795             }
7796             store_reg(s, rd, tmp);
7797         } else {
7798             if (((insn >> 12) & 0xf) != 0xf)
7799                 goto illegal_op;
7800             if (((insn >> 16) & 0xf) == 0) {
7801                 gen_nop_hint(s, insn & 0xff);
7802             } else {
7803                 /* CPSR = immediate */
7804                 val = insn & 0xff;
7805                 shift = ((insn >> 8) & 0xf) * 2;
7806                 if (shift)
7807                     val = (val >> shift) | (val << (32 - shift));
7808                 i = ((insn & (1 << 22)) != 0);
7809                 if (gen_set_psr_im(s, msr_mask(s, (insn >> 16) & 0xf, i),
7810                                    i, val)) {
7811                     goto illegal_op;
7812                 }
7813             }
7814         }
7815     } else if ((insn & 0x0f900000) == 0x01000000
7816                && (insn & 0x00000090) != 0x00000090) {
7817         /* miscellaneous instructions */
7818         op1 = (insn >> 21) & 3;
7819         sh = (insn >> 4) & 0xf;
7820         rm = insn & 0xf;
7821         switch (sh) {
7822         case 0x0: /* move program status register */
7823             if (op1 & 1) {
7824                 /* PSR = reg */
7825                 tmp = load_reg(s, rm);
7826                 i = ((op1 & 2) != 0);
7827                 if (gen_set_psr(s, msr_mask(s, (insn >> 16) & 0xf, i), i, tmp))
7828                     goto illegal_op;
7829             } else {
7830                 /* reg = PSR */
7831                 rd = (insn >> 12) & 0xf;
7832                 if (op1 & 2) {
7833                     if (IS_USER(s))
7834                         goto illegal_op;
7835                     tmp = load_cpu_field(spsr);
7836                 } else {
7837                     tmp = tcg_temp_new_i32();
7838                     gen_helper_cpsr_read(tmp, cpu_env);
7839                 }
7840                 store_reg(s, rd, tmp);
7841             }
7842             break;
7843         case 0x1:
7844             if (op1 == 1) {
7845                 /* branch/exchange thumb (bx).  */
7846                 ARCH(4T);
7847                 tmp = load_reg(s, rm);
7848                 gen_bx(s, tmp);
7849             } else if (op1 == 3) {
7850                 /* clz */
7851                 ARCH(5);
7852                 rd = (insn >> 12) & 0xf;
7853                 tmp = load_reg(s, rm);
7854                 gen_helper_clz(tmp, tmp);
7855                 store_reg(s, rd, tmp);
7856             } else {
7857                 goto illegal_op;
7858             }
7859             break;
7860         case 0x2:
7861             if (op1 == 1) {
7862                 ARCH(5J); /* bxj */
7863                 /* Trivial implementation equivalent to bx.  */
7864                 tmp = load_reg(s, rm);
7865                 gen_bx(s, tmp);
7866             } else {
7867                 goto illegal_op;
7868             }
7869             break;
7870         case 0x3:
7871             if (op1 != 1)
7872               goto illegal_op;
7873
7874             ARCH(5);
7875             /* branch link/exchange thumb (blx) */
7876             tmp = load_reg(s, rm);
7877             tmp2 = tcg_temp_new_i32();
7878             tcg_gen_movi_i32(tmp2, s->pc);
7879             store_reg(s, 14, tmp2);
7880             gen_bx(s, tmp);
7881             break;
7882         case 0x4:
7883         {
7884             /* crc32/crc32c */
7885             uint32_t c = extract32(insn, 8, 4);
7886
7887             /* Check this CPU supports ARMv8 CRC instructions.
7888              * op1 == 3 is UNPREDICTABLE; we handle it as UNDEFINED.
7889              * Bits 8, 10 and 11 should be zero.
7890              */
7891             if (!arm_dc_feature(s, ARM_FEATURE_CRC) || op1 == 0x3 ||
7892                 (c & 0xd) != 0) {
7893                 goto illegal_op;
7894             }
7895
7896             rn = extract32(insn, 16, 4);
7897             rd = extract32(insn, 12, 4);
7898
7899             tmp = load_reg(s, rn);
7900             tmp2 = load_reg(s, rm);
7901             if (op1 == 0) {
7902                 tcg_gen_andi_i32(tmp2, tmp2, 0xff);
7903             } else if (op1 == 1) {
7904                 tcg_gen_andi_i32(tmp2, tmp2, 0xffff);
7905             }
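             /* Operand size in bytes (1, 2 or 4), selected by op1. */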
7906             tmp3 = tcg_const_i32(1 << op1);
7907             if (c & 0x2) {
7908                 gen_helper_crc32c(tmp, tmp, tmp2, tmp3);
7909             } else {
7910                 gen_helper_crc32(tmp, tmp, tmp2, tmp3);
7911             }
7912             tcg_temp_free_i32(tmp2);
7913             tcg_temp_free_i32(tmp3);
7914             store_reg(s, rd, tmp);
7915             break;
7916         }
7917         case 0x5: /* saturating add/subtract */
7918             ARCH(5TE);
7919             rd = (insn >> 12) & 0xf;
7920             rn = (insn >> 16) & 0xf;
7921             tmp = load_reg(s, rm);
7922             tmp2 = load_reg(s, rn);
7923             if (op1 & 2)
7924                 gen_helper_double_saturate(tmp2, cpu_env, tmp2);
7925             if (op1 & 1)
7926                 gen_helper_sub_saturate(tmp, cpu_env, tmp, tmp2);
7927             else
7928                 gen_helper_add_saturate(tmp, cpu_env, tmp, tmp2);
7929             tcg_temp_free_i32(tmp2);
7930             store_reg(s, rd, tmp);
7931             break;
7932         case 0x7:
7933         {
7934             int imm16 = extract32(insn, 0, 4) | (extract32(insn, 8, 12) << 4);
7935             switch (op1) {
7936             case 1:
7937                 /* bkpt */
7938                 ARCH(5);
7939                 gen_exception_insn(s, 4, EXCP_BKPT,
7940                                    syn_aa32_bkpt(imm16, false));
7941                 break;
7942             case 2:
7943                 /* Hypervisor call (v7) */
7944                 ARCH(7);
7945                 if (IS_USER(s)) {
7946                     goto illegal_op;
7947                 }
7948                 gen_hvc(s, imm16);
7949                 break;
7950             case 3:
7951                 /* Secure monitor call (v6+) */
7952                 ARCH(6K);
7953                 if (IS_USER(s)) {
7954                     goto illegal_op;
7955                 }
7956                 gen_smc(s);
7957                 break;
7958             default:
7959                 goto illegal_op;
7960             }
7961             break;
7962         }
7963         case 0x8: /* signed multiply */
7964         case 0xa:
7965         case 0xc:
7966         case 0xe:
7967             ARCH(5TE);
7968             rs = (insn >> 8) & 0xf;
7969             rn = (insn >> 12) & 0xf;
7970             rd = (insn >> 16) & 0xf;
7971             if (op1 == 1) {
7972                 /* (32 * 16) >> 16 */
7973                 tmp = load_reg(s, rm);
7974                 tmp2 = load_reg(s, rs);
7975                 if (sh & 4)
7976                     tcg_gen_sari_i32(tmp2, tmp2, 16);
7977                 else
7978                     gen_sxth(tmp2);
7979                 tmp64 = gen_muls_i64_i32(tmp, tmp2);
7980                 tcg_gen_shri_i64(tmp64, tmp64, 16);
7981                 tmp = tcg_temp_new_i32();
7982                 tcg_gen_trunc_i64_i32(tmp, tmp64);
7983                 tcg_temp_free_i64(tmp64);
7984                 if ((sh & 2) == 0) {
7985                     tmp2 = load_reg(s, rn);
7986                     gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
7987                     tcg_temp_free_i32(tmp2);
7988                 }
7989                 store_reg(s, rd, tmp);
7990             } else {
7991                 /* 16 * 16 */
7992                 tmp = load_reg(s, rm);
7993                 tmp2 = load_reg(s, rs);
7994                 gen_mulxy(tmp, tmp2, sh & 2, sh & 4);
7995                 tcg_temp_free_i32(tmp2);
7996                 if (op1 == 2) {
7997                     tmp64 = tcg_temp_new_i64();
7998                     tcg_gen_ext_i32_i64(tmp64, tmp);
7999                     tcg_temp_free_i32(tmp);
8000                     gen_addq(s, tmp64, rn, rd);
8001                     gen_storeq_reg(s, rn, rd, tmp64);
8002                     tcg_temp_free_i64(tmp64);
8003                 } else {
8004                     if (op1 == 0) {
8005                         tmp2 = load_reg(s, rn);
8006                         gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
8007                         tcg_temp_free_i32(tmp2);
8008                     }
8009                     store_reg(s, rd, tmp);
8010                 }
8011             }
8012             break;
8013         default:
8014             goto illegal_op;
8015         }
8016     } else if (((insn & 0x0e000000) == 0 &&
8017                 (insn & 0x00000090) != 0x90) ||
8018                ((insn & 0x0e000000) == (1 << 25))) {
8019         int set_cc, logic_cc, shiftop;
8020
8021         op1 = (insn >> 21) & 0xf;
8022         set_cc = (insn >> 20) & 1;
8023         logic_cc = table_logic_cc[op1] & set_cc;
8024
8025         /* data processing instruction */
8026         if (insn & (1 << 25)) {
8027             /* immediate operand */
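             /* An 8-bit value rotated right by twice the 4-bit rotate field. */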
8028             val = insn & 0xff;
8029             shift = ((insn >> 8) & 0xf) * 2;
8030             if (shift) {
8031                 val = (val >> shift) | (val << (32 - shift));
8032             }
8033             tmp2 = tcg_temp_new_i32();
8034             tcg_gen_movi_i32(tmp2, val);
8035             if (logic_cc && shift) {
8036                 gen_set_CF_bit31(tmp2);
8037             }
8038         } else {
8039             /* register */
8040             rm = (insn) & 0xf;
8041             tmp2 = load_reg(s, rm);
8042             shiftop = (insn >> 5) & 3;
8043             if (!(insn & (1 << 4))) {
8044                 shift = (insn >> 7) & 0x1f;
8045                 gen_arm_shift_im(tmp2, shiftop, shift, logic_cc);
8046             } else {
8047                 rs = (insn >> 8) & 0xf;
8048                 tmp = load_reg(s, rs);
8049                 gen_arm_shift_reg(tmp2, shiftop, tmp, logic_cc);
8050             }
8051         }
8052         if (op1 != 0x0f && op1 != 0x0d) {
8053             rn = (insn >> 16) & 0xf;
8054             tmp = load_reg(s, rn);
8055         } else {
8056             TCGV_UNUSED_I32(tmp);
8057         }
8058         rd = (insn >> 12) & 0xf;
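         /* op1: 0=AND 1=EOR 2=SUB 3=RSB 4=ADD 5=ADC 6=SBC 7=RSC
          *      8=TST 9=TEQ a=CMP b=CMN c=ORR d=MOV e=BIC f=MVN
          */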
8059         switch (op1) {
8060         case 0x00:
8061             tcg_gen_and_i32(tmp, tmp, tmp2);
8062             if (logic_cc) {
8063                 gen_logic_CC(tmp);
8064             }
8065             store_reg_bx(s, rd, tmp);
8066             break;
8067         case 0x01:
8068             tcg_gen_xor_i32(tmp, tmp, tmp2);
8069             if (logic_cc) {
8070                 gen_logic_CC(tmp);
8071             }
8072             store_reg_bx(s, rd, tmp);
8073             break;
8074         case 0x02:
8075             if (set_cc && rd == 15) {
8076                 /* SUBS r15, ... is used for exception return.  */
8077                 if (IS_USER(s)) {
8078                     goto illegal_op;
8079                 }
8080                 gen_sub_CC(tmp, tmp, tmp2);
8081                 gen_exception_return(s, tmp);
8082             } else {
8083                 if (set_cc) {
8084                     gen_sub_CC(tmp, tmp, tmp2);
8085                 } else {
8086                     tcg_gen_sub_i32(tmp, tmp, tmp2);
8087                 }
8088                 store_reg_bx(s, rd, tmp);
8089             }
8090             break;
8091         case 0x03:
8092             if (set_cc) {
8093                 gen_sub_CC(tmp, tmp2, tmp);
8094             } else {
8095                 tcg_gen_sub_i32(tmp, tmp2, tmp);
8096             }
8097             store_reg_bx(s, rd, tmp);
8098             break;
8099         case 0x04:
8100             if (set_cc) {
8101                 gen_add_CC(tmp, tmp, tmp2);
8102             } else {
8103                 tcg_gen_add_i32(tmp, tmp, tmp2);
8104             }
8105             store_reg_bx(s, rd, tmp);
8106             break;
8107         case 0x05:
8108             if (set_cc) {
8109                 gen_adc_CC(tmp, tmp, tmp2);
8110             } else {
8111                 gen_add_carry(tmp, tmp, tmp2);
8112             }
8113             store_reg_bx(s, rd, tmp);
8114             break;
8115         case 0x06:
8116             if (set_cc) {
8117                 gen_sbc_CC(tmp, tmp, tmp2);
8118             } else {
8119                 gen_sub_carry(tmp, tmp, tmp2);
8120             }
8121             store_reg_bx(s, rd, tmp);
8122             break;
8123         case 0x07:
8124             if (set_cc) {
8125                 gen_sbc_CC(tmp, tmp2, tmp);
8126             } else {
8127                 gen_sub_carry(tmp, tmp2, tmp);
8128             }
8129             store_reg_bx(s, rd, tmp);
8130             break;
8131         case 0x08:
8132             if (set_cc) {
8133                 tcg_gen_and_i32(tmp, tmp, tmp2);
8134                 gen_logic_CC(tmp);
8135             }
8136             tcg_temp_free_i32(tmp);
8137             break;
8138         case 0x09:
8139             if (set_cc) {
8140                 tcg_gen_xor_i32(tmp, tmp, tmp2);
8141                 gen_logic_CC(tmp);
8142             }
8143             tcg_temp_free_i32(tmp);
8144             break;
8145         case 0x0a:
8146             if (set_cc) {
8147                 gen_sub_CC(tmp, tmp, tmp2);
8148             }
8149             tcg_temp_free_i32(tmp);
8150             break;
8151         case 0x0b:
8152             if (set_cc) {
8153                 gen_add_CC(tmp, tmp, tmp2);
8154             }
8155             tcg_temp_free_i32(tmp);
8156             break;
8157         case 0x0c:
8158             tcg_gen_or_i32(tmp, tmp, tmp2);
8159             if (logic_cc) {
8160                 gen_logic_CC(tmp);
8161             }
8162             store_reg_bx(s, rd, tmp);
8163             break;
8164         case 0x0d:
8165             if (logic_cc && rd == 15) {
8166                 /* MOVS r15, ... is used for exception return.  */
8167                 if (IS_USER(s)) {
8168                     goto illegal_op;
8169                 }
8170                 gen_exception_return(s, tmp2);
8171             } else {
8172                 if (logic_cc) {
8173                     gen_logic_CC(tmp2);
8174                 }
8175                 store_reg_bx(s, rd, tmp2);
8176             }
8177             break;
8178         case 0x0e:
8179             tcg_gen_andc_i32(tmp, tmp, tmp2);
8180             if (logic_cc) {
8181                 gen_logic_CC(tmp);
8182             }
8183             store_reg_bx(s, rd, tmp);
8184             break;
8185         default:
8186         case 0x0f:
8187             tcg_gen_not_i32(tmp2, tmp2);
8188             if (logic_cc) {
8189                 gen_logic_CC(tmp2);
8190             }
8191             store_reg_bx(s, rd, tmp2);
8192             break;
8193         }
8194         if (op1 != 0x0f && op1 != 0x0d) {
8195             tcg_temp_free_i32(tmp2);
8196         }
8197     } else {
8198         /* other instructions */
8199         op1 = (insn >> 24) & 0xf;
8200         switch (op1) {
8201         case 0x0:
8202         case 0x1:
8203             /* multiplies, extra load/stores */
8204             sh = (insn >> 5) & 3;
8205             if (sh == 0) {
8206                 if (op1 == 0x0) {
8207                     rd = (insn >> 16) & 0xf;
8208                     rn = (insn >> 12) & 0xf;
8209                     rs = (insn >> 8) & 0xf;
8210                     rm = (insn) & 0xf;
8211                     op1 = (insn >> 20) & 0xf;
8212                     switch (op1) {
8213                     case 0: case 1: case 2: case 3: case 6:
8214                         /* 32 bit mul */
8215                         tmp = load_reg(s, rs);
8216                         tmp2 = load_reg(s, rm);
8217                         tcg_gen_mul_i32(tmp, tmp, tmp2);
8218                         tcg_temp_free_i32(tmp2);
8219                         if (insn & (1 << 22)) {
8220                             /* Subtract (mls) */
8221                             ARCH(6T2);
8222                             tmp2 = load_reg(s, rn);
8223                             tcg_gen_sub_i32(tmp, tmp2, tmp);
8224                             tcg_temp_free_i32(tmp2);
8225                         } else if (insn & (1 << 21)) {
8226                             /* Add */
8227                             tmp2 = load_reg(s, rn);
8228                             tcg_gen_add_i32(tmp, tmp, tmp2);
8229                             tcg_temp_free_i32(tmp2);
8230                         }
8231                         if (insn & (1 << 20))
8232                             gen_logic_CC(tmp);
8233                         store_reg(s, rd, tmp);
8234                         break;
8235                     case 4:
8236                         /* 64 bit mul double accumulate (UMAAL) */
8237                         ARCH(6);
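                         /* UMAAL: rd:rn = rs * rm + rd + rn; the result
                          * always fits in 64 bits, so no carry is lost.
                          */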
8238                         tmp = load_reg(s, rs);
8239                         tmp2 = load_reg(s, rm);
8240                         tmp64 = gen_mulu_i64_i32(tmp, tmp2);
8241                         gen_addq_lo(s, tmp64, rn);
8242                         gen_addq_lo(s, tmp64, rd);
8243                         gen_storeq_reg(s, rn, rd, tmp64);
8244                         tcg_temp_free_i64(tmp64);
8245                         break;
8246                     case 8: case 9: case 10: case 11:
8247                     case 12: case 13: case 14: case 15:
8248                         /* 64 bit mul: UMULL, UMLAL, SMULL, SMLAL. */
8249                         tmp = load_reg(s, rs);
8250                         tmp2 = load_reg(s, rm);
8251                         if (insn & (1 << 22)) {
8252                             tcg_gen_muls2_i32(tmp, tmp2, tmp, tmp2);
8253                         } else {
8254                             tcg_gen_mulu2_i32(tmp, tmp2, tmp, tmp2);
8255                         }
8256                         if (insn & (1 << 21)) { /* mult accumulate */
8257                             TCGv_i32 al = load_reg(s, rn);
8258                             TCGv_i32 ah = load_reg(s, rd);
8259                             tcg_gen_add2_i32(tmp, tmp2, tmp, tmp2, al, ah);
8260                             tcg_temp_free_i32(al);
8261                             tcg_temp_free_i32(ah);
8262                         }
8263                         if (insn & (1 << 20)) {
8264                             gen_logicq_cc(tmp, tmp2);
8265                         }
8266                         store_reg(s, rn, tmp);
8267                         store_reg(s, rd, tmp2);
8268                         break;
8269                     default:
8270                         goto illegal_op;
8271                     }
8272                 } else {
8273                     rn = (insn >> 16) & 0xf;
8274                     rd = (insn >> 12) & 0xf;
8275                     if (insn & (1 << 23)) {
8276                         /* load/store exclusive */
8277                         int op2 = (insn >> 8) & 3;
8278                         op1 = (insn >> 21) & 0x3;
8279
8280                         switch (op2) {
8281                         case 0: /* lda/stl */
8282                             if (op1 == 1) {
8283                                 goto illegal_op;
8284                             }
8285                             ARCH(8);
8286                             break;
8287                         case 1: /* reserved */
8288                             goto illegal_op;
8289                         case 2: /* ldaex/stlex */
8290                             ARCH(8);
8291                             break;
8292                         case 3: /* ldrex/strex */
8293                             if (op1) {
8294                                 ARCH(6K);
8295                             } else {
8296                                 ARCH(6);
8297                             }
8298                             break;
8299                         }
8300
8301                         addr = tcg_temp_local_new_i32();
8302                         load_reg_var(s, addr, rn);
8303
8304                         /* Since the emulation does not have barriers,
8305                            the acquire/release semantics need no special
8306                            handling.  */
8307                         if (op2 == 0) {
8308                             if (insn & (1 << 20)) {
8309                                 tmp = tcg_temp_new_i32();
8310                                 switch (op1) {
8311                                 case 0: /* lda */
8312                                     gen_aa32_ld32u(tmp, addr, get_mem_index(s));
8313                                     break;
8314                                 case 2: /* ldab */
8315                                     gen_aa32_ld8u(tmp, addr, get_mem_index(s));
8316                                     break;
8317                                 case 3: /* ldah */
8318                                     gen_aa32_ld16u(tmp, addr, get_mem_index(s));
8319                                     break;
8320                                 default:
8321                                     abort();
8322                                 }
8323                                 store_reg(s, rd, tmp);
8324                             } else {
8325                                 rm = insn & 0xf;
8326                                 tmp = load_reg(s, rm);
8327                                 switch (op1) {
8328                                 case 0: /* stl */
8329                                     gen_aa32_st32(tmp, addr, get_mem_index(s));
8330                                     break;
8331                                 case 2: /* stlb */
8332                                     gen_aa32_st8(tmp, addr, get_mem_index(s));
8333                                     break;
8334                                 case 3: /* stlh */
8335                                     gen_aa32_st16(tmp, addr, get_mem_index(s));
8336                                     break;
8337                                 default:
8338                                     abort();
8339                                 }
8340                                 tcg_temp_free_i32(tmp);
8341                             }
8342                         } else if (insn & (1 << 20)) {
8343                             switch (op1) {
8344                             case 0: /* ldrex */
8345                                 gen_load_exclusive(s, rd, 15, addr, 2);
8346                                 break;
8347                             case 1: /* ldrexd */
8348                                 gen_load_exclusive(s, rd, rd + 1, addr, 3);
8349                                 break;
8350                             case 2: /* ldrexb */
8351                                 gen_load_exclusive(s, rd, 15, addr, 0);
8352                                 break;
8353                             case 3: /* ldrexh */
8354                                 gen_load_exclusive(s, rd, 15, addr, 1);
8355                                 break;
8356                             default:
8357                                 abort();
8358                             }
8359                         } else {
8360                             rm = insn & 0xf;
8361                             switch (op1) {
8362                             case 0:  /*  strex */
8363                                 gen_store_exclusive(s, rd, rm, 15, addr, 2);
8364                                 break;
8365                             case 1: /*  strexd */
8366                                 gen_store_exclusive(s, rd, rm, rm + 1, addr, 3);
8367                                 break;
8368                             case 2: /*  strexb */
8369                                 gen_store_exclusive(s, rd, rm, 15, addr, 0);
8370                                 break;
8371                             case 3: /* strexh */
8372                                 gen_store_exclusive(s, rd, rm, 15, addr, 1);
8373                                 break;
8374                             default:
8375                                 abort();
8376                             }
8377                         }
8378                         tcg_temp_free_i32(addr);
8379                     } else {
8380                         /* SWP instruction */
8381                         rm = (insn) & 0xf;
8382
8383                         /* ??? This is not really atomic.  However, we know
8384                            we never have multiple CPUs running in parallel,
8385                            so it is good enough.  */
8386                         addr = load_reg(s, rn);
8387                         tmp = load_reg(s, rm);
8388                         tmp2 = tcg_temp_new_i32();
8389                         if (insn & (1 << 22)) {
8390                             gen_aa32_ld8u(tmp2, addr, get_mem_index(s));
8391                             gen_aa32_st8(tmp, addr, get_mem_index(s));
8392                         } else {
8393                             gen_aa32_ld32u(tmp2, addr, get_mem_index(s));
8394                             gen_aa32_st32(tmp, addr, get_mem_index(s));
8395                         }
8396                         tcg_temp_free_i32(tmp);
8397                         tcg_temp_free_i32(addr);
8398                         store_reg(s, rd, tmp2);
8399                     }
8400                 }
8401             } else {
8402                 int address_offset;
8403                 int load;
8404                 /* Misc load/store */
8405                 rn = (insn >> 16) & 0xf;
8406                 rd = (insn >> 12) & 0xf;
8407                 addr = load_reg(s, rn);
8408                 if (insn & (1 << 24))
8409                     gen_add_datah_offset(s, insn, 0, addr);
8410                 address_offset = 0;
8411                 if (insn & (1 << 20)) {
8412                     /* load */
8413                     tmp = tcg_temp_new_i32();
8414                     switch (sh) {
8415                     case 1:
8416                         gen_aa32_ld16u(tmp, addr, get_mem_index(s));
8417                         break;
8418                     case 2:
8419                         gen_aa32_ld8s(tmp, addr, get_mem_index(s));
8420                         break;
8421                     default:
8422                     case 3:
8423                         gen_aa32_ld16s(tmp, addr, get_mem_index(s));
8424                         break;
8425                     }
8426                     load = 1;
8427                 } else if (sh & 2) {
8428                     ARCH(5TE);
8429                     /* doubleword */
8430                     if (sh & 1) {
8431                         /* store */
8432                         tmp = load_reg(s, rd);
8433                         gen_aa32_st32(tmp, addr, get_mem_index(s));
8434                         tcg_temp_free_i32(tmp);
8435                         tcg_gen_addi_i32(addr, addr, 4);
8436                         tmp = load_reg(s, rd + 1);
8437                         gen_aa32_st32(tmp, addr, get_mem_index(s));
8438                         tcg_temp_free_i32(tmp);
8439                         load = 0;
8440                     } else {
8441                         /* load */
8442                         tmp = tcg_temp_new_i32();
8443                         gen_aa32_ld32u(tmp, addr, get_mem_index(s));
8444                         store_reg(s, rd, tmp);
8445                         tcg_gen_addi_i32(addr, addr, 4);
8446                         tmp = tcg_temp_new_i32();
8447                         gen_aa32_ld32u(tmp, addr, get_mem_index(s));
8448                         rd++;
8449                         load = 1;
8450                     }
8451                     address_offset = -4;
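                     /* addr was advanced by 4 for the second word; compensate at writeback. */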
8452                 } else {
8453                     /* store */
8454                     tmp = load_reg(s, rd);
8455                     gen_aa32_st16(tmp, addr, get_mem_index(s));
8456                     tcg_temp_free_i32(tmp);
8457                     load = 0;
8458                 }
8459                 /* Perform base writeback before storing the loaded value to
8460                    ensure correct behavior with overlapping index registers;
8461                    ldrd with base writeback is undefined if the
8462                    destination and index registers overlap.  */
8463                 if (!(insn & (1 << 24))) {
8464                     gen_add_datah_offset(s, insn, address_offset, addr);
8465                     store_reg(s, rn, addr);
8466                 } else if (insn & (1 << 21)) {
8467                     if (address_offset)
8468                         tcg_gen_addi_i32(addr, addr, address_offset);
8469                     store_reg(s, rn, addr);
8470                 } else {
8471                     tcg_temp_free_i32(addr);
8472                 }
8473                 if (load) {
8474                     /* Complete the load.  */
8475                     store_reg(s, rd, tmp);
8476                 }
8477             }
8478             break;
8479         case 0x4:
8480         case 0x5:
8481             goto do_ldst;
8482         case 0x6:
8483         case 0x7:
8484             if (insn & (1 << 4)) {
8485                 ARCH(6);
8486                 /* ARMv6 Media instructions.  */
8487                 rm = insn & 0xf;
8488                 rn = (insn >> 16) & 0xf;
8489                 rd = (insn >> 12) & 0xf;
8490                 rs = (insn >> 8) & 0xf;
8491                 switch ((insn >> 23) & 3) {
8492                 case 0: /* Parallel add/subtract.  */
8493                     op1 = (insn >> 20) & 7;
8494                     tmp = load_reg(s, rn);
8495                     tmp2 = load_reg(s, rm);
8496                     sh = (insn >> 5) & 7;
8497                     if ((op1 & 3) == 0 || sh == 5 || sh == 6)
8498                         goto illegal_op;
8499                     gen_arm_parallel_addsub(op1, sh, tmp, tmp2);
8500                     tcg_temp_free_i32(tmp2);
8501                     store_reg(s, rd, tmp);
8502                     break;
8503                 case 1:
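                     /* Case 1 is sub-decoded by the mask tests below:
                        halfword pack, saturate, select bytes,
                        sign/zero extend and byte-reverse. */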
8504                     if ((insn & 0x00700020) == 0) {
8505                         /* Halfword pack.  */
8506                         tmp = load_reg(s, rn);
8507                         tmp2 = load_reg(s, rm);
8508                         shift = (insn >> 7) & 0x1f;
8509                         if (insn & (1 << 6)) {
8510                             /* pkhtb */
8511                             if (shift == 0)
8512                                 shift = 31;
8513                             tcg_gen_sari_i32(tmp2, tmp2, shift);
8514                             tcg_gen_andi_i32(tmp, tmp, 0xffff0000);
8515                             tcg_gen_ext16u_i32(tmp2, tmp2);
8516                         } else {
8517                             /* pkhbt */
8518                             if (shift)
8519                                 tcg_gen_shli_i32(tmp2, tmp2, shift);
8520                             tcg_gen_ext16u_i32(tmp, tmp);
8521                             tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
8522                         }
8523                         tcg_gen_or_i32(tmp, tmp, tmp2);
8524                         tcg_temp_free_i32(tmp2);
8525                         store_reg(s, rd, tmp);
8526                     } else if ((insn & 0x00200020) == 0x00200000) {
8527                         /* [us]sat */
8528                         tmp = load_reg(s, rm);
8529                         shift = (insn >> 7) & 0x1f;
8530                         if (insn & (1 << 6)) {
8531                             if (shift == 0)
8532                                 shift = 31;
8533                             tcg_gen_sari_i32(tmp, tmp, shift);
8534                         } else {
8535                             tcg_gen_shli_i32(tmp, tmp, shift);
8536                         }
8537                         sh = (insn >> 16) & 0x1f;
8538                         tmp2 = tcg_const_i32(sh);
8539                         if (insn & (1 << 22))
8540                           gen_helper_usat(tmp, cpu_env, tmp, tmp2);
8541                         else
8542                           gen_helper_ssat(tmp, cpu_env, tmp, tmp2);
8543                         tcg_temp_free_i32(tmp2);
8544                         store_reg(s, rd, tmp);
8545                     } else if ((insn & 0x00300fe0) == 0x00200f20) {
8546                         /* [us]sat16 */
8547                         tmp = load_reg(s, rm);
8548                         sh = (insn >> 16) & 0x1f;
8549                         tmp2 = tcg_const_i32(sh);
8550                         if (insn & (1 << 22))
8551                           gen_helper_usat16(tmp, cpu_env, tmp, tmp2);
8552                         else
8553                           gen_helper_ssat16(tmp, cpu_env, tmp, tmp2);
8554                         tcg_temp_free_i32(tmp2);
8555                         store_reg(s, rd, tmp);
8556                     } else if ((insn & 0x00700fe0) == 0x00000fa0) {
8557                         /* Select bytes.  */
8558                         tmp = load_reg(s, rn);
8559                         tmp2 = load_reg(s, rm);
8560                         tmp3 = tcg_temp_new_i32();
8561                         tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUARMState, GE));
8562                         gen_helper_sel_flags(tmp, tmp3, tmp, tmp2);
8563                         tcg_temp_free_i32(tmp3);
8564                         tcg_temp_free_i32(tmp2);
8565                         store_reg(s, rd, tmp);
8566                     } else if ((insn & 0x000003e0) == 0x00000060) {
8567                         tmp = load_reg(s, rm);
8568                         shift = (insn >> 10) & 3;
8569                         /* ??? In many cases it's not necessary to do a
8570                            rotate, a shift is sufficient.  */
8571                         if (shift != 0)
8572                             tcg_gen_rotri_i32(tmp, tmp, shift * 8);
8573                         op1 = (insn >> 20) & 7;
8574                         switch (op1) {
8575                         case 0: gen_sxtb16(tmp);  break;
8576                         case 2: gen_sxtb(tmp);    break;
8577                         case 3: gen_sxth(tmp);    break;
8578                         case 4: gen_uxtb16(tmp);  break;
8579                         case 6: gen_uxtb(tmp);    break;
8580                         case 7: gen_uxth(tmp);    break;
8581                         default: goto illegal_op;
8582                         }
8583                         if (rn != 15) {
8584                             tmp2 = load_reg(s, rn);
8585                             if ((op1 & 3) == 0) {
8586                                 gen_add16(tmp, tmp2);
8587                             } else {
8588                                 tcg_gen_add_i32(tmp, tmp, tmp2);
8589                                 tcg_temp_free_i32(tmp2);
8590                             }
8591                         }
8592                         store_reg(s, rd, tmp);
8593                     } else if ((insn & 0x003f0f60) == 0x003f0f20) {
8594                         /* rev */
8595                         tmp = load_reg(s, rm);
8596                         if (insn & (1 << 22)) {
8597                             if (insn & (1 << 7)) {
8598                                 gen_revsh(tmp);
8599                             } else {
8600                                 ARCH(6T2);
8601                                 gen_helper_rbit(tmp, tmp);
8602                             }
8603                         } else {
8604                             if (insn & (1 << 7))
8605                                 gen_rev16(tmp);
8606                             else
8607                                 tcg_gen_bswap32_i32(tmp, tmp);
8608                         }
8609                         store_reg(s, rd, tmp);
8610                     } else {
8611                         goto illegal_op;
8612                     }
8613                     break;
8614                 case 2: /* Multiplies (Type 3).  */
8615                     switch ((insn >> 20) & 0x7) {
8616                     case 5:
8617                         if (((insn >> 6) ^ (insn >> 7)) & 1) {
8618                             /* op2 not 00x or 11x : UNDEF */
8619                             goto illegal_op;
8620                         }
8621                         /* Signed multiply most significant [accumulate].
8622                            (SMMUL, SMMLA, SMMLS) */
8623                         tmp = load_reg(s, rm);
8624                         tmp2 = load_reg(s, rs);
8625                         tmp64 = gen_muls_i64_i32(tmp, tmp2);
8626
8627                         if (rd != 15) {
8628                             tmp = load_reg(s, rd);
8629                             if (insn & (1 << 6)) {
8630                                 tmp64 = gen_subq_msw(tmp64, tmp);
8631                             } else {
8632                                 tmp64 = gen_addq_msw(tmp64, tmp);
8633                             }
8634                         }
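                             /* Bit 5 selects the rounding forms (SMMULR,
                                SMMLAR, SMMLSR): adding 0x80000000 makes the
                                32-bit right shift below round to nearest. */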
8635                         if (insn & (1 << 5)) {
8636                             tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u);
8637                         }
8638                         tcg_gen_shri_i64(tmp64, tmp64, 32);
8639                         tmp = tcg_temp_new_i32();
8640                         tcg_gen_trunc_i64_i32(tmp, tmp64);
8641                         tcg_temp_free_i64(tmp64);
8642                         store_reg(s, rn, tmp);
8643                         break;
8644                     case 0:
8645                     case 4:
8646                         /* SMLAD, SMUAD, SMLSD, SMUSD, SMLALD, SMLSLD */
8647                         if (insn & (1 << 7)) {
8648                             goto illegal_op;
8649                         }
8650                         tmp = load_reg(s, rm);
8651                         tmp2 = load_reg(s, rs);
8652                         if (insn & (1 << 5))
8653                             gen_swap_half(tmp2);
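                         /* Form the two signed 16x16 products: low halves
                            into tmp, high halves into tmp2. */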
8654                         gen_smul_dual(tmp, tmp2);
8655                         if (insn & (1 << 22)) {
8656                             /* smlald, smlsld */
8657                             TCGv_i64 tmp64_2;
8658
8659                             tmp64 = tcg_temp_new_i64();
8660                             tmp64_2 = tcg_temp_new_i64();
8661                             tcg_gen_ext_i32_i64(tmp64, tmp);
8662                             tcg_gen_ext_i32_i64(tmp64_2, tmp2);
8663                             tcg_temp_free_i32(tmp);
8664                             tcg_temp_free_i32(tmp2);
8665                             if (insn & (1 << 6)) {
8666                                 tcg_gen_sub_i64(tmp64, tmp64, tmp64_2);
8667                             } else {
8668                                 tcg_gen_add_i64(tmp64, tmp64, tmp64_2);
8669                             }
8670                             tcg_temp_free_i64(tmp64_2);
8671                             gen_addq(s, tmp64, rd, rn);
8672                             gen_storeq_reg(s, rd, rn, tmp64);
8673                             tcg_temp_free_i64(tmp64);
8674                         } else {
8675                             /* smuad, smusd, smlad, smlsd */
8676                             if (insn & (1 << 6)) {
8677                                 /* This subtraction cannot overflow. */
8678                                 tcg_gen_sub_i32(tmp, tmp, tmp2);
8679                             } else {
8680                                 /* This addition cannot overflow 32 bits;
8681                                  * however it may overflow considered as a
8682                                  * signed operation, in which case we must set
8683                                  * the Q flag.
8684                                  */
8685                                 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
8686                             }
8687                             tcg_temp_free_i32(tmp2);
8688                             if (rd != 15)
8689                               {
8690                                 tmp2 = load_reg(s, rd);
8691                                 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
8692                                 tcg_temp_free_i32(tmp2);
8693                               }
8694                             store_reg(s, rn, tmp);
8695                         }
8696                         break;
8697                     case 1:
8698                     case 3:
8699                         /* SDIV, UDIV */
8700                         if (!arm_dc_feature(s, ARM_FEATURE_ARM_DIV)) {
8701                             goto illegal_op;
8702                         }
8703                         if (((insn >> 5) & 7) || (rd != 15)) {
8704                             goto illegal_op;
8705                         }
8706                         tmp = load_reg(s, rm);
8707                         tmp2 = load_reg(s, rs);
8708                         if (insn & (1 << 21)) {
8709                             gen_helper_udiv(tmp, tmp, tmp2);
8710                         } else {
8711                             gen_helper_sdiv(tmp, tmp, tmp2);
8712                         }
8713                         tcg_temp_free_i32(tmp2);
8714                         store_reg(s, rn, tmp);
8715                         break;
8716                     default:
8717                         goto illegal_op;
8718                     }
8719                     break;
8720                 case 3:
8721                     op1 = ((insn >> 17) & 0x38) | ((insn >> 5) & 7);
8722                     switch (op1) {
8723                     case 0: /* Unsigned sum of absolute differences.  */
8724                         ARCH(6);
8725                         tmp = load_reg(s, rm);
8726                         tmp2 = load_reg(s, rs);
8727                         gen_helper_usad8(tmp, tmp, tmp2);
8728                         tcg_temp_free_i32(tmp2);
8729                         if (rd != 15) {
8730                             tmp2 = load_reg(s, rd);
8731                             tcg_gen_add_i32(tmp, tmp, tmp2);
8732                             tcg_temp_free_i32(tmp2);
8733                         }
8734                         store_reg(s, rn, tmp);
8735                         break;
8736                     case 0x20: case 0x24: case 0x28: case 0x2c:
8737                         /* Bitfield insert/clear.  */
8738                         ARCH(6T2);
8739                         shift = (insn >> 7) & 0x1f;
8740                         i = (insn >> 16) & 0x1f;
8741                         i = i + 1 - shift;
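                             /* shift holds the lsb, insn[20:16] the msb; i is
                                now the field width.  Width 32 replaces the
                                whole register, so no deposit is needed. */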
8742                         if (rm == 15) {
8743                             tmp = tcg_temp_new_i32();
8744                             tcg_gen_movi_i32(tmp, 0);
8745                         } else {
8746                             tmp = load_reg(s, rm);
8747                         }
8748                         if (i != 32) {
8749                             tmp2 = load_reg(s, rd);
8750                             tcg_gen_deposit_i32(tmp, tmp2, tmp, shift, i);
8751                             tcg_temp_free_i32(tmp2);
8752                         }
8753                         store_reg(s, rd, tmp);
8754                         break;
8755                     case 0x12: case 0x16: case 0x1a: case 0x1e: /* sbfx */
8756                     case 0x32: case 0x36: case 0x3a: case 0x3e: /* ubfx */
8757                         ARCH(6T2);
8758                         tmp = load_reg(s, rm);
8759                         shift = (insn >> 7) & 0x1f;
8760                         i = ((insn >> 16) & 0x1f) + 1;
8761                         if (shift + i > 32)
8762                             goto illegal_op;
8763                         if (i < 32) {
8764                             if (op1 & 0x20) {
8765                                 gen_ubfx(tmp, shift, (1u << i) - 1);
8766                             } else {
8767                                 gen_sbfx(tmp, shift, i);
8768                             }
8769                         }
8770                         store_reg(s, rd, tmp);
8771                         break;
8772                     default:
8773                         goto illegal_op;
8774                     }
8775                     break;
8776                 }
8777                 break;
8778             }
8779         do_ldst:
8780             /* Check for undefined extension instructions
8781              * per the ARM Bible, i.e.:
8782              * xxxx 0111 1111 xxxx  xxxx xxxx 1111 xxxx
8783              */
8784             sh = (0xf << 20) | (0xf << 4);
8785             if (op1 == 0x7 && ((insn & sh) == sh))
8786             {
8787                 goto illegal_op;
8788             }
8789             /* load/store byte/word */
8790             rn = (insn >> 16) & 0xf;
8791             rd = (insn >> 12) & 0xf;
8792             tmp2 = load_reg(s, rn);
8793             if ((insn & 0x01200000) == 0x00200000) {
8794                 /* ldrt/strt */
8795                 i = MMU_USER_IDX;
8796             } else {
8797                 i = get_mem_index(s);
8798             }
8799             if (insn & (1 << 24))
8800                 gen_add_data_offset(s, insn, tmp2);
8801             if (insn & (1 << 20)) {
8802                 /* load */
8803                 tmp = tcg_temp_new_i32();
8804                 if (insn & (1 << 22)) {
8805                     gen_aa32_ld8u(tmp, tmp2, i);
8806                 } else {
8807                     gen_aa32_ld32u(tmp, tmp2, i);
8808                 }
8809             } else {
8810                 /* store */
8811                 tmp = load_reg(s, rd);
8812                 if (insn & (1 << 22)) {
8813                     gen_aa32_st8(tmp, tmp2, i);
8814                 } else {
8815                     gen_aa32_st32(tmp, tmp2, i);
8816                 }
8817                 tcg_temp_free_i32(tmp);
8818             }
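             /* Post-indexed (P clear): apply the offset after the access and
                always write back; pre-indexed writes back only when the
                W bit (21) is set. */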
8819             if (!(insn & (1 << 24))) {
8820                 gen_add_data_offset(s, insn, tmp2);
8821                 store_reg(s, rn, tmp2);
8822             } else if (insn & (1 << 21)) {
8823                 store_reg(s, rn, tmp2);
8824             } else {
8825                 tcg_temp_free_i32(tmp2);
8826             }
8827             if (insn & (1 << 20)) {
8828                 /* Complete the load.  */
8829                 store_reg_from_load(s, rd, tmp);
8830             }
8831             break;
8832         case 0x08:
8833         case 0x09:
8834             {
8835                 int j, n, user, loaded_base;
8836                 TCGv_i32 loaded_var;
8837                 /* load/store multiple words */
8838                 /* XXX: store correct base if write back */
8839                 user = 0;
8840                 if (insn & (1 << 22)) {
8841                     if (IS_USER(s))
8842                         goto illegal_op; /* only usable in supervisor mode */
8843
8844                     if ((insn & (1 << 15)) == 0)
8845                         user = 1;
8846                 }
8847                 rn = (insn >> 16) & 0xf;
8848                 addr = load_reg(s, rn);
8849
8850                 /* compute total size */
8851                 loaded_base = 0;
8852                 TCGV_UNUSED_I32(loaded_var);
8853                 n = 0;
8854                 for (i = 0; i < 16; i++) {
8855                     if (insn & (1 << i))
8856                         n++;
8857                 }
8858                 /* XXX: test invalid n == 0 case ? */
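                 /* Normalise the base so the transfer loop below always walks
                    upwards: decrement-before starts n*4 below the base,
                    decrement-after (n-1)*4 below, increment-before 4 above. */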
8859                 if (insn & (1 << 23)) {
8860                     if (insn & (1 << 24)) {
8861                         /* pre increment */
8862                         tcg_gen_addi_i32(addr, addr, 4);
8863                     } else {
8864                         /* post increment */
8865                     }
8866                 } else {
8867                     if (insn & (1 << 24)) {
8868                         /* pre decrement */
8869                         tcg_gen_addi_i32(addr, addr, -(n * 4));
8870                     } else {
8871                         /* post decrement */
8872                         if (n != 1)
8873                             tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
8874                     }
8875                 }
8876                 j = 0;
8877                 for (i = 0; i < 16; i++) {
8878                     if (insn & (1 << i)) {
8879                         if (insn & (1 << 20)) {
8880                             /* load */
8881                             tmp = tcg_temp_new_i32();
8882                             gen_aa32_ld32u(tmp, addr, get_mem_index(s));
8883                             if (user) {
8884                                 tmp2 = tcg_const_i32(i);
8885                                 gen_helper_set_user_reg(cpu_env, tmp2, tmp);
8886                                 tcg_temp_free_i32(tmp2);
8887                                 tcg_temp_free_i32(tmp);
8888                             } else if (i == rn) {
8889                                 loaded_var = tmp;
8890                                 loaded_base = 1;
8891                             } else {
8892                                 store_reg_from_load(s, i, tmp);
8893                             }
8894                         } else {
8895                             /* store */
8896                             if (i == 15) {
8897                                 /* special case: r15 = PC + 8 */
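                                 /* s->pc already points 4 past this insn,
                                    so +4 yields the architectural PC + 8. */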
8898                                 val = (long)s->pc + 4;
8899                                 tmp = tcg_temp_new_i32();
8900                                 tcg_gen_movi_i32(tmp, val);
8901                             } else if (user) {
8902                                 tmp = tcg_temp_new_i32();
8903                                 tmp2 = tcg_const_i32(i);
8904                                 gen_helper_get_user_reg(tmp, cpu_env, tmp2);
8905                                 tcg_temp_free_i32(tmp2);
8906                             } else {
8907                                 tmp = load_reg(s, i);
8908                             }
8909                             gen_aa32_st32(tmp, addr, get_mem_index(s));
8910                             tcg_temp_free_i32(tmp);
8911                         }
8912                         j++;
8913                         /* no need to add after the last transfer */
8914                         if (j != n)
8915                             tcg_gen_addi_i32(addr, addr, 4);
8916                     }
8917                 }
8918                 if (insn & (1 << 21)) {
8919                     /* write back */
8920                     if (insn & (1 << 23)) {
8921                         if (insn & (1 << 24)) {
8922                             /* pre increment */
8923                         } else {
8924                             /* post increment */
8925                             tcg_gen_addi_i32(addr, addr, 4);
8926                         }
8927                     } else {
8928                         if (insn & (1 << 24)) {
8929                             /* pre decrement */
8930                             if (n != 1)
8931                                 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
8932                         } else {
8933                             /* post decrement */
8934                             tcg_gen_addi_i32(addr, addr, -(n * 4));
8935                         }
8936                     }
8937                     store_reg(s, rn, addr);
8938                 } else {
8939                     tcg_temp_free_i32(addr);
8940                 }
8941                 if (loaded_base) {
8942                     store_reg(s, rn, loaded_var);
8943                 }
8944                 if ((insn & (1 << 22)) && !user) {
8945                     /* Restore CPSR from SPSR.  */
8946                     tmp = load_cpu_field(spsr);
8947                     gen_set_cpsr(tmp, CPSR_ERET_MASK);
8948                     tcg_temp_free_i32(tmp);
8949                     s->is_jmp = DISAS_UPDATE;
8950                 }
8951             }
8952             break;
8953         case 0xa:
8954         case 0xb:
8955             {
8956                 int32_t offset;
8957
8958                 /* branch (and link) */
8959                 val = (int32_t)s->pc;
8960                 if (insn & (1 << 24)) {
8961                     tmp = tcg_temp_new_i32();
8962                     tcg_gen_movi_i32(tmp, val);
8963                     store_reg(s, 14, tmp);
8964                 }
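                 /* imm24 is scaled by 4 and sign-extended to 26 bits; the
                    extra +4 below makes the target relative to this insn + 8,
                    since val is already insn + 4. */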
8965                 offset = sextract32(insn << 2, 0, 26);
8966                 val += offset + 4;
8967                 gen_jmp(s, val);
8968             }
8969             break;
8970         case 0xc:
8971         case 0xd:
8972         case 0xe:
8973             if (((insn >> 8) & 0xe) == 10) {
8974                 /* VFP.  */
8975                 if (disas_vfp_insn(s, insn)) {
8976                     goto illegal_op;
8977                 }
8978             } else if (disas_coproc_insn(s, insn)) {
8979                 /* Coprocessor.  */
8980                 goto illegal_op;
8981             }
8982             break;
8983         case 0xf:
8984             /* swi */
8985             gen_set_pc_im(s, s->pc);
8986             s->svc_imm = extract32(insn, 0, 24);
8987             s->is_jmp = DISAS_SWI;
8988             break;
8989         default:
8990         illegal_op:
8991             gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized());
8992             break;
8993         }
8994     }
8995 }
8996
8997 /* Return true if this is a Thumb-2 logical op.  */
8998 static int
8999 thumb2_logic_op(int op)
9000 {
9001     return (op < 8);
9002 }
9003
9004 /* Generate code for a Thumb-2 data processing operation.  If CONDS is nonzero
9005    then set condition code flags based on the result of the operation.
9006    If SHIFTER_OUT is nonzero then set the carry flag for logical operations
9007    to the high bit of T1.
9008    Returns zero if the opcode is valid.  */
9009
9010 static int
9011 gen_thumb2_data_op(DisasContext *s, int op, int conds, uint32_t shifter_out,
9012                    TCGv_i32 t0, TCGv_i32 t1)
9013 {
9014     int logic_cc;
9015
9016     logic_cc = 0;
9017     switch (op) {
9018     case 0: /* and */
9019         tcg_gen_and_i32(t0, t0, t1);
9020         logic_cc = conds;
9021         break;
9022     case 1: /* bic */
9023         tcg_gen_andc_i32(t0, t0, t1);
9024         logic_cc = conds;
9025         break;
9026     case 2: /* orr */
9027         tcg_gen_or_i32(t0, t0, t1);
9028         logic_cc = conds;
9029         break;
9030     case 3: /* orn */
9031         tcg_gen_orc_i32(t0, t0, t1);
9032         logic_cc = conds;
9033         break;
9034     case 4: /* eor */
9035         tcg_gen_xor_i32(t0, t0, t1);
9036         logic_cc = conds;
9037         break;
9038     case 8: /* add */
9039         if (conds)
9040             gen_add_CC(t0, t0, t1);
9041         else
9042             tcg_gen_add_i32(t0, t0, t1);
9043         break;
9044     case 10: /* adc */
9045         if (conds)
9046             gen_adc_CC(t0, t0, t1);
9047         else
9048             gen_adc(t0, t1);
9049         break;
9050     case 11: /* sbc */
9051         if (conds) {
9052             gen_sbc_CC(t0, t0, t1);
9053         } else {
9054             gen_sub_carry(t0, t0, t1);
9055         }
9056         break;
9057     case 13: /* sub */
9058         if (conds)
9059             gen_sub_CC(t0, t0, t1);
9060         else
9061             tcg_gen_sub_i32(t0, t0, t1);
9062         break;
9063     case 14: /* rsb */
9064         if (conds)
9065             gen_sub_CC(t0, t1, t0);
9066         else
9067             tcg_gen_sub_i32(t0, t1, t0);
9068         break;
9069     default: /* 5, 6, 7, 9, 12, 15. */
9070         return 1;
9071     }
9072     if (logic_cc) {
9073         gen_logic_CC(t0);
9074         if (shifter_out)
9075             gen_set_CF_bit31(t1);
9076     }
9077     return 0;
9078 }
9079
9080 /* Translate a 32-bit thumb instruction.  Returns nonzero if the instruction
9081    is not legal.  */
9082 static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw1)
9083 {
9084     uint32_t insn, imm, shift, offset;
9085     uint32_t rd, rn, rm, rs;
9086     TCGv_i32 tmp;
9087     TCGv_i32 tmp2;
9088     TCGv_i32 tmp3;
9089     TCGv_i32 addr;
9090     TCGv_i64 tmp64;
9091     int op;
9092     int shiftop;
9093     int conds;
9094     int logic_cc;
9095
9096     if (!(arm_dc_feature(s, ARM_FEATURE_THUMB2)
9097           || arm_dc_feature(s, ARM_FEATURE_M))) {
9098         /* Thumb-1 cores may need to treat bl and blx as a pair of
9099            16-bit instructions to get correct prefetch abort behavior.  */
9100         insn = insn_hw1;
9101         if ((insn & (1 << 12)) == 0) {
9102             ARCH(5);
9103             /* Second half of blx.  */
9104             offset = ((insn & 0x7ff) << 1);
9105             tmp = load_reg(s, 14);
9106             tcg_gen_addi_i32(tmp, tmp, offset);
9107             tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
9108
9109             tmp2 = tcg_temp_new_i32();
9110             tcg_gen_movi_i32(tmp2, s->pc | 1);
9111             store_reg(s, 14, tmp2);
9112             gen_bx(s, tmp);
9113             return 0;
9114         }
9115         if (insn & (1 << 11)) {
9116             /* Second half of bl.  */
9117             offset = ((insn & 0x7ff) << 1) | 1;
9118             tmp = load_reg(s, 14);
9119             tcg_gen_addi_i32(tmp, tmp, offset);
9120
9121             tmp2 = tcg_temp_new_i32();
9122             tcg_gen_movi_i32(tmp2, s->pc | 1);
9123             store_reg(s, 14, tmp2);
9124             gen_bx(s, tmp);
9125             return 0;
9126         }
9127         if ((s->pc & ~TARGET_PAGE_MASK) == 0) {
9128             /* Instruction spans a page boundary.  Implement it as two
9129                16-bit instructions in case the second half causes a
9130                prefetch abort.  */
9131             offset = ((int32_t)insn << 21) >> 9;
9132             tcg_gen_movi_i32(cpu_R[14], s->pc + 2 + offset);
9133             return 0;
9134         }
9135         /* Fall through to 32-bit decode.  */
9136     }
9137
9138     insn = arm_lduw_code(env, s->pc, s->bswap_code);
9139     s->pc += 2;
9140     insn |= (uint32_t)insn_hw1 << 16;
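     /* insn now holds the full 32-bit encoding: the first halfword in the
        high half, the just-fetched second halfword in the low half. */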
9141
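     /* Every 32-bit encoding other than the bl/blx halfword pairs
        requires Thumb-2 (ARMv6T2). */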
9142     if ((insn & 0xf800e800) != 0xf000e800) {
9143         ARCH(6T2);
9144     }
9145
9146     rn = (insn >> 16) & 0xf;
9147     rs = (insn >> 12) & 0xf;
9148     rd = (insn >> 8) & 0xf;
9149     rm = insn & 0xf;
9150     switch ((insn >> 25) & 0xf) {
9151     case 0: case 1: case 2: case 3:
9152         /* 16-bit instructions.  Should never happen.  */
9153         abort();
9154     case 4:
9155         if (insn & (1 << 22)) {
9156             /* Other load/store, table branch.  */
9157             if (insn & 0x01200000) {
9158                 /* Load/store doubleword.  */
9159                 if (rn == 15) {
9160                     addr = tcg_temp_new_i32();
9161                     tcg_gen_movi_i32(addr, s->pc & ~3);
9162                 } else {
9163                     addr = load_reg(s, rn);
9164                 }
9165                 offset = (insn & 0xff) * 4;
9166                 if ((insn & (1 << 23)) == 0)
9167                     offset = -offset;
9168                 if (insn & (1 << 24)) {
9169                     tcg_gen_addi_i32(addr, addr, offset);
9170                     offset = 0;
9171                 }
9172                 if (insn & (1 << 20)) {
9173                     /* ldrd */
9174                     tmp = tcg_temp_new_i32();
9175                     gen_aa32_ld32u(tmp, addr, get_mem_index(s));
9176                     store_reg(s, rs, tmp);
9177                     tcg_gen_addi_i32(addr, addr, 4);
9178                     tmp = tcg_temp_new_i32();
9179                     gen_aa32_ld32u(tmp, addr, get_mem_index(s));
9180                     store_reg(s, rd, tmp);
9181                 } else {
9182                     /* strd */
9183                     tmp = load_reg(s, rs);
9184                     gen_aa32_st32(tmp, addr, get_mem_index(s));
9185                     tcg_temp_free_i32(tmp);
9186                     tcg_gen_addi_i32(addr, addr, 4);
9187                     tmp = load_reg(s, rd);
9188                     gen_aa32_st32(tmp, addr, get_mem_index(s));
9189                     tcg_temp_free_i32(tmp);
9190                 }
9191                 if (insn & (1 << 21)) {
9192                     /* Base writeback.  */
9193                     if (rn == 15)
9194                         goto illegal_op;
9195                     tcg_gen_addi_i32(addr, addr, offset - 4);
9196                     store_reg(s, rn, addr);
9197                 } else {
9198                     tcg_temp_free_i32(addr);
9199                 }
9200             } else if ((insn & (1 << 23)) == 0) {
9201                 /* Load/store exclusive word.  */
9202                 addr = tcg_temp_local_new_i32();
9203                 load_reg_var(s, addr, rn);
9204                 tcg_gen_addi_i32(addr, addr, (insn & 0xff) << 2);
9205                 if (insn & (1 << 20)) {
9206                     gen_load_exclusive(s, rs, 15, addr, 2);
9207                 } else {
9208                     gen_store_exclusive(s, rd, rs, 15, addr, 2);
9209                 }
9210                 tcg_temp_free_i32(addr);
9211             } else if ((insn & (7 << 5)) == 0) {
9212                 /* Table Branch.  */
9213                 if (rn == 15) {
9214                     addr = tcg_temp_new_i32();
9215                     tcg_gen_movi_i32(addr, s->pc);
9216                 } else {
9217                     addr = load_reg(s, rn);
9218                 }
9219                 tmp = load_reg(s, rm);
9220                 tcg_gen_add_i32(addr, addr, tmp);
9221                 if (insn & (1 << 4)) {
9222                     /* tbh */
9223                     tcg_gen_add_i32(addr, addr, tmp);
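                     /* rm was already added once above; adding it again
                        scales the table index by 2 for halfword entries. */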
9224                     tcg_temp_free_i32(tmp);
9225                     tmp = tcg_temp_new_i32();
9226                     gen_aa32_ld16u(tmp, addr, get_mem_index(s));
9227                 } else { /* tbb */
9228                     tcg_temp_free_i32(tmp);
9229                     tmp = tcg_temp_new_i32();
9230                     gen_aa32_ld8u(tmp, addr, get_mem_index(s));
9231                 }
9232                 tcg_temp_free_i32(addr);
9233                 tcg_gen_shli_i32(tmp, tmp, 1);
9234                 tcg_gen_addi_i32(tmp, tmp, s->pc);
9235                 store_reg(s, 15, tmp);
9236             } else {
9237                 int op2 = (insn >> 6) & 0x3;
9238                 op = (insn >> 4) & 0x3;
9239                 switch (op2) {
9240                 case 0:
9241                     goto illegal_op;
9242                 case 1:
9243                     /* Load/store exclusive byte/halfword/doubleword */
9244                     if (op == 2) {
9245                         goto illegal_op;
9246                     }
9247                     ARCH(7);
9248                     break;
9249                 case 2:
9250                     /* Load-acquire/store-release */
9251                     if (op == 3) {
9252                         goto illegal_op;
9253                     }
9254                     /* Fall through */
9255                 case 3:
9256                     /* Load-acquire/store-release exclusive */
9257                     ARCH(8);
9258                     break;
9259                 }
9260                 addr = tcg_temp_local_new_i32();
9261                 load_reg_var(s, addr, rn);
9262                 if (!(op2 & 1)) {
9263                     if (insn & (1 << 20)) {
9264                         tmp = tcg_temp_new_i32();
9265                         switch (op) {
9266                         case 0: /* ldab */
9267                             gen_aa32_ld8u(tmp, addr, get_mem_index(s));
9268                             break;
9269                         case 1: /* ldah */
9270                             gen_aa32_ld16u(tmp, addr, get_mem_index(s));
9271                             break;
9272                         case 2: /* lda */
9273                             gen_aa32_ld32u(tmp, addr, get_mem_index(s));
9274                             break;
9275                         default:
9276                             abort();
9277                         }
9278                         store_reg(s, rs, tmp);
9279                     } else {
9280                         tmp = load_reg(s, rs);
9281                         switch (op) {
9282                         case 0: /* stlb */
9283                             gen_aa32_st8(tmp, addr, get_mem_index(s));
9284                             break;
9285                         case 1: /* stlh */
9286                             gen_aa32_st16(tmp, addr, get_mem_index(s));
9287                             break;
9288                         case 2: /* stl */
9289                             gen_aa32_st32(tmp, addr, get_mem_index(s));
9290                             break;
9291                         default:
9292                             abort();
9293                         }
9294                         tcg_temp_free_i32(tmp);
9295                     }
9296                 } else if (insn & (1 << 20)) {
9297                     gen_load_exclusive(s, rs, rd, addr, op);
9298                 } else {
9299                     gen_store_exclusive(s, rm, rs, rd, addr, op);
9300                 }
9301                 tcg_temp_free_i32(addr);
9302             }
9303         } else {
9304             /* Load/store multiple, RFE, SRS.  */
9305             if (((insn >> 23) & 1) == ((insn >> 24) & 1)) {
9306                 /* RFE, SRS: not available in user mode or on M profile */
9307                 if (IS_USER(s) || arm_dc_feature(s, ARM_FEATURE_M)) {
9308                     goto illegal_op;
9309                 }
9310                 if (insn & (1 << 20)) {
9311                     /* rfe */
9312                     addr = load_reg(s, rn);
9313                     if ((insn & (1 << 24)) == 0)
9314                         tcg_gen_addi_i32(addr, addr, -8);
9315                     /* Load PC into tmp and CPSR into tmp2.  */
9316                     tmp = tcg_temp_new_i32();
9317                     gen_aa32_ld32u(tmp, addr, get_mem_index(s));
9318                     tcg_gen_addi_i32(addr, addr, 4);
9319                     tmp2 = tcg_temp_new_i32();
9320                     gen_aa32_ld32u(tmp2, addr, get_mem_index(s));
9321                     if (insn & (1 << 21)) {
9322                         /* Base writeback.  */
9323                         if (insn & (1 << 24)) {
9324                             tcg_gen_addi_i32(addr, addr, 4);
9325                         } else {
9326                             tcg_gen_addi_i32(addr, addr, -4);
9327                         }
9328                         store_reg(s, rn, addr);
9329                     } else {
9330                         tcg_temp_free_i32(addr);
9331                     }
9332                     gen_rfe(s, tmp, tmp2);
9333                 } else {
9334                     /* srs */
9335                     gen_srs(s, (insn & 0x1f), (insn & (1 << 24)) ? 1 : 2,
9336                             insn & (1 << 21));
9337                 }
9338             } else {
9339                 int i, loaded_base = 0;
9340                 TCGv_i32 loaded_var;
9341                 /* Load/store multiple.  */
9342                 addr = load_reg(s, rn);
9343                 offset = 0;
9344                 for (i = 0; i < 16; i++) {
9345                     if (insn & (1 << i))
9346                         offset += 4;
9347                 }
9348                 if (insn & (1 << 24)) {
9349                     tcg_gen_addi_i32(addr, addr, -offset);
9350                 }
9351
9352                 TCGV_UNUSED_I32(loaded_var);
9353                 for (i = 0; i < 16; i++) {
9354                     if ((insn & (1 << i)) == 0)
9355                         continue;
9356                     if (insn & (1 << 20)) {
9357                         /* Load.  */
9358                         tmp = tcg_temp_new_i32();
9359                         gen_aa32_ld32u(tmp, addr, get_mem_index(s));
9360                         if (i == 15) {
9361                             gen_bx(s, tmp);
9362                         } else if (i == rn) {
9363                             loaded_var = tmp;
9364                             loaded_base = 1;
9365                         } else {
9366                             store_reg(s, i, tmp);
9367                         }
9368                     } else {
9369                         /* Store.  */
9370                         tmp = load_reg(s, i);
9371                         gen_aa32_st32(tmp, addr, get_mem_index(s));
9372                         tcg_temp_free_i32(tmp);
9373                     }
9374                     tcg_gen_addi_i32(addr, addr, 4);
9375                 }
9376                 if (loaded_base) {
9377                     store_reg(s, rn, loaded_var);
9378                 }
9379                 if (insn & (1 << 21)) {
9380                     /* Base register writeback.  */
9381                     if (insn & (1 << 24)) {
9382                         tcg_gen_addi_i32(addr, addr, -offset);
9383                     }
9384                     /* Fault if writeback register is in register list.  */
9385                     if (insn & (1 << rn))
9386                         goto illegal_op;
9387                     store_reg(s, rn, addr);
9388                 } else {
9389                     tcg_temp_free_i32(addr);
9390                 }
9391             }
9392         }
9393         break;
9394     case 5:
9395
9396         op = (insn >> 21) & 0xf;
9397         if (op == 6) {
9398             /* Halfword pack.  */
9399             tmp = load_reg(s, rn);
9400             tmp2 = load_reg(s, rm);
9401             shift = ((insn >> 10) & 0x1c) | ((insn >> 6) & 0x3);
9402             if (insn & (1 << 5)) {
9403                 /* pkhtb */
9404                 if (shift == 0)
9405                     shift = 31;
9406                 tcg_gen_sari_i32(tmp2, tmp2, shift);
9407                 tcg_gen_andi_i32(tmp, tmp, 0xffff0000);
9408                 tcg_gen_ext16u_i32(tmp2, tmp2);
9409             } else {
9410                 /* pkhbt */
9411                 if (shift)
9412                     tcg_gen_shli_i32(tmp2, tmp2, shift);
9413                 tcg_gen_ext16u_i32(tmp, tmp);
9414                 tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
9415             }
9416             tcg_gen_or_i32(tmp, tmp, tmp2);
9417             tcg_temp_free_i32(tmp2);
9418             store_reg(s, rd, tmp);
9419         } else {
9420             /* Data processing register constant shift.  */
9421             if (rn == 15) {
9422                 tmp = tcg_temp_new_i32();
9423                 tcg_gen_movi_i32(tmp, 0);
9424             } else {
9425                 tmp = load_reg(s, rn);
9426             }
9427             tmp2 = load_reg(s, rm);
9428
9429             shiftop = (insn >> 4) & 3;
9430             shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
9431             conds = (insn & (1 << 20)) != 0;
9432             logic_cc = (conds && thumb2_logic_op(op));
9433             gen_arm_shift_im(tmp2, shiftop, shift, logic_cc);
9434             if (gen_thumb2_data_op(s, op, conds, 0, tmp, tmp2))
9435                 goto illegal_op;
9436             tcg_temp_free_i32(tmp2);
9437             if (rd != 15) {
9438                 store_reg(s, rd, tmp);
9439             } else {
9440                 tcg_temp_free_i32(tmp);
9441             }
9442         }
9443         break;
9444     case 13: /* Misc data processing.  */
9445         op = ((insn >> 22) & 6) | ((insn >> 7) & 1);
9446         if (op < 4 && (insn & 0xf000) != 0xf000)
9447             goto illegal_op;
9448         switch (op) {
9449         case 0: /* Register controlled shift.  */
9450             tmp = load_reg(s, rn);
9451             tmp2 = load_reg(s, rm);
9452             if ((insn & 0x70) != 0)
9453                 goto illegal_op;
9454             op = (insn >> 21) & 3;
9455             logic_cc = (insn & (1 << 20)) != 0;
9456             gen_arm_shift_reg(tmp, op, tmp2, logic_cc);
9457             if (logic_cc)
9458                 gen_logic_CC(tmp);
9459             store_reg_bx(s, rd, tmp);
9460             break;
9461         case 1: /* Sign/zero extend.  */
9462             tmp = load_reg(s, rm);
9463             shift = (insn >> 4) & 3;
9464             /* ??? In many cases it's not necessary to do a
9465                rotate, a shift is sufficient.  */
9466             if (shift != 0)
9467                 tcg_gen_rotri_i32(tmp, tmp, shift * 8);
9468             op = (insn >> 20) & 7;
9469             switch (op) {
9470             case 0: gen_sxth(tmp);   break;
9471             case 1: gen_uxth(tmp);   break;
9472             case 2: gen_sxtb16(tmp); break;
9473             case 3: gen_uxtb16(tmp); break;
9474             case 4: gen_sxtb(tmp);   break;
9475             case 5: gen_uxtb(tmp);   break;
9476             default: goto illegal_op;
9477             }
9478             if (rn != 15) {
9479                 tmp2 = load_reg(s, rn);
9480                 if ((op >> 1) == 1) {
9481                     gen_add16(tmp, tmp2);
9482                 } else {
9483                     tcg_gen_add_i32(tmp, tmp, tmp2);
9484                     tcg_temp_free_i32(tmp2);
9485                 }
9486             }
9487             store_reg(s, rd, tmp);
9488             break;
9489         case 2: /* SIMD add/subtract.  */
9490             op = (insn >> 20) & 7;
9491             shift = (insn >> 4) & 7;
9492             if ((op & 3) == 3 || (shift & 3) == 3)
9493                 goto illegal_op;
9494             tmp = load_reg(s, rn);
9495             tmp2 = load_reg(s, rm);
9496             gen_thumb2_parallel_addsub(op, shift, tmp, tmp2);
9497             tcg_temp_free_i32(tmp2);
9498             store_reg(s, rd, tmp);
9499             break;
9500         case 3: /* Other data processing.  */
9501             op = ((insn >> 17) & 0x38) | ((insn >> 4) & 7);
9502             if (op < 4) {
9503                 /* Saturating add/subtract.  */
9504                 tmp = load_reg(s, rn);
9505                 tmp2 = load_reg(s, rm);
9506                 if (op & 1)
9507                     gen_helper_double_saturate(tmp, cpu_env, tmp);
9508                 if (op & 2)
9509                     gen_helper_sub_saturate(tmp, cpu_env, tmp2, tmp);
9510                 else
9511                     gen_helper_add_saturate(tmp, cpu_env, tmp, tmp2);
9512                 tcg_temp_free_i32(tmp2);
9513             } else {
9514                 tmp = load_reg(s, rn);
9515                 switch (op) {
9516                 case 0x0a: /* rbit */
9517                     gen_helper_rbit(tmp, tmp);
9518                     break;
9519                 case 0x08: /* rev */
9520                     tcg_gen_bswap32_i32(tmp, tmp);
9521                     break;
9522                 case 0x09: /* rev16 */
9523                     gen_rev16(tmp);
9524                     break;
9525                 case 0x0b: /* revsh */
9526                     gen_revsh(tmp);
9527                     break;
9528                 case 0x10: /* sel */
9529                     tmp2 = load_reg(s, rm);
9530                     tmp3 = tcg_temp_new_i32();
9531                     tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUARMState, GE));
9532                     gen_helper_sel_flags(tmp, tmp3, tmp, tmp2);
9533                     tcg_temp_free_i32(tmp3);
9534                     tcg_temp_free_i32(tmp2);
9535                     break;
9536                 case 0x18: /* clz */
9537                     gen_helper_clz(tmp, tmp);
9538                     break;
9539                 case 0x20:
9540                 case 0x21:
9541                 case 0x22:
9542                 case 0x28:
9543                 case 0x29:
9544                 case 0x2a:
9545                 {
9546                     /* crc32/crc32c */
9547                     uint32_t sz = op & 0x3;
9548                     uint32_t c = op & 0x8;
9549
9550                     if (!arm_dc_feature(s, ARM_FEATURE_CRC)) {
9551                         goto illegal_op;
9552                     }
9553
9554                     tmp2 = load_reg(s, rm);
9555                     if (sz == 0) {
9556                         tcg_gen_andi_i32(tmp2, tmp2, 0xff);
9557                     } else if (sz == 1) {
9558                         tcg_gen_andi_i32(tmp2, tmp2, 0xffff);
9559                     }
9560                     tmp3 = tcg_const_i32(1 << sz);
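                     /* The crc helpers take the operand width in bytes
                        (1, 2 or 4). */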
9561                     if (c) {
9562                         gen_helper_crc32c(tmp, tmp, tmp2, tmp3);
9563                     } else {
9564                         gen_helper_crc32(tmp, tmp, tmp2, tmp3);
9565                     }
9566                     tcg_temp_free_i32(tmp2);
9567                     tcg_temp_free_i32(tmp3);
9568                     break;
9569                 }
9570                 default:
9571                     goto illegal_op;
9572                 }
9573             }
9574             store_reg(s, rd, tmp);
9575             break;
9576         case 4: case 5: /* 32-bit multiply.  Sum of absolute differences.  */
9577             op = (insn >> 4) & 0xf;
9578             tmp = load_reg(s, rn);
9579             tmp2 = load_reg(s, rm);
9580             switch ((insn >> 20) & 7) {
9581             case 0: /* 32 x 32 -> 32 */
9582                 tcg_gen_mul_i32(tmp, tmp, tmp2);
9583                 tcg_temp_free_i32(tmp2);
9584                 if (rs != 15) {
9585                     tmp2 = load_reg(s, rs);
9586                     if (op)
9587                         tcg_gen_sub_i32(tmp, tmp2, tmp);
9588                     else
9589                         tcg_gen_add_i32(tmp, tmp, tmp2);
9590                     tcg_temp_free_i32(tmp2);
9591                 }
9592                 break;
9593             case 1: /* 16 x 16 -> 32 */
9594                 gen_mulxy(tmp, tmp2, op & 2, op & 1);
9595                 tcg_temp_free_i32(tmp2);
9596                 if (rs != 15) {
9597                     tmp2 = load_reg(s, rs);
9598                     gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
9599                     tcg_temp_free_i32(tmp2);
9600                 }
9601                 break;
9602             case 2: /* Dual multiply add.  */
9603             case 4: /* Dual multiply subtract.  */
9604                 if (op)
9605                     gen_swap_half(tmp2);
9606                 gen_smul_dual(tmp, tmp2);
9607                 if (insn & (1 << 22)) {
9608                     /* This subtraction cannot overflow. */
9609                     tcg_gen_sub_i32(tmp, tmp, tmp2);
9610                 } else {
9611                     /* This addition cannot overflow 32 bits;
9612                      * however it may overflow considered as a signed
9613                      * operation, in which case we must set the Q flag.
9614                      */
9615                     gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
9616                 }
9617                 tcg_temp_free_i32(tmp2);
9618                 if (rs != 15)
9619                   {
9620                     tmp2 = load_reg(s, rs);
9621                     gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
9622                     tcg_temp_free_i32(tmp2);
9623                   }
9624                 break;
9625             case 3: /* 32 * 16 -> 32msb */
9626                 if (op)
9627                     tcg_gen_sari_i32(tmp2, tmp2, 16);
9628                 else
9629                     gen_sxth(tmp2);
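                 /* SMULW{B,T}/SMLAW{B,T}: the result is the top 32 bits of
                    the 48-bit 32x16 product, i.e. the 64-bit product shifted
                    right by 16. */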
9630                 tmp64 = gen_muls_i64_i32(tmp, tmp2);
9631                 tcg_gen_shri_i64(tmp64, tmp64, 16);
9632                 tmp = tcg_temp_new_i32();
9633                 tcg_gen_trunc_i64_i32(tmp, tmp64);
9634                 tcg_temp_free_i64(tmp64);
9635                 if (rs != 15)
9636                   {
9637                     tmp2 = load_reg(s, rs);
9638                     gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
9639                     tcg_temp_free_i32(tmp2);
9640                   }
9641                 break;
9642             case 5: case 6: /* 32 * 32 -> 32msb (SMMUL, SMMLA, SMMLS) */
9643                 tmp64 = gen_muls_i64_i32(tmp, tmp2);
9644                 if (rs != 15) {
9645                     tmp = load_reg(s, rs);
9646                     if (insn & (1 << 20)) {
9647                         tmp64 = gen_addq_msw(tmp64, tmp);
9648                     } else {
9649                         tmp64 = gen_subq_msw(tmp64, tmp);
9650                     }
9651                 }
9652                 if (insn & (1 << 4)) {
9653                     tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u);
9654                 }
9655                 tcg_gen_shri_i64(tmp64, tmp64, 32);
9656                 tmp = tcg_temp_new_i32();
9657                 tcg_gen_trunc_i64_i32(tmp, tmp64);
9658                 tcg_temp_free_i64(tmp64);
9659                 break;
9660             case 7: /* Unsigned sum of absolute differences.  */
9661                 gen_helper_usad8(tmp, tmp, tmp2);
9662                 tcg_temp_free_i32(tmp2);
9663                 if (rs != 15) {
9664                     tmp2 = load_reg(s, rs);
9665                     tcg_gen_add_i32(tmp, tmp, tmp2);
9666                     tcg_temp_free_i32(tmp2);
9667                 }
9668                 break;
9669             }
9670             store_reg(s, rd, tmp);
9671             break;
9672         case 6: case 7: /* 64-bit multiply, Divide.  */
9673             op = ((insn >> 4) & 0xf) | ((insn >> 16) & 0x70);
9674             tmp = load_reg(s, rn);
9675             tmp2 = load_reg(s, rm);
9676             if ((op & 0x50) == 0x10) {
9677                 /* sdiv, udiv */
9678                 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DIV)) {
9679                     goto illegal_op;
9680                 }
9681                 if (op & 0x20)
9682                     gen_helper_udiv(tmp, tmp, tmp2);
9683                 else
9684                     gen_helper_sdiv(tmp, tmp, tmp2);
9685                 tcg_temp_free_i32(tmp2);
9686                 store_reg(s, rd, tmp);
9687             } else if ((op & 0xe) == 0xc) {
9688                 /* Dual multiply accumulate long.  */
9689                 if (op & 1)
9690                     gen_swap_half(tmp2);
9691                 gen_smul_dual(tmp, tmp2);
9692                 if (op & 0x10) {
9693                     tcg_gen_sub_i32(tmp, tmp, tmp2);
9694                 } else {
9695                     tcg_gen_add_i32(tmp, tmp, tmp2);
9696                 }
9697                 tcg_temp_free_i32(tmp2);
9698                 /* BUGFIX: the 32-bit dual-multiply sum must be widened
9699                    to 64 bits before the long accumulate. */
9699                 tmp64 = tcg_temp_new_i64();
9700                 tcg_gen_ext_i32_i64(tmp64, tmp);
9701                 tcg_temp_free_i32(tmp);
9702                 gen_addq(s, tmp64, rs, rd);
9703                 gen_storeq_reg(s, rs, rd, tmp64);
9704                 tcg_temp_free_i64(tmp64);
9705             } else {
9706                 if (op & 0x20) {
9707                     /* Unsigned 64-bit multiply  */
9708                     tmp64 = gen_mulu_i64_i32(tmp, tmp2);
9709                 } else {
9710                     if (op & 8) {
9711                         /* smlalxy */
9712                         gen_mulxy(tmp, tmp2, op & 2, op & 1);
9713                         tcg_temp_free_i32(tmp2);
9714                         tmp64 = tcg_temp_new_i64();
9715                         tcg_gen_ext_i32_i64(tmp64, tmp);
9716                         tcg_temp_free_i32(tmp);
9717                     } else {
9718                         /* Signed 64-bit multiply  */
9719                         tmp64 = gen_muls_i64_i32(tmp, tmp2);
9720                     }
9721                 }
9722                 if (op & 4) {
9723                     /* umaal */
9724                     gen_addq_lo(s, tmp64, rs);
9725                     gen_addq_lo(s, tmp64, rd);
9726                 } else if (op & 0x40) {
9727                     /* 64-bit accumulate.  */
9728                     gen_addq(s, tmp64, rs, rd);
9729                 }
9730                 gen_storeq_reg(s, rs, rd, tmp64);
9731                 tcg_temp_free_i64(tmp64);
9732             }
9733             break;
9734         }
9735         break;
9736     case 6: case 7: case 14: case 15:
9737         /* Coprocessor.  */
9738         if (((insn >> 24) & 3) == 3) {
9739             /* Translate into the equivalent ARM encoding.  */
9740             insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
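             /* i.e. move the Thumb U bit (insn[28]) down to the ARM position
                (insn[24]) and force the unconditional (0b1111) prefix that
                the ARM Neon decoder expects. */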
9741             if (disas_neon_data_insn(s, insn)) {
9742                 goto illegal_op;
9743             }
9744         } else if (((insn >> 8) & 0xe) == 10) {
9745             if (disas_vfp_insn(s, insn)) {
9746                 goto illegal_op;
9747             }
9748         } else {
9749             if (insn & (1 << 28))
9750                 goto illegal_op;
9751             if (disas_coproc_insn(s, insn)) {
9752                 goto illegal_op;
9753             }
9754         }
9755         break;
9756     case 8: case 9: case 10: case 11:
9757         if (insn & (1 << 15)) {
9758             /* Branches, misc control.  */
9759             if (insn & 0x5000) {
9760                 /* Unconditional branch.  */
9761                 /* signextend(hw1[10:0]) -> offset[:12].  */
9762                 offset = ((int32_t)insn << 5) >> 9 & ~(int32_t)0xfff;
9763                 /* hw1[10:0] -> offset[11:1].  */
9764                 offset |= (insn & 0x7ff) << 1;
9765                 /* (~hw2[13, 11] ^ offset[24]) -> offset[23,22]
9766                    offset[24:22] already have the same value because of the
9767                    sign extension above.  */
9768                 offset ^= ((~insn) & (1 << 13)) << 10;
9769                 offset ^= ((~insn) & (1 << 11)) << 11;
9770
9771                 if (insn & (1 << 14)) {
9772                     /* Branch and link.  */
9773                     tcg_gen_movi_i32(cpu_R[14], s->pc | 1);
9774                 }
9775
9776                 offset += s->pc;
9777                 if (insn & (1 << 12)) {
9778                     /* b/bl */
9779                     gen_jmp(s, offset);
9780                 } else {
9781                     /* blx */
9782                     offset &= ~(uint32_t)2;
9783                     /* thumb2 bx, no need to check */
9784                     gen_bx_im(s, offset);
9785                 }
9786             } else if (((insn >> 23) & 7) == 7) {
9787                 /* Misc control */
9788                 if (insn & (1 << 13))
9789                     goto illegal_op;
9790
9791                 if (insn & (1 << 26)) {
9792                     if (!(insn & (1 << 20))) {
9793                         /* Hypervisor call (v7) */
9794                         int imm16 = extract32(insn, 16, 4) << 12
9795                             | extract32(insn, 0, 12);
9796                         ARCH(7);
9797                         if (IS_USER(s)) {
9798                             goto illegal_op;
9799                         }
9800                         gen_hvc(s, imm16);
9801                     } else {
9802                         /* Secure monitor call (v6+) */
9803                         ARCH(6K);
9804                         if (IS_USER(s)) {
9805                             goto illegal_op;
9806                         }
9807                         gen_smc(s);
9808                     }
9809                 } else {
9810                     op = (insn >> 20) & 7;
9811                     switch (op) {
9812                     case 0: /* msr cpsr.  */
9813                         if (arm_dc_feature(s, ARM_FEATURE_M)) {
9814                             tmp = load_reg(s, rn);
9815                             addr = tcg_const_i32(insn & 0xff);
9816                             gen_helper_v7m_msr(cpu_env, addr, tmp);
9817                             tcg_temp_free_i32(addr);
9818                             tcg_temp_free_i32(tmp);
9819                             gen_lookup_tb(s);
9820                             break;
9821                         }
9822                         /* fall through */
9823                     case 1: /* msr spsr.  */
9824                         if (arm_dc_feature(s, ARM_FEATURE_M)) {
9825                             goto illegal_op;
9826                         }
9827                         tmp = load_reg(s, rn);
9828                         if (gen_set_psr(s,
9829                               msr_mask(s, (insn >> 8) & 0xf, op == 1),
9830                               op == 1, tmp))
9831                             goto illegal_op;
9832                         break;
9833                     case 2: /* cps, nop-hint.  */
9834                         if (((insn >> 8) & 7) == 0) {
9835                             gen_nop_hint(s, insn & 0xff);
9836                         }
9837                         /* Implemented as NOP in user mode.  */
9838                         if (IS_USER(s))
9839                             break;
9840                         offset = 0;
9841                         imm = 0;
9842                         if (insn & (1 << 10)) {
9843                             if (insn & (1 << 7))
9844                                 offset |= CPSR_A;
9845                             if (insn & (1 << 6))
9846                                 offset |= CPSR_I;
9847                             if (insn & (1 << 5))
9848                                 offset |= CPSR_F;
9849                             if (insn & (1 << 9))
9850                                 imm = CPSR_A | CPSR_I | CPSR_F;
9851                         }
9852                         if (insn & (1 << 8)) {
9853                             offset |= 0x1f;
9854                             imm |= (insn & 0x1f);
9855                         }
9856                         if (offset) {
9857                             gen_set_psr_im(s, offset, 0, imm);
9858                         }
9859                         break;
9860                     case 3: /* Special control operations.  */
9861                         ARCH(7);
9862                         op = (insn >> 4) & 0xf;
9863                         switch (op) {
9864                         case 2: /* clrex */
9865                             gen_clrex(s);
9866                             break;
9867                         case 4: /* dsb */
9868                         case 5: /* dmb */
9869                         case 6: /* isb */
9870                             /* These execute as NOPs.  */
9871                             break;
9872                         default:
9873                             goto illegal_op;
9874                         }
9875                         break;
9876                     case 4: /* bxj */
9877                         /* Trivial implementation equivalent to bx.  */
9878                         tmp = load_reg(s, rn);
9879                         gen_bx(s, tmp);
9880                         break;
9881                     case 5: /* Exception return.  */
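                        /* SUBS PC, LR, #imm8; the only valid form has
                         * rn == LR and rd == PC, checked below.
                         */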
9882                         if (IS_USER(s)) {
9883                             goto illegal_op;
9884                         }
9885                         if (rn != 14 || rd != 15) {
9886                             goto illegal_op;
9887                         }
9888                         tmp = load_reg(s, rn);
9889                         tcg_gen_subi_i32(tmp, tmp, insn & 0xff);
9890                         gen_exception_return(s, tmp);
9891                         break;
9892                     case 6: /* mrs cpsr.  */
9893                         tmp = tcg_temp_new_i32();
9894                         if (arm_dc_feature(s, ARM_FEATURE_M)) {
9895                             addr = tcg_const_i32(insn & 0xff);
9896                             gen_helper_v7m_mrs(tmp, cpu_env, addr);
9897                             tcg_temp_free_i32(addr);
9898                         } else {
9899                             gen_helper_cpsr_read(tmp, cpu_env);
9900                         }
9901                         store_reg(s, rd, tmp);
9902                         break;
9903                     case 7: /* mrs spsr.  */
9904                         /* Not accessible in user mode.  */
9905                         if (IS_USER(s) || arm_dc_feature(s, ARM_FEATURE_M)) {
9906                             goto illegal_op;
9907                         }
9908                         tmp = load_cpu_field(spsr);
9909                         store_reg(s, rd, tmp);
9910                         break;
9911                     }
9912                 }
9913             } else {
9914                 /* Conditional branch.  */
9915                 op = (insn >> 22) & 0xf;
9916                 /* Generate a conditional jump to next instruction.  */
9917                 s->condlabel = gen_new_label();
9918                 arm_gen_test_cc(op ^ 1, s->condlabel);
9919                 s->condjmp = 1;
9920
9921                 /* offset[11:1] = insn[10:0] */
9922                 offset = (insn & 0x7ff) << 1;
9923                 /* offset[17:12] = insn[21:16].  */
9924                 offset |= (insn & 0x003f0000) >> 4;
9925                 /* offset[31:20] = insn[26].  */
9926                 offset |= ((int32_t)((insn << 5) & 0x80000000)) >> 11;
9927                 /* offset[18] = insn[13].  */
9928                 offset |= (insn & (1 << 13)) << 5;
9929                 /* offset[19] = insn[11].  */
9930                 offset |= (insn & (1 << 11)) << 8;
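                /* The result is SignExtend(S:J2:J1:imm6:imm11:'0') as in
                 * the T3 encoding of B, with S = insn[26], J1 = insn[13],
                 * J2 = insn[11].
                 */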
9931
9932                 /* jump to the offset */
9933                 gen_jmp(s, s->pc + offset);
9934             }
9935         } else {
9936             /* Data processing immediate.  */
9937             if (insn & (1 << 25)) {
9938                 if (insn & (1 << 24)) {
9939                     if (insn & (1 << 20))
9940                         goto illegal_op;
9941                     /* Bitfield/Saturate.  */
9942                     op = (insn >> 21) & 7;
9943                     imm = insn & 0x1f;
9944                     shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
9945                     if (rn == 15) {
9946                         tmp = tcg_temp_new_i32();
9947                         tcg_gen_movi_i32(tmp, 0);
9948                     } else {
9949                         tmp = load_reg(s, rn);
9950                     }
9951                     switch (op) {
9952                     case 2: /* Signed bitfield extract.  */
9953                         imm++;
9954                         if (shift + imm > 32)
9955                             goto illegal_op;
9956                         if (imm < 32)
9957                             gen_sbfx(tmp, shift, imm);
9958                         break;
9959                     case 6: /* Unsigned bitfield extract.  */
9960                         imm++;
9961                         if (shift + imm > 32)
9962                             goto illegal_op;
9963                         if (imm < 32)
9964                             gen_ubfx(tmp, shift, (1u << imm) - 1);
9965                         break;
9966                     case 3: /* Bitfield insert/clear.  */
9967                         if (imm < shift)
9968                             goto illegal_op;
9969                         imm = imm + 1 - shift;
9970                         if (imm != 32) {
9971                             tmp2 = load_reg(s, rd);
9972                             tcg_gen_deposit_i32(tmp, tmp2, tmp, shift, imm);
9973                             tcg_temp_free_i32(tmp2);
9974                         }
9975                         break;
9976                     case 7:
9977                         goto illegal_op;
9978                     default: /* Saturate.  */
9979                         if (shift) {
9980                             if (op & 1)
9981                                 tcg_gen_sari_i32(tmp, tmp, shift);
9982                             else
9983                                 tcg_gen_shli_i32(tmp, tmp, shift);
9984                         }
9985                         tmp2 = tcg_const_i32(imm);
9986                         if (op & 4) {
9987                             /* Unsigned.  */
9988                             if ((op & 1) && shift == 0)
9989                                 gen_helper_usat16(tmp, cpu_env, tmp, tmp2);
9990                             else
9991                                 gen_helper_usat(tmp, cpu_env, tmp, tmp2);
9992                         } else {
9993                             /* Signed.  */
9994                             if ((op & 1) && shift == 0)
9995                                 gen_helper_ssat16(tmp, cpu_env, tmp, tmp2);
9996                             else
9997                                 gen_helper_ssat(tmp, cpu_env, tmp, tmp2);
9998                         }
9999                         tcg_temp_free_i32(tmp2);
10000                         break;
10001                     }
10002                     store_reg(s, rd, tmp);
10003                 } else {
10004                     imm = ((insn & 0x04000000) >> 15)
10005                           | ((insn & 0x7000) >> 4) | (insn & 0xff);
10006                     if (insn & (1 << 22)) {
10007                         /* 16-bit immediate.  */
10008                         imm |= (insn >> 4) & 0xf000;
10009                         if (insn & (1 << 23)) {
10010                             /* movt */
10011                             tmp = load_reg(s, rd);
10012                             tcg_gen_ext16u_i32(tmp, tmp);
10013                             tcg_gen_ori_i32(tmp, tmp, imm << 16);
10014                         } else {
10015                             /* movw */
10016                             tmp = tcg_temp_new_i32();
10017                             tcg_gen_movi_i32(tmp, imm);
10018                         }
10019                     } else {
10020                         /* Add/sub 12-bit immediate.  */
10021                         if (rn == 15) {
10022                             offset = s->pc & ~(uint32_t)3;
10023                             if (insn & (1 << 23))
10024                                 offset -= imm;
10025                             else
10026                                 offset += imm;
10027                             tmp = tcg_temp_new_i32();
10028                             tcg_gen_movi_i32(tmp, offset);
10029                         } else {
10030                             tmp = load_reg(s, rn);
10031                             if (insn & (1 << 23))
10032                                 tcg_gen_subi_i32(tmp, tmp, imm);
10033                             else
10034                                 tcg_gen_addi_i32(tmp, tmp, imm);
10035                         }
10036                     }
10037                     store_reg(s, rd, tmp);
10038                 }
10039             } else {
10040                 int shifter_out = 0;
10041                 /* modified 12-bit immediate.  */
10042                 shift = ((insn & 0x04000000) >> 23) | ((insn & 0x7000) >> 12);
10043                 imm = (insn & 0xff);
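                /* ThumbExpandImm: shift is the i:imm3 field.  Values 0-3
                 * select the 0x000000XY, 0x00XY00XY, 0xXY00XY00 and
                 * 0xXYXYXYXY patterns; 4-15 rotate 1:imm8[6:0] right by
                 * i:imm3:imm8[7], e.g. shift == 4, imm8 == 0x60 gives
                 * 0xE0 ROR 8 == 0xE0000000.
                 */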
10044                 switch (shift) {
10045                 case 0: /* XY */
10046                     /* Nothing to do.  */
10047                     break;
10048                 case 1: /* 00XY00XY */
10049                     imm |= imm << 16;
10050                     break;
10051                 case 2: /* XY00XY00 */
10052                     imm |= imm << 16;
10053                     imm <<= 8;
10054                     break;
10055                 case 3: /* XYXYXYXY */
10056                     imm |= imm << 16;
10057                     imm |= imm << 8;
10058                     break;
10059                 default: /* Rotated constant.  */
10060                     shift = (shift << 1) | (imm >> 7);
10061                     imm |= 0x80;
10062                     imm = imm << (32 - shift);
10063                     shifter_out = 1;
10064                     break;
10065                 }
10066                 tmp2 = tcg_temp_new_i32();
10067                 tcg_gen_movi_i32(tmp2, imm);
10068                 rn = (insn >> 16) & 0xf;
10069                 if (rn == 15) {
10070                     tmp = tcg_temp_new_i32();
10071                     tcg_gen_movi_i32(tmp, 0);
10072                 } else {
10073                     tmp = load_reg(s, rn);
10074                 }
10075                 op = (insn >> 21) & 0xf;
10076                 if (gen_thumb2_data_op(s, op, (insn & (1 << 20)) != 0,
10077                                        shifter_out, tmp, tmp2))
10078                     goto illegal_op;
10079                 tcg_temp_free_i32(tmp2);
10080                 rd = (insn >> 8) & 0xf;
10081                 if (rd != 15) {
10082                     store_reg(s, rd, tmp);
10083                 } else {
10084                     tcg_temp_free_i32(tmp);
10085                 }
10086             }
10087         }
10088         break;
10089     case 12: /* Load/store single data item.  */
10090         {
10091         int postinc = 0;
10092         int writeback = 0;
10093         int memidx;
10094         if ((insn & 0x01100000) == 0x01000000) {
10095             if (disas_neon_ls_insn(s, insn)) {
10096                 goto illegal_op;
10097             }
10098             break;
10099         }
10100         op = ((insn >> 21) & 3) | ((insn >> 22) & 4);
10101         if (rs == 15) {
10102             if (!(insn & (1 << 20))) {
10103                 goto illegal_op;
10104             }
10105             if (op != 2) {
                /* Byte or halfword load space with dest == r15: memory hints.
10107                  * Catch them early so we don't emit pointless addressing code.
10108                  * This space is a mix of:
10109                  *  PLD/PLDW/PLI,  which we implement as NOPs (note that unlike
10110                  *     the ARM encodings, PLDW space doesn't UNDEF for non-v7MP
10111                  *     cores)
10112                  *  unallocated hints, which must be treated as NOPs
10113                  *  UNPREDICTABLE space, which we NOP or UNDEF depending on
10114                  *     which is easiest for the decoding logic
10115                  *  Some space which must UNDEF
10116                  */
10117                 int op1 = (insn >> 23) & 3;
10118                 int op2 = (insn >> 6) & 0x3f;
10119                 if (op & 2) {
10120                     goto illegal_op;
10121                 }
10122                 if (rn == 15) {
10123                     /* UNPREDICTABLE, unallocated hint or
10124                      * PLD/PLDW/PLI (literal)
10125                      */
10126                     return 0;
10127                 }
10128                 if (op1 & 1) {
10129                     return 0; /* PLD/PLDW/PLI or unallocated hint */
10130                 }
10131                 if ((op2 == 0) || ((op2 & 0x3c) == 0x30)) {
10132                     return 0; /* PLD/PLDW/PLI or unallocated hint */
10133                 }
10134                 /* UNDEF space, or an UNPREDICTABLE */
10135                 return 1;
10136             }
10137         }
10138         memidx = get_mem_index(s);
10139         if (rn == 15) {
10140             addr = tcg_temp_new_i32();
10141             /* PC relative.  */
10142             /* s->pc has already been incremented by 4.  */
10143             imm = s->pc & 0xfffffffc;
10144             if (insn & (1 << 23))
10145                 imm += insn & 0xfff;
10146             else
10147                 imm -= insn & 0xfff;
10148             tcg_gen_movi_i32(addr, imm);
10149         } else {
10150             addr = load_reg(s, rn);
10151             if (insn & (1 << 23)) {
10152                 /* Positive offset.  */
10153                 imm = insn & 0xfff;
10154                 tcg_gen_addi_i32(addr, addr, imm);
10155             } else {
10156                 imm = insn & 0xff;
10157                 switch ((insn >> 8) & 0xf) {
10158                 case 0x0: /* Shifted Register.  */
10159                     shift = (insn >> 4) & 0xf;
10160                     if (shift > 3) {
10161                         tcg_temp_free_i32(addr);
10162                         goto illegal_op;
10163                     }
10164                     tmp = load_reg(s, rm);
10165                     if (shift)
10166                         tcg_gen_shli_i32(tmp, tmp, shift);
10167                     tcg_gen_add_i32(addr, addr, tmp);
10168                     tcg_temp_free_i32(tmp);
10169                     break;
10170                 case 0xc: /* Negative offset.  */
10171                     tcg_gen_addi_i32(addr, addr, -imm);
10172                     break;
10173                 case 0xe: /* User privilege.  */
10174                     tcg_gen_addi_i32(addr, addr, imm);
10175                     memidx = MMU_USER_IDX;
10176                     break;
10177                 case 0x9: /* Post-decrement.  */
10178                     imm = -imm;
10179                     /* Fall through.  */
10180                 case 0xb: /* Post-increment.  */
10181                     postinc = 1;
10182                     writeback = 1;
10183                     break;
10184                 case 0xd: /* Pre-decrement.  */
10185                     imm = -imm;
10186                     /* Fall through.  */
10187                 case 0xf: /* Pre-increment.  */
10188                     tcg_gen_addi_i32(addr, addr, imm);
10189                     writeback = 1;
10190                     break;
10191                 default:
10192                     tcg_temp_free_i32(addr);
10193                     goto illegal_op;
10194                 }
10195             }
10196         }
10197         if (insn & (1 << 20)) {
10198             /* Load.  */
10199             tmp = tcg_temp_new_i32();
10200             switch (op) {
10201             case 0:
10202                 gen_aa32_ld8u(tmp, addr, memidx);
10203                 break;
10204             case 4:
10205                 gen_aa32_ld8s(tmp, addr, memidx);
10206                 break;
10207             case 1:
10208                 gen_aa32_ld16u(tmp, addr, memidx);
10209                 break;
10210             case 5:
10211                 gen_aa32_ld16s(tmp, addr, memidx);
10212                 break;
10213             case 2:
10214                 gen_aa32_ld32u(tmp, addr, memidx);
10215                 break;
10216             default:
10217                 tcg_temp_free_i32(tmp);
10218                 tcg_temp_free_i32(addr);
10219                 goto illegal_op;
10220             }
10221             if (rs == 15) {
10222                 gen_bx(s, tmp);
10223             } else {
10224                 store_reg(s, rs, tmp);
10225             }
10226         } else {
10227             /* Store.  */
10228             tmp = load_reg(s, rs);
10229             switch (op) {
10230             case 0:
10231                 gen_aa32_st8(tmp, addr, memidx);
10232                 break;
10233             case 1:
10234                 gen_aa32_st16(tmp, addr, memidx);
10235                 break;
10236             case 2:
10237                 gen_aa32_st32(tmp, addr, memidx);
10238                 break;
10239             default:
10240                 tcg_temp_free_i32(tmp);
10241                 tcg_temp_free_i32(addr);
10242                 goto illegal_op;
10243             }
10244             tcg_temp_free_i32(tmp);
10245         }
10246         if (postinc)
10247             tcg_gen_addi_i32(addr, addr, imm);
10248         if (writeback) {
10249             store_reg(s, rn, addr);
10250         } else {
10251             tcg_temp_free_i32(addr);
10252         }
10253         }
10254         break;
10255     default:
10256         goto illegal_op;
10257     }
10258     return 0;
10259 illegal_op:
10260     return 1;
10261 }
10262
10263 static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
10264 {
10265     uint32_t val, insn, op, rm, rn, rd, shift, cond;
10266     int32_t offset;
10267     int i;
10268     TCGv_i32 tmp;
10269     TCGv_i32 tmp2;
10270     TCGv_i32 addr;
10271
10272     if (s->condexec_mask) {
10273         cond = s->condexec_cond;
        if (cond != 0x0e) {     /* Skip conditional when condition is AL. */
            s->condlabel = gen_new_label();
            arm_gen_test_cc(cond ^ 1, s->condlabel);
            s->condjmp = 1;
        }
10279     }
10280
10281     insn = arm_lduw_code(env, s->pc, s->bswap_code);
10282     s->pc += 2;
10283
10284     switch (insn >> 12) {
10285     case 0: case 1:
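        /* shift (immediate); add/subtract register or 3-bit immediate */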
10286
10287         rd = insn & 7;
10288         op = (insn >> 11) & 3;
10289         if (op == 3) {
10290             /* add/subtract */
10291             rn = (insn >> 3) & 7;
10292             tmp = load_reg(s, rn);
10293             if (insn & (1 << 10)) {
10294                 /* immediate */
10295                 tmp2 = tcg_temp_new_i32();
10296                 tcg_gen_movi_i32(tmp2, (insn >> 6) & 7);
10297             } else {
10298                 /* reg */
10299                 rm = (insn >> 6) & 7;
10300                 tmp2 = load_reg(s, rm);
10301             }
10302             if (insn & (1 << 9)) {
10303                 if (s->condexec_mask)
10304                     tcg_gen_sub_i32(tmp, tmp, tmp2);
10305                 else
10306                     gen_sub_CC(tmp, tmp, tmp2);
10307             } else {
10308                 if (s->condexec_mask)
10309                     tcg_gen_add_i32(tmp, tmp, tmp2);
10310                 else
10311                     gen_add_CC(tmp, tmp, tmp2);
10312             }
10313             tcg_temp_free_i32(tmp2);
10314             store_reg(s, rd, tmp);
10315         } else {
10316             /* shift immediate */
10317             rm = (insn >> 3) & 7;
10318             shift = (insn >> 6) & 0x1f;
10319             tmp = load_reg(s, rm);
10320             gen_arm_shift_im(tmp, op, shift, s->condexec_mask == 0);
10321             if (!s->condexec_mask)
10322                 gen_logic_CC(tmp);
10323             store_reg(s, rd, tmp);
10324         }
10325         break;
10326     case 2: case 3:
10327         /* arithmetic large immediate */
10328         op = (insn >> 11) & 3;
10329         rd = (insn >> 8) & 0x7;
10330         if (op == 0) { /* mov */
10331             tmp = tcg_temp_new_i32();
10332             tcg_gen_movi_i32(tmp, insn & 0xff);
10333             if (!s->condexec_mask)
10334                 gen_logic_CC(tmp);
10335             store_reg(s, rd, tmp);
10336         } else {
10337             tmp = load_reg(s, rd);
10338             tmp2 = tcg_temp_new_i32();
10339             tcg_gen_movi_i32(tmp2, insn & 0xff);
10340             switch (op) {
10341             case 1: /* cmp */
10342                 gen_sub_CC(tmp, tmp, tmp2);
10343                 tcg_temp_free_i32(tmp);
10344                 tcg_temp_free_i32(tmp2);
10345                 break;
10346             case 2: /* add */
10347                 if (s->condexec_mask)
10348                     tcg_gen_add_i32(tmp, tmp, tmp2);
10349                 else
10350                     gen_add_CC(tmp, tmp, tmp2);
10351                 tcg_temp_free_i32(tmp2);
10352                 store_reg(s, rd, tmp);
10353                 break;
10354             case 3: /* sub */
10355                 if (s->condexec_mask)
10356                     tcg_gen_sub_i32(tmp, tmp, tmp2);
10357                 else
10358                     gen_sub_CC(tmp, tmp, tmp2);
10359                 tcg_temp_free_i32(tmp2);
10360                 store_reg(s, rd, tmp);
10361                 break;
10362             }
10363         }
10364         break;
10365     case 4:
10366         if (insn & (1 << 11)) {
10367             rd = (insn >> 8) & 7;
10368             /* load pc-relative.  Bit 1 of PC is ignored.  */
10369             val = s->pc + 2 + ((insn & 0xff) * 4);
10370             val &= ~(uint32_t)2;
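            /* s->pc has already been advanced past this insn, so +2
             * gives the architectural PC (the insn address plus 4).
             */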
10371             addr = tcg_temp_new_i32();
10372             tcg_gen_movi_i32(addr, val);
10373             tmp = tcg_temp_new_i32();
10374             gen_aa32_ld32u(tmp, addr, get_mem_index(s));
10375             tcg_temp_free_i32(addr);
10376             store_reg(s, rd, tmp);
10377             break;
10378         }
10379         if (insn & (1 << 10)) {
10380             /* data processing extended or blx */
10381             rd = (insn & 7) | ((insn >> 4) & 8);
10382             rm = (insn >> 3) & 0xf;
10383             op = (insn >> 8) & 3;
10384             switch (op) {
10385             case 0: /* add */
10386                 tmp = load_reg(s, rd);
10387                 tmp2 = load_reg(s, rm);
10388                 tcg_gen_add_i32(tmp, tmp, tmp2);
10389                 tcg_temp_free_i32(tmp2);
10390                 store_reg(s, rd, tmp);
10391                 break;
10392             case 1: /* cmp */
10393                 tmp = load_reg(s, rd);
10394                 tmp2 = load_reg(s, rm);
10395                 gen_sub_CC(tmp, tmp, tmp2);
10396                 tcg_temp_free_i32(tmp2);
10397                 tcg_temp_free_i32(tmp);
10398                 break;
10399             case 2: /* mov/cpy */
10400                 tmp = load_reg(s, rm);
10401                 store_reg(s, rd, tmp);
10402                 break;
10403             case 3:/* branch [and link] exchange thumb register */
10404                 tmp = load_reg(s, rm);
10405                 if (insn & (1 << 7)) {
10406                     ARCH(5);
10407                     val = (uint32_t)s->pc | 1;
10408                     tmp2 = tcg_temp_new_i32();
10409                     tcg_gen_movi_i32(tmp2, val);
10410                     store_reg(s, 14, tmp2);
10411                 }
10412                 /* already thumb, no need to check */
10413                 gen_bx(s, tmp);
10414                 break;
10415             }
10416             break;
10417         }
10418
10419         /* data processing register */
10420         rd = insn & 7;
10421         rm = (insn >> 3) & 7;
10422         op = (insn >> 6) & 0xf;
10423         if (op == 2 || op == 3 || op == 4 || op == 7) {
10424             /* the shift/rotate ops want the operands backwards */
10425             val = rm;
10426             rm = rd;
10427             rd = val;
10428             val = 1;
10429         } else {
10430             val = 0;
10431         }
10432
10433         if (op == 9) { /* neg */
10434             tmp = tcg_temp_new_i32();
10435             tcg_gen_movi_i32(tmp, 0);
10436         } else if (op != 0xf) { /* mvn doesn't read its first operand */
10437             tmp = load_reg(s, rd);
10438         } else {
10439             TCGV_UNUSED_I32(tmp);
10440         }
10441
10442         tmp2 = load_reg(s, rm);
10443         switch (op) {
10444         case 0x0: /* and */
10445             tcg_gen_and_i32(tmp, tmp, tmp2);
10446             if (!s->condexec_mask)
10447                 gen_logic_CC(tmp);
10448             break;
10449         case 0x1: /* eor */
10450             tcg_gen_xor_i32(tmp, tmp, tmp2);
10451             if (!s->condexec_mask)
10452                 gen_logic_CC(tmp);
10453             break;
10454         case 0x2: /* lsl */
10455             if (s->condexec_mask) {
10456                 gen_shl(tmp2, tmp2, tmp);
10457             } else {
10458                 gen_helper_shl_cc(tmp2, cpu_env, tmp2, tmp);
10459                 gen_logic_CC(tmp2);
10460             }
10461             break;
10462         case 0x3: /* lsr */
10463             if (s->condexec_mask) {
10464                 gen_shr(tmp2, tmp2, tmp);
10465             } else {
10466                 gen_helper_shr_cc(tmp2, cpu_env, tmp2, tmp);
10467                 gen_logic_CC(tmp2);
10468             }
10469             break;
10470         case 0x4: /* asr */
10471             if (s->condexec_mask) {
10472                 gen_sar(tmp2, tmp2, tmp);
10473             } else {
10474                 gen_helper_sar_cc(tmp2, cpu_env, tmp2, tmp);
10475                 gen_logic_CC(tmp2);
10476             }
10477             break;
10478         case 0x5: /* adc */
10479             if (s->condexec_mask) {
10480                 gen_adc(tmp, tmp2);
10481             } else {
10482                 gen_adc_CC(tmp, tmp, tmp2);
10483             }
10484             break;
10485         case 0x6: /* sbc */
10486             if (s->condexec_mask) {
10487                 gen_sub_carry(tmp, tmp, tmp2);
10488             } else {
10489                 gen_sbc_CC(tmp, tmp, tmp2);
10490             }
10491             break;
10492         case 0x7: /* ror */
10493             if (s->condexec_mask) {
10494                 tcg_gen_andi_i32(tmp, tmp, 0x1f);
10495                 tcg_gen_rotr_i32(tmp2, tmp2, tmp);
10496             } else {
10497                 gen_helper_ror_cc(tmp2, cpu_env, tmp2, tmp);
10498                 gen_logic_CC(tmp2);
10499             }
10500             break;
10501         case 0x8: /* tst */
10502             tcg_gen_and_i32(tmp, tmp, tmp2);
10503             gen_logic_CC(tmp);
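            /* rd == 16 marks a flag-setting op with no destination reg. */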
10504             rd = 16;
10505             break;
10506         case 0x9: /* neg */
10507             if (s->condexec_mask)
10508                 tcg_gen_neg_i32(tmp, tmp2);
10509             else
10510                 gen_sub_CC(tmp, tmp, tmp2);
10511             break;
10512         case 0xa: /* cmp */
10513             gen_sub_CC(tmp, tmp, tmp2);
10514             rd = 16;
10515             break;
10516         case 0xb: /* cmn */
10517             gen_add_CC(tmp, tmp, tmp2);
10518             rd = 16;
10519             break;
10520         case 0xc: /* orr */
10521             tcg_gen_or_i32(tmp, tmp, tmp2);
10522             if (!s->condexec_mask)
10523                 gen_logic_CC(tmp);
10524             break;
10525         case 0xd: /* mul */
10526             tcg_gen_mul_i32(tmp, tmp, tmp2);
10527             if (!s->condexec_mask)
10528                 gen_logic_CC(tmp);
10529             break;
10530         case 0xe: /* bic */
10531             tcg_gen_andc_i32(tmp, tmp, tmp2);
10532             if (!s->condexec_mask)
10533                 gen_logic_CC(tmp);
10534             break;
10535         case 0xf: /* mvn */
10536             tcg_gen_not_i32(tmp2, tmp2);
10537             if (!s->condexec_mask)
10538                 gen_logic_CC(tmp2);
10539             val = 1;
10540             rm = rd;
10541             break;
10542         }
10543         if (rd != 16) {
10544             if (val) {
10545                 store_reg(s, rm, tmp2);
10546                 if (op != 0xf)
10547                     tcg_temp_free_i32(tmp);
10548             } else {
10549                 store_reg(s, rd, tmp);
10550                 tcg_temp_free_i32(tmp2);
10551             }
10552         } else {
10553             tcg_temp_free_i32(tmp);
10554             tcg_temp_free_i32(tmp2);
10555         }
10556         break;
10557
10558     case 5:
10559         /* load/store register offset.  */
10560         rd = insn & 7;
10561         rn = (insn >> 3) & 7;
10562         rm = (insn >> 6) & 7;
10563         op = (insn >> 9) & 7;
10564         addr = load_reg(s, rn);
10565         tmp = load_reg(s, rm);
10566         tcg_gen_add_i32(addr, addr, tmp);
10567         tcg_temp_free_i32(tmp);
10568
10569         if (op < 3) { /* store */
10570             tmp = load_reg(s, rd);
10571         } else {
10572             tmp = tcg_temp_new_i32();
10573         }
10574
10575         switch (op) {
10576         case 0: /* str */
10577             gen_aa32_st32(tmp, addr, get_mem_index(s));
10578             break;
10579         case 1: /* strh */
10580             gen_aa32_st16(tmp, addr, get_mem_index(s));
10581             break;
10582         case 2: /* strb */
10583             gen_aa32_st8(tmp, addr, get_mem_index(s));
10584             break;
10585         case 3: /* ldrsb */
10586             gen_aa32_ld8s(tmp, addr, get_mem_index(s));
10587             break;
10588         case 4: /* ldr */
10589             gen_aa32_ld32u(tmp, addr, get_mem_index(s));
10590             break;
10591         case 5: /* ldrh */
10592             gen_aa32_ld16u(tmp, addr, get_mem_index(s));
10593             break;
10594         case 6: /* ldrb */
10595             gen_aa32_ld8u(tmp, addr, get_mem_index(s));
10596             break;
10597         case 7: /* ldrsh */
10598             gen_aa32_ld16s(tmp, addr, get_mem_index(s));
10599             break;
10600         }
10601         if (op >= 3) { /* load */
10602             store_reg(s, rd, tmp);
10603         } else {
10604             tcg_temp_free_i32(tmp);
10605         }
10606         tcg_temp_free_i32(addr);
10607         break;
10608
10609     case 6:
10610         /* load/store word immediate offset */
10611         rd = insn & 7;
10612         rn = (insn >> 3) & 7;
10613         addr = load_reg(s, rn);
10614         val = (insn >> 4) & 0x7c;
10615         tcg_gen_addi_i32(addr, addr, val);
10616
10617         if (insn & (1 << 11)) {
10618             /* load */
10619             tmp = tcg_temp_new_i32();
10620             gen_aa32_ld32u(tmp, addr, get_mem_index(s));
10621             store_reg(s, rd, tmp);
10622         } else {
10623             /* store */
10624             tmp = load_reg(s, rd);
10625             gen_aa32_st32(tmp, addr, get_mem_index(s));
10626             tcg_temp_free_i32(tmp);
10627         }
10628         tcg_temp_free_i32(addr);
10629         break;
10630
10631     case 7:
10632         /* load/store byte immediate offset */
10633         rd = insn & 7;
10634         rn = (insn >> 3) & 7;
10635         addr = load_reg(s, rn);
10636         val = (insn >> 6) & 0x1f;
10637         tcg_gen_addi_i32(addr, addr, val);
10638
10639         if (insn & (1 << 11)) {
10640             /* load */
10641             tmp = tcg_temp_new_i32();
10642             gen_aa32_ld8u(tmp, addr, get_mem_index(s));
10643             store_reg(s, rd, tmp);
10644         } else {
10645             /* store */
10646             tmp = load_reg(s, rd);
10647             gen_aa32_st8(tmp, addr, get_mem_index(s));
10648             tcg_temp_free_i32(tmp);
10649         }
10650         tcg_temp_free_i32(addr);
10651         break;
10652
10653     case 8:
10654         /* load/store halfword immediate offset */
10655         rd = insn & 7;
10656         rn = (insn >> 3) & 7;
10657         addr = load_reg(s, rn);
10658         val = (insn >> 5) & 0x3e;
10659         tcg_gen_addi_i32(addr, addr, val);
10660
10661         if (insn & (1 << 11)) {
10662             /* load */
10663             tmp = tcg_temp_new_i32();
10664             gen_aa32_ld16u(tmp, addr, get_mem_index(s));
10665             store_reg(s, rd, tmp);
10666         } else {
10667             /* store */
10668             tmp = load_reg(s, rd);
10669             gen_aa32_st16(tmp, addr, get_mem_index(s));
10670             tcg_temp_free_i32(tmp);
10671         }
10672         tcg_temp_free_i32(addr);
10673         break;
10674
10675     case 9:
10676         /* load/store from stack */
10677         rd = (insn >> 8) & 7;
10678         addr = load_reg(s, 13);
10679         val = (insn & 0xff) * 4;
10680         tcg_gen_addi_i32(addr, addr, val);
10681
10682         if (insn & (1 << 11)) {
10683             /* load */
10684             tmp = tcg_temp_new_i32();
10685             gen_aa32_ld32u(tmp, addr, get_mem_index(s));
10686             store_reg(s, rd, tmp);
10687         } else {
10688             /* store */
10689             tmp = load_reg(s, rd);
10690             gen_aa32_st32(tmp, addr, get_mem_index(s));
10691             tcg_temp_free_i32(tmp);
10692         }
10693         tcg_temp_free_i32(addr);
10694         break;
10695
10696     case 10:
10697         /* add to high reg */
10698         rd = (insn >> 8) & 7;
10699         if (insn & (1 << 11)) {
10700             /* SP */
10701             tmp = load_reg(s, 13);
10702         } else {
            /* PC: bit 1 is ignored.  */
10704             tmp = tcg_temp_new_i32();
10705             tcg_gen_movi_i32(tmp, (s->pc + 2) & ~(uint32_t)2);
10706         }
10707         val = (insn & 0xff) * 4;
10708         tcg_gen_addi_i32(tmp, tmp, val);
10709         store_reg(s, rd, tmp);
10710         break;
10711
10712     case 11:
10713         /* misc */
10714         op = (insn >> 8) & 0xf;
10715         switch (op) {
10716         case 0:
10717             /* adjust stack pointer */
10718             tmp = load_reg(s, 13);
10719             val = (insn & 0x7f) * 4;
10720             if (insn & (1 << 7))
10721                 val = -(int32_t)val;
10722             tcg_gen_addi_i32(tmp, tmp, val);
10723             store_reg(s, 13, tmp);
10724             break;
10725
10726         case 2: /* sign/zero extend.  */
10727             ARCH(6);
10728             rd = insn & 7;
10729             rm = (insn >> 3) & 7;
10730             tmp = load_reg(s, rm);
10731             switch ((insn >> 6) & 3) {
10732             case 0: gen_sxth(tmp); break;
10733             case 1: gen_sxtb(tmp); break;
10734             case 2: gen_uxth(tmp); break;
10735             case 3: gen_uxtb(tmp); break;
10736             }
10737             store_reg(s, rd, tmp);
10738             break;
10739         case 4: case 5: case 0xc: case 0xd:
10740             /* push/pop */
10741             addr = load_reg(s, 13);
10742             if (insn & (1 << 8))
10743                 offset = 4;
10744             else
10745                 offset = 0;
10746             for (i = 0; i < 8; i++) {
10747                 if (insn & (1 << i))
10748                     offset += 4;
10749             }
10750             if ((insn & (1 << 11)) == 0) {
10751                 tcg_gen_addi_i32(addr, addr, -offset);
10752             }
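            /* For push, the base is dropped by the full transfer size
             * first and registers are stored at ascending addresses;
             * the second adjustment after the loop yields the new
             * (lower) SP for the writeback.
             */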
10753             for (i = 0; i < 8; i++) {
10754                 if (insn & (1 << i)) {
10755                     if (insn & (1 << 11)) {
10756                         /* pop */
10757                         tmp = tcg_temp_new_i32();
10758                         gen_aa32_ld32u(tmp, addr, get_mem_index(s));
10759                         store_reg(s, i, tmp);
10760                     } else {
10761                         /* push */
10762                         tmp = load_reg(s, i);
10763                         gen_aa32_st32(tmp, addr, get_mem_index(s));
10764                         tcg_temp_free_i32(tmp);
10765                     }
10766                     /* advance to the next address.  */
10767                     tcg_gen_addi_i32(addr, addr, 4);
10768                 }
10769             }
10770             TCGV_UNUSED_I32(tmp);
10771             if (insn & (1 << 8)) {
10772                 if (insn & (1 << 11)) {
10773                     /* pop pc */
10774                     tmp = tcg_temp_new_i32();
10775                     gen_aa32_ld32u(tmp, addr, get_mem_index(s));
10776                     /* don't set the pc until the rest of the instruction
10777                        has completed */
10778                 } else {
10779                     /* push lr */
10780                     tmp = load_reg(s, 14);
10781                     gen_aa32_st32(tmp, addr, get_mem_index(s));
10782                     tcg_temp_free_i32(tmp);
10783                 }
10784                 tcg_gen_addi_i32(addr, addr, 4);
10785             }
10786             if ((insn & (1 << 11)) == 0) {
10787                 tcg_gen_addi_i32(addr, addr, -offset);
10788             }
10789             /* write back the new stack pointer */
10790             store_reg(s, 13, addr);
10791             /* set the new PC value */
10792             if ((insn & 0x0900) == 0x0900) {
10793                 store_reg_from_load(s, 15, tmp);
10794             }
10795             break;
10796
        case 1: case 3: case 9: case 11: /* cbz/cbnz */
10798             rm = insn & 7;
10799             tmp = load_reg(s, rm);
10800             s->condlabel = gen_new_label();
10801             s->condjmp = 1;
10802             if (insn & (1 << 11))
10803                 tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, s->condlabel);
10804             else
10805                 tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, s->condlabel);
10806             tcg_temp_free_i32(tmp);
            offset = ((insn & 0xf8) >> 2) | ((insn & 0x200) >> 3);
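            /* offset is i:imm5:'0' (insn[9] and insn[7:3]); the branch
             * base is the architectural PC, i.e. s->pc + 2 here.
             */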
10808             val = (uint32_t)s->pc + 2;
10809             val += offset;
10810             gen_jmp(s, val);
10811             break;
10812
10813         case 15: /* IT, nop-hint.  */
10814             if ((insn & 0xf) == 0) {
10815                 gen_nop_hint(s, (insn >> 4) & 0xf);
10816                 break;
10817             }
10818             /* If Then.  */
10819             s->condexec_cond = (insn >> 4) & 0xe;
10820             s->condexec_mask = insn & 0x1f;
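            /* The low bit of firstcond is folded into condexec_mask;
             * the per-insn advance in gen_intermediate_code_internal
             * shifts the mask left and feeds bit 4 back into the low
             * bit of the condition.
             */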
10821             /* No actual code generated for this insn, just setup state.  */
10822             break;
10823
10824         case 0xe: /* bkpt */
10825         {
10826             int imm8 = extract32(insn, 0, 8);
10827             ARCH(5);
10828             gen_exception_insn(s, 2, EXCP_BKPT, syn_aa32_bkpt(imm8, true));
10829             break;
10830         }
10831
10832         case 0xa: /* rev */
10833             ARCH(6);
10834             rn = (insn >> 3) & 0x7;
10835             rd = insn & 0x7;
10836             tmp = load_reg(s, rn);
10837             switch ((insn >> 6) & 3) {
10838             case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
10839             case 1: gen_rev16(tmp); break;
10840             case 3: gen_revsh(tmp); break;
10841             default: goto illegal_op;
10842             }
10843             store_reg(s, rd, tmp);
10844             break;
10845
10846         case 6:
10847             switch ((insn >> 5) & 7) {
10848             case 2:
10849                 /* setend */
10850                 ARCH(6);
10851                 if (((insn >> 3) & 1) != s->bswap_code) {
10852                     /* Dynamic endianness switching not implemented. */
10853                     qemu_log_mask(LOG_UNIMP, "arm: unimplemented setend\n");
10854                     goto illegal_op;
10855                 }
10856                 break;
10857             case 3:
10858                 /* cps */
10859                 ARCH(6);
10860                 if (IS_USER(s)) {
10861                     break;
10862                 }
10863                 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10864                     tmp = tcg_const_i32((insn & (1 << 4)) != 0);
10865                     /* FAULTMASK */
10866                     if (insn & 1) {
10867                         addr = tcg_const_i32(19);
10868                         gen_helper_v7m_msr(cpu_env, addr, tmp);
10869                         tcg_temp_free_i32(addr);
10870                     }
10871                     /* PRIMASK */
10872                     if (insn & 2) {
10873                         addr = tcg_const_i32(16);
10874                         gen_helper_v7m_msr(cpu_env, addr, tmp);
10875                         tcg_temp_free_i32(addr);
10876                     }
10877                     tcg_temp_free_i32(tmp);
10878                     gen_lookup_tb(s);
10879                 } else {
10880                     if (insn & (1 << 4)) {
10881                         shift = CPSR_A | CPSR_I | CPSR_F;
10882                     } else {
10883                         shift = 0;
10884                     }
10885                     gen_set_psr_im(s, ((insn & 7) << 6), 0, shift);
10886                 }
10887                 break;
10888             default:
10889                 goto undef;
10890             }
10891             break;
10892
10893         default:
10894             goto undef;
10895         }
10896         break;
10897
10898     case 12:
10899     {
10900         /* load/store multiple */
10901         TCGv_i32 loaded_var;
10902         TCGV_UNUSED_I32(loaded_var);
10903         rn = (insn >> 8) & 0x7;
10904         addr = load_reg(s, rn);
10905         for (i = 0; i < 8; i++) {
10906             if (insn & (1 << i)) {
10907                 if (insn & (1 << 11)) {
10908                     /* load */
10909                     tmp = tcg_temp_new_i32();
10910                     gen_aa32_ld32u(tmp, addr, get_mem_index(s));
10911                     if (i == rn) {
10912                         loaded_var = tmp;
10913                     } else {
10914                         store_reg(s, i, tmp);
10915                     }
10916                 } else {
10917                     /* store */
10918                     tmp = load_reg(s, i);
10919                     gen_aa32_st32(tmp, addr, get_mem_index(s));
10920                     tcg_temp_free_i32(tmp);
10921                 }
10922                 /* advance to the next address */
10923                 tcg_gen_addi_i32(addr, addr, 4);
10924             }
10925         }
10926         if ((insn & (1 << rn)) == 0) {
10927             /* base reg not in list: base register writeback */
10928             store_reg(s, rn, addr);
10929         } else {
10930             /* base reg in list: if load, complete it now */
10931             if (insn & (1 << 11)) {
10932                 store_reg(s, rn, loaded_var);
10933             }
10934             tcg_temp_free_i32(addr);
10935         }
10936         break;
10937     }
10938     case 13:
10939         /* conditional branch or swi */
10940         cond = (insn >> 8) & 0xf;
10941         if (cond == 0xe)
10942             goto undef;
10943
10944         if (cond == 0xf) {
10945             /* swi */
10946             gen_set_pc_im(s, s->pc);
10947             s->svc_imm = extract32(insn, 0, 8);
10948             s->is_jmp = DISAS_SWI;
10949             break;
10950         }
10951         /* generate a conditional jump to next instruction */
10952         s->condlabel = gen_new_label();
10953         arm_gen_test_cc(cond ^ 1, s->condlabel);
10954         s->condjmp = 1;
10955
10956         /* jump to the offset */
10957         val = (uint32_t)s->pc + 2;
10958         offset = ((int32_t)insn << 24) >> 24;
10959         val += offset << 1;
10960         gen_jmp(s, val);
10961         break;
10962
10963     case 14:
10964         if (insn & (1 << 11)) {
        if (disas_thumb2_insn(env, s, insn)) {
            goto undef32;
        }
10967             break;
10968         }
10969         /* unconditional branch */
10970         val = (uint32_t)s->pc;
10971         offset = ((int32_t)insn << 21) >> 21;
10972         val += (offset << 1) + 2;
10973         gen_jmp(s, val);
10974         break;
10975
10976     case 15:
        if (disas_thumb2_insn(env, s, insn)) {
            goto undef32;
        }
10979         break;
10980     }
10981     return;
10982 undef32:
10983     gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized());
10984     return;
10985 illegal_op:
10986 undef:
10987     gen_exception_insn(s, 2, EXCP_UDEF, syn_uncategorized());
10988 }
10989
10990 /* generate intermediate code in gen_opc_buf and gen_opparam_buf for
10991    basic block 'tb'. If search_pc is TRUE, also generate PC
10992    information for each intermediate instruction. */
10993 static inline void gen_intermediate_code_internal(ARMCPU *cpu,
10994                                                   TranslationBlock *tb,
10995                                                   bool search_pc)
10996 {
10997     CPUState *cs = CPU(cpu);
10998     CPUARMState *env = &cpu->env;
10999     DisasContext dc1, *dc = &dc1;
11000     CPUBreakpoint *bp;
11001     uint16_t *gen_opc_end;
11002     int j, lj;
11003     target_ulong pc_start;
11004     target_ulong next_page_start;
11005     int num_insns;
11006     int max_insns;
11007
11008     /* generate intermediate code */
11009
11010     /* The A64 decoder has its own top level loop, because it doesn't need
11011      * the A32/T32 complexity to do with conditional execution/IT blocks/etc.
11012      */
11013     if (ARM_TBFLAG_AARCH64_STATE(tb->flags)) {
11014         gen_intermediate_code_internal_a64(cpu, tb, search_pc);
11015         return;
11016     }
11017
11018     pc_start = tb->pc;
11019
11020     dc->tb = tb;
11021
11022     gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
11023
11024     dc->is_jmp = DISAS_NEXT;
11025     dc->pc = pc_start;
11026     dc->singlestep_enabled = cs->singlestep_enabled;
11027     dc->condjmp = 0;
11028
11029     dc->aarch64 = 0;
11030     dc->thumb = ARM_TBFLAG_THUMB(tb->flags);
11031     dc->bswap_code = ARM_TBFLAG_BSWAP_CODE(tb->flags);
11032     dc->condexec_mask = (ARM_TBFLAG_CONDEXEC(tb->flags) & 0xf) << 1;
11033     dc->condexec_cond = ARM_TBFLAG_CONDEXEC(tb->flags) >> 4;
11034 #if !defined(CONFIG_USER_ONLY)
11035     dc->user = (ARM_TBFLAG_PRIV(tb->flags) == 0);
11036 #endif
11037     dc->cpacr_fpen = ARM_TBFLAG_CPACR_FPEN(tb->flags);
11038     dc->vfp_enabled = ARM_TBFLAG_VFPEN(tb->flags);
11039     dc->vec_len = ARM_TBFLAG_VECLEN(tb->flags);
11040     dc->vec_stride = ARM_TBFLAG_VECSTRIDE(tb->flags);
11041     dc->c15_cpar = ARM_TBFLAG_XSCALE_CPAR(tb->flags);
11042     dc->cp_regs = cpu->cp_regs;
11043     dc->current_el = arm_current_el(env);
11044     dc->features = env->features;
11045
11046     /* Single step state. The code-generation logic here is:
11047      *  SS_ACTIVE == 0:
11048      *   generate code with no special handling for single-stepping (except
11049      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
11050      *   this happens anyway because those changes are all system register or
11051      *   PSTATE writes).
11052      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
11053      *   emit code for one insn
11054      *   emit code to clear PSTATE.SS
11055      *   emit code to generate software step exception for completed step
11056      *   end TB (as usual for having generated an exception)
11057      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
11058      *   emit code to generate a software step exception
11059      *   end the TB
11060      */
11061     dc->ss_active = ARM_TBFLAG_SS_ACTIVE(tb->flags);
11062     dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(tb->flags);
11063     dc->is_ldex = false;
    /* Can't be true since EL_d (the debug target EL) must be AArch64.  */
    dc->ss_same_el = false;
11065
11066     cpu_F0s = tcg_temp_new_i32();
11067     cpu_F1s = tcg_temp_new_i32();
11068     cpu_F0d = tcg_temp_new_i64();
11069     cpu_F1d = tcg_temp_new_i64();
11070     cpu_V0 = cpu_F0d;
11071     cpu_V1 = cpu_F1d;
11072     /* FIXME: cpu_M0 can probably be the same as cpu_V0.  */
11073     cpu_M0 = tcg_temp_new_i64();
11074     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
11075     lj = -1;
11076     num_insns = 0;
11077     max_insns = tb->cflags & CF_COUNT_MASK;
11078     if (max_insns == 0)
11079         max_insns = CF_COUNT_MASK;
11080
11081     gen_tb_start();
11082
11083     tcg_clear_temp_count();
11084
11085     /* A note on handling of the condexec (IT) bits:
11086      *
11087      * We want to avoid the overhead of having to write the updated condexec
11088      * bits back to the CPUARMState for every instruction in an IT block. So:
11089      * (1) if the condexec bits are not already zero then we write
11090      * zero back into the CPUARMState now. This avoids complications trying
11091      * to do it at the end of the block. (For example if we don't do this
11092      * it's hard to identify whether we can safely skip writing condexec
11093      * at the end of the TB, which we definitely want to do for the case
11094      * where a TB doesn't do anything with the IT state at all.)
11095      * (2) if we are going to leave the TB then we call gen_set_condexec()
11096      * which will write the correct value into CPUARMState if zero is wrong.
11097      * This is done both for leaving the TB at the end, and for leaving
11098      * it because of an exception we know will happen, which is done in
11099      * gen_exception_insn(). The latter is necessary because we need to
11100      * leave the TB with the PC/IT state just prior to execution of the
11101      * instruction which caused the exception.
11102      * (3) if we leave the TB unexpectedly (eg a data abort on a load)
11103      * then the CPUARMState will be wrong and we need to reset it.
11104      * This is handled in the same way as restoration of the
11105      * PC in these situations: we will be called again with search_pc=1
11106      * and generate a mapping of the condexec bits for each PC in
11107      * gen_opc_condexec_bits[]. restore_state_to_opc() then uses
11108      * this to restore the condexec bits.
11109      *
11110      * Note that there are no instructions which can read the condexec
11111      * bits, and none which can write non-static values to them, so
11112      * we don't need to care about whether CPUARMState is correct in the
11113      * middle of a TB.
11114      */
11115
11116     /* Reset the conditional execution bits immediately. This avoids
11117        complications trying to do it at the end of the block.  */
    if (dc->condexec_mask || dc->condexec_cond) {
        TCGv_i32 tmp = tcg_temp_new_i32();
        tcg_gen_movi_i32(tmp, 0);
        store_cpu_field(tmp, condexec_bits);
    }
11124     do {
11125 #ifdef CONFIG_USER_ONLY
11126         /* Intercept jump to the magic kernel page.  */
11127         if (dc->pc >= 0xffff0000) {
11128             /* We always get here via a jump, so know we are not in a
11129                conditional execution block.  */
11130             gen_exception_internal(EXCP_KERNEL_TRAP);
11131             dc->is_jmp = DISAS_UPDATE;
11132             break;
11133         }
11134 #else
11135         if (dc->pc >= 0xfffffff0 && arm_dc_feature(dc, ARM_FEATURE_M)) {
11136             /* We always get here via a jump, so know we are not in a
11137                conditional execution block.  */
11138             gen_exception_internal(EXCP_EXCEPTION_EXIT);
11139             dc->is_jmp = DISAS_UPDATE;
11140             break;
11141         }
11142 #endif
11143
11144         if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
11145             QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
11146                 if (bp->pc == dc->pc) {
11147                     gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
11148                     /* Advance PC so that clearing the breakpoint will
11149                        invalidate this TB.  */
11150                     dc->pc += 2;
11151                     goto done_generating;
11152                 }
11153             }
11154         }
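        /* Record the guest PC, condexec bits and insn count for this op
         * index so restore_state_to_opc() can rebuild the guest state
         * from a host PC after a fault.
         */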
11155         if (search_pc) {
11156             j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
11157             if (lj < j) {
11158                 lj++;
                while (lj < j) {
                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
                }
11161             }
11162             tcg_ctx.gen_opc_pc[lj] = dc->pc;
            gen_opc_condexec_bits[lj] = (dc->condexec_cond << 4)
                                        | (dc->condexec_mask >> 1);
11164             tcg_ctx.gen_opc_instr_start[lj] = 1;
11165             tcg_ctx.gen_opc_icount[lj] = num_insns;
11166         }
11167
11168         if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
11169             gen_io_start();
11170
11171         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
11172             tcg_gen_debug_insn_start(dc->pc);
11173         }
11174
11175         if (dc->ss_active && !dc->pstate_ss) {
11176             /* Singlestep state is Active-pending.
11177              * If we're in this state at the start of a TB then either
11178              *  a) we just took an exception to an EL which is being debugged
11179              *     and this is the first insn in the exception handler
11180              *  b) debug exceptions were masked and we just unmasked them
11181              *     without changing EL (eg by clearing PSTATE.D)
11182              * In either case we're going to take a swstep exception in the
11183              * "did not step an insn" case, and so the syndrome ISV and EX
11184              * bits should be zero.
11185              */
11186             assert(num_insns == 0);
11187             gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0));
11188             goto done_generating;
11189         }
11190
11191         if (dc->thumb) {
11192             disas_thumb_insn(env, dc);
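            /* Advance the IT state machine: the top bit of the mask
               becomes the low bit of the condition and the mask shifts
               up, with the condition cleared once the mask runs out.  */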
11193             if (dc->condexec_mask) {
11194                 dc->condexec_cond = (dc->condexec_cond & 0xe)
11195                                    | ((dc->condexec_mask >> 4) & 1);
11196                 dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
11197                 if (dc->condexec_mask == 0) {
11198                     dc->condexec_cond = 0;
11199                 }
11200             }
11201         } else {
11202             disas_arm_insn(env, dc);
11203         }
11204
11205         if (dc->condjmp && !dc->is_jmp) {
11206             gen_set_label(dc->condlabel);
11207             dc->condjmp = 0;
11208         }
11209
11210         if (tcg_check_temp_count()) {
11211             fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n",
11212                     dc->pc);
11213         }
11214
11215         /* Translation stops when a conditional branch is encountered.
11216          * Otherwise the subsequent code could get translated several times.
11217          * Also stop translation when a page boundary is reached.  This
11218          * ensures prefetch aborts occur at the right place.  */
11219         num_insns++;
11220     } while (!dc->is_jmp && tcg_ctx.gen_opc_ptr < gen_opc_end &&
11221              !cs->singlestep_enabled &&
11222              !singlestep &&
11223              !dc->ss_active &&
11224              dc->pc < next_page_start &&
11225              num_insns < max_insns);
11226
11227     if (tb->cflags & CF_LAST_IO) {
11228         if (dc->condjmp) {
11229             /* FIXME:  This can theoretically happen with self-modifying
11230                code.  */
11231             cpu_abort(cs, "IO on conditional branch instruction");
11232         }
11233         gen_io_end();
11234     }
11235
11236     /* At this stage dc->condjmp will only be set when the skipped
11237        instruction was a conditional branch or trap, and the PC has
11238        already been written.  */
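    /* An exception-generating instruction (SWI/HVC/SMC) counts as having
       been stepped, so advance the single-step state machine before
       raising its exception; otherwise report the step itself.  */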
11239     if (unlikely(cs->singlestep_enabled || dc->ss_active)) {
11240         /* Make sure the pc is updated, and raise a debug exception.  */
11241         if (dc->condjmp) {
11242             gen_set_condexec(dc);
11243             if (dc->is_jmp == DISAS_SWI) {
11244                 gen_ss_advance(dc);
11245                 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
11246             } else if (dc->is_jmp == DISAS_HVC) {
11247                 gen_ss_advance(dc);
11248                 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm));
11249             } else if (dc->is_jmp == DISAS_SMC) {
11250                 gen_ss_advance(dc);
11251                 gen_exception(EXCP_SMC, syn_aa32_smc());
11252             } else if (dc->ss_active) {
11253                 gen_step_complete_exception(dc);
11254             } else {
11255                 gen_exception_internal(EXCP_DEBUG);
11256             }
11257             gen_set_label(dc->condlabel);
11258         }
11259         if (dc->condjmp || !dc->is_jmp) {
11260             gen_set_pc_im(dc, dc->pc);
11261             dc->condjmp = 0;
11262         }
11263         gen_set_condexec(dc);
11264         if (dc->is_jmp == DISAS_SWI && !dc->condjmp) {
11265             gen_ss_advance(dc);
11266             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
11267         } else if (dc->is_jmp == DISAS_HVC && !dc->condjmp) {
11268             gen_ss_advance(dc);
11269             gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm));
11270         } else if (dc->is_jmp == DISAS_SMC && !dc->condjmp) {
11271             gen_ss_advance(dc);
11272             gen_exception(EXCP_SMC, syn_aa32_smc());
11273         } else if (dc->ss_active) {
11274             gen_step_complete_exception(dc);
11275         } else {
11276             /* FIXME: Single stepping a WFI insn will not halt
11277                the CPU.  */
11278             gen_exception_internal(EXCP_DEBUG);
11279         }
11280     } else {
11281         /* While branches must always occur at the end of an IT block,
11282            there are a few other things that can cause us to terminate
11283            the TB in the middle of an IT block:
11284             - Exception generating instructions (bkpt, swi, undefined).
11285             - Page boundaries.
11286             - Hardware watchpoints.
11287            Hardware breakpoints have already been handled and skip this code.
11288          */
11289         gen_set_condexec(dc);
11290         switch (dc->is_jmp) {
11291         case DISAS_NEXT:
11292             gen_goto_tb(dc, 1, dc->pc);
11293             break;
11294         default:
11295         case DISAS_JUMP:
11296         case DISAS_UPDATE:
11297             /* Indicate that the hash table must be used to find the next TB.  */
11298             tcg_gen_exit_tb(0);
11299             break;
11300         case DISAS_TB_JUMP:
11301             /* nothing more to generate */
11302             break;
11303         case DISAS_WFI:
11304             gen_helper_wfi(cpu_env);
11305             break;
11306         case DISAS_WFE:
11307             gen_helper_wfe(cpu_env);
11308             break;
11309         case DISAS_SWI:
11310             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb));
11311             break;
11312         case DISAS_HVC:
11313             gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm));
11314             break;
11315         case DISAS_SMC:
11316             gen_exception(EXCP_SMC, syn_aa32_smc());
11317             break;
11318         }
11319         if (dc->condjmp) {
11320             gen_set_label(dc->condlabel);
11321             gen_set_condexec(dc);
11322             gen_goto_tb(dc, 1, dc->pc);
11323             dc->condjmp = 0;
11324         }
11325     }
11326
11327 done_generating:
11328     gen_tb_end(tb, num_insns);
11329     *tcg_ctx.gen_opc_ptr = INDEX_op_end;
11330
11331 #ifdef DEBUG_DISAS
11332     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
11333         qemu_log("----------------\n");
11334         qemu_log("IN: %s\n", lookup_symbol(pc_start));
11335         log_target_disas(env, pc_start, dc->pc - pc_start,
11336                          dc->thumb | (dc->bswap_code << 1));
11337         qemu_log("\n");
11338     }
11339 #endif
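    /* In search_pc mode, zero-pad gen_opc_instr_start[] out to the last
       op generated; otherwise record the code size and instruction count
       in the TB itself.  */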
11340     if (search_pc) {
11341         j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
11342         lj++;
11343         while (lj <= j)
11344             tcg_ctx.gen_opc_instr_start[lj++] = 0;
11345     } else {
11346         tb->size = dc->pc - pc_start;
11347         tb->icount = num_insns;
11348     }
11349 }
11350
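/*
 * For illustration only: how the packed IT-state value used above (and
 * stored in env->condexec_bits) relates to the DisasContext fields.
 * These helpers are hypothetical sketches, not part of the translator.
 */
#if 0
static inline uint32_t pack_condexec(DisasContext *dc)
{
    /* Bits [7:4] hold the base condition, bits [3:0] the mask; the
       in-decoder mask is kept pre-shifted left by one bit.  */
    return (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
}

static inline void unpack_condexec(uint32_t bits, DisasContext *dc)
{
    dc->condexec_cond = bits >> 4;
    dc->condexec_mask = (bits & 0xf) << 1;
}
#endif
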
11351 void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
11352 {
11353     gen_intermediate_code_internal(arm_env_get_cpu(env), tb, false);
11354 }
11355
11356 void gen_intermediate_code_pc(CPUARMState *env, TranslationBlock *tb)
11357 {
11358     gen_intermediate_code_internal(arm_env_get_cpu(env), tb, true);
11359 }
11360
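/* Banked-mode names indexed by the low four CPSR mode bits (M[3:0]);
   "???" marks encodings that are not valid modes.  */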
11361 static const char *cpu_mode_names[16] = {
11362     "usr", "fiq", "irq", "svc", "???", "???", "mon", "abt",
11363     "???", "???", "hyp", "und", "???", "???", "???", "sys"
11364 };
11365
11366 void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
11367                         int flags)
11368 {
11369     ARMCPU *cpu = ARM_CPU(cs);
11370     CPUARMState *env = &cpu->env;
11371     int i;
11372     uint32_t psr;
11373
11374     if (is_a64(env)) {
11375         aarch64_cpu_dump_state(cs, f, cpu_fprintf, flags);
11376         return;
11377     }
11378
11379     for (i = 0; i < 16; i++) {
11380         cpu_fprintf(f, "R%02d=%08x", i, env->regs[i]);
11381         if ((i % 4) == 3)
11382             cpu_fprintf(f, "\n");
11383         else
11384             cpu_fprintf(f, " ");
11385     }
11386     psr = cpsr_read(env);
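    /* N/Z/C/V live in CPSR bits [31:28]; 'T'/'A' distinguishes Thumb
       from ARM state, and the trailing 32/26 shows whether a 32-bit
       mode (CPSR bit 4 set) or a legacy 26-bit mode is in use.  */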
11387     cpu_fprintf(f, "PSR=%08x %c%c%c%c %c %s%d\n",
11388                 psr,
11389                 psr & CPSR_N ? 'N' : '-',
11390                 psr & CPSR_Z ? 'Z' : '-',
11391                 psr & CPSR_C ? 'C' : '-',
11392                 psr & CPSR_V ? 'V' : '-',
11393                 psr & CPSR_T ? 'T' : 'A',
11394                 cpu_mode_names[psr & 0xf], (psr & 0x10) ? 32 : 26);
11395
11396     if (flags & CPU_DUMP_FPU) {
11397         int numvfpregs = 0;
11398         if (arm_feature(env, ARM_FEATURE_VFP)) {
11399             numvfpregs += 16;
11400         }
11401         if (arm_feature(env, ARM_FEATURE_VFP3)) {
11402             numvfpregs += 16;
11403         }
11404         for (i = 0; i < numvfpregs; i++) {
11405             uint64_t v = float64_val(env->vfp.regs[i]);
11406             cpu_fprintf(f, "s%02d=%08x s%02d=%08x d%02d=%016" PRIx64 "\n",
11407                         i * 2, (uint32_t)v,
11408                         i * 2 + 1, (uint32_t)(v >> 32),
11409                         i, v);
11410         }
11411         cpu_fprintf(f, "FPSCR: %08x\n", (int)env->vfp.xregs[ARM_VFP_FPSCR]);
11412     }
11413 }
11414
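/* Called (via cpu_restore_state()) after the TB has been retranslated
 * with search_pc set: copy the PC and IT state recorded for the faulting
 * instruction back into CPUARMState.
 */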
11415 void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb, int pc_pos)
11416 {
11417     if (is_a64(env)) {
11418         env->pc = tcg_ctx.gen_opc_pc[pc_pos];
11419         env->condexec_bits = 0;
11420     } else {
11421         env->regs[15] = tcg_ctx.gen_opc_pc[pc_pos];
11422         env->condexec_bits = gen_opc_condexec_bits[pc_pos];
11423     }
11424 }