/* target-arm/translate.c — QEMU ARM instruction translator (TCG front end) */
1 /*
2  *  ARM translation
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *  Copyright (c) 2005-2007 CodeSourcery
6  *  Copyright (c) 2007 OpenedHand, Ltd.
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20  */
21 #include <stdarg.h>
22 #include <stdlib.h>
23 #include <stdio.h>
24 #include <string.h>
25 #include <inttypes.h>
26
27 #include "cpu.h"
28 #include "internals.h"
29 #include "disas/disas.h"
30 #include "tcg-op.h"
31 #include "qemu/log.h"
32 #include "qemu/bitops.h"
33 #include "arm_ldst.h"
34
35 #include "exec/helper-proto.h"
36 #include "exec/helper-gen.h"
37
38 #include "trace-tcg.h"
39
40
/* Architecture-feature predicates for the CPU being translated.  Each
 * expands to a test on the DisasContext "s" that must be in scope at
 * the point of use.
 */
#define ENABLE_ARCH_4T    arm_dc_feature(s, ARM_FEATURE_V4T)
#define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
/* currently all emulated v5 cores are also v5TE, so don't bother */
#define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
#define ENABLE_ARCH_5J    0 /* always off: 5J (Jazelle) is not implemented */
#define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
#define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
#define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
#define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
#define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)

/* Reject the insn (jump to the decoder's "illegal_op" label) if the
 * required architecture feature is absent.
 */
#define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
53
54 #include "translate.h"
/* Per-insn condexec state recorded during translation, indexed in
 * parallel with the other gen_opc_* arrays.
 */
static uint32_t gen_opc_condexec_bits[OPC_BUF_SIZE];

/* In user-only emulation all guest code runs unprivileged. */
#if defined(CONFIG_USER_ONLY)
#define IS_USER(s) 1
#else
#define IS_USER(s) (s->user)
#endif

/* TCG globals backed by fixed CPUARMState fields (registered in
 * arm_translate_init below).
 */
TCGv_ptr cpu_env;
/* We reuse the same 64-bit temporaries for efficiency.  */
static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
static TCGv_i32 cpu_R[16];  /* core registers r0..r15 */
TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;  /* ALU flag fields */
TCGv_i64 cpu_exclusive_addr;
TCGv_i64 cpu_exclusive_val;
#ifdef CONFIG_USER_ONLY
TCGv_i64 cpu_exclusive_test;
TCGv_i32 cpu_exclusive_info;
#endif

/* FIXME:  These should be removed.  */
static TCGv_i32 cpu_F0s, cpu_F1s;
static TCGv_i64 cpu_F0d, cpu_F1d;

#include "exec/gen-icount.h"

/* Names under which the cpu_R globals are registered with TCG. */
static const char *regnames[] =
    { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
      "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
84
85 /* initialize TCG globals.  */
86 void arm_translate_init(void)
87 {
88     int i;
89
90     cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
91
92     for (i = 0; i < 16; i++) {
93         cpu_R[i] = tcg_global_mem_new_i32(TCG_AREG0,
94                                           offsetof(CPUARMState, regs[i]),
95                                           regnames[i]);
96     }
97     cpu_CF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, CF), "CF");
98     cpu_NF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, NF), "NF");
99     cpu_VF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, VF), "VF");
100     cpu_ZF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, ZF), "ZF");
101
102     cpu_exclusive_addr = tcg_global_mem_new_i64(TCG_AREG0,
103         offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
104     cpu_exclusive_val = tcg_global_mem_new_i64(TCG_AREG0,
105         offsetof(CPUARMState, exclusive_val), "exclusive_val");
106 #ifdef CONFIG_USER_ONLY
107     cpu_exclusive_test = tcg_global_mem_new_i64(TCG_AREG0,
108         offsetof(CPUARMState, exclusive_test), "exclusive_test");
109     cpu_exclusive_info = tcg_global_mem_new_i32(TCG_AREG0,
110         offsetof(CPUARMState, exclusive_info), "exclusive_info");
111 #endif
112
113     a64_translate_init();
114 }
115
static inline ARMMMUIdx get_a32_user_mem_index(DisasContext *s)
{
    /* Return the mmu_idx to use for A32/T32 "unprivileged load/store"
     * insns:
     *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
     *  otherwise, access as if at PL0.
     */
    switch (s->mmu_idx) {
    case ARMMMUIdx_S1E2:        /* this one is UNPREDICTABLE */
    case ARMMMUIdx_S12NSE0:
    case ARMMMUIdx_S12NSE1:
        /* Non-secure translation regimes map to non-secure EL0. */
        return ARMMMUIdx_S12NSE0;
    case ARMMMUIdx_S1E3:
    case ARMMMUIdx_S1SE0:
    case ARMMMUIdx_S1SE1:
        /* Secure translation regimes map to secure EL0. */
        return ARMMMUIdx_S1SE0;
    case ARMMMUIdx_S2NS:
    default:
        /* Stage-2 (or unknown) indexes are never in use for A32/T32
         * insn execution, so reaching here is a bug.
         */
        g_assert_not_reached();
    }
}
137
138 static inline TCGv_i32 load_cpu_offset(int offset)
139 {
140     TCGv_i32 tmp = tcg_temp_new_i32();
141     tcg_gen_ld_i32(tmp, cpu_env, offset);
142     return tmp;
143 }
144
145 #define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))
146
147 static inline void store_cpu_offset(TCGv_i32 var, int offset)
148 {
149     tcg_gen_st_i32(var, cpu_env, offset);
150     tcg_temp_free_i32(var);
151 }
152
153 #define store_cpu_field(var, name) \
154     store_cpu_offset(var, offsetof(CPUARMState, name))
155
156 /* Set a variable to the value of a CPU register.  */
157 static void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
158 {
159     if (reg == 15) {
160         uint32_t addr;
161         /* normally, since we updated PC, we need only to add one insn */
162         if (s->thumb)
163             addr = (long)s->pc + 2;
164         else
165             addr = (long)s->pc + 4;
166         tcg_gen_movi_i32(var, addr);
167     } else {
168         tcg_gen_mov_i32(var, cpu_R[reg]);
169     }
170 }
171
172 /* Create a new temporary and set it to the value of a CPU register.  */
173 static inline TCGv_i32 load_reg(DisasContext *s, int reg)
174 {
175     TCGv_i32 tmp = tcg_temp_new_i32();
176     load_reg_var(s, tmp, reg);
177     return tmp;
178 }
179
/* Set a CPU register.  The source must be a temporary and will be
   marked as dead.  */
static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
{
    if (reg == 15) {
        /* A plain write to the PC clears bit 0 and must end the TB,
         * since control flow has changed.
         */
        tcg_gen_andi_i32(var, var, ~1);
        s->is_jmp = DISAS_JUMP;
    }
    tcg_gen_mov_i32(cpu_R[reg], var);
    tcg_temp_free_i32(var);
}
191
/* Value extensions.  Each operates in place on a 32-bit temporary. */
#define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
#define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
#define gen_sxth(var) tcg_gen_ext16s_i32(var, var)

/* Dual-halfword byte extensions go via helpers. */
#define gen_sxtb16(var) gen_helper_sxtb16(var, var)
#define gen_uxtb16(var) gen_helper_uxtb16(var, var)
200
201
202 static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
203 {
204     TCGv_i32 tmp_mask = tcg_const_i32(mask);
205     gen_helper_cpsr_write(cpu_env, var, tmp_mask);
206     tcg_temp_free_i32(tmp_mask);
207 }
208 /* Set NZCV flags from the high 4 bits of var.  */
209 #define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
210
211 static void gen_exception_internal(int excp)
212 {
213     TCGv_i32 tcg_excp = tcg_const_i32(excp);
214
215     assert(excp_is_internal(excp));
216     gen_helper_exception_internal(cpu_env, tcg_excp);
217     tcg_temp_free_i32(tcg_excp);
218 }
219
220 static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
221 {
222     TCGv_i32 tcg_excp = tcg_const_i32(excp);
223     TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
224     TCGv_i32 tcg_el = tcg_const_i32(target_el);
225
226     gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
227                                        tcg_syn, tcg_el);
228
229     tcg_temp_free_i32(tcg_el);
230     tcg_temp_free_i32(tcg_syn);
231     tcg_temp_free_i32(tcg_excp);
232 }
233
static void gen_ss_advance(DisasContext *s)
{
    /* If the singlestep state is Active-not-pending, advance to
     * Active-pending.
     */
    if (s->ss_active) {
        /* Clear both the translation-time copy and the CPU state bit. */
        s->pstate_ss = 0;
        gen_helper_clear_pstate_ss(cpu_env);
    }
}
244
/* Generate the software-step exception for a completed insn and mark
 * the TB as ended with an exception.
 */
static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
                  default_exception_el(s));
    s->is_jmp = DISAS_EXC;
}
261
/* Dual signed 16x16->32 multiply, in place:
 *   a = sext(a[15:0])  * sext(b[15:0])
 *   b = sext(a[31:16]) * sext(b[31:16])
 * The low product is computed into a temporary first so the high
 * halves of a and b are still intact when they are needed.
 */
static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 tmp1 = tcg_temp_new_i32();
    TCGv_i32 tmp2 = tcg_temp_new_i32();
    tcg_gen_ext16s_i32(tmp1, a);
    tcg_gen_ext16s_i32(tmp2, b);
    tcg_gen_mul_i32(tmp1, tmp1, tmp2);
    tcg_temp_free_i32(tmp2);
    /* Arithmetic shifts sign-extend the high halfwords. */
    tcg_gen_sari_i32(a, a, 16);
    tcg_gen_sari_i32(b, b, 16);
    tcg_gen_mul_i32(b, b, a);
    tcg_gen_mov_i32(a, tmp1);
    tcg_temp_free_i32(tmp1);
}
276
/* Byteswap each halfword.  */
static void gen_rev16(TCGv_i32 var)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    /* tmp = high byte of each halfword moved down ... */
    tcg_gen_shri_i32(tmp, var, 8);
    tcg_gen_andi_i32(tmp, tmp, 0x00ff00ff);
    /* ... var = low byte of each halfword moved up, then combine. */
    tcg_gen_shli_i32(var, var, 8);
    tcg_gen_andi_i32(var, var, 0xff00ff00);
    tcg_gen_or_i32(var, var, tmp);
    tcg_temp_free_i32(tmp);
}

/* Byteswap low halfword and sign extend.  */
static void gen_revsh(TCGv_i32 var)
{
    /* Clear the high half before the 16-bit byteswap, then
     * sign-extend the swapped halfword.
     */
    tcg_gen_ext16u_i32(var, var);
    tcg_gen_bswap16_i32(var, var);
    tcg_gen_ext16s_i32(var, var);
}
296
297 /* Unsigned bitfield extract.  */
298 static void gen_ubfx(TCGv_i32 var, int shift, uint32_t mask)
299 {
300     if (shift)
301         tcg_gen_shri_i32(var, var, shift);
302     tcg_gen_andi_i32(var, var, mask);
303 }
304
305 /* Signed bitfield extract.  */
306 static void gen_sbfx(TCGv_i32 var, int shift, int width)
307 {
308     uint32_t signbit;
309
310     if (shift)
311         tcg_gen_sari_i32(var, var, shift);
312     if (shift + width < 32) {
313         signbit = 1u << (width - 1);
314         tcg_gen_andi_i32(var, var, (1u << width) - 1);
315         tcg_gen_xori_i32(var, var, signbit);
316         tcg_gen_subi_i32(var, var, signbit);
317     }
318 }
319
320 /* Return (b << 32) + a. Mark inputs as dead */
321 static TCGv_i64 gen_addq_msw(TCGv_i64 a, TCGv_i32 b)
322 {
323     TCGv_i64 tmp64 = tcg_temp_new_i64();
324
325     tcg_gen_extu_i32_i64(tmp64, b);
326     tcg_temp_free_i32(b);
327     tcg_gen_shli_i64(tmp64, tmp64, 32);
328     tcg_gen_add_i64(a, tmp64, a);
329
330     tcg_temp_free_i64(tmp64);
331     return a;
332 }
333
334 /* Return (b << 32) - a. Mark inputs as dead. */
335 static TCGv_i64 gen_subq_msw(TCGv_i64 a, TCGv_i32 b)
336 {
337     TCGv_i64 tmp64 = tcg_temp_new_i64();
338
339     tcg_gen_extu_i32_i64(tmp64, b);
340     tcg_temp_free_i32(b);
341     tcg_gen_shli_i64(tmp64, tmp64, 32);
342     tcg_gen_sub_i64(a, tmp64, a);
343
344     tcg_temp_free_i64(tmp64);
345     return a;
346 }
347
348 /* 32x32->64 multiply.  Marks inputs as dead.  */
349 static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
350 {
351     TCGv_i32 lo = tcg_temp_new_i32();
352     TCGv_i32 hi = tcg_temp_new_i32();
353     TCGv_i64 ret;
354
355     tcg_gen_mulu2_i32(lo, hi, a, b);
356     tcg_temp_free_i32(a);
357     tcg_temp_free_i32(b);
358
359     ret = tcg_temp_new_i64();
360     tcg_gen_concat_i32_i64(ret, lo, hi);
361     tcg_temp_free_i32(lo);
362     tcg_temp_free_i32(hi);
363
364     return ret;
365 }
366
367 static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
368 {
369     TCGv_i32 lo = tcg_temp_new_i32();
370     TCGv_i32 hi = tcg_temp_new_i32();
371     TCGv_i64 ret;
372
373     tcg_gen_muls2_i32(lo, hi, a, b);
374     tcg_temp_free_i32(a);
375     tcg_temp_free_i32(b);
376
377     ret = tcg_temp_new_i64();
378     tcg_gen_concat_i32_i64(ret, lo, hi);
379     tcg_temp_free_i32(lo);
380     tcg_temp_free_i32(hi);
381
382     return ret;
383 }
384
/* Swap low and high halfwords.  */
static void gen_swap_half(TCGv_i32 var)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    /* tmp = old high half; var = old low half shifted up; combine. */
    tcg_gen_shri_i32(tmp, var, 16);
    tcg_gen_shli_i32(var, var, 16);
    tcg_gen_or_i32(var, var, tmp);
    tcg_temp_free_i32(tmp);
}
394
/* Dual 16-bit add.  Result placed in t0 and t1 is marked as dead.
    tmp = (t0 ^ t1) & 0x8000;
    t0 &= ~0x8000;
    t1 &= ~0x8000;
    t0 = (t0 + t1) ^ tmp;
 */

static void gen_add16(TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    /* Masking bit 15 out of both operands stops a carry propagating
     * from the low halfword into the high one; the xor at the end
     * restores the correct bit-15 sum.
     */
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_andi_i32(tmp, tmp, 0x8000);
    tcg_gen_andi_i32(t0, t0, ~0x8000);
    tcg_gen_andi_i32(t1, t1, ~0x8000);
    tcg_gen_add_i32(t0, t0, t1);
    tcg_gen_xor_i32(t0, t0, tmp);
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(t1);
}
414
/* Set CF to the top bit of var (CF is stored as 0 or 1).  */
static void gen_set_CF_bit31(TCGv_i32 var)
{
    tcg_gen_shri_i32(cpu_CF, var, 31);
}

/* Set N and Z flags from var.  NF holds the sign bit in bit 31;
 * ZF is zero iff the value is zero, so a plain copy suffices for both.
 */
static inline void gen_logic_CC(TCGv_i32 var)
{
    tcg_gen_mov_i32(cpu_NF, var);
    tcg_gen_mov_i32(cpu_ZF, var);
}
427
/* T0 += T1 + CF.  (CF is stored as 0 or 1, so it can be added directly.) */
static void gen_adc(TCGv_i32 t0, TCGv_i32 t1)
{
    tcg_gen_add_i32(t0, t0, t1);
    tcg_gen_add_i32(t0, t0, cpu_CF);
}

/* dest = T0 + T1 + CF. */
static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    tcg_gen_add_i32(dest, t0, t1);
    tcg_gen_add_i32(dest, dest, cpu_CF);
}

/* dest = T0 - T1 + CF - 1.  (The ARM subtract-with-carry form:
 * carry clear means "borrow".)
 */
static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    tcg_gen_sub_i32(dest, t0, t1);
    tcg_gen_add_i32(dest, dest, cpu_CF);
    tcg_gen_subi_i32(dest, dest, 1);
}
449
/* dest = T0 + T1. Compute C, N, V and Z flags */
static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    /* add2 with zero high words computes NF:CF = t0 + t1, so the
     * carry out lands directly in CF.
     */
    tcg_gen_movi_i32(tmp, 0);
    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    /* V = (result ^ t0) & ~(t0 ^ t1): overflow iff the operands had
     * the same sign and the result's sign differs.
     */
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    tcg_temp_free_i32(tmp);
    tcg_gen_mov_i32(dest, cpu_NF);
}

/* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    if (TCG_TARGET_HAS_add2_i32) {
        /* NF:CF = (t0 + CF), then += t1, each add2 tracking carry. */
        tcg_gen_movi_i32(tmp, 0);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
    } else {
        /* Fallback: do the sum in 64 bits; the high half is the carry. */
        TCGv_i64 q0 = tcg_temp_new_i64();
        TCGv_i64 q1 = tcg_temp_new_i64();
        tcg_gen_extu_i32_i64(q0, t0);
        tcg_gen_extu_i32_i64(q1, t1);
        tcg_gen_add_i64(q0, q0, q1);
        tcg_gen_extu_i32_i64(q1, cpu_CF);
        tcg_gen_add_i64(q0, q0, q1);
        tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
        tcg_temp_free_i64(q0);
        tcg_temp_free_i64(q1);
    }
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    /* Same addition-overflow rule as gen_add_CC. */
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    tcg_temp_free_i32(tmp);
    tcg_gen_mov_i32(dest, cpu_NF);
}

/* dest = T0 - T1. Compute C, N, V and Z flags */
static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp;
    tcg_gen_sub_i32(cpu_NF, t0, t1);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    /* ARM C flag for subtraction is NOT borrow: set iff t0 >= t1. */
    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
    /* V = (result ^ t0) & (t0 ^ t1): overflow iff the operands had
     * different signs and the result's sign differs from t0's.
     */
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
    tmp = tcg_temp_new_i32();
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
    tcg_temp_free_i32(tmp);
    tcg_gen_mov_i32(dest, cpu_NF);
}

/* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    /* Subtract-with-carry is add-with-carry of the complement. */
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_not_i32(tmp, t1);
    gen_adc_CC(dest, t0, tmp);
    tcg_temp_free_i32(tmp);
}
515
/* Shift-by-register for the non-flag-setting case.  Only the low 8
 * bits of t1 are used as the shift amount; for LSL/LSR an amount of
 * 32..255 must yield 0.  The movcond substitutes a zero source when
 * the amount exceeds 31, and the actual shift then uses the amount
 * masked to 5 bits (so any out-of-range amount shifts zero).
 */
#define GEN_SHIFT(name)                                               \
static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
{                                                                     \
    TCGv_i32 tmp1, tmp2, tmp3;                                        \
    tmp1 = tcg_temp_new_i32();                                        \
    tcg_gen_andi_i32(tmp1, t1, 0xff);                                 \
    tmp2 = tcg_const_i32(0);                                          \
    tmp3 = tcg_const_i32(0x1f);                                       \
    tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0);    \
    tcg_temp_free_i32(tmp3);                                          \
    tcg_gen_andi_i32(tmp1, tmp1, 0x1f);                               \
    tcg_gen_##name##_i32(dest, tmp2, tmp1);                           \
    tcg_temp_free_i32(tmp2);                                          \
    tcg_temp_free_i32(tmp1);                                          \
}
GEN_SHIFT(shl)
GEN_SHIFT(shr)
#undef GEN_SHIFT

/* Arithmetic shift right by register: amounts above 31 are clamped
 * to 31, which produces the pure sign-fill result.
 */
static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp1, tmp2;
    tmp1 = tcg_temp_new_i32();
    tcg_gen_andi_i32(tmp1, t1, 0xff);
    tmp2 = tcg_const_i32(0x1f);
    tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
    tcg_temp_free_i32(tmp2);
    tcg_gen_sar_i32(dest, t0, tmp1);
    tcg_temp_free_i32(tmp1);
}
546
/* dest = |src| : select src when src > 0, otherwise its negation. */
static void tcg_gen_abs_i32(TCGv_i32 dest, TCGv_i32 src)
{
    TCGv_i32 c0 = tcg_const_i32(0);
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_neg_i32(tmp, src);
    tcg_gen_movcond_i32(TCG_COND_GT, dest, src, c0, src, tmp);
    tcg_temp_free_i32(c0);
    tcg_temp_free_i32(tmp);
}
556
/* Set CF to bit "shift" of var (the bit shifted out of the shifter). */
static void shifter_out_im(TCGv_i32 var, int shift)
{
    if (shift == 0) {
        tcg_gen_andi_i32(cpu_CF, var, 1);
    } else {
        tcg_gen_shri_i32(cpu_CF, var, shift);
        if (shift != 31) {
            /* For shift == 31 the shift already isolated the bit. */
            tcg_gen_andi_i32(cpu_CF, cpu_CF, 1);
        }
    }
}
568
569 /* Shift by immediate.  Includes special handling for shift == 0.  */
570 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
571                                     int shift, int flags)
572 {
573     switch (shiftop) {
574     case 0: /* LSL */
575         if (shift != 0) {
576             if (flags)
577                 shifter_out_im(var, 32 - shift);
578             tcg_gen_shli_i32(var, var, shift);
579         }
580         break;
581     case 1: /* LSR */
582         if (shift == 0) {
583             if (flags) {
584                 tcg_gen_shri_i32(cpu_CF, var, 31);
585             }
586             tcg_gen_movi_i32(var, 0);
587         } else {
588             if (flags)
589                 shifter_out_im(var, shift - 1);
590             tcg_gen_shri_i32(var, var, shift);
591         }
592         break;
593     case 2: /* ASR */
594         if (shift == 0)
595             shift = 32;
596         if (flags)
597             shifter_out_im(var, shift - 1);
598         if (shift == 32)
599           shift = 31;
600         tcg_gen_sari_i32(var, var, shift);
601         break;
602     case 3: /* ROR/RRX */
603         if (shift != 0) {
604             if (flags)
605                 shifter_out_im(var, shift - 1);
606             tcg_gen_rotri_i32(var, var, shift); break;
607         } else {
608             TCGv_i32 tmp = tcg_temp_new_i32();
609             tcg_gen_shli_i32(tmp, cpu_CF, 31);
610             if (flags)
611                 shifter_out_im(var, 0);
612             tcg_gen_shri_i32(var, var, 1);
613             tcg_gen_or_i32(var, var, tmp);
614             tcg_temp_free_i32(tmp);
615         }
616     }
617 };
618
619 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
620                                      TCGv_i32 shift, int flags)
621 {
622     if (flags) {
623         switch (shiftop) {
624         case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
625         case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
626         case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
627         case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
628         }
629     } else {
630         switch (shiftop) {
631         case 0:
632             gen_shl(var, var, shift);
633             break;
634         case 1:
635             gen_shr(var, var, shift);
636             break;
637         case 2:
638             gen_sar(var, var, shift);
639             break;
640         case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
641                 tcg_gen_rotr_i32(var, var, shift); break;
642         }
643     }
644     tcg_temp_free_i32(shift);
645 }
646
/* Expand one parallel add/sub helper call for the op2 encoding
 * (add16/addsubx/subaddx/sub16/add8/sub8); "gen_pas_helper" is
 * (re)defined below to match the helper signature in use.
 */
#define PAS_OP(pfx) \
    switch (op2) {  \
    case 0: gen_pas_helper(glue(pfx,add16)); break; \
    case 1: gen_pas_helper(glue(pfx,addsubx)); break; \
    case 2: gen_pas_helper(glue(pfx,subaddx)); break; \
    case 3: gen_pas_helper(glue(pfx,sub16)); break; \
    case 4: gen_pas_helper(glue(pfx,add8)); break; \
    case 7: gen_pas_helper(glue(pfx,sub8)); break; \
    }
/* Generate an ARM-encoding parallel add/subtract; a = op(a, b).
 * op1 selects the flavour; the s/u flavours additionally pass a
 * pointer to the GE flags field.
 */
static void gen_arm_parallel_addsub(int op1, int op2, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_ptr tmp;

    switch (op1) {
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
    case 1: /* signed, sets GE */
        tmp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
        PAS_OP(s)
        tcg_temp_free_ptr(tmp);
        break;
    case 5: /* unsigned, sets GE */
        tmp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
        PAS_OP(u)
        tcg_temp_free_ptr(tmp);
        break;
#undef gen_pas_helper
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
    case 2: /* signed saturating */
        PAS_OP(q);
        break;
    case 3: /* signed halving */
        PAS_OP(sh);
        break;
    case 6: /* unsigned saturating */
        PAS_OP(uq);
        break;
    case 7: /* unsigned halving */
        PAS_OP(uh);
        break;
#undef gen_pas_helper
    }
}
#undef PAS_OP
692
/* For unknown reasons Arm and Thumb-2 use arbitrarily different encodings.  */
/* Same expansion as above but with the Thumb-2 op1 numbering. */
#define PAS_OP(pfx) \
    switch (op1) {  \
    case 0: gen_pas_helper(glue(pfx,add8)); break; \
    case 1: gen_pas_helper(glue(pfx,add16)); break; \
    case 2: gen_pas_helper(glue(pfx,addsubx)); break; \
    case 4: gen_pas_helper(glue(pfx,sub8)); break; \
    case 5: gen_pas_helper(glue(pfx,sub16)); break; \
    case 6: gen_pas_helper(glue(pfx,subaddx)); break; \
    }
/* Generate a Thumb-2-encoding parallel add/subtract; a = op(a, b).
 * op2 selects the flavour; the s/u flavours additionally pass a
 * pointer to the GE flags field.
 */
static void gen_thumb2_parallel_addsub(int op1, int op2, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_ptr tmp;

    switch (op2) {
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
    case 0: /* signed, sets GE */
        tmp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
        PAS_OP(s)
        tcg_temp_free_ptr(tmp);
        break;
    case 4: /* unsigned, sets GE */
        tmp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
        PAS_OP(u)
        tcg_temp_free_ptr(tmp);
        break;
#undef gen_pas_helper
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
    case 1: /* signed saturating */
        PAS_OP(q);
        break;
    case 2: /* signed halving */
        PAS_OP(sh);
        break;
    case 5: /* unsigned saturating */
        PAS_OP(uq);
        break;
    case 6: /* unsigned halving */
        PAS_OP(uh);
        break;
#undef gen_pas_helper
    }
}
#undef PAS_OP
739
/*
 * Generate a conditional based on ARM condition code cc.
 * This is common between ARM and Aarch64 targets.
 * Fills in *cmp; any temporary it allocates is released by the
 * caller via arm_free_cc() (value_global marks globals that must
 * not be freed).
 */
void arm_test_cc(DisasCompare *cmp, int cc)
{
    TCGv_i32 value;
    TCGCond cond;
    bool global = true;

    switch (cc) {
    case 0: /* eq: Z */
    case 1: /* ne: !Z */
        cond = TCG_COND_EQ;
        value = cpu_ZF;
        break;

    case 2: /* cs: C */
    case 3: /* cc: !C */
        cond = TCG_COND_NE;
        value = cpu_CF;
        break;

    case 4: /* mi: N */
    case 5: /* pl: !N */
        /* NF keeps the sign bit in bit 31, so "negative" is LT 0. */
        cond = TCG_COND_LT;
        value = cpu_NF;
        break;

    case 6: /* vs: V */
    case 7: /* vc: !V */
        /* VF likewise keeps overflow in the sign bit. */
        cond = TCG_COND_LT;
        value = cpu_VF;
        break;

    case 8: /* hi: C && !Z */
    case 9: /* ls: !C || Z -> !(C && !Z) */
        cond = TCG_COND_NE;
        value = tcg_temp_new_i32();
        global = false;
        /* CF is 1 for C, so -CF is an all-bits-set mask for C;
           ZF is non-zero for !Z; so AND the two subexpressions.  */
        tcg_gen_neg_i32(value, cpu_CF);
        tcg_gen_and_i32(value, value, cpu_ZF);
        break;

    case 10: /* ge: N == V -> N ^ V == 0 */
    case 11: /* lt: N != V -> N ^ V != 0 */
        /* Since we're only interested in the sign bit, == 0 is >= 0.  */
        cond = TCG_COND_GE;
        value = tcg_temp_new_i32();
        global = false;
        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
        break;

    case 12: /* gt: !Z && N == V */
    case 13: /* le: Z || N != V */
        cond = TCG_COND_NE;
        value = tcg_temp_new_i32();
        global = false;
        /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
         * the sign bit then AND with ZF to yield the result.  */
        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
        tcg_gen_sari_i32(value, value, 31);
        tcg_gen_andc_i32(value, cpu_ZF, value);
        break;

    case 14: /* always */
    case 15: /* always */
        /* Use the ALWAYS condition, which will fold early.
         * It doesn't matter what we use for the value.  */
        cond = TCG_COND_ALWAYS;
        value = cpu_ZF;
        goto no_invert;

    default:
        fprintf(stderr, "Bad condition code 0x%x\n", cc);
        abort();
    }

    /* Odd condition codes are the negations of the even ones. */
    if (cc & 1) {
        cond = tcg_invert_cond(cond);
    }

 no_invert:
    cmp->cond = cond;
    cmp->value = value;
    cmp->value_global = global;
}
829
/* Release any temporary allocated by arm_test_cc(); TCG globals
 * (value_global set) are left alone.
 */
void arm_free_cc(DisasCompare *cmp)
{
    if (!cmp->value_global) {
        tcg_temp_free_i32(cmp->value);
    }
}

/* Branch to "label" when the prepared condition holds. */
void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
{
    tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
}

/* Convenience wrapper: build, test and release a condition in one go. */
void arm_gen_test_cc(int cc, TCGLabel *label)
{
    DisasCompare cmp;
    arm_test_cc(&cmp, cc);
    arm_jump_cc(&cmp, label);
    arm_free_cc(&cmp);
}
849
/* Indexed by the 4-bit data-processing opcode: 1 if the op sets
 * flags via gen_logic_CC (N and Z from the result), 0 if it is an
 * arithmetic op with its own full flag computation.
 */
static const uint8_t table_logic_cc[16] = {
    1, /* and */
    1, /* xor */
    0, /* sub */
    0, /* rsb */
    0, /* add */
    0, /* adc */
    0, /* sbc */
    0, /* rsc */
    1, /* andl */
    1, /* xorl */
    0, /* cmp */
    0, /* cmn */
    1, /* orr */
    1, /* mov */
    1, /* bic */
    1, /* mvn */
};
868
/* Set PC and Thumb state from an immediate address.  Bit 0 of the
 * address selects the Thumb state; the CPU state field is only
 * written when the state actually changes.
 */
static inline void gen_bx_im(DisasContext *s, uint32_t addr)
{
    TCGv_i32 tmp;

    s->is_jmp = DISAS_UPDATE;
    if (s->thumb != (addr & 1)) {
        tmp = tcg_temp_new_i32();
        tcg_gen_movi_i32(tmp, addr & 1);
        tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUARMState, thumb));
        tcg_temp_free_i32(tmp);
    }
    tcg_gen_movi_i32(cpu_R[15], addr & ~1);
}

/* Set PC and Thumb state from var.  var is marked as dead.  */
static inline void gen_bx(DisasContext *s, TCGv_i32 var)
{
    s->is_jmp = DISAS_UPDATE;
    /* PC gets the address with bit 0 cleared; bit 0 becomes the
     * Thumb state.
     */
    tcg_gen_andi_i32(cpu_R[15], var, ~1);
    tcg_gen_andi_i32(var, var, 1);
    store_cpu_field(var, thumb);
}
892
/* Variant of store_reg which uses branch&exchange logic when storing
   to r15 in ARM architecture v7 and above. The source must be a temporary
   and will be marked as dead. */
static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
{
    if (reg == 15 && ENABLE_ARCH_7) {
        gen_bx(s, var);
    } else {
        store_reg(s, reg, var);
    }
}

/* Variant of store_reg which uses branch&exchange logic when storing
 * to r15 in ARM architecture v5T and above. This is used for storing
 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
{
    if (reg == 15 && ENABLE_ARCH_5) {
        gen_bx(s, var);
    } else {
        store_reg(s, reg, var);
    }
}
917
918 /* Abstractions of "generate code to do a guest load/store for
919  * AArch32", where a vaddr is always 32 bits (and is zero
920  * extended if we're a 64 bit core) and  data is also
921  * 32 bits unless specifically doing a 64 bit access.
922  * These functions work like tcg_gen_qemu_{ld,st}* except
923  * that the address argument is TCGv_i32 rather than TCGv.
924  */
#if TARGET_LONG_BITS == 32

/* Guest addresses are the same width as TCGv_i32, so they can be
 * handed straight to the qemu_ld/st ops.
 */
#define DO_GEN_LD(SUFF, OPC)                                             \
static inline void gen_aa32_ld##SUFF(TCGv_i32 val, TCGv_i32 addr, int index) \
{                                                                        \
    tcg_gen_qemu_ld_i32(val, addr, index, OPC);                          \
}

#define DO_GEN_ST(SUFF, OPC)                                             \
static inline void gen_aa32_st##SUFF(TCGv_i32 val, TCGv_i32 addr, int index) \
{                                                                        \
    tcg_gen_qemu_st_i32(val, addr, index, OPC);                          \
}

/* 64-bit data access at a 32-bit address (e.g. LDRD/VLDR.64).  */
static inline void gen_aa32_ld64(TCGv_i64 val, TCGv_i32 addr, int index)
{
    tcg_gen_qemu_ld_i64(val, addr, index, MO_TEQ);
}

static inline void gen_aa32_st64(TCGv_i64 val, TCGv_i32 addr, int index)
{
    tcg_gen_qemu_st_i64(val, addr, index, MO_TEQ);
}

#else

/* 64-bit core: the 32-bit guest address must be zero-extended into a
 * full-width TCGv before it can be used by the qemu_ld/st ops.
 */
#define DO_GEN_LD(SUFF, OPC)                                             \
static inline void gen_aa32_ld##SUFF(TCGv_i32 val, TCGv_i32 addr, int index) \
{                                                                        \
    TCGv addr64 = tcg_temp_new();                                        \
    tcg_gen_extu_i32_i64(addr64, addr);                                  \
    tcg_gen_qemu_ld_i32(val, addr64, index, OPC);                        \
    tcg_temp_free(addr64);                                               \
}

#define DO_GEN_ST(SUFF, OPC)                                             \
static inline void gen_aa32_st##SUFF(TCGv_i32 val, TCGv_i32 addr, int index) \
{                                                                        \
    TCGv addr64 = tcg_temp_new();                                        \
    tcg_gen_extu_i32_i64(addr64, addr);                                  \
    tcg_gen_qemu_st_i32(val, addr64, index, OPC);                        \
    tcg_temp_free(addr64);                                               \
}

static inline void gen_aa32_ld64(TCGv_i64 val, TCGv_i32 addr, int index)
{
    TCGv addr64 = tcg_temp_new();
    tcg_gen_extu_i32_i64(addr64, addr);
    tcg_gen_qemu_ld_i64(val, addr64, index, MO_TEQ);
    tcg_temp_free(addr64);
}

static inline void gen_aa32_st64(TCGv_i64 val, TCGv_i32 addr, int index)
{
    TCGv addr64 = tcg_temp_new();
    tcg_gen_extu_i32_i64(addr64, addr);
    tcg_gen_qemu_st_i64(val, addr64, index, MO_TEQ);
    tcg_temp_free(addr64);
}

#endif
986
/* Instantiate the per-size AArch32 accessors:
 * gen_aa32_ld{8s,8u,16s,16u,32u} and gen_aa32_st{8,16,32}.
 */
DO_GEN_LD(8s, MO_SB)
DO_GEN_LD(8u, MO_UB)
DO_GEN_LD(16s, MO_TESW)
DO_GEN_LD(16u, MO_TEUW)
DO_GEN_LD(32u, MO_TEUL)
DO_GEN_ST(8, MO_UB)
DO_GEN_ST(16, MO_TEUW)
DO_GEN_ST(32, MO_TEUL)
995
/* Load the immediate VAL into the emulated PC (R15).  */
static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
{
    tcg_gen_movi_i32(cpu_R[15], val);
}
1000
/* Generate code for an HVC (hypervisor call) insn carrying a 16-bit
 * immediate.  The exception proper is raised at end of translation
 * via DISAS_HVC.
 */
static inline void gen_hvc(DisasContext *s, int imm16)
{
    /* The pre HVC helper handles cases when HVC gets trapped
     * as an undefined insn by runtime configuration (ie before
     * the insn really executes).
     */
    gen_set_pc_im(s, s->pc - 4);  /* rewind to the start of this (4-byte) insn */
    gen_helper_pre_hvc(cpu_env);
    /* Otherwise we will treat this as a real exception which
     * happens after execution of the insn. (The distinction matters
     * for the PC value reported to the exception handler and also
     * for single stepping.)
     */
    s->svc_imm = imm16;
    gen_set_pc_im(s, s->pc);
    s->is_jmp = DISAS_HVC;
}
1018
/* Generate code for an SMC (secure monitor call) insn.  The exception
 * proper is raised at end of translation via DISAS_SMC.
 */
static inline void gen_smc(DisasContext *s)
{
    /* As with HVC, we may take an exception either before or after
     * the insn executes.
     */
    TCGv_i32 tmp;

    gen_set_pc_im(s, s->pc - 4);  /* rewind to the start of this (4-byte) insn */
    tmp = tcg_const_i32(syn_aa32_smc());  /* syndrome for a trapped AArch32 SMC */
    gen_helper_pre_smc(cpu_env, tmp);
    tcg_temp_free_i32(tmp);
    gen_set_pc_im(s, s->pc);
    s->is_jmp = DISAS_SMC;
}
1033
1034 static inline void
1035 gen_set_condexec (DisasContext *s)
1036 {
1037     if (s->condexec_mask) {
1038         uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
1039         TCGv_i32 tmp = tcg_temp_new_i32();
1040         tcg_gen_movi_i32(tmp, val);
1041         store_cpu_field(tmp, condexec_bits);
1042     }
1043 }
1044
/* Raise a QEMU-internal exception EXCP, winding the PC back OFFSET
 * bytes so it points at the insn being translated.
 */
static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
{
    gen_set_condexec(s);          /* sync IT-block state first */
    gen_set_pc_im(s, s->pc - offset);
    gen_exception_internal(excp);
    s->is_jmp = DISAS_JUMP;       /* end the TB here */
}
1052
/* Raise an architectural exception EXCP with syndrome SYN, targeting
 * exception level TARGET_EL, with the PC wound back OFFSET bytes to
 * the insn being translated.
 */
static void gen_exception_insn(DisasContext *s, int offset, int excp,
                               int syn, uint32_t target_el)
{
    gen_set_condexec(s);          /* sync IT-block state first */
    gen_set_pc_im(s, s->pc - offset);
    gen_exception(excp, syn, target_el);
    s->is_jmp = DISAS_JUMP;       /* end the TB here */
}
1061
/* Force a TB lookup after an instruction that changes the CPU state.  */
static inline void gen_lookup_tb(DisasContext *s)
{
    /* Mask off bit 0: the PC proper is always halfword aligned.  */
    tcg_gen_movi_i32(cpu_R[15], s->pc & ~1);
    s->is_jmp = DISAS_UPDATE;
}
1068
1069 static inline void gen_add_data_offset(DisasContext *s, unsigned int insn,
1070                                        TCGv_i32 var)
1071 {
1072     int val, rm, shift, shiftop;
1073     TCGv_i32 offset;
1074
1075     if (!(insn & (1 << 25))) {
1076         /* immediate */
1077         val = insn & 0xfff;
1078         if (!(insn & (1 << 23)))
1079             val = -val;
1080         if (val != 0)
1081             tcg_gen_addi_i32(var, var, val);
1082     } else {
1083         /* shift/register */
1084         rm = (insn) & 0xf;
1085         shift = (insn >> 7) & 0x1f;
1086         shiftop = (insn >> 5) & 3;
1087         offset = load_reg(s, rm);
1088         gen_arm_shift_im(offset, shiftop, shift, 0);
1089         if (!(insn & (1 << 23)))
1090             tcg_gen_sub_i32(var, var, offset);
1091         else
1092             tcg_gen_add_i32(var, var, offset);
1093         tcg_temp_free_i32(offset);
1094     }
1095 }
1096
1097 static inline void gen_add_datah_offset(DisasContext *s, unsigned int insn,
1098                                         int extra, TCGv_i32 var)
1099 {
1100     int val, rm;
1101     TCGv_i32 offset;
1102
1103     if (insn & (1 << 22)) {
1104         /* immediate */
1105         val = (insn & 0xf) | ((insn >> 4) & 0xf0);
1106         if (!(insn & (1 << 23)))
1107             val = -val;
1108         val += extra;
1109         if (val != 0)
1110             tcg_gen_addi_i32(var, var, val);
1111     } else {
1112         /* register */
1113         if (extra)
1114             tcg_gen_addi_i32(var, var, extra);
1115         rm = (insn) & 0xf;
1116         offset = load_reg(s, rm);
1117         if (!(insn & (1 << 23)))
1118             tcg_gen_sub_i32(var, var, offset);
1119         else
1120             tcg_gen_add_i32(var, var, offset);
1121         tcg_temp_free_i32(offset);
1122     }
1123 }
1124
1125 static TCGv_ptr get_fpstatus_ptr(int neon)
1126 {
1127     TCGv_ptr statusptr = tcg_temp_new_ptr();
1128     int offset;
1129     if (neon) {
1130         offset = offsetof(CPUARMState, vfp.standard_fp_status);
1131     } else {
1132         offset = offsetof(CPUARMState, vfp.fp_status);
1133     }
1134     tcg_gen_addi_ptr(statusptr, cpu_env, offset);
1135     return statusptr;
1136 }
1137
/* Generate gen_vfp_<name>(dp): F0 = F0 <op> F1 in single or double
 * precision, using the normal (non-NEON) FP status.
 */
#define VFP_OP2(name)                                                 \
static inline void gen_vfp_##name(int dp)                             \
{                                                                     \
    TCGv_ptr fpst = get_fpstatus_ptr(0);                              \
    if (dp) {                                                         \
        gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, cpu_F1d, fpst);    \
    } else {                                                          \
        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, cpu_F1s, fpst);    \
    }                                                                 \
    tcg_temp_free_ptr(fpst);                                          \
}

VFP_OP2(add)
VFP_OP2(sub)
VFP_OP2(mul)
VFP_OP2(div)

#undef VFP_OP2
1156
1157 static inline void gen_vfp_F1_mul(int dp)
1158 {
1159     /* Like gen_vfp_mul() but put result in F1 */
1160     TCGv_ptr fpst = get_fpstatus_ptr(0);
1161     if (dp) {
1162         gen_helper_vfp_muld(cpu_F1d, cpu_F0d, cpu_F1d, fpst);
1163     } else {
1164         gen_helper_vfp_muls(cpu_F1s, cpu_F0s, cpu_F1s, fpst);
1165     }
1166     tcg_temp_free_ptr(fpst);
1167 }
1168
1169 static inline void gen_vfp_F1_neg(int dp)
1170 {
1171     /* Like gen_vfp_neg() but put result in F1 */
1172     if (dp) {
1173         gen_helper_vfp_negd(cpu_F1d, cpu_F0d);
1174     } else {
1175         gen_helper_vfp_negs(cpu_F1s, cpu_F0s);
1176     }
1177 }
1178
1179 static inline void gen_vfp_abs(int dp)
1180 {
1181     if (dp)
1182         gen_helper_vfp_absd(cpu_F0d, cpu_F0d);
1183     else
1184         gen_helper_vfp_abss(cpu_F0s, cpu_F0s);
1185 }
1186
1187 static inline void gen_vfp_neg(int dp)
1188 {
1189     if (dp)
1190         gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
1191     else
1192         gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
1193 }
1194
1195 static inline void gen_vfp_sqrt(int dp)
1196 {
1197     if (dp)
1198         gen_helper_vfp_sqrtd(cpu_F0d, cpu_F0d, cpu_env);
1199     else
1200         gen_helper_vfp_sqrts(cpu_F0s, cpu_F0s, cpu_env);
1201 }
1202
1203 static inline void gen_vfp_cmp(int dp)
1204 {
1205     if (dp)
1206         gen_helper_vfp_cmpd(cpu_F0d, cpu_F1d, cpu_env);
1207     else
1208         gen_helper_vfp_cmps(cpu_F0s, cpu_F1s, cpu_env);
1209 }
1210
1211 static inline void gen_vfp_cmpe(int dp)
1212 {
1213     if (dp)
1214         gen_helper_vfp_cmped(cpu_F0d, cpu_F1d, cpu_env);
1215     else
1216         gen_helper_vfp_cmpes(cpu_F0s, cpu_F1s, cpu_env);
1217 }
1218
1219 static inline void gen_vfp_F1_ld0(int dp)
1220 {
1221     if (dp)
1222         tcg_gen_movi_i64(cpu_F1d, 0);
1223     else
1224         tcg_gen_movi_i32(cpu_F1s, 0);
1225 }
1226
/* Generate gen_vfp_<name>(dp, neon): integer-to-float conversion of
 * F0 in place; NEON selects the standard-FP status.
 */
#define VFP_GEN_ITOF(name) \
static inline void gen_vfp_##name(int dp, int neon) \
{ \
    TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
    if (dp) { \
        gen_helper_vfp_##name##d(cpu_F0d, cpu_F0s, statusptr); \
    } else { \
        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
    } \
    tcg_temp_free_ptr(statusptr); \
}

VFP_GEN_ITOF(uito)
VFP_GEN_ITOF(sito)
#undef VFP_GEN_ITOF

/* Generate gen_vfp_<name>(dp, neon): float-to-integer conversion of
 * F0; the 32-bit result always lands in F0s.
 */
#define VFP_GEN_FTOI(name) \
static inline void gen_vfp_##name(int dp, int neon) \
{ \
    TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
    if (dp) { \
        gen_helper_vfp_##name##d(cpu_F0s, cpu_F0d, statusptr); \
    } else { \
        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
    } \
    tcg_temp_free_ptr(statusptr); \
}

VFP_GEN_FTOI(toui)
VFP_GEN_FTOI(touiz)
VFP_GEN_FTOI(tosi)
VFP_GEN_FTOI(tosiz)
#undef VFP_GEN_FTOI

/* Generate gen_vfp_<name>(dp, shift, neon): fixed-point conversion of
 * F0 with the given fractional SHIFT; ROUND selects the rounding
 * variant of the helper (empty or _round_to_zero).
 */
#define VFP_GEN_FIX(name, round) \
static inline void gen_vfp_##name(int dp, int shift, int neon) \
{ \
    TCGv_i32 tmp_shift = tcg_const_i32(shift); \
    TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
    if (dp) { \
        gen_helper_vfp_##name##d##round(cpu_F0d, cpu_F0d, tmp_shift, \
                                        statusptr); \
    } else { \
        gen_helper_vfp_##name##s##round(cpu_F0s, cpu_F0s, tmp_shift, \
                                        statusptr); \
    } \
    tcg_temp_free_i32(tmp_shift); \
    tcg_temp_free_ptr(statusptr); \
}
VFP_GEN_FIX(tosh, _round_to_zero)
VFP_GEN_FIX(tosl, _round_to_zero)
VFP_GEN_FIX(touh, _round_to_zero)
VFP_GEN_FIX(toul, _round_to_zero)
VFP_GEN_FIX(shto, )
VFP_GEN_FIX(slto, )
VFP_GEN_FIX(uhto, )
VFP_GEN_FIX(ulto, )
#undef VFP_GEN_FIX
1285
1286 static inline void gen_vfp_ld(DisasContext *s, int dp, TCGv_i32 addr)
1287 {
1288     if (dp) {
1289         gen_aa32_ld64(cpu_F0d, addr, get_mem_index(s));
1290     } else {
1291         gen_aa32_ld32u(cpu_F0s, addr, get_mem_index(s));
1292     }
1293 }
1294
1295 static inline void gen_vfp_st(DisasContext *s, int dp, TCGv_i32 addr)
1296 {
1297     if (dp) {
1298         gen_aa32_st64(cpu_F0d, addr, get_mem_index(s));
1299     } else {
1300         gen_aa32_st32(cpu_F0s, addr, get_mem_index(s));
1301     }
1302 }
1303
1304 static inline long
1305 vfp_reg_offset (int dp, int reg)
1306 {
1307     if (dp)
1308         return offsetof(CPUARMState, vfp.regs[reg]);
1309     else if (reg & 1) {
1310         return offsetof(CPUARMState, vfp.regs[reg >> 1])
1311           + offsetof(CPU_DoubleU, l.upper);
1312     } else {
1313         return offsetof(CPUARMState, vfp.regs[reg >> 1])
1314           + offsetof(CPU_DoubleU, l.lower);
1315     }
1316 }
1317
/* Return the offset of a 32-bit piece of a NEON register.
   zero is the least significant end of the register.  */
static inline long
neon_reg_offset (int reg, int n)
{
    return vfp_reg_offset(0, reg * 2 + n);
}
1327
1328 static TCGv_i32 neon_load_reg(int reg, int pass)
1329 {
1330     TCGv_i32 tmp = tcg_temp_new_i32();
1331     tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
1332     return tmp;
1333 }
1334
1335 static void neon_store_reg(int reg, int pass, TCGv_i32 var)
1336 {
1337     tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
1338     tcg_temp_free_i32(var);
1339 }
1340
/* Load/store a whole 64-bit D register.  */
static inline void neon_load_reg64(TCGv_i64 var, int reg)
{
    tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
}

static inline void neon_store_reg64(TCGv_i64 var, int reg)
{
    tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
}

/* FP values live in plain integer TCG temps, so the f32/f64
 * load/store ops are just the i32/i64 ones.
 */
#define tcg_gen_ld_f32 tcg_gen_ld_i32
#define tcg_gen_ld_f64 tcg_gen_ld_i64
#define tcg_gen_st_f32 tcg_gen_st_i32
#define tcg_gen_st_f64 tcg_gen_st_i64
1355
1356 static inline void gen_mov_F0_vreg(int dp, int reg)
1357 {
1358     if (dp)
1359         tcg_gen_ld_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
1360     else
1361         tcg_gen_ld_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
1362 }
1363
1364 static inline void gen_mov_F1_vreg(int dp, int reg)
1365 {
1366     if (dp)
1367         tcg_gen_ld_f64(cpu_F1d, cpu_env, vfp_reg_offset(dp, reg));
1368     else
1369         tcg_gen_ld_f32(cpu_F1s, cpu_env, vfp_reg_offset(dp, reg));
1370 }
1371
1372 static inline void gen_mov_vreg_F0(int dp, int reg)
1373 {
1374     if (dp)
1375         tcg_gen_st_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
1376     else
1377         tcg_gen_st_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
1378 }
1379
/* Bit 20 of a coprocessor insn selects the read/load direction
 * (e.g. TMRRC, WLDR — see the uses in disas_iwmmxt_insn below).
 */
#define ARM_CP_RW_BIT   (1 << 20)

/* Copy iwMMXt data register wRn out of / into CPUARMState.  */
static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
{
    tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
}

static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
{
    tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
}

/* Load iwMMXt control register wCx into a fresh temp (caller frees).  */
static inline TCGv_i32 iwmmxt_load_creg(int reg)
{
    TCGv_i32 var = tcg_temp_new_i32();
    tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
    return var;
}

/* Store VAR to iwMMXt control register wCx; consumes VAR.  */
static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
{
    tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
    tcg_temp_free_i32(var);
}

/* Move between the M0 working register and wRn.  */
static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
{
    iwmmxt_store_reg(cpu_M0, rn);
}

static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_M0, rn);
}

/* Bitwise M0 = M0 <op> wRn, using V1 as scratch.  */
static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
}

static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
}

static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
}
1432
/* Generate gen_op_iwmmxt_<name>_M0_wRn(): M0 = helper(M0, wRn).  */
#define IWMMXT_OP(name) \
static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
{ \
    iwmmxt_load_reg(cpu_V1, rn); \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
}

/* As IWMMXT_OP, for helpers that also take cpu_env.  */
#define IWMMXT_OP_ENV(name) \
static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
{ \
    iwmmxt_load_reg(cpu_V1, rn); \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
}

/* Instantiate the byte/word/long variants of an env helper.  */
#define IWMMXT_OP_ENV_SIZE(name) \
IWMMXT_OP_ENV(name##b) \
IWMMXT_OP_ENV(name##w) \
IWMMXT_OP_ENV(name##l)

/* Unary form: M0 = helper(env, M0).  */
#define IWMMXT_OP_ENV1(name) \
static inline void gen_op_iwmmxt_##name##_M0(void) \
{ \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
}

IWMMXT_OP(maddsq)
IWMMXT_OP(madduq)
IWMMXT_OP(sadb)
IWMMXT_OP(sadw)
IWMMXT_OP(mulslw)
IWMMXT_OP(mulshw)
IWMMXT_OP(mululw)
IWMMXT_OP(muluhw)
IWMMXT_OP(macsw)
IWMMXT_OP(macuw)

IWMMXT_OP_ENV_SIZE(unpackl)
IWMMXT_OP_ENV_SIZE(unpackh)

IWMMXT_OP_ENV1(unpacklub)
IWMMXT_OP_ENV1(unpackluw)
IWMMXT_OP_ENV1(unpacklul)
IWMMXT_OP_ENV1(unpackhub)
IWMMXT_OP_ENV1(unpackhuw)
IWMMXT_OP_ENV1(unpackhul)
IWMMXT_OP_ENV1(unpacklsb)
IWMMXT_OP_ENV1(unpacklsw)
IWMMXT_OP_ENV1(unpacklsl)
IWMMXT_OP_ENV1(unpackhsb)
IWMMXT_OP_ENV1(unpackhsw)
IWMMXT_OP_ENV1(unpackhsl)

IWMMXT_OP_ENV_SIZE(cmpeq)
IWMMXT_OP_ENV_SIZE(cmpgtu)
IWMMXT_OP_ENV_SIZE(cmpgts)

IWMMXT_OP_ENV_SIZE(mins)
IWMMXT_OP_ENV_SIZE(minu)
IWMMXT_OP_ENV_SIZE(maxs)
IWMMXT_OP_ENV_SIZE(maxu)

IWMMXT_OP_ENV_SIZE(subn)
IWMMXT_OP_ENV_SIZE(addn)
IWMMXT_OP_ENV_SIZE(subu)
IWMMXT_OP_ENV_SIZE(addu)
IWMMXT_OP_ENV_SIZE(subs)
IWMMXT_OP_ENV_SIZE(adds)

IWMMXT_OP_ENV(avgb0)
IWMMXT_OP_ENV(avgb1)
IWMMXT_OP_ENV(avgw0)
IWMMXT_OP_ENV(avgw1)

IWMMXT_OP_ENV(packuw)
IWMMXT_OP_ENV(packul)
IWMMXT_OP_ENV(packuq)
IWMMXT_OP_ENV(packsw)
IWMMXT_OP_ENV(packsl)
IWMMXT_OP_ENV(packsq)
1512
1513 static void gen_op_iwmmxt_set_mup(void)
1514 {
1515     TCGv_i32 tmp;
1516     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1517     tcg_gen_ori_i32(tmp, tmp, 2);
1518     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1519 }
1520
1521 static void gen_op_iwmmxt_set_cup(void)
1522 {
1523     TCGv_i32 tmp;
1524     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1525     tcg_gen_ori_i32(tmp, tmp, 1);
1526     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1527 }
1528
/* Recompute the SIMD N/Z flags from M0 (via the setpsr_nz helper)
 * and store them in wCASF.
 */
static void gen_op_iwmmxt_setpsr_nz(void)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
}

/* M0 += zero-extended low 32 bits of wRn.  */
static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
    tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
}
1542
/* Decode the addressing mode of an iwMMXt load/store insn, leaving
 * the effective address in DEST and performing any base-register
 * writeback.  Returns nonzero for an invalid addressing mode (the
 * caller then treats the insn as UNDEF).
 */
static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
                                     TCGv_i32 dest)
{
    int rd;
    uint32_t offset;
    TCGv_i32 tmp;

    rd = (insn >> 16) & 0xf;            /* base register */
    tmp = load_reg(s, rd);

    /* 8-bit immediate, scaled by 4 when insn[8] is set */
    offset = (insn & 0xff) << ((insn >> 7) & 2);
    if (insn & (1 << 24)) {
        /* Pre indexed */
        if (insn & (1 << 23))
            tcg_gen_addi_i32(tmp, tmp, offset);
        else
            tcg_gen_addi_i32(tmp, tmp, -offset);
        tcg_gen_mov_i32(dest, tmp);
        if (insn & (1 << 21))           /* writeback */
            store_reg(s, rd, tmp);
        else
            tcg_temp_free_i32(tmp);
    } else if (insn & (1 << 21)) {
        /* Post indexed */
        tcg_gen_mov_i32(dest, tmp);
        if (insn & (1 << 23))
            tcg_gen_addi_i32(tmp, tmp, offset);
        else
            tcg_gen_addi_i32(tmp, tmp, -offset);
        store_reg(s, rd, tmp);
    } else if (!(insn & (1 << 23)))
        return 1;
    /* NOTE(review): the P=0, W=0, U=1 case falls through to here and
     * returns success without writing DEST or freeing TMP — presumably
     * that encoding never reaches this function; verify against the
     * caller's decode.
     */
    return 0;
}
1577
1578 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1579 {
1580     int rd = (insn >> 0) & 0xf;
1581     TCGv_i32 tmp;
1582
1583     if (insn & (1 << 8)) {
1584         if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1585             return 1;
1586         } else {
1587             tmp = iwmmxt_load_creg(rd);
1588         }
1589     } else {
1590         tmp = tcg_temp_new_i32();
1591         iwmmxt_load_reg(cpu_V0, rd);
1592         tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1593     }
1594     tcg_gen_andi_i32(tmp, tmp, mask);
1595     tcg_gen_mov_i32(dest, tmp);
1596     tcg_temp_free_i32(tmp);
1597     return 0;
1598 }
1599
1600 /* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
1601    (ie. an undefined instruction).  */
1602 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1603 {
1604     int rd, wrd;
1605     int rdhi, rdlo, rd0, rd1, i;
1606     TCGv_i32 addr;
1607     TCGv_i32 tmp, tmp2, tmp3;
1608
1609     if ((insn & 0x0e000e00) == 0x0c000000) {
1610         if ((insn & 0x0fe00ff0) == 0x0c400000) {
1611             wrd = insn & 0xf;
1612             rdlo = (insn >> 12) & 0xf;
1613             rdhi = (insn >> 16) & 0xf;
1614             if (insn & ARM_CP_RW_BIT) {                 /* TMRRC */
1615                 iwmmxt_load_reg(cpu_V0, wrd);
1616                 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1617                 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
1618                 tcg_gen_extrl_i64_i32(cpu_R[rdhi], cpu_V0);
1619             } else {                                    /* TMCRR */
1620                 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1621                 iwmmxt_store_reg(cpu_V0, wrd);
1622                 gen_op_iwmmxt_set_mup();
1623             }
1624             return 0;
1625         }
1626
1627         wrd = (insn >> 12) & 0xf;
1628         addr = tcg_temp_new_i32();
1629         if (gen_iwmmxt_address(s, insn, addr)) {
1630             tcg_temp_free_i32(addr);
1631             return 1;
1632         }
1633         if (insn & ARM_CP_RW_BIT) {
1634             if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
1635                 tmp = tcg_temp_new_i32();
1636                 gen_aa32_ld32u(tmp, addr, get_mem_index(s));
1637                 iwmmxt_store_creg(wrd, tmp);
1638             } else {
1639                 i = 1;
1640                 if (insn & (1 << 8)) {
1641                     if (insn & (1 << 22)) {             /* WLDRD */
1642                         gen_aa32_ld64(cpu_M0, addr, get_mem_index(s));
1643                         i = 0;
1644                     } else {                            /* WLDRW wRd */
1645                         tmp = tcg_temp_new_i32();
1646                         gen_aa32_ld32u(tmp, addr, get_mem_index(s));
1647                     }
1648                 } else {
1649                     tmp = tcg_temp_new_i32();
1650                     if (insn & (1 << 22)) {             /* WLDRH */
1651                         gen_aa32_ld16u(tmp, addr, get_mem_index(s));
1652                     } else {                            /* WLDRB */
1653                         gen_aa32_ld8u(tmp, addr, get_mem_index(s));
1654                     }
1655                 }
1656                 if (i) {
1657                     tcg_gen_extu_i32_i64(cpu_M0, tmp);
1658                     tcg_temp_free_i32(tmp);
1659                 }
1660                 gen_op_iwmmxt_movq_wRn_M0(wrd);
1661             }
1662         } else {
1663             if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
1664                 tmp = iwmmxt_load_creg(wrd);
1665                 gen_aa32_st32(tmp, addr, get_mem_index(s));
1666             } else {
1667                 gen_op_iwmmxt_movq_M0_wRn(wrd);
1668                 tmp = tcg_temp_new_i32();
1669                 if (insn & (1 << 8)) {
1670                     if (insn & (1 << 22)) {             /* WSTRD */
1671                         gen_aa32_st64(cpu_M0, addr, get_mem_index(s));
1672                     } else {                            /* WSTRW wRd */
1673                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1674                         gen_aa32_st32(tmp, addr, get_mem_index(s));
1675                     }
1676                 } else {
1677                     if (insn & (1 << 22)) {             /* WSTRH */
1678                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1679                         gen_aa32_st16(tmp, addr, get_mem_index(s));
1680                     } else {                            /* WSTRB */
1681                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1682                         gen_aa32_st8(tmp, addr, get_mem_index(s));
1683                     }
1684                 }
1685             }
1686             tcg_temp_free_i32(tmp);
1687         }
1688         tcg_temp_free_i32(addr);
1689         return 0;
1690     }
1691
1692     if ((insn & 0x0f000000) != 0x0e000000)
1693         return 1;
1694
1695     switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1696     case 0x000:                                         /* WOR */
1697         wrd = (insn >> 12) & 0xf;
1698         rd0 = (insn >> 0) & 0xf;
1699         rd1 = (insn >> 16) & 0xf;
1700         gen_op_iwmmxt_movq_M0_wRn(rd0);
1701         gen_op_iwmmxt_orq_M0_wRn(rd1);
1702         gen_op_iwmmxt_setpsr_nz();
1703         gen_op_iwmmxt_movq_wRn_M0(wrd);
1704         gen_op_iwmmxt_set_mup();
1705         gen_op_iwmmxt_set_cup();
1706         break;
1707     case 0x011:                                         /* TMCR */
1708         if (insn & 0xf)
1709             return 1;
1710         rd = (insn >> 12) & 0xf;
1711         wrd = (insn >> 16) & 0xf;
1712         switch (wrd) {
1713         case ARM_IWMMXT_wCID:
1714         case ARM_IWMMXT_wCASF:
1715             break;
1716         case ARM_IWMMXT_wCon:
1717             gen_op_iwmmxt_set_cup();
1718             /* Fall through.  */
1719         case ARM_IWMMXT_wCSSF:
1720             tmp = iwmmxt_load_creg(wrd);
1721             tmp2 = load_reg(s, rd);
1722             tcg_gen_andc_i32(tmp, tmp, tmp2);
1723             tcg_temp_free_i32(tmp2);
1724             iwmmxt_store_creg(wrd, tmp);
1725             break;
1726         case ARM_IWMMXT_wCGR0:
1727         case ARM_IWMMXT_wCGR1:
1728         case ARM_IWMMXT_wCGR2:
1729         case ARM_IWMMXT_wCGR3:
1730             gen_op_iwmmxt_set_cup();
1731             tmp = load_reg(s, rd);
1732             iwmmxt_store_creg(wrd, tmp);
1733             break;
1734         default:
1735             return 1;
1736         }
1737         break;
1738     case 0x100:                                         /* WXOR */
1739         wrd = (insn >> 12) & 0xf;
1740         rd0 = (insn >> 0) & 0xf;
1741         rd1 = (insn >> 16) & 0xf;
1742         gen_op_iwmmxt_movq_M0_wRn(rd0);
1743         gen_op_iwmmxt_xorq_M0_wRn(rd1);
1744         gen_op_iwmmxt_setpsr_nz();
1745         gen_op_iwmmxt_movq_wRn_M0(wrd);
1746         gen_op_iwmmxt_set_mup();
1747         gen_op_iwmmxt_set_cup();
1748         break;
1749     case 0x111:                                         /* TMRC */
1750         if (insn & 0xf)
1751             return 1;
1752         rd = (insn >> 12) & 0xf;
1753         wrd = (insn >> 16) & 0xf;
1754         tmp = iwmmxt_load_creg(wrd);
1755         store_reg(s, rd, tmp);
1756         break;
1757     case 0x300:                                         /* WANDN */
1758         wrd = (insn >> 12) & 0xf;
1759         rd0 = (insn >> 0) & 0xf;
1760         rd1 = (insn >> 16) & 0xf;
1761         gen_op_iwmmxt_movq_M0_wRn(rd0);
1762         tcg_gen_neg_i64(cpu_M0, cpu_M0);
1763         gen_op_iwmmxt_andq_M0_wRn(rd1);
1764         gen_op_iwmmxt_setpsr_nz();
1765         gen_op_iwmmxt_movq_wRn_M0(wrd);
1766         gen_op_iwmmxt_set_mup();
1767         gen_op_iwmmxt_set_cup();
1768         break;
1769     case 0x200:                                         /* WAND */
1770         wrd = (insn >> 12) & 0xf;
1771         rd0 = (insn >> 0) & 0xf;
1772         rd1 = (insn >> 16) & 0xf;
1773         gen_op_iwmmxt_movq_M0_wRn(rd0);
1774         gen_op_iwmmxt_andq_M0_wRn(rd1);
1775         gen_op_iwmmxt_setpsr_nz();
1776         gen_op_iwmmxt_movq_wRn_M0(wrd);
1777         gen_op_iwmmxt_set_mup();
1778         gen_op_iwmmxt_set_cup();
1779         break;
1780     case 0x810: case 0xa10:                             /* WMADD */
1781         wrd = (insn >> 12) & 0xf;
1782         rd0 = (insn >> 0) & 0xf;
1783         rd1 = (insn >> 16) & 0xf;
1784         gen_op_iwmmxt_movq_M0_wRn(rd0);
1785         if (insn & (1 << 21))
1786             gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1787         else
1788             gen_op_iwmmxt_madduq_M0_wRn(rd1);
1789         gen_op_iwmmxt_movq_wRn_M0(wrd);
1790         gen_op_iwmmxt_set_mup();
1791         break;
1792     case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
1793         wrd = (insn >> 12) & 0xf;
1794         rd0 = (insn >> 16) & 0xf;
1795         rd1 = (insn >> 0) & 0xf;
1796         gen_op_iwmmxt_movq_M0_wRn(rd0);
1797         switch ((insn >> 22) & 3) {
1798         case 0:
1799             gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1800             break;
1801         case 1:
1802             gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1803             break;
1804         case 2:
1805             gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1806             break;
1807         case 3:
1808             return 1;
1809         }
1810         gen_op_iwmmxt_movq_wRn_M0(wrd);
1811         gen_op_iwmmxt_set_mup();
1812         gen_op_iwmmxt_set_cup();
1813         break;
1814     case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
1815         wrd = (insn >> 12) & 0xf;
1816         rd0 = (insn >> 16) & 0xf;
1817         rd1 = (insn >> 0) & 0xf;
1818         gen_op_iwmmxt_movq_M0_wRn(rd0);
1819         switch ((insn >> 22) & 3) {
1820         case 0:
1821             gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1822             break;
1823         case 1:
1824             gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1825             break;
1826         case 2:
1827             gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1828             break;
1829         case 3:
1830             return 1;
1831         }
1832         gen_op_iwmmxt_movq_wRn_M0(wrd);
1833         gen_op_iwmmxt_set_mup();
1834         gen_op_iwmmxt_set_cup();
1835         break;
1836     case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
1837         wrd = (insn >> 12) & 0xf;
1838         rd0 = (insn >> 16) & 0xf;
1839         rd1 = (insn >> 0) & 0xf;
1840         gen_op_iwmmxt_movq_M0_wRn(rd0);
1841         if (insn & (1 << 22))
1842             gen_op_iwmmxt_sadw_M0_wRn(rd1);
1843         else
1844             gen_op_iwmmxt_sadb_M0_wRn(rd1);
1845         if (!(insn & (1 << 20)))
1846             gen_op_iwmmxt_addl_M0_wRn(wrd);
1847         gen_op_iwmmxt_movq_wRn_M0(wrd);
1848         gen_op_iwmmxt_set_mup();
1849         break;
1850     case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
1851         wrd = (insn >> 12) & 0xf;
1852         rd0 = (insn >> 16) & 0xf;
1853         rd1 = (insn >> 0) & 0xf;
1854         gen_op_iwmmxt_movq_M0_wRn(rd0);
1855         if (insn & (1 << 21)) {
1856             if (insn & (1 << 20))
1857                 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1858             else
1859                 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1860         } else {
1861             if (insn & (1 << 20))
1862                 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1863             else
1864                 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1865         }
1866         gen_op_iwmmxt_movq_wRn_M0(wrd);
1867         gen_op_iwmmxt_set_mup();
1868         break;
1869     case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
1870         wrd = (insn >> 12) & 0xf;
1871         rd0 = (insn >> 16) & 0xf;
1872         rd1 = (insn >> 0) & 0xf;
1873         gen_op_iwmmxt_movq_M0_wRn(rd0);
1874         if (insn & (1 << 21))
1875             gen_op_iwmmxt_macsw_M0_wRn(rd1);
1876         else
1877             gen_op_iwmmxt_macuw_M0_wRn(rd1);
1878         if (!(insn & (1 << 20))) {
1879             iwmmxt_load_reg(cpu_V1, wrd);
1880             tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1881         }
1882         gen_op_iwmmxt_movq_wRn_M0(wrd);
1883         gen_op_iwmmxt_set_mup();
1884         break;
1885     case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
1886         wrd = (insn >> 12) & 0xf;
1887         rd0 = (insn >> 16) & 0xf;
1888         rd1 = (insn >> 0) & 0xf;
1889         gen_op_iwmmxt_movq_M0_wRn(rd0);
1890         switch ((insn >> 22) & 3) {
1891         case 0:
1892             gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1893             break;
1894         case 1:
1895             gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1896             break;
1897         case 2:
1898             gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1899             break;
1900         case 3:
1901             return 1;
1902         }
1903         gen_op_iwmmxt_movq_wRn_M0(wrd);
1904         gen_op_iwmmxt_set_mup();
1905         gen_op_iwmmxt_set_cup();
1906         break;
1907     case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
1908         wrd = (insn >> 12) & 0xf;
1909         rd0 = (insn >> 16) & 0xf;
1910         rd1 = (insn >> 0) & 0xf;
1911         gen_op_iwmmxt_movq_M0_wRn(rd0);
1912         if (insn & (1 << 22)) {
1913             if (insn & (1 << 20))
1914                 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1915             else
1916                 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1917         } else {
1918             if (insn & (1 << 20))
1919                 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1920             else
1921                 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1922         }
1923         gen_op_iwmmxt_movq_wRn_M0(wrd);
1924         gen_op_iwmmxt_set_mup();
1925         gen_op_iwmmxt_set_cup();
1926         break;
1927     case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
1928         wrd = (insn >> 12) & 0xf;
1929         rd0 = (insn >> 16) & 0xf;
1930         rd1 = (insn >> 0) & 0xf;
1931         gen_op_iwmmxt_movq_M0_wRn(rd0);
1932         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1933         tcg_gen_andi_i32(tmp, tmp, 7);
1934         iwmmxt_load_reg(cpu_V1, rd1);
1935         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1936         tcg_temp_free_i32(tmp);
1937         gen_op_iwmmxt_movq_wRn_M0(wrd);
1938         gen_op_iwmmxt_set_mup();
1939         break;
1940     case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
1941         if (((insn >> 6) & 3) == 3)
1942             return 1;
1943         rd = (insn >> 12) & 0xf;
1944         wrd = (insn >> 16) & 0xf;
1945         tmp = load_reg(s, rd);
1946         gen_op_iwmmxt_movq_M0_wRn(wrd);
1947         switch ((insn >> 6) & 3) {
1948         case 0:
1949             tmp2 = tcg_const_i32(0xff);
1950             tmp3 = tcg_const_i32((insn & 7) << 3);
1951             break;
1952         case 1:
1953             tmp2 = tcg_const_i32(0xffff);
1954             tmp3 = tcg_const_i32((insn & 3) << 4);
1955             break;
1956         case 2:
1957             tmp2 = tcg_const_i32(0xffffffff);
1958             tmp3 = tcg_const_i32((insn & 1) << 5);
1959             break;
1960         default:
1961             TCGV_UNUSED_I32(tmp2);
1962             TCGV_UNUSED_I32(tmp3);
1963         }
1964         gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1965         tcg_temp_free_i32(tmp3);
1966         tcg_temp_free_i32(tmp2);
1967         tcg_temp_free_i32(tmp);
1968         gen_op_iwmmxt_movq_wRn_M0(wrd);
1969         gen_op_iwmmxt_set_mup();
1970         break;
1971     case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
1972         rd = (insn >> 12) & 0xf;
1973         wrd = (insn >> 16) & 0xf;
1974         if (rd == 15 || ((insn >> 22) & 3) == 3)
1975             return 1;
1976         gen_op_iwmmxt_movq_M0_wRn(wrd);
1977         tmp = tcg_temp_new_i32();
1978         switch ((insn >> 22) & 3) {
1979         case 0:
1980             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1981             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1982             if (insn & 8) {
1983                 tcg_gen_ext8s_i32(tmp, tmp);
1984             } else {
1985                 tcg_gen_andi_i32(tmp, tmp, 0xff);
1986             }
1987             break;
1988         case 1:
1989             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1990             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1991             if (insn & 8) {
1992                 tcg_gen_ext16s_i32(tmp, tmp);
1993             } else {
1994                 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1995             }
1996             break;
1997         case 2:
1998             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1999             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
2000             break;
2001         }
2002         store_reg(s, rd, tmp);
2003         break;
2004     case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
2005         if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2006             return 1;
2007         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2008         switch ((insn >> 22) & 3) {
2009         case 0:
2010             tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
2011             break;
2012         case 1:
2013             tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
2014             break;
2015         case 2:
2016             tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
2017             break;
2018         }
2019         tcg_gen_shli_i32(tmp, tmp, 28);
2020         gen_set_nzcv(tmp);
2021         tcg_temp_free_i32(tmp);
2022         break;
2023     case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
2024         if (((insn >> 6) & 3) == 3)
2025             return 1;
2026         rd = (insn >> 12) & 0xf;
2027         wrd = (insn >> 16) & 0xf;
2028         tmp = load_reg(s, rd);
2029         switch ((insn >> 6) & 3) {
2030         case 0:
2031             gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
2032             break;
2033         case 1:
2034             gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
2035             break;
2036         case 2:
2037             gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
2038             break;
2039         }
2040         tcg_temp_free_i32(tmp);
2041         gen_op_iwmmxt_movq_wRn_M0(wrd);
2042         gen_op_iwmmxt_set_mup();
2043         break;
2044     case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
2045         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2046             return 1;
2047         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2048         tmp2 = tcg_temp_new_i32();
2049         tcg_gen_mov_i32(tmp2, tmp);
2050         switch ((insn >> 22) & 3) {
2051         case 0:
2052             for (i = 0; i < 7; i ++) {
2053                 tcg_gen_shli_i32(tmp2, tmp2, 4);
2054                 tcg_gen_and_i32(tmp, tmp, tmp2);
2055             }
2056             break;
2057         case 1:
2058             for (i = 0; i < 3; i ++) {
2059                 tcg_gen_shli_i32(tmp2, tmp2, 8);
2060                 tcg_gen_and_i32(tmp, tmp, tmp2);
2061             }
2062             break;
2063         case 2:
2064             tcg_gen_shli_i32(tmp2, tmp2, 16);
2065             tcg_gen_and_i32(tmp, tmp, tmp2);
2066             break;
2067         }
2068         gen_set_nzcv(tmp);
2069         tcg_temp_free_i32(tmp2);
2070         tcg_temp_free_i32(tmp);
2071         break;
2072     case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
2073         wrd = (insn >> 12) & 0xf;
2074         rd0 = (insn >> 16) & 0xf;
2075         gen_op_iwmmxt_movq_M0_wRn(rd0);
2076         switch ((insn >> 22) & 3) {
2077         case 0:
2078             gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
2079             break;
2080         case 1:
2081             gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
2082             break;
2083         case 2:
2084             gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2085             break;
2086         case 3:
2087             return 1;
2088         }
2089         gen_op_iwmmxt_movq_wRn_M0(wrd);
2090         gen_op_iwmmxt_set_mup();
2091         break;
2092     case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
2093         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2094             return 1;
2095         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2096         tmp2 = tcg_temp_new_i32();
2097         tcg_gen_mov_i32(tmp2, tmp);
2098         switch ((insn >> 22) & 3) {
2099         case 0:
2100             for (i = 0; i < 7; i ++) {
2101                 tcg_gen_shli_i32(tmp2, tmp2, 4);
2102                 tcg_gen_or_i32(tmp, tmp, tmp2);
2103             }
2104             break;
2105         case 1:
2106             for (i = 0; i < 3; i ++) {
2107                 tcg_gen_shli_i32(tmp2, tmp2, 8);
2108                 tcg_gen_or_i32(tmp, tmp, tmp2);
2109             }
2110             break;
2111         case 2:
2112             tcg_gen_shli_i32(tmp2, tmp2, 16);
2113             tcg_gen_or_i32(tmp, tmp, tmp2);
2114             break;
2115         }
2116         gen_set_nzcv(tmp);
2117         tcg_temp_free_i32(tmp2);
2118         tcg_temp_free_i32(tmp);
2119         break;
2120     case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
2121         rd = (insn >> 12) & 0xf;
2122         rd0 = (insn >> 16) & 0xf;
2123         if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2124             return 1;
2125         gen_op_iwmmxt_movq_M0_wRn(rd0);
2126         tmp = tcg_temp_new_i32();
2127         switch ((insn >> 22) & 3) {
2128         case 0:
2129             gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2130             break;
2131         case 1:
2132             gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2133             break;
2134         case 2:
2135             gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2136             break;
2137         }
2138         store_reg(s, rd, tmp);
2139         break;
2140     case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
2141     case 0x906: case 0xb06: case 0xd06: case 0xf06:
2142         wrd = (insn >> 12) & 0xf;
2143         rd0 = (insn >> 16) & 0xf;
2144         rd1 = (insn >> 0) & 0xf;
2145         gen_op_iwmmxt_movq_M0_wRn(rd0);
2146         switch ((insn >> 22) & 3) {
2147         case 0:
2148             if (insn & (1 << 21))
2149                 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2150             else
2151                 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2152             break;
2153         case 1:
2154             if (insn & (1 << 21))
2155                 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2156             else
2157                 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2158             break;
2159         case 2:
2160             if (insn & (1 << 21))
2161                 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2162             else
2163                 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2164             break;
2165         case 3:
2166             return 1;
2167         }
2168         gen_op_iwmmxt_movq_wRn_M0(wrd);
2169         gen_op_iwmmxt_set_mup();
2170         gen_op_iwmmxt_set_cup();
2171         break;
2172     case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
2173     case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2174         wrd = (insn >> 12) & 0xf;
2175         rd0 = (insn >> 16) & 0xf;
2176         gen_op_iwmmxt_movq_M0_wRn(rd0);
2177         switch ((insn >> 22) & 3) {
2178         case 0:
2179             if (insn & (1 << 21))
2180                 gen_op_iwmmxt_unpacklsb_M0();
2181             else
2182                 gen_op_iwmmxt_unpacklub_M0();
2183             break;
2184         case 1:
2185             if (insn & (1 << 21))
2186                 gen_op_iwmmxt_unpacklsw_M0();
2187             else
2188                 gen_op_iwmmxt_unpackluw_M0();
2189             break;
2190         case 2:
2191             if (insn & (1 << 21))
2192                 gen_op_iwmmxt_unpacklsl_M0();
2193             else
2194                 gen_op_iwmmxt_unpacklul_M0();
2195             break;
2196         case 3:
2197             return 1;
2198         }
2199         gen_op_iwmmxt_movq_wRn_M0(wrd);
2200         gen_op_iwmmxt_set_mup();
2201         gen_op_iwmmxt_set_cup();
2202         break;
2203     case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
2204     case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2205         wrd = (insn >> 12) & 0xf;
2206         rd0 = (insn >> 16) & 0xf;
2207         gen_op_iwmmxt_movq_M0_wRn(rd0);
2208         switch ((insn >> 22) & 3) {
2209         case 0:
2210             if (insn & (1 << 21))
2211                 gen_op_iwmmxt_unpackhsb_M0();
2212             else
2213                 gen_op_iwmmxt_unpackhub_M0();
2214             break;
2215         case 1:
2216             if (insn & (1 << 21))
2217                 gen_op_iwmmxt_unpackhsw_M0();
2218             else
2219                 gen_op_iwmmxt_unpackhuw_M0();
2220             break;
2221         case 2:
2222             if (insn & (1 << 21))
2223                 gen_op_iwmmxt_unpackhsl_M0();
2224             else
2225                 gen_op_iwmmxt_unpackhul_M0();
2226             break;
2227         case 3:
2228             return 1;
2229         }
2230         gen_op_iwmmxt_movq_wRn_M0(wrd);
2231         gen_op_iwmmxt_set_mup();
2232         gen_op_iwmmxt_set_cup();
2233         break;
2234     case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
2235     case 0x214: case 0x614: case 0xa14: case 0xe14:
2236         if (((insn >> 22) & 3) == 0)
2237             return 1;
2238         wrd = (insn >> 12) & 0xf;
2239         rd0 = (insn >> 16) & 0xf;
2240         gen_op_iwmmxt_movq_M0_wRn(rd0);
2241         tmp = tcg_temp_new_i32();
2242         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2243             tcg_temp_free_i32(tmp);
2244             return 1;
2245         }
2246         switch ((insn >> 22) & 3) {
2247         case 1:
2248             gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2249             break;
2250         case 2:
2251             gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2252             break;
2253         case 3:
2254             gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2255             break;
2256         }
2257         tcg_temp_free_i32(tmp);
2258         gen_op_iwmmxt_movq_wRn_M0(wrd);
2259         gen_op_iwmmxt_set_mup();
2260         gen_op_iwmmxt_set_cup();
2261         break;
2262     case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
2263     case 0x014: case 0x414: case 0x814: case 0xc14:
2264         if (((insn >> 22) & 3) == 0)
2265             return 1;
2266         wrd = (insn >> 12) & 0xf;
2267         rd0 = (insn >> 16) & 0xf;
2268         gen_op_iwmmxt_movq_M0_wRn(rd0);
2269         tmp = tcg_temp_new_i32();
2270         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2271             tcg_temp_free_i32(tmp);
2272             return 1;
2273         }
2274         switch ((insn >> 22) & 3) {
2275         case 1:
2276             gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2277             break;
2278         case 2:
2279             gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2280             break;
2281         case 3:
2282             gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2283             break;
2284         }
2285         tcg_temp_free_i32(tmp);
2286         gen_op_iwmmxt_movq_wRn_M0(wrd);
2287         gen_op_iwmmxt_set_mup();
2288         gen_op_iwmmxt_set_cup();
2289         break;
2290     case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
2291     case 0x114: case 0x514: case 0x914: case 0xd14:
2292         if (((insn >> 22) & 3) == 0)
2293             return 1;
2294         wrd = (insn >> 12) & 0xf;
2295         rd0 = (insn >> 16) & 0xf;
2296         gen_op_iwmmxt_movq_M0_wRn(rd0);
2297         tmp = tcg_temp_new_i32();
2298         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2299             tcg_temp_free_i32(tmp);
2300             return 1;
2301         }
2302         switch ((insn >> 22) & 3) {
2303         case 1:
2304             gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2305             break;
2306         case 2:
2307             gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2308             break;
2309         case 3:
2310             gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2311             break;
2312         }
2313         tcg_temp_free_i32(tmp);
2314         gen_op_iwmmxt_movq_wRn_M0(wrd);
2315         gen_op_iwmmxt_set_mup();
2316         gen_op_iwmmxt_set_cup();
2317         break;
2318     case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
2319     case 0x314: case 0x714: case 0xb14: case 0xf14:
2320         if (((insn >> 22) & 3) == 0)
2321             return 1;
2322         wrd = (insn >> 12) & 0xf;
2323         rd0 = (insn >> 16) & 0xf;
2324         gen_op_iwmmxt_movq_M0_wRn(rd0);
2325         tmp = tcg_temp_new_i32();
2326         switch ((insn >> 22) & 3) {
2327         case 1:
2328             if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2329                 tcg_temp_free_i32(tmp);
2330                 return 1;
2331             }
2332             gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2333             break;
2334         case 2:
2335             if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2336                 tcg_temp_free_i32(tmp);
2337                 return 1;
2338             }
2339             gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2340             break;
2341         case 3:
2342             if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2343                 tcg_temp_free_i32(tmp);
2344                 return 1;
2345             }
2346             gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2347             break;
2348         }
2349         tcg_temp_free_i32(tmp);
2350         gen_op_iwmmxt_movq_wRn_M0(wrd);
2351         gen_op_iwmmxt_set_mup();
2352         gen_op_iwmmxt_set_cup();
2353         break;
2354     case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
2355     case 0x916: case 0xb16: case 0xd16: case 0xf16:
2356         wrd = (insn >> 12) & 0xf;
2357         rd0 = (insn >> 16) & 0xf;
2358         rd1 = (insn >> 0) & 0xf;
2359         gen_op_iwmmxt_movq_M0_wRn(rd0);
2360         switch ((insn >> 22) & 3) {
2361         case 0:
2362             if (insn & (1 << 21))
2363                 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2364             else
2365                 gen_op_iwmmxt_minub_M0_wRn(rd1);
2366             break;
2367         case 1:
2368             if (insn & (1 << 21))
2369                 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2370             else
2371                 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2372             break;
2373         case 2:
2374             if (insn & (1 << 21))
2375                 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2376             else
2377                 gen_op_iwmmxt_minul_M0_wRn(rd1);
2378             break;
2379         case 3:
2380             return 1;
2381         }
2382         gen_op_iwmmxt_movq_wRn_M0(wrd);
2383         gen_op_iwmmxt_set_mup();
2384         break;
2385     case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
2386     case 0x816: case 0xa16: case 0xc16: case 0xe16:
2387         wrd = (insn >> 12) & 0xf;
2388         rd0 = (insn >> 16) & 0xf;
2389         rd1 = (insn >> 0) & 0xf;
2390         gen_op_iwmmxt_movq_M0_wRn(rd0);
2391         switch ((insn >> 22) & 3) {
2392         case 0:
2393             if (insn & (1 << 21))
2394                 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2395             else
2396                 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2397             break;
2398         case 1:
2399             if (insn & (1 << 21))
2400                 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2401             else
2402                 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2403             break;
2404         case 2:
2405             if (insn & (1 << 21))
2406                 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2407             else
2408                 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2409             break;
2410         case 3:
2411             return 1;
2412         }
2413         gen_op_iwmmxt_movq_wRn_M0(wrd);
2414         gen_op_iwmmxt_set_mup();
2415         break;
2416     case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
2417     case 0x402: case 0x502: case 0x602: case 0x702:
2418         wrd = (insn >> 12) & 0xf;
2419         rd0 = (insn >> 16) & 0xf;
2420         rd1 = (insn >> 0) & 0xf;
2421         gen_op_iwmmxt_movq_M0_wRn(rd0);
2422         tmp = tcg_const_i32((insn >> 20) & 3);
2423         iwmmxt_load_reg(cpu_V1, rd1);
2424         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2425         tcg_temp_free_i32(tmp);
2426         gen_op_iwmmxt_movq_wRn_M0(wrd);
2427         gen_op_iwmmxt_set_mup();
2428         break;
2429     case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
2430     case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2431     case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2432     case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2433         wrd = (insn >> 12) & 0xf;
2434         rd0 = (insn >> 16) & 0xf;
2435         rd1 = (insn >> 0) & 0xf;
2436         gen_op_iwmmxt_movq_M0_wRn(rd0);
2437         switch ((insn >> 20) & 0xf) {
2438         case 0x0:
2439             gen_op_iwmmxt_subnb_M0_wRn(rd1);
2440             break;
2441         case 0x1:
2442             gen_op_iwmmxt_subub_M0_wRn(rd1);
2443             break;
2444         case 0x3:
2445             gen_op_iwmmxt_subsb_M0_wRn(rd1);
2446             break;
2447         case 0x4:
2448             gen_op_iwmmxt_subnw_M0_wRn(rd1);
2449             break;
2450         case 0x5:
2451             gen_op_iwmmxt_subuw_M0_wRn(rd1);
2452             break;
2453         case 0x7:
2454             gen_op_iwmmxt_subsw_M0_wRn(rd1);
2455             break;
2456         case 0x8:
2457             gen_op_iwmmxt_subnl_M0_wRn(rd1);
2458             break;
2459         case 0x9:
2460             gen_op_iwmmxt_subul_M0_wRn(rd1);
2461             break;
2462         case 0xb:
2463             gen_op_iwmmxt_subsl_M0_wRn(rd1);
2464             break;
2465         default:
2466             return 1;
2467         }
2468         gen_op_iwmmxt_movq_wRn_M0(wrd);
2469         gen_op_iwmmxt_set_mup();
2470         gen_op_iwmmxt_set_cup();
2471         break;
2472     case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
2473     case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2474     case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2475     case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2476         wrd = (insn >> 12) & 0xf;
2477         rd0 = (insn >> 16) & 0xf;
2478         gen_op_iwmmxt_movq_M0_wRn(rd0);
2479         tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2480         gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2481         tcg_temp_free_i32(tmp);
2482         gen_op_iwmmxt_movq_wRn_M0(wrd);
2483         gen_op_iwmmxt_set_mup();
2484         gen_op_iwmmxt_set_cup();
2485         break;
2486     case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
2487     case 0x418: case 0x518: case 0x618: case 0x718:
2488     case 0x818: case 0x918: case 0xa18: case 0xb18:
2489     case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2490         wrd = (insn >> 12) & 0xf;
2491         rd0 = (insn >> 16) & 0xf;
2492         rd1 = (insn >> 0) & 0xf;
2493         gen_op_iwmmxt_movq_M0_wRn(rd0);
2494         switch ((insn >> 20) & 0xf) {
2495         case 0x0:
2496             gen_op_iwmmxt_addnb_M0_wRn(rd1);
2497             break;
2498         case 0x1:
2499             gen_op_iwmmxt_addub_M0_wRn(rd1);
2500             break;
2501         case 0x3:
2502             gen_op_iwmmxt_addsb_M0_wRn(rd1);
2503             break;
2504         case 0x4:
2505             gen_op_iwmmxt_addnw_M0_wRn(rd1);
2506             break;
2507         case 0x5:
2508             gen_op_iwmmxt_adduw_M0_wRn(rd1);
2509             break;
2510         case 0x7:
2511             gen_op_iwmmxt_addsw_M0_wRn(rd1);
2512             break;
2513         case 0x8:
2514             gen_op_iwmmxt_addnl_M0_wRn(rd1);
2515             break;
2516         case 0x9:
2517             gen_op_iwmmxt_addul_M0_wRn(rd1);
2518             break;
2519         case 0xb:
2520             gen_op_iwmmxt_addsl_M0_wRn(rd1);
2521             break;
2522         default:
2523             return 1;
2524         }
2525         gen_op_iwmmxt_movq_wRn_M0(wrd);
2526         gen_op_iwmmxt_set_mup();
2527         gen_op_iwmmxt_set_cup();
2528         break;
2529     case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
2530     case 0x408: case 0x508: case 0x608: case 0x708:
2531     case 0x808: case 0x908: case 0xa08: case 0xb08:
2532     case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2533         if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2534             return 1;
2535         wrd = (insn >> 12) & 0xf;
2536         rd0 = (insn >> 16) & 0xf;
2537         rd1 = (insn >> 0) & 0xf;
2538         gen_op_iwmmxt_movq_M0_wRn(rd0);
2539         switch ((insn >> 22) & 3) {
2540         case 1:
2541             if (insn & (1 << 21))
2542                 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2543             else
2544                 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2545             break;
2546         case 2:
2547             if (insn & (1 << 21))
2548                 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2549             else
2550                 gen_op_iwmmxt_packul_M0_wRn(rd1);
2551             break;
2552         case 3:
2553             if (insn & (1 << 21))
2554                 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2555             else
2556                 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2557             break;
2558         }
2559         gen_op_iwmmxt_movq_wRn_M0(wrd);
2560         gen_op_iwmmxt_set_mup();
2561         gen_op_iwmmxt_set_cup();
2562         break;
2563     case 0x201: case 0x203: case 0x205: case 0x207:
2564     case 0x209: case 0x20b: case 0x20d: case 0x20f:
2565     case 0x211: case 0x213: case 0x215: case 0x217:
2566     case 0x219: case 0x21b: case 0x21d: case 0x21f:
2567         wrd = (insn >> 5) & 0xf;
2568         rd0 = (insn >> 12) & 0xf;
2569         rd1 = (insn >> 0) & 0xf;
2570         if (rd0 == 0xf || rd1 == 0xf)
2571             return 1;
2572         gen_op_iwmmxt_movq_M0_wRn(wrd);
2573         tmp = load_reg(s, rd0);
2574         tmp2 = load_reg(s, rd1);
2575         switch ((insn >> 16) & 0xf) {
2576         case 0x0:                                       /* TMIA */
2577             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2578             break;
2579         case 0x8:                                       /* TMIAPH */
2580             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2581             break;
2582         case 0xc: case 0xd: case 0xe: case 0xf:         /* TMIAxy */
2583             if (insn & (1 << 16))
2584                 tcg_gen_shri_i32(tmp, tmp, 16);
2585             if (insn & (1 << 17))
2586                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2587             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2588             break;
2589         default:
2590             tcg_temp_free_i32(tmp2);
2591             tcg_temp_free_i32(tmp);
2592             return 1;
2593         }
2594         tcg_temp_free_i32(tmp2);
2595         tcg_temp_free_i32(tmp);
2596         gen_op_iwmmxt_movq_wRn_M0(wrd);
2597         gen_op_iwmmxt_set_mup();
2598         break;
2599     default:
2600         return 1;
2601     }
2602
2603     return 0;
2604 }
2605
2606 /* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2607    (ie. an undefined instruction).  */
2608 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2609 {
2610     int acc, rd0, rd1, rdhi, rdlo;
2611     TCGv_i32 tmp, tmp2;
2612
2613     if ((insn & 0x0ff00f10) == 0x0e200010) {
2614         /* Multiply with Internal Accumulate Format */
2615         rd0 = (insn >> 12) & 0xf;
2616         rd1 = insn & 0xf;
2617         acc = (insn >> 5) & 7;
2618
2619         if (acc != 0)
2620             return 1;
2621
2622         tmp = load_reg(s, rd0);
2623         tmp2 = load_reg(s, rd1);
2624         switch ((insn >> 16) & 0xf) {
2625         case 0x0:                                       /* MIA */
2626             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2627             break;
2628         case 0x8:                                       /* MIAPH */
2629             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2630             break;
2631         case 0xc:                                       /* MIABB */
2632         case 0xd:                                       /* MIABT */
2633         case 0xe:                                       /* MIATB */
2634         case 0xf:                                       /* MIATT */
2635             if (insn & (1 << 16))
2636                 tcg_gen_shri_i32(tmp, tmp, 16);
2637             if (insn & (1 << 17))
2638                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2639             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2640             break;
2641         default:
2642             return 1;
2643         }
2644         tcg_temp_free_i32(tmp2);
2645         tcg_temp_free_i32(tmp);
2646
2647         gen_op_iwmmxt_movq_wRn_M0(acc);
2648         return 0;
2649     }
2650
2651     if ((insn & 0x0fe00ff8) == 0x0c400000) {
2652         /* Internal Accumulator Access Format */
2653         rdhi = (insn >> 16) & 0xf;
2654         rdlo = (insn >> 12) & 0xf;
2655         acc = insn & 7;
2656
2657         if (acc != 0)
2658             return 1;
2659
2660         if (insn & ARM_CP_RW_BIT) {                     /* MRA */
2661             iwmmxt_load_reg(cpu_V0, acc);
2662             tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2663             tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
2664             tcg_gen_extrl_i64_i32(cpu_R[rdhi], cpu_V0);
2665             tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2666         } else {                                        /* MAR */
2667             tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2668             iwmmxt_store_reg(cpu_V0, acc);
2669         }
2670         return 0;
2671     }
2672
2673     return 1;
2674 }
2675
/* Helpers for extracting VFP register numbers from an instruction word.
   VFP_REG_SHR shifts right for n > 0, left for n <= 0 (used so that the
   "bigbit" field position can be folded into one expression).  */
#define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
/* Single-precision register number: 4-bit field at "bigbit" forms the
   high bits, the single bit at "smallbit" is the LSB.  */
#define VFP_SREG(insn, bigbit, smallbit) \
  ((VFP_REG_SHR(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1))
/* Double-precision register number: with VFP3 the "smallbit" extends the
   4-bit field to 5 bits (D0..D31); without VFP3 a set "smallbit" means a
   register above D15, which is UNDEF, so the macro returns 1 from the
   enclosing decoder.  "insn" is parenthesized so operator-expression
   arguments expand correctly.  */
#define VFP_DREG(reg, insn, bigbit, smallbit) do { \
    if (arm_dc_feature(s, ARM_FEATURE_VFP3)) { \
        reg = (((insn) >> (bigbit)) & 0x0f) \
              | (((insn) >> ((smallbit) - 4)) & 0x10); \
    } else { \
        if ((insn) & (1 << (smallbit))) \
            return 1; \
        reg = ((insn) >> (bigbit)) & 0x0f; \
    }} while (0)

/* Field positions for the D (destination), N and M (source) registers.  */
#define VFP_SREG_D(insn) VFP_SREG(insn, 12, 22)
#define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
#define VFP_SREG_N(insn) VFP_SREG(insn, 16,  7)
#define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16,  7)
#define VFP_SREG_M(insn) VFP_SREG(insn,  0,  5)
#define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn,  0,  5)
2695
2696 /* Move between integer and VFP cores.  */
2697 static TCGv_i32 gen_vfp_mrs(void)
2698 {
2699     TCGv_i32 tmp = tcg_temp_new_i32();
2700     tcg_gen_mov_i32(tmp, cpu_F0s);
2701     return tmp;
2702 }
2703
/* Move from integer to VFP core: copy tmp into the scratch register F0s.
   Consumes ownership of tmp (it is freed here).  */
static void gen_vfp_msr(TCGv_i32 tmp)
{
    tcg_gen_mov_i32(cpu_F0s, tmp);
    tcg_temp_free_i32(tmp);
}
2709
2710 static void gen_neon_dup_u8(TCGv_i32 var, int shift)
2711 {
2712     TCGv_i32 tmp = tcg_temp_new_i32();
2713     if (shift)
2714         tcg_gen_shri_i32(var, var, shift);
2715     tcg_gen_ext8u_i32(var, var);
2716     tcg_gen_shli_i32(tmp, var, 8);
2717     tcg_gen_or_i32(var, var, tmp);
2718     tcg_gen_shli_i32(tmp, var, 16);
2719     tcg_gen_or_i32(var, var, tmp);
2720     tcg_temp_free_i32(tmp);
2721 }
2722
2723 static void gen_neon_dup_low16(TCGv_i32 var)
2724 {
2725     TCGv_i32 tmp = tcg_temp_new_i32();
2726     tcg_gen_ext16u_i32(var, var);
2727     tcg_gen_shli_i32(tmp, var, 16);
2728     tcg_gen_or_i32(var, var, tmp);
2729     tcg_temp_free_i32(tmp);
2730 }
2731
2732 static void gen_neon_dup_high16(TCGv_i32 var)
2733 {
2734     TCGv_i32 tmp = tcg_temp_new_i32();
2735     tcg_gen_andi_i32(var, var, 0xffff0000);
2736     tcg_gen_shri_i32(tmp, var, 16);
2737     tcg_gen_or_i32(var, var, tmp);
2738     tcg_temp_free_i32(tmp);
2739 }
2740
2741 static TCGv_i32 gen_load_and_replicate(DisasContext *s, TCGv_i32 addr, int size)
2742 {
2743     /* Load a single Neon element and replicate into a 32 bit TCG reg */
2744     TCGv_i32 tmp = tcg_temp_new_i32();
2745     switch (size) {
2746     case 0:
2747         gen_aa32_ld8u(tmp, addr, get_mem_index(s));
2748         gen_neon_dup_u8(tmp, 0);
2749         break;
2750     case 1:
2751         gen_aa32_ld16u(tmp, addr, get_mem_index(s));
2752         gen_neon_dup_low16(tmp);
2753         break;
2754     case 2:
2755         gen_aa32_ld32u(tmp, addr, get_mem_index(s));
2756         break;
2757     default: /* Avoid compiler warnings.  */
2758         abort();
2759     }
2760     return tmp;
2761 }
2762
/* VSEL: conditionally select between register n and register m based on
 * the cached NZCV flag values, writing the result to register d.
 * cc (insn bits [21:20]) encodes the condition: eq, vs, ge or gt (the
 * complementary conditions are expressed by the compiler/assembler
 * swapping the source operands).  Always succeeds (returns 0).
 */
static int handle_vsel(uint32_t insn, uint32_t rd, uint32_t rn, uint32_t rm,
                       uint32_t dp)
{
    uint32_t cc = extract32(insn, 20, 2);

    if (dp) {
        TCGv_i64 frn, frm, dest;
        TCGv_i64 tmp, zero, zf, nf, vf;

        zero = tcg_const_i64(0);

        frn = tcg_temp_new_i64();
        frm = tcg_temp_new_i64();
        dest = tcg_temp_new_i64();

        zf = tcg_temp_new_i64();
        nf = tcg_temp_new_i64();
        vf = tcg_temp_new_i64();

        /* Widen the 32 bit flag values to 64 bits so they can feed the
         * 64 bit movconds below.  ZF is only compared for equality with
         * zero, so zero extension suffices; NF and VF are tested via
         * their sign, so they must be SIGN extended.
         */
        tcg_gen_extu_i32_i64(zf, cpu_ZF);
        tcg_gen_ext_i32_i64(nf, cpu_NF);
        tcg_gen_ext_i32_i64(vf, cpu_VF);

        tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn));
        tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm));
        switch (cc) {
        case 0: /* eq: Z */
            tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
                                frn, frm);
            break;
        case 1: /* vs: V */
            tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero,
                                frn, frm);
            break;
        case 2: /* ge: N == V -> N ^ V == 0 */
            tmp = tcg_temp_new_i64();
            tcg_gen_xor_i64(tmp, vf, nf);
            tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
                                frn, frm);
            tcg_temp_free_i64(tmp);
            break;
        case 3: /* gt: !Z && N == V */
            /* Two chained movconds: first select on !Z, then overwrite
             * with frm again if the ge (N == V) part fails.
             */
            tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero,
                                frn, frm);
            tmp = tcg_temp_new_i64();
            tcg_gen_xor_i64(tmp, vf, nf);
            tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
                                dest, frm);
            tcg_temp_free_i64(tmp);
            break;
        }
        tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd));
        tcg_temp_free_i64(frn);
        tcg_temp_free_i64(frm);
        tcg_temp_free_i64(dest);

        tcg_temp_free_i64(zf);
        tcg_temp_free_i64(nf);
        tcg_temp_free_i64(vf);

        tcg_temp_free_i64(zero);
    } else {
        /* Single precision: the 32 bit flag variables can be used
         * directly, no widening needed.
         */
        TCGv_i32 frn, frm, dest;
        TCGv_i32 tmp, zero;

        zero = tcg_const_i32(0);

        frn = tcg_temp_new_i32();
        frm = tcg_temp_new_i32();
        dest = tcg_temp_new_i32();
        tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn));
        tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm));
        switch (cc) {
        case 0: /* eq: Z */
            tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
                                frn, frm);
            break;
        case 1: /* vs: V */
            tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero,
                                frn, frm);
            break;
        case 2: /* ge: N == V -> N ^ V == 0 */
            tmp = tcg_temp_new_i32();
            tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
            tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
                                frn, frm);
            tcg_temp_free_i32(tmp);
            break;
        case 3: /* gt: !Z && N == V */
            tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero,
                                frn, frm);
            tmp = tcg_temp_new_i32();
            tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
            tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
                                dest, frm);
            tcg_temp_free_i32(tmp);
            break;
        }
        tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd));
        tcg_temp_free_i32(frn);
        tcg_temp_free_i32(frm);
        tcg_temp_free_i32(dest);

        tcg_temp_free_i32(zero);
    }

    return 0;
}
2871
2872 static int handle_vminmaxnm(uint32_t insn, uint32_t rd, uint32_t rn,
2873                             uint32_t rm, uint32_t dp)
2874 {
2875     uint32_t vmin = extract32(insn, 6, 1);
2876     TCGv_ptr fpst = get_fpstatus_ptr(0);
2877
2878     if (dp) {
2879         TCGv_i64 frn, frm, dest;
2880
2881         frn = tcg_temp_new_i64();
2882         frm = tcg_temp_new_i64();
2883         dest = tcg_temp_new_i64();
2884
2885         tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn));
2886         tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm));
2887         if (vmin) {
2888             gen_helper_vfp_minnumd(dest, frn, frm, fpst);
2889         } else {
2890             gen_helper_vfp_maxnumd(dest, frn, frm, fpst);
2891         }
2892         tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd));
2893         tcg_temp_free_i64(frn);
2894         tcg_temp_free_i64(frm);
2895         tcg_temp_free_i64(dest);
2896     } else {
2897         TCGv_i32 frn, frm, dest;
2898
2899         frn = tcg_temp_new_i32();
2900         frm = tcg_temp_new_i32();
2901         dest = tcg_temp_new_i32();
2902
2903         tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn));
2904         tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm));
2905         if (vmin) {
2906             gen_helper_vfp_minnums(dest, frn, frm, fpst);
2907         } else {
2908             gen_helper_vfp_maxnums(dest, frn, frm, fpst);
2909         }
2910         tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd));
2911         tcg_temp_free_i32(frn);
2912         tcg_temp_free_i32(frm);
2913         tcg_temp_free_i32(dest);
2914     }
2915
2916     tcg_temp_free_ptr(fpst);
2917     return 0;
2918 }
2919
2920 static int handle_vrint(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
2921                         int rounding)
2922 {
2923     TCGv_ptr fpst = get_fpstatus_ptr(0);
2924     TCGv_i32 tcg_rmode;
2925
2926     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
2927     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
2928
2929     if (dp) {
2930         TCGv_i64 tcg_op;
2931         TCGv_i64 tcg_res;
2932         tcg_op = tcg_temp_new_i64();
2933         tcg_res = tcg_temp_new_i64();
2934         tcg_gen_ld_f64(tcg_op, cpu_env, vfp_reg_offset(dp, rm));
2935         gen_helper_rintd(tcg_res, tcg_op, fpst);
2936         tcg_gen_st_f64(tcg_res, cpu_env, vfp_reg_offset(dp, rd));
2937         tcg_temp_free_i64(tcg_op);
2938         tcg_temp_free_i64(tcg_res);
2939     } else {
2940         TCGv_i32 tcg_op;
2941         TCGv_i32 tcg_res;
2942         tcg_op = tcg_temp_new_i32();
2943         tcg_res = tcg_temp_new_i32();
2944         tcg_gen_ld_f32(tcg_op, cpu_env, vfp_reg_offset(dp, rm));
2945         gen_helper_rints(tcg_res, tcg_op, fpst);
2946         tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(dp, rd));
2947         tcg_temp_free_i32(tcg_op);
2948         tcg_temp_free_i32(tcg_res);
2949     }
2950
2951     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
2952     tcg_temp_free_i32(tcg_rmode);
2953
2954     tcg_temp_free_ptr(fpst);
2955     return 0;
2956 }
2957
/* VCVTA/VCVTN/VCVTP/VCVTM: convert the FP value in rm to a 32 bit
 * integer in rd, using the explicitly supplied rounding mode instead
 * of the one in the FPSCR.  Bit 7 of insn selects a signed (1) versus
 * unsigned (0) result.  Always returns 0.
 */
static int handle_vcvt(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
                       int rounding)
{
    bool is_signed = extract32(insn, 7, 1);
    TCGv_ptr fpst = get_fpstatus_ptr(0);
    TCGv_i32 tcg_rmode, tcg_shift;

    /* Zero fractional bits: a plain integer conversion, not fixed point. */
    tcg_shift = tcg_const_i32(0);

    /* Install the requested rounding mode; the helper writes the old
     * mode back into tcg_rmode so the second call below restores it.
     */
    tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);

    if (dp) {
        TCGv_i64 tcg_double, tcg_res;
        TCGv_i32 tcg_tmp;
        /* Rd is encoded as a single precision register even when the source
         * is double precision.
         */
        rd = ((rd << 1) & 0x1e) | ((rd >> 4) & 0x1);
        tcg_double = tcg_temp_new_i64();
        tcg_res = tcg_temp_new_i64();
        tcg_tmp = tcg_temp_new_i32();
        tcg_gen_ld_f64(tcg_double, cpu_env, vfp_reg_offset(1, rm));
        if (is_signed) {
            gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
        } else {
            gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
        }
        /* The helper's 64 bit result holds the converted integer in its
         * low 32 bits; narrow it before storing to the S register.
         */
        tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
        tcg_gen_st_f32(tcg_tmp, cpu_env, vfp_reg_offset(0, rd));
        tcg_temp_free_i32(tcg_tmp);
        tcg_temp_free_i64(tcg_res);
        tcg_temp_free_i64(tcg_double);
    } else {
        TCGv_i32 tcg_single, tcg_res;
        tcg_single = tcg_temp_new_i32();
        tcg_res = tcg_temp_new_i32();
        tcg_gen_ld_f32(tcg_single, cpu_env, vfp_reg_offset(0, rm));
        if (is_signed) {
            gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
        } else {
            gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
        }
        tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(0, rd));
        tcg_temp_free_i32(tcg_res);
        tcg_temp_free_i32(tcg_single);
    }

    /* Restore the previous rounding mode. */
    gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
    tcg_temp_free_i32(tcg_rmode);

    tcg_temp_free_i32(tcg_shift);

    tcg_temp_free_ptr(fpst);

    return 0;
}
3015
/* Table for converting the most common AArch32 encoding of
 * rounding mode to arm_fprounding order (which matches the
 * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
 * Indexed by the 2-bit RM field (insn bits [17:16] in the VRINT*
 * and VCVT* encodings handled by disas_vfp_v8_insn below).
 */
static const uint8_t fp_decode_rm[] = {
    FPROUNDING_TIEAWAY,
    FPROUNDING_TIEEVEN,
    FPROUNDING_POSINF,
    FPROUNDING_NEGINF,
};
3026
3027 static int disas_vfp_v8_insn(DisasContext *s, uint32_t insn)
3028 {
3029     uint32_t rd, rn, rm, dp = extract32(insn, 8, 1);
3030
3031     if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
3032         return 1;
3033     }
3034
3035     if (dp) {
3036         VFP_DREG_D(rd, insn);
3037         VFP_DREG_N(rn, insn);
3038         VFP_DREG_M(rm, insn);
3039     } else {
3040         rd = VFP_SREG_D(insn);
3041         rn = VFP_SREG_N(insn);
3042         rm = VFP_SREG_M(insn);
3043     }
3044
3045     if ((insn & 0x0f800e50) == 0x0e000a00) {
3046         return handle_vsel(insn, rd, rn, rm, dp);
3047     } else if ((insn & 0x0fb00e10) == 0x0e800a00) {
3048         return handle_vminmaxnm(insn, rd, rn, rm, dp);
3049     } else if ((insn & 0x0fbc0ed0) == 0x0eb80a40) {
3050         /* VRINTA, VRINTN, VRINTP, VRINTM */
3051         int rounding = fp_decode_rm[extract32(insn, 16, 2)];
3052         return handle_vrint(insn, rd, rm, dp, rounding);
3053     } else if ((insn & 0x0fbc0e50) == 0x0ebc0a40) {
3054         /* VCVTA, VCVTN, VCVTP, VCVTM */
3055         int rounding = fp_decode_rm[extract32(insn, 16, 2)];
3056         return handle_vcvt(insn, rd, rm, dp, rounding);
3057     }
3058     return 1;
3059 }
3060
3061 /* Disassemble a VFP instruction.  Returns nonzero if an error occurred
3062    (ie. an undefined instruction).  */
3063 static int disas_vfp_insn(DisasContext *s, uint32_t insn)
3064 {
3065     uint32_t rd, rn, rm, op, i, n, offset, delta_d, delta_m, bank_mask;
3066     int dp, veclen;
3067     TCGv_i32 addr;
3068     TCGv_i32 tmp;
3069     TCGv_i32 tmp2;
3070
3071     if (!arm_dc_feature(s, ARM_FEATURE_VFP)) {
3072         return 1;
3073     }
3074
3075     /* FIXME: this access check should not take precedence over UNDEF
3076      * for invalid encodings; we will generate incorrect syndrome information
3077      * for attempts to execute invalid vfp/neon encodings with FP disabled.
3078      */
3079     if (s->fp_excp_el) {
3080         gen_exception_insn(s, 4, EXCP_UDEF,
3081                            syn_fp_access_trap(1, 0xe, s->thumb), s->fp_excp_el);
3082         return 0;
3083     }
3084
3085     if (!s->vfp_enabled) {
3086         /* VFP disabled.  Only allow fmxr/fmrx to/from some control regs.  */
3087         if ((insn & 0x0fe00fff) != 0x0ee00a10)
3088             return 1;
3089         rn = (insn >> 16) & 0xf;
3090         if (rn != ARM_VFP_FPSID && rn != ARM_VFP_FPEXC && rn != ARM_VFP_MVFR2
3091             && rn != ARM_VFP_MVFR1 && rn != ARM_VFP_MVFR0) {
3092             return 1;
3093         }
3094     }
3095
3096     if (extract32(insn, 28, 4) == 0xf) {
3097         /* Encodings with T=1 (Thumb) or unconditional (ARM):
3098          * only used in v8 and above.
3099          */
3100         return disas_vfp_v8_insn(s, insn);
3101     }
3102
3103     dp = ((insn & 0xf00) == 0xb00);
3104     switch ((insn >> 24) & 0xf) {
3105     case 0xe:
3106         if (insn & (1 << 4)) {
3107             /* single register transfer */
3108             rd = (insn >> 12) & 0xf;
3109             if (dp) {
3110                 int size;
3111                 int pass;
3112
3113                 VFP_DREG_N(rn, insn);
3114                 if (insn & 0xf)
3115                     return 1;
3116                 if (insn & 0x00c00060
3117                     && !arm_dc_feature(s, ARM_FEATURE_NEON)) {
3118                     return 1;
3119                 }
3120
3121                 pass = (insn >> 21) & 1;
3122                 if (insn & (1 << 22)) {
3123                     size = 0;
3124                     offset = ((insn >> 5) & 3) * 8;
3125                 } else if (insn & (1 << 5)) {
3126                     size = 1;
3127                     offset = (insn & (1 << 6)) ? 16 : 0;
3128                 } else {
3129                     size = 2;
3130                     offset = 0;
3131                 }
3132                 if (insn & ARM_CP_RW_BIT) {
3133                     /* vfp->arm */
3134                     tmp = neon_load_reg(rn, pass);
3135                     switch (size) {
3136                     case 0:
3137                         if (offset)
3138                             tcg_gen_shri_i32(tmp, tmp, offset);
3139                         if (insn & (1 << 23))
3140                             gen_uxtb(tmp);
3141                         else
3142                             gen_sxtb(tmp);
3143                         break;
3144                     case 1:
3145                         if (insn & (1 << 23)) {
3146                             if (offset) {
3147                                 tcg_gen_shri_i32(tmp, tmp, 16);
3148                             } else {
3149                                 gen_uxth(tmp);
3150                             }
3151                         } else {
3152                             if (offset) {
3153                                 tcg_gen_sari_i32(tmp, tmp, 16);
3154                             } else {
3155                                 gen_sxth(tmp);
3156                             }
3157                         }
3158                         break;
3159                     case 2:
3160                         break;
3161                     }
3162                     store_reg(s, rd, tmp);
3163                 } else {
3164                     /* arm->vfp */
3165                     tmp = load_reg(s, rd);
3166                     if (insn & (1 << 23)) {
3167                         /* VDUP */
3168                         if (size == 0) {
3169                             gen_neon_dup_u8(tmp, 0);
3170                         } else if (size == 1) {
3171                             gen_neon_dup_low16(tmp);
3172                         }
3173                         for (n = 0; n <= pass * 2; n++) {
3174                             tmp2 = tcg_temp_new_i32();
3175                             tcg_gen_mov_i32(tmp2, tmp);
3176                             neon_store_reg(rn, n, tmp2);
3177                         }
3178                         neon_store_reg(rn, n, tmp);
3179                     } else {
3180                         /* VMOV */
3181                         switch (size) {
3182                         case 0:
3183                             tmp2 = neon_load_reg(rn, pass);
3184                             tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 8);
3185                             tcg_temp_free_i32(tmp2);
3186                             break;
3187                         case 1:
3188                             tmp2 = neon_load_reg(rn, pass);
3189                             tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 16);
3190                             tcg_temp_free_i32(tmp2);
3191                             break;
3192                         case 2:
3193                             break;
3194                         }
3195                         neon_store_reg(rn, pass, tmp);
3196                     }
3197                 }
3198             } else { /* !dp */
3199                 if ((insn & 0x6f) != 0x00)
3200                     return 1;
3201                 rn = VFP_SREG_N(insn);
3202                 if (insn & ARM_CP_RW_BIT) {
3203                     /* vfp->arm */
3204                     if (insn & (1 << 21)) {
3205                         /* system register */
3206                         rn >>= 1;
3207
3208                         switch (rn) {
3209                         case ARM_VFP_FPSID:
3210                             /* VFP2 allows access to FSID from userspace.
3211                                VFP3 restricts all id registers to privileged
3212                                accesses.  */
3213                             if (IS_USER(s)
3214                                 && arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3215                                 return 1;
3216                             }
3217                             tmp = load_cpu_field(vfp.xregs[rn]);
3218                             break;
3219                         case ARM_VFP_FPEXC:
3220                             if (IS_USER(s))
3221                                 return 1;
3222                             tmp = load_cpu_field(vfp.xregs[rn]);
3223                             break;
3224                         case ARM_VFP_FPINST:
3225                         case ARM_VFP_FPINST2:
3226                             /* Not present in VFP3.  */
3227                             if (IS_USER(s)
3228                                 || arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3229                                 return 1;
3230                             }
3231                             tmp = load_cpu_field(vfp.xregs[rn]);
3232                             break;
3233                         case ARM_VFP_FPSCR:
3234                             if (rd == 15) {
3235                                 tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
3236                                 tcg_gen_andi_i32(tmp, tmp, 0xf0000000);
3237                             } else {
3238                                 tmp = tcg_temp_new_i32();
3239                                 gen_helper_vfp_get_fpscr(tmp, cpu_env);
3240                             }
3241                             break;
3242                         case ARM_VFP_MVFR2:
3243                             if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
3244                                 return 1;
3245                             }
3246                             /* fall through */
3247                         case ARM_VFP_MVFR0:
3248                         case ARM_VFP_MVFR1:
3249                             if (IS_USER(s)
3250                                 || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
3251                                 return 1;
3252                             }
3253                             tmp = load_cpu_field(vfp.xregs[rn]);
3254                             break;
3255                         default:
3256                             return 1;
3257                         }
3258                     } else {
3259                         gen_mov_F0_vreg(0, rn);
3260                         tmp = gen_vfp_mrs();
3261                     }
3262                     if (rd == 15) {
3263                         /* Set the 4 flag bits in the CPSR.  */
3264                         gen_set_nzcv(tmp);
3265                         tcg_temp_free_i32(tmp);
3266                     } else {
3267                         store_reg(s, rd, tmp);
3268                     }
3269                 } else {
3270                     /* arm->vfp */
3271                     if (insn & (1 << 21)) {
3272                         rn >>= 1;
3273                         /* system register */
3274                         switch (rn) {
3275                         case ARM_VFP_FPSID:
3276                         case ARM_VFP_MVFR0:
3277                         case ARM_VFP_MVFR1:
3278                             /* Writes are ignored.  */
3279                             break;
3280                         case ARM_VFP_FPSCR:
3281                             tmp = load_reg(s, rd);
3282                             gen_helper_vfp_set_fpscr(cpu_env, tmp);
3283                             tcg_temp_free_i32(tmp);
3284                             gen_lookup_tb(s);
3285                             break;
3286                         case ARM_VFP_FPEXC:
3287                             if (IS_USER(s))
3288                                 return 1;
3289                             /* TODO: VFP subarchitecture support.
3290                              * For now, keep the EN bit only */
3291                             tmp = load_reg(s, rd);
3292                             tcg_gen_andi_i32(tmp, tmp, 1 << 30);
3293                             store_cpu_field(tmp, vfp.xregs[rn]);
3294                             gen_lookup_tb(s);
3295                             break;
3296                         case ARM_VFP_FPINST:
3297                         case ARM_VFP_FPINST2:
3298                             if (IS_USER(s)) {
3299                                 return 1;
3300                             }
3301                             tmp = load_reg(s, rd);
3302                             store_cpu_field(tmp, vfp.xregs[rn]);
3303                             break;
3304                         default:
3305                             return 1;
3306                         }
3307                     } else {
3308                         tmp = load_reg(s, rd);
3309                         gen_vfp_msr(tmp);
3310                         gen_mov_vreg_F0(0, rn);
3311                     }
3312                 }
3313             }
3314         } else {
3315             /* data processing */
3316             /* The opcode is in bits 23, 21, 20 and 6.  */
3317             op = ((insn >> 20) & 8) | ((insn >> 19) & 6) | ((insn >> 6) & 1);
3318             if (dp) {
3319                 if (op == 15) {
3320                     /* rn is opcode */
3321                     rn = ((insn >> 15) & 0x1e) | ((insn >> 7) & 1);
3322                 } else {
3323                     /* rn is register number */
3324                     VFP_DREG_N(rn, insn);
3325                 }
3326
3327                 if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18) ||
3328                                  ((rn & 0x1e) == 0x6))) {
3329                     /* Integer or single/half precision destination.  */
3330                     rd = VFP_SREG_D(insn);
3331                 } else {
3332                     VFP_DREG_D(rd, insn);
3333                 }
3334                 if (op == 15 &&
3335                     (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14) ||
3336                      ((rn & 0x1e) == 0x4))) {
3337                     /* VCVT from int or half precision is always from S reg
3338                      * regardless of dp bit. VCVT with immediate frac_bits
3339                      * has same format as SREG_M.
3340                      */
3341                     rm = VFP_SREG_M(insn);
3342                 } else {
3343                     VFP_DREG_M(rm, insn);
3344                 }
3345             } else {
3346                 rn = VFP_SREG_N(insn);
3347                 if (op == 15 && rn == 15) {
3348                     /* Double precision destination.  */
3349                     VFP_DREG_D(rd, insn);
3350                 } else {
3351                     rd = VFP_SREG_D(insn);
3352                 }
3353                 /* NB that we implicitly rely on the encoding for the frac_bits
3354                  * in VCVT of fixed to float being the same as that of an SREG_M
3355                  */
3356                 rm = VFP_SREG_M(insn);
3357             }
3358
3359             veclen = s->vec_len;
3360             if (op == 15 && rn > 3)
3361                 veclen = 0;
3362
3363             /* Shut up compiler warnings.  */
3364             delta_m = 0;
3365             delta_d = 0;
3366             bank_mask = 0;
3367
3368             if (veclen > 0) {
3369                 if (dp)
3370                     bank_mask = 0xc;
3371                 else
3372                     bank_mask = 0x18;
3373
3374                 /* Figure out what type of vector operation this is.  */
3375                 if ((rd & bank_mask) == 0) {
3376                     /* scalar */
3377                     veclen = 0;
3378                 } else {
3379                     if (dp)
3380                         delta_d = (s->vec_stride >> 1) + 1;
3381                     else
3382                         delta_d = s->vec_stride + 1;
3383
3384                     if ((rm & bank_mask) == 0) {
3385                         /* mixed scalar/vector */
3386                         delta_m = 0;
3387                     } else {
3388                         /* vector */
3389                         delta_m = delta_d;
3390                     }
3391                 }
3392             }
3393
3394             /* Load the initial operands.  */
3395             if (op == 15) {
3396                 switch (rn) {
3397                 case 16:
3398                 case 17:
3399                     /* Integer source */
3400                     gen_mov_F0_vreg(0, rm);
3401                     break;
3402                 case 8:
3403                 case 9:
3404                     /* Compare */
3405                     gen_mov_F0_vreg(dp, rd);
3406                     gen_mov_F1_vreg(dp, rm);
3407                     break;
3408                 case 10:
3409                 case 11:
3410                     /* Compare with zero */
3411                     gen_mov_F0_vreg(dp, rd);
3412                     gen_vfp_F1_ld0(dp);
3413                     break;
3414                 case 20:
3415                 case 21:
3416                 case 22:
3417                 case 23:
3418                 case 28:
3419                 case 29:
3420                 case 30:
3421                 case 31:
3422                     /* Source and destination the same.  */
3423                     gen_mov_F0_vreg(dp, rd);
3424                     break;
3425                 case 4:
3426                 case 5:
3427                 case 6:
3428                 case 7:
3429                     /* VCVTB, VCVTT: only present with the halfprec extension
3430                      * UNPREDICTABLE if bit 8 is set prior to ARMv8
3431                      * (we choose to UNDEF)
3432                      */
3433                     if ((dp && !arm_dc_feature(s, ARM_FEATURE_V8)) ||
3434                         !arm_dc_feature(s, ARM_FEATURE_VFP_FP16)) {
3435                         return 1;
3436                     }
3437                     if (!extract32(rn, 1, 1)) {
3438                         /* Half precision source.  */
3439                         gen_mov_F0_vreg(0, rm);
3440                         break;
3441                     }
3442                     /* Otherwise fall through */
3443                 default:
3444                     /* One source operand.  */
3445                     gen_mov_F0_vreg(dp, rm);
3446                     break;
3447                 }
3448             } else {
3449                 /* Two source operands.  */
3450                 gen_mov_F0_vreg(dp, rn);
3451                 gen_mov_F1_vreg(dp, rm);
3452             }
3453
3454             for (;;) {
3455                 /* Perform the calculation.  */
3456                 switch (op) {
3457                 case 0: /* VMLA: fd + (fn * fm) */
3458                     /* Note that order of inputs to the add matters for NaNs */
3459                     gen_vfp_F1_mul(dp);
3460                     gen_mov_F0_vreg(dp, rd);
3461                     gen_vfp_add(dp);
3462                     break;
3463                 case 1: /* VMLS: fd + -(fn * fm) */
3464                     gen_vfp_mul(dp);
3465                     gen_vfp_F1_neg(dp);
3466                     gen_mov_F0_vreg(dp, rd);
3467                     gen_vfp_add(dp);
3468                     break;
3469                 case 2: /* VNMLS: -fd + (fn * fm) */
3470                     /* Note that it isn't valid to replace (-A + B) with (B - A)
3471                      * or similar plausible looking simplifications
3472                      * because this will give wrong results for NaNs.
3473                      */
3474                     gen_vfp_F1_mul(dp);
3475                     gen_mov_F0_vreg(dp, rd);
3476                     gen_vfp_neg(dp);
3477                     gen_vfp_add(dp);
3478                     break;
3479                 case 3: /* VNMLA: -fd + -(fn * fm) */
3480                     gen_vfp_mul(dp);
3481                     gen_vfp_F1_neg(dp);
3482                     gen_mov_F0_vreg(dp, rd);
3483                     gen_vfp_neg(dp);
3484                     gen_vfp_add(dp);
3485                     break;
3486                 case 4: /* mul: fn * fm */
3487                     gen_vfp_mul(dp);
3488                     break;
3489                 case 5: /* nmul: -(fn * fm) */
3490                     gen_vfp_mul(dp);
3491                     gen_vfp_neg(dp);
3492                     break;
3493                 case 6: /* add: fn + fm */
3494                     gen_vfp_add(dp);
3495                     break;
3496                 case 7: /* sub: fn - fm */
3497                     gen_vfp_sub(dp);
3498                     break;
3499                 case 8: /* div: fn / fm */
3500                     gen_vfp_div(dp);
3501                     break;
3502                 case 10: /* VFNMA : fd = muladd(-fd,  fn, fm) */
3503                 case 11: /* VFNMS : fd = muladd(-fd, -fn, fm) */
3504                 case 12: /* VFMA  : fd = muladd( fd,  fn, fm) */
3505                 case 13: /* VFMS  : fd = muladd( fd, -fn, fm) */
3506                     /* These are fused multiply-add, and must be done as one
3507                      * floating point operation with no rounding between the
3508                      * multiplication and addition steps.
3509                      * NB that doing the negations here as separate steps is
3510                      * correct : an input NaN should come out with its sign bit
3511                      * flipped if it is a negated-input.
3512                      */
3513                     if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) {
3514                         return 1;
3515                     }
3516                     if (dp) {
3517                         TCGv_ptr fpst;
3518                         TCGv_i64 frd;
3519                         if (op & 1) {
3520                             /* VFNMS, VFMS */
3521                             gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
3522                         }
3523                         frd = tcg_temp_new_i64();
3524                         tcg_gen_ld_f64(frd, cpu_env, vfp_reg_offset(dp, rd));
3525                         if (op & 2) {
3526                             /* VFNMA, VFNMS */
3527                             gen_helper_vfp_negd(frd, frd);
3528                         }
3529                         fpst = get_fpstatus_ptr(0);
3530                         gen_helper_vfp_muladdd(cpu_F0d, cpu_F0d,
3531                                                cpu_F1d, frd, fpst);
3532                         tcg_temp_free_ptr(fpst);
3533                         tcg_temp_free_i64(frd);
3534                     } else {
3535                         TCGv_ptr fpst;
3536                         TCGv_i32 frd;
3537                         if (op & 1) {
3538                             /* VFNMS, VFMS */
3539                             gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
3540                         }
3541                         frd = tcg_temp_new_i32();
3542                         tcg_gen_ld_f32(frd, cpu_env, vfp_reg_offset(dp, rd));
3543                         if (op & 2) {
3544                             gen_helper_vfp_negs(frd, frd);
3545                         }
3546                         fpst = get_fpstatus_ptr(0);
3547                         gen_helper_vfp_muladds(cpu_F0s, cpu_F0s,
3548                                                cpu_F1s, frd, fpst);
3549                         tcg_temp_free_ptr(fpst);
3550                         tcg_temp_free_i32(frd);
3551                     }
3552                     break;
3553                 case 14: /* fconst */
3554                     if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3555                         return 1;
3556                     }
3557
3558                     n = (insn << 12) & 0x80000000;
3559                     i = ((insn >> 12) & 0x70) | (insn & 0xf);
3560                     if (dp) {
3561                         if (i & 0x40)
3562                             i |= 0x3f80;
3563                         else
3564                             i |= 0x4000;
3565                         n |= i << 16;
3566                         tcg_gen_movi_i64(cpu_F0d, ((uint64_t)n) << 32);
3567                     } else {
3568                         if (i & 0x40)
3569                             i |= 0x780;
3570                         else
3571                             i |= 0x800;
3572                         n |= i << 19;
3573                         tcg_gen_movi_i32(cpu_F0s, n);
3574                     }
3575                     break;
3576                 case 15: /* extension space */
3577                     switch (rn) {
3578                     case 0: /* cpy */
3579                         /* no-op */
3580                         break;
3581                     case 1: /* abs */
3582                         gen_vfp_abs(dp);
3583                         break;
3584                     case 2: /* neg */
3585                         gen_vfp_neg(dp);
3586                         break;
3587                     case 3: /* sqrt */
3588                         gen_vfp_sqrt(dp);
3589                         break;
3590                     case 4: /* vcvtb.f32.f16, vcvtb.f64.f16 */
3591                         tmp = gen_vfp_mrs();
3592                         tcg_gen_ext16u_i32(tmp, tmp);
3593                         if (dp) {
3594                             gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp,
3595                                                            cpu_env);
3596                         } else {
3597                             gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp,
3598                                                            cpu_env);
3599                         }
3600                         tcg_temp_free_i32(tmp);
3601                         break;
3602                     case 5: /* vcvtt.f32.f16, vcvtt.f64.f16 */
3603                         tmp = gen_vfp_mrs();
3604                         tcg_gen_shri_i32(tmp, tmp, 16);
3605                         if (dp) {
3606                             gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp,
3607                                                            cpu_env);
3608                         } else {
3609                             gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp,
3610                                                            cpu_env);
3611                         }
3612                         tcg_temp_free_i32(tmp);
3613                         break;
3614                     case 6: /* vcvtb.f16.f32, vcvtb.f16.f64 */
3615                         tmp = tcg_temp_new_i32();
3616                         if (dp) {
3617                             gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
3618                                                            cpu_env);
3619                         } else {
3620                             gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
3621                                                            cpu_env);
3622                         }
3623                         gen_mov_F0_vreg(0, rd);
3624                         tmp2 = gen_vfp_mrs();
3625                         tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
3626                         tcg_gen_or_i32(tmp, tmp, tmp2);
3627                         tcg_temp_free_i32(tmp2);
3628                         gen_vfp_msr(tmp);
3629                         break;
3630                     case 7: /* vcvtt.f16.f32, vcvtt.f16.f64 */
3631                         tmp = tcg_temp_new_i32();
3632                         if (dp) {
3633                             gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
3634                                                            cpu_env);
3635                         } else {
3636                             gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
3637                                                            cpu_env);
3638                         }
3639                         tcg_gen_shli_i32(tmp, tmp, 16);
3640                         gen_mov_F0_vreg(0, rd);
3641                         tmp2 = gen_vfp_mrs();
3642                         tcg_gen_ext16u_i32(tmp2, tmp2);
3643                         tcg_gen_or_i32(tmp, tmp, tmp2);
3644                         tcg_temp_free_i32(tmp2);
3645                         gen_vfp_msr(tmp);
3646                         break;
3647                     case 8: /* cmp */
3648                         gen_vfp_cmp(dp);
3649                         break;
3650                     case 9: /* cmpe */
3651                         gen_vfp_cmpe(dp);
3652                         break;
3653                     case 10: /* cmpz */
3654                         gen_vfp_cmp(dp);
3655                         break;
3656                     case 11: /* cmpez */
3657                         gen_vfp_F1_ld0(dp);
3658                         gen_vfp_cmpe(dp);
3659                         break;
3660                     case 12: /* vrintr */
3661                     {
3662                         TCGv_ptr fpst = get_fpstatus_ptr(0);
3663                         if (dp) {
3664                             gen_helper_rintd(cpu_F0d, cpu_F0d, fpst);
3665                         } else {
3666                             gen_helper_rints(cpu_F0s, cpu_F0s, fpst);
3667                         }
3668                         tcg_temp_free_ptr(fpst);
3669                         break;
3670                     }
3671                     case 13: /* vrintz */
3672                     {
3673                         TCGv_ptr fpst = get_fpstatus_ptr(0);
3674                         TCGv_i32 tcg_rmode;
3675                         tcg_rmode = tcg_const_i32(float_round_to_zero);
3676                         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
3677                         if (dp) {
3678                             gen_helper_rintd(cpu_F0d, cpu_F0d, fpst);
3679                         } else {
3680                             gen_helper_rints(cpu_F0s, cpu_F0s, fpst);
3681                         }
3682                         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
3683                         tcg_temp_free_i32(tcg_rmode);
3684                         tcg_temp_free_ptr(fpst);
3685                         break;
3686                     }
3687                     case 14: /* vrintx */
3688                     {
3689                         TCGv_ptr fpst = get_fpstatus_ptr(0);
3690                         if (dp) {
3691                             gen_helper_rintd_exact(cpu_F0d, cpu_F0d, fpst);
3692                         } else {
3693                             gen_helper_rints_exact(cpu_F0s, cpu_F0s, fpst);
3694                         }
3695                         tcg_temp_free_ptr(fpst);
3696                         break;
3697                     }
3698                     case 15: /* single<->double conversion */
3699                         if (dp)
3700                             gen_helper_vfp_fcvtsd(cpu_F0s, cpu_F0d, cpu_env);
3701                         else
3702                             gen_helper_vfp_fcvtds(cpu_F0d, cpu_F0s, cpu_env);
3703                         break;
3704                     case 16: /* fuito */
3705                         gen_vfp_uito(dp, 0);
3706                         break;
3707                     case 17: /* fsito */
3708                         gen_vfp_sito(dp, 0);
3709                         break;
3710                     case 20: /* fshto */
3711                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3712                             return 1;
3713                         }
3714                         gen_vfp_shto(dp, 16 - rm, 0);
3715                         break;
3716                     case 21: /* fslto */
3717                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3718                             return 1;
3719                         }
3720                         gen_vfp_slto(dp, 32 - rm, 0);
3721                         break;
3722                     case 22: /* fuhto */
3723                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3724                             return 1;
3725                         }
3726                         gen_vfp_uhto(dp, 16 - rm, 0);
3727                         break;
3728                     case 23: /* fulto */
3729                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3730                             return 1;
3731                         }
3732                         gen_vfp_ulto(dp, 32 - rm, 0);
3733                         break;
3734                     case 24: /* ftoui */
3735                         gen_vfp_toui(dp, 0);
3736                         break;
3737                     case 25: /* ftouiz */
3738                         gen_vfp_touiz(dp, 0);
3739                         break;
3740                     case 26: /* ftosi */
3741                         gen_vfp_tosi(dp, 0);
3742                         break;
3743                     case 27: /* ftosiz */
3744                         gen_vfp_tosiz(dp, 0);
3745                         break;
3746                     case 28: /* ftosh */
3747                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3748                             return 1;
3749                         }
3750                         gen_vfp_tosh(dp, 16 - rm, 0);
3751                         break;
3752                     case 29: /* ftosl */
3753                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3754                             return 1;
3755                         }
3756                         gen_vfp_tosl(dp, 32 - rm, 0);
3757                         break;
3758                     case 30: /* ftouh */
3759                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3760                             return 1;
3761                         }
3762                         gen_vfp_touh(dp, 16 - rm, 0);
3763                         break;
3764                     case 31: /* ftoul */
3765                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3766                             return 1;
3767                         }
3768                         gen_vfp_toul(dp, 32 - rm, 0);
3769                         break;
3770                     default: /* undefined */
3771                         return 1;
3772                     }
3773                     break;
3774                 default: /* undefined */
3775                     return 1;
3776                 }
3777
3778                 /* Write back the result.  */
3779                 if (op == 15 && (rn >= 8 && rn <= 11)) {
3780                     /* Comparison, do nothing.  */
3781                 } else if (op == 15 && dp && ((rn & 0x1c) == 0x18 ||
3782                                               (rn & 0x1e) == 0x6)) {
3783                     /* VCVT double to int: always integer result.
3784                      * VCVT double to half precision is always a single
3785                      * precision result.
3786                      */
3787                     gen_mov_vreg_F0(0, rd);
3788                 } else if (op == 15 && rn == 15) {
3789                     /* conversion */
3790                     gen_mov_vreg_F0(!dp, rd);
3791                 } else {
3792                     gen_mov_vreg_F0(dp, rd);
3793                 }
3794
3795                 /* break out of the loop if we have finished  */
3796                 if (veclen == 0)
3797                     break;
3798
3799                 if (op == 15 && delta_m == 0) {
3800                     /* single source one-many */
3801                     while (veclen--) {
3802                         rd = ((rd + delta_d) & (bank_mask - 1))
3803                              | (rd & bank_mask);
3804                         gen_mov_vreg_F0(dp, rd);
3805                     }
3806                     break;
3807                 }
3808                 /* Setup the next operands.  */
3809                 veclen--;
3810                 rd = ((rd + delta_d) & (bank_mask - 1))
3811                      | (rd & bank_mask);
3812
3813                 if (op == 15) {
3814                     /* One source operand.  */
3815                     rm = ((rm + delta_m) & (bank_mask - 1))
3816                          | (rm & bank_mask);
3817                     gen_mov_F0_vreg(dp, rm);
3818                 } else {
3819                     /* Two source operands.  */
3820                     rn = ((rn + delta_d) & (bank_mask - 1))
3821                          | (rn & bank_mask);
3822                     gen_mov_F0_vreg(dp, rn);
3823                     if (delta_m) {
3824                         rm = ((rm + delta_m) & (bank_mask - 1))
3825                              | (rm & bank_mask);
3826                         gen_mov_F1_vreg(dp, rm);
3827                     }
3828                 }
3829             }
3830         }
3831         break;
3832     case 0xc:
3833     case 0xd:
3834         if ((insn & 0x03e00000) == 0x00400000) {
3835             /* two-register transfer */
3836             rn = (insn >> 16) & 0xf;
3837             rd = (insn >> 12) & 0xf;
3838             if (dp) {
3839                 VFP_DREG_M(rm, insn);
3840             } else {
3841                 rm = VFP_SREG_M(insn);
3842             }
3843
3844             if (insn & ARM_CP_RW_BIT) {
3845                 /* vfp->arm */
3846                 if (dp) {
3847                     gen_mov_F0_vreg(0, rm * 2);
3848                     tmp = gen_vfp_mrs();
3849                     store_reg(s, rd, tmp);
3850                     gen_mov_F0_vreg(0, rm * 2 + 1);
3851                     tmp = gen_vfp_mrs();
3852                     store_reg(s, rn, tmp);
3853                 } else {
3854                     gen_mov_F0_vreg(0, rm);
3855                     tmp = gen_vfp_mrs();
3856                     store_reg(s, rd, tmp);
3857                     gen_mov_F0_vreg(0, rm + 1);
3858                     tmp = gen_vfp_mrs();
3859                     store_reg(s, rn, tmp);
3860                 }
3861             } else {
3862                 /* arm->vfp */
3863                 if (dp) {
3864                     tmp = load_reg(s, rd);
3865                     gen_vfp_msr(tmp);
3866                     gen_mov_vreg_F0(0, rm * 2);
3867                     tmp = load_reg(s, rn);
3868                     gen_vfp_msr(tmp);
3869                     gen_mov_vreg_F0(0, rm * 2 + 1);
3870                 } else {
3871                     tmp = load_reg(s, rd);
3872                     gen_vfp_msr(tmp);
3873                     gen_mov_vreg_F0(0, rm);
3874                     tmp = load_reg(s, rn);
3875                     gen_vfp_msr(tmp);
3876                     gen_mov_vreg_F0(0, rm + 1);
3877                 }
3878             }
3879         } else {
3880             /* Load/store */
3881             rn = (insn >> 16) & 0xf;
3882             if (dp)
3883                 VFP_DREG_D(rd, insn);
3884             else
3885                 rd = VFP_SREG_D(insn);
3886             if ((insn & 0x01200000) == 0x01000000) {
3887                 /* Single load/store */
3888                 offset = (insn & 0xff) << 2;
3889                 if ((insn & (1 << 23)) == 0)
3890                     offset = -offset;
3891                 if (s->thumb && rn == 15) {
3892                     /* This is actually UNPREDICTABLE */
3893                     addr = tcg_temp_new_i32();
3894                     tcg_gen_movi_i32(addr, s->pc & ~2);
3895                 } else {
3896                     addr = load_reg(s, rn);
3897                 }
3898                 tcg_gen_addi_i32(addr, addr, offset);
3899                 if (insn & (1 << 20)) {
3900                     gen_vfp_ld(s, dp, addr);
3901                     gen_mov_vreg_F0(dp, rd);
3902                 } else {
3903                     gen_mov_F0_vreg(dp, rd);
3904                     gen_vfp_st(s, dp, addr);
3905                 }
3906                 tcg_temp_free_i32(addr);
3907             } else {
3908                 /* load/store multiple */
3909                 int w = insn & (1 << 21);
3910                 if (dp)
3911                     n = (insn >> 1) & 0x7f;
3912                 else
3913                     n = insn & 0xff;
3914
3915                 if (w && !(((insn >> 23) ^ (insn >> 24)) & 1)) {
3916                     /* P == U , W == 1  => UNDEF */
3917                     return 1;
3918                 }
3919                 if (n == 0 || (rd + n) > 32 || (dp && n > 16)) {
3920                     /* UNPREDICTABLE cases for bad immediates: we choose to
3921                      * UNDEF to avoid generating huge numbers of TCG ops
3922                      */
3923                     return 1;
3924                 }
3925                 if (rn == 15 && w) {
3926                     /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
3927                     return 1;
3928                 }
3929
3930                 if (s->thumb && rn == 15) {
3931                     /* This is actually UNPREDICTABLE */
3932                     addr = tcg_temp_new_i32();
3933                     tcg_gen_movi_i32(addr, s->pc & ~2);
3934                 } else {
3935                     addr = load_reg(s, rn);
3936                 }
3937                 if (insn & (1 << 24)) /* pre-decrement */
3938                     tcg_gen_addi_i32(addr, addr, -((insn & 0xff) << 2));
3939
3940                 if (dp)
3941                     offset = 8;
3942                 else
3943                     offset = 4;
3944                 for (i = 0; i < n; i++) {
3945                     if (insn & ARM_CP_RW_BIT) {
3946                         /* load */
3947                         gen_vfp_ld(s, dp, addr);
3948                         gen_mov_vreg_F0(dp, rd + i);
3949                     } else {
3950                         /* store */
3951                         gen_mov_F0_vreg(dp, rd + i);
3952                         gen_vfp_st(s, dp, addr);
3953                     }
3954                     tcg_gen_addi_i32(addr, addr, offset);
3955                 }
3956                 if (w) {
3957                     /* writeback */
3958                     if (insn & (1 << 24))
3959                         offset = -offset * n;
3960                     else if (dp && (insn & 1))
3961                         offset = 4;
3962                     else
3963                         offset = 0;
3964
3965                     if (offset != 0)
3966                         tcg_gen_addi_i32(addr, addr, offset);
3967                     store_reg(s, rn, addr);
3968                 } else {
3969                     tcg_temp_free_i32(addr);
3970                 }
3971             }
3972         }
3973         break;
3974     default:
3975         /* Should never happen.  */
3976         return 1;
3977     }
3978     return 0;
3979 }
3980
3981 static inline void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
3982 {
3983     TranslationBlock *tb;
3984
3985     tb = s->tb;
3986     if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK)) {
3987         tcg_gen_goto_tb(n);
3988         gen_set_pc_im(s, dest);
3989         tcg_gen_exit_tb((uintptr_t)tb + n);
3990     } else {
3991         gen_set_pc_im(s, dest);
3992         tcg_gen_exit_tb(0);
3993     }
3994 }
3995
3996 static inline void gen_jmp (DisasContext *s, uint32_t dest)
3997 {
3998     if (unlikely(s->singlestep_enabled || s->ss_active)) {
3999         /* An indirect jump so that we still trigger the debug exception.  */
4000         if (s->thumb)
4001             dest |= 1;
4002         gen_bx_im(s, dest);
4003     } else {
4004         gen_goto_tb(s, 0, dest);
4005         s->is_jmp = DISAS_TB_JUMP;
4006     }
4007 }
4008
4009 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
4010 {
4011     if (x)
4012         tcg_gen_sari_i32(t0, t0, 16);
4013     else
4014         gen_sxth(t0);
4015     if (y)
4016         tcg_gen_sari_i32(t1, t1, 16);
4017     else
4018         gen_sxth(t1);
4019     tcg_gen_mul_i32(t0, t0, t1);
4020 }
4021
4022 /* Return the mask of PSR bits set by a MSR instruction.  */
4023 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
4024 {
4025     uint32_t mask;
4026
4027     mask = 0;
4028     if (flags & (1 << 0))
4029         mask |= 0xff;
4030     if (flags & (1 << 1))
4031         mask |= 0xff00;
4032     if (flags & (1 << 2))
4033         mask |= 0xff0000;
4034     if (flags & (1 << 3))
4035         mask |= 0xff000000;
4036
4037     /* Mask out undefined bits.  */
4038     mask &= ~CPSR_RESERVED;
4039     if (!arm_dc_feature(s, ARM_FEATURE_V4T)) {
4040         mask &= ~CPSR_T;
4041     }
4042     if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
4043         mask &= ~CPSR_Q; /* V5TE in reality*/
4044     }
4045     if (!arm_dc_feature(s, ARM_FEATURE_V6)) {
4046         mask &= ~(CPSR_E | CPSR_GE);
4047     }
4048     if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
4049         mask &= ~CPSR_IT;
4050     }
4051     /* Mask out execution state and reserved bits.  */
4052     if (!spsr) {
4053         mask &= ~(CPSR_EXEC | CPSR_RESERVED);
4054     }
4055     /* Mask out privileged bits.  */
4056     if (IS_USER(s))
4057         mask &= CPSR_USER;
4058     return mask;
4059 }
4060
4061 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
4062 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
4063 {
4064     TCGv_i32 tmp;
4065     if (spsr) {
4066         /* ??? This is also undefined in system mode.  */
4067         if (IS_USER(s))
4068             return 1;
4069
4070         tmp = load_cpu_field(spsr);
4071         tcg_gen_andi_i32(tmp, tmp, ~mask);
4072         tcg_gen_andi_i32(t0, t0, mask);
4073         tcg_gen_or_i32(tmp, tmp, t0);
4074         store_cpu_field(tmp, spsr);
4075     } else {
4076         gen_set_cpsr(t0, mask);
4077     }
4078     tcg_temp_free_i32(t0);
4079     gen_lookup_tb(s);
4080     return 0;
4081 }
4082
4083 /* Returns nonzero if access to the PSR is not permitted.  */
4084 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
4085 {
4086     TCGv_i32 tmp;
4087     tmp = tcg_temp_new_i32();
4088     tcg_gen_movi_i32(tmp, val);
4089     return gen_set_psr(s, mask, spsr, tmp);
4090 }
4091
/* Generate an old-style exception return. Marks pc as dead. */
static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
{
    TCGv_i32 tmp;
    /* Branch to the return address...  */
    store_reg(s, 15, pc);
    /* ...and restore CPSR from the banked SPSR.  */
    tmp = load_cpu_field(spsr);
    gen_set_cpsr(tmp, CPSR_ERET_MASK);
    tcg_temp_free_i32(tmp);
    /* CPU state changed: end the TB so it is re-evaluated.  */
    s->is_jmp = DISAS_UPDATE;
}
4102
/* Generate a v6 exception return.  Marks both values as dead.  */
static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
{
    /* Restore CPSR first, then branch to the loaded return address.  */
    gen_set_cpsr(cpsr, CPSR_ERET_MASK);
    tcg_temp_free_i32(cpsr);
    store_reg(s, 15, pc);
    /* CPU state (mode, flags) may have changed: end this TB.  */
    s->is_jmp = DISAS_UPDATE;
}
4111
4112 static void gen_nop_hint(DisasContext *s, int val)
4113 {
4114     switch (val) {
4115     case 1: /* yield */
4116         gen_set_pc_im(s, s->pc);
4117         s->is_jmp = DISAS_YIELD;
4118         break;
4119     case 3: /* wfi */
4120         gen_set_pc_im(s, s->pc);
4121         s->is_jmp = DISAS_WFI;
4122         break;
4123     case 2: /* wfe */
4124         gen_set_pc_im(s, s->pc);
4125         s->is_jmp = DISAS_WFE;
4126         break;
4127     case 4: /* sev */
4128     case 5: /* sevl */
4129         /* TODO: Implement SEV, SEVL and WFE.  May help SMP performance.  */
4130     default: /* nop */
4131         break;
4132     }
4133 }
4134
4135 #define CPU_V001 cpu_V0, cpu_V0, cpu_V1
4136
4137 static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
4138 {
4139     switch (size) {
4140     case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
4141     case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
4142     case 2: tcg_gen_add_i32(t0, t0, t1); break;
4143     default: abort();
4144     }
4145 }
4146
4147 static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
4148 {
4149     switch (size) {
4150     case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
4151     case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
4152     case 2: tcg_gen_sub_i32(t0, t1, t0); break;
4153     default: return;
4154     }
4155 }
4156
/* 32-bit pairwise ops end up the same as the elementwise versions,
 * since at 32 bits there is only one element per register half.
 */
#define gen_helper_neon_pmax_s32  gen_helper_neon_max_s32
#define gen_helper_neon_pmax_u32  gen_helper_neon_max_u32
#define gen_helper_neon_pmin_s32  gen_helper_neon_min_s32
#define gen_helper_neon_pmin_u32  gen_helper_neon_min_u32
4162
/* Invoke the signed/unsigned 8/16/32-bit cpu_env-taking variant of NEON
 * helper 'name', selected by the local variables 'size' and 'u'
 * (unsignedness).  Operates on locals tmp/tmp2, result left in tmp.
 * An invalid size/u combination makes the enclosing function return 1.
 */
#define GEN_NEON_INTEGER_OP_ENV(name) do { \
    switch ((size << 1) | u) { \
    case 0: \
        gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \
        break; \
    case 1: \
        gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \
        break; \
    case 2: \
        gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \
        break; \
    case 3: \
        gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \
        break; \
    case 4: \
        gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \
        break; \
    case 5: \
        gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \
        break; \
    default: return 1; \
    }} while (0)
4185
/* As GEN_NEON_INTEGER_OP_ENV, but for helpers that do not take cpu_env:
 * dispatch on the local 'size' and 'u' to the s8/u8/s16/u16/s32/u32
 * variant of helper 'name', operating on tmp/tmp2 with the result in
 * tmp.  An invalid combination makes the enclosing function return 1.
 */
#define GEN_NEON_INTEGER_OP(name) do { \
    switch ((size << 1) | u) { \
    case 0: \
        gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \
        break; \
    case 1: \
        gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \
        break; \
    case 2: \
        gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \
        break; \
    case 3: \
        gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \
        break; \
    case 4: \
        gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \
        break; \
    case 5: \
        gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \
        break; \
    default: return 1; \
    }} while (0)
4208
4209 static TCGv_i32 neon_load_scratch(int scratch)
4210 {
4211     TCGv_i32 tmp = tcg_temp_new_i32();
4212     tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
4213     return tmp;
4214 }
4215
/* Spill 'var' to the given per-CPU VFP scratch slot.  Consumes (frees)
 * var; pair with neon_load_scratch to read it back.
 */
static void neon_store_scratch(int scratch, TCGv_i32 var)
{
    tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
    tcg_temp_free_i32(var);
}
4221
/* Load a NEON scalar operand into a new 32-bit temp, replicated across
 * the word.  'reg' packs both the register number and the element
 * selection: reg >> 4 picks the 32-bit pass, the low bits pick the
 * register, and for 16-bit scalars bit 3 picks the high or low half.
 * Caller frees the returned temp.
 */
static inline TCGv_i32 neon_get_scalar(int size, int reg)
{
    TCGv_i32 tmp;
    if (size == 1) {
        /* 16-bit scalar: duplicate the chosen half into both halves.  */
        tmp = neon_load_reg(reg & 7, reg >> 4);
        if (reg & 8) {
            gen_neon_dup_high16(tmp);
        } else {
            gen_neon_dup_low16(tmp);
        }
    } else {
        /* 32-bit scalar: the loaded word is used as-is.  */
        tmp = neon_load_reg(reg & 15, reg >> 4);
    }
    return tmp;
}
4237
4238 static int gen_neon_unzip(int rd, int rm, int size, int q)
4239 {
4240     TCGv_i32 tmp, tmp2;
4241     if (!q && size == 2) {
4242         return 1;
4243     }
4244     tmp = tcg_const_i32(rd);
4245     tmp2 = tcg_const_i32(rm);
4246     if (q) {
4247         switch (size) {
4248         case 0:
4249             gen_helper_neon_qunzip8(cpu_env, tmp, tmp2);
4250             break;
4251         case 1:
4252             gen_helper_neon_qunzip16(cpu_env, tmp, tmp2);
4253             break;
4254         case 2:
4255             gen_helper_neon_qunzip32(cpu_env, tmp, tmp2);
4256             break;
4257         default:
4258             abort();
4259         }
4260     } else {
4261         switch (size) {
4262         case 0:
4263             gen_helper_neon_unzip8(cpu_env, tmp, tmp2);
4264             break;
4265         case 1:
4266             gen_helper_neon_unzip16(cpu_env, tmp, tmp2);
4267             break;
4268         default:
4269             abort();
4270         }
4271     }
4272     tcg_temp_free_i32(tmp);
4273     tcg_temp_free_i32(tmp2);
4274     return 0;
4275 }
4276
4277 static int gen_neon_zip(int rd, int rm, int size, int q)
4278 {
4279     TCGv_i32 tmp, tmp2;
4280     if (!q && size == 2) {
4281         return 1;
4282     }
4283     tmp = tcg_const_i32(rd);
4284     tmp2 = tcg_const_i32(rm);
4285     if (q) {
4286         switch (size) {
4287         case 0:
4288             gen_helper_neon_qzip8(cpu_env, tmp, tmp2);
4289             break;
4290         case 1:
4291             gen_helper_neon_qzip16(cpu_env, tmp, tmp2);
4292             break;
4293         case 2:
4294             gen_helper_neon_qzip32(cpu_env, tmp, tmp2);
4295             break;
4296         default:
4297             abort();
4298         }
4299     } else {
4300         switch (size) {
4301         case 0:
4302             gen_helper_neon_zip8(cpu_env, tmp, tmp2);
4303             break;
4304         case 1:
4305             gen_helper_neon_zip16(cpu_env, tmp, tmp2);
4306             break;
4307         default:
4308             abort();
4309         }
4310     }
4311     tcg_temp_free_i32(tmp);
4312     tcg_temp_free_i32(tmp2);
4313     return 0;
4314 }
4315
/* One 32-bit step of a byte-granularity transpose (VTRN.8): after this,
 * t0 holds the even-numbered bytes of both inputs and t1 the
 * odd-numbered bytes, interleaved.  Both operands are updated in place.
 */
static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 rd, tmp;

    rd = tcg_temp_new_i32();
    tmp = tcg_temp_new_i32();

    /* rd = t0's even bytes moved to odd positions ...  */
    tcg_gen_shli_i32(rd, t0, 8);
    tcg_gen_andi_i32(rd, rd, 0xff00ff00);
    /* ... merged with t1's even bytes kept in place.  */
    tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
    tcg_gen_or_i32(rd, rd, tmp);

    /* t1 = t1's odd bytes moved down, merged with t0's odd bytes.  */
    tcg_gen_shri_i32(t1, t1, 8);
    tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
    tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
    tcg_gen_or_i32(t1, t1, tmp);
    tcg_gen_mov_i32(t0, rd);

    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(rd);
}
4337
4338 static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
4339 {
4340     TCGv_i32 rd, tmp;
4341
4342     rd = tcg_temp_new_i32();
4343     tmp = tcg_temp_new_i32();
4344
4345     tcg_gen_shli_i32(rd, t0, 16);
4346     tcg_gen_andi_i32(tmp, t1, 0xffff);
4347     tcg_gen_or_i32(rd, rd, tmp);
4348     tcg_gen_shri_i32(t1, t1, 16);
4349     tcg_gen_andi_i32(tmp, t0, 0xffff0000);
4350     tcg_gen_or_i32(t1, t1, tmp);
4351     tcg_gen_mov_i32(t0, rd);
4352
4353     tcg_temp_free_i32(tmp);
4354     tcg_temp_free_i32(rd);
4355 }
4356
4357
/* Operand layout for the NEON "load/store multiple structures"
 * instructions, indexed by the op field ((insn >> 8) & 0xf; values
 * 0..10 are valid -- see disas_neon_ls_insn()).
 */
static struct {
    int nregs;      /* number of D registers transferred */
    int interleave; /* element interleave factor; scales the address stride */
    int spacing;    /* D register number increment between transfers */
} neon_ls_element_type[11] = {
    {4, 4, 1}, /* op == 0 */
    {4, 4, 2}, /* op == 1 */
    {4, 1, 1}, /* op == 2 */
    {4, 2, 1}, /* op == 3 */
    {3, 3, 1}, /* op == 4 */
    {3, 3, 2}, /* op == 5 */
    {3, 1, 1}, /* op == 6 */
    {1, 1, 1}, /* op == 7 */
    {2, 2, 1}, /* op == 8 */
    {2, 2, 2}, /* op == 9 */
    {2, 1, 1}  /* op == 10 */
};
4375
4376 /* Translate a NEON load/store element instruction.  Return nonzero if the
4377    instruction is invalid.  */
/* Translate a NEON load/store element instruction.  Return nonzero if the
   instruction is invalid.  */
static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
{
    int rd, rn, rm;
    int op;
    int nregs;
    int interleave;
    int spacing;
    int stride;
    int size;
    int reg;
    int pass;
    int load;
    int shift;
    int n;
    TCGv_i32 addr;
    TCGv_i32 tmp;
    TCGv_i32 tmp2;
    TCGv_i64 tmp64;

    /* FIXME: this access check should not take precedence over UNDEF
     * for invalid encodings; we will generate incorrect syndrome information
     * for attempts to execute invalid vfp/neon encodings with FP disabled.
     */
    if (s->fp_excp_el) {
        gen_exception_insn(s, 4, EXCP_UDEF,
                           syn_fp_access_trap(1, 0xe, s->thumb), s->fp_excp_el);
        return 0;
    }

    if (!s->vfp_enabled)
      return 1;
    VFP_DREG_D(rd, insn);
    rn = (insn >> 16) & 0xf;
    /* rm selects the writeback mode, handled at the bottom of this
     * function: 15 = no writeback, 13 = advance rn by the transfer
     * size, anything else = add register rm to rn.
     */
    rm = insn & 0xf;
    /* L bit: nonzero for a load, zero for a store. */
    load = (insn & (1 << 21)) != 0;
    if ((insn & (1 << 23)) == 0) {
        /* Load store all elements.  */
        op = (insn >> 8) & 0xf;
        size = (insn >> 6) & 3;
        if (op > 10)
            return 1;
        /* Catch UNDEF cases for bad values of align field */
        switch (op & 0xc) {
        case 4:
            if (((insn >> 5) & 1) == 1) {
                return 1;
            }
            break;
        case 8:
            if (((insn >> 4) & 3) == 3) {
                return 1;
            }
            break;
        default:
            break;
        }
        /* Register count / interleave / register spacing come from the
         * per-op table; see neon_ls_element_type above.
         */
        nregs = neon_ls_element_type[op].nregs;
        interleave = neon_ls_element_type[op].interleave;
        spacing = neon_ls_element_type[op].spacing;
        if (size == 3 && (interleave | spacing) != 1)
            return 1;
        addr = tcg_temp_new_i32();
        load_reg_var(s, addr, rn);
        /* Address step between consecutive elements of one register. */
        stride = (1 << size) * interleave;
        for (reg = 0; reg < nregs; reg++) {
            /* For interleaved accesses each register starts at a
             * different offset from the base address; reload rn and
             * apply the per-register displacement.
             */
            if (interleave > 2 || (interleave == 2 && nregs == 2)) {
                load_reg_var(s, addr, rn);
                tcg_gen_addi_i32(addr, addr, (1 << size) * reg);
            } else if (interleave == 2 && nregs == 4 && reg == 2) {
                load_reg_var(s, addr, rn);
                tcg_gen_addi_i32(addr, addr, 1 << size);
            }
            if (size == 3) {
                /* 64-bit elements: one whole-D-register access. */
                tmp64 = tcg_temp_new_i64();
                if (load) {
                    gen_aa32_ld64(tmp64, addr, get_mem_index(s));
                    neon_store_reg64(tmp64, rd);
                } else {
                    neon_load_reg64(tmp64, rd);
                    gen_aa32_st64(tmp64, addr, get_mem_index(s));
                }
                tcg_temp_free_i64(tmp64);
                tcg_gen_addi_i32(addr, addr, stride);
            } else {
                /* Smaller elements: process each 32-bit half ("pass")
                 * of the D register separately.
                 */
                for (pass = 0; pass < 2; pass++) {
                    if (size == 2) {
                        /* One 32-bit access per pass. */
                        if (load) {
                            tmp = tcg_temp_new_i32();
                            gen_aa32_ld32u(tmp, addr, get_mem_index(s));
                            neon_store_reg(rd, pass, tmp);
                        } else {
                            tmp = neon_load_reg(rd, pass);
                            gen_aa32_st32(tmp, addr, get_mem_index(s));
                            tcg_temp_free_i32(tmp);
                        }
                        tcg_gen_addi_i32(addr, addr, stride);
                    } else if (size == 1) {
                        /* Two 16-bit accesses, assembled into / split out
                         * of one 32-bit register half.
                         */
                        if (load) {
                            tmp = tcg_temp_new_i32();
                            gen_aa32_ld16u(tmp, addr, get_mem_index(s));
                            tcg_gen_addi_i32(addr, addr, stride);
                            tmp2 = tcg_temp_new_i32();
                            gen_aa32_ld16u(tmp2, addr, get_mem_index(s));
                            tcg_gen_addi_i32(addr, addr, stride);
                            tcg_gen_shli_i32(tmp2, tmp2, 16);
                            tcg_gen_or_i32(tmp, tmp, tmp2);
                            tcg_temp_free_i32(tmp2);
                            neon_store_reg(rd, pass, tmp);
                        } else {
                            tmp = neon_load_reg(rd, pass);
                            tmp2 = tcg_temp_new_i32();
                            tcg_gen_shri_i32(tmp2, tmp, 16);
                            gen_aa32_st16(tmp, addr, get_mem_index(s));
                            tcg_temp_free_i32(tmp);
                            tcg_gen_addi_i32(addr, addr, stride);
                            gen_aa32_st16(tmp2, addr, get_mem_index(s));
                            tcg_temp_free_i32(tmp2);
                            tcg_gen_addi_i32(addr, addr, stride);
                        }
                    } else /* size == 0 */ {
                        /* Four 8-bit accesses per 32-bit register half. */
                        if (load) {
                            TCGV_UNUSED_I32(tmp2);
                            for (n = 0; n < 4; n++) {
                                tmp = tcg_temp_new_i32();
                                gen_aa32_ld8u(tmp, addr, get_mem_index(s));
                                tcg_gen_addi_i32(addr, addr, stride);
                                if (n == 0) {
                                    /* First byte becomes the accumulator. */
                                    tmp2 = tmp;
                                } else {
                                    tcg_gen_shli_i32(tmp, tmp, n * 8);
                                    tcg_gen_or_i32(tmp2, tmp2, tmp);
                                    tcg_temp_free_i32(tmp);
                                }
                            }
                            neon_store_reg(rd, pass, tmp2);
                        } else {
                            tmp2 = neon_load_reg(rd, pass);
                            for (n = 0; n < 4; n++) {
                                tmp = tcg_temp_new_i32();
                                if (n == 0) {
                                    tcg_gen_mov_i32(tmp, tmp2);
                                } else {
                                    tcg_gen_shri_i32(tmp, tmp2, n * 8);
                                }
                                gen_aa32_st8(tmp, addr, get_mem_index(s));
                                tcg_temp_free_i32(tmp);
                                tcg_gen_addi_i32(addr, addr, stride);
                            }
                            tcg_temp_free_i32(tmp2);
                        }
                    }
                }
            }
            rd += spacing;
        }
        tcg_temp_free_i32(addr);
        /* stride is reused below as the writeback amount for rm == 13. */
        stride = nregs * 8;
    } else {
        size = (insn >> 10) & 3;
        if (size == 3) {
            /* Load single element to all lanes.  */
            int a = (insn >> 4) & 1;
            if (!load) {
                return 1;
            }
            size = (insn >> 6) & 3;
            nregs = ((insn >> 8) & 3) + 1;

            if (size == 3) {
                if (nregs != 4 || a == 0) {
                    return 1;
                }
                /* For VLD4 size==3 a == 1 means 32 bits at 16 byte alignment */
                size = 2;
            }
            if (nregs == 1 && a == 1 && size == 0) {
                return 1;
            }
            if (nregs == 3 && a == 1) {
                return 1;
            }
            addr = tcg_temp_new_i32();
            load_reg_var(s, addr, rn);
            if (nregs == 1) {
                /* VLD1 to all lanes: bit 5 indicates how many Dregs to write */
                tmp = gen_load_and_replicate(s, addr, size);
                tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0));
                tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1));
                if (insn & (1 << 5)) {
                    tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 0));
                    tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 1));
                }
                tcg_temp_free_i32(tmp);
            } else {
                /* VLD2/3/4 to all lanes: bit 5 indicates register stride */
                stride = (insn & (1 << 5)) ? 2 : 1;
                for (reg = 0; reg < nregs; reg++) {
                    tmp = gen_load_and_replicate(s, addr, size);
                    tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0));
                    tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1));
                    tcg_temp_free_i32(tmp);
                    tcg_gen_addi_i32(addr, addr, 1 << size);
                    rd += stride;
                }
            }
            tcg_temp_free_i32(addr);
            /* Writeback amount for rm == 13, used below. */
            stride = (1 << size) * nregs;
        } else {
            /* Single element.  */
            int idx = (insn >> 4) & 0xf;
            pass = (insn >> 7) & 1;
            /* shift = bit position of the element within its 32-bit
             * register half; stride = D register increment.
             */
            switch (size) {
            case 0:
                shift = ((insn >> 5) & 3) * 8;
                stride = 1;
                break;
            case 1:
                shift = ((insn >> 6) & 1) * 16;
                stride = (insn & (1 << 5)) ? 2 : 1;
                break;
            case 2:
                shift = 0;
                stride = (insn & (1 << 6)) ? 2 : 1;
                break;
            default:
                abort();
            }
            nregs = ((insn >> 8) & 3) + 1;
            /* Catch the UNDEF cases. This is unavoidably a bit messy. */
            switch (nregs) {
            case 1:
                if (((idx & (1 << size)) != 0) ||
                    (size == 2 && ((idx & 3) == 1 || (idx & 3) == 2))) {
                    return 1;
                }
                break;
            case 3:
                if ((idx & 1) != 0) {
                    return 1;
                }
                /* fall through */
            case 2:
                if (size == 2 && (idx & 2) != 0) {
                    return 1;
                }
                break;
            case 4:
                if ((size == 2) && ((idx & 3) == 3)) {
                    return 1;
                }
                break;
            default:
                abort();
            }
            if ((rd + stride * (nregs - 1)) > 31) {
                /* Attempts to write off the end of the register file
                 * are UNPREDICTABLE; we choose to UNDEF because otherwise
                 * the neon_load_reg() would write off the end of the array.
                 */
                return 1;
            }
            addr = tcg_temp_new_i32();
            load_reg_var(s, addr, rn);
            for (reg = 0; reg < nregs; reg++) {
                if (load) {
                    tmp = tcg_temp_new_i32();
                    switch (size) {
                    case 0:
                        gen_aa32_ld8u(tmp, addr, get_mem_index(s));
                        break;
                    case 1:
                        gen_aa32_ld16u(tmp, addr, get_mem_index(s));
                        break;
                    case 2:
                        gen_aa32_ld32u(tmp, addr, get_mem_index(s));
                        break;
                    default: /* Avoid compiler warnings.  */
                        abort();
                    }
                    if (size != 2) {
                        /* Merge the loaded element into the existing
                         * register contents at the lane position.
                         */
                        tmp2 = neon_load_reg(rd, pass);
                        tcg_gen_deposit_i32(tmp, tmp2, tmp,
                                            shift, size ? 16 : 8);
                        tcg_temp_free_i32(tmp2);
                    }
                    neon_store_reg(rd, pass, tmp);
                } else { /* Store */
                    tmp = neon_load_reg(rd, pass);
                    if (shift)
                        tcg_gen_shri_i32(tmp, tmp, shift);
                    switch (size) {
                    case 0:
                        gen_aa32_st8(tmp, addr, get_mem_index(s));
                        break;
                    case 1:
                        gen_aa32_st16(tmp, addr, get_mem_index(s));
                        break;
                    case 2:
                        gen_aa32_st32(tmp, addr, get_mem_index(s));
                        break;
                    }
                    tcg_temp_free_i32(tmp);
                }
                rd += stride;
                tcg_gen_addi_i32(addr, addr, 1 << size);
            }
            tcg_temp_free_i32(addr);
            /* Writeback amount for rm == 13, used below. */
            stride = nregs * (1 << size);
        }
    }
    /* Base register writeback: rm == 15 means none, rm == 13 means
     * advance by the transfer size (stride), otherwise add register rm.
     */
    if (rm != 15) {
        TCGv_i32 base;

        base = load_reg(s, rn);
        if (rm == 13) {
            tcg_gen_addi_i32(base, base, stride);
        } else {
            TCGv_i32 index;
            index = load_reg(s, rm);
            tcg_gen_add_i32(base, base, index);
            tcg_temp_free_i32(index);
        }
        store_reg(s, rn, base);
    }
    return 0;
}
4704
4705 /* Bitwise select.  dest = c ? t : f.  Clobbers T and F.  */
/* Bitwise select.  dest = c ? t : f.  Clobbers T and F.  */
static void gen_neon_bsl(TCGv_i32 dest, TCGv_i32 t, TCGv_i32 f, TCGv_i32 c)
{
    /* dest = (t & c) | (f & ~c); t and f are overwritten in the process,
     * which is why the caller may not reuse them afterwards.
     */
    tcg_gen_and_i32(t, t, c);
    tcg_gen_andc_i32(f, f, c);
    tcg_gen_or_i32(dest, t, f);
}
4712
4713 static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
4714 {
4715     switch (size) {
4716     case 0: gen_helper_neon_narrow_u8(dest, src); break;
4717     case 1: gen_helper_neon_narrow_u16(dest, src); break;
4718     case 2: tcg_gen_extrl_i64_i32(dest, src); break;
4719     default: abort();
4720     }
4721 }
4722
4723 static inline void gen_neon_narrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
4724 {
4725     switch (size) {
4726     case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
4727     case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
4728     case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
4729     default: abort();
4730     }
4731 }
4732
4733 static inline void gen_neon_narrow_satu(int size, TCGv_i32 dest, TCGv_i64 src)
4734 {
4735     switch (size) {
4736     case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
4737     case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
4738     case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
4739     default: abort();
4740     }
4741 }
4742
4743 static inline void gen_neon_unarrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
4744 {
4745     switch (size) {
4746     case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
4747     case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
4748     case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
4749     default: abort();
4750     }
4751 }
4752
4753 static inline void gen_neon_shift_narrow(int size, TCGv_i32 var, TCGv_i32 shift,
4754                                          int q, int u)
4755 {
4756     if (q) {
4757         if (u) {
4758             switch (size) {
4759             case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
4760             case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
4761             default: abort();
4762             }
4763         } else {
4764             switch (size) {
4765             case 1: gen_helper_neon_rshl_s16(var, var, shift); break;
4766             case 2: gen_helper_neon_rshl_s32(var, var, shift); break;
4767             default: abort();
4768             }
4769         }
4770     } else {
4771         if (u) {
4772             switch (size) {
4773             case 1: gen_helper_neon_shl_u16(var, var, shift); break;
4774             case 2: gen_helper_neon_shl_u32(var, var, shift); break;
4775             default: abort();
4776             }
4777         } else {
4778             switch (size) {
4779             case 1: gen_helper_neon_shl_s16(var, var, shift); break;
4780             case 2: gen_helper_neon_shl_s32(var, var, shift); break;
4781             default: abort();
4782             }
4783         }
4784     }
4785 }
4786
4787 static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u)
4788 {
4789     if (u) {
4790         switch (size) {
4791         case 0: gen_helper_neon_widen_u8(dest, src); break;
4792         case 1: gen_helper_neon_widen_u16(dest, src); break;
4793         case 2: tcg_gen_extu_i32_i64(dest, src); break;
4794         default: abort();
4795         }
4796     } else {
4797         switch (size) {
4798         case 0: gen_helper_neon_widen_s8(dest, src); break;
4799         case 1: gen_helper_neon_widen_s16(dest, src); break;
4800         case 2: tcg_gen_ext_i32_i64(dest, src); break;
4801         default: abort();
4802         }
4803     }
4804     tcg_temp_free_i32(src);
4805 }
4806
4807 static inline void gen_neon_addl(int size)
4808 {
4809     switch (size) {
4810     case 0: gen_helper_neon_addl_u16(CPU_V001); break;
4811     case 1: gen_helper_neon_addl_u32(CPU_V001); break;
4812     case 2: tcg_gen_add_i64(CPU_V001); break;
4813     default: abort();
4814     }
4815 }
4816
4817 static inline void gen_neon_subl(int size)
4818 {
4819     switch (size) {
4820     case 0: gen_helper_neon_subl_u16(CPU_V001); break;
4821     case 1: gen_helper_neon_subl_u32(CPU_V001); break;
4822     case 2: tcg_gen_sub_i64(CPU_V001); break;
4823     default: abort();
4824     }
4825 }
4826
4827 static inline void gen_neon_negl(TCGv_i64 var, int size)
4828 {
4829     switch (size) {
4830     case 0: gen_helper_neon_negl_u16(var, var); break;
4831     case 1: gen_helper_neon_negl_u32(var, var); break;
4832     case 2:
4833         tcg_gen_neg_i64(var, var);
4834         break;
4835     default: abort();
4836     }
4837 }
4838
4839 static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
4840 {
4841     switch (size) {
4842     case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
4843     case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
4844     default: abort();
4845     }
4846 }
4847
4848 static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
4849                                  int size, int u)
4850 {
4851     TCGv_i64 tmp;
4852
4853     switch ((size << 1) | u) {
4854     case 0: gen_helper_neon_mull_s8(dest, a, b); break;
4855     case 1: gen_helper_neon_mull_u8(dest, a, b); break;
4856     case 2: gen_helper_neon_mull_s16(dest, a, b); break;
4857     case 3: gen_helper_neon_mull_u16(dest, a, b); break;
4858     case 4:
4859         tmp = gen_muls_i64_i32(a, b);
4860         tcg_gen_mov_i64(dest, tmp);
4861         tcg_temp_free_i64(tmp);
4862         break;
4863     case 5:
4864         tmp = gen_mulu_i64_i32(a, b);
4865         tcg_gen_mov_i64(dest, tmp);
4866         tcg_temp_free_i64(tmp);
4867         break;
4868     default: abort();
4869     }
4870
4871     /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
4872        Don't forget to clean them now.  */
4873     if (size < 2) {
4874         tcg_temp_free_i32(a);
4875         tcg_temp_free_i32(b);
4876     }
4877 }
4878
4879 static void gen_neon_narrow_op(int op, int u, int size,
4880                                TCGv_i32 dest, TCGv_i64 src)
4881 {
4882     if (op) {
4883         if (u) {
4884             gen_neon_unarrow_sats(size, dest, src);
4885         } else {
4886             gen_neon_narrow(size, dest, src);
4887         }
4888     } else {
4889         if (u) {
4890             gen_neon_narrow_satu(size, dest, src);
4891         } else {
4892             gen_neon_narrow_sats(size, dest, src);
4893         }
4894     }
4895 }
4896
/* Symbolic constants for op fields for Neon 3-register same-length.
 * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
 * table A7-9.
 */
/* These op values also index neon_3r_sizes[] below, which encodes the
 * valid size field values for each operation.
 */
#define NEON_3R_VHADD 0
#define NEON_3R_VQADD 1
#define NEON_3R_VRHADD 2
#define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
#define NEON_3R_VHSUB 4
#define NEON_3R_VQSUB 5
#define NEON_3R_VCGT 6
#define NEON_3R_VCGE 7
#define NEON_3R_VSHL 8
#define NEON_3R_VQSHL 9
#define NEON_3R_VRSHL 10
#define NEON_3R_VQRSHL 11
#define NEON_3R_VMAX 12
#define NEON_3R_VMIN 13
#define NEON_3R_VABD 14
#define NEON_3R_VABA 15
#define NEON_3R_VADD_VSUB 16
#define NEON_3R_VTST_VCEQ 17
#define NEON_3R_VML 18 /* VMLA, VMLAL, VMLS, VMLSL */
#define NEON_3R_VMUL 19
#define NEON_3R_VPMAX 20
#define NEON_3R_VPMIN 21
#define NEON_3R_VQDMULH_VQRDMULH 22
#define NEON_3R_VPADD 23
#define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
#define NEON_3R_VFM 25 /* VFMA, VFMS : float fused multiply-add */
#define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
#define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
#define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
#define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
#define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
#define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */
4933
/* Each entry has bit n set if the 3-reg-same insn with that op value
 * accepts size value n (otherwise it will UNDEF); checked in
 * disas_neon_data_insn().  0x7 = any size, 0xf = sizes 0..3.
 */
static const uint8_t neon_3r_sizes[] = {
    [NEON_3R_VHADD] = 0x7,
    [NEON_3R_VQADD] = 0xf,
    [NEON_3R_VRHADD] = 0x7,
    [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
    [NEON_3R_VHSUB] = 0x7,
    [NEON_3R_VQSUB] = 0xf,
    [NEON_3R_VCGT] = 0x7,
    [NEON_3R_VCGE] = 0x7,
    [NEON_3R_VSHL] = 0xf,
    [NEON_3R_VQSHL] = 0xf,
    [NEON_3R_VRSHL] = 0xf,
    [NEON_3R_VQRSHL] = 0xf,
    [NEON_3R_VMAX] = 0x7,
    [NEON_3R_VMIN] = 0x7,
    [NEON_3R_VABD] = 0x7,
    [NEON_3R_VABA] = 0x7,
    [NEON_3R_VADD_VSUB] = 0xf,
    [NEON_3R_VTST_VCEQ] = 0x7,
    [NEON_3R_VML] = 0x7,
    [NEON_3R_VMUL] = 0x7,
    [NEON_3R_VPMAX] = 0x7,
    [NEON_3R_VPMIN] = 0x7,
    [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
    [NEON_3R_VPADD] = 0x7,
    [NEON_3R_SHA] = 0xf, /* size field encodes op type */
    [NEON_3R_VFM] = 0x5, /* size bit 1 encodes op */
    [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
    [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
    [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
    [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
    [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
    [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
};
4968
/* Symbolic constants for op fields for Neon 2-register miscellaneous.
 * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
 * table A7-13.
 */
/* Note that op values 3 and 29 are not allocated; they have no entry in
 * neon_2rm_sizes[] and therefore UNDEF (see the comment on that array).
 */
#define NEON_2RM_VREV64 0
#define NEON_2RM_VREV32 1
#define NEON_2RM_VREV16 2
#define NEON_2RM_VPADDL 4
#define NEON_2RM_VPADDL_U 5
#define NEON_2RM_AESE 6 /* Includes AESD */
#define NEON_2RM_AESMC 7 /* Includes AESIMC */
#define NEON_2RM_VCLS 8
#define NEON_2RM_VCLZ 9
#define NEON_2RM_VCNT 10
#define NEON_2RM_VMVN 11
#define NEON_2RM_VPADAL 12
#define NEON_2RM_VPADAL_U 13
#define NEON_2RM_VQABS 14
#define NEON_2RM_VQNEG 15
#define NEON_2RM_VCGT0 16
#define NEON_2RM_VCGE0 17
#define NEON_2RM_VCEQ0 18
#define NEON_2RM_VCLE0 19
#define NEON_2RM_VCLT0 20
#define NEON_2RM_SHA1H 21
#define NEON_2RM_VABS 22
#define NEON_2RM_VNEG 23
#define NEON_2RM_VCGT0_F 24
#define NEON_2RM_VCGE0_F 25
#define NEON_2RM_VCEQ0_F 26
#define NEON_2RM_VCLE0_F 27
#define NEON_2RM_VCLT0_F 28
#define NEON_2RM_VABS_F 30
#define NEON_2RM_VNEG_F 31
#define NEON_2RM_VSWP 32
#define NEON_2RM_VTRN 33
#define NEON_2RM_VUZP 34
#define NEON_2RM_VZIP 35
#define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
#define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
#define NEON_2RM_VSHLL 38
#define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
#define NEON_2RM_VRINTN 40
#define NEON_2RM_VRINTX 41
#define NEON_2RM_VRINTA 42
#define NEON_2RM_VRINTZ 43
#define NEON_2RM_VCVT_F16_F32 44
#define NEON_2RM_VRINTM 45
#define NEON_2RM_VCVT_F32_F16 46
#define NEON_2RM_VRINTP 47
#define NEON_2RM_VCVTAU 48
#define NEON_2RM_VCVTAS 49
#define NEON_2RM_VCVTNU 50
#define NEON_2RM_VCVTNS 51
#define NEON_2RM_VCVTPU 52
#define NEON_2RM_VCVTPS 53
#define NEON_2RM_VCVTMU 54
#define NEON_2RM_VCVTMS 55
#define NEON_2RM_VRECPE 56
#define NEON_2RM_VRSQRTE 57
#define NEON_2RM_VRECPE_F 58
#define NEON_2RM_VRSQRTE_F 59
#define NEON_2RM_VCVT_FS 60
#define NEON_2RM_VCVT_FU 61
#define NEON_2RM_VCVT_SF 62
#define NEON_2RM_VCVT_UF 63
5035
5036 static int neon_2rm_is_float_op(int op)
5037 {
5038     /* Return true if this neon 2reg-misc op is float-to-float */
5039     return (op == NEON_2RM_VABS_F || op == NEON_2RM_VNEG_F ||
5040             (op >= NEON_2RM_VRINTN && op <= NEON_2RM_VRINTZ) ||
5041             op == NEON_2RM_VRINTM ||
5042             (op >= NEON_2RM_VRINTP && op <= NEON_2RM_VCVTMS) ||
5043             op >= NEON_2RM_VRECPE_F);
5044 }
5045
/* Each entry in this array has bit n set if the insn allows
 * size value n (otherwise it will UNDEF). Since unallocated
 * op values will have no bits set they always UNDEF.
 */
/* Mask decode: 0x1 = size 0 only, 0x2 = size 1 only,
 * 0x4 = size 2 only, 0x7 = any of sizes 0..2.
 */
static const uint8_t neon_2rm_sizes[] = {
    [NEON_2RM_VREV64] = 0x7,
    [NEON_2RM_VREV32] = 0x3,
    [NEON_2RM_VREV16] = 0x1,
    [NEON_2RM_VPADDL] = 0x7,
    [NEON_2RM_VPADDL_U] = 0x7,
    [NEON_2RM_AESE] = 0x1,
    [NEON_2RM_AESMC] = 0x1,
    [NEON_2RM_VCLS] = 0x7,
    [NEON_2RM_VCLZ] = 0x7,
    [NEON_2RM_VCNT] = 0x1,
    [NEON_2RM_VMVN] = 0x1,
    [NEON_2RM_VPADAL] = 0x7,
    [NEON_2RM_VPADAL_U] = 0x7,
    [NEON_2RM_VQABS] = 0x7,
    [NEON_2RM_VQNEG] = 0x7,
    [NEON_2RM_VCGT0] = 0x7,
    [NEON_2RM_VCGE0] = 0x7,
    [NEON_2RM_VCEQ0] = 0x7,
    [NEON_2RM_VCLE0] = 0x7,
    [NEON_2RM_VCLT0] = 0x7,
    [NEON_2RM_SHA1H] = 0x4,
    [NEON_2RM_VABS] = 0x7,
    [NEON_2RM_VNEG] = 0x7,
    [NEON_2RM_VCGT0_F] = 0x4,
    [NEON_2RM_VCGE0_F] = 0x4,
    [NEON_2RM_VCEQ0_F] = 0x4,
    [NEON_2RM_VCLE0_F] = 0x4,
    [NEON_2RM_VCLT0_F] = 0x4,
    [NEON_2RM_VABS_F] = 0x4,
    [NEON_2RM_VNEG_F] = 0x4,
    [NEON_2RM_VSWP] = 0x1,
    [NEON_2RM_VTRN] = 0x7,
    [NEON_2RM_VUZP] = 0x7,
    [NEON_2RM_VZIP] = 0x7,
    [NEON_2RM_VMOVN] = 0x7,
    [NEON_2RM_VQMOVN] = 0x7,
    [NEON_2RM_VSHLL] = 0x7,
    [NEON_2RM_SHA1SU1] = 0x4,
    [NEON_2RM_VRINTN] = 0x4,
    [NEON_2RM_VRINTX] = 0x4,
    [NEON_2RM_VRINTA] = 0x4,
    [NEON_2RM_VRINTZ] = 0x4,
    [NEON_2RM_VCVT_F16_F32] = 0x2,
    [NEON_2RM_VRINTM] = 0x4,
    [NEON_2RM_VCVT_F32_F16] = 0x2,
    [NEON_2RM_VRINTP] = 0x4,
    [NEON_2RM_VCVTAU] = 0x4,
    [NEON_2RM_VCVTAS] = 0x4,
    [NEON_2RM_VCVTNU] = 0x4,
    [NEON_2RM_VCVTNS] = 0x4,
    [NEON_2RM_VCVTPU] = 0x4,
    [NEON_2RM_VCVTPS] = 0x4,
    [NEON_2RM_VCVTMU] = 0x4,
    [NEON_2RM_VCVTMS] = 0x4,
    [NEON_2RM_VRECPE] = 0x4,
    [NEON_2RM_VRSQRTE] = 0x4,
    [NEON_2RM_VRECPE_F] = 0x4,
    [NEON_2RM_VRSQRTE_F] = 0x4,
    [NEON_2RM_VCVT_FS] = 0x4,
    [NEON_2RM_VCVT_FU] = 0x4,
    [NEON_2RM_VCVT_SF] = 0x4,
    [NEON_2RM_VCVT_UF] = 0x4,
};
5114
5115 /* Translate a NEON data processing instruction.  Return nonzero if the
5116    instruction is invalid.
5117    We process data in a mixture of 32-bit and 64-bit chunks.
5118    Mostly we use 32-bit chunks so we can use normal scalar instructions.  */
5119
5120 static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
5121 {
5122     int op;
5123     int q;
5124     int rd, rn, rm;
5125     int size;
5126     int shift;
5127     int pass;
5128     int count;
5129     int pairwise;
5130     int u;
5131     uint32_t imm, mask;
5132     TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
5133     TCGv_i64 tmp64;
5134
5135     /* FIXME: this access check should not take precedence over UNDEF
5136      * for invalid encodings; we will generate incorrect syndrome information
5137      * for attempts to execute invalid vfp/neon encodings with FP disabled.
5138      */
5139     if (s->fp_excp_el) {
5140         gen_exception_insn(s, 4, EXCP_UDEF,
5141                            syn_fp_access_trap(1, 0xe, s->thumb), s->fp_excp_el);
5142         return 0;
5143     }
5144
5145     if (!s->vfp_enabled)
5146       return 1;
5147     q = (insn & (1 << 6)) != 0;
5148     u = (insn >> 24) & 1;
5149     VFP_DREG_D(rd, insn);
5150     VFP_DREG_N(rn, insn);
5151     VFP_DREG_M(rm, insn);
5152     size = (insn >> 20) & 3;
5153     if ((insn & (1 << 23)) == 0) {
5154         /* Three register same length.  */
5155         op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
5156         /* Catch invalid op and bad size combinations: UNDEF */
5157         if ((neon_3r_sizes[op] & (1 << size)) == 0) {
5158             return 1;
5159         }
5160         /* All insns of this form UNDEF for either this condition or the
5161          * superset of cases "Q==1"; we catch the latter later.
5162          */
5163         if (q && ((rd | rn | rm) & 1)) {
5164             return 1;
5165         }
5166         /*
5167          * The SHA-1/SHA-256 3-register instructions require special treatment
5168          * here, as their size field is overloaded as an op type selector, and
5169          * they all consume their input in a single pass.
5170          */
5171         if (op == NEON_3R_SHA) {
5172             if (!q) {
5173                 return 1;
5174             }
5175             if (!u) { /* SHA-1 */
5176                 if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)) {
5177                     return 1;
5178                 }
5179                 tmp = tcg_const_i32(rd);
5180                 tmp2 = tcg_const_i32(rn);
5181                 tmp3 = tcg_const_i32(rm);
5182                 tmp4 = tcg_const_i32(size);
5183                 gen_helper_crypto_sha1_3reg(cpu_env, tmp, tmp2, tmp3, tmp4);
5184                 tcg_temp_free_i32(tmp4);
5185             } else { /* SHA-256 */
5186                 if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA256) || size == 3) {
5187                     return 1;
5188                 }
5189                 tmp = tcg_const_i32(rd);
5190                 tmp2 = tcg_const_i32(rn);
5191                 tmp3 = tcg_const_i32(rm);
5192                 switch (size) {
5193                 case 0:
5194                     gen_helper_crypto_sha256h(cpu_env, tmp, tmp2, tmp3);
5195                     break;
5196                 case 1:
5197                     gen_helper_crypto_sha256h2(cpu_env, tmp, tmp2, tmp3);
5198                     break;
5199                 case 2:
5200                     gen_helper_crypto_sha256su1(cpu_env, tmp, tmp2, tmp3);
5201                     break;
5202                 }
5203             }
5204             tcg_temp_free_i32(tmp);
5205             tcg_temp_free_i32(tmp2);
5206             tcg_temp_free_i32(tmp3);
5207             return 0;
5208         }
5209         if (size == 3 && op != NEON_3R_LOGIC) {
5210             /* 64-bit element instructions. */
5211             for (pass = 0; pass < (q ? 2 : 1); pass++) {
5212                 neon_load_reg64(cpu_V0, rn + pass);
5213                 neon_load_reg64(cpu_V1, rm + pass);
5214                 switch (op) {
5215                 case NEON_3R_VQADD:
5216                     if (u) {
5217                         gen_helper_neon_qadd_u64(cpu_V0, cpu_env,
5218                                                  cpu_V0, cpu_V1);
5219                     } else {
5220                         gen_helper_neon_qadd_s64(cpu_V0, cpu_env,
5221                                                  cpu_V0, cpu_V1);
5222                     }
5223                     break;
5224                 case NEON_3R_VQSUB:
5225                     if (u) {
5226                         gen_helper_neon_qsub_u64(cpu_V0, cpu_env,
5227                                                  cpu_V0, cpu_V1);
5228                     } else {
5229                         gen_helper_neon_qsub_s64(cpu_V0, cpu_env,
5230                                                  cpu_V0, cpu_V1);
5231                     }
5232                     break;
5233                 case NEON_3R_VSHL:
5234                     if (u) {
5235                         gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
5236                     } else {
5237                         gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0);
5238                     }
5239                     break;
5240                 case NEON_3R_VQSHL:
5241                     if (u) {
5242                         gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5243                                                  cpu_V1, cpu_V0);
5244                     } else {
5245                         gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5246                                                  cpu_V1, cpu_V0);
5247                     }
5248                     break;
5249                 case NEON_3R_VRSHL:
5250                     if (u) {
5251                         gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
5252                     } else {
5253                         gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
5254                     }
5255                     break;
5256                 case NEON_3R_VQRSHL:
5257                     if (u) {
5258                         gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
5259                                                   cpu_V1, cpu_V0);
5260                     } else {
5261                         gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
5262                                                   cpu_V1, cpu_V0);
5263                     }
5264                     break;
5265                 case NEON_3R_VADD_VSUB:
5266                     if (u) {
5267                         tcg_gen_sub_i64(CPU_V001);
5268                     } else {
5269                         tcg_gen_add_i64(CPU_V001);
5270                     }
5271                     break;
5272                 default:
5273                     abort();
5274                 }
5275                 neon_store_reg64(cpu_V0, rd + pass);
5276             }
5277             return 0;
5278         }
5279         pairwise = 0;
5280         switch (op) {
5281         case NEON_3R_VSHL:
5282         case NEON_3R_VQSHL:
5283         case NEON_3R_VRSHL:
5284         case NEON_3R_VQRSHL:
5285             {
5286                 int rtmp;
5287                 /* Shift instruction operands are reversed.  */
5288                 rtmp = rn;
5289                 rn = rm;
5290                 rm = rtmp;
5291             }
5292             break;
5293         case NEON_3R_VPADD:
5294             if (u) {
5295                 return 1;
5296             }
5297             /* Fall through */
5298         case NEON_3R_VPMAX:
5299         case NEON_3R_VPMIN:
5300             pairwise = 1;
5301             break;
5302         case NEON_3R_FLOAT_ARITH:
5303             pairwise = (u && size < 2); /* if VPADD (float) */
5304             break;
5305         case NEON_3R_FLOAT_MINMAX:
5306             pairwise = u; /* if VPMIN/VPMAX (float) */
5307             break;
5308         case NEON_3R_FLOAT_CMP:
5309             if (!u && size) {
5310                 /* no encoding for U=0 C=1x */
5311                 return 1;
5312             }
5313             break;
5314         case NEON_3R_FLOAT_ACMP:
5315             if (!u) {
5316                 return 1;
5317             }
5318             break;
5319         case NEON_3R_FLOAT_MISC:
5320             /* VMAXNM/VMINNM in ARMv8 */
5321             if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) {
5322                 return 1;
5323             }
5324             break;
5325         case NEON_3R_VMUL:
5326             if (u && (size != 0)) {
5327                 /* UNDEF on invalid size for polynomial subcase */
5328                 return 1;
5329             }
5330             break;
5331         case NEON_3R_VFM:
5332             if (!arm_dc_feature(s, ARM_FEATURE_VFP4) || u) {
5333                 return 1;
5334             }
5335             break;
5336         default:
5337             break;
5338         }
5339
5340         if (pairwise && q) {
5341             /* All the pairwise insns UNDEF if Q is set */
5342             return 1;
5343         }
5344
5345         for (pass = 0; pass < (q ? 4 : 2); pass++) {
5346
5347         if (pairwise) {
5348             /* Pairwise.  */
5349             if (pass < 1) {
5350                 tmp = neon_load_reg(rn, 0);
5351                 tmp2 = neon_load_reg(rn, 1);
5352             } else {
5353                 tmp = neon_load_reg(rm, 0);
5354                 tmp2 = neon_load_reg(rm, 1);
5355             }
5356         } else {
5357             /* Elementwise.  */
5358             tmp = neon_load_reg(rn, pass);
5359             tmp2 = neon_load_reg(rm, pass);
5360         }
5361         switch (op) {
5362         case NEON_3R_VHADD:
5363             GEN_NEON_INTEGER_OP(hadd);
5364             break;
5365         case NEON_3R_VQADD:
5366             GEN_NEON_INTEGER_OP_ENV(qadd);
5367             break;
5368         case NEON_3R_VRHADD:
5369             GEN_NEON_INTEGER_OP(rhadd);
5370             break;
5371         case NEON_3R_LOGIC: /* Logic ops.  */
5372             switch ((u << 2) | size) {
5373             case 0: /* VAND */
5374                 tcg_gen_and_i32(tmp, tmp, tmp2);
5375                 break;
5376             case 1: /* BIC */
5377                 tcg_gen_andc_i32(tmp, tmp, tmp2);
5378                 break;
5379             case 2: /* VORR */
5380                 tcg_gen_or_i32(tmp, tmp, tmp2);
5381                 break;
5382             case 3: /* VORN */
5383                 tcg_gen_orc_i32(tmp, tmp, tmp2);
5384                 break;
5385             case 4: /* VEOR */
5386                 tcg_gen_xor_i32(tmp, tmp, tmp2);
5387                 break;
5388             case 5: /* VBSL */
5389                 tmp3 = neon_load_reg(rd, pass);
5390                 gen_neon_bsl(tmp, tmp, tmp2, tmp3);
5391                 tcg_temp_free_i32(tmp3);
5392                 break;
5393             case 6: /* VBIT */
5394                 tmp3 = neon_load_reg(rd, pass);
5395                 gen_neon_bsl(tmp, tmp, tmp3, tmp2);
5396                 tcg_temp_free_i32(tmp3);
5397                 break;
5398             case 7: /* VBIF */
5399                 tmp3 = neon_load_reg(rd, pass);
5400                 gen_neon_bsl(tmp, tmp3, tmp, tmp2);
5401                 tcg_temp_free_i32(tmp3);
5402                 break;
5403             }
5404             break;
5405         case NEON_3R_VHSUB:
5406             GEN_NEON_INTEGER_OP(hsub);
5407             break;
5408         case NEON_3R_VQSUB:
5409             GEN_NEON_INTEGER_OP_ENV(qsub);
5410             break;
5411         case NEON_3R_VCGT:
5412             GEN_NEON_INTEGER_OP(cgt);
5413             break;
5414         case NEON_3R_VCGE:
5415             GEN_NEON_INTEGER_OP(cge);
5416             break;
5417         case NEON_3R_VSHL:
5418             GEN_NEON_INTEGER_OP(shl);
5419             break;
5420         case NEON_3R_VQSHL:
5421             GEN_NEON_INTEGER_OP_ENV(qshl);
5422             break;
5423         case NEON_3R_VRSHL:
5424             GEN_NEON_INTEGER_OP(rshl);
5425             break;
5426         case NEON_3R_VQRSHL:
5427             GEN_NEON_INTEGER_OP_ENV(qrshl);
5428             break;
5429         case NEON_3R_VMAX:
5430             GEN_NEON_INTEGER_OP(max);
5431             break;
5432         case NEON_3R_VMIN:
5433             GEN_NEON_INTEGER_OP(min);
5434             break;
5435         case NEON_3R_VABD:
5436             GEN_NEON_INTEGER_OP(abd);
5437             break;
5438         case NEON_3R_VABA:
5439             GEN_NEON_INTEGER_OP(abd);
5440             tcg_temp_free_i32(tmp2);
5441             tmp2 = neon_load_reg(rd, pass);
5442             gen_neon_add(size, tmp, tmp2);
5443             break;
5444         case NEON_3R_VADD_VSUB:
5445             if (!u) { /* VADD */
5446                 gen_neon_add(size, tmp, tmp2);
5447             } else { /* VSUB */
5448                 switch (size) {
5449                 case 0: gen_helper_neon_sub_u8(tmp, tmp, tmp2); break;
5450                 case 1: gen_helper_neon_sub_u16(tmp, tmp, tmp2); break;
5451                 case 2: tcg_gen_sub_i32(tmp, tmp, tmp2); break;
5452                 default: abort();
5453                 }
5454             }
5455             break;
5456         case NEON_3R_VTST_VCEQ:
5457             if (!u) { /* VTST */
5458                 switch (size) {
5459                 case 0: gen_helper_neon_tst_u8(tmp, tmp, tmp2); break;
5460                 case 1: gen_helper_neon_tst_u16(tmp, tmp, tmp2); break;
5461                 case 2: gen_helper_neon_tst_u32(tmp, tmp, tmp2); break;
5462                 default: abort();
5463                 }
5464             } else { /* VCEQ */
5465                 switch (size) {
5466                 case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
5467                 case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
5468                 case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
5469                 default: abort();
5470                 }
5471             }
5472             break;
5473         case NEON_3R_VML: /* VMLA, VMLAL, VMLS,VMLSL */
5474             switch (size) {
5475             case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
5476             case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
5477             case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
5478             default: abort();
5479             }
5480             tcg_temp_free_i32(tmp2);
5481             tmp2 = neon_load_reg(rd, pass);
5482             if (u) { /* VMLS */
5483                 gen_neon_rsb(size, tmp, tmp2);
5484             } else { /* VMLA */
5485                 gen_neon_add(size, tmp, tmp2);
5486             }
5487             break;
5488         case NEON_3R_VMUL:
5489             if (u) { /* polynomial */
5490                 gen_helper_neon_mul_p8(tmp, tmp, tmp2);
5491             } else { /* Integer */
5492                 switch (size) {
5493                 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
5494                 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
5495                 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
5496                 default: abort();
5497                 }
5498             }
5499             break;
5500         case NEON_3R_VPMAX:
5501             GEN_NEON_INTEGER_OP(pmax);
5502             break;
5503         case NEON_3R_VPMIN:
5504             GEN_NEON_INTEGER_OP(pmin);
5505             break;
5506         case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high.  */
5507             if (!u) { /* VQDMULH */
5508                 switch (size) {
5509                 case 1:
5510                     gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
5511                     break;
5512                 case 2:
5513                     gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
5514                     break;
5515                 default: abort();
5516                 }
5517             } else { /* VQRDMULH */
5518                 switch (size) {
5519                 case 1:
5520                     gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
5521                     break;
5522                 case 2:
5523                     gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
5524                     break;
5525                 default: abort();
5526                 }
5527             }
5528             break;
5529         case NEON_3R_VPADD:
5530             switch (size) {
5531             case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
5532             case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
5533             case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
5534             default: abort();
5535             }
5536             break;
5537         case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
5538         {
5539             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5540             switch ((u << 2) | size) {
5541             case 0: /* VADD */
5542             case 4: /* VPADD */
5543                 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5544                 break;
5545             case 2: /* VSUB */
5546                 gen_helper_vfp_subs(tmp, tmp, tmp2, fpstatus);
5547                 break;
5548             case 6: /* VABD */
5549                 gen_helper_neon_abd_f32(tmp, tmp, tmp2, fpstatus);
5550                 break;
5551             default:
5552                 abort();
5553             }
5554             tcg_temp_free_ptr(fpstatus);
5555             break;
5556         }
5557         case NEON_3R_FLOAT_MULTIPLY:
5558         {
5559             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5560             gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
5561             if (!u) {
5562                 tcg_temp_free_i32(tmp2);
5563                 tmp2 = neon_load_reg(rd, pass);
5564                 if (size == 0) {
5565                     gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5566                 } else {
5567                     gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
5568                 }
5569             }
5570             tcg_temp_free_ptr(fpstatus);
5571             break;
5572         }
5573         case NEON_3R_FLOAT_CMP:
5574         {
5575             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5576             if (!u) {
5577                 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
5578             } else {
5579                 if (size == 0) {
5580                     gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
5581                 } else {
5582                     gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
5583                 }
5584             }
5585             tcg_temp_free_ptr(fpstatus);
5586             break;
5587         }
5588         case NEON_3R_FLOAT_ACMP:
5589         {
5590             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5591             if (size == 0) {
5592                 gen_helper_neon_acge_f32(tmp, tmp, tmp2, fpstatus);
5593             } else {
5594                 gen_helper_neon_acgt_f32(tmp, tmp, tmp2, fpstatus);
5595             }
5596             tcg_temp_free_ptr(fpstatus);
5597             break;
5598         }
5599         case NEON_3R_FLOAT_MINMAX:
5600         {
5601             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5602             if (size == 0) {
5603                 gen_helper_vfp_maxs(tmp, tmp, tmp2, fpstatus);
5604             } else {
5605                 gen_helper_vfp_mins(tmp, tmp, tmp2, fpstatus);
5606             }
5607             tcg_temp_free_ptr(fpstatus);
5608             break;
5609         }
5610         case NEON_3R_FLOAT_MISC:
5611             if (u) {
5612                 /* VMAXNM/VMINNM */
5613                 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5614                 if (size == 0) {
5615                     gen_helper_vfp_maxnums(tmp, tmp, tmp2, fpstatus);
5616                 } else {
5617                     gen_helper_vfp_minnums(tmp, tmp, tmp2, fpstatus);
5618                 }
5619                 tcg_temp_free_ptr(fpstatus);
5620             } else {
5621                 if (size == 0) {
5622                     gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
5623                 } else {
5624                     gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
5625               }
5626             }
5627             break;
5628         case NEON_3R_VFM:
5629         {
5630             /* VFMA, VFMS: fused multiply-add */
5631             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5632             TCGv_i32 tmp3 = neon_load_reg(rd, pass);
5633             if (size) {
5634                 /* VFMS */
5635                 gen_helper_vfp_negs(tmp, tmp);
5636             }
5637             gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus);
5638             tcg_temp_free_i32(tmp3);
5639             tcg_temp_free_ptr(fpstatus);
5640             break;
5641         }
5642         default:
5643             abort();
5644         }
5645         tcg_temp_free_i32(tmp2);
5646
5647         /* Save the result.  For elementwise operations we can put it
5648            straight into the destination register.  For pairwise operations
5649            we have to be careful to avoid clobbering the source operands.  */
5650         if (pairwise && rd == rm) {
5651             neon_store_scratch(pass, tmp);
5652         } else {
5653             neon_store_reg(rd, pass, tmp);
5654         }
5655
5656         } /* for pass */
5657         if (pairwise && rd == rm) {
5658             for (pass = 0; pass < (q ? 4 : 2); pass++) {
5659                 tmp = neon_load_scratch(pass);
5660                 neon_store_reg(rd, pass, tmp);
5661             }
5662         }
5663         /* End of 3 register same size operations.  */
5664     } else if (insn & (1 << 4)) {
5665         if ((insn & 0x00380080) != 0) {
5666             /* Two registers and shift.  */
5667             op = (insn >> 8) & 0xf;
5668             if (insn & (1 << 7)) {
5669                 /* 64-bit shift. */
5670                 if (op > 7) {
5671                     return 1;
5672                 }
5673                 size = 3;
5674             } else {
5675                 size = 2;
5676                 while ((insn & (1 << (size + 19))) == 0)
5677                     size--;
5678             }
5679             shift = (insn >> 16) & ((1 << (3 + size)) - 1);
5680             /* To avoid excessive duplication of ops we implement shift
5681                by immediate using the variable shift operations.  */
5682             if (op < 8) {
5683                 /* Shift by immediate:
5684                    VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU.  */
5685                 if (q && ((rd | rm) & 1)) {
5686                     return 1;
5687                 }
5688                 if (!u && (op == 4 || op == 6)) {
5689                     return 1;
5690                 }
5691                 /* Right shifts are encoded as N - shift, where N is the
5692                    element size in bits.  */
5693                 if (op <= 4)
5694                     shift = shift - (1 << (size + 3));
5695                 if (size == 3) {
5696                     count = q + 1;
5697                 } else {
5698                     count = q ? 4: 2;
5699                 }
5700                 switch (size) {
5701                 case 0:
5702                     imm = (uint8_t) shift;
5703                     imm |= imm << 8;
5704                     imm |= imm << 16;
5705                     break;
5706                 case 1:
5707                     imm = (uint16_t) shift;
5708                     imm |= imm << 16;
5709                     break;
5710                 case 2:
5711                 case 3:
5712                     imm = shift;
5713                     break;
5714                 default:
5715                     abort();
5716                 }
5717
5718                 for (pass = 0; pass < count; pass++) {
5719                     if (size == 3) {
5720                         neon_load_reg64(cpu_V0, rm + pass);
5721                         tcg_gen_movi_i64(cpu_V1, imm);
5722                         switch (op) {
5723                         case 0:  /* VSHR */
5724                         case 1:  /* VSRA */
5725                             if (u)
5726                                 gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
5727                             else
5728                                 gen_helper_neon_shl_s64(cpu_V0, cpu_V0, cpu_V1);
5729                             break;
5730                         case 2: /* VRSHR */
5731                         case 3: /* VRSRA */
5732                             if (u)
5733                                 gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, cpu_V1);
5734                             else
5735                                 gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1);
5736                             break;
5737                         case 4: /* VSRI */
5738                         case 5: /* VSHL, VSLI */
5739                             gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
5740                             break;
5741                         case 6: /* VQSHLU */
5742                             gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
5743                                                       cpu_V0, cpu_V1);
5744                             break;
5745                         case 7: /* VQSHL */
5746                             if (u) {
5747                                 gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5748                                                          cpu_V0, cpu_V1);
5749                             } else {
5750                                 gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5751                                                          cpu_V0, cpu_V1);
5752                             }
5753                             break;
5754                         }
5755                         if (op == 1 || op == 3) {
5756                             /* Accumulate.  */
5757                             neon_load_reg64(cpu_V1, rd + pass);
5758                             tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
5759                         } else if (op == 4 || (op == 5 && u)) {
5760                             /* Insert */
5761                             neon_load_reg64(cpu_V1, rd + pass);
5762                             uint64_t mask;
5763                             if (shift < -63 || shift > 63) {
5764                                 mask = 0;
5765                             } else {
5766                                 if (op == 4) {
5767                                     mask = 0xffffffffffffffffull >> -shift;
5768                                 } else {
5769                                     mask = 0xffffffffffffffffull << shift;
5770                                 }
5771                             }
5772                             tcg_gen_andi_i64(cpu_V1, cpu_V1, ~mask);
5773                             tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
5774                         }
5775                         neon_store_reg64(cpu_V0, rd + pass);
5776                     } else { /* size < 3 */
5777                         /* Operands in T0 and T1.  */
5778                         tmp = neon_load_reg(rm, pass);
5779                         tmp2 = tcg_temp_new_i32();
5780                         tcg_gen_movi_i32(tmp2, imm);
5781                         switch (op) {
5782                         case 0:  /* VSHR */
5783                         case 1:  /* VSRA */
5784                             GEN_NEON_INTEGER_OP(shl);
5785                             break;
5786                         case 2: /* VRSHR */
5787                         case 3: /* VRSRA */
5788                             GEN_NEON_INTEGER_OP(rshl);
5789                             break;
5790                         case 4: /* VSRI */
5791                         case 5: /* VSHL, VSLI */
5792                             switch (size) {
5793                             case 0: gen_helper_neon_shl_u8(tmp, tmp, tmp2); break;
5794                             case 1: gen_helper_neon_shl_u16(tmp, tmp, tmp2); break;
5795                             case 2: gen_helper_neon_shl_u32(tmp, tmp, tmp2); break;
5796                             default: abort();
5797                             }
5798                             break;
5799                         case 6: /* VQSHLU */
5800                             switch (size) {
5801                             case 0:
5802                                 gen_helper_neon_qshlu_s8(tmp, cpu_env,
5803                                                          tmp, tmp2);
5804                                 break;
5805                             case 1:
5806                                 gen_helper_neon_qshlu_s16(tmp, cpu_env,
5807                                                           tmp, tmp2);
5808                                 break;
5809                             case 2:
5810                                 gen_helper_neon_qshlu_s32(tmp, cpu_env,
5811                                                           tmp, tmp2);
5812                                 break;
5813                             default:
5814                                 abort();
5815                             }
5816                             break;
5817                         case 7: /* VQSHL */
5818                             GEN_NEON_INTEGER_OP_ENV(qshl);
5819                             break;
5820                         }
5821                         tcg_temp_free_i32(tmp2);
5822
5823                         if (op == 1 || op == 3) {
5824                             /* Accumulate.  */
5825                             tmp2 = neon_load_reg(rd, pass);
5826                             gen_neon_add(size, tmp, tmp2);
5827                             tcg_temp_free_i32(tmp2);
5828                         } else if (op == 4 || (op == 5 && u)) {
5829                             /* Insert */
5830                             switch (size) {
5831                             case 0:
5832                                 if (op == 4)
5833                                     mask = 0xff >> -shift;
5834                                 else
5835                                     mask = (uint8_t)(0xff << shift);
5836                                 mask |= mask << 8;
5837                                 mask |= mask << 16;
5838                                 break;
5839                             case 1:
5840                                 if (op == 4)
5841                                     mask = 0xffff >> -shift;
5842                                 else
5843                                     mask = (uint16_t)(0xffff << shift);
5844                                 mask |= mask << 16;
5845                                 break;
5846                             case 2:
5847                                 if (shift < -31 || shift > 31) {
5848                                     mask = 0;
5849                                 } else {
5850                                     if (op == 4)
5851                                         mask = 0xffffffffu >> -shift;
5852                                     else
5853                                         mask = 0xffffffffu << shift;
5854                                 }
5855                                 break;
5856                             default:
5857                                 abort();
5858                             }
5859                             tmp2 = neon_load_reg(rd, pass);
5860                             tcg_gen_andi_i32(tmp, tmp, mask);
5861                             tcg_gen_andi_i32(tmp2, tmp2, ~mask);
5862                             tcg_gen_or_i32(tmp, tmp, tmp2);
5863                             tcg_temp_free_i32(tmp2);
5864                         }
5865                         neon_store_reg(rd, pass, tmp);
5866                     }
5867                 } /* for pass */
5868             } else if (op < 10) {
5869                 /* Shift by immediate and narrow:
5870                    VSHRN, VRSHRN, VQSHRN, VQRSHRN.  */
5871                 int input_unsigned = (op == 8) ? !u : u;
5872                 if (rm & 1) {
5873                     return 1;
5874                 }
5875                 shift = shift - (1 << (size + 3));
5876                 size++;
5877                 if (size == 3) {
5878                     tmp64 = tcg_const_i64(shift);
5879                     neon_load_reg64(cpu_V0, rm);
5880                     neon_load_reg64(cpu_V1, rm + 1);
5881                     for (pass = 0; pass < 2; pass++) {
5882                         TCGv_i64 in;
5883                         if (pass == 0) {
5884                             in = cpu_V0;
5885                         } else {
5886                             in = cpu_V1;
5887                         }
5888                         if (q) {
5889                             if (input_unsigned) {
5890                                 gen_helper_neon_rshl_u64(cpu_V0, in, tmp64);
5891                             } else {
5892                                 gen_helper_neon_rshl_s64(cpu_V0, in, tmp64);
5893                             }
5894                         } else {
5895                             if (input_unsigned) {
5896                                 gen_helper_neon_shl_u64(cpu_V0, in, tmp64);
5897                             } else {
5898                                 gen_helper_neon_shl_s64(cpu_V0, in, tmp64);
5899                             }
5900                         }
5901                         tmp = tcg_temp_new_i32();
5902                         gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5903                         neon_store_reg(rd, pass, tmp);
5904                     } /* for pass */
5905                     tcg_temp_free_i64(tmp64);
5906                 } else {
5907                     if (size == 1) {
5908                         imm = (uint16_t)shift;
5909                         imm |= imm << 16;
5910                     } else {
5911                         /* size == 2 */
5912                         imm = (uint32_t)shift;
5913                     }
5914                     tmp2 = tcg_const_i32(imm);
5915                     tmp4 = neon_load_reg(rm + 1, 0);
5916                     tmp5 = neon_load_reg(rm + 1, 1);
5917                     for (pass = 0; pass < 2; pass++) {
5918                         if (pass == 0) {
5919                             tmp = neon_load_reg(rm, 0);
5920                         } else {
5921                             tmp = tmp4;
5922                         }
5923                         gen_neon_shift_narrow(size, tmp, tmp2, q,
5924                                               input_unsigned);
5925                         if (pass == 0) {
5926                             tmp3 = neon_load_reg(rm, 1);
5927                         } else {
5928                             tmp3 = tmp5;
5929                         }
5930                         gen_neon_shift_narrow(size, tmp3, tmp2, q,
5931                                               input_unsigned);
5932                         tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
5933                         tcg_temp_free_i32(tmp);
5934                         tcg_temp_free_i32(tmp3);
5935                         tmp = tcg_temp_new_i32();
5936                         gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5937                         neon_store_reg(rd, pass, tmp);
5938                     } /* for pass */
5939                     tcg_temp_free_i32(tmp2);
5940                 }
5941             } else if (op == 10) {
5942                 /* VSHLL, VMOVL */
5943                 if (q || (rd & 1)) {
5944                     return 1;
5945                 }
5946                 tmp = neon_load_reg(rm, 0);
5947                 tmp2 = neon_load_reg(rm, 1);
5948                 for (pass = 0; pass < 2; pass++) {
5949                     if (pass == 1)
5950                         tmp = tmp2;
5951
5952                     gen_neon_widen(cpu_V0, tmp, size, u);
5953
5954                     if (shift != 0) {
5955                         /* The shift is less than the width of the source
5956                            type, so we can just shift the whole register.  */
5957                         tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
5958                         /* Widen the result of shift: we need to clear
5959                          * the potential overflow bits resulting from
5960                          * left bits of the narrow input appearing as
5961                          * right bits of the left neighbour narrow
5962                          * input.  */
5963                         if (size < 2 || !u) {
5964                             uint64_t imm64;
5965                             if (size == 0) {
5966                                 imm = (0xffu >> (8 - shift));
5967                                 imm |= imm << 16;
5968                             } else if (size == 1) {
5969                                 imm = 0xffff >> (16 - shift);
5970                             } else {
5971                                 /* size == 2 */
5972                                 imm = 0xffffffff >> (32 - shift);
5973                             }
5974                             if (size < 2) {
5975                                 imm64 = imm | (((uint64_t)imm) << 32);
5976                             } else {
5977                                 imm64 = imm;
5978                             }
5979                             tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
5980                         }
5981                     }
5982                     neon_store_reg64(cpu_V0, rd + pass);
5983                 }
5984             } else if (op >= 14) {
5985                 /* VCVT fixed-point.  */
5986                 if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
5987                     return 1;
5988                 }
5989                 /* We have already masked out the must-be-1 top bit of imm6,
5990                  * hence this 32-shift where the ARM ARM has 64-imm6.
5991                  */
5992                 shift = 32 - shift;
5993                 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5994                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass));
5995                     if (!(op & 1)) {
5996                         if (u)
5997                             gen_vfp_ulto(0, shift, 1);
5998                         else
5999                             gen_vfp_slto(0, shift, 1);
6000                     } else {
6001                         if (u)
6002                             gen_vfp_toul(0, shift, 1);
6003                         else
6004                             gen_vfp_tosl(0, shift, 1);
6005                     }
6006                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, pass));
6007                 }
6008             } else {
6009                 return 1;
6010             }
6011         } else { /* (insn & 0x00380080) == 0 */
6012             int invert;
6013             if (q && (rd & 1)) {
6014                 return 1;
6015             }
6016
6017             op = (insn >> 8) & 0xf;
6018             /* One register and immediate.  */
6019             imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
6020             invert = (insn & (1 << 5)) != 0;
6021             /* Note that op = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.
6022              * We choose to not special-case this and will behave as if a
6023              * valid constant encoding of 0 had been given.
6024              */
6025             switch (op) {
6026             case 0: case 1:
6027                 /* no-op */
6028                 break;
6029             case 2: case 3:
6030                 imm <<= 8;
6031                 break;
6032             case 4: case 5:
6033                 imm <<= 16;
6034                 break;
6035             case 6: case 7:
6036                 imm <<= 24;
6037                 break;
6038             case 8: case 9:
6039                 imm |= imm << 16;
6040                 break;
6041             case 10: case 11:
6042                 imm = (imm << 8) | (imm << 24);
6043                 break;
6044             case 12:
6045                 imm = (imm << 8) | 0xff;
6046                 break;
6047             case 13:
6048                 imm = (imm << 16) | 0xffff;
6049                 break;
6050             case 14:
6051                 imm |= (imm << 8) | (imm << 16) | (imm << 24);
6052                 if (invert)
6053                     imm = ~imm;
6054                 break;
6055             case 15:
6056                 if (invert) {
6057                     return 1;
6058                 }
6059                 imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
6060                       | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
6061                 break;
6062             }
6063             if (invert)
6064                 imm = ~imm;
6065
6066             for (pass = 0; pass < (q ? 4 : 2); pass++) {
6067                 if (op & 1 && op < 12) {
6068                     tmp = neon_load_reg(rd, pass);
6069                     if (invert) {
6070                         /* The immediate value has already been inverted, so
6071                            BIC becomes AND.  */
6072                         tcg_gen_andi_i32(tmp, tmp, imm);
6073                     } else {
6074                         tcg_gen_ori_i32(tmp, tmp, imm);
6075                     }
6076                 } else {
6077                     /* VMOV, VMVN.  */
6078                     tmp = tcg_temp_new_i32();
6079                     if (op == 14 && invert) {
6080                         int n;
6081                         uint32_t val;
6082                         val = 0;
6083                         for (n = 0; n < 4; n++) {
6084                             if (imm & (1 << (n + (pass & 1) * 4)))
6085                                 val |= 0xff << (n * 8);
6086                         }
6087                         tcg_gen_movi_i32(tmp, val);
6088                     } else {
6089                         tcg_gen_movi_i32(tmp, imm);
6090                     }
6091                 }
6092                 neon_store_reg(rd, pass, tmp);
6093             }
6094         }
6095     } else { /* (insn & 0x00800010 == 0x00800000) */
6096         if (size != 3) {
6097             op = (insn >> 8) & 0xf;
6098             if ((insn & (1 << 6)) == 0) {
6099                 /* Three registers of different lengths.  */
6100                 int src1_wide;
6101                 int src2_wide;
6102                 int prewiden;
6103                 /* undefreq: bit 0 : UNDEF if size == 0
6104                  *           bit 1 : UNDEF if size == 1
6105                  *           bit 2 : UNDEF if size == 2
6106                  *           bit 3 : UNDEF if U == 1
6107                  * Note that [2:0] set implies 'always UNDEF'
6108                  */
6109                 int undefreq;
6110                 /* prewiden, src1_wide, src2_wide, undefreq */
6111                 static const int neon_3reg_wide[16][4] = {
6112                     {1, 0, 0, 0}, /* VADDL */
6113                     {1, 1, 0, 0}, /* VADDW */
6114                     {1, 0, 0, 0}, /* VSUBL */
6115                     {1, 1, 0, 0}, /* VSUBW */
6116                     {0, 1, 1, 0}, /* VADDHN */
6117                     {0, 0, 0, 0}, /* VABAL */
6118                     {0, 1, 1, 0}, /* VSUBHN */
6119                     {0, 0, 0, 0}, /* VABDL */
6120                     {0, 0, 0, 0}, /* VMLAL */
6121                     {0, 0, 0, 9}, /* VQDMLAL */
6122                     {0, 0, 0, 0}, /* VMLSL */
6123                     {0, 0, 0, 9}, /* VQDMLSL */
6124                     {0, 0, 0, 0}, /* Integer VMULL */
6125                     {0, 0, 0, 1}, /* VQDMULL */
6126                     {0, 0, 0, 0xa}, /* Polynomial VMULL */
6127                     {0, 0, 0, 7}, /* Reserved: always UNDEF */
6128                 };
6129
6130                 prewiden = neon_3reg_wide[op][0];
6131                 src1_wide = neon_3reg_wide[op][1];
6132                 src2_wide = neon_3reg_wide[op][2];
6133                 undefreq = neon_3reg_wide[op][3];
6134
6135                 if ((undefreq & (1 << size)) ||
6136                     ((undefreq & 8) && u)) {
6137                     return 1;
6138                 }
6139                 if ((src1_wide && (rn & 1)) ||
6140                     (src2_wide && (rm & 1)) ||
6141                     (!src2_wide && (rd & 1))) {
6142                     return 1;
6143                 }
6144
6145                 /* Handle VMULL.P64 (Polynomial 64x64 to 128 bit multiply)
6146                  * outside the loop below as it only performs a single pass.
6147                  */
6148                 if (op == 14 && size == 2) {
6149                     TCGv_i64 tcg_rn, tcg_rm, tcg_rd;
6150
6151                     if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
6152                         return 1;
6153                     }
6154                     tcg_rn = tcg_temp_new_i64();
6155                     tcg_rm = tcg_temp_new_i64();
6156                     tcg_rd = tcg_temp_new_i64();
6157                     neon_load_reg64(tcg_rn, rn);
6158                     neon_load_reg64(tcg_rm, rm);
6159                     gen_helper_neon_pmull_64_lo(tcg_rd, tcg_rn, tcg_rm);
6160                     neon_store_reg64(tcg_rd, rd);
6161                     gen_helper_neon_pmull_64_hi(tcg_rd, tcg_rn, tcg_rm);
6162                     neon_store_reg64(tcg_rd, rd + 1);
6163                     tcg_temp_free_i64(tcg_rn);
6164                     tcg_temp_free_i64(tcg_rm);
6165                     tcg_temp_free_i64(tcg_rd);
6166                     return 0;
6167                 }
6168
6169                 /* Avoid overlapping operands.  Wide source operands are
6170                    always aligned so will never overlap with wide
6171                    destinations in problematic ways.  */
6172                 if (rd == rm && !src2_wide) {
6173                     tmp = neon_load_reg(rm, 1);
6174                     neon_store_scratch(2, tmp);
6175                 } else if (rd == rn && !src1_wide) {
6176                     tmp = neon_load_reg(rn, 1);
6177                     neon_store_scratch(2, tmp);
6178                 }
6179                 TCGV_UNUSED_I32(tmp3);
6180                 for (pass = 0; pass < 2; pass++) {
6181                     if (src1_wide) {
6182                         neon_load_reg64(cpu_V0, rn + pass);
6183                         TCGV_UNUSED_I32(tmp);
6184                     } else {
6185                         if (pass == 1 && rd == rn) {
6186                             tmp = neon_load_scratch(2);
6187                         } else {
6188                             tmp = neon_load_reg(rn, pass);
6189                         }
6190                         if (prewiden) {
6191                             gen_neon_widen(cpu_V0, tmp, size, u);
6192                         }
6193                     }
6194                     if (src2_wide) {
6195                         neon_load_reg64(cpu_V1, rm + pass);
6196                         TCGV_UNUSED_I32(tmp2);
6197                     } else {
6198                         if (pass == 1 && rd == rm) {
6199                             tmp2 = neon_load_scratch(2);
6200                         } else {
6201                             tmp2 = neon_load_reg(rm, pass);
6202                         }
6203                         if (prewiden) {
6204                             gen_neon_widen(cpu_V1, tmp2, size, u);
6205                         }
6206                     }
6207                     switch (op) {
6208                     case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
6209                         gen_neon_addl(size);
6210                         break;
6211                     case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
6212                         gen_neon_subl(size);
6213                         break;
6214                     case 5: case 7: /* VABAL, VABDL */
6215                         switch ((size << 1) | u) {
6216                         case 0:
6217                             gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
6218                             break;
6219                         case 1:
6220                             gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
6221                             break;
6222                         case 2:
6223                             gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
6224                             break;
6225                         case 3:
6226                             gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
6227                             break;
6228                         case 4:
6229                             gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
6230                             break;
6231                         case 5:
6232                             gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
6233                             break;
6234                         default: abort();
6235                         }
6236                         tcg_temp_free_i32(tmp2);
6237                         tcg_temp_free_i32(tmp);
6238                         break;
6239                     case 8: case 9: case 10: case 11: case 12: case 13:
6240                         /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
6241                         gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
6242                         break;
6243                     case 14: /* Polynomial VMULL */
6244                         gen_helper_neon_mull_p8(cpu_V0, tmp, tmp2);
6245                         tcg_temp_free_i32(tmp2);
6246                         tcg_temp_free_i32(tmp);
6247                         break;
6248                     default: /* 15 is RESERVED: caught earlier  */
6249                         abort();
6250                     }
6251                     if (op == 13) {
6252                         /* VQDMULL */
6253                         gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6254                         neon_store_reg64(cpu_V0, rd + pass);
6255                     } else if (op == 5 || (op >= 8 && op <= 11)) {
6256                         /* Accumulate.  */
6257                         neon_load_reg64(cpu_V1, rd + pass);
6258                         switch (op) {
6259                         case 10: /* VMLSL */
6260                             gen_neon_negl(cpu_V0, size);
6261                             /* Fall through */
6262                         case 5: case 8: /* VABAL, VMLAL */
6263                             gen_neon_addl(size);
6264                             break;
6265                         case 9: case 11: /* VQDMLAL, VQDMLSL */
6266                             gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6267                             if (op == 11) {
6268                                 gen_neon_negl(cpu_V0, size);
6269                             }
6270                             gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
6271                             break;
6272                         default:
6273                             abort();
6274                         }
6275                         neon_store_reg64(cpu_V0, rd + pass);
6276                     } else if (op == 4 || op == 6) {
6277                         /* Narrowing operation.  */
6278                         tmp = tcg_temp_new_i32();
6279                         if (!u) {
6280                             switch (size) {
6281                             case 0:
6282                                 gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
6283                                 break;
6284                             case 1:
6285                                 gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
6286                                 break;
6287                             case 2:
6288                                 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
6289                                 tcg_gen_extrl_i64_i32(tmp, cpu_V0);
6290                                 break;
6291                             default: abort();
6292                             }
6293                         } else {
6294                             switch (size) {
6295                             case 0:
6296                                 gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
6297                                 break;
6298                             case 1:
6299                                 gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
6300                                 break;
6301                             case 2:
6302                                 tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
6303                                 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
6304                                 tcg_gen_extrl_i64_i32(tmp, cpu_V0);
6305                                 break;
6306                             default: abort();
6307                             }
6308                         }
6309                         if (pass == 0) {
6310                             tmp3 = tmp;
6311                         } else {
6312                             neon_store_reg(rd, 0, tmp3);
6313                             neon_store_reg(rd, 1, tmp);
6314                         }
6315                     } else {
6316                         /* Write back the result.  */
6317                         neon_store_reg64(cpu_V0, rd + pass);
6318                     }
6319                 }
6320             } else {
6321                 /* Two registers and a scalar. NB that for ops of this form
6322                  * the ARM ARM labels bit 24 as Q, but it is in our variable
6323                  * 'u', not 'q'.
6324                  */
6325                 if (size == 0) {
6326                     return 1;
6327                 }
6328                 switch (op) {
6329                 case 1: /* Float VMLA scalar */
6330                 case 5: /* Floating point VMLS scalar */
6331                 case 9: /* Floating point VMUL scalar */
6332                     if (size == 1) {
6333                         return 1;
6334                     }
6335                     /* fall through */
6336                 case 0: /* Integer VMLA scalar */
6337                 case 4: /* Integer VMLS scalar */
6338                 case 8: /* Integer VMUL scalar */
6339                 case 12: /* VQDMULH scalar */
6340                 case 13: /* VQRDMULH scalar */
6341                     if (u && ((rd | rn) & 1)) {
6342                         return 1;
6343                     }
6344                     tmp = neon_get_scalar(size, rm);
6345                     neon_store_scratch(0, tmp);
6346                     for (pass = 0; pass < (u ? 4 : 2); pass++) {
6347                         tmp = neon_load_scratch(0);
6348                         tmp2 = neon_load_reg(rn, pass);
6349                         if (op == 12) {
6350                             if (size == 1) {
6351                                 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
6352                             } else {
6353                                 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
6354                             }
6355                         } else if (op == 13) {
6356                             if (size == 1) {
6357                                 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
6358                             } else {
6359                                 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
6360                             }
6361                         } else if (op & 1) {
6362                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6363                             gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
6364                             tcg_temp_free_ptr(fpstatus);
6365                         } else {
6366                             switch (size) {
6367                             case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
6368                             case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
6369                             case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
6370                             default: abort();
6371                             }
6372                         }
6373                         tcg_temp_free_i32(tmp2);
6374                         if (op < 8) {
6375                             /* Accumulate.  */
6376                             tmp2 = neon_load_reg(rd, pass);
6377                             switch (op) {
6378                             case 0:
6379                                 gen_neon_add(size, tmp, tmp2);
6380                                 break;
6381                             case 1:
6382                             {
6383                                 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6384                                 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
6385                                 tcg_temp_free_ptr(fpstatus);
6386                                 break;
6387                             }
6388                             case 4:
6389                                 gen_neon_rsb(size, tmp, tmp2);
6390                                 break;
6391                             case 5:
6392                             {
6393                                 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6394                                 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
6395                                 tcg_temp_free_ptr(fpstatus);
6396                                 break;
6397                             }
6398                             default:
6399                                 abort();
6400                             }
6401                             tcg_temp_free_i32(tmp2);
6402                         }
6403                         neon_store_reg(rd, pass, tmp);
6404                     }
6405                     break;
6406                 case 3: /* VQDMLAL scalar */
6407                 case 7: /* VQDMLSL scalar */
6408                 case 11: /* VQDMULL scalar */
6409                     if (u == 1) {
6410                         return 1;
6411                     }
6412                     /* fall through */
6413                 case 2: /* VMLAL scalar */
6414                 case 6: /* VMLSL scalar */
6415                 case 10: /* VMULL scalar */
6416                     if (rd & 1) {
6417                         return 1;
6418                     }
6419                     tmp2 = neon_get_scalar(size, rm);
6420                     /* We need a copy of tmp2 because gen_neon_mull
6421                      * deletes it during pass 0.  */
6422                     tmp4 = tcg_temp_new_i32();
6423                     tcg_gen_mov_i32(tmp4, tmp2);
6424                     tmp3 = neon_load_reg(rn, 1);
6425
6426                     for (pass = 0; pass < 2; pass++) {
6427                         if (pass == 0) {
6428                             tmp = neon_load_reg(rn, 0);
6429                         } else {
6430                             tmp = tmp3;
6431                             tmp2 = tmp4;
6432                         }
6433                         gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
6434                         if (op != 11) {
6435                             neon_load_reg64(cpu_V1, rd + pass);
6436                         }
6437                         switch (op) {
6438                         case 6:
6439                             gen_neon_negl(cpu_V0, size);
6440                             /* Fall through */
6441                         case 2:
6442                             gen_neon_addl(size);
6443                             break;
6444                         case 3: case 7:
6445                             gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6446                             if (op == 7) {
6447                                 gen_neon_negl(cpu_V0, size);
6448                             }
6449                             gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
6450                             break;
6451                         case 10:
6452                             /* no-op */
6453                             break;
6454                         case 11:
6455                             gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6456                             break;
6457                         default:
6458                             abort();
6459                         }
6460                         neon_store_reg64(cpu_V0, rd + pass);
6461                     }
6462
6463
6464                     break;
6465                 default: /* 14 and 15 are RESERVED */
6466                     return 1;
6467                 }
6468             }
6469         } else { /* size == 3 */
6470             if (!u) {
6471                 /* Extract.  */
6472                 imm = (insn >> 8) & 0xf;
6473
6474                 if (imm > 7 && !q)
6475                     return 1;
6476
6477                 if (q && ((rd | rn | rm) & 1)) {
6478                     return 1;
6479                 }
6480
6481                 if (imm == 0) {
6482                     neon_load_reg64(cpu_V0, rn);
6483                     if (q) {
6484                         neon_load_reg64(cpu_V1, rn + 1);
6485                     }
6486                 } else if (imm == 8) {
6487                     neon_load_reg64(cpu_V0, rn + 1);
6488                     if (q) {
6489                         neon_load_reg64(cpu_V1, rm);
6490                     }
6491                 } else if (q) {
6492                     tmp64 = tcg_temp_new_i64();
6493                     if (imm < 8) {
6494                         neon_load_reg64(cpu_V0, rn);
6495                         neon_load_reg64(tmp64, rn + 1);
6496                     } else {
6497                         neon_load_reg64(cpu_V0, rn + 1);
6498                         neon_load_reg64(tmp64, rm);
6499                     }
6500                     tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
6501                     tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
6502                     tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6503                     if (imm < 8) {
6504                         neon_load_reg64(cpu_V1, rm);
6505                     } else {
6506                         neon_load_reg64(cpu_V1, rm + 1);
6507                         imm -= 8;
6508                     }
6509                     tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6510                     tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
6511                     tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
6512                     tcg_temp_free_i64(tmp64);
6513                 } else {
6514                     /* BUGFIX */
6515                     neon_load_reg64(cpu_V0, rn);
6516                     tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
6517                     neon_load_reg64(cpu_V1, rm);
6518                     tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6519                     tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6520                 }
6521                 neon_store_reg64(cpu_V0, rd);
6522                 if (q) {
6523                     neon_store_reg64(cpu_V1, rd + 1);
6524                 }
6525             } else if ((insn & (1 << 11)) == 0) {
6526                 /* Two register misc.  */
6527                 op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
6528                 size = (insn >> 18) & 3;
6529                 /* UNDEF for unknown op values and bad op-size combinations */
6530                 if ((neon_2rm_sizes[op] & (1 << size)) == 0) {
6531                     return 1;
6532                 }
6533                 if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) &&
6534                     q && ((rm | rd) & 1)) {
6535                     return 1;
6536                 }
6537                 switch (op) {
6538                 case NEON_2RM_VREV64:
6539                     for (pass = 0; pass < (q ? 2 : 1); pass++) {
6540                         tmp = neon_load_reg(rm, pass * 2);
6541                         tmp2 = neon_load_reg(rm, pass * 2 + 1);
6542                         switch (size) {
6543                         case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6544                         case 1: gen_swap_half(tmp); break;
6545                         case 2: /* no-op */ break;
6546                         default: abort();
6547                         }
6548                         neon_store_reg(rd, pass * 2 + 1, tmp);
6549                         if (size == 2) {
6550                             neon_store_reg(rd, pass * 2, tmp2);
6551                         } else {
6552                             switch (size) {
6553                             case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break;
6554                             case 1: gen_swap_half(tmp2); break;
6555                             default: abort();
6556                             }
6557                             neon_store_reg(rd, pass * 2, tmp2);
6558                         }
6559                     }
6560                     break;
6561                 case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
6562                 case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
6563                     for (pass = 0; pass < q + 1; pass++) {
6564                         tmp = neon_load_reg(rm, pass * 2);
6565                         gen_neon_widen(cpu_V0, tmp, size, op & 1);
6566                         tmp = neon_load_reg(rm, pass * 2 + 1);
6567                         gen_neon_widen(cpu_V1, tmp, size, op & 1);
6568                         switch (size) {
6569                         case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
6570                         case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
6571                         case 2: tcg_gen_add_i64(CPU_V001); break;
6572                         default: abort();
6573                         }
6574                         if (op >= NEON_2RM_VPADAL) {
6575                             /* Accumulate.  */
6576                             neon_load_reg64(cpu_V1, rd + pass);
6577                             gen_neon_addl(size);
6578                         }
6579                         neon_store_reg64(cpu_V0, rd + pass);
6580                     }
6581                     break;
6582                 case NEON_2RM_VTRN:
6583                     if (size == 2) {
6584                         int n;
6585                         for (n = 0; n < (q ? 4 : 2); n += 2) {
6586                             tmp = neon_load_reg(rm, n);
6587                             tmp2 = neon_load_reg(rd, n + 1);
6588                             neon_store_reg(rm, n, tmp2);
6589                             neon_store_reg(rd, n + 1, tmp);
6590                         }
6591                     } else {
6592                         goto elementwise;
6593                     }
6594                     break;
6595                 case NEON_2RM_VUZP:
6596                     if (gen_neon_unzip(rd, rm, size, q)) {
6597                         return 1;
6598                     }
6599                     break;
6600                 case NEON_2RM_VZIP:
6601                     if (gen_neon_zip(rd, rm, size, q)) {
6602                         return 1;
6603                     }
6604                     break;
6605                 case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
6606                     /* also VQMOVUN; op field and mnemonics don't line up */
6607                     if (rm & 1) {
6608                         return 1;
6609                     }
6610                     TCGV_UNUSED_I32(tmp2);
6611                     for (pass = 0; pass < 2; pass++) {
6612                         neon_load_reg64(cpu_V0, rm + pass);
6613                         tmp = tcg_temp_new_i32();
6614                         gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size,
6615                                            tmp, cpu_V0);
6616                         if (pass == 0) {
6617                             tmp2 = tmp;
6618                         } else {
6619                             neon_store_reg(rd, 0, tmp2);
6620                             neon_store_reg(rd, 1, tmp);
6621                         }
6622                     }
6623                     break;
6624                 case NEON_2RM_VSHLL:
6625                     if (q || (rd & 1)) {
6626                         return 1;
6627                     }
6628                     tmp = neon_load_reg(rm, 0);
6629                     tmp2 = neon_load_reg(rm, 1);
6630                     for (pass = 0; pass < 2; pass++) {
6631                         if (pass == 1)
6632                             tmp = tmp2;
6633                         gen_neon_widen(cpu_V0, tmp, size, 1);
6634                         tcg_gen_shli_i64(cpu_V0, cpu_V0, 8 << size);
6635                         neon_store_reg64(cpu_V0, rd + pass);
6636                     }
6637                     break;
6638                 case NEON_2RM_VCVT_F16_F32:
6639                     if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) ||
6640                         q || (rm & 1)) {
6641                         return 1;
6642                     }
6643                     tmp = tcg_temp_new_i32();
6644                     tmp2 = tcg_temp_new_i32();
6645                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0));
6646                     gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
6647                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 1));
6648                     gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
6649                     tcg_gen_shli_i32(tmp2, tmp2, 16);
6650                     tcg_gen_or_i32(tmp2, tmp2, tmp);
6651                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 2));
6652                     gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
6653                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 3));
6654                     neon_store_reg(rd, 0, tmp2);
6655                     tmp2 = tcg_temp_new_i32();
6656                     gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
6657                     tcg_gen_shli_i32(tmp2, tmp2, 16);
6658                     tcg_gen_or_i32(tmp2, tmp2, tmp);
6659                     neon_store_reg(rd, 1, tmp2);
6660                     tcg_temp_free_i32(tmp);
6661                     break;
6662                 case NEON_2RM_VCVT_F32_F16:
6663                     if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) ||
6664                         q || (rd & 1)) {
6665                         return 1;
6666                     }
6667                     tmp3 = tcg_temp_new_i32();
6668                     tmp = neon_load_reg(rm, 0);
6669                     tmp2 = neon_load_reg(rm, 1);
6670                     tcg_gen_ext16u_i32(tmp3, tmp);
6671                     gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
6672                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 0));
6673                     tcg_gen_shri_i32(tmp3, tmp, 16);
6674                     gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
6675                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 1));
6676                     tcg_temp_free_i32(tmp);
6677                     tcg_gen_ext16u_i32(tmp3, tmp2);
6678                     gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
6679                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 2));
6680                     tcg_gen_shri_i32(tmp3, tmp2, 16);
6681                     gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
6682                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 3));
6683                     tcg_temp_free_i32(tmp2);
6684                     tcg_temp_free_i32(tmp3);
6685                     break;
6686                 case NEON_2RM_AESE: case NEON_2RM_AESMC:
6687                     if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
6688                         || ((rm | rd) & 1)) {
6689                         return 1;
6690                     }
6691                     tmp = tcg_const_i32(rd);
6692                     tmp2 = tcg_const_i32(rm);
6693
6694                      /* Bit 6 is the lowest opcode bit; it distinguishes between
6695                       * encryption (AESE/AESMC) and decryption (AESD/AESIMC)
6696                       */
6697                     tmp3 = tcg_const_i32(extract32(insn, 6, 1));
6698
6699                     if (op == NEON_2RM_AESE) {
6700                         gen_helper_crypto_aese(cpu_env, tmp, tmp2, tmp3);
6701                     } else {
6702                         gen_helper_crypto_aesmc(cpu_env, tmp, tmp2, tmp3);
6703                     }
6704                     tcg_temp_free_i32(tmp);
6705                     tcg_temp_free_i32(tmp2);
6706                     tcg_temp_free_i32(tmp3);
6707                     break;
6708                 case NEON_2RM_SHA1H:
6709                     if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)
6710                         || ((rm | rd) & 1)) {
6711                         return 1;
6712                     }
6713                     tmp = tcg_const_i32(rd);
6714                     tmp2 = tcg_const_i32(rm);
6715
6716                     gen_helper_crypto_sha1h(cpu_env, tmp, tmp2);
6717
6718                     tcg_temp_free_i32(tmp);
6719                     tcg_temp_free_i32(tmp2);
6720                     break;
6721                 case NEON_2RM_SHA1SU1:
6722                     if ((rm | rd) & 1) {
6723                             return 1;
6724                     }
6725                     /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
6726                     if (q) {
6727                         if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA256)) {
6728                             return 1;
6729                         }
6730                     } else if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)) {
6731                         return 1;
6732                     }
6733                     tmp = tcg_const_i32(rd);
6734                     tmp2 = tcg_const_i32(rm);
6735                     if (q) {
6736                         gen_helper_crypto_sha256su0(cpu_env, tmp, tmp2);
6737                     } else {
6738                         gen_helper_crypto_sha1su1(cpu_env, tmp, tmp2);
6739                     }
6740                     tcg_temp_free_i32(tmp);
6741                     tcg_temp_free_i32(tmp2);
6742                     break;
6743                 default:
6744                 elementwise:
6745                     for (pass = 0; pass < (q ? 4 : 2); pass++) {
6746                         if (neon_2rm_is_float_op(op)) {
6747                             tcg_gen_ld_f32(cpu_F0s, cpu_env,
6748                                            neon_reg_offset(rm, pass));
6749                             TCGV_UNUSED_I32(tmp);
6750                         } else {
6751                             tmp = neon_load_reg(rm, pass);
6752                         }
6753                         switch (op) {
6754                         case NEON_2RM_VREV32:
6755                             switch (size) {
6756                             case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6757                             case 1: gen_swap_half(tmp); break;
6758                             default: abort();
6759                             }
6760                             break;
6761                         case NEON_2RM_VREV16:
6762                             gen_rev16(tmp);
6763                             break;
6764                         case NEON_2RM_VCLS:
6765                             switch (size) {
6766                             case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
6767                             case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
6768                             case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
6769                             default: abort();
6770                             }
6771                             break;
6772                         case NEON_2RM_VCLZ:
6773                             switch (size) {
6774                             case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
6775                             case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
6776                             case 2: gen_helper_clz(tmp, tmp); break;
6777                             default: abort();
6778                             }
6779                             break;
6780                         case NEON_2RM_VCNT:
6781                             gen_helper_neon_cnt_u8(tmp, tmp);
6782                             break;
6783                         case NEON_2RM_VMVN:
6784                             tcg_gen_not_i32(tmp, tmp);
6785                             break;
6786                         case NEON_2RM_VQABS:
6787                             switch (size) {
6788                             case 0:
6789                                 gen_helper_neon_qabs_s8(tmp, cpu_env, tmp);
6790                                 break;
6791                             case 1:
6792                                 gen_helper_neon_qabs_s16(tmp, cpu_env, tmp);
6793                                 break;
6794                             case 2:
6795                                 gen_helper_neon_qabs_s32(tmp, cpu_env, tmp);
6796                                 break;
6797                             default: abort();
6798                             }
6799                             break;
6800                         case NEON_2RM_VQNEG:
6801                             switch (size) {
6802                             case 0:
6803                                 gen_helper_neon_qneg_s8(tmp, cpu_env, tmp);
6804                                 break;
6805                             case 1:
6806                                 gen_helper_neon_qneg_s16(tmp, cpu_env, tmp);
6807                                 break;
6808                             case 2:
6809                                 gen_helper_neon_qneg_s32(tmp, cpu_env, tmp);
6810                                 break;
6811                             default: abort();
6812                             }
6813                             break;
6814                         case NEON_2RM_VCGT0: case NEON_2RM_VCLE0:
6815                             tmp2 = tcg_const_i32(0);
6816                             switch(size) {
6817                             case 0: gen_helper_neon_cgt_s8(tmp, tmp, tmp2); break;
6818                             case 1: gen_helper_neon_cgt_s16(tmp, tmp, tmp2); break;
6819                             case 2: gen_helper_neon_cgt_s32(tmp, tmp, tmp2); break;
6820                             default: abort();
6821                             }
6822                             tcg_temp_free_i32(tmp2);
6823                             if (op == NEON_2RM_VCLE0) {
6824                                 tcg_gen_not_i32(tmp, tmp);
6825                             }
6826                             break;
6827                         case NEON_2RM_VCGE0: case NEON_2RM_VCLT0:
6828                             tmp2 = tcg_const_i32(0);
6829                             switch(size) {
6830                             case 0: gen_helper_neon_cge_s8(tmp, tmp, tmp2); break;
6831                             case 1: gen_helper_neon_cge_s16(tmp, tmp, tmp2); break;
6832                             case 2: gen_helper_neon_cge_s32(tmp, tmp, tmp2); break;
6833                             default: abort();
6834                             }
6835                             tcg_temp_free_i32(tmp2);
6836                             if (op == NEON_2RM_VCLT0) {
6837                                 tcg_gen_not_i32(tmp, tmp);
6838                             }
6839                             break;
6840                         case NEON_2RM_VCEQ0:
6841                             tmp2 = tcg_const_i32(0);
6842                             switch(size) {
6843                             case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
6844                             case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
6845                             case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
6846                             default: abort();
6847                             }
6848                             tcg_temp_free_i32(tmp2);
6849                             break;
6850                         case NEON_2RM_VABS:
6851                             switch(size) {
6852                             case 0: gen_helper_neon_abs_s8(tmp, tmp); break;
6853                             case 1: gen_helper_neon_abs_s16(tmp, tmp); break;
6854                             case 2: tcg_gen_abs_i32(tmp, tmp); break;
6855                             default: abort();
6856                             }
6857                             break;
6858                         case NEON_2RM_VNEG:
6859                             tmp2 = tcg_const_i32(0);
6860                             gen_neon_rsb(size, tmp, tmp2);
6861                             tcg_temp_free_i32(tmp2);
6862                             break;
6863                         case NEON_2RM_VCGT0_F:
6864                         {
6865                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6866                             tmp2 = tcg_const_i32(0);
6867                             gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
6868                             tcg_temp_free_i32(tmp2);
6869                             tcg_temp_free_ptr(fpstatus);
6870                             break;
6871                         }
6872                         case NEON_2RM_VCGE0_F:
6873                         {
6874                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6875                             tmp2 = tcg_const_i32(0);
6876                             gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
6877                             tcg_temp_free_i32(tmp2);
6878                             tcg_temp_free_ptr(fpstatus);
6879                             break;
6880                         }
6881                         case NEON_2RM_VCEQ0_F:
6882                         {
6883                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6884                             tmp2 = tcg_const_i32(0);
6885                             gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
6886                             tcg_temp_free_i32(tmp2);
6887                             tcg_temp_free_ptr(fpstatus);
6888                             break;
6889                         }
6890                         case NEON_2RM_VCLE0_F:
6891                         {
6892                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6893                             tmp2 = tcg_const_i32(0);
6894                             gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus);
6895                             tcg_temp_free_i32(tmp2);
6896                             tcg_temp_free_ptr(fpstatus);
6897                             break;
6898                         }
6899                         case NEON_2RM_VCLT0_F:
6900                         {
6901                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6902                             tmp2 = tcg_const_i32(0);
6903                             gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus);
6904                             tcg_temp_free_i32(tmp2);
6905                             tcg_temp_free_ptr(fpstatus);
6906                             break;
6907                         }
6908                         case NEON_2RM_VABS_F:
6909                             gen_vfp_abs(0);
6910                             break;
6911                         case NEON_2RM_VNEG_F:
6912                             gen_vfp_neg(0);
6913                             break;
6914                         case NEON_2RM_VSWP:
6915                             tmp2 = neon_load_reg(rd, pass);
6916                             neon_store_reg(rm, pass, tmp2);
6917                             break;
6918                         case NEON_2RM_VTRN:
6919                             tmp2 = neon_load_reg(rd, pass);
6920                             switch (size) {
6921                             case 0: gen_neon_trn_u8(tmp, tmp2); break;
6922                             case 1: gen_neon_trn_u16(tmp, tmp2); break;
6923                             default: abort();
6924                             }
6925                             neon_store_reg(rm, pass, tmp2);
6926                             break;
6927                         case NEON_2RM_VRINTN:
6928                         case NEON_2RM_VRINTA:
6929                         case NEON_2RM_VRINTM:
6930                         case NEON_2RM_VRINTP:
6931                         case NEON_2RM_VRINTZ:
6932                         {
6933                             TCGv_i32 tcg_rmode;
6934                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6935                             int rmode;
6936
6937                             if (op == NEON_2RM_VRINTZ) {
6938                                 rmode = FPROUNDING_ZERO;
6939                             } else {
6940                                 rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1];
6941                             }
6942
6943                             tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6944                             gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6945                                                       cpu_env);
6946                             gen_helper_rints(cpu_F0s, cpu_F0s, fpstatus);
6947                             gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6948                                                       cpu_env);
6949                             tcg_temp_free_ptr(fpstatus);
6950                             tcg_temp_free_i32(tcg_rmode);
6951                             break;
6952                         }
6953                         case NEON_2RM_VRINTX:
6954                         {
6955                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6956                             gen_helper_rints_exact(cpu_F0s, cpu_F0s, fpstatus);
6957                             tcg_temp_free_ptr(fpstatus);
6958                             break;
6959                         }
6960                         case NEON_2RM_VCVTAU:
6961                         case NEON_2RM_VCVTAS:
6962                         case NEON_2RM_VCVTNU:
6963                         case NEON_2RM_VCVTNS:
6964                         case NEON_2RM_VCVTPU:
6965                         case NEON_2RM_VCVTPS:
6966                         case NEON_2RM_VCVTMU:
6967                         case NEON_2RM_VCVTMS:
6968                         {
6969                             bool is_signed = !extract32(insn, 7, 1);
6970                             TCGv_ptr fpst = get_fpstatus_ptr(1);
6971                             TCGv_i32 tcg_rmode, tcg_shift;
6972                             int rmode = fp_decode_rm[extract32(insn, 8, 2)];
6973
6974                             tcg_shift = tcg_const_i32(0);
6975                             tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6976                             gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6977                                                       cpu_env);
6978
6979                             if (is_signed) {
6980                                 gen_helper_vfp_tosls(cpu_F0s, cpu_F0s,
6981                                                      tcg_shift, fpst);
6982                             } else {
6983                                 gen_helper_vfp_touls(cpu_F0s, cpu_F0s,
6984                                                      tcg_shift, fpst);
6985                             }
6986
6987                             gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6988                                                       cpu_env);
6989                             tcg_temp_free_i32(tcg_rmode);
6990                             tcg_temp_free_i32(tcg_shift);
6991                             tcg_temp_free_ptr(fpst);
6992                             break;
6993                         }
6994                         case NEON_2RM_VRECPE:
6995                         {
6996                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6997                             gen_helper_recpe_u32(tmp, tmp, fpstatus);
6998                             tcg_temp_free_ptr(fpstatus);
6999                             break;
7000                         }
7001                         case NEON_2RM_VRSQRTE:
7002                         {
7003                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7004                             gen_helper_rsqrte_u32(tmp, tmp, fpstatus);
7005                             tcg_temp_free_ptr(fpstatus);
7006                             break;
7007                         }
7008                         case NEON_2RM_VRECPE_F:
7009                         {
7010                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7011                             gen_helper_recpe_f32(cpu_F0s, cpu_F0s, fpstatus);
7012                             tcg_temp_free_ptr(fpstatus);
7013                             break;
7014                         }
7015                         case NEON_2RM_VRSQRTE_F:
7016                         {
7017                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7018                             gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, fpstatus);
7019                             tcg_temp_free_ptr(fpstatus);
7020                             break;
7021                         }
7022                         case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
7023                             gen_vfp_sito(0, 1);
7024                             break;
7025                         case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
7026                             gen_vfp_uito(0, 1);
7027                             break;
7028                         case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
7029                             gen_vfp_tosiz(0, 1);
7030                             break;
7031                         case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
7032                             gen_vfp_touiz(0, 1);
7033                             break;
7034                         default:
7035                             /* Reserved op values were caught by the
7036                              * neon_2rm_sizes[] check earlier.
7037                              */
7038                             abort();
7039                         }
7040                         if (neon_2rm_is_float_op(op)) {
7041                             tcg_gen_st_f32(cpu_F0s, cpu_env,
7042                                            neon_reg_offset(rd, pass));
7043                         } else {
7044                             neon_store_reg(rd, pass, tmp);
7045                         }
7046                     }
7047                     break;
7048                 }
7049             } else if ((insn & (1 << 10)) == 0) {
7050                 /* VTBL, VTBX.  */
7051                 int n = ((insn >> 8) & 3) + 1;
7052                 if ((rn + n) > 32) {
7053                     /* This is UNPREDICTABLE; we choose to UNDEF to avoid the
7054                      * helper function running off the end of the register file.
7055                      */
7056                     return 1;
7057                 }
7058                 n <<= 3;
7059                 if (insn & (1 << 6)) {
7060                     tmp = neon_load_reg(rd, 0);
7061                 } else {
7062                     tmp = tcg_temp_new_i32();
7063                     tcg_gen_movi_i32(tmp, 0);
7064                 }
7065                 tmp2 = neon_load_reg(rm, 0);
7066                 tmp4 = tcg_const_i32(rn);
7067                 tmp5 = tcg_const_i32(n);
7068                 gen_helper_neon_tbl(tmp2, cpu_env, tmp2, tmp, tmp4, tmp5);
7069                 tcg_temp_free_i32(tmp);
7070                 if (insn & (1 << 6)) {
7071                     tmp = neon_load_reg(rd, 1);
7072                 } else {
7073                     tmp = tcg_temp_new_i32();
7074                     tcg_gen_movi_i32(tmp, 0);
7075                 }
7076                 tmp3 = neon_load_reg(rm, 1);
7077                 gen_helper_neon_tbl(tmp3, cpu_env, tmp3, tmp, tmp4, tmp5);
7078                 tcg_temp_free_i32(tmp5);
7079                 tcg_temp_free_i32(tmp4);
7080                 neon_store_reg(rd, 0, tmp2);
7081                 neon_store_reg(rd, 1, tmp3);
7082                 tcg_temp_free_i32(tmp);
7083             } else if ((insn & 0x380) == 0) {
7084                 /* VDUP */
7085                 if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
7086                     return 1;
7087                 }
7088                 if (insn & (1 << 19)) {
7089                     tmp = neon_load_reg(rm, 1);
7090                 } else {
7091                     tmp = neon_load_reg(rm, 0);
7092                 }
7093                 if (insn & (1 << 16)) {
7094                     gen_neon_dup_u8(tmp, ((insn >> 17) & 3) * 8);
7095                 } else if (insn & (1 << 17)) {
7096                     if ((insn >> 18) & 1)
7097                         gen_neon_dup_high16(tmp);
7098                     else
7099                         gen_neon_dup_low16(tmp);
7100                 }
7101                 for (pass = 0; pass < (q ? 4 : 2); pass++) {
7102                     tmp2 = tcg_temp_new_i32();
7103                     tcg_gen_mov_i32(tmp2, tmp);
7104                     neon_store_reg(rd, pass, tmp2);
7105                 }
7106                 tcg_temp_free_i32(tmp);
7107             } else {
7108                 return 1;
7109             }
7110         }
7111     }
7112     return 0;
7113 }
7114
7115 static int disas_coproc_insn(DisasContext *s, uint32_t insn)
7116 {
7117     int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
7118     const ARMCPRegInfo *ri;
7119
7120     cpnum = (insn >> 8) & 0xf;
7121
7122     /* First check for coprocessor space used for XScale/iwMMXt insns */
7123     if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cpnum < 2)) {
7124         if (extract32(s->c15_cpar, cpnum, 1) == 0) {
7125             return 1;
7126         }
7127         if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
7128             return disas_iwmmxt_insn(s, insn);
7129         } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
7130             return disas_dsp_insn(s, insn);
7131         }
7132         return 1;
7133     }
7134
7135     /* Otherwise treat as a generic register access */
7136     is64 = (insn & (1 << 25)) == 0;
7137     if (!is64 && ((insn & (1 << 4)) == 0)) {
7138         /* cdp */
7139         return 1;
7140     }
7141
7142     crm = insn & 0xf;
7143     if (is64) {
7144         crn = 0;
7145         opc1 = (insn >> 4) & 0xf;
7146         opc2 = 0;
7147         rt2 = (insn >> 16) & 0xf;
7148     } else {
7149         crn = (insn >> 16) & 0xf;
7150         opc1 = (insn >> 21) & 7;
7151         opc2 = (insn >> 5) & 7;
7152         rt2 = 0;
7153     }
7154     isread = (insn >> 20) & 1;
7155     rt = (insn >> 12) & 0xf;
7156
7157     ri = get_arm_cp_reginfo(s->cp_regs,
7158             ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
7159     if (ri) {
7160         /* Check access permissions */
7161         if (!cp_access_ok(s->current_el, ri, isread)) {
7162             return 1;
7163         }
7164
7165         if (ri->accessfn ||
7166             (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
7167             /* Emit code to perform further access permissions checks at
7168              * runtime; this may result in an exception.
7169              * Note that on XScale all cp0..c13 registers do an access check
7170              * call in order to handle c15_cpar.
7171              */
7172             TCGv_ptr tmpptr;
7173             TCGv_i32 tcg_syn;
7174             uint32_t syndrome;
7175
7176             /* Note that since we are an implementation which takes an
7177              * exception on a trapped conditional instruction only if the
7178              * instruction passes its condition code check, we can take
7179              * advantage of the clause in the ARM ARM that allows us to set
7180              * the COND field in the instruction to 0xE in all cases.
7181              * We could fish the actual condition out of the insn (ARM)
7182              * or the condexec bits (Thumb) but it isn't necessary.
7183              */
7184             switch (cpnum) {
7185             case 14:
7186                 if (is64) {
7187                     syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
7188                                                  isread, s->thumb);
7189                 } else {
7190                     syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
7191                                                 rt, isread, s->thumb);
7192                 }
7193                 break;
7194             case 15:
7195                 if (is64) {
7196                     syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
7197                                                  isread, s->thumb);
7198                 } else {
7199                     syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
7200                                                 rt, isread, s->thumb);
7201                 }
7202                 break;
7203             default:
7204                 /* ARMv8 defines that only coprocessors 14 and 15 exist,
7205                  * so this can only happen if this is an ARMv7 or earlier CPU,
7206                  * in which case the syndrome information won't actually be
7207                  * guest visible.
7208                  */
7209                 assert(!arm_dc_feature(s, ARM_FEATURE_V8));
7210                 syndrome = syn_uncategorized();
7211                 break;
7212             }
7213
7214             gen_set_pc_im(s, s->pc - 4);
7215             tmpptr = tcg_const_ptr(ri);
7216             tcg_syn = tcg_const_i32(syndrome);
7217             gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn);
7218             tcg_temp_free_ptr(tmpptr);
7219             tcg_temp_free_i32(tcg_syn);
7220         }
7221
7222         /* Handle special cases first */
7223         switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
7224         case ARM_CP_NOP:
7225             return 0;
7226         case ARM_CP_WFI:
7227             if (isread) {
7228                 return 1;
7229             }
7230             gen_set_pc_im(s, s->pc);
7231             s->is_jmp = DISAS_WFI;
7232             return 0;
7233         default:
7234             break;
7235         }
7236
7237         if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
7238             gen_io_start();
7239         }
7240
7241         if (isread) {
7242             /* Read */
7243             if (is64) {
7244                 TCGv_i64 tmp64;
7245                 TCGv_i32 tmp;
7246                 if (ri->type & ARM_CP_CONST) {
7247                     tmp64 = tcg_const_i64(ri->resetvalue);
7248                 } else if (ri->readfn) {
7249                     TCGv_ptr tmpptr;
7250                     tmp64 = tcg_temp_new_i64();
7251                     tmpptr = tcg_const_ptr(ri);
7252                     gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
7253                     tcg_temp_free_ptr(tmpptr);
7254                 } else {
7255                     tmp64 = tcg_temp_new_i64();
7256                     tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
7257                 }
7258                 tmp = tcg_temp_new_i32();
7259                 tcg_gen_extrl_i64_i32(tmp, tmp64);
7260                 store_reg(s, rt, tmp);
7261                 tcg_gen_shri_i64(tmp64, tmp64, 32);
7262                 tmp = tcg_temp_new_i32();
7263                 tcg_gen_extrl_i64_i32(tmp, tmp64);
7264                 tcg_temp_free_i64(tmp64);
7265                 store_reg(s, rt2, tmp);
7266             } else {
7267                 TCGv_i32 tmp;
7268                 if (ri->type & ARM_CP_CONST) {
7269                     tmp = tcg_const_i32(ri->resetvalue);
7270                 } else if (ri->readfn) {
7271                     TCGv_ptr tmpptr;
7272                     tmp = tcg_temp_new_i32();
7273                     tmpptr = tcg_const_ptr(ri);
7274                     gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
7275                     tcg_temp_free_ptr(tmpptr);
7276                 } else {
7277                     tmp = load_cpu_offset(ri->fieldoffset);
7278                 }
7279                 if (rt == 15) {
7280                     /* Destination register of r15 for 32 bit loads sets
7281                      * the condition codes from the high 4 bits of the value
7282                      */
7283                     gen_set_nzcv(tmp);
7284                     tcg_temp_free_i32(tmp);
7285                 } else {
7286                     store_reg(s, rt, tmp);
7287                 }
7288             }
7289         } else {
7290             /* Write */
7291             if (ri->type & ARM_CP_CONST) {
7292                 /* If not forbidden by access permissions, treat as WI */
7293                 return 0;
7294             }
7295
7296             if (is64) {
7297                 TCGv_i32 tmplo, tmphi;
7298                 TCGv_i64 tmp64 = tcg_temp_new_i64();
7299                 tmplo = load_reg(s, rt);
7300                 tmphi = load_reg(s, rt2);
7301                 tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
7302                 tcg_temp_free_i32(tmplo);
7303                 tcg_temp_free_i32(tmphi);
7304                 if (ri->writefn) {
7305                     TCGv_ptr tmpptr = tcg_const_ptr(ri);
7306                     gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
7307                     tcg_temp_free_ptr(tmpptr);
7308                 } else {
7309                     tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
7310                 }
7311                 tcg_temp_free_i64(tmp64);
7312             } else {
7313                 if (ri->writefn) {
7314                     TCGv_i32 tmp;
7315                     TCGv_ptr tmpptr;
7316                     tmp = load_reg(s, rt);
7317                     tmpptr = tcg_const_ptr(ri);
7318                     gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
7319                     tcg_temp_free_ptr(tmpptr);
7320                     tcg_temp_free_i32(tmp);
7321                 } else {
7322                     TCGv_i32 tmp = load_reg(s, rt);
7323                     store_cpu_offset(tmp, ri->fieldoffset);
7324                 }
7325             }
7326         }
7327
7328         if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
7329             /* I/O operations must end the TB here (whether read or write) */
7330             gen_io_end();
7331             gen_lookup_tb(s);
7332         } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
7333             /* We default to ending the TB on a coprocessor register write,
7334              * but allow this to be suppressed by the register definition
7335              * (usually only necessary to work around guest bugs).
7336              */
7337             gen_lookup_tb(s);
7338         }
7339
7340         return 0;
7341     }
7342
7343     /* Unknown register; this might be a guest error or a QEMU
7344      * unimplemented feature.
7345      */
7346     if (is64) {
7347         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7348                       "64 bit system register cp:%d opc1: %d crm:%d "
7349                       "(%s)\n",
7350                       isread ? "read" : "write", cpnum, opc1, crm,
7351                       s->ns ? "non-secure" : "secure");
7352     } else {
7353         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7354                       "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
7355                       "(%s)\n",
7356                       isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
7357                       s->ns ? "non-secure" : "secure");
7358     }
7359
7360     return 1;
7361 }
7362
7363
7364 /* Store a 64-bit value to a register pair.  Clobbers val.  */
7365 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
7366 {
7367     TCGv_i32 tmp;
7368     tmp = tcg_temp_new_i32();
7369     tcg_gen_extrl_i64_i32(tmp, val);
7370     store_reg(s, rlow, tmp);
7371     tmp = tcg_temp_new_i32();
7372     tcg_gen_shri_i64(val, val, 32);
7373     tcg_gen_extrl_i64_i32(tmp, val);
7374     store_reg(s, rhigh, tmp);
7375 }
7376
7377 /* load a 32-bit value from a register and perform a 64-bit accumulate.  */
7378 static void gen_addq_lo(DisasContext *s, TCGv_i64 val, int rlow)
7379 {
7380     TCGv_i64 tmp;
7381     TCGv_i32 tmp2;
7382
7383     /* Load value and extend to 64 bits.  */
7384     tmp = tcg_temp_new_i64();
7385     tmp2 = load_reg(s, rlow);
7386     tcg_gen_extu_i32_i64(tmp, tmp2);
7387     tcg_temp_free_i32(tmp2);
7388     tcg_gen_add_i64(val, val, tmp);
7389     tcg_temp_free_i64(tmp);
7390 }
7391
7392 /* load and add a 64-bit value from a register pair.  */
7393 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
7394 {
7395     TCGv_i64 tmp;
7396     TCGv_i32 tmpl;
7397     TCGv_i32 tmph;
7398
7399     /* Load 64-bit value rd:rn.  */
7400     tmpl = load_reg(s, rlow);
7401     tmph = load_reg(s, rhigh);
7402     tmp = tcg_temp_new_i64();
7403     tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
7404     tcg_temp_free_i32(tmpl);
7405     tcg_temp_free_i32(tmph);
7406     tcg_gen_add_i64(val, val, tmp);
7407     tcg_temp_free_i64(tmp);
7408 }
7409
/* Set N and Z flags from hi|lo.  */
static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
{
    /* N comes from the top word (cpu_NF's sign bit carries the flag).  */
    tcg_gen_mov_i32(cpu_NF, hi);
    /* cpu_ZF is zero iff Z is set, so OR both halves: the 64-bit result
     * is zero exactly when both words are zero.
     */
    tcg_gen_or_i32(cpu_ZF, lo, hi);
}
7416
7417 /* Load/Store exclusive instructions are implemented by remembering
7418    the value/address loaded, and seeing if these are the same
7419    when the store is performed. This should be sufficient to implement
7420    the architecturally mandated semantics, and avoids having to monitor
7421    regular stores.
7422
7423    In system emulation mode only one CPU will be running at once, so
7424    this sequence is effectively atomic.  In user emulation mode we
7425    throw an exception and handle the atomic operation elsewhere.  */
7426 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
7427                                TCGv_i32 addr, int size)
7428 {
7429     TCGv_i32 tmp = tcg_temp_new_i32();
7430
7431     s->is_ldex = true;
7432
7433     switch (size) {
7434     case 0:
7435         gen_aa32_ld8u(tmp, addr, get_mem_index(s));
7436         break;
7437     case 1:
7438         gen_aa32_ld16u(tmp, addr, get_mem_index(s));
7439         break;
7440     case 2:
7441     case 3:
7442         gen_aa32_ld32u(tmp, addr, get_mem_index(s));
7443         break;
7444     default:
7445         abort();
7446     }
7447
7448     if (size == 3) {
7449         TCGv_i32 tmp2 = tcg_temp_new_i32();
7450         TCGv_i32 tmp3 = tcg_temp_new_i32();
7451
7452         tcg_gen_addi_i32(tmp2, addr, 4);
7453         gen_aa32_ld32u(tmp3, tmp2, get_mem_index(s));
7454         tcg_temp_free_i32(tmp2);
7455         tcg_gen_concat_i32_i64(cpu_exclusive_val, tmp, tmp3);
7456         store_reg(s, rt2, tmp3);
7457     } else {
7458         tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
7459     }
7460
7461     store_reg(s, rt, tmp);
7462     tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
7463 }
7464
/* Generate code for CLREX: clear the exclusive monitor.  */
static void gen_clrex(DisasContext *s)
{
    /* -1 can never match an exclusive address, since those are produced
     * by zero-extending a 32-bit value (see gen_load_exclusive).
     */
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
7469
7470 #ifdef CONFIG_USER_ONLY
/* Generate code for a store-exclusive (user-mode emulation variant).
 * Records the operands and raises EXCP_STREX so the operation can be
 * completed atomically outside generated code.
 */
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i32 addr, int size)
{
    tcg_gen_extu_i32_i64(cpu_exclusive_test, addr);
    /* Pack size and the three register numbers for the exception handler.  */
    tcg_gen_movi_i32(cpu_exclusive_info,
                     size | (rd << 4) | (rt << 8) | (rt2 << 12));
    gen_exception_internal_insn(s, 4, EXCP_STREX);
}
7479 #else
/* Generate code for a store-exclusive (system emulation variant):
 * store Rt (and Rt2, for size == 3) to [addr] and set Rd to 0 on
 * success or 1 on failure, checking against the monitor state set up
 * by gen_load_exclusive.
 */
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i32 addr, int size)
{
    TCGv_i32 tmp;
    TCGv_i64 val64, extaddr;
    TCGLabel *done_label;
    TCGLabel *fail_label;

    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
         [addr] = {Rt};
         {Rd} = 0;
       } else {
         {Rd} = 1;
       } */
    fail_label = gen_new_label();
    done_label = gen_new_label();
    extaddr = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(extaddr, addr);
    /* Fail if this is not the address marked exclusive by the LDREX.  */
    tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
    tcg_temp_free_i64(extaddr);

    /* Re-load the current memory contents at the exclusive address.  */
    tmp = tcg_temp_new_i32();
    switch (size) {
    case 0:
        gen_aa32_ld8u(tmp, addr, get_mem_index(s));
        break;
    case 1:
        gen_aa32_ld16u(tmp, addr, get_mem_index(s));
        break;
    case 2:
    case 3:
        gen_aa32_ld32u(tmp, addr, get_mem_index(s));
        break;
    default:
        abort();
    }

    /* Widen (or, for size == 3, concatenate with the second word) into
     * val64 so it can be compared against cpu_exclusive_val in one go.
     */
    val64 = tcg_temp_new_i64();
    if (size == 3) {
        TCGv_i32 tmp2 = tcg_temp_new_i32();
        TCGv_i32 tmp3 = tcg_temp_new_i32();
        tcg_gen_addi_i32(tmp2, addr, 4);
        gen_aa32_ld32u(tmp3, tmp2, get_mem_index(s));
        tcg_temp_free_i32(tmp2);
        tcg_gen_concat_i32_i64(val64, tmp, tmp3);
        tcg_temp_free_i32(tmp3);
    } else {
        tcg_gen_extu_i32_i64(val64, tmp);
    }
    tcg_temp_free_i32(tmp);

    /* Fail if the memory value has changed since the LDREX.  */
    tcg_gen_brcond_i64(TCG_COND_NE, val64, cpu_exclusive_val, fail_label);
    tcg_temp_free_i64(val64);

    /* Both checks passed: perform the actual store(s).  */
    tmp = load_reg(s, rt);
    switch (size) {
    case 0:
        gen_aa32_st8(tmp, addr, get_mem_index(s));
        break;
    case 1:
        gen_aa32_st16(tmp, addr, get_mem_index(s));
        break;
    case 2:
    case 3:
        gen_aa32_st32(tmp, addr, get_mem_index(s));
        break;
    default:
        abort();
    }
    tcg_temp_free_i32(tmp);
    if (size == 3) {
        /* Second word of an STREXD goes to addr + 4.  */
        tcg_gen_addi_i32(addr, addr, 4);
        tmp = load_reg(s, rt2);
        gen_aa32_st32(tmp, addr, get_mem_index(s));
        tcg_temp_free_i32(tmp);
    }
    tcg_gen_movi_i32(cpu_R[rd], 0);   /* success status */
    tcg_gen_br(done_label);
    gen_set_label(fail_label);
    tcg_gen_movi_i32(cpu_R[rd], 1);   /* failure status */
    gen_set_label(done_label);
    /* In either case the exclusive monitor is now cleared.  */
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
7563 #endif
7564
7565 /* gen_srs:
7566  * @env: CPUARMState
7567  * @s: DisasContext
7568  * @mode: mode field from insn (which stack to store to)
7569  * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
7570  * @writeback: true if writeback bit set
7571  *
7572  * Generate code for the SRS (Store Return State) insn.
7573  */
7574 static void gen_srs(DisasContext *s,
7575                     uint32_t mode, uint32_t amode, bool writeback)
7576 {
7577     int32_t offset;
7578     TCGv_i32 addr = tcg_temp_new_i32();
7579     TCGv_i32 tmp = tcg_const_i32(mode);
7580     gen_helper_get_r13_banked(addr, cpu_env, tmp);
7581     tcg_temp_free_i32(tmp);
7582     switch (amode) {
7583     case 0: /* DA */
7584         offset = -4;
7585         break;
7586     case 1: /* IA */
7587         offset = 0;
7588         break;
7589     case 2: /* DB */
7590         offset = -8;
7591         break;
7592     case 3: /* IB */
7593         offset = 4;
7594         break;
7595     default:
7596         abort();
7597     }
7598     tcg_gen_addi_i32(addr, addr, offset);
7599     tmp = load_reg(s, 14);
7600     gen_aa32_st32(tmp, addr, get_mem_index(s));
7601     tcg_temp_free_i32(tmp);
7602     tmp = load_cpu_field(spsr);
7603     tcg_gen_addi_i32(addr, addr, 4);
7604     gen_aa32_st32(tmp, addr, get_mem_index(s));
7605     tcg_temp_free_i32(tmp);
7606     if (writeback) {
7607         switch (amode) {
7608         case 0:
7609             offset = -8;
7610             break;
7611         case 1:
7612             offset = 4;
7613             break;
7614         case 2:
7615             offset = -4;
7616             break;
7617         case 3:
7618             offset = 0;
7619             break;
7620         default:
7621             abort();
7622         }
7623         tcg_gen_addi_i32(addr, addr, offset);
7624         tmp = tcg_const_i32(mode);
7625         gen_helper_set_r13_banked(cpu_env, tmp, addr);
7626         tcg_temp_free_i32(tmp);
7627     }
7628     tcg_temp_free_i32(addr);
7629 }
7630
7631 static void disas_arm_insn(DisasContext *s, unsigned int insn)
7632 {
7633     unsigned int cond, val, op1, i, shift, rm, rs, rn, rd, sh;
7634     TCGv_i32 tmp;
7635     TCGv_i32 tmp2;
7636     TCGv_i32 tmp3;
7637     TCGv_i32 addr;
7638     TCGv_i64 tmp64;
7639
7640     /* M variants do not implement ARM mode.  */
7641     if (arm_dc_feature(s, ARM_FEATURE_M)) {
7642         goto illegal_op;
7643     }
7644     cond = insn >> 28;
7645     if (cond == 0xf){
7646         /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
7647          * choose to UNDEF. In ARMv5 and above the space is used
7648          * for miscellaneous unconditional instructions.
7649          */
7650         ARCH(5);
7651
7652         /* Unconditional instructions.  */
7653         if (((insn >> 25) & 7) == 1) {
7654             /* NEON Data processing.  */
7655             if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
7656                 goto illegal_op;
7657             }
7658
7659             if (disas_neon_data_insn(s, insn)) {
7660                 goto illegal_op;
7661             }
7662             return;
7663         }
7664         if ((insn & 0x0f100000) == 0x04000000) {
7665             /* NEON load/store.  */
7666             if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
7667                 goto illegal_op;
7668             }
7669
7670             if (disas_neon_ls_insn(s, insn)) {
7671                 goto illegal_op;
7672             }
7673             return;
7674         }
7675         if ((insn & 0x0f000e10) == 0x0e000a00) {
7676             /* VFP.  */
7677             if (disas_vfp_insn(s, insn)) {
7678                 goto illegal_op;
7679             }
7680             return;
7681         }
7682         if (((insn & 0x0f30f000) == 0x0510f000) ||
7683             ((insn & 0x0f30f010) == 0x0710f000)) {
7684             if ((insn & (1 << 22)) == 0) {
7685                 /* PLDW; v7MP */
7686                 if (!arm_dc_feature(s, ARM_FEATURE_V7MP)) {
7687                     goto illegal_op;
7688                 }
7689             }
7690             /* Otherwise PLD; v5TE+ */
7691             ARCH(5TE);
7692             return;
7693         }
7694         if (((insn & 0x0f70f000) == 0x0450f000) ||
7695             ((insn & 0x0f70f010) == 0x0650f000)) {
7696             ARCH(7);
7697             return; /* PLI; V7 */
7698         }
7699         if (((insn & 0x0f700000) == 0x04100000) ||
7700             ((insn & 0x0f700010) == 0x06100000)) {
7701             if (!arm_dc_feature(s, ARM_FEATURE_V7MP)) {
7702                 goto illegal_op;
7703             }
7704             return; /* v7MP: Unallocated memory hint: must NOP */
7705         }
7706
7707         if ((insn & 0x0ffffdff) == 0x01010000) {
7708             ARCH(6);
7709             /* setend */
7710             if (((insn >> 9) & 1) != s->bswap_code) {
7711                 /* Dynamic endianness switching not implemented. */
7712                 qemu_log_mask(LOG_UNIMP, "arm: unimplemented setend\n");
7713                 goto illegal_op;
7714             }
7715             return;
7716         } else if ((insn & 0x0fffff00) == 0x057ff000) {
7717             switch ((insn >> 4) & 0xf) {
7718             case 1: /* clrex */
7719                 ARCH(6K);
7720                 gen_clrex(s);
7721                 return;
7722             case 4: /* dsb */
7723             case 5: /* dmb */
7724             case 6: /* isb */
7725                 ARCH(7);
7726                 /* We don't emulate caches so these are a no-op.  */
7727                 return;
7728             default:
7729                 goto illegal_op;
7730             }
7731         } else if ((insn & 0x0e5fffe0) == 0x084d0500) {
7732             /* srs */
7733             if (IS_USER(s)) {
7734                 goto illegal_op;
7735             }
7736             ARCH(6);
7737             gen_srs(s, (insn & 0x1f), (insn >> 23) & 3, insn & (1 << 21));
7738             return;
7739         } else if ((insn & 0x0e50ffe0) == 0x08100a00) {
7740             /* rfe */
7741             int32_t offset;
7742             if (IS_USER(s))
7743                 goto illegal_op;
7744             ARCH(6);
7745             rn = (insn >> 16) & 0xf;
7746             addr = load_reg(s, rn);
7747             i = (insn >> 23) & 3;
7748             switch (i) {
7749             case 0: offset = -4; break; /* DA */
7750             case 1: offset = 0; break; /* IA */
7751             case 2: offset = -8; break; /* DB */
7752             case 3: offset = 4; break; /* IB */
7753             default: abort();
7754             }
7755             if (offset)
7756                 tcg_gen_addi_i32(addr, addr, offset);
7757             /* Load PC into tmp and CPSR into tmp2.  */
7758             tmp = tcg_temp_new_i32();
7759             gen_aa32_ld32u(tmp, addr, get_mem_index(s));
7760             tcg_gen_addi_i32(addr, addr, 4);
7761             tmp2 = tcg_temp_new_i32();
7762             gen_aa32_ld32u(tmp2, addr, get_mem_index(s));
7763             if (insn & (1 << 21)) {
7764                 /* Base writeback.  */
7765                 switch (i) {
7766                 case 0: offset = -8; break;
7767                 case 1: offset = 4; break;
7768                 case 2: offset = -4; break;
7769                 case 3: offset = 0; break;
7770                 default: abort();
7771                 }
7772                 if (offset)
7773                     tcg_gen_addi_i32(addr, addr, offset);
7774                 store_reg(s, rn, addr);
7775             } else {
7776                 tcg_temp_free_i32(addr);
7777             }
7778             gen_rfe(s, tmp, tmp2);
7779             return;
7780         } else if ((insn & 0x0e000000) == 0x0a000000) {
7781             /* branch link and change to thumb (blx <offset>) */
7782             int32_t offset;
7783
7784             val = (uint32_t)s->pc;
7785             tmp = tcg_temp_new_i32();
7786             tcg_gen_movi_i32(tmp, val);
7787             store_reg(s, 14, tmp);
7788             /* Sign-extend the 24-bit offset */
7789             offset = (((int32_t)insn) << 8) >> 8;
7790             /* offset * 4 + bit24 * 2 + (thumb bit) */
7791             val += (offset << 2) | ((insn >> 23) & 2) | 1;
7792             /* pipeline offset */
7793             val += 4;
7794             /* protected by ARCH(5); above, near the start of uncond block */
7795             gen_bx_im(s, val);
7796             return;
7797         } else if ((insn & 0x0e000f00) == 0x0c000100) {
7798             if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
7799                 /* iWMMXt register transfer.  */
7800                 if (extract32(s->c15_cpar, 1, 1)) {
7801                     if (!disas_iwmmxt_insn(s, insn)) {
7802                         return;
7803                     }
7804                 }
7805             }
7806         } else if ((insn & 0x0fe00000) == 0x0c400000) {
7807             /* Coprocessor double register transfer.  */
7808             ARCH(5TE);
7809         } else if ((insn & 0x0f000010) == 0x0e000010) {
7810             /* Additional coprocessor register transfer.  */
7811         } else if ((insn & 0x0ff10020) == 0x01000000) {
7812             uint32_t mask;
7813             uint32_t val;
7814             /* cps (privileged) */
7815             if (IS_USER(s))
7816                 return;
7817             mask = val = 0;
7818             if (insn & (1 << 19)) {
7819                 if (insn & (1 << 8))
7820                     mask |= CPSR_A;
7821                 if (insn & (1 << 7))
7822                     mask |= CPSR_I;
7823                 if (insn & (1 << 6))
7824                     mask |= CPSR_F;
7825                 if (insn & (1 << 18))
7826                     val |= mask;
7827             }
7828             if (insn & (1 << 17)) {
7829                 mask |= CPSR_M;
7830                 val |= (insn & 0x1f);
7831             }
7832             if (mask) {
7833                 gen_set_psr_im(s, mask, 0, val);
7834             }
7835             return;
7836         }
7837         goto illegal_op;
7838     }
7839     if (cond != 0xe) {
7840         /* if not always execute, we generate a conditional jump to
7841            next instruction */
7842         s->condlabel = gen_new_label();
7843         arm_gen_test_cc(cond ^ 1, s->condlabel);
7844         s->condjmp = 1;
7845     }
7846     if ((insn & 0x0f900000) == 0x03000000) {
7847         if ((insn & (1 << 21)) == 0) {
7848             ARCH(6T2);
7849             rd = (insn >> 12) & 0xf;
7850             val = ((insn >> 4) & 0xf000) | (insn & 0xfff);
7851             if ((insn & (1 << 22)) == 0) {
7852                 /* MOVW */
7853                 tmp = tcg_temp_new_i32();
7854                 tcg_gen_movi_i32(tmp, val);
7855             } else {
7856                 /* MOVT */
7857                 tmp = load_reg(s, rd);
7858                 tcg_gen_ext16u_i32(tmp, tmp);
7859                 tcg_gen_ori_i32(tmp, tmp, val << 16);
7860             }
7861             store_reg(s, rd, tmp);
7862         } else {
7863             if (((insn >> 12) & 0xf) != 0xf)
7864                 goto illegal_op;
7865             if (((insn >> 16) & 0xf) == 0) {
7866                 gen_nop_hint(s, insn & 0xff);
7867             } else {
7868                 /* CPSR = immediate */
7869                 val = insn & 0xff;
7870                 shift = ((insn >> 8) & 0xf) * 2;
7871                 if (shift)
7872                     val = (val >> shift) | (val << (32 - shift));
7873                 i = ((insn & (1 << 22)) != 0);
7874                 if (gen_set_psr_im(s, msr_mask(s, (insn >> 16) & 0xf, i),
7875                                    i, val)) {
7876                     goto illegal_op;
7877                 }
7878             }
7879         }
7880     } else if ((insn & 0x0f900000) == 0x01000000
7881                && (insn & 0x00000090) != 0x00000090) {
7882         /* miscellaneous instructions */
7883         op1 = (insn >> 21) & 3;
7884         sh = (insn >> 4) & 0xf;
7885         rm = insn & 0xf;
7886         switch (sh) {
7887         case 0x0: /* move program status register */
7888             if (op1 & 1) {
7889                 /* PSR = reg */
7890                 tmp = load_reg(s, rm);
7891                 i = ((op1 & 2) != 0);
7892                 if (gen_set_psr(s, msr_mask(s, (insn >> 16) & 0xf, i), i, tmp))
7893                     goto illegal_op;
7894             } else {
7895                 /* reg = PSR */
7896                 rd = (insn >> 12) & 0xf;
7897                 if (op1 & 2) {
7898                     if (IS_USER(s))
7899                         goto illegal_op;
7900                     tmp = load_cpu_field(spsr);
7901                 } else {
7902                     tmp = tcg_temp_new_i32();
7903                     gen_helper_cpsr_read(tmp, cpu_env);
7904                 }
7905                 store_reg(s, rd, tmp);
7906             }
7907             break;
7908         case 0x1:
7909             if (op1 == 1) {
7910                 /* branch/exchange thumb (bx).  */
7911                 ARCH(4T);
7912                 tmp = load_reg(s, rm);
7913                 gen_bx(s, tmp);
7914             } else if (op1 == 3) {
7915                 /* clz */
7916                 ARCH(5);
7917                 rd = (insn >> 12) & 0xf;
7918                 tmp = load_reg(s, rm);
7919                 gen_helper_clz(tmp, tmp);
7920                 store_reg(s, rd, tmp);
7921             } else {
7922                 goto illegal_op;
7923             }
7924             break;
7925         case 0x2:
7926             if (op1 == 1) {
7927                 ARCH(5J); /* bxj */
7928                 /* Trivial implementation equivalent to bx.  */
7929                 tmp = load_reg(s, rm);
7930                 gen_bx(s, tmp);
7931             } else {
7932                 goto illegal_op;
7933             }
7934             break;
7935         case 0x3:
7936             if (op1 != 1)
7937               goto illegal_op;
7938
7939             ARCH(5);
7940             /* branch link/exchange thumb (blx) */
7941             tmp = load_reg(s, rm);
7942             tmp2 = tcg_temp_new_i32();
7943             tcg_gen_movi_i32(tmp2, s->pc);
7944             store_reg(s, 14, tmp2);
7945             gen_bx(s, tmp);
7946             break;
7947         case 0x4:
7948         {
7949             /* crc32/crc32c */
7950             uint32_t c = extract32(insn, 8, 4);
7951
7952             /* Check this CPU supports ARMv8 CRC instructions.
7953              * op1 == 3 is UNPREDICTABLE but handle as UNDEFINED.
7954              * Bits 8, 10 and 11 should be zero.
7955              */
7956             if (!arm_dc_feature(s, ARM_FEATURE_CRC) || op1 == 0x3 ||
7957                 (c & 0xd) != 0) {
7958                 goto illegal_op;
7959             }
7960
7961             rn = extract32(insn, 16, 4);
7962             rd = extract32(insn, 12, 4);
7963
7964             tmp = load_reg(s, rn);
7965             tmp2 = load_reg(s, rm);
7966             if (op1 == 0) {
7967                 tcg_gen_andi_i32(tmp2, tmp2, 0xff);
7968             } else if (op1 == 1) {
7969                 tcg_gen_andi_i32(tmp2, tmp2, 0xffff);
7970             }
7971             tmp3 = tcg_const_i32(1 << op1);
7972             if (c & 0x2) {
7973                 gen_helper_crc32c(tmp, tmp, tmp2, tmp3);
7974             } else {
7975                 gen_helper_crc32(tmp, tmp, tmp2, tmp3);
7976             }
7977             tcg_temp_free_i32(tmp2);
7978             tcg_temp_free_i32(tmp3);
7979             store_reg(s, rd, tmp);
7980             break;
7981         }
7982         case 0x5: /* saturating add/subtract */
7983             ARCH(5TE);
7984             rd = (insn >> 12) & 0xf;
7985             rn = (insn >> 16) & 0xf;
7986             tmp = load_reg(s, rm);
7987             tmp2 = load_reg(s, rn);
7988             if (op1 & 2)
7989                 gen_helper_double_saturate(tmp2, cpu_env, tmp2);
7990             if (op1 & 1)
7991                 gen_helper_sub_saturate(tmp, cpu_env, tmp, tmp2);
7992             else
7993                 gen_helper_add_saturate(tmp, cpu_env, tmp, tmp2);
7994             tcg_temp_free_i32(tmp2);
7995             store_reg(s, rd, tmp);
7996             break;
7997         case 7:
7998         {
7999             int imm16 = extract32(insn, 0, 4) | (extract32(insn, 8, 12) << 4);
8000             switch (op1) {
8001             case 1:
8002                 /* bkpt */
8003                 ARCH(5);
8004                 gen_exception_insn(s, 4, EXCP_BKPT,
8005                                    syn_aa32_bkpt(imm16, false),
8006                                    default_exception_el(s));
8007                 break;
8008             case 2:
8009                 /* Hypervisor call (v7) */
8010                 ARCH(7);
8011                 if (IS_USER(s)) {
8012                     goto illegal_op;
8013                 }
8014                 gen_hvc(s, imm16);
8015                 break;
8016             case 3:
8017                 /* Secure monitor call (v6+) */
8018                 ARCH(6K);
8019                 if (IS_USER(s)) {
8020                     goto illegal_op;
8021                 }
8022                 gen_smc(s);
8023                 break;
8024             default:
8025                 goto illegal_op;
8026             }
8027             break;
8028         }
8029         case 0x8: /* signed multiply */
8030         case 0xa:
8031         case 0xc:
8032         case 0xe:
8033             ARCH(5TE);
8034             rs = (insn >> 8) & 0xf;
8035             rn = (insn >> 12) & 0xf;
8036             rd = (insn >> 16) & 0xf;
8037             if (op1 == 1) {
8038                 /* (32 * 16) >> 16 */
8039                 tmp = load_reg(s, rm);
8040                 tmp2 = load_reg(s, rs);
8041                 if (sh & 4)
8042                     tcg_gen_sari_i32(tmp2, tmp2, 16);
8043                 else
8044                     gen_sxth(tmp2);
8045                 tmp64 = gen_muls_i64_i32(tmp, tmp2);
8046                 tcg_gen_shri_i64(tmp64, tmp64, 16);
8047                 tmp = tcg_temp_new_i32();
8048                 tcg_gen_extrl_i64_i32(tmp, tmp64);
8049                 tcg_temp_free_i64(tmp64);
8050                 if ((sh & 2) == 0) {
8051                     tmp2 = load_reg(s, rn);
8052                     gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
8053                     tcg_temp_free_i32(tmp2);
8054                 }
8055                 store_reg(s, rd, tmp);
8056             } else {
8057                 /* 16 * 16 */
8058                 tmp = load_reg(s, rm);
8059                 tmp2 = load_reg(s, rs);
8060                 gen_mulxy(tmp, tmp2, sh & 2, sh & 4);
8061                 tcg_temp_free_i32(tmp2);
8062                 if (op1 == 2) {
8063                     tmp64 = tcg_temp_new_i64();
8064                     tcg_gen_ext_i32_i64(tmp64, tmp);
8065                     tcg_temp_free_i32(tmp);
8066                     gen_addq(s, tmp64, rn, rd);
8067                     gen_storeq_reg(s, rn, rd, tmp64);
8068                     tcg_temp_free_i64(tmp64);
8069                 } else {
8070                     if (op1 == 0) {
8071                         tmp2 = load_reg(s, rn);
8072                         gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
8073                         tcg_temp_free_i32(tmp2);
8074                     }
8075                     store_reg(s, rd, tmp);
8076                 }
8077             }
8078             break;
8079         default:
8080             goto illegal_op;
8081         }
8082     } else if (((insn & 0x0e000000) == 0 &&
8083                 (insn & 0x00000090) != 0x90) ||
8084                ((insn & 0x0e000000) == (1 << 25))) {
8085         int set_cc, logic_cc, shiftop;
8086
8087         op1 = (insn >> 21) & 0xf;
8088         set_cc = (insn >> 20) & 1;
8089         logic_cc = table_logic_cc[op1] & set_cc;
8090
8091         /* data processing instruction */
8092         if (insn & (1 << 25)) {
8093             /* immediate operand */
8094             val = insn & 0xff;
8095             shift = ((insn >> 8) & 0xf) * 2;
8096             if (shift) {
8097                 val = (val >> shift) | (val << (32 - shift));
8098             }
8099             tmp2 = tcg_temp_new_i32();
8100             tcg_gen_movi_i32(tmp2, val);
8101             if (logic_cc && shift) {
8102                 gen_set_CF_bit31(tmp2);
8103             }
8104         } else {
8105             /* register */
8106             rm = (insn) & 0xf;
8107             tmp2 = load_reg(s, rm);
8108             shiftop = (insn >> 5) & 3;
8109             if (!(insn & (1 << 4))) {
8110                 shift = (insn >> 7) & 0x1f;
8111                 gen_arm_shift_im(tmp2, shiftop, shift, logic_cc);
8112             } else {
8113                 rs = (insn >> 8) & 0xf;
8114                 tmp = load_reg(s, rs);
8115                 gen_arm_shift_reg(tmp2, shiftop, tmp, logic_cc);
8116             }
8117         }
8118         if (op1 != 0x0f && op1 != 0x0d) {
8119             rn = (insn >> 16) & 0xf;
8120             tmp = load_reg(s, rn);
8121         } else {
8122             TCGV_UNUSED_I32(tmp);
8123         }
8124         rd = (insn >> 12) & 0xf;
8125         switch(op1) {
8126         case 0x00:
8127             tcg_gen_and_i32(tmp, tmp, tmp2);
8128             if (logic_cc) {
8129                 gen_logic_CC(tmp);
8130             }
8131             store_reg_bx(s, rd, tmp);
8132             break;
8133         case 0x01:
8134             tcg_gen_xor_i32(tmp, tmp, tmp2);
8135             if (logic_cc) {
8136                 gen_logic_CC(tmp);
8137             }
8138             store_reg_bx(s, rd, tmp);
8139             break;
8140         case 0x02:
8141             if (set_cc && rd == 15) {
8142                 /* SUBS r15, ... is used for exception return.  */
8143                 if (IS_USER(s)) {
8144                     goto illegal_op;
8145                 }
8146                 gen_sub_CC(tmp, tmp, tmp2);
8147                 gen_exception_return(s, tmp);
8148             } else {
8149                 if (set_cc) {
8150                     gen_sub_CC(tmp, tmp, tmp2);
8151                 } else {
8152                     tcg_gen_sub_i32(tmp, tmp, tmp2);
8153                 }
8154                 store_reg_bx(s, rd, tmp);
8155             }
8156             break;
8157         case 0x03:
8158             if (set_cc) {
8159                 gen_sub_CC(tmp, tmp2, tmp);
8160             } else {
8161                 tcg_gen_sub_i32(tmp, tmp2, tmp);
8162             }
8163             store_reg_bx(s, rd, tmp);
8164             break;
8165         case 0x04:
8166             if (set_cc) {
8167                 gen_add_CC(tmp, tmp, tmp2);
8168             } else {
8169                 tcg_gen_add_i32(tmp, tmp, tmp2);
8170             }
8171             store_reg_bx(s, rd, tmp);
8172             break;
8173         case 0x05:
8174             if (set_cc) {
8175                 gen_adc_CC(tmp, tmp, tmp2);
8176             } else {
8177                 gen_add_carry(tmp, tmp, tmp2);
8178             }
8179             store_reg_bx(s, rd, tmp);
8180             break;
8181         case 0x06:
8182             if (set_cc) {
8183                 gen_sbc_CC(tmp, tmp, tmp2);
8184             } else {
8185                 gen_sub_carry(tmp, tmp, tmp2);
8186             }
8187             store_reg_bx(s, rd, tmp);
8188             break;
8189         case 0x07:
8190             if (set_cc) {
8191                 gen_sbc_CC(tmp, tmp2, tmp);
8192             } else {
8193                 gen_sub_carry(tmp, tmp2, tmp);
8194             }
8195             store_reg_bx(s, rd, tmp);
8196             break;
8197         case 0x08:
8198             if (set_cc) {
8199                 tcg_gen_and_i32(tmp, tmp, tmp2);
8200                 gen_logic_CC(tmp);
8201             }
8202             tcg_temp_free_i32(tmp);
8203             break;
8204         case 0x09:
8205             if (set_cc) {
8206                 tcg_gen_xor_i32(tmp, tmp, tmp2);
8207                 gen_logic_CC(tmp);
8208             }
8209             tcg_temp_free_i32(tmp);
8210             break;
8211         case 0x0a:
8212             if (set_cc) {
8213                 gen_sub_CC(tmp, tmp, tmp2);
8214             }
8215             tcg_temp_free_i32(tmp);
8216             break;
8217         case 0x0b:
8218             if (set_cc) {
8219                 gen_add_CC(tmp, tmp, tmp2);
8220             }
8221             tcg_temp_free_i32(tmp);
8222             break;
8223         case 0x0c:
8224             tcg_gen_or_i32(tmp, tmp, tmp2);
8225             if (logic_cc) {
8226                 gen_logic_CC(tmp);
8227             }
8228             store_reg_bx(s, rd, tmp);
8229             break;
8230         case 0x0d:
8231             if (logic_cc && rd == 15) {
8232                 /* MOVS r15, ... is used for exception return.  */
8233                 if (IS_USER(s)) {
8234                     goto illegal_op;
8235                 }
8236                 gen_exception_return(s, tmp2);
8237             } else {
8238                 if (logic_cc) {
8239                     gen_logic_CC(tmp2);
8240                 }
8241                 store_reg_bx(s, rd, tmp2);
8242             }
8243             break;
8244         case 0x0e:
8245             tcg_gen_andc_i32(tmp, tmp, tmp2);
8246             if (logic_cc) {
8247                 gen_logic_CC(tmp);
8248             }
8249             store_reg_bx(s, rd, tmp);
8250             break;
8251         default:
8252         case 0x0f:
8253             tcg_gen_not_i32(tmp2, tmp2);
8254             if (logic_cc) {
8255                 gen_logic_CC(tmp2);
8256             }
8257             store_reg_bx(s, rd, tmp2);
8258             break;
8259         }
8260         if (op1 != 0x0f && op1 != 0x0d) {
8261             tcg_temp_free_i32(tmp2);
8262         }
8263     } else {
8264         /* other instructions */
8265         op1 = (insn >> 24) & 0xf;
8266         switch(op1) {
8267         case 0x0:
8268         case 0x1:
8269             /* multiplies, extra load/stores */
8270             sh = (insn >> 5) & 3;
8271             if (sh == 0) {
8272                 if (op1 == 0x0) {
8273                     rd = (insn >> 16) & 0xf;
8274                     rn = (insn >> 12) & 0xf;
8275                     rs = (insn >> 8) & 0xf;
8276                     rm = (insn) & 0xf;
8277                     op1 = (insn >> 20) & 0xf;
8278                     switch (op1) {
8279                     case 0: case 1: case 2: case 3: case 6:
8280                         /* 32 bit mul */
8281                         tmp = load_reg(s, rs);
8282                         tmp2 = load_reg(s, rm);
8283                         tcg_gen_mul_i32(tmp, tmp, tmp2);
8284                         tcg_temp_free_i32(tmp2);
8285                         if (insn & (1 << 22)) {
8286                             /* Subtract (mls) */
8287                             ARCH(6T2);
8288                             tmp2 = load_reg(s, rn);
8289                             tcg_gen_sub_i32(tmp, tmp2, tmp);
8290                             tcg_temp_free_i32(tmp2);
8291                         } else if (insn & (1 << 21)) {
8292                             /* Add */
8293                             tmp2 = load_reg(s, rn);
8294                             tcg_gen_add_i32(tmp, tmp, tmp2);
8295                             tcg_temp_free_i32(tmp2);
8296                         }
8297                         if (insn & (1 << 20))
8298                             gen_logic_CC(tmp);
8299                         store_reg(s, rd, tmp);
8300                         break;
8301                     case 4:
8302                         /* 64 bit mul double accumulate (UMAAL) */
8303                         ARCH(6);
8304                         tmp = load_reg(s, rs);
8305                         tmp2 = load_reg(s, rm);
8306                         tmp64 = gen_mulu_i64_i32(tmp, tmp2);
8307                         gen_addq_lo(s, tmp64, rn);
8308                         gen_addq_lo(s, tmp64, rd);
8309                         gen_storeq_reg(s, rn, rd, tmp64);
8310                         tcg_temp_free_i64(tmp64);
8311                         break;
8312                     case 8: case 9: case 10: case 11:
8313                     case 12: case 13: case 14: case 15:
8314                         /* 64 bit mul: UMULL, UMLAL, SMULL, SMLAL. */
8315                         tmp = load_reg(s, rs);
8316                         tmp2 = load_reg(s, rm);
8317                         if (insn & (1 << 22)) {
8318                             tcg_gen_muls2_i32(tmp, tmp2, tmp, tmp2);
8319                         } else {
8320                             tcg_gen_mulu2_i32(tmp, tmp2, tmp, tmp2);
8321                         }
8322                         if (insn & (1 << 21)) { /* mult accumulate */
8323                             TCGv_i32 al = load_reg(s, rn);
8324                             TCGv_i32 ah = load_reg(s, rd);
8325                             tcg_gen_add2_i32(tmp, tmp2, tmp, tmp2, al, ah);
8326                             tcg_temp_free_i32(al);
8327                             tcg_temp_free_i32(ah);
8328                         }
8329                         if (insn & (1 << 20)) {
8330                             gen_logicq_cc(tmp, tmp2);
8331                         }
8332                         store_reg(s, rn, tmp);
8333                         store_reg(s, rd, tmp2);
8334                         break;
8335                     default:
8336                         goto illegal_op;
8337                     }
8338                 } else {
8339                     rn = (insn >> 16) & 0xf;
8340                     rd = (insn >> 12) & 0xf;
8341                     if (insn & (1 << 23)) {
8342                         /* load/store exclusive */
8343                         int op2 = (insn >> 8) & 3;
8344                         op1 = (insn >> 21) & 0x3;
8345
8346                         switch (op2) {
8347                         case 0: /* lda/stl */
8348                             if (op1 == 1) {
8349                                 goto illegal_op;
8350                             }
8351                             ARCH(8);
8352                             break;
8353                         case 1: /* reserved */
8354                             goto illegal_op;
8355                         case 2: /* ldaex/stlex */
8356                             ARCH(8);
8357                             break;
8358                         case 3: /* ldrex/strex */
8359                             if (op1) {
8360                                 ARCH(6K);
8361                             } else {
8362                                 ARCH(6);
8363                             }
8364                             break;
8365                         }
8366
8367                         addr = tcg_temp_local_new_i32();
8368                         load_reg_var(s, addr, rn);
8369
8370                         /* Since the emulation does not have barriers,
8371                            the acquire/release semantics need no special
8372                            handling */
8373                         if (op2 == 0) {
8374                             if (insn & (1 << 20)) {
8375                                 tmp = tcg_temp_new_i32();
8376                                 switch (op1) {
8377                                 case 0: /* lda */
8378                                     gen_aa32_ld32u(tmp, addr, get_mem_index(s));
8379                                     break;
8380                                 case 2: /* ldab */
8381                                     gen_aa32_ld8u(tmp, addr, get_mem_index(s));
8382                                     break;
8383                                 case 3: /* ldah */
8384                                     gen_aa32_ld16u(tmp, addr, get_mem_index(s));
8385                                     break;
8386                                 default:
8387                                     abort();
8388                                 }
8389                                 store_reg(s, rd, tmp);
8390                             } else {
8391                                 rm = insn & 0xf;
8392                                 tmp = load_reg(s, rm);
8393                                 switch (op1) {
8394                                 case 0: /* stl */
8395                                     gen_aa32_st32(tmp, addr, get_mem_index(s));
8396                                     break;
8397                                 case 2: /* stlb */
8398                                     gen_aa32_st8(tmp, addr, get_mem_index(s));
8399                                     break;
8400                                 case 3: /* stlh */
8401                                     gen_aa32_st16(tmp, addr, get_mem_index(s));
8402                                     break;
8403                                 default:
8404                                     abort();
8405                                 }
8406                                 tcg_temp_free_i32(tmp);
8407                             }
8408                         } else if (insn & (1 << 20)) {
8409                             switch (op1) {
8410                             case 0: /* ldrex */
8411                                 gen_load_exclusive(s, rd, 15, addr, 2);
8412                                 break;
8413                             case 1: /* ldrexd */
8414                                 gen_load_exclusive(s, rd, rd + 1, addr, 3);
8415                                 break;
8416                             case 2: /* ldrexb */
8417                                 gen_load_exclusive(s, rd, 15, addr, 0);
8418                                 break;
8419                             case 3: /* ldrexh */
8420                                 gen_load_exclusive(s, rd, 15, addr, 1);
8421                                 break;
8422                             default:
8423                                 abort();
8424                             }
8425                         } else {
8426                             rm = insn & 0xf;
8427                             switch (op1) {
8428                             case 0:  /*  strex */
8429                                 gen_store_exclusive(s, rd, rm, 15, addr, 2);
8430                                 break;
8431                             case 1: /*  strexd */
8432                                 gen_store_exclusive(s, rd, rm, rm + 1, addr, 3);
8433                                 break;
8434                             case 2: /*  strexb */
8435                                 gen_store_exclusive(s, rd, rm, 15, addr, 0);
8436                                 break;
8437                             case 3: /* strexh */
8438                                 gen_store_exclusive(s, rd, rm, 15, addr, 1);
8439                                 break;
8440                             default:
8441                                 abort();
8442                             }
8443                         }
8444                         tcg_temp_free_i32(addr);
8445                     } else {
8446                         /* SWP instruction */
8447                         rm = (insn) & 0xf;
8448
8449                         /* ??? This is not really atomic.  However we know
8450                            we never have multiple CPUs running in parallel,
8451                            so it is good enough.  */
8452                         addr = load_reg(s, rn);
8453                         tmp = load_reg(s, rm);
8454                         tmp2 = tcg_temp_new_i32();
8455                         if (insn & (1 << 22)) {
8456                             gen_aa32_ld8u(tmp2, addr, get_mem_index(s));
8457                             gen_aa32_st8(tmp, addr, get_mem_index(s));
8458                         } else {
8459                             gen_aa32_ld32u(tmp2, addr, get_mem_index(s));
8460                             gen_aa32_st32(tmp, addr, get_mem_index(s));
8461                         }
8462                         tcg_temp_free_i32(tmp);
8463                         tcg_temp_free_i32(addr);
8464                         store_reg(s, rd, tmp2);
8465                     }
8466                 }
8467             } else {
8468                 int address_offset;
8469                 bool load = insn & (1 << 20);
8470                 bool doubleword = false;
8471                 /* Misc load/store */
8472                 rn = (insn >> 16) & 0xf;
8473                 rd = (insn >> 12) & 0xf;
8474
8475                 if (!load && (sh & 2)) {
8476                     /* doubleword */
8477                     ARCH(5TE);
8478                     if (rd & 1) {
8479                         /* UNPREDICTABLE; we choose to UNDEF */
8480                         goto illegal_op;
8481                     }
8482                     load = (sh & 1) == 0;
8483                     doubleword = true;
8484                 }
8485
8486                 addr = load_reg(s, rn);
8487                 if (insn & (1 << 24))
8488                     gen_add_datah_offset(s, insn, 0, addr);
8489                 address_offset = 0;
8490
8491                 if (doubleword) {
8492                     if (!load) {
8493                         /* store */
8494                         tmp = load_reg(s, rd);
8495                         gen_aa32_st32(tmp, addr, get_mem_index(s));
8496                         tcg_temp_free_i32(tmp);
8497                         tcg_gen_addi_i32(addr, addr, 4);
8498                         tmp = load_reg(s, rd + 1);
8499                         gen_aa32_st32(tmp, addr, get_mem_index(s));
8500                         tcg_temp_free_i32(tmp);
8501                     } else {
8502                         /* load */
8503                         tmp = tcg_temp_new_i32();
8504                         gen_aa32_ld32u(tmp, addr, get_mem_index(s));
8505                         store_reg(s, rd, tmp);
8506                         tcg_gen_addi_i32(addr, addr, 4);
8507                         tmp = tcg_temp_new_i32();
8508                         gen_aa32_ld32u(tmp, addr, get_mem_index(s));
8509                         rd++;
8510                     }
8511                     address_offset = -4;
8512                 } else if (load) {
8513                     /* load */
8514                     tmp = tcg_temp_new_i32();
8515                     switch (sh) {
8516                     case 1:
8517                         gen_aa32_ld16u(tmp, addr, get_mem_index(s));
8518                         break;
8519                     case 2:
8520                         gen_aa32_ld8s(tmp, addr, get_mem_index(s));
8521                         break;
8522                     default:
8523                     case 3:
8524                         gen_aa32_ld16s(tmp, addr, get_mem_index(s));
8525                         break;
8526                     }
8527                 } else {
8528                     /* store */
8529                     tmp = load_reg(s, rd);
8530                     gen_aa32_st16(tmp, addr, get_mem_index(s));
8531                     tcg_temp_free_i32(tmp);
8532                 }
8533                 /* Perform base writeback before the loaded value to
8534                    ensure correct behavior with overlapping index registers.
8535                    ldrd with base writeback is undefined if the
8536                    destination and index registers overlap.  */
8537                 if (!(insn & (1 << 24))) {
8538                     gen_add_datah_offset(s, insn, address_offset, addr);
8539                     store_reg(s, rn, addr);
8540                 } else if (insn & (1 << 21)) {
8541                     if (address_offset)
8542                         tcg_gen_addi_i32(addr, addr, address_offset);
8543                     store_reg(s, rn, addr);
8544                 } else {
8545                     tcg_temp_free_i32(addr);
8546                 }
8547                 if (load) {
8548                     /* Complete the load.  */
8549                     store_reg(s, rd, tmp);
8550                 }
8551             }
8552             break;
8553         case 0x4:
8554         case 0x5:
8555             goto do_ldst;
8556         case 0x6:
8557         case 0x7:
8558             if (insn & (1 << 4)) {
8559                 ARCH(6);
8560                 /* Armv6 Media instructions.  */
8561                 rm = insn & 0xf;
8562                 rn = (insn >> 16) & 0xf;
8563                 rd = (insn >> 12) & 0xf;
8564                 rs = (insn >> 8) & 0xf;
8565                 switch ((insn >> 23) & 3) {
8566                 case 0: /* Parallel add/subtract.  */
8567                     op1 = (insn >> 20) & 7;
8568                     tmp = load_reg(s, rn);
8569                     tmp2 = load_reg(s, rm);
8570                     sh = (insn >> 5) & 7;
8571                     if ((op1 & 3) == 0 || sh == 5 || sh == 6)
8572                         goto illegal_op;
8573                     gen_arm_parallel_addsub(op1, sh, tmp, tmp2);
8574                     tcg_temp_free_i32(tmp2);
8575                     store_reg(s, rd, tmp);
8576                     break;
8577                 case 1:
8578                     if ((insn & 0x00700020) == 0) {
8579                         /* Halfword pack.  */
8580                         tmp = load_reg(s, rn);
8581                         tmp2 = load_reg(s, rm);
8582                         shift = (insn >> 7) & 0x1f;
8583                         if (insn & (1 << 6)) {
8584                             /* pkhtb */
8585                             if (shift == 0)
8586                                 shift = 31;
8587                             tcg_gen_sari_i32(tmp2, tmp2, shift);
8588                             tcg_gen_andi_i32(tmp, tmp, 0xffff0000);
8589                             tcg_gen_ext16u_i32(tmp2, tmp2);
8590                         } else {
8591                             /* pkhbt */
8592                             if (shift)
8593                                 tcg_gen_shli_i32(tmp2, tmp2, shift);
8594                             tcg_gen_ext16u_i32(tmp, tmp);
8595                             tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
8596                         }
8597                         tcg_gen_or_i32(tmp, tmp, tmp2);
8598                         tcg_temp_free_i32(tmp2);
8599                         store_reg(s, rd, tmp);
8600                     } else if ((insn & 0x00200020) == 0x00200000) {
8601                         /* [us]sat */
8602                         tmp = load_reg(s, rm);
8603                         shift = (insn >> 7) & 0x1f;
8604                         if (insn & (1 << 6)) {
8605                             if (shift == 0)
8606                                 shift = 31;
8607                             tcg_gen_sari_i32(tmp, tmp, shift);
8608                         } else {
8609                             tcg_gen_shli_i32(tmp, tmp, shift);
8610                         }
8611                         sh = (insn >> 16) & 0x1f;
8612                         tmp2 = tcg_const_i32(sh);
8613                         if (insn & (1 << 22))
8614                           gen_helper_usat(tmp, cpu_env, tmp, tmp2);
8615                         else
8616                           gen_helper_ssat(tmp, cpu_env, tmp, tmp2);
8617                         tcg_temp_free_i32(tmp2);
8618                         store_reg(s, rd, tmp);
8619                     } else if ((insn & 0x00300fe0) == 0x00200f20) {
8620                         /* [us]sat16 */
8621                         tmp = load_reg(s, rm);
8622                         sh = (insn >> 16) & 0x1f;
8623                         tmp2 = tcg_const_i32(sh);
8624                         if (insn & (1 << 22))
8625                           gen_helper_usat16(tmp, cpu_env, tmp, tmp2);
8626                         else
8627                           gen_helper_ssat16(tmp, cpu_env, tmp, tmp2);
8628                         tcg_temp_free_i32(tmp2);
8629                         store_reg(s, rd, tmp);
8630                     } else if ((insn & 0x00700fe0) == 0x00000fa0) {
8631                         /* Select bytes.  */
8632                         tmp = load_reg(s, rn);
8633                         tmp2 = load_reg(s, rm);
8634                         tmp3 = tcg_temp_new_i32();
8635                         tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUARMState, GE));
8636                         gen_helper_sel_flags(tmp, tmp3, tmp, tmp2);
8637                         tcg_temp_free_i32(tmp3);
8638                         tcg_temp_free_i32(tmp2);
8639                         store_reg(s, rd, tmp);
8640                     } else if ((insn & 0x000003e0) == 0x00000060) {
8641                         tmp = load_reg(s, rm);
8642                         shift = (insn >> 10) & 3;
8643                         /* ??? In many cases it's not necessary to do a
8644                            rotate, a shift is sufficient.  */
8645                         if (shift != 0)
8646                             tcg_gen_rotri_i32(tmp, tmp, shift * 8);
8647                         op1 = (insn >> 20) & 7;
8648                         switch (op1) {
8649                         case 0: gen_sxtb16(tmp);  break;
8650                         case 2: gen_sxtb(tmp);    break;
8651                         case 3: gen_sxth(tmp);    break;
8652                         case 4: gen_uxtb16(tmp);  break;
8653                         case 6: gen_uxtb(tmp);    break;
8654                         case 7: gen_uxth(tmp);    break;
8655                         default: goto illegal_op;
8656                         }
8657                         if (rn != 15) {
8658                             tmp2 = load_reg(s, rn);
8659                             if ((op1 & 3) == 0) {
8660                                 gen_add16(tmp, tmp2);
8661                             } else {
8662                                 tcg_gen_add_i32(tmp, tmp, tmp2);
8663                                 tcg_temp_free_i32(tmp2);
8664                             }
8665                         }
8666                         store_reg(s, rd, tmp);
8667                     } else if ((insn & 0x003f0f60) == 0x003f0f20) {
8668                         /* rev */
8669                         tmp = load_reg(s, rm);
8670                         if (insn & (1 << 22)) {
8671                             if (insn & (1 << 7)) {
8672                                 gen_revsh(tmp);
8673                             } else {
8674                                 ARCH(6T2);
8675                                 gen_helper_rbit(tmp, tmp);
8676                             }
8677                         } else {
8678                             if (insn & (1 << 7))
8679                                 gen_rev16(tmp);
8680                             else
8681                                 tcg_gen_bswap32_i32(tmp, tmp);
8682                         }
8683                         store_reg(s, rd, tmp);
8684                     } else {
8685                         goto illegal_op;
8686                     }
8687                     break;
8688                 case 2: /* Multiplies (Type 3).  */
8689                     switch ((insn >> 20) & 0x7) {
8690                     case 5:
8691                         if (((insn >> 6) ^ (insn >> 7)) & 1) {
8692                             /* op2 not 00x or 11x : UNDEF */
8693                             goto illegal_op;
8694                         }
8695                         /* Signed multiply most significant [accumulate].
8696                            (SMMUL, SMMLA, SMMLS) */
8697                         tmp = load_reg(s, rm);
8698                         tmp2 = load_reg(s, rs);
8699                         tmp64 = gen_muls_i64_i32(tmp, tmp2);
8700
8701                         if (rd != 15) {
8702                             tmp = load_reg(s, rd);
8703                             if (insn & (1 << 6)) {
8704                                 tmp64 = gen_subq_msw(tmp64, tmp);
8705                             } else {
8706                                 tmp64 = gen_addq_msw(tmp64, tmp);
8707                             }
8708                         }
8709                         if (insn & (1 << 5)) {
8710                             tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u);
8711                         }
8712                         tcg_gen_shri_i64(tmp64, tmp64, 32);
8713                         tmp = tcg_temp_new_i32();
8714                         tcg_gen_extrl_i64_i32(tmp, tmp64);
8715                         tcg_temp_free_i64(tmp64);
8716                         store_reg(s, rn, tmp);
8717                         break;
8718                     case 0:
8719                     case 4:
8720                         /* SMLAD, SMUAD, SMLSD, SMUSD, SMLALD, SMLSLD */
8721                         if (insn & (1 << 7)) {
8722                             goto illegal_op;
8723                         }
8724                         tmp = load_reg(s, rm);
8725                         tmp2 = load_reg(s, rs);
8726                         if (insn & (1 << 5))
8727                             gen_swap_half(tmp2);
8728                         gen_smul_dual(tmp, tmp2);
8729                         if (insn & (1 << 22)) {
8730                             /* smlald, smlsld */
8731                             TCGv_i64 tmp64_2;
8732
8733                             tmp64 = tcg_temp_new_i64();
8734                             tmp64_2 = tcg_temp_new_i64();
8735                             tcg_gen_ext_i32_i64(tmp64, tmp);
8736                             tcg_gen_ext_i32_i64(tmp64_2, tmp2);
8737                             tcg_temp_free_i32(tmp);
8738                             tcg_temp_free_i32(tmp2);
8739                             if (insn & (1 << 6)) {
8740                                 tcg_gen_sub_i64(tmp64, tmp64, tmp64_2);
8741                             } else {
8742                                 tcg_gen_add_i64(tmp64, tmp64, tmp64_2);
8743                             }
8744                             tcg_temp_free_i64(tmp64_2);
8745                             gen_addq(s, tmp64, rd, rn);
8746                             gen_storeq_reg(s, rd, rn, tmp64);
8747                             tcg_temp_free_i64(tmp64);
8748                         } else {
8749                             /* smuad, smusd, smlad, smlsd */
8750                             if (insn & (1 << 6)) {
8751                                 /* This subtraction cannot overflow. */
8752                                 tcg_gen_sub_i32(tmp, tmp, tmp2);
8753                             } else {
8754                                 /* This addition cannot overflow 32 bits;
8755                                  * however it may overflow considered as a
8756                                  * signed operation, in which case we must set
8757                                  * the Q flag.
8758                                  */
8759                                 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
8760                             }
8761                             tcg_temp_free_i32(tmp2);
8762                             if (rd != 15)
8763                               {
8764                                 tmp2 = load_reg(s, rd);
8765                                 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
8766                                 tcg_temp_free_i32(tmp2);
8767                               }
8768                             store_reg(s, rn, tmp);
8769                         }
8770                         break;
8771                     case 1:
8772                     case 3:
8773                         /* SDIV, UDIV */
8774                         if (!arm_dc_feature(s, ARM_FEATURE_ARM_DIV)) {
8775                             goto illegal_op;
8776                         }
8777                         if (((insn >> 5) & 7) || (rd != 15)) {
8778                             goto illegal_op;
8779                         }
8780                         tmp = load_reg(s, rm);
8781                         tmp2 = load_reg(s, rs);
8782                         if (insn & (1 << 21)) {
8783                             gen_helper_udiv(tmp, tmp, tmp2);
8784                         } else {
8785                             gen_helper_sdiv(tmp, tmp, tmp2);
8786                         }
8787                         tcg_temp_free_i32(tmp2);
8788                         store_reg(s, rn, tmp);
8789                         break;
8790                     default:
8791                         goto illegal_op;
8792                     }
8793                     break;
8794                 case 3:
8795                     op1 = ((insn >> 17) & 0x38) | ((insn >> 5) & 7);
8796                     switch (op1) {
8797                     case 0: /* Unsigned sum of absolute differences.  */
8798                         ARCH(6);
8799                         tmp = load_reg(s, rm);
8800                         tmp2 = load_reg(s, rs);
8801                         gen_helper_usad8(tmp, tmp, tmp2);
8802                         tcg_temp_free_i32(tmp2);
8803                         if (rd != 15) {
8804                             tmp2 = load_reg(s, rd);
8805                             tcg_gen_add_i32(tmp, tmp, tmp2);
8806                             tcg_temp_free_i32(tmp2);
8807                         }
8808                         store_reg(s, rn, tmp);
8809                         break;
8810                     case 0x20: case 0x24: case 0x28: case 0x2c:
8811                         /* Bitfield insert/clear.  */
8812                         ARCH(6T2);
8813                         shift = (insn >> 7) & 0x1f;
8814                         i = (insn >> 16) & 0x1f;
8815                         if (i < shift) {
8816                             /* UNPREDICTABLE; we choose to UNDEF */
8817                             goto illegal_op;
8818                         }
8819                         i = i + 1 - shift;
8820                         if (rm == 15) {
8821                             tmp = tcg_temp_new_i32();
8822                             tcg_gen_movi_i32(tmp, 0);
8823                         } else {
8824                             tmp = load_reg(s, rm);
8825                         }
8826                         if (i != 32) {
8827                             tmp2 = load_reg(s, rd);
8828                             tcg_gen_deposit_i32(tmp, tmp2, tmp, shift, i);
8829                             tcg_temp_free_i32(tmp2);
8830                         }
8831                         store_reg(s, rd, tmp);
8832                         break;
8833                     case 0x12: case 0x16: case 0x1a: case 0x1e: /* sbfx */
8834                     case 0x32: case 0x36: case 0x3a: case 0x3e: /* ubfx */
8835                         ARCH(6T2);
8836                         tmp = load_reg(s, rm);
8837                         shift = (insn >> 7) & 0x1f;
8838                         i = ((insn >> 16) & 0x1f) + 1;
8839                         if (shift + i > 32)
8840                             goto illegal_op;
8841                         if (i < 32) {
8842                             if (op1 & 0x20) {
8843                                 gen_ubfx(tmp, shift, (1u << i) - 1);
8844                             } else {
8845                                 gen_sbfx(tmp, shift, i);
8846                             }
8847                         }
8848                         store_reg(s, rd, tmp);
8849                         break;
8850                     default:
8851                         goto illegal_op;
8852                     }
8853                     break;
8854                 }
8855                 break;
8856             }
8857         do_ldst:
8858             /* Check for undefined extension instructions
8859              * per the ARM Bible IE:
8860              * xxxx 0111 1111 xxxx  xxxx xxxx 1111 xxxx
8861              */
8862             sh = (0xf << 20) | (0xf << 4);
8863             if (op1 == 0x7 && ((insn & sh) == sh))
8864             {
8865                 goto illegal_op;
8866             }
8867             /* load/store byte/word */
8868             rn = (insn >> 16) & 0xf;
8869             rd = (insn >> 12) & 0xf;
8870             tmp2 = load_reg(s, rn);
8871             if ((insn & 0x01200000) == 0x00200000) {
8872                 /* ldrt/strt */
8873                 i = get_a32_user_mem_index(s);
8874             } else {
8875                 i = get_mem_index(s);
8876             }
8877             if (insn & (1 << 24))
8878                 gen_add_data_offset(s, insn, tmp2);
8879             if (insn & (1 << 20)) {
8880                 /* load */
8881                 tmp = tcg_temp_new_i32();
8882                 if (insn & (1 << 22)) {
8883                     gen_aa32_ld8u(tmp, tmp2, i);
8884                 } else {
8885                     gen_aa32_ld32u(tmp, tmp2, i);
8886                 }
8887             } else {
8888                 /* store */
8889                 tmp = load_reg(s, rd);
8890                 if (insn & (1 << 22)) {
8891                     gen_aa32_st8(tmp, tmp2, i);
8892                 } else {
8893                     gen_aa32_st32(tmp, tmp2, i);
8894                 }
8895                 tcg_temp_free_i32(tmp);
8896             }
8897             if (!(insn & (1 << 24))) {
8898                 gen_add_data_offset(s, insn, tmp2);
8899                 store_reg(s, rn, tmp2);
8900             } else if (insn & (1 << 21)) {
8901                 store_reg(s, rn, tmp2);
8902             } else {
8903                 tcg_temp_free_i32(tmp2);
8904             }
8905             if (insn & (1 << 20)) {
8906                 /* Complete the load.  */
8907                 store_reg_from_load(s, rd, tmp);
8908             }
8909             break;
8910         case 0x08:
8911         case 0x09:
8912             {
8913                 int j, n, loaded_base;
8914                 bool exc_return = false;
8915                 bool is_load = extract32(insn, 20, 1);
8916                 bool user = false;
8917                 TCGv_i32 loaded_var;
8918                 /* load/store multiple words */
8919                 /* XXX: store correct base if write back */
8920                 if (insn & (1 << 22)) {
8921                     /* LDM (user), LDM (exception return) and STM (user) */
8922                     if (IS_USER(s))
8923                         goto illegal_op; /* only usable in supervisor mode */
8924
8925                     if (is_load && extract32(insn, 15, 1)) {
8926                         exc_return = true;
8927                     } else {
8928                         user = true;
8929                     }
8930                 }
8931                 rn = (insn >> 16) & 0xf;
8932                 addr = load_reg(s, rn);
8933
8934                 /* compute total size */
8935                 loaded_base = 0;
8936                 TCGV_UNUSED_I32(loaded_var);
8937                 n = 0;
8938                 for(i=0;i<16;i++) {
8939                     if (insn & (1 << i))
8940                         n++;
8941                 }
8942                 /* XXX: test invalid n == 0 case ? */
8943                 if (insn & (1 << 23)) {
8944                     if (insn & (1 << 24)) {
8945                         /* pre increment */
8946                         tcg_gen_addi_i32(addr, addr, 4);
8947                     } else {
8948                         /* post increment */
8949                     }
8950                 } else {
8951                     if (insn & (1 << 24)) {
8952                         /* pre decrement */
8953                         tcg_gen_addi_i32(addr, addr, -(n * 4));
8954                     } else {
8955                         /* post decrement */
8956                         if (n != 1)
8957                         tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
8958                     }
8959                 }
8960                 j = 0;
8961                 for(i=0;i<16;i++) {
8962                     if (insn & (1 << i)) {
8963                         if (is_load) {
8964                             /* load */
8965                             tmp = tcg_temp_new_i32();
8966                             gen_aa32_ld32u(tmp, addr, get_mem_index(s));
8967                             if (user) {
8968                                 tmp2 = tcg_const_i32(i);
8969                                 gen_helper_set_user_reg(cpu_env, tmp2, tmp);
8970                                 tcg_temp_free_i32(tmp2);
8971                                 tcg_temp_free_i32(tmp);
8972                             } else if (i == rn) {
8973                                 loaded_var = tmp;
8974                                 loaded_base = 1;
8975                             } else {
8976                                 store_reg_from_load(s, i, tmp);
8977                             }
8978                         } else {
8979                             /* store */
8980                             if (i == 15) {
8981                                 /* special case: r15 = PC + 8 */
8982                                 val = (long)s->pc + 4;
8983                                 tmp = tcg_temp_new_i32();
8984                                 tcg_gen_movi_i32(tmp, val);
8985                             } else if (user) {
8986                                 tmp = tcg_temp_new_i32();
8987                                 tmp2 = tcg_const_i32(i);
8988                                 gen_helper_get_user_reg(tmp, cpu_env, tmp2);
8989                                 tcg_temp_free_i32(tmp2);
8990                             } else {
8991                                 tmp = load_reg(s, i);
8992                             }
8993                             gen_aa32_st32(tmp, addr, get_mem_index(s));
8994                             tcg_temp_free_i32(tmp);
8995                         }
8996                         j++;
8997                         /* no need to add after the last transfer */
8998                         if (j != n)
8999                             tcg_gen_addi_i32(addr, addr, 4);
9000                     }
9001                 }
9002                 if (insn & (1 << 21)) {
9003                     /* write back */
9004                     if (insn & (1 << 23)) {
9005                         if (insn & (1 << 24)) {
9006                             /* pre increment */
9007                         } else {
9008                             /* post increment */
9009                             tcg_gen_addi_i32(addr, addr, 4);
9010                         }
9011                     } else {
9012                         if (insn & (1 << 24)) {
9013                             /* pre decrement */
9014                             if (n != 1)
9015                                 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
9016                         } else {
9017                             /* post decrement */
9018                             tcg_gen_addi_i32(addr, addr, -(n * 4));
9019                         }
9020                     }
9021                     store_reg(s, rn, addr);
9022                 } else {
9023                     tcg_temp_free_i32(addr);
9024                 }
9025                 if (loaded_base) {
9026                     store_reg(s, rn, loaded_var);
9027                 }
9028                 if (exc_return) {
9029                     /* Restore CPSR from SPSR.  */
9030                     tmp = load_cpu_field(spsr);
9031                     gen_set_cpsr(tmp, CPSR_ERET_MASK);
9032                     tcg_temp_free_i32(tmp);
9033                     s->is_jmp = DISAS_UPDATE;
9034                 }
9035             }
9036             break;
9037         case 0xa:
9038         case 0xb:
9039             {
9040                 int32_t offset;
9041
9042                 /* branch (and link) */
9043                 val = (int32_t)s->pc;
9044                 if (insn & (1 << 24)) {
9045                     tmp = tcg_temp_new_i32();
9046                     tcg_gen_movi_i32(tmp, val);
9047                     store_reg(s, 14, tmp);
9048                 }
9049                 offset = sextract32(insn << 2, 0, 26);
9050                 val += offset + 4;
9051                 gen_jmp(s, val);
9052             }
9053             break;
9054         case 0xc:
9055         case 0xd:
9056         case 0xe:
9057             if (((insn >> 8) & 0xe) == 10) {
9058                 /* VFP.  */
9059                 if (disas_vfp_insn(s, insn)) {
9060                     goto illegal_op;
9061                 }
9062             } else if (disas_coproc_insn(s, insn)) {
9063                 /* Coprocessor.  */
9064                 goto illegal_op;
9065             }
9066             break;
9067         case 0xf:
9068             /* swi */
9069             gen_set_pc_im(s, s->pc);
9070             s->svc_imm = extract32(insn, 0, 24);
9071             s->is_jmp = DISAS_SWI;
9072             break;
9073         default:
9074         illegal_op:
9075             gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
9076                                default_exception_el(s));
9077             break;
9078         }
9079     }
9080 }
9081
/* Return nonzero if OP is a Thumb-2 logical data-processing opcode.
 * Opcodes 0..7 form the logical group (AND, BIC, ORR, ORN, EOR, ...);
 * opcodes 8 and above are the arithmetic group.
 */
static int
thumb2_logic_op(int op)
{
    return (op < 8) ? 1 : 0;
}
9088
9089 /* Generate code for a Thumb-2 data processing operation.  If CONDS is nonzero
9090    then set condition code flags based on the result of the operation.
9091    If SHIFTER_OUT is nonzero then set the carry flag for logical operations
9092    to the high bit of T1.
9093    Returns zero if the opcode is valid.  */
9094
9095 static int
9096 gen_thumb2_data_op(DisasContext *s, int op, int conds, uint32_t shifter_out,
9097                    TCGv_i32 t0, TCGv_i32 t1)
9098 {
9099     int logic_cc;
9100
9101     logic_cc = 0;
9102     switch (op) {
9103     case 0: /* and */
9104         tcg_gen_and_i32(t0, t0, t1);
9105         logic_cc = conds;
9106         break;
9107     case 1: /* bic */
9108         tcg_gen_andc_i32(t0, t0, t1);
9109         logic_cc = conds;
9110         break;
9111     case 2: /* orr */
9112         tcg_gen_or_i32(t0, t0, t1);
9113         logic_cc = conds;
9114         break;
9115     case 3: /* orn */
9116         tcg_gen_orc_i32(t0, t0, t1);
9117         logic_cc = conds;
9118         break;
9119     case 4: /* eor */
9120         tcg_gen_xor_i32(t0, t0, t1);
9121         logic_cc = conds;
9122         break;
9123     case 8: /* add */
9124         if (conds)
9125             gen_add_CC(t0, t0, t1);
9126         else
9127             tcg_gen_add_i32(t0, t0, t1);
9128         break;
9129     case 10: /* adc */
9130         if (conds)
9131             gen_adc_CC(t0, t0, t1);
9132         else
9133             gen_adc(t0, t1);
9134         break;
9135     case 11: /* sbc */
9136         if (conds) {
9137             gen_sbc_CC(t0, t0, t1);
9138         } else {
9139             gen_sub_carry(t0, t0, t1);
9140         }
9141         break;
9142     case 13: /* sub */
9143         if (conds)
9144             gen_sub_CC(t0, t0, t1);
9145         else
9146             tcg_gen_sub_i32(t0, t0, t1);
9147         break;
9148     case 14: /* rsb */
9149         if (conds)
9150             gen_sub_CC(t0, t1, t0);
9151         else
9152             tcg_gen_sub_i32(t0, t1, t0);
9153         break;
9154     default: /* 5, 6, 7, 9, 12, 15. */
9155         return 1;
9156     }
9157     if (logic_cc) {
9158         gen_logic_CC(t0);
9159         if (shifter_out)
9160             gen_set_CF_bit31(t1);
9161     }
9162     return 0;
9163 }
9164
9165 /* Translate a 32-bit thumb instruction.  Returns nonzero if the instruction
9166    is not legal.  */
9167 static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw1)
9168 {
9169     uint32_t insn, imm, shift, offset;
9170     uint32_t rd, rn, rm, rs;
9171     TCGv_i32 tmp;
9172     TCGv_i32 tmp2;
9173     TCGv_i32 tmp3;
9174     TCGv_i32 addr;
9175     TCGv_i64 tmp64;
9176     int op;
9177     int shiftop;
9178     int conds;
9179     int logic_cc;
9180
9181     if (!(arm_dc_feature(s, ARM_FEATURE_THUMB2)
9182           || arm_dc_feature(s, ARM_FEATURE_M))) {
9183         /* Thumb-1 cores may need to treat bl and blx as a pair of
9184            16-bit instructions to get correct prefetch abort behavior.  */
9185         insn = insn_hw1;
9186         if ((insn & (1 << 12)) == 0) {
9187             ARCH(5);
9188             /* Second half of blx.  */
9189             offset = ((insn & 0x7ff) << 1);
9190             tmp = load_reg(s, 14);
9191             tcg_gen_addi_i32(tmp, tmp, offset);
9192             tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
9193
9194             tmp2 = tcg_temp_new_i32();
9195             tcg_gen_movi_i32(tmp2, s->pc | 1);
9196             store_reg(s, 14, tmp2);
9197             gen_bx(s, tmp);
9198             return 0;
9199         }
9200         if (insn & (1 << 11)) {
9201             /* Second half of bl.  */
9202             offset = ((insn & 0x7ff) << 1) | 1;
9203             tmp = load_reg(s, 14);
9204             tcg_gen_addi_i32(tmp, tmp, offset);
9205
9206             tmp2 = tcg_temp_new_i32();
9207             tcg_gen_movi_i32(tmp2, s->pc | 1);
9208             store_reg(s, 14, tmp2);
9209             gen_bx(s, tmp);
9210             return 0;
9211         }
9212         if ((s->pc & ~TARGET_PAGE_MASK) == 0) {
9213             /* Instruction spans a page boundary.  Implement it as two
9214                16-bit instructions in case the second half causes an
9215                prefetch abort.  */
9216             offset = ((int32_t)insn << 21) >> 9;
9217             tcg_gen_movi_i32(cpu_R[14], s->pc + 2 + offset);
9218             return 0;
9219         }
9220         /* Fall through to 32-bit decode.  */
9221     }
9222
9223     insn = arm_lduw_code(env, s->pc, s->bswap_code);
9224     s->pc += 2;
9225     insn |= (uint32_t)insn_hw1 << 16;
9226
9227     if ((insn & 0xf800e800) != 0xf000e800) {
9228         ARCH(6T2);
9229     }
9230
9231     rn = (insn >> 16) & 0xf;
9232     rs = (insn >> 12) & 0xf;
9233     rd = (insn >> 8) & 0xf;
9234     rm = insn & 0xf;
9235     switch ((insn >> 25) & 0xf) {
9236     case 0: case 1: case 2: case 3:
9237         /* 16-bit instructions.  Should never happen.  */
9238         abort();
9239     case 4:
9240         if (insn & (1 << 22)) {
9241             /* Other load/store, table branch.  */
9242             if (insn & 0x01200000) {
9243                 /* Load/store doubleword.  */
9244                 if (rn == 15) {
9245                     addr = tcg_temp_new_i32();
9246                     tcg_gen_movi_i32(addr, s->pc & ~3);
9247                 } else {
9248                     addr = load_reg(s, rn);
9249                 }
9250                 offset = (insn & 0xff) * 4;
9251                 if ((insn & (1 << 23)) == 0)
9252                     offset = -offset;
9253                 if (insn & (1 << 24)) {
9254                     tcg_gen_addi_i32(addr, addr, offset);
9255                     offset = 0;
9256                 }
9257                 if (insn & (1 << 20)) {
9258                     /* ldrd */
9259                     tmp = tcg_temp_new_i32();
9260                     gen_aa32_ld32u(tmp, addr, get_mem_index(s));
9261                     store_reg(s, rs, tmp);
9262                     tcg_gen_addi_i32(addr, addr, 4);
9263                     tmp = tcg_temp_new_i32();
9264                     gen_aa32_ld32u(tmp, addr, get_mem_index(s));
9265                     store_reg(s, rd, tmp);
9266                 } else {
9267                     /* strd */
9268                     tmp = load_reg(s, rs);
9269                     gen_aa32_st32(tmp, addr, get_mem_index(s));
9270                     tcg_temp_free_i32(tmp);
9271                     tcg_gen_addi_i32(addr, addr, 4);
9272                     tmp = load_reg(s, rd);
9273                     gen_aa32_st32(tmp, addr, get_mem_index(s));
9274                     tcg_temp_free_i32(tmp);
9275                 }
9276                 if (insn & (1 << 21)) {
9277                     /* Base writeback.  */
9278                     if (rn == 15)
9279                         goto illegal_op;
9280                     tcg_gen_addi_i32(addr, addr, offset - 4);
9281                     store_reg(s, rn, addr);
9282                 } else {
9283                     tcg_temp_free_i32(addr);
9284                 }
9285             } else if ((insn & (1 << 23)) == 0) {
9286                 /* Load/store exclusive word.  */
9287                 addr = tcg_temp_local_new_i32();
9288                 load_reg_var(s, addr, rn);
9289                 tcg_gen_addi_i32(addr, addr, (insn & 0xff) << 2);
9290                 if (insn & (1 << 20)) {
9291                     gen_load_exclusive(s, rs, 15, addr, 2);
9292                 } else {
9293                     gen_store_exclusive(s, rd, rs, 15, addr, 2);
9294                 }
9295                 tcg_temp_free_i32(addr);
9296             } else if ((insn & (7 << 5)) == 0) {
9297                 /* Table Branch.  */
9298                 if (rn == 15) {
9299                     addr = tcg_temp_new_i32();
9300                     tcg_gen_movi_i32(addr, s->pc);
9301                 } else {
9302                     addr = load_reg(s, rn);
9303                 }
9304                 tmp = load_reg(s, rm);
9305                 tcg_gen_add_i32(addr, addr, tmp);
9306                 if (insn & (1 << 4)) {
9307                     /* tbh */
9308                     tcg_gen_add_i32(addr, addr, tmp);
9309                     tcg_temp_free_i32(tmp);
9310                     tmp = tcg_temp_new_i32();
9311                     gen_aa32_ld16u(tmp, addr, get_mem_index(s));
9312                 } else { /* tbb */
9313                     tcg_temp_free_i32(tmp);
9314                     tmp = tcg_temp_new_i32();
9315                     gen_aa32_ld8u(tmp, addr, get_mem_index(s));
9316                 }
9317                 tcg_temp_free_i32(addr);
9318                 tcg_gen_shli_i32(tmp, tmp, 1);
9319                 tcg_gen_addi_i32(tmp, tmp, s->pc);
9320                 store_reg(s, 15, tmp);
9321             } else {
9322                 int op2 = (insn >> 6) & 0x3;
9323                 op = (insn >> 4) & 0x3;
9324                 switch (op2) {
9325                 case 0:
9326                     goto illegal_op;
9327                 case 1:
9328                     /* Load/store exclusive byte/halfword/doubleword */
9329                     if (op == 2) {
9330                         goto illegal_op;
9331                     }
9332                     ARCH(7);
9333                     break;
9334                 case 2:
9335                     /* Load-acquire/store-release */
9336                     if (op == 3) {
9337                         goto illegal_op;
9338                     }
9339                     /* Fall through */
9340                 case 3:
9341                     /* Load-acquire/store-release exclusive */
9342                     ARCH(8);
9343                     break;
9344                 }
9345                 addr = tcg_temp_local_new_i32();
9346                 load_reg_var(s, addr, rn);
9347                 if (!(op2 & 1)) {
9348                     if (insn & (1 << 20)) {
9349                         tmp = tcg_temp_new_i32();
9350                         switch (op) {
9351                         case 0: /* ldab */
9352                             gen_aa32_ld8u(tmp, addr, get_mem_index(s));
9353                             break;
9354                         case 1: /* ldah */
9355                             gen_aa32_ld16u(tmp, addr, get_mem_index(s));
9356                             break;
9357                         case 2: /* lda */
9358                             gen_aa32_ld32u(tmp, addr, get_mem_index(s));
9359                             break;
9360                         default:
9361                             abort();
9362                         }
9363                         store_reg(s, rs, tmp);
9364                     } else {
9365                         tmp = load_reg(s, rs);
9366                         switch (op) {
9367                         case 0: /* stlb */
9368                             gen_aa32_st8(tmp, addr, get_mem_index(s));
9369                             break;
9370                         case 1: /* stlh */
9371                             gen_aa32_st16(tmp, addr, get_mem_index(s));
9372                             break;
9373                         case 2: /* stl */
9374                             gen_aa32_st32(tmp, addr, get_mem_index(s));
9375                             break;
9376                         default:
9377                             abort();
9378                         }
9379                         tcg_temp_free_i32(tmp);
9380                     }
9381                 } else if (insn & (1 << 20)) {
9382                     gen_load_exclusive(s, rs, rd, addr, op);
9383                 } else {
9384                     gen_store_exclusive(s, rm, rs, rd, addr, op);
9385                 }
9386                 tcg_temp_free_i32(addr);
9387             }
9388         } else {
9389             /* Load/store multiple, RFE, SRS.  */
9390             if (((insn >> 23) & 1) == ((insn >> 24) & 1)) {
9391                 /* RFE, SRS: not available in user mode or on M profile */
9392                 if (IS_USER(s) || arm_dc_feature(s, ARM_FEATURE_M)) {
9393                     goto illegal_op;
9394                 }
9395                 if (insn & (1 << 20)) {
9396                     /* rfe */
9397                     addr = load_reg(s, rn);
9398                     if ((insn & (1 << 24)) == 0)
9399                         tcg_gen_addi_i32(addr, addr, -8);
9400                     /* Load PC into tmp and CPSR into tmp2.  */
9401                     tmp = tcg_temp_new_i32();
9402                     gen_aa32_ld32u(tmp, addr, get_mem_index(s));
9403                     tcg_gen_addi_i32(addr, addr, 4);
9404                     tmp2 = tcg_temp_new_i32();
9405                     gen_aa32_ld32u(tmp2, addr, get_mem_index(s));
9406                     if (insn & (1 << 21)) {
9407                         /* Base writeback.  */
9408                         if (insn & (1 << 24)) {
9409                             tcg_gen_addi_i32(addr, addr, 4);
9410                         } else {
9411                             tcg_gen_addi_i32(addr, addr, -4);
9412                         }
9413                         store_reg(s, rn, addr);
9414                     } else {
9415                         tcg_temp_free_i32(addr);
9416                     }
9417                     gen_rfe(s, tmp, tmp2);
9418                 } else {
9419                     /* srs */
9420                     gen_srs(s, (insn & 0x1f), (insn & (1 << 24)) ? 1 : 2,
9421                             insn & (1 << 21));
9422                 }
9423             } else {
9424                 int i, loaded_base = 0;
9425                 TCGv_i32 loaded_var;
9426                 /* Load/store multiple.  */
9427                 addr = load_reg(s, rn);
9428                 offset = 0;
9429                 for (i = 0; i < 16; i++) {
9430                     if (insn & (1 << i))
9431                         offset += 4;
9432                 }
9433                 if (insn & (1 << 24)) {
9434                     tcg_gen_addi_i32(addr, addr, -offset);
9435                 }
9436
9437                 TCGV_UNUSED_I32(loaded_var);
9438                 for (i = 0; i < 16; i++) {
9439                     if ((insn & (1 << i)) == 0)
9440                         continue;
9441                     if (insn & (1 << 20)) {
9442                         /* Load.  */
9443                         tmp = tcg_temp_new_i32();
9444                         gen_aa32_ld32u(tmp, addr, get_mem_index(s));
9445                         if (i == 15) {
9446                             gen_bx(s, tmp);
9447                         } else if (i == rn) {
9448                             loaded_var = tmp;
9449                             loaded_base = 1;
9450                         } else {
9451                             store_reg(s, i, tmp);
9452                         }
9453                     } else {
9454                         /* Store.  */
9455                         tmp = load_reg(s, i);
9456                         gen_aa32_st32(tmp, addr, get_mem_index(s));
9457                         tcg_temp_free_i32(tmp);
9458                     }
9459                     tcg_gen_addi_i32(addr, addr, 4);
9460                 }
9461                 if (loaded_base) {
9462                     store_reg(s, rn, loaded_var);
9463                 }
9464                 if (insn & (1 << 21)) {
9465                     /* Base register writeback.  */
9466                     if (insn & (1 << 24)) {
9467                         tcg_gen_addi_i32(addr, addr, -offset);
9468                     }
9469                     /* Fault if writeback register is in register list.  */
9470                     if (insn & (1 << rn))
9471                         goto illegal_op;
9472                     store_reg(s, rn, addr);
9473                 } else {
9474                     tcg_temp_free_i32(addr);
9475                 }
9476             }
9477         }
9478         break;
9479     case 5:
9480
9481         op = (insn >> 21) & 0xf;
9482         if (op == 6) {
9483             if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9484                 goto illegal_op;
9485             }
9486             /* Halfword pack.  */
9487             tmp = load_reg(s, rn);
9488             tmp2 = load_reg(s, rm);
9489             shift = ((insn >> 10) & 0x1c) | ((insn >> 6) & 0x3);
9490             if (insn & (1 << 5)) {
9491                 /* pkhtb */
9492                 if (shift == 0)
9493                     shift = 31;
9494                 tcg_gen_sari_i32(tmp2, tmp2, shift);
9495                 tcg_gen_andi_i32(tmp, tmp, 0xffff0000);
9496                 tcg_gen_ext16u_i32(tmp2, tmp2);
9497             } else {
9498                 /* pkhbt */
9499                 if (shift)
9500                     tcg_gen_shli_i32(tmp2, tmp2, shift);
9501                 tcg_gen_ext16u_i32(tmp, tmp);
9502                 tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
9503             }
9504             tcg_gen_or_i32(tmp, tmp, tmp2);
9505             tcg_temp_free_i32(tmp2);
9506             store_reg(s, rd, tmp);
9507         } else {
9508             /* Data processing register constant shift.  */
9509             if (rn == 15) {
9510                 tmp = tcg_temp_new_i32();
9511                 tcg_gen_movi_i32(tmp, 0);
9512             } else {
9513                 tmp = load_reg(s, rn);
9514             }
9515             tmp2 = load_reg(s, rm);
9516
9517             shiftop = (insn >> 4) & 3;
9518             shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
9519             conds = (insn & (1 << 20)) != 0;
9520             logic_cc = (conds && thumb2_logic_op(op));
9521             gen_arm_shift_im(tmp2, shiftop, shift, logic_cc);
9522             if (gen_thumb2_data_op(s, op, conds, 0, tmp, tmp2))
9523                 goto illegal_op;
9524             tcg_temp_free_i32(tmp2);
9525             if (rd != 15) {
9526                 store_reg(s, rd, tmp);
9527             } else {
9528                 tcg_temp_free_i32(tmp);
9529             }
9530         }
9531         break;
9532     case 13: /* Misc data processing.  */
9533         op = ((insn >> 22) & 6) | ((insn >> 7) & 1);
9534         if (op < 4 && (insn & 0xf000) != 0xf000)
9535             goto illegal_op;
9536         switch (op) {
9537         case 0: /* Register controlled shift.  */
9538             tmp = load_reg(s, rn);
9539             tmp2 = load_reg(s, rm);
9540             if ((insn & 0x70) != 0)
9541                 goto illegal_op;
9542             op = (insn >> 21) & 3;
9543             logic_cc = (insn & (1 << 20)) != 0;
9544             gen_arm_shift_reg(tmp, op, tmp2, logic_cc);
9545             if (logic_cc)
9546                 gen_logic_CC(tmp);
9547             store_reg_bx(s, rd, tmp);
9548             break;
9549         case 1: /* Sign/zero extend.  */
9550             op = (insn >> 20) & 7;
9551             switch (op) {
9552             case 0: /* SXTAH, SXTH */
9553             case 1: /* UXTAH, UXTH */
9554             case 4: /* SXTAB, SXTB */
9555             case 5: /* UXTAB, UXTB */
9556                 break;
9557             case 2: /* SXTAB16, SXTB16 */
9558             case 3: /* UXTAB16, UXTB16 */
9559                 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9560                     goto illegal_op;
9561                 }
9562                 break;
9563             default:
9564                 goto illegal_op;
9565             }
9566             if (rn != 15) {
9567                 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9568                     goto illegal_op;
9569                 }
9570             }
9571             tmp = load_reg(s, rm);
9572             shift = (insn >> 4) & 3;
9573             /* ??? In many cases it's not necessary to do a
9574                rotate, a shift is sufficient.  */
9575             if (shift != 0)
9576                 tcg_gen_rotri_i32(tmp, tmp, shift * 8);
9577             op = (insn >> 20) & 7;
9578             switch (op) {
9579             case 0: gen_sxth(tmp);   break;
9580             case 1: gen_uxth(tmp);   break;
9581             case 2: gen_sxtb16(tmp); break;
9582             case 3: gen_uxtb16(tmp); break;
9583             case 4: gen_sxtb(tmp);   break;
9584             case 5: gen_uxtb(tmp);   break;
9585             default:
9586                 g_assert_not_reached();
9587             }
9588             if (rn != 15) {
9589                 tmp2 = load_reg(s, rn);
9590                 if ((op >> 1) == 1) {
9591                     gen_add16(tmp, tmp2);
9592                 } else {
9593                     tcg_gen_add_i32(tmp, tmp, tmp2);
9594                     tcg_temp_free_i32(tmp2);
9595                 }
9596             }
9597             store_reg(s, rd, tmp);
9598             break;
9599         case 2: /* SIMD add/subtract.  */
9600             if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9601                 goto illegal_op;
9602             }
9603             op = (insn >> 20) & 7;
9604             shift = (insn >> 4) & 7;
9605             if ((op & 3) == 3 || (shift & 3) == 3)
9606                 goto illegal_op;
9607             tmp = load_reg(s, rn);
9608             tmp2 = load_reg(s, rm);
9609             gen_thumb2_parallel_addsub(op, shift, tmp, tmp2);
9610             tcg_temp_free_i32(tmp2);
9611             store_reg(s, rd, tmp);
9612             break;
9613         case 3: /* Other data processing.  */
9614             op = ((insn >> 17) & 0x38) | ((insn >> 4) & 7);
9615             if (op < 4) {
9616                 /* Saturating add/subtract.  */
9617                 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9618                     goto illegal_op;
9619                 }
9620                 tmp = load_reg(s, rn);
9621                 tmp2 = load_reg(s, rm);
9622                 if (op & 1)
9623                     gen_helper_double_saturate(tmp, cpu_env, tmp);
9624                 if (op & 2)
9625                     gen_helper_sub_saturate(tmp, cpu_env, tmp2, tmp);
9626                 else
9627                     gen_helper_add_saturate(tmp, cpu_env, tmp, tmp2);
9628                 tcg_temp_free_i32(tmp2);
9629             } else {
9630                 switch (op) {
9631                 case 0x0a: /* rbit */
9632                 case 0x08: /* rev */
9633                 case 0x09: /* rev16 */
9634                 case 0x0b: /* revsh */
9635                 case 0x18: /* clz */
9636                     break;
9637                 case 0x10: /* sel */
9638                     if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9639                         goto illegal_op;
9640                     }
9641                     break;
9642                 case 0x20: /* crc32/crc32c */
9643                 case 0x21:
9644                 case 0x22:
9645                 case 0x28:
9646                 case 0x29:
9647                 case 0x2a:
9648                     if (!arm_dc_feature(s, ARM_FEATURE_CRC)) {
9649                         goto illegal_op;
9650                     }
9651                     break;
9652                 default:
9653                     goto illegal_op;
9654                 }
9655                 tmp = load_reg(s, rn);
9656                 switch (op) {
9657                 case 0x0a: /* rbit */
9658                     gen_helper_rbit(tmp, tmp);
9659                     break;
9660                 case 0x08: /* rev */
9661                     tcg_gen_bswap32_i32(tmp, tmp);
9662                     break;
9663                 case 0x09: /* rev16 */
9664                     gen_rev16(tmp);
9665                     break;
9666                 case 0x0b: /* revsh */
9667                     gen_revsh(tmp);
9668                     break;
9669                 case 0x10: /* sel */
9670                     tmp2 = load_reg(s, rm);
9671                     tmp3 = tcg_temp_new_i32();
9672                     tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUARMState, GE));
9673                     gen_helper_sel_flags(tmp, tmp3, tmp, tmp2);
9674                     tcg_temp_free_i32(tmp3);
9675                     tcg_temp_free_i32(tmp2);
9676                     break;
9677                 case 0x18: /* clz */
9678                     gen_helper_clz(tmp, tmp);
9679                     break;
9680                 case 0x20:
9681                 case 0x21:
9682                 case 0x22:
9683                 case 0x28:
9684                 case 0x29:
9685                 case 0x2a:
9686                 {
9687                     /* crc32/crc32c */
9688                     uint32_t sz = op & 0x3;
9689                     uint32_t c = op & 0x8;
9690
9691                     tmp2 = load_reg(s, rm);
9692                     if (sz == 0) {
9693                         tcg_gen_andi_i32(tmp2, tmp2, 0xff);
9694                     } else if (sz == 1) {
9695                         tcg_gen_andi_i32(tmp2, tmp2, 0xffff);
9696                     }
9697                     tmp3 = tcg_const_i32(1 << sz);
9698                     if (c) {
9699                         gen_helper_crc32c(tmp, tmp, tmp2, tmp3);
9700                     } else {
9701                         gen_helper_crc32(tmp, tmp, tmp2, tmp3);
9702                     }
9703                     tcg_temp_free_i32(tmp2);
9704                     tcg_temp_free_i32(tmp3);
9705                     break;
9706                 }
9707                 default:
9708                     g_assert_not_reached();
9709                 }
9710             }
9711             store_reg(s, rd, tmp);
9712             break;
9713         case 4: case 5: /* 32-bit multiply.  Sum of absolute differences.  */
9714             switch ((insn >> 20) & 7) {
9715             case 0: /* 32 x 32 -> 32 */
9716             case 7: /* Unsigned sum of absolute differences.  */
9717                 break;
9718             case 1: /* 16 x 16 -> 32 */
9719             case 2: /* Dual multiply add.  */
9720             case 3: /* 32 * 16 -> 32msb */
9721             case 4: /* Dual multiply subtract.  */
9722             case 5: case 6: /* 32 * 32 -> 32msb (SMMUL, SMMLA, SMMLS) */
9723                 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9724                     goto illegal_op;
9725                 }
9726                 break;
9727             }
9728             op = (insn >> 4) & 0xf;
9729             tmp = load_reg(s, rn);
9730             tmp2 = load_reg(s, rm);
9731             switch ((insn >> 20) & 7) {
9732             case 0: /* 32 x 32 -> 32 */
9733                 tcg_gen_mul_i32(tmp, tmp, tmp2);
9734                 tcg_temp_free_i32(tmp2);
9735                 if (rs != 15) {
9736                     tmp2 = load_reg(s, rs);
9737                     if (op)
9738                         tcg_gen_sub_i32(tmp, tmp2, tmp);
9739                     else
9740                         tcg_gen_add_i32(tmp, tmp, tmp2);
9741                     tcg_temp_free_i32(tmp2);
9742                 }
9743                 break;
9744             case 1: /* 16 x 16 -> 32 */
9745                 gen_mulxy(tmp, tmp2, op & 2, op & 1);
9746                 tcg_temp_free_i32(tmp2);
9747                 if (rs != 15) {
9748                     tmp2 = load_reg(s, rs);
9749                     gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
9750                     tcg_temp_free_i32(tmp2);
9751                 }
9752                 break;
9753             case 2: /* Dual multiply add.  */
9754             case 4: /* Dual multiply subtract.  */
9755                 if (op)
9756                     gen_swap_half(tmp2);
9757                 gen_smul_dual(tmp, tmp2);
9758                 if (insn & (1 << 22)) {
9759                     /* This subtraction cannot overflow. */
9760                     tcg_gen_sub_i32(tmp, tmp, tmp2);
9761                 } else {
9762                     /* This addition cannot overflow 32 bits;
9763                      * however it may overflow considered as a signed
9764                      * operation, in which case we must set the Q flag.
9765                      */
9766                     gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
9767                 }
9768                 tcg_temp_free_i32(tmp2);
9769                 if (rs != 15)
9770                   {
9771                     tmp2 = load_reg(s, rs);
9772                     gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
9773                     tcg_temp_free_i32(tmp2);
9774                   }
9775                 break;
9776             case 3: /* 32 * 16 -> 32msb */
9777                 if (op)
9778                     tcg_gen_sari_i32(tmp2, tmp2, 16);
9779                 else
9780                     gen_sxth(tmp2);
9781                 tmp64 = gen_muls_i64_i32(tmp, tmp2);
9782                 tcg_gen_shri_i64(tmp64, tmp64, 16);
9783                 tmp = tcg_temp_new_i32();
9784                 tcg_gen_extrl_i64_i32(tmp, tmp64);
9785                 tcg_temp_free_i64(tmp64);
9786                 if (rs != 15)
9787                   {
9788                     tmp2 = load_reg(s, rs);
9789                     gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
9790                     tcg_temp_free_i32(tmp2);
9791                   }
9792                 break;
9793             case 5: case 6: /* 32 * 32 -> 32msb (SMMUL, SMMLA, SMMLS) */
9794                 tmp64 = gen_muls_i64_i32(tmp, tmp2);
9795                 if (rs != 15) {
9796                     tmp = load_reg(s, rs);
9797                     if (insn & (1 << 20)) {
9798                         tmp64 = gen_addq_msw(tmp64, tmp);
9799                     } else {
9800                         tmp64 = gen_subq_msw(tmp64, tmp);
9801                     }
9802                 }
9803                 if (insn & (1 << 4)) {
9804                     tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u);
9805                 }
9806                 tcg_gen_shri_i64(tmp64, tmp64, 32);
9807                 tmp = tcg_temp_new_i32();
9808                 tcg_gen_extrl_i64_i32(tmp, tmp64);
9809                 tcg_temp_free_i64(tmp64);
9810                 break;
9811             case 7: /* Unsigned sum of absolute differences.  */
9812                 gen_helper_usad8(tmp, tmp, tmp2);
9813                 tcg_temp_free_i32(tmp2);
9814                 if (rs != 15) {
9815                     tmp2 = load_reg(s, rs);
9816                     tcg_gen_add_i32(tmp, tmp, tmp2);
9817                     tcg_temp_free_i32(tmp2);
9818                 }
9819                 break;
9820             }
9821             store_reg(s, rd, tmp);
9822             break;
9823         case 6: case 7: /* 64-bit multiply, Divide.  */
9824             op = ((insn >> 4) & 0xf) | ((insn >> 16) & 0x70);
9825             tmp = load_reg(s, rn);
9826             tmp2 = load_reg(s, rm);
9827             if ((op & 0x50) == 0x10) {
9828                 /* sdiv, udiv */
9829                 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DIV)) {
9830                     goto illegal_op;
9831                 }
9832                 if (op & 0x20)
9833                     gen_helper_udiv(tmp, tmp, tmp2);
9834                 else
9835                     gen_helper_sdiv(tmp, tmp, tmp2);
9836                 tcg_temp_free_i32(tmp2);
9837                 store_reg(s, rd, tmp);
9838             } else if ((op & 0xe) == 0xc) {
9839                 /* Dual multiply accumulate long.  */
9840                 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9841                     tcg_temp_free_i32(tmp);
9842                     tcg_temp_free_i32(tmp2);
9843                     goto illegal_op;
9844                 }
9845                 if (op & 1)
9846                     gen_swap_half(tmp2);
9847                 gen_smul_dual(tmp, tmp2);
9848                 if (op & 0x10) {
9849                     tcg_gen_sub_i32(tmp, tmp, tmp2);
9850                 } else {
9851                     tcg_gen_add_i32(tmp, tmp, tmp2);
9852                 }
9853                 tcg_temp_free_i32(tmp2);
9854                 /* BUGFIX */
9855                 tmp64 = tcg_temp_new_i64();
9856                 tcg_gen_ext_i32_i64(tmp64, tmp);
9857                 tcg_temp_free_i32(tmp);
9858                 gen_addq(s, tmp64, rs, rd);
9859                 gen_storeq_reg(s, rs, rd, tmp64);
9860                 tcg_temp_free_i64(tmp64);
9861             } else {
9862                 if (op & 0x20) {
9863                     /* Unsigned 64-bit multiply  */
9864                     tmp64 = gen_mulu_i64_i32(tmp, tmp2);
9865                 } else {
9866                     if (op & 8) {
9867                         /* smlalxy */
9868                         if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9869                             tcg_temp_free_i32(tmp2);
9870                             tcg_temp_free_i32(tmp);
9871                             goto illegal_op;
9872                         }
9873                         gen_mulxy(tmp, tmp2, op & 2, op & 1);
9874                         tcg_temp_free_i32(tmp2);
9875                         tmp64 = tcg_temp_new_i64();
9876                         tcg_gen_ext_i32_i64(tmp64, tmp);
9877                         tcg_temp_free_i32(tmp);
9878                     } else {
9879                         /* Signed 64-bit multiply  */
9880                         tmp64 = gen_muls_i64_i32(tmp, tmp2);
9881                     }
9882                 }
9883                 if (op & 4) {
9884                     /* umaal */
9885                     if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9886                         tcg_temp_free_i64(tmp64);
9887                         goto illegal_op;
9888                     }
9889                     gen_addq_lo(s, tmp64, rs);
9890                     gen_addq_lo(s, tmp64, rd);
9891                 } else if (op & 0x40) {
9892                     /* 64-bit accumulate.  */
9893                     gen_addq(s, tmp64, rs, rd);
9894                 }
9895                 gen_storeq_reg(s, rs, rd, tmp64);
9896                 tcg_temp_free_i64(tmp64);
9897             }
9898             break;
9899         }
9900         break;
9901     case 6: case 7: case 14: case 15:
9902         /* Coprocessor.  */
9903         if (((insn >> 24) & 3) == 3) {
9904             /* Translate into the equivalent ARM encoding.  */
9905             insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
9906             if (disas_neon_data_insn(s, insn)) {
9907                 goto illegal_op;
9908             }
9909         } else if (((insn >> 8) & 0xe) == 10) {
9910             if (disas_vfp_insn(s, insn)) {
9911                 goto illegal_op;
9912             }
9913         } else {
9914             if (insn & (1 << 28))
9915                 goto illegal_op;
9916             if (disas_coproc_insn(s, insn)) {
9917                 goto illegal_op;
9918             }
9919         }
9920         break;
9921     case 8: case 9: case 10: case 11:
9922         if (insn & (1 << 15)) {
9923             /* Branches, misc control.  */
9924             if (insn & 0x5000) {
9925                 /* Unconditional branch.  */
9926                 /* signextend(hw1[10:0]) -> offset[:12].  */
9927                 offset = ((int32_t)insn << 5) >> 9 & ~(int32_t)0xfff;
9928                 /* hw1[10:0] -> offset[11:1].  */
9929                 offset |= (insn & 0x7ff) << 1;
9930                 /* (~hw2[13, 11] ^ offset[24]) -> offset[23,22]
9931                    offset[24:22] already have the same value because of the
9932                    sign extension above.  */
9933                 offset ^= ((~insn) & (1 << 13)) << 10;
9934                 offset ^= ((~insn) & (1 << 11)) << 11;
9935
9936                 if (insn & (1 << 14)) {
9937                     /* Branch and link.  */
9938                     tcg_gen_movi_i32(cpu_R[14], s->pc | 1);
9939                 }
9940
9941                 offset += s->pc;
9942                 if (insn & (1 << 12)) {
9943                     /* b/bl */
9944                     gen_jmp(s, offset);
9945                 } else {
9946                     /* blx */
9947                     offset &= ~(uint32_t)2;
9948                     /* thumb2 bx, no need to check */
9949                     gen_bx_im(s, offset);
9950                 }
9951             } else if (((insn >> 23) & 7) == 7) {
9952                 /* Misc control */
9953                 if (insn & (1 << 13))
9954                     goto illegal_op;
9955
9956                 if (insn & (1 << 26)) {
9957                     if (!(insn & (1 << 20))) {
9958                         /* Hypervisor call (v7) */
9959                         int imm16 = extract32(insn, 16, 4) << 12
9960                             | extract32(insn, 0, 12);
9961                         ARCH(7);
9962                         if (IS_USER(s)) {
9963                             goto illegal_op;
9964                         }
9965                         gen_hvc(s, imm16);
9966                     } else {
9967                         /* Secure monitor call (v6+) */
9968                         ARCH(6K);
9969                         if (IS_USER(s)) {
9970                             goto illegal_op;
9971                         }
9972                         gen_smc(s);
9973                     }
9974                 } else {
9975                     op = (insn >> 20) & 7;
9976                     switch (op) {
9977                     case 0: /* msr cpsr.  */
9978                         if (arm_dc_feature(s, ARM_FEATURE_M)) {
9979                             tmp = load_reg(s, rn);
9980                             addr = tcg_const_i32(insn & 0xff);
9981                             gen_helper_v7m_msr(cpu_env, addr, tmp);
9982                             tcg_temp_free_i32(addr);
9983                             tcg_temp_free_i32(tmp);
9984                             gen_lookup_tb(s);
9985                             break;
9986                         }
9987                         /* fall through */
9988                     case 1: /* msr spsr.  */
9989                         if (arm_dc_feature(s, ARM_FEATURE_M)) {
9990                             goto illegal_op;
9991                         }
9992                         tmp = load_reg(s, rn);
9993                         if (gen_set_psr(s,
9994                               msr_mask(s, (insn >> 8) & 0xf, op == 1),
9995                               op == 1, tmp))
9996                             goto illegal_op;
9997                         break;
9998                     case 2: /* cps, nop-hint.  */
9999                         if (((insn >> 8) & 7) == 0) {
10000                             gen_nop_hint(s, insn & 0xff);
10001                         }
10002                         /* Implemented as NOP in user mode.  */
10003                         if (IS_USER(s))
10004                             break;
10005                         offset = 0;
10006                         imm = 0;
10007                         if (insn & (1 << 10)) {
10008                             if (insn & (1 << 7))
10009                                 offset |= CPSR_A;
10010                             if (insn & (1 << 6))
10011                                 offset |= CPSR_I;
10012                             if (insn & (1 << 5))
10013                                 offset |= CPSR_F;
10014                             if (insn & (1 << 9))
10015                                 imm = CPSR_A | CPSR_I | CPSR_F;
10016                         }
10017                         if (insn & (1 << 8)) {
10018                             offset |= 0x1f;
10019                             imm |= (insn & 0x1f);
10020                         }
10021                         if (offset) {
10022                             gen_set_psr_im(s, offset, 0, imm);
10023                         }
10024                         break;
10025                     case 3: /* Special control operations.  */
10026                         ARCH(7);
10027                         op = (insn >> 4) & 0xf;
10028                         switch (op) {
10029                         case 2: /* clrex */
10030                             gen_clrex(s);
10031                             break;
10032                         case 4: /* dsb */
10033                         case 5: /* dmb */
10034                         case 6: /* isb */
10035                             /* These execute as NOPs.  */
10036                             break;
10037                         default:
10038                             goto illegal_op;
10039                         }
10040                         break;
10041                     case 4: /* bxj */
10042                         /* Trivial implementation equivalent to bx.  */
10043                         tmp = load_reg(s, rn);
10044                         gen_bx(s, tmp);
10045                         break;
10046                     case 5: /* Exception return.  */
10047                         if (IS_USER(s)) {
10048                             goto illegal_op;
10049                         }
10050                         if (rn != 14 || rd != 15) {
10051                             goto illegal_op;
10052                         }
10053                         tmp = load_reg(s, rn);
10054                         tcg_gen_subi_i32(tmp, tmp, insn & 0xff);
10055                         gen_exception_return(s, tmp);
10056                         break;
10057                     case 6: /* mrs cpsr.  */
10058                         tmp = tcg_temp_new_i32();
10059                         if (arm_dc_feature(s, ARM_FEATURE_M)) {
10060                             addr = tcg_const_i32(insn & 0xff);
10061                             gen_helper_v7m_mrs(tmp, cpu_env, addr);
10062                             tcg_temp_free_i32(addr);
10063                         } else {
10064                             gen_helper_cpsr_read(tmp, cpu_env);
10065                         }
10066                         store_reg(s, rd, tmp);
10067                         break;
10068                     case 7: /* mrs spsr.  */
10069                         /* Not accessible in user mode.  */
10070                         if (IS_USER(s) || arm_dc_feature(s, ARM_FEATURE_M)) {
10071                             goto illegal_op;
10072                         }
10073                         tmp = load_cpu_field(spsr);
10074                         store_reg(s, rd, tmp);
10075                         break;
10076                     }
10077                 }
10078             } else {
10079                 /* Conditional branch.  */
10080                 op = (insn >> 22) & 0xf;
10081                 /* Generate a conditional jump to next instruction.  */
10082                 s->condlabel = gen_new_label();
10083                 arm_gen_test_cc(op ^ 1, s->condlabel);
10084                 s->condjmp = 1;
10085
10086                 /* offset[11:1] = insn[10:0] */
10087                 offset = (insn & 0x7ff) << 1;
10088                 /* offset[17:12] = insn[21:16].  */
10089                 offset |= (insn & 0x003f0000) >> 4;
10090                 /* offset[31:20] = insn[26].  */
10091                 offset |= ((int32_t)((insn << 5) & 0x80000000)) >> 11;
10092                 /* offset[18] = insn[13].  */
10093                 offset |= (insn & (1 << 13)) << 5;
10094                 /* offset[19] = insn[11].  */
10095                 offset |= (insn & (1 << 11)) << 8;
10096
10097                 /* jump to the offset */
10098                 gen_jmp(s, s->pc + offset);
10099             }
10100         } else {
10101             /* Data processing immediate.  */
10102             if (insn & (1 << 25)) {
10103                 if (insn & (1 << 24)) {
10104                     if (insn & (1 << 20))
10105                         goto illegal_op;
10106                     /* Bitfield/Saturate.  */
10107                     op = (insn >> 21) & 7;
10108                     imm = insn & 0x1f;
10109                     shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
10110                     if (rn == 15) {
10111                         tmp = tcg_temp_new_i32();
10112                         tcg_gen_movi_i32(tmp, 0);
10113                     } else {
10114                         tmp = load_reg(s, rn);
10115                     }
10116                     switch (op) {
10117                     case 2: /* Signed bitfield extract.  */
10118                         imm++;
10119                         if (shift + imm > 32)
10120                             goto illegal_op;
10121                         if (imm < 32)
10122                             gen_sbfx(tmp, shift, imm);
10123                         break;
10124                     case 6: /* Unsigned bitfield extract.  */
10125                         imm++;
10126                         if (shift + imm > 32)
10127                             goto illegal_op;
10128                         if (imm < 32)
10129                             gen_ubfx(tmp, shift, (1u << imm) - 1);
10130                         break;
10131                     case 3: /* Bitfield insert/clear.  */
10132                         if (imm < shift)
10133                             goto illegal_op;
10134                         imm = imm + 1 - shift;
10135                         if (imm != 32) {
10136                             tmp2 = load_reg(s, rd);
10137                             tcg_gen_deposit_i32(tmp, tmp2, tmp, shift, imm);
10138                             tcg_temp_free_i32(tmp2);
10139                         }
10140                         break;
10141                     case 7:
10142                         goto illegal_op;
10143                     default: /* Saturate.  */
10144                         if (shift) {
10145                             if (op & 1)
10146                                 tcg_gen_sari_i32(tmp, tmp, shift);
10147                             else
10148                                 tcg_gen_shli_i32(tmp, tmp, shift);
10149                         }
10150                         tmp2 = tcg_const_i32(imm);
10151                         if (op & 4) {
10152                             /* Unsigned.  */
10153                             if ((op & 1) && shift == 0) {
10154                                 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
10155                                     tcg_temp_free_i32(tmp);
10156                                     tcg_temp_free_i32(tmp2);
10157                                     goto illegal_op;
10158                                 }
10159                                 gen_helper_usat16(tmp, cpu_env, tmp, tmp2);
10160                             } else {
10161                                 gen_helper_usat(tmp, cpu_env, tmp, tmp2);
10162                             }
10163                         } else {
10164                             /* Signed.  */
10165                             if ((op & 1) && shift == 0) {
10166                                 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
10167                                     tcg_temp_free_i32(tmp);
10168                                     tcg_temp_free_i32(tmp2);
10169                                     goto illegal_op;
10170                                 }
10171                                 gen_helper_ssat16(tmp, cpu_env, tmp, tmp2);
10172                             } else {
10173                                 gen_helper_ssat(tmp, cpu_env, tmp, tmp2);
10174                             }
10175                         }
10176                         tcg_temp_free_i32(tmp2);
10177                         break;
10178                     }
10179                     store_reg(s, rd, tmp);
10180                 } else {
10181                     imm = ((insn & 0x04000000) >> 15)
10182                           | ((insn & 0x7000) >> 4) | (insn & 0xff);
10183                     if (insn & (1 << 22)) {
10184                         /* 16-bit immediate.  */
10185                         imm |= (insn >> 4) & 0xf000;
10186                         if (insn & (1 << 23)) {
10187                             /* movt */
10188                             tmp = load_reg(s, rd);
10189                             tcg_gen_ext16u_i32(tmp, tmp);
10190                             tcg_gen_ori_i32(tmp, tmp, imm << 16);
10191                         } else {
10192                             /* movw */
10193                             tmp = tcg_temp_new_i32();
10194                             tcg_gen_movi_i32(tmp, imm);
10195                         }
10196                     } else {
10197                         /* Add/sub 12-bit immediate.  */
10198                         if (rn == 15) {
10199                             offset = s->pc & ~(uint32_t)3;
10200                             if (insn & (1 << 23))
10201                                 offset -= imm;
10202                             else
10203                                 offset += imm;
10204                             tmp = tcg_temp_new_i32();
10205                             tcg_gen_movi_i32(tmp, offset);
10206                         } else {
10207                             tmp = load_reg(s, rn);
10208                             if (insn & (1 << 23))
10209                                 tcg_gen_subi_i32(tmp, tmp, imm);
10210                             else
10211                                 tcg_gen_addi_i32(tmp, tmp, imm);
10212                         }
10213                     }
10214                     store_reg(s, rd, tmp);
10215                 }
10216             } else {
10217                 int shifter_out = 0;
10218                 /* modified 12-bit immediate.  */
10219                 shift = ((insn & 0x04000000) >> 23) | ((insn & 0x7000) >> 12);
10220                 imm = (insn & 0xff);
10221                 switch (shift) {
10222                 case 0: /* XY */
10223                     /* Nothing to do.  */
10224                     break;
10225                 case 1: /* 00XY00XY */
10226                     imm |= imm << 16;
10227                     break;
10228                 case 2: /* XY00XY00 */
10229                     imm |= imm << 16;
10230                     imm <<= 8;
10231                     break;
10232                 case 3: /* XYXYXYXY */
10233                     imm |= imm << 16;
10234                     imm |= imm << 8;
10235                     break;
10236                 default: /* Rotated constant.  */
10237                     shift = (shift << 1) | (imm >> 7);
10238                     imm |= 0x80;
10239                     imm = imm << (32 - shift);
10240                     shifter_out = 1;
10241                     break;
10242                 }
10243                 tmp2 = tcg_temp_new_i32();
10244                 tcg_gen_movi_i32(tmp2, imm);
10245                 rn = (insn >> 16) & 0xf;
10246                 if (rn == 15) {
10247                     tmp = tcg_temp_new_i32();
10248                     tcg_gen_movi_i32(tmp, 0);
10249                 } else {
10250                     tmp = load_reg(s, rn);
10251                 }
10252                 op = (insn >> 21) & 0xf;
10253                 if (gen_thumb2_data_op(s, op, (insn & (1 << 20)) != 0,
10254                                        shifter_out, tmp, tmp2))
10255                     goto illegal_op;
10256                 tcg_temp_free_i32(tmp2);
10257                 rd = (insn >> 8) & 0xf;
10258                 if (rd != 15) {
10259                     store_reg(s, rd, tmp);
10260                 } else {
10261                     tcg_temp_free_i32(tmp);
10262                 }
10263             }
10264         }
10265         break;
10266     case 12: /* Load/store single data item.  */
10267         {
10268         int postinc = 0;
10269         int writeback = 0;
10270         int memidx;
10271         if ((insn & 0x01100000) == 0x01000000) {
10272             if (disas_neon_ls_insn(s, insn)) {
10273                 goto illegal_op;
10274             }
10275             break;
10276         }
10277         op = ((insn >> 21) & 3) | ((insn >> 22) & 4);
10278         if (rs == 15) {
10279             if (!(insn & (1 << 20))) {
10280                 goto illegal_op;
10281             }
10282             if (op != 2) {
10283                 /* Byte or halfword load space with dest == r15 : memory hints.
10284                  * Catch them early so we don't emit pointless addressing code.
10285                  * This space is a mix of:
10286                  *  PLD/PLDW/PLI,  which we implement as NOPs (note that unlike
10287                  *     the ARM encodings, PLDW space doesn't UNDEF for non-v7MP
10288                  *     cores)
10289                  *  unallocated hints, which must be treated as NOPs
10290                  *  UNPREDICTABLE space, which we NOP or UNDEF depending on
10291                  *     which is easiest for the decoding logic
10292                  *  Some space which must UNDEF
10293                  */
10294                 int op1 = (insn >> 23) & 3;
10295                 int op2 = (insn >> 6) & 0x3f;
10296                 if (op & 2) {
10297                     goto illegal_op;
10298                 }
10299                 if (rn == 15) {
10300                     /* UNPREDICTABLE, unallocated hint or
10301                      * PLD/PLDW/PLI (literal)
10302                      */
10303                     return 0;
10304                 }
10305                 if (op1 & 1) {
10306                     return 0; /* PLD/PLDW/PLI or unallocated hint */
10307                 }
10308                 if ((op2 == 0) || ((op2 & 0x3c) == 0x30)) {
10309                     return 0; /* PLD/PLDW/PLI or unallocated hint */
10310                 }
10311                 /* UNDEF space, or an UNPREDICTABLE */
10312                 return 1;
10313             }
10314         }
10315         memidx = get_mem_index(s);
10316         if (rn == 15) {
10317             addr = tcg_temp_new_i32();
10318             /* PC relative.  */
10319             /* s->pc has already been incremented by 4.  */
10320             imm = s->pc & 0xfffffffc;
10321             if (insn & (1 << 23))
10322                 imm += insn & 0xfff;
10323             else
10324                 imm -= insn & 0xfff;
10325             tcg_gen_movi_i32(addr, imm);
10326         } else {
10327             addr = load_reg(s, rn);
10328             if (insn & (1 << 23)) {
10329                 /* Positive offset.  */
10330                 imm = insn & 0xfff;
10331                 tcg_gen_addi_i32(addr, addr, imm);
10332             } else {
10333                 imm = insn & 0xff;
10334                 switch ((insn >> 8) & 0xf) {
10335                 case 0x0: /* Shifted Register.  */
10336                     shift = (insn >> 4) & 0xf;
10337                     if (shift > 3) {
10338                         tcg_temp_free_i32(addr);
10339                         goto illegal_op;
10340                     }
10341                     tmp = load_reg(s, rm);
10342                     if (shift)
10343                         tcg_gen_shli_i32(tmp, tmp, shift);
10344                     tcg_gen_add_i32(addr, addr, tmp);
10345                     tcg_temp_free_i32(tmp);
10346                     break;
10347                 case 0xc: /* Negative offset.  */
10348                     tcg_gen_addi_i32(addr, addr, -imm);
10349                     break;
10350                 case 0xe: /* User privilege.  */
10351                     tcg_gen_addi_i32(addr, addr, imm);
10352                     memidx = get_a32_user_mem_index(s);
10353                     break;
10354                 case 0x9: /* Post-decrement.  */
10355                     imm = -imm;
10356                     /* Fall through.  */
10357                 case 0xb: /* Post-increment.  */
10358                     postinc = 1;
10359                     writeback = 1;
10360                     break;
10361                 case 0xd: /* Pre-decrement.  */
10362                     imm = -imm;
10363                     /* Fall through.  */
10364                 case 0xf: /* Pre-increment.  */
10365                     tcg_gen_addi_i32(addr, addr, imm);
10366                     writeback = 1;
10367                     break;
10368                 default:
10369                     tcg_temp_free_i32(addr);
10370                     goto illegal_op;
10371                 }
10372             }
10373         }
10374         if (insn & (1 << 20)) {
10375             /* Load.  */
10376             tmp = tcg_temp_new_i32();
10377             switch (op) {
10378             case 0:
10379                 gen_aa32_ld8u(tmp, addr, memidx);
10380                 break;
10381             case 4:
10382                 gen_aa32_ld8s(tmp, addr, memidx);
10383                 break;
10384             case 1:
10385                 gen_aa32_ld16u(tmp, addr, memidx);
10386                 break;
10387             case 5:
10388                 gen_aa32_ld16s(tmp, addr, memidx);
10389                 break;
10390             case 2:
10391                 gen_aa32_ld32u(tmp, addr, memidx);
10392                 break;
10393             default:
10394                 tcg_temp_free_i32(tmp);
10395                 tcg_temp_free_i32(addr);
10396                 goto illegal_op;
10397             }
10398             if (rs == 15) {
10399                 gen_bx(s, tmp);
10400             } else {
10401                 store_reg(s, rs, tmp);
10402             }
10403         } else {
10404             /* Store.  */
10405             tmp = load_reg(s, rs);
10406             switch (op) {
10407             case 0:
10408                 gen_aa32_st8(tmp, addr, memidx);
10409                 break;
10410             case 1:
10411                 gen_aa32_st16(tmp, addr, memidx);
10412                 break;
10413             case 2:
10414                 gen_aa32_st32(tmp, addr, memidx);
10415                 break;
10416             default:
10417                 tcg_temp_free_i32(tmp);
10418                 tcg_temp_free_i32(addr);
10419                 goto illegal_op;
10420             }
10421             tcg_temp_free_i32(tmp);
10422         }
10423         if (postinc)
10424             tcg_gen_addi_i32(addr, addr, imm);
10425         if (writeback) {
10426             store_reg(s, rn, addr);
10427         } else {
10428             tcg_temp_free_i32(addr);
10429         }
10430         }
10431         break;
10432     default:
10433         goto illegal_op;
10434     }
10435     return 0;
10436 illegal_op:
10437     return 1;
10438 }
10439
10440 static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
10441 {
10442     uint32_t val, insn, op, rm, rn, rd, shift, cond;
10443     int32_t offset;
10444     int i;
10445     TCGv_i32 tmp;
10446     TCGv_i32 tmp2;
10447     TCGv_i32 addr;
10448
10449     if (s->condexec_mask) {
10450         cond = s->condexec_cond;
10451         if (cond != 0x0e) {     /* Skip conditional when condition is AL. */
10452           s->condlabel = gen_new_label();
10453           arm_gen_test_cc(cond ^ 1, s->condlabel);
10454           s->condjmp = 1;
10455         }
10456     }
10457
10458     insn = arm_lduw_code(env, s->pc, s->bswap_code);
10459     s->pc += 2;
10460
10461     switch (insn >> 12) {
10462     case 0: case 1:
10463
10464         rd = insn & 7;
10465         op = (insn >> 11) & 3;
10466         if (op == 3) {
10467             /* add/subtract */
10468             rn = (insn >> 3) & 7;
10469             tmp = load_reg(s, rn);
10470             if (insn & (1 << 10)) {
10471                 /* immediate */
10472                 tmp2 = tcg_temp_new_i32();
10473                 tcg_gen_movi_i32(tmp2, (insn >> 6) & 7);
10474             } else {
10475                 /* reg */
10476                 rm = (insn >> 6) & 7;
10477                 tmp2 = load_reg(s, rm);
10478             }
10479             if (insn & (1 << 9)) {
10480                 if (s->condexec_mask)
10481                     tcg_gen_sub_i32(tmp, tmp, tmp2);
10482                 else
10483                     gen_sub_CC(tmp, tmp, tmp2);
10484             } else {
10485                 if (s->condexec_mask)
10486                     tcg_gen_add_i32(tmp, tmp, tmp2);
10487                 else
10488                     gen_add_CC(tmp, tmp, tmp2);
10489             }
10490             tcg_temp_free_i32(tmp2);
10491             store_reg(s, rd, tmp);
10492         } else {
10493             /* shift immediate */
10494             rm = (insn >> 3) & 7;
10495             shift = (insn >> 6) & 0x1f;
10496             tmp = load_reg(s, rm);
10497             gen_arm_shift_im(tmp, op, shift, s->condexec_mask == 0);
10498             if (!s->condexec_mask)
10499                 gen_logic_CC(tmp);
10500             store_reg(s, rd, tmp);
10501         }
10502         break;
10503     case 2: case 3:
10504         /* arithmetic large immediate */
10505         op = (insn >> 11) & 3;
10506         rd = (insn >> 8) & 0x7;
10507         if (op == 0) { /* mov */
10508             tmp = tcg_temp_new_i32();
10509             tcg_gen_movi_i32(tmp, insn & 0xff);
10510             if (!s->condexec_mask)
10511                 gen_logic_CC(tmp);
10512             store_reg(s, rd, tmp);
10513         } else {
10514             tmp = load_reg(s, rd);
10515             tmp2 = tcg_temp_new_i32();
10516             tcg_gen_movi_i32(tmp2, insn & 0xff);
10517             switch (op) {
10518             case 1: /* cmp */
10519                 gen_sub_CC(tmp, tmp, tmp2);
10520                 tcg_temp_free_i32(tmp);
10521                 tcg_temp_free_i32(tmp2);
10522                 break;
10523             case 2: /* add */
10524                 if (s->condexec_mask)
10525                     tcg_gen_add_i32(tmp, tmp, tmp2);
10526                 else
10527                     gen_add_CC(tmp, tmp, tmp2);
10528                 tcg_temp_free_i32(tmp2);
10529                 store_reg(s, rd, tmp);
10530                 break;
10531             case 3: /* sub */
10532                 if (s->condexec_mask)
10533                     tcg_gen_sub_i32(tmp, tmp, tmp2);
10534                 else
10535                     gen_sub_CC(tmp, tmp, tmp2);
10536                 tcg_temp_free_i32(tmp2);
10537                 store_reg(s, rd, tmp);
10538                 break;
10539             }
10540         }
10541         break;
10542     case 4:
10543         if (insn & (1 << 11)) {
10544             rd = (insn >> 8) & 7;
10545             /* load pc-relative.  Bit 1 of PC is ignored.  */
10546             val = s->pc + 2 + ((insn & 0xff) * 4);
10547             val &= ~(uint32_t)2;
10548             addr = tcg_temp_new_i32();
10549             tcg_gen_movi_i32(addr, val);
10550             tmp = tcg_temp_new_i32();
10551             gen_aa32_ld32u(tmp, addr, get_mem_index(s));
10552             tcg_temp_free_i32(addr);
10553             store_reg(s, rd, tmp);
10554             break;
10555         }
10556         if (insn & (1 << 10)) {
10557             /* data processing extended or blx */
10558             rd = (insn & 7) | ((insn >> 4) & 8);
10559             rm = (insn >> 3) & 0xf;
10560             op = (insn >> 8) & 3;
10561             switch (op) {
10562             case 0: /* add */
10563                 tmp = load_reg(s, rd);
10564                 tmp2 = load_reg(s, rm);
10565                 tcg_gen_add_i32(tmp, tmp, tmp2);
10566                 tcg_temp_free_i32(tmp2);
10567                 store_reg(s, rd, tmp);
10568                 break;
10569             case 1: /* cmp */
10570                 tmp = load_reg(s, rd);
10571                 tmp2 = load_reg(s, rm);
10572                 gen_sub_CC(tmp, tmp, tmp2);
10573                 tcg_temp_free_i32(tmp2);
10574                 tcg_temp_free_i32(tmp);
10575                 break;
10576             case 2: /* mov/cpy */
10577                 tmp = load_reg(s, rm);
10578                 store_reg(s, rd, tmp);
10579                 break;
10580             case 3:/* branch [and link] exchange thumb register */
10581                 tmp = load_reg(s, rm);
10582                 if (insn & (1 << 7)) {
10583                     ARCH(5);
10584                     val = (uint32_t)s->pc | 1;
10585                     tmp2 = tcg_temp_new_i32();
10586                     tcg_gen_movi_i32(tmp2, val);
10587                     store_reg(s, 14, tmp2);
10588                 }
10589                 /* already thumb, no need to check */
10590                 gen_bx(s, tmp);
10591                 break;
10592             }
10593             break;
10594         }
10595
10596         /* data processing register */
10597         rd = insn & 7;
10598         rm = (insn >> 3) & 7;
10599         op = (insn >> 6) & 0xf;
10600         if (op == 2 || op == 3 || op == 4 || op == 7) {
10601             /* the shift/rotate ops want the operands backwards */
10602             val = rm;
10603             rm = rd;
10604             rd = val;
10605             val = 1;
10606         } else {
10607             val = 0;
10608         }
10609
10610         if (op == 9) { /* neg */
10611             tmp = tcg_temp_new_i32();
10612             tcg_gen_movi_i32(tmp, 0);
10613         } else if (op != 0xf) { /* mvn doesn't read its first operand */
10614             tmp = load_reg(s, rd);
10615         } else {
10616             TCGV_UNUSED_I32(tmp);
10617         }
10618
10619         tmp2 = load_reg(s, rm);
10620         switch (op) {
10621         case 0x0: /* and */
10622             tcg_gen_and_i32(tmp, tmp, tmp2);
10623             if (!s->condexec_mask)
10624                 gen_logic_CC(tmp);
10625             break;
10626         case 0x1: /* eor */
10627             tcg_gen_xor_i32(tmp, tmp, tmp2);
10628             if (!s->condexec_mask)
10629                 gen_logic_CC(tmp);
10630             break;
10631         case 0x2: /* lsl */
10632             if (s->condexec_mask) {
10633                 gen_shl(tmp2, tmp2, tmp);
10634             } else {
10635                 gen_helper_shl_cc(tmp2, cpu_env, tmp2, tmp);
10636                 gen_logic_CC(tmp2);
10637             }
10638             break;
10639         case 0x3: /* lsr */
10640             if (s->condexec_mask) {
10641                 gen_shr(tmp2, tmp2, tmp);
10642             } else {
10643                 gen_helper_shr_cc(tmp2, cpu_env, tmp2, tmp);
10644                 gen_logic_CC(tmp2);
10645             }
10646             break;
10647         case 0x4: /* asr */
10648             if (s->condexec_mask) {
10649                 gen_sar(tmp2, tmp2, tmp);
10650             } else {
10651                 gen_helper_sar_cc(tmp2, cpu_env, tmp2, tmp);
10652                 gen_logic_CC(tmp2);
10653             }
10654             break;
10655         case 0x5: /* adc */
10656             if (s->condexec_mask) {
10657                 gen_adc(tmp, tmp2);
10658             } else {
10659                 gen_adc_CC(tmp, tmp, tmp2);
10660             }
10661             break;
10662         case 0x6: /* sbc */
10663             if (s->condexec_mask) {
10664                 gen_sub_carry(tmp, tmp, tmp2);
10665             } else {
10666                 gen_sbc_CC(tmp, tmp, tmp2);
10667             }
10668             break;
10669         case 0x7: /* ror */
10670             if (s->condexec_mask) {
10671                 tcg_gen_andi_i32(tmp, tmp, 0x1f);
10672                 tcg_gen_rotr_i32(tmp2, tmp2, tmp);
10673             } else {
10674                 gen_helper_ror_cc(tmp2, cpu_env, tmp2, tmp);
10675                 gen_logic_CC(tmp2);
10676             }
10677             break;
10678         case 0x8: /* tst */
10679             tcg_gen_and_i32(tmp, tmp, tmp2);
10680             gen_logic_CC(tmp);
10681             rd = 16;
10682             break;
10683         case 0x9: /* neg */
10684             if (s->condexec_mask)
10685                 tcg_gen_neg_i32(tmp, tmp2);
10686             else
10687                 gen_sub_CC(tmp, tmp, tmp2);
10688             break;
10689         case 0xa: /* cmp */
10690             gen_sub_CC(tmp, tmp, tmp2);
10691             rd = 16;
10692             break;
10693         case 0xb: /* cmn */
10694             gen_add_CC(tmp, tmp, tmp2);
10695             rd = 16;
10696             break;
10697         case 0xc: /* orr */
10698             tcg_gen_or_i32(tmp, tmp, tmp2);
10699             if (!s->condexec_mask)
10700                 gen_logic_CC(tmp);
10701             break;
10702         case 0xd: /* mul */
10703             tcg_gen_mul_i32(tmp, tmp, tmp2);
10704             if (!s->condexec_mask)
10705                 gen_logic_CC(tmp);
10706             break;
10707         case 0xe: /* bic */
10708             tcg_gen_andc_i32(tmp, tmp, tmp2);
10709             if (!s->condexec_mask)
10710                 gen_logic_CC(tmp);
10711             break;
10712         case 0xf: /* mvn */
10713             tcg_gen_not_i32(tmp2, tmp2);
10714             if (!s->condexec_mask)
10715                 gen_logic_CC(tmp2);
10716             val = 1;
10717             rm = rd;
10718             break;
10719         }
10720         if (rd != 16) {
10721             if (val) {
10722                 store_reg(s, rm, tmp2);
10723                 if (op != 0xf)
10724                     tcg_temp_free_i32(tmp);
10725             } else {
10726                 store_reg(s, rd, tmp);
10727                 tcg_temp_free_i32(tmp2);
10728             }
10729         } else {
10730             tcg_temp_free_i32(tmp);
10731             tcg_temp_free_i32(tmp2);
10732         }
10733         break;
10734
10735     case 5:
10736         /* load/store register offset.  */
10737         rd = insn & 7;
10738         rn = (insn >> 3) & 7;
10739         rm = (insn >> 6) & 7;
10740         op = (insn >> 9) & 7;
10741         addr = load_reg(s, rn);
10742         tmp = load_reg(s, rm);
10743         tcg_gen_add_i32(addr, addr, tmp);
10744         tcg_temp_free_i32(tmp);
10745
10746         if (op < 3) { /* store */
10747             tmp = load_reg(s, rd);
10748         } else {
10749             tmp = tcg_temp_new_i32();
10750         }
10751
10752         switch (op) {
10753         case 0: /* str */
10754             gen_aa32_st32(tmp, addr, get_mem_index(s));
10755             break;
10756         case 1: /* strh */
10757             gen_aa32_st16(tmp, addr, get_mem_index(s));
10758             break;
10759         case 2: /* strb */
10760             gen_aa32_st8(tmp, addr, get_mem_index(s));
10761             break;
10762         case 3: /* ldrsb */
10763             gen_aa32_ld8s(tmp, addr, get_mem_index(s));
10764             break;
10765         case 4: /* ldr */
10766             gen_aa32_ld32u(tmp, addr, get_mem_index(s));
10767             break;
10768         case 5: /* ldrh */
10769             gen_aa32_ld16u(tmp, addr, get_mem_index(s));
10770             break;
10771         case 6: /* ldrb */
10772             gen_aa32_ld8u(tmp, addr, get_mem_index(s));
10773             break;
10774         case 7: /* ldrsh */
10775             gen_aa32_ld16s(tmp, addr, get_mem_index(s));
10776             break;
10777         }
10778         if (op >= 3) { /* load */
10779             store_reg(s, rd, tmp);
10780         } else {
10781             tcg_temp_free_i32(tmp);
10782         }
10783         tcg_temp_free_i32(addr);
10784         break;
10785
10786     case 6:
10787         /* load/store word immediate offset */
10788         rd = insn & 7;
10789         rn = (insn >> 3) & 7;
10790         addr = load_reg(s, rn);
10791         val = (insn >> 4) & 0x7c;
10792         tcg_gen_addi_i32(addr, addr, val);
10793
10794         if (insn & (1 << 11)) {
10795             /* load */
10796             tmp = tcg_temp_new_i32();
10797             gen_aa32_ld32u(tmp, addr, get_mem_index(s));
10798             store_reg(s, rd, tmp);
10799         } else {
10800             /* store */
10801             tmp = load_reg(s, rd);
10802             gen_aa32_st32(tmp, addr, get_mem_index(s));
10803             tcg_temp_free_i32(tmp);
10804         }
10805         tcg_temp_free_i32(addr);
10806         break;
10807
10808     case 7:
10809         /* load/store byte immediate offset */
10810         rd = insn & 7;
10811         rn = (insn >> 3) & 7;
10812         addr = load_reg(s, rn);
10813         val = (insn >> 6) & 0x1f;
10814         tcg_gen_addi_i32(addr, addr, val);
10815
10816         if (insn & (1 << 11)) {
10817             /* load */
10818             tmp = tcg_temp_new_i32();
10819             gen_aa32_ld8u(tmp, addr, get_mem_index(s));
10820             store_reg(s, rd, tmp);
10821         } else {
10822             /* store */
10823             tmp = load_reg(s, rd);
10824             gen_aa32_st8(tmp, addr, get_mem_index(s));
10825             tcg_temp_free_i32(tmp);
10826         }
10827         tcg_temp_free_i32(addr);
10828         break;
10829
10830     case 8:
10831         /* load/store halfword immediate offset */
10832         rd = insn & 7;
10833         rn = (insn >> 3) & 7;
10834         addr = load_reg(s, rn);
10835         val = (insn >> 5) & 0x3e;
10836         tcg_gen_addi_i32(addr, addr, val);
10837
10838         if (insn & (1 << 11)) {
10839             /* load */
10840             tmp = tcg_temp_new_i32();
10841             gen_aa32_ld16u(tmp, addr, get_mem_index(s));
10842             store_reg(s, rd, tmp);
10843         } else {
10844             /* store */
10845             tmp = load_reg(s, rd);
10846             gen_aa32_st16(tmp, addr, get_mem_index(s));
10847             tcg_temp_free_i32(tmp);
10848         }
10849         tcg_temp_free_i32(addr);
10850         break;
10851
10852     case 9:
10853         /* load/store from stack */
10854         rd = (insn >> 8) & 7;
10855         addr = load_reg(s, 13);
10856         val = (insn & 0xff) * 4;
10857         tcg_gen_addi_i32(addr, addr, val);
10858
10859         if (insn & (1 << 11)) {
10860             /* load */
10861             tmp = tcg_temp_new_i32();
10862             gen_aa32_ld32u(tmp, addr, get_mem_index(s));
10863             store_reg(s, rd, tmp);
10864         } else {
10865             /* store */
10866             tmp = load_reg(s, rd);
10867             gen_aa32_st32(tmp, addr, get_mem_index(s));
10868             tcg_temp_free_i32(tmp);
10869         }
10870         tcg_temp_free_i32(addr);
10871         break;
10872
10873     case 10:
10874         /* add to high reg */
10875         rd = (insn >> 8) & 7;
10876         if (insn & (1 << 11)) {
10877             /* SP */
10878             tmp = load_reg(s, 13);
10879         } else {
10880             /* PC. bit 1 is ignored.  */
10881             tmp = tcg_temp_new_i32();
10882             tcg_gen_movi_i32(tmp, (s->pc + 2) & ~(uint32_t)2);
10883         }
10884         val = (insn & 0xff) * 4;
10885         tcg_gen_addi_i32(tmp, tmp, val);
10886         store_reg(s, rd, tmp);
10887         break;
10888
10889     case 11:
10890         /* misc */
10891         op = (insn >> 8) & 0xf;
10892         switch (op) {
10893         case 0:
10894             /* adjust stack pointer */
10895             tmp = load_reg(s, 13);
10896             val = (insn & 0x7f) * 4;
10897             if (insn & (1 << 7))
10898                 val = -(int32_t)val;
10899             tcg_gen_addi_i32(tmp, tmp, val);
10900             store_reg(s, 13, tmp);
10901             break;
10902
10903         case 2: /* sign/zero extend.  */
10904             ARCH(6);
10905             rd = insn & 7;
10906             rm = (insn >> 3) & 7;
10907             tmp = load_reg(s, rm);
10908             switch ((insn >> 6) & 3) {
10909             case 0: gen_sxth(tmp); break;
10910             case 1: gen_sxtb(tmp); break;
10911             case 2: gen_uxth(tmp); break;
10912             case 3: gen_uxtb(tmp); break;
10913             }
10914             store_reg(s, rd, tmp);
10915             break;
10916         case 4: case 5: case 0xc: case 0xd:
10917             /* push/pop */
10918             addr = load_reg(s, 13);
10919             if (insn & (1 << 8))
10920                 offset = 4;
10921             else
10922                 offset = 0;
10923             for (i = 0; i < 8; i++) {
10924                 if (insn & (1 << i))
10925                     offset += 4;
10926             }
10927             if ((insn & (1 << 11)) == 0) {
10928                 tcg_gen_addi_i32(addr, addr, -offset);
10929             }
10930             for (i = 0; i < 8; i++) {
10931                 if (insn & (1 << i)) {
10932                     if (insn & (1 << 11)) {
10933                         /* pop */
10934                         tmp = tcg_temp_new_i32();
10935                         gen_aa32_ld32u(tmp, addr, get_mem_index(s));
10936                         store_reg(s, i, tmp);
10937                     } else {
10938                         /* push */
10939                         tmp = load_reg(s, i);
10940                         gen_aa32_st32(tmp, addr, get_mem_index(s));
10941                         tcg_temp_free_i32(tmp);
10942                     }
10943                     /* advance to the next address.  */
10944                     tcg_gen_addi_i32(addr, addr, 4);
10945                 }
10946             }
10947             TCGV_UNUSED_I32(tmp);
10948             if (insn & (1 << 8)) {
10949                 if (insn & (1 << 11)) {
10950                     /* pop pc */
10951                     tmp = tcg_temp_new_i32();
10952                     gen_aa32_ld32u(tmp, addr, get_mem_index(s));
10953                     /* don't set the pc until the rest of the instruction
10954                        has completed */
10955                 } else {
10956                     /* push lr */
10957                     tmp = load_reg(s, 14);
10958                     gen_aa32_st32(tmp, addr, get_mem_index(s));
10959                     tcg_temp_free_i32(tmp);
10960                 }
10961                 tcg_gen_addi_i32(addr, addr, 4);
10962             }
10963             if ((insn & (1 << 11)) == 0) {
10964                 tcg_gen_addi_i32(addr, addr, -offset);
10965             }
10966             /* write back the new stack pointer */
10967             store_reg(s, 13, addr);
10968             /* set the new PC value */
10969             if ((insn & 0x0900) == 0x0900) {
10970                 store_reg_from_load(s, 15, tmp);
10971             }
10972             break;
10973
10974         case 1: case 3: case 9: case 11: /* czb */
10975             rm = insn & 7;
10976             tmp = load_reg(s, rm);
10977             s->condlabel = gen_new_label();
10978             s->condjmp = 1;
10979             if (insn & (1 << 11))
10980                 tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, s->condlabel);
10981             else
10982                 tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, s->condlabel);
10983             tcg_temp_free_i32(tmp);
10984             offset = ((insn & 0xf8) >> 2) | (insn & 0x200) >> 3;
10985             val = (uint32_t)s->pc + 2;
10986             val += offset;
10987             gen_jmp(s, val);
10988             break;
10989
10990         case 15: /* IT, nop-hint.  */
10991             if ((insn & 0xf) == 0) {
10992                 gen_nop_hint(s, (insn >> 4) & 0xf);
10993                 break;
10994             }
10995             /* If Then.  */
10996             s->condexec_cond = (insn >> 4) & 0xe;
10997             s->condexec_mask = insn & 0x1f;
10998             /* No actual code generated for this insn, just setup state.  */
10999             break;
11000
11001         case 0xe: /* bkpt */
11002         {
11003             int imm8 = extract32(insn, 0, 8);
11004             ARCH(5);
11005             gen_exception_insn(s, 2, EXCP_BKPT, syn_aa32_bkpt(imm8, true),
11006                                default_exception_el(s));
11007             break;
11008         }
11009
11010         case 0xa: /* rev */
11011             ARCH(6);
11012             rn = (insn >> 3) & 0x7;
11013             rd = insn & 0x7;
11014             tmp = load_reg(s, rn);
11015             switch ((insn >> 6) & 3) {
11016             case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
11017             case 1: gen_rev16(tmp); break;
11018             case 3: gen_revsh(tmp); break;
11019             default: goto illegal_op;
11020             }
11021             store_reg(s, rd, tmp);
11022             break;
11023
11024         case 6:
11025             switch ((insn >> 5) & 7) {
11026             case 2:
11027                 /* setend */
11028                 ARCH(6);
11029                 if (((insn >> 3) & 1) != s->bswap_code) {
11030                     /* Dynamic endianness switching not implemented. */
11031                     qemu_log_mask(LOG_UNIMP, "arm: unimplemented setend\n");
11032                     goto illegal_op;
11033                 }
11034                 break;
11035             case 3:
11036                 /* cps */
11037                 ARCH(6);
11038                 if (IS_USER(s)) {
11039                     break;
11040                 }
11041                 if (arm_dc_feature(s, ARM_FEATURE_M)) {
11042                     tmp = tcg_const_i32((insn & (1 << 4)) != 0);
11043                     /* FAULTMASK */
11044                     if (insn & 1) {
11045                         addr = tcg_const_i32(19);
11046                         gen_helper_v7m_msr(cpu_env, addr, tmp);
11047                         tcg_temp_free_i32(addr);
11048                     }
11049                     /* PRIMASK */
11050                     if (insn & 2) {
11051                         addr = tcg_const_i32(16);
11052                         gen_helper_v7m_msr(cpu_env, addr, tmp);
11053                         tcg_temp_free_i32(addr);
11054                     }
11055                     tcg_temp_free_i32(tmp);
11056                     gen_lookup_tb(s);
11057                 } else {
11058                     if (insn & (1 << 4)) {
11059                         shift = CPSR_A | CPSR_I | CPSR_F;
11060                     } else {
11061                         shift = 0;
11062                     }
11063                     gen_set_psr_im(s, ((insn & 7) << 6), 0, shift);
11064                 }
11065                 break;
11066             default:
11067                 goto undef;
11068             }
11069             break;
11070
11071         default:
11072             goto undef;
11073         }
11074         break;
11075
11076     case 12:
11077     {
11078         /* load/store multiple */
11079         TCGv_i32 loaded_var;
11080         TCGV_UNUSED_I32(loaded_var);
11081         rn = (insn >> 8) & 0x7;
11082         addr = load_reg(s, rn);
11083         for (i = 0; i < 8; i++) {
11084             if (insn & (1 << i)) {
11085                 if (insn & (1 << 11)) {
11086                     /* load */
11087                     tmp = tcg_temp_new_i32();
11088                     gen_aa32_ld32u(tmp, addr, get_mem_index(s));
11089                     if (i == rn) {
11090                         loaded_var = tmp;
11091                     } else {
11092                         store_reg(s, i, tmp);
11093                     }
11094                 } else {
11095                     /* store */
11096                     tmp = load_reg(s, i);
11097                     gen_aa32_st32(tmp, addr, get_mem_index(s));
11098                     tcg_temp_free_i32(tmp);
11099                 }
11100                 /* advance to the next address */
11101                 tcg_gen_addi_i32(addr, addr, 4);
11102             }
11103         }
11104         if ((insn & (1 << rn)) == 0) {
11105             /* base reg not in list: base register writeback */
11106             store_reg(s, rn, addr);
11107         } else {
11108             /* base reg in list: if load, complete it now */
11109             if (insn & (1 << 11)) {
11110                 store_reg(s, rn, loaded_var);
11111             }
11112             tcg_temp_free_i32(addr);
11113         }
11114         break;
11115     }
11116     case 13:
11117         /* conditional branch or swi */
11118         cond = (insn >> 8) & 0xf;
11119         if (cond == 0xe)
11120             goto undef;
11121
11122         if (cond == 0xf) {
11123             /* swi */
11124             gen_set_pc_im(s, s->pc);
11125             s->svc_imm = extract32(insn, 0, 8);
11126             s->is_jmp = DISAS_SWI;
11127             break;
11128         }
11129         /* generate a conditional jump to next instruction */
11130         s->condlabel = gen_new_label();
11131         arm_gen_test_cc(cond ^ 1, s->condlabel);
11132         s->condjmp = 1;
11133
11134         /* jump to the offset */
11135         val = (uint32_t)s->pc + 2;
11136         offset = ((int32_t)insn << 24) >> 24;
11137         val += offset << 1;
11138         gen_jmp(s, val);
11139         break;
11140
11141     case 14:
11142         if (insn & (1 << 11)) {
11143             if (disas_thumb2_insn(env, s, insn))
11144               goto undef32;
11145             break;
11146         }
11147         /* unconditional branch */
11148         val = (uint32_t)s->pc;
11149         offset = ((int32_t)insn << 21) >> 21;
11150         val += (offset << 1) + 2;
11151         gen_jmp(s, val);
11152         break;
11153
11154     case 15:
11155         if (disas_thumb2_insn(env, s, insn))
11156             goto undef32;
11157         break;
11158     }
11159     return;
11160 undef32:
11161     gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
11162                        default_exception_el(s));
11163     return;
11164 illegal_op:
11165 undef:
11166     gen_exception_insn(s, 2, EXCP_UDEF, syn_uncategorized(),
11167                        default_exception_el(s));
11168 }
11169
/* Generate intermediate (TCG) code for basic block 'tb'.  If search_pc is
   TRUE, also record per-op PC / IT-state / icount information in
   tcg_ctx.gen_opc_* and gen_opc_condexec_bits[], which
   restore_state_to_opc() later uses to map back to guest state.
   Handles A32/T32 only; AArch64 TBs are dispatched to the A64 decoder. */
static inline void gen_intermediate_code_internal(ARMCPU *cpu,
                                                  TranslationBlock *tb,
                                                  bool search_pc)
{
    CPUState *cs = CPU(cpu);
    CPUARMState *env = &cpu->env;
    DisasContext dc1, *dc = &dc1;
    CPUBreakpoint *bp;
    int j, lj;
    target_ulong pc_start;
    target_ulong next_page_start;
    int num_insns;
    int max_insns;

    /* generate intermediate code */

    /* The A64 decoder has its own top level loop, because it doesn't need
     * the A32/T32 complexity to do with conditional execution/IT blocks/etc.
     */
    if (ARM_TBFLAG_AARCH64_STATE(tb->flags)) {
        gen_intermediate_code_internal_a64(cpu, tb, search_pc);
        return;
    }

    pc_start = tb->pc;

    dc->tb = tb;

    dc->is_jmp = DISAS_NEXT;
    dc->pc = pc_start;
    dc->singlestep_enabled = cs->singlestep_enabled;
    dc->condjmp = 0;

    dc->aarch64 = 0;
    /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
     * there is no secure EL1, so we route exceptions to EL3.
     */
    dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
                               !arm_el_is_aa64(env, 3);
    dc->thumb = ARM_TBFLAG_THUMB(tb->flags);
    dc->bswap_code = ARM_TBFLAG_BSWAP_CODE(tb->flags);
    /* The TB flags pack the IT bits as (cond << 4) | mask; in the
     * DisasContext the mask is kept shifted left by one (bits [4:1]).
     */
    dc->condexec_mask = (ARM_TBFLAG_CONDEXEC(tb->flags) & 0xf) << 1;
    dc->condexec_cond = ARM_TBFLAG_CONDEXEC(tb->flags) >> 4;
    dc->mmu_idx = ARM_TBFLAG_MMUIDX(tb->flags);
    dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
    dc->user = (dc->current_el == 0);
#endif
    dc->ns = ARM_TBFLAG_NS(tb->flags);
    dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(tb->flags);
    dc->vfp_enabled = ARM_TBFLAG_VFPEN(tb->flags);
    dc->vec_len = ARM_TBFLAG_VECLEN(tb->flags);
    dc->vec_stride = ARM_TBFLAG_VECSTRIDE(tb->flags);
    dc->c15_cpar = ARM_TBFLAG_XSCALE_CPAR(tb->flags);
    dc->cp_regs = cpu->cp_regs;
    dc->features = env->features;

    /* Single step state. The code-generation logic here is:
     *  SS_ACTIVE == 0:
     *   generate code with no special handling for single-stepping (except
     *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
     *   this happens anyway because those changes are all system register or
     *   PSTATE writes).
     *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
     *   emit code for one insn
     *   emit code to clear PSTATE.SS
     *   emit code to generate software step exception for completed step
     *   end TB (as usual for having generated an exception)
     *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
     *   emit code to generate a software step exception
     *   end the TB
     */
    dc->ss_active = ARM_TBFLAG_SS_ACTIVE(tb->flags);
    dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(tb->flags);
    dc->is_ldex = false;
    dc->ss_same_el = false; /* Can't be true since EL_d must be AArch64 */

    /* Allocate the scratch temporaries shared by the VFP/Neon decoders.  */
    cpu_F0s = tcg_temp_new_i32();
    cpu_F1s = tcg_temp_new_i32();
    cpu_F0d = tcg_temp_new_i64();
    cpu_F1d = tcg_temp_new_i64();
    cpu_V0 = cpu_F0d;
    cpu_V1 = cpu_F1d;
    /* FIXME: cpu_M0 can probably be the same as cpu_V0.  */
    cpu_M0 = tcg_temp_new_i64();
    next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
    lj = -1;  /* index of the last gen_opc_* slot filled (search_pc only) */
    num_insns = 0;
    max_insns = tb->cflags & CF_COUNT_MASK;
    if (max_insns == 0)
        max_insns = CF_COUNT_MASK;

    gen_tb_start(tb);

    tcg_clear_temp_count();

    /* A note on handling of the condexec (IT) bits:
     *
     * We want to avoid the overhead of having to write the updated condexec
     * bits back to the CPUARMState for every instruction in an IT block. So:
     * (1) if the condexec bits are not already zero then we write
     * zero back into the CPUARMState now. This avoids complications trying
     * to do it at the end of the block. (For example if we don't do this
     * it's hard to identify whether we can safely skip writing condexec
     * at the end of the TB, which we definitely want to do for the case
     * where a TB doesn't do anything with the IT state at all.)
     * (2) if we are going to leave the TB then we call gen_set_condexec()
     * which will write the correct value into CPUARMState if zero is wrong.
     * This is done both for leaving the TB at the end, and for leaving
     * it because of an exception we know will happen, which is done in
     * gen_exception_insn(). The latter is necessary because we need to
     * leave the TB with the PC/IT state just prior to execution of the
     * instruction which caused the exception.
     * (3) if we leave the TB unexpectedly (eg a data abort on a load)
     * then the CPUARMState will be wrong and we need to reset it.
     * This is handled in the same way as restoration of the
     * PC in these situations: we will be called again with search_pc=1
     * and generate a mapping of the condexec bits for each PC in
     * gen_opc_condexec_bits[]. restore_state_to_opc() then uses
     * this to restore the condexec bits.
     *
     * Note that there are no instructions which can read the condexec
     * bits, and none which can write non-static values to them, so
     * we don't need to care about whether CPUARMState is correct in the
     * middle of a TB.
     */

    /* Reset the conditional execution bits immediately. This avoids
       complications trying to do it at the end of the block.  */
    if (dc->condexec_mask || dc->condexec_cond)
      {
        TCGv_i32 tmp = tcg_temp_new_i32();
        tcg_gen_movi_i32(tmp, 0);
        store_cpu_field(tmp, condexec_bits);
      }
    do {
#ifdef CONFIG_USER_ONLY
        /* Intercept jump to the magic kernel page.  */
        if (dc->pc >= 0xffff0000) {
            /* We always get here via a jump, so know we are not in a
               conditional execution block.  */
            gen_exception_internal(EXCP_KERNEL_TRAP);
            dc->is_jmp = DISAS_UPDATE;
            break;
        }
#else
        if (dc->pc >= 0xfffffff0 && arm_dc_feature(dc, ARM_FEATURE_M)) {
            /* We always get here via a jump, so know we are not in a
               conditional execution block.  */
            gen_exception_internal(EXCP_EXCEPTION_EXIT);
            dc->is_jmp = DISAS_UPDATE;
            break;
        }
#endif

        /* Emit a debug exception instead of translating an insn that has
         * a breakpoint set on it.
         */
        if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
            QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
                if (bp->pc == dc->pc) {
                    gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
                    /* Advance PC so that clearing the breakpoint will
                       invalidate this TB.  */
                    dc->pc += 2;
                    goto done_generating;
                }
            }
        }
        if (search_pc) {
            /* Zero-fill any op slots skipped since the last insn, then
             * record guest PC, IT state and insn count for this op index.
             */
            j = tcg_op_buf_count();
            if (lj < j) {
                lj++;
                while (lj < j)
                    tcg_ctx.gen_opc_instr_start[lj++] = 0;
            }
            tcg_ctx.gen_opc_pc[lj] = dc->pc;
            gen_opc_condexec_bits[lj] = (dc->condexec_cond << 4) | (dc->condexec_mask >> 1);
            tcg_ctx.gen_opc_instr_start[lj] = 1;
            tcg_ctx.gen_opc_icount[lj] = num_insns;
        }

        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
            gen_io_start();

        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
            tcg_gen_debug_insn_start(dc->pc);
        }

        if (dc->ss_active && !dc->pstate_ss) {
            /* Singlestep state is Active-pending.
             * If we're in this state at the start of a TB then either
             *  a) we just took an exception to an EL which is being debugged
             *     and this is the first insn in the exception handler
             *  b) debug exceptions were masked and we just unmasked them
             *     without changing EL (eg by clearing PSTATE.D)
             * In either case we're going to take a swstep exception in the
             * "did not step an insn" case, and so the syndrome ISV and EX
             * bits should be zero.
             */
            assert(num_insns == 0);
            gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
                          default_exception_el(dc));
            goto done_generating;
        }

        if (dc->thumb) {
            disas_thumb_insn(env, dc);
            /* Advance the IT block state after each insn in the block.  */
            if (dc->condexec_mask) {
                dc->condexec_cond = (dc->condexec_cond & 0xe)
                                   | ((dc->condexec_mask >> 4) & 1);
                dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
                if (dc->condexec_mask == 0) {
                    dc->condexec_cond = 0;
                }
            }
        } else {
            unsigned int insn = arm_ldl_code(env, dc->pc, dc->bswap_code);
            dc->pc += 4;
            disas_arm_insn(dc, insn);
        }

        /* Close the label for a conditionally-executed insn that did not
         * itself end the TB.
         */
        if (dc->condjmp && !dc->is_jmp) {
            gen_set_label(dc->condlabel);
            dc->condjmp = 0;
        }

        if (tcg_check_temp_count()) {
            fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n",
                    dc->pc);
        }

        /* Translation stops when a conditional branch is encountered.
         * Otherwise the subsequent code could get translated several times.
         * Also stop translation when a page boundary is reached.  This
         * ensures prefetch aborts occur at the right place.  */
        num_insns ++;
    } while (!dc->is_jmp && !tcg_op_buf_full() &&
             !cs->singlestep_enabled &&
             !singlestep &&
             !dc->ss_active &&
             dc->pc < next_page_start &&
             num_insns < max_insns);

    if (tb->cflags & CF_LAST_IO) {
        if (dc->condjmp) {
            /* FIXME:  This can theoretically happen with self-modifying
               code.  */
            cpu_abort(cs, "IO on conditional branch instruction");
        }
        gen_io_end();
    }

    /* At this stage dc->condjmp will only be set when the skipped
       instruction was a conditional branch or trap, and the PC has
       already been written.  */
    if (unlikely(cs->singlestep_enabled || dc->ss_active)) {
        /* Make sure the pc is updated, and raise a debug exception.  */
        if (dc->condjmp) {
            gen_set_condexec(dc);
            if (dc->is_jmp == DISAS_SWI) {
                gen_ss_advance(dc);
                gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
                              default_exception_el(dc));
            } else if (dc->is_jmp == DISAS_HVC) {
                gen_ss_advance(dc);
                gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
            } else if (dc->is_jmp == DISAS_SMC) {
                gen_ss_advance(dc);
                gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
            } else if (dc->ss_active) {
                gen_step_complete_exception(dc);
            } else {
                gen_exception_internal(EXCP_DEBUG);
            }
            gen_set_label(dc->condlabel);
        }
        if (dc->condjmp || !dc->is_jmp) {
            gen_set_pc_im(dc, dc->pc);
            dc->condjmp = 0;
        }
        gen_set_condexec(dc);
        if (dc->is_jmp == DISAS_SWI && !dc->condjmp) {
            gen_ss_advance(dc);
            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
                          default_exception_el(dc));
        } else if (dc->is_jmp == DISAS_HVC && !dc->condjmp) {
            gen_ss_advance(dc);
            gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
        } else if (dc->is_jmp == DISAS_SMC && !dc->condjmp) {
            gen_ss_advance(dc);
            gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
        } else if (dc->ss_active) {
            gen_step_complete_exception(dc);
        } else {
            /* FIXME: Single stepping a WFI insn will not halt
               the CPU.  */
            gen_exception_internal(EXCP_DEBUG);
        }
    } else {
        /* While branches must always occur at the end of an IT block,
           there are a few other things that can cause us to terminate
           the TB in the middle of an IT block:
            - Exception generating instructions (bkpt, swi, undefined).
            - Page boundaries.
            - Hardware watchpoints.
           Hardware breakpoints have already been handled and skip this code.
         */
        gen_set_condexec(dc);
        switch(dc->is_jmp) {
        case DISAS_NEXT:
            gen_goto_tb(dc, 1, dc->pc);
            break;
        default:
        case DISAS_JUMP:
        case DISAS_UPDATE:
            /* indicate that the hash table must be used to find the next TB */
            tcg_gen_exit_tb(0);
            break;
        case DISAS_TB_JUMP:
            /* nothing more to generate */
            break;
        case DISAS_WFI:
            gen_helper_wfi(cpu_env);
            /* The helper doesn't necessarily throw an exception, but we
             * must go back to the main loop to check for interrupts anyway.
             */
            tcg_gen_exit_tb(0);
            break;
        case DISAS_WFE:
            gen_helper_wfe(cpu_env);
            break;
        case DISAS_YIELD:
            gen_helper_yield(cpu_env);
            break;
        case DISAS_SWI:
            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
                          default_exception_el(dc));
            break;
        case DISAS_HVC:
            gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
            break;
        case DISAS_SMC:
            gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
            break;
        }
        if (dc->condjmp) {
            /* The condition-failed path falls through to the next insn.  */
            gen_set_label(dc->condlabel);
            gen_set_condexec(dc);
            gen_goto_tb(dc, 1, dc->pc);
            dc->condjmp = 0;
        }
    }

done_generating:
    gen_tb_end(tb, num_insns);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
        qemu_log("----------------\n");
        qemu_log("IN: %s\n", lookup_symbol(pc_start));
        log_target_disas(cs, pc_start, dc->pc - pc_start,
                         dc->thumb | (dc->bswap_code << 1));
        qemu_log("\n");
    }
#endif
    if (search_pc) {
        /* Mark any trailing op slots as not being insn starts.  */
        j = tcg_op_buf_count();
        lj++;
        while (lj <= j)
            tcg_ctx.gen_opc_instr_start[lj++] = 0;
    } else {
        tb->size = dc->pc - pc_start;
        tb->icount = num_insns;
    }
}
11546
11547 void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
11548 {
11549     gen_intermediate_code_internal(arm_env_get_cpu(env), tb, false);
11550 }
11551
11552 void gen_intermediate_code_pc(CPUARMState *env, TranslationBlock *tb)
11553 {
11554     gen_intermediate_code_internal(arm_env_get_cpu(env), tb, true);
11555 }
11556
/* CPU mode names, indexed by the low 4 bits of the PSR (see the
 * cpu_mode_names[psr & 0xf] use in arm_cpu_dump_state()); "???" marks
 * encodings with no corresponding mode name.
 */
static const char *cpu_mode_names[16] = {
  "usr", "fiq", "irq", "svc", "???", "???", "mon", "abt",
  "???", "???", "hyp", "und", "???", "???", "???", "sys"
};
11561
11562 void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
11563                         int flags)
11564 {
11565     ARMCPU *cpu = ARM_CPU(cs);
11566     CPUARMState *env = &cpu->env;
11567     int i;
11568     uint32_t psr;
11569
11570     if (is_a64(env)) {
11571         aarch64_cpu_dump_state(cs, f, cpu_fprintf, flags);
11572         return;
11573     }
11574
11575     for(i=0;i<16;i++) {
11576         cpu_fprintf(f, "R%02d=%08x", i, env->regs[i]);
11577         if ((i % 4) == 3)
11578             cpu_fprintf(f, "\n");
11579         else
11580             cpu_fprintf(f, " ");
11581     }
11582     psr = cpsr_read(env);
11583     cpu_fprintf(f, "PSR=%08x %c%c%c%c %c %s%d\n",
11584                 psr,
11585                 psr & (1 << 31) ? 'N' : '-',
11586                 psr & (1 << 30) ? 'Z' : '-',
11587                 psr & (1 << 29) ? 'C' : '-',
11588                 psr & (1 << 28) ? 'V' : '-',
11589                 psr & CPSR_T ? 'T' : 'A',
11590                 cpu_mode_names[psr & 0xf], (psr & 0x10) ? 32 : 26);
11591
11592     if (flags & CPU_DUMP_FPU) {
11593         int numvfpregs = 0;
11594         if (arm_feature(env, ARM_FEATURE_VFP)) {
11595             numvfpregs += 16;
11596         }
11597         if (arm_feature(env, ARM_FEATURE_VFP3)) {
11598             numvfpregs += 16;
11599         }
11600         for (i = 0; i < numvfpregs; i++) {
11601             uint64_t v = float64_val(env->vfp.regs[i]);
11602             cpu_fprintf(f, "s%02d=%08x s%02d=%08x d%02d=%016" PRIx64 "\n",
11603                         i * 2, (uint32_t)v,
11604                         i * 2 + 1, (uint32_t)(v >> 32),
11605                         i, v);
11606         }
11607         cpu_fprintf(f, "FPSCR: %08x\n", (int)env->vfp.xregs[ARM_VFP_FPSCR]);
11608     }
11609 }
11610
11611 void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb, int pc_pos)
11612 {
11613     if (is_a64(env)) {
11614         env->pc = tcg_ctx.gen_opc_pc[pc_pos];
11615         env->condexec_bits = 0;
11616     } else {
11617         env->regs[15] = tcg_ctx.gen_opc_pc[pc_pos];
11618         env->condexec_bits = gen_opc_condexec_bits[pc_pos];
11619     }
11620 }