/*
 *  ARM translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *  Copyright (c) 2005-2007 CodeSourcery
 *  Copyright (c) 2007 OpenedHand, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>

#include "cpu.h"
#include "internals.h"
#include "disas/disas.h"
#include "tcg-op.h"
#include "qemu/log.h"
#include "qemu/bitops.h"
#include "arm_ldst.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"

#include "trace-tcg.h"


#define ENABLE_ARCH_4T    arm_dc_feature(s, ARM_FEATURE_V4T)
#define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
/* currently all emulated v5 cores are also v5TE, so don't bother */
#define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
#define ENABLE_ARCH_5J    0
#define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
#define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
#define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
#define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
#define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)

#define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while (0)

#include "translate.h"

#if defined(CONFIG_USER_ONLY)
#define IS_USER(s) 1
#else
#define IS_USER(s) (s->user)
#endif

TCGv_ptr cpu_env;
/* We reuse the same 64-bit temporaries for efficiency.  */
static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
static TCGv_i32 cpu_R[16];
TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
TCGv_i64 cpu_exclusive_addr;
TCGv_i64 cpu_exclusive_val;
#ifdef CONFIG_USER_ONLY
TCGv_i64 cpu_exclusive_test;
TCGv_i32 cpu_exclusive_info;
#endif

/* FIXME:  These should be removed.  */
static TCGv_i32 cpu_F0s, cpu_F1s;
static TCGv_i64 cpu_F0d, cpu_F1d;

#include "exec/gen-icount.h"

static const char *regnames[] =
    { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
      "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };

/* initialize TCG globals.  */
void arm_translate_init(void)
{
    int i;

    cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");

    for (i = 0; i < 16; i++) {
        cpu_R[i] = tcg_global_mem_new_i32(TCG_AREG0,
                                          offsetof(CPUARMState, regs[i]),
                                          regnames[i]);
    }
    cpu_CF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, CF), "CF");
    cpu_NF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, NF), "NF");
    cpu_VF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, VF), "VF");
    cpu_ZF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, ZF), "ZF");

    cpu_exclusive_addr = tcg_global_mem_new_i64(TCG_AREG0,
        offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
    cpu_exclusive_val = tcg_global_mem_new_i64(TCG_AREG0,
        offsetof(CPUARMState, exclusive_val), "exclusive_val");
#ifdef CONFIG_USER_ONLY
    cpu_exclusive_test = tcg_global_mem_new_i64(TCG_AREG0,
        offsetof(CPUARMState, exclusive_test), "exclusive_test");
    cpu_exclusive_info = tcg_global_mem_new_i32(TCG_AREG0,
        offsetof(CPUARMState, exclusive_info), "exclusive_info");
#endif

    a64_translate_init();
}

static inline ARMMMUIdx get_a32_user_mem_index(DisasContext *s)
{
    /* Return the mmu_idx to use for A32/T32 "unprivileged load/store"
     * insns:
     *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
     *  otherwise, access as if at PL0.
     */
    switch (s->mmu_idx) {
    case ARMMMUIdx_S1E2:        /* this one is UNPREDICTABLE */
    case ARMMMUIdx_S12NSE0:
    case ARMMMUIdx_S12NSE1:
        return ARMMMUIdx_S12NSE0;
    case ARMMMUIdx_S1E3:
    case ARMMMUIdx_S1SE0:
    case ARMMMUIdx_S1SE1:
        return ARMMMUIdx_S1SE0;
    case ARMMMUIdx_S2NS:
    default:
        g_assert_not_reached();
    }
}

static inline TCGv_i32 load_cpu_offset(int offset)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_ld_i32(tmp, cpu_env, offset);
    return tmp;
}

#define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))

static inline void store_cpu_offset(TCGv_i32 var, int offset)
{
    tcg_gen_st_i32(var, cpu_env, offset);
    tcg_temp_free_i32(var);
}

#define store_cpu_field(var, name) \
    store_cpu_offset(var, offsetof(CPUARMState, name))

/* Set a variable to the value of a CPU register.  */
static void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
{
    if (reg == 15) {
        uint32_t addr;
        /* s->pc has already been advanced past this insn, so we need
           only add one more insn length (4 for ARM, 2 for Thumb).  */
        if (s->thumb)
            addr = (long)s->pc + 2;
        else
            addr = (long)s->pc + 4;
        tcg_gen_movi_i32(var, addr);
    } else {
        tcg_gen_mov_i32(var, cpu_R[reg]);
    }
}

/* Create a new temporary and set it to the value of a CPU register.  */
static inline TCGv_i32 load_reg(DisasContext *s, int reg)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    load_reg_var(s, tmp, reg);
    return tmp;
}

/* Set a CPU register.  The source must be a temporary and will be
   marked as dead.  */
static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
{
    if (reg == 15) {
        tcg_gen_andi_i32(var, var, ~1);
        s->is_jmp = DISAS_JUMP;
    }
    tcg_gen_mov_i32(cpu_R[reg], var);
    tcg_temp_free_i32(var);
}

/* Value extensions.  */
#define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
#define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
#define gen_sxth(var) tcg_gen_ext16s_i32(var, var)

#define gen_sxtb16(var) gen_helper_sxtb16(var, var)
#define gen_uxtb16(var) gen_helper_uxtb16(var, var)


static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
{
    TCGv_i32 tmp_mask = tcg_const_i32(mask);
    gen_helper_cpsr_write(cpu_env, var, tmp_mask);
    tcg_temp_free_i32(tmp_mask);
}

/* Set NZCV flags from the high 4 bits of var.  */
#define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)

static void gen_exception_internal(int excp)
{
    TCGv_i32 tcg_excp = tcg_const_i32(excp);

    assert(excp_is_internal(excp));
    gen_helper_exception_internal(cpu_env, tcg_excp);
    tcg_temp_free_i32(tcg_excp);
}

static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
{
    TCGv_i32 tcg_excp = tcg_const_i32(excp);
    TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
    TCGv_i32 tcg_el = tcg_const_i32(target_el);

    gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
                                       tcg_syn, tcg_el);

    tcg_temp_free_i32(tcg_el);
    tcg_temp_free_i32(tcg_syn);
    tcg_temp_free_i32(tcg_excp);
}

static void gen_ss_advance(DisasContext *s)
{
    /* If the singlestep state is Active-not-pending, advance to
     * Active-pending.
     */
    if (s->ss_active) {
        s->pstate_ss = 0;
        gen_helper_clear_pstate_ss(cpu_env);
    }
}

static void gen_step_complete_exception(DisasContext *s)
{
    /* We just completed a step of an insn. Move from Active-not-pending
     * to Active-pending, and then also take the swstep exception.
     * This corresponds to making the (IMPDEF) choice to prioritize
     * swstep exceptions over asynchronous exceptions taken to an exception
     * level where debug is disabled. This choice has the advantage that
     * we do not need to maintain internal state corresponding to the
     * ISV/EX syndrome bits between completion of the step and generation
     * of the exception, and our syndrome information is always correct.
     */
    gen_ss_advance(s);
    gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
                  default_exception_el(s));
    s->is_jmp = DISAS_EXC;
}

static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 tmp1 = tcg_temp_new_i32();
    TCGv_i32 tmp2 = tcg_temp_new_i32();
    tcg_gen_ext16s_i32(tmp1, a);
    tcg_gen_ext16s_i32(tmp2, b);
    tcg_gen_mul_i32(tmp1, tmp1, tmp2);
    tcg_temp_free_i32(tmp2);
    tcg_gen_sari_i32(a, a, 16);
    tcg_gen_sari_i32(b, b, 16);
    tcg_gen_mul_i32(b, b, a);
    tcg_gen_mov_i32(a, tmp1);
    tcg_temp_free_i32(tmp1);
}

/* Byteswap each halfword.  */
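/* For example, var = 0x11223344 becomes 0x22114433: the bytes are swapped
 * within each 16-bit halfword, not across the whole word.
 */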
static void gen_rev16(TCGv_i32 var)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_shri_i32(tmp, var, 8);
    tcg_gen_andi_i32(tmp, tmp, 0x00ff00ff);
    tcg_gen_shli_i32(var, var, 8);
    tcg_gen_andi_i32(var, var, 0xff00ff00);
    tcg_gen_or_i32(var, var, tmp);
    tcg_temp_free_i32(tmp);
}

/* Byteswap low halfword and sign extend.  */
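/* For example, var = 0x00001280 becomes 0xffff8012: 0x1280 is byteswapped
 * to 0x8012 and then sign extended from 16 bits.
 */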
static void gen_revsh(TCGv_i32 var)
{
    tcg_gen_ext16u_i32(var, var);
    tcg_gen_bswap16_i32(var, var);
    tcg_gen_ext16s_i32(var, var);
}

/* Unsigned bitfield extract.  */
static void gen_ubfx(TCGv_i32 var, int shift, uint32_t mask)
{
    if (shift)
        tcg_gen_shri_i32(var, var, shift);
    tcg_gen_andi_i32(var, var, mask);
}

/* Signed bitfield extract.  */
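/* The xor/sub pair below is the usual branch-free sign extension: for a
 * width-bit value x, (x ^ signbit) - signbit copies bit (width - 1) into
 * all higher bits.  E.g. width 4, x = 0b1010: 0x0a ^ 0x08 = 0x02, then
 * 0x02 - 0x08 = -6, the signed value of the 4-bit field.
 */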
static void gen_sbfx(TCGv_i32 var, int shift, int width)
{
    uint32_t signbit;

    if (shift)
        tcg_gen_sari_i32(var, var, shift);
    if (shift + width < 32) {
        signbit = 1u << (width - 1);
        tcg_gen_andi_i32(var, var, (1u << width) - 1);
        tcg_gen_xori_i32(var, var, signbit);
        tcg_gen_subi_i32(var, var, signbit);
    }
}

/* Return (b << 32) + a.  Mark inputs as dead.  */
static TCGv_i64 gen_addq_msw(TCGv_i64 a, TCGv_i32 b)
{
    TCGv_i64 tmp64 = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp64, b);
    tcg_temp_free_i32(b);
    tcg_gen_shli_i64(tmp64, tmp64, 32);
    tcg_gen_add_i64(a, tmp64, a);

    tcg_temp_free_i64(tmp64);
    return a;
}

/* Return (b << 32) - a.  Mark inputs as dead.  */
static TCGv_i64 gen_subq_msw(TCGv_i64 a, TCGv_i32 b)
{
    TCGv_i64 tmp64 = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp64, b);
    tcg_temp_free_i32(b);
    tcg_gen_shli_i64(tmp64, tmp64, 32);
    tcg_gen_sub_i64(a, tmp64, a);

    tcg_temp_free_i64(tmp64);
    return a;
}

/* 32x32->64 multiply.  Marks inputs as dead.  */
static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 lo = tcg_temp_new_i32();
    TCGv_i32 hi = tcg_temp_new_i32();
    TCGv_i64 ret;

    tcg_gen_mulu2_i32(lo, hi, a, b);
    tcg_temp_free_i32(a);
    tcg_temp_free_i32(b);

    ret = tcg_temp_new_i64();
    tcg_gen_concat_i32_i64(ret, lo, hi);
    tcg_temp_free_i32(lo);
    tcg_temp_free_i32(hi);

    return ret;
}

static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i32 lo = tcg_temp_new_i32();
    TCGv_i32 hi = tcg_temp_new_i32();
    TCGv_i64 ret;

    tcg_gen_muls2_i32(lo, hi, a, b);
    tcg_temp_free_i32(a);
    tcg_temp_free_i32(b);

    ret = tcg_temp_new_i64();
    tcg_gen_concat_i32_i64(ret, lo, hi);
    tcg_temp_free_i32(lo);
    tcg_temp_free_i32(hi);

    return ret;
}

/* Swap low and high halfwords.  */
static void gen_swap_half(TCGv_i32 var)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_shri_i32(tmp, var, 16);
    tcg_gen_shli_i32(var, var, 16);
    tcg_gen_or_i32(var, var, tmp);
    tcg_temp_free_i32(tmp);
}

/* Dual 16-bit add.  The result is placed in t0, and t1 is marked as dead:
    tmp = (t0 ^ t1) & 0x8000;
    t0 &= ~0x8000;
    t1 &= ~0x8000;
    t0 = (t0 + t1) ^ tmp;
 */
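/* Worked example: t0 = 0x00018000, t1 = 0x00018000.  Clearing bit 15 of
 * both inputs means the low halfword sum (at most 0x7fff + 0x7fff) cannot
 * carry into the high halfword, and the final xor with tmp restores the
 * correct bit 15.  The result is 0x00020000: 0x8000 + 0x8000 and
 * 0x0001 + 0x0001 each computed independently modulo 2^16.
 */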
static void gen_add16(TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_andi_i32(tmp, tmp, 0x8000);
    tcg_gen_andi_i32(t0, t0, ~0x8000);
    tcg_gen_andi_i32(t1, t1, ~0x8000);
    tcg_gen_add_i32(t0, t0, t1);
    tcg_gen_xor_i32(t0, t0, tmp);
    tcg_temp_free_i32(tmp);
    tcg_temp_free_i32(t1);
}

/* Set CF to the top bit of var.  */
static void gen_set_CF_bit31(TCGv_i32 var)
{
    tcg_gen_shri_i32(cpu_CF, var, 31);
}

/* Set N and Z flags from var.  */
static inline void gen_logic_CC(TCGv_i32 var)
{
    tcg_gen_mov_i32(cpu_NF, var);
    tcg_gen_mov_i32(cpu_ZF, var);
}

/* T0 += T1 + CF.  */
static void gen_adc(TCGv_i32 t0, TCGv_i32 t1)
{
    tcg_gen_add_i32(t0, t0, t1);
    tcg_gen_add_i32(t0, t0, cpu_CF);
}

/* dest = T0 + T1 + CF. */
static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    tcg_gen_add_i32(dest, t0, t1);
    tcg_gen_add_i32(dest, dest, cpu_CF);
}

/* dest = T0 - T1 + CF - 1.  */
static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    tcg_gen_sub_i32(dest, t0, t1);
    tcg_gen_add_i32(dest, dest, cpu_CF);
    tcg_gen_subi_i32(dest, dest, 1);
}

/* dest = T0 + T1. Compute C, N, V and Z flags */
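/* V is the classic signed-overflow identity: overflow occurred iff the
 * operands have the same sign and the result's sign differs, i.e.
 * VF = (result ^ t0) & ~(t0 ^ t1), with the V flag held in bit 31 of VF.
 */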
static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_movi_i32(tmp, 0);
    tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    tcg_temp_free_i32(tmp);
    tcg_gen_mov_i32(dest, cpu_NF);
}

/* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
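/* tcg_gen_add2_i32 computes a double-word sum into two i32 outputs (low
 * half first, then high half), so chaining two of them accumulates the
 * carry-out of t0 + CF + t1 into cpu_CF without a 64-bit temporary.
 */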
static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    if (TCG_TARGET_HAS_add2_i32) {
        tcg_gen_movi_i32(tmp, 0);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
    } else {
        TCGv_i64 q0 = tcg_temp_new_i64();
        TCGv_i64 q1 = tcg_temp_new_i64();
        tcg_gen_extu_i32_i64(q0, t0);
        tcg_gen_extu_i32_i64(q1, t1);
        tcg_gen_add_i64(q0, q0, q1);
        tcg_gen_extu_i32_i64(q1, cpu_CF);
        tcg_gen_add_i64(q0, q0, q1);
        tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
        tcg_temp_free_i64(q0);
        tcg_temp_free_i64(q1);
    }
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
    tcg_temp_free_i32(tmp);
    tcg_gen_mov_i32(dest, cpu_NF);
}

/* dest = T0 - T1. Compute C, N, V and Z flags */
static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp;
    tcg_gen_sub_i32(cpu_NF, t0, t1);
    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
    tmp = tcg_temp_new_i32();
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
    tcg_temp_free_i32(tmp);
    tcg_gen_mov_i32(dest, cpu_NF);
}

/* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
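/* Subtract-with-carry uses the identity T0 - T1 - !CF == T0 + ~T1 + CF,
 * so we can reuse the ADC flag computation on the complemented operand.
 */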
static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_not_i32(tmp, t1);
    gen_adc_CC(dest, t0, tmp);
    tcg_temp_free_i32(tmp);
}

#define GEN_SHIFT(name)                                               \
static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
{                                                                     \
    TCGv_i32 tmp1, tmp2, tmp3;                                        \
    tmp1 = tcg_temp_new_i32();                                        \
    tcg_gen_andi_i32(tmp1, t1, 0xff);                                 \
    tmp2 = tcg_const_i32(0);                                          \
    tmp3 = tcg_const_i32(0x1f);                                       \
    tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0);    \
    tcg_temp_free_i32(tmp3);                                          \
    tcg_gen_andi_i32(tmp1, tmp1, 0x1f);                               \
    tcg_gen_##name##_i32(dest, tmp2, tmp1);                           \
    tcg_temp_free_i32(tmp2);                                          \
    tcg_temp_free_i32(tmp1);                                          \
}
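/* ARM register-specified shifts use the bottom byte of the shift register,
 * and amounts of 32 or more must yield zero, while TCG shift results are
 * undefined for counts >= 32.  The movcond above therefore substitutes a
 * zero input when the 0..255 amount exceeds 31, and the count itself is
 * masked to 5 bits before the TCG shift.
 */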
GEN_SHIFT(shl)
GEN_SHIFT(shr)
#undef GEN_SHIFT

static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp1, tmp2;
    tmp1 = tcg_temp_new_i32();
    tcg_gen_andi_i32(tmp1, t1, 0xff);
    tmp2 = tcg_const_i32(0x1f);
    tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
    tcg_temp_free_i32(tmp2);
    tcg_gen_sar_i32(dest, t0, tmp1);
    tcg_temp_free_i32(tmp1);
}

static void tcg_gen_abs_i32(TCGv_i32 dest, TCGv_i32 src)
{
    TCGv_i32 c0 = tcg_const_i32(0);
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_neg_i32(tmp, src);
    tcg_gen_movcond_i32(TCG_COND_GT, dest, src, c0, src, tmp);
    tcg_temp_free_i32(c0);
    tcg_temp_free_i32(tmp);
}

static void shifter_out_im(TCGv_i32 var, int shift)
{
    if (shift == 0) {
        tcg_gen_andi_i32(cpu_CF, var, 1);
    } else {
        tcg_gen_shri_i32(cpu_CF, var, shift);
        if (shift != 31) {
            tcg_gen_andi_i32(cpu_CF, cpu_CF, 1);
        }
    }
}

/* Shift by immediate.  Includes special handling for shift == 0.  */
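/* In the immediate encoding a shift count of 0 is special: LSR #0 and
 * ASR #0 actually encode LSR #32 and ASR #32, and ROR #0 encodes RRX
 * (rotate right by one through the carry flag); only LSL uses 0 to mean
 * "no shift".
 */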
static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
                                    int shift, int flags)
{
    switch (shiftop) {
    case 0: /* LSL */
        if (shift != 0) {
            if (flags)
                shifter_out_im(var, 32 - shift);
            tcg_gen_shli_i32(var, var, shift);
        }
        break;
    case 1: /* LSR */
        if (shift == 0) {
            if (flags) {
                tcg_gen_shri_i32(cpu_CF, var, 31);
            }
            tcg_gen_movi_i32(var, 0);
        } else {
            if (flags)
                shifter_out_im(var, shift - 1);
            tcg_gen_shri_i32(var, var, shift);
        }
        break;
    case 2: /* ASR */
        if (shift == 0)
            shift = 32;
        if (flags)
            shifter_out_im(var, shift - 1);
        if (shift == 32)
            shift = 31;
        tcg_gen_sari_i32(var, var, shift);
        break;
    case 3: /* ROR/RRX */
        if (shift != 0) {
            if (flags)
                shifter_out_im(var, shift - 1);
            tcg_gen_rotri_i32(var, var, shift);
        } else {
            TCGv_i32 tmp = tcg_temp_new_i32();
            tcg_gen_shli_i32(tmp, cpu_CF, 31);
            if (flags)
                shifter_out_im(var, 0);
            tcg_gen_shri_i32(var, var, 1);
            tcg_gen_or_i32(var, var, tmp);
            tcg_temp_free_i32(tmp);
        }
        break;
    }
}

static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
                                     TCGv_i32 shift, int flags)
{
    if (flags) {
        switch (shiftop) {
        case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
        case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
        case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
        case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
        }
    } else {
        switch (shiftop) {
        case 0:
            gen_shl(var, var, shift);
            break;
        case 1:
            gen_shr(var, var, shift);
            break;
        case 2:
            gen_sar(var, var, shift);
            break;
        case 3:
            tcg_gen_andi_i32(shift, shift, 0x1f);
            tcg_gen_rotr_i32(var, var, shift);
            break;
        }
    }
    tcg_temp_free_i32(shift);
}

#define PAS_OP(pfx) \
    switch (op2) {  \
    case 0: gen_pas_helper(glue(pfx,add16)); break; \
    case 1: gen_pas_helper(glue(pfx,addsubx)); break; \
    case 2: gen_pas_helper(glue(pfx,subaddx)); break; \
    case 3: gen_pas_helper(glue(pfx,sub16)); break; \
    case 4: gen_pas_helper(glue(pfx,add8)); break; \
    case 7: gen_pas_helper(glue(pfx,sub8)); break; \
    }
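/* With op2 == 0, for example, PAS_OP(s) expands via gen_pas_helper to
 * gen_helper_sadd16(a, a, b, tmp): the s/u variants update the GE flags
 * and so receive a pointer to them, while the saturating and halving
 * variants (q, sh, uq, uh) take no extra argument.
 */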
static void gen_arm_parallel_addsub(int op1, int op2, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_ptr tmp;

    switch (op1) {
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
    case 1:
        tmp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
        PAS_OP(s)
        tcg_temp_free_ptr(tmp);
        break;
    case 5:
        tmp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
        PAS_OP(u)
        tcg_temp_free_ptr(tmp);
        break;
#undef gen_pas_helper
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
    case 2:
        PAS_OP(q);
        break;
    case 3:
        PAS_OP(sh);
        break;
    case 6:
        PAS_OP(uq);
        break;
    case 7:
        PAS_OP(uh);
        break;
#undef gen_pas_helper
    }
}
#undef PAS_OP

/* For unknown reasons Arm and Thumb-2 use arbitrarily different encodings.  */
#define PAS_OP(pfx) \
    switch (op1) {  \
    case 0: gen_pas_helper(glue(pfx,add8)); break; \
    case 1: gen_pas_helper(glue(pfx,add16)); break; \
    case 2: gen_pas_helper(glue(pfx,addsubx)); break; \
    case 4: gen_pas_helper(glue(pfx,sub8)); break; \
    case 5: gen_pas_helper(glue(pfx,sub16)); break; \
    case 6: gen_pas_helper(glue(pfx,subaddx)); break; \
    }
static void gen_thumb2_parallel_addsub(int op1, int op2, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_ptr tmp;

    switch (op2) {
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
    case 0:
        tmp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
        PAS_OP(s)
        tcg_temp_free_ptr(tmp);
        break;
    case 4:
        tmp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
        PAS_OP(u)
        tcg_temp_free_ptr(tmp);
        break;
#undef gen_pas_helper
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
    case 1:
        PAS_OP(q);
        break;
    case 2:
        PAS_OP(sh);
        break;
    case 5:
        PAS_OP(uq);
        break;
    case 6:
        PAS_OP(uh);
        break;
#undef gen_pas_helper
    }
}
#undef PAS_OP

/*
 * Generate a conditional based on ARM condition code cc.
 * This is common between ARM and AArch64 targets.
 */
void arm_test_cc(DisasCompare *cmp, int cc)
{
    TCGv_i32 value;
    TCGCond cond;
    bool global = true;

    switch (cc) {
    case 0: /* eq: Z */
    case 1: /* ne: !Z */
        cond = TCG_COND_EQ;
        value = cpu_ZF;
        break;

    case 2: /* cs: C */
    case 3: /* cc: !C */
        cond = TCG_COND_NE;
        value = cpu_CF;
        break;

    case 4: /* mi: N */
    case 5: /* pl: !N */
        cond = TCG_COND_LT;
        value = cpu_NF;
        break;

    case 6: /* vs: V */
    case 7: /* vc: !V */
        cond = TCG_COND_LT;
        value = cpu_VF;
        break;

    case 8: /* hi: C && !Z */
    case 9: /* ls: !C || Z -> !(C && !Z) */
        cond = TCG_COND_NE;
        value = tcg_temp_new_i32();
        global = false;
        /* CF is 1 for C, so -CF is an all-bits-set mask for C;
           ZF is non-zero for !Z; so AND the two subexpressions.  */
        tcg_gen_neg_i32(value, cpu_CF);
        tcg_gen_and_i32(value, value, cpu_ZF);
        break;

    case 10: /* ge: N == V -> N ^ V == 0 */
    case 11: /* lt: N != V -> N ^ V != 0 */
        /* Since we're only interested in the sign bit, == 0 is >= 0.  */
        cond = TCG_COND_GE;
        value = tcg_temp_new_i32();
        global = false;
        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
        break;

    case 12: /* gt: !Z && N == V */
    case 13: /* le: Z || N != V */
        cond = TCG_COND_NE;
        value = tcg_temp_new_i32();
        global = false;
        /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
         * the sign bit then AND with ZF to yield the result.  */
        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
        tcg_gen_sari_i32(value, value, 31);
        tcg_gen_andc_i32(value, cpu_ZF, value);
        break;

    case 14: /* always */
    case 15: /* always */
        /* Use the ALWAYS condition, which will fold early.
         * It doesn't matter what we use for the value.  */
        cond = TCG_COND_ALWAYS;
        value = cpu_ZF;
        goto no_invert;

    default:
        fprintf(stderr, "Bad condition code 0x%x\n", cc);
        abort();
    }

    if (cc & 1) {
        cond = tcg_invert_cond(cond);
    }

 no_invert:
    cmp->cond = cond;
    cmp->value = value;
    cmp->value_global = global;
}

void arm_free_cc(DisasCompare *cmp)
{
    if (!cmp->value_global) {
        tcg_temp_free_i32(cmp->value);
    }
}

void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
{
    tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
}

void arm_gen_test_cc(int cc, TCGLabel *label)
{
    DisasCompare cmp;
    arm_test_cc(&cmp, cc);
    arm_jump_cc(&cmp, label);
    arm_free_cc(&cmp);
}

static const uint8_t table_logic_cc[16] = {
    1, /* and */
    1, /* xor */
    0, /* sub */
    0, /* rsb */
    0, /* add */
    0, /* adc */
    0, /* sbc */
    0, /* rsc */
    1, /* tst */
    1, /* teq */
    0, /* cmp */
    0, /* cmn */
    1, /* orr */
    1, /* mov */
    1, /* bic */
    1, /* mvn */
};

/* Set PC and Thumb state from an immediate address.  */
static inline void gen_bx_im(DisasContext *s, uint32_t addr)
{
    TCGv_i32 tmp;

    s->is_jmp = DISAS_JUMP;
    if (s->thumb != (addr & 1)) {
        tmp = tcg_temp_new_i32();
        tcg_gen_movi_i32(tmp, addr & 1);
        tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUARMState, thumb));
        tcg_temp_free_i32(tmp);
    }
    tcg_gen_movi_i32(cpu_R[15], addr & ~1);
}

/* Set PC and Thumb state from var.  var is marked as dead.  */
static inline void gen_bx(DisasContext *s, TCGv_i32 var)
{
    s->is_jmp = DISAS_JUMP;
    tcg_gen_andi_i32(cpu_R[15], var, ~1);
    tcg_gen_andi_i32(var, var, 1);
    store_cpu_field(var, thumb);
}

/* Variant of store_reg which uses branch&exchange logic when storing
   to r15 in ARM architecture v7 and above.  The source must be a
   temporary and will be marked as dead.  */
static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
{
    if (reg == 15 && ENABLE_ARCH_7) {
        gen_bx(s, var);
    } else {
        store_reg(s, reg, var);
    }
}

/* Variant of store_reg which uses branch&exchange logic when storing
 * to r15 in ARM architecture v5T and above.  This is used for storing
 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
 * in the ARM ARM which use the LoadWritePC() pseudocode function.  */
static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
{
    if (reg == 15 && ENABLE_ARCH_5) {
        gen_bx(s, var);
    } else {
        store_reg(s, reg, var);
    }
}

/* Abstractions of "generate code to do a guest load/store for
 * AArch32", where a vaddr is always 32 bits (and is zero extended if
 * we're a 64 bit core) and data is also 32 bits unless specifically
 * doing a 64 bit access.
 * These functions work like tcg_gen_qemu_{ld,st}* except that the
 * address argument is TCGv_i32 rather than TCGv.
 */
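/* For example, DO_GEN_LD(8s, MO_SB) below defines
 * gen_aa32_ld8s(val, addr, index): a sign-extending byte load from the
 * 32-bit guest address 'addr' using the mmu index 'index'.
 */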
#if TARGET_LONG_BITS == 32

#define DO_GEN_LD(SUFF, OPC)                                             \
static inline void gen_aa32_ld##SUFF(TCGv_i32 val, TCGv_i32 addr, int index) \
{                                                                        \
    tcg_gen_qemu_ld_i32(val, addr, index, (OPC));                        \
}

#define DO_GEN_ST(SUFF, OPC)                                             \
static inline void gen_aa32_st##SUFF(TCGv_i32 val, TCGv_i32 addr, int index) \
{                                                                        \
    tcg_gen_qemu_st_i32(val, addr, index, (OPC));                        \
}

static inline void gen_aa32_ld64(TCGv_i64 val, TCGv_i32 addr, int index)
{
    tcg_gen_qemu_ld_i64(val, addr, index, MO_TEQ);
}

static inline void gen_aa32_st64(TCGv_i64 val, TCGv_i32 addr, int index)
{
    tcg_gen_qemu_st_i64(val, addr, index, MO_TEQ);
}

#else

#define DO_GEN_LD(SUFF, OPC)                                             \
static inline void gen_aa32_ld##SUFF(TCGv_i32 val, TCGv_i32 addr, int index) \
{                                                                        \
    TCGv addr64 = tcg_temp_new();                                        \
    tcg_gen_extu_i32_i64(addr64, addr);                                  \
    tcg_gen_qemu_ld_i32(val, addr64, index, OPC);                        \
    tcg_temp_free(addr64);                                               \
}

#define DO_GEN_ST(SUFF, OPC)                                             \
static inline void gen_aa32_st##SUFF(TCGv_i32 val, TCGv_i32 addr, int index) \
{                                                                        \
    TCGv addr64 = tcg_temp_new();                                        \
    tcg_gen_extu_i32_i64(addr64, addr);                                  \
    tcg_gen_qemu_st_i32(val, addr64, index, OPC);                        \
    tcg_temp_free(addr64);                                               \
}

static inline void gen_aa32_ld64(TCGv_i64 val, TCGv_i32 addr, int index)
{
    TCGv addr64 = tcg_temp_new();
    tcg_gen_extu_i32_i64(addr64, addr);
    tcg_gen_qemu_ld_i64(val, addr64, index, MO_TEQ);
    tcg_temp_free(addr64);
}

static inline void gen_aa32_st64(TCGv_i64 val, TCGv_i32 addr, int index)
{
    TCGv addr64 = tcg_temp_new();
    tcg_gen_extu_i32_i64(addr64, addr);
    tcg_gen_qemu_st_i64(val, addr64, index, MO_TEQ);
    tcg_temp_free(addr64);
}

#endif

DO_GEN_LD(8s, MO_SB)
DO_GEN_LD(8u, MO_UB)
DO_GEN_LD(16s, MO_TESW)
DO_GEN_LD(16u, MO_TEUW)
DO_GEN_LD(32u, MO_TEUL)
/* 'a' variants include an alignment check */
DO_GEN_LD(16ua, MO_TEUW | MO_ALIGN)
DO_GEN_LD(32ua, MO_TEUL | MO_ALIGN)
DO_GEN_ST(8, MO_UB)
DO_GEN_ST(16, MO_TEUW)
DO_GEN_ST(32, MO_TEUL)

static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
{
    tcg_gen_movi_i32(cpu_R[15], val);
}

static inline void gen_hvc(DisasContext *s, int imm16)
{
    /* The pre-HVC helper handles cases when HVC gets trapped
     * as an undefined insn by runtime configuration (i.e. before
     * the insn really executes).
     */
    gen_set_pc_im(s, s->pc - 4);
    gen_helper_pre_hvc(cpu_env);
    /* Otherwise we will treat this as a real exception which
     * happens after execution of the insn. (The distinction matters
     * for the PC value reported to the exception handler and also
     * for single stepping.)
     */
    s->svc_imm = imm16;
    gen_set_pc_im(s, s->pc);
    s->is_jmp = DISAS_HVC;
}

static inline void gen_smc(DisasContext *s)
{
    /* As with HVC, we may take an exception either before or after
     * the insn executes.
     */
    TCGv_i32 tmp;

    gen_set_pc_im(s, s->pc - 4);
    tmp = tcg_const_i32(syn_aa32_smc());
    gen_helper_pre_smc(cpu_env, tmp);
    tcg_temp_free_i32(tmp);
    gen_set_pc_im(s, s->pc);
    s->is_jmp = DISAS_SMC;
}

static inline void gen_set_condexec(DisasContext *s)
{
    if (s->condexec_mask) {
        uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
        TCGv_i32 tmp = tcg_temp_new_i32();
        tcg_gen_movi_i32(tmp, val);
        store_cpu_field(tmp, condexec_bits);
    }
}

static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
{
    gen_set_condexec(s);
    gen_set_pc_im(s, s->pc - offset);
    gen_exception_internal(excp);
    s->is_jmp = DISAS_JUMP;
}

static void gen_exception_insn(DisasContext *s, int offset, int excp,
                               int syn, uint32_t target_el)
{
    gen_set_condexec(s);
    gen_set_pc_im(s, s->pc - offset);
    gen_exception(excp, syn, target_el);
    s->is_jmp = DISAS_JUMP;
}

/* Force a TB lookup after an instruction that changes the CPU state.  */
static inline void gen_lookup_tb(DisasContext *s)
{
    tcg_gen_movi_i32(cpu_R[15], s->pc & ~1);
    s->is_jmp = DISAS_JUMP;
}

static inline void gen_add_data_offset(DisasContext *s, unsigned int insn,
                                       TCGv_i32 var)
{
    int val, rm, shift, shiftop;
    TCGv_i32 offset;

    if (!(insn & (1 << 25))) {
        /* immediate */
        val = insn & 0xfff;
        if (!(insn & (1 << 23)))
            val = -val;
        if (val != 0)
            tcg_gen_addi_i32(var, var, val);
    } else {
        /* shift/register */
        rm = (insn) & 0xf;
        shift = (insn >> 7) & 0x1f;
        shiftop = (insn >> 5) & 3;
        offset = load_reg(s, rm);
        gen_arm_shift_im(offset, shiftop, shift, 0);
        if (!(insn & (1 << 23)))
            tcg_gen_sub_i32(var, var, offset);
        else
            tcg_gen_add_i32(var, var, offset);
        tcg_temp_free_i32(offset);
    }
}

static inline void gen_add_datah_offset(DisasContext *s, unsigned int insn,
                                        int extra, TCGv_i32 var)
{
    int val, rm;
    TCGv_i32 offset;

    if (insn & (1 << 22)) {
        /* immediate */
        val = (insn & 0xf) | ((insn >> 4) & 0xf0);
        if (!(insn & (1 << 23)))
            val = -val;
        val += extra;
        if (val != 0)
            tcg_gen_addi_i32(var, var, val);
    } else {
        /* register */
        if (extra)
            tcg_gen_addi_i32(var, var, extra);
        rm = (insn) & 0xf;
        offset = load_reg(s, rm);
        if (!(insn & (1 << 23)))
            tcg_gen_sub_i32(var, var, offset);
        else
            tcg_gen_add_i32(var, var, offset);
        tcg_temp_free_i32(offset);
    }
}

static TCGv_ptr get_fpstatus_ptr(int neon)
{
    TCGv_ptr statusptr = tcg_temp_new_ptr();
    int offset;
    if (neon) {
        offset = offsetof(CPUARMState, vfp.standard_fp_status);
    } else {
        offset = offsetof(CPUARMState, vfp.fp_status);
    }
    tcg_gen_addi_ptr(statusptr, cpu_env, offset);
    return statusptr;
}

#define VFP_OP2(name)                                                 \
static inline void gen_vfp_##name(int dp)                             \
{                                                                     \
    TCGv_ptr fpst = get_fpstatus_ptr(0);                              \
    if (dp) {                                                         \
        gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, cpu_F1d, fpst);    \
    } else {                                                          \
        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, cpu_F1s, fpst);    \
    }                                                                 \
    tcg_temp_free_ptr(fpst);                                          \
}
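/* VFP_OP2(add) thus defines gen_vfp_add(dp), applying gen_helper_vfp_addd
 * (double) or gen_helper_vfp_adds (single) to the implicit F0/F1 operands
 * and leaving the result in F0.
 */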
VFP_OP2(add)
VFP_OP2(sub)
VFP_OP2(mul)
VFP_OP2(div)

#undef VFP_OP2

static inline void gen_vfp_F1_mul(int dp)
{
    /* Like gen_vfp_mul() but put result in F1 */
    TCGv_ptr fpst = get_fpstatus_ptr(0);
    if (dp) {
        gen_helper_vfp_muld(cpu_F1d, cpu_F0d, cpu_F1d, fpst);
    } else {
        gen_helper_vfp_muls(cpu_F1s, cpu_F0s, cpu_F1s, fpst);
    }
    tcg_temp_free_ptr(fpst);
}

static inline void gen_vfp_F1_neg(int dp)
{
    /* Like gen_vfp_neg() but put result in F1 */
    if (dp) {
        gen_helper_vfp_negd(cpu_F1d, cpu_F0d);
    } else {
        gen_helper_vfp_negs(cpu_F1s, cpu_F0s);
    }
}

static inline void gen_vfp_abs(int dp)
{
    if (dp)
        gen_helper_vfp_absd(cpu_F0d, cpu_F0d);
    else
        gen_helper_vfp_abss(cpu_F0s, cpu_F0s);
}

static inline void gen_vfp_neg(int dp)
{
    if (dp)
        gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
    else
        gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
}

static inline void gen_vfp_sqrt(int dp)
{
    if (dp)
        gen_helper_vfp_sqrtd(cpu_F0d, cpu_F0d, cpu_env);
    else
        gen_helper_vfp_sqrts(cpu_F0s, cpu_F0s, cpu_env);
}

static inline void gen_vfp_cmp(int dp)
{
    if (dp)
        gen_helper_vfp_cmpd(cpu_F0d, cpu_F1d, cpu_env);
    else
        gen_helper_vfp_cmps(cpu_F0s, cpu_F1s, cpu_env);
}

static inline void gen_vfp_cmpe(int dp)
{
    if (dp)
        gen_helper_vfp_cmped(cpu_F0d, cpu_F1d, cpu_env);
    else
        gen_helper_vfp_cmpes(cpu_F0s, cpu_F1s, cpu_env);
}

static inline void gen_vfp_F1_ld0(int dp)
{
    if (dp)
        tcg_gen_movi_i64(cpu_F1d, 0);
    else
        tcg_gen_movi_i32(cpu_F1s, 0);
}

#define VFP_GEN_ITOF(name) \
static inline void gen_vfp_##name(int dp, int neon) \
{ \
    TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
    if (dp) { \
        gen_helper_vfp_##name##d(cpu_F0d, cpu_F0s, statusptr); \
    } else { \
        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
    } \
    tcg_temp_free_ptr(statusptr); \
}

VFP_GEN_ITOF(uito)
VFP_GEN_ITOF(sito)
#undef VFP_GEN_ITOF

#define VFP_GEN_FTOI(name) \
static inline void gen_vfp_##name(int dp, int neon) \
{ \
    TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
    if (dp) { \
        gen_helper_vfp_##name##d(cpu_F0s, cpu_F0d, statusptr); \
    } else { \
        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
    } \
    tcg_temp_free_ptr(statusptr); \
}

VFP_GEN_FTOI(toui)
VFP_GEN_FTOI(touiz)
VFP_GEN_FTOI(tosi)
VFP_GEN_FTOI(tosiz)
#undef VFP_GEN_FTOI

#define VFP_GEN_FIX(name, round) \
static inline void gen_vfp_##name(int dp, int shift, int neon) \
{ \
    TCGv_i32 tmp_shift = tcg_const_i32(shift); \
    TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
    if (dp) { \
        gen_helper_vfp_##name##d##round(cpu_F0d, cpu_F0d, tmp_shift, \
                                        statusptr); \
    } else { \
        gen_helper_vfp_##name##s##round(cpu_F0s, cpu_F0s, tmp_shift, \
                                        statusptr); \
    } \
    tcg_temp_free_i32(tmp_shift); \
    tcg_temp_free_ptr(statusptr); \
}
VFP_GEN_FIX(tosh, _round_to_zero)
VFP_GEN_FIX(tosl, _round_to_zero)
VFP_GEN_FIX(touh, _round_to_zero)
VFP_GEN_FIX(toul, _round_to_zero)
VFP_GEN_FIX(shto, )
VFP_GEN_FIX(slto, )
VFP_GEN_FIX(uhto, )
VFP_GEN_FIX(ulto, )
#undef VFP_GEN_FIX

static inline void gen_vfp_ld(DisasContext *s, int dp, TCGv_i32 addr)
{
    if (dp) {
        gen_aa32_ld64(cpu_F0d, addr, get_mem_index(s));
    } else {
        gen_aa32_ld32u(cpu_F0s, addr, get_mem_index(s));
    }
}

static inline void gen_vfp_st(DisasContext *s, int dp, TCGv_i32 addr)
{
    if (dp) {
        gen_aa32_st64(cpu_F0d, addr, get_mem_index(s));
    } else {
        gen_aa32_st32(cpu_F0s, addr, get_mem_index(s));
    }
}
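/* Single-precision VFP registers are stored as the two 32-bit halves of
 * the corresponding double-precision register: Sn lives in D(n >> 1),
 * and n & 1 selects which half, with CPU_DoubleU hiding the host byte
 * order.
 */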
static inline long vfp_reg_offset(int dp, int reg)
{
    if (dp)
        return offsetof(CPUARMState, vfp.regs[reg]);
    else if (reg & 1) {
        return offsetof(CPUARMState, vfp.regs[reg >> 1])
          + offsetof(CPU_DoubleU, l.upper);
    } else {
        return offsetof(CPUARMState, vfp.regs[reg >> 1])
          + offsetof(CPU_DoubleU, l.lower);
    }
}

/* Return the offset of a 32-bit piece of a NEON register.
   zero is the least significant end of the register.  */
static inline long neon_reg_offset(int reg, int n)
{
    int sreg;
    sreg = reg * 2 + n;
    return vfp_reg_offset(0, sreg);
}

static TCGv_i32 neon_load_reg(int reg, int pass)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
    return tmp;
}

static void neon_store_reg(int reg, int pass, TCGv_i32 var)
{
    tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
    tcg_temp_free_i32(var);
}

static inline void neon_load_reg64(TCGv_i64 var, int reg)
{
    tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
}

static inline void neon_store_reg64(TCGv_i64 var, int reg)
{
    tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
}

#define tcg_gen_ld_f32 tcg_gen_ld_i32
#define tcg_gen_ld_f64 tcg_gen_ld_i64
#define tcg_gen_st_f32 tcg_gen_st_i32
#define tcg_gen_st_f64 tcg_gen_st_i64

static inline void gen_mov_F0_vreg(int dp, int reg)
{
    if (dp)
        tcg_gen_ld_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
    else
        tcg_gen_ld_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
}

static inline void gen_mov_F1_vreg(int dp, int reg)
{
    if (dp)
        tcg_gen_ld_f64(cpu_F1d, cpu_env, vfp_reg_offset(dp, reg));
    else
        tcg_gen_ld_f32(cpu_F1s, cpu_env, vfp_reg_offset(dp, reg));
}

static inline void gen_mov_vreg_F0(int dp, int reg)
{
    if (dp)
        tcg_gen_st_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
    else
        tcg_gen_st_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
}

#define ARM_CP_RW_BIT   (1 << 20)

static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
{
    tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
}

static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
{
    tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
}

static inline TCGv_i32 iwmmxt_load_creg(int reg)
{
    TCGv_i32 var = tcg_temp_new_i32();
    tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
    return var;
}

static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
{
    tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
    tcg_temp_free_i32(var);
}

static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
{
    iwmmxt_store_reg(cpu_M0, rn);
}

static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_M0, rn);
}

static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
}

static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
}

static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
}

#define IWMMXT_OP(name) \
static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
{ \
    iwmmxt_load_reg(cpu_V1, rn); \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
}

#define IWMMXT_OP_ENV(name) \
static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
{ \
    iwmmxt_load_reg(cpu_V1, rn); \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
}

#define IWMMXT_OP_ENV_SIZE(name) \
IWMMXT_OP_ENV(name##b) \
IWMMXT_OP_ENV(name##w) \
IWMMXT_OP_ENV(name##l)

#define IWMMXT_OP_ENV1(name) \
static inline void gen_op_iwmmxt_##name##_M0(void) \
{ \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
}
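/* Each IWMMXT_OP* invocation below defines a gen_op_iwmmxt_<name>_M0_wRn()
 * (or _M0()) wrapper that loads wRn into cpu_V1 where needed and applies
 * the matching TCG helper to the implicit M0 accumulator.
 */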
IWMMXT_OP(maddsq)
IWMMXT_OP(madduq)
IWMMXT_OP(sadb)
IWMMXT_OP(sadw)
IWMMXT_OP(mulslw)
IWMMXT_OP(mulshw)
IWMMXT_OP(mululw)
IWMMXT_OP(muluhw)
IWMMXT_OP(macsw)
IWMMXT_OP(macuw)

IWMMXT_OP_ENV_SIZE(unpackl)
IWMMXT_OP_ENV_SIZE(unpackh)

IWMMXT_OP_ENV1(unpacklub)
IWMMXT_OP_ENV1(unpackluw)
IWMMXT_OP_ENV1(unpacklul)
IWMMXT_OP_ENV1(unpackhub)
IWMMXT_OP_ENV1(unpackhuw)
IWMMXT_OP_ENV1(unpackhul)
IWMMXT_OP_ENV1(unpacklsb)
IWMMXT_OP_ENV1(unpacklsw)
IWMMXT_OP_ENV1(unpacklsl)
IWMMXT_OP_ENV1(unpackhsb)
IWMMXT_OP_ENV1(unpackhsw)
IWMMXT_OP_ENV1(unpackhsl)

IWMMXT_OP_ENV_SIZE(cmpeq)
IWMMXT_OP_ENV_SIZE(cmpgtu)
IWMMXT_OP_ENV_SIZE(cmpgts)

IWMMXT_OP_ENV_SIZE(mins)
IWMMXT_OP_ENV_SIZE(minu)
IWMMXT_OP_ENV_SIZE(maxs)
IWMMXT_OP_ENV_SIZE(maxu)

IWMMXT_OP_ENV_SIZE(subn)
IWMMXT_OP_ENV_SIZE(addn)
IWMMXT_OP_ENV_SIZE(subu)
IWMMXT_OP_ENV_SIZE(addu)
IWMMXT_OP_ENV_SIZE(subs)
IWMMXT_OP_ENV_SIZE(adds)

IWMMXT_OP_ENV(avgb0)
IWMMXT_OP_ENV(avgb1)
IWMMXT_OP_ENV(avgw0)
IWMMXT_OP_ENV(avgw1)

IWMMXT_OP_ENV(packuw)
IWMMXT_OP_ENV(packul)
IWMMXT_OP_ENV(packuq)
IWMMXT_OP_ENV(packsw)
IWMMXT_OP_ENV(packsl)
IWMMXT_OP_ENV(packsq)

static void gen_op_iwmmxt_set_mup(void)
{
    TCGv_i32 tmp;
    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
    tcg_gen_ori_i32(tmp, tmp, 2);
    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
}

static void gen_op_iwmmxt_set_cup(void)
{
    TCGv_i32 tmp;
    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
    tcg_gen_ori_i32(tmp, tmp, 1);
    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
}

static void gen_op_iwmmxt_setpsr_nz(void)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
}

static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
    tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
}

static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
                                     TCGv_i32 dest)
{
    int rd;
    uint32_t offset;
    TCGv_i32 tmp;

    rd = (insn >> 16) & 0xf;
    tmp = load_reg(s, rd);

    offset = (insn & 0xff) << ((insn >> 7) & 2);
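    /* (insn >> 7) & 2 extracts bit 8 of the insn into bit 1, so the
     * 8-bit immediate is scaled by 4 for the word/doubleword forms
     * (bit 8 set) and used unscaled for the byte/halfword forms.
     */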
    if (insn & (1 << 24)) {
        /* Pre-indexed */
        if (insn & (1 << 23))
            tcg_gen_addi_i32(tmp, tmp, offset);
        else
            tcg_gen_addi_i32(tmp, tmp, -offset);
        tcg_gen_mov_i32(dest, tmp);
        if (insn & (1 << 21))
            store_reg(s, rd, tmp);
        else
            tcg_temp_free_i32(tmp);
    } else if (insn & (1 << 21)) {
        /* Post-indexed */
        tcg_gen_mov_i32(dest, tmp);
        if (insn & (1 << 23))
            tcg_gen_addi_i32(tmp, tmp, offset);
        else
            tcg_gen_addi_i32(tmp, tmp, -offset);
        store_reg(s, rd, tmp);
    } else if (!(insn & (1 << 23)))
        return 1;
    return 0;
}

static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
{
    int rd = (insn >> 0) & 0xf;
    TCGv_i32 tmp;

    if (insn & (1 << 8)) {
        if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
            return 1;
        } else {
            tmp = iwmmxt_load_creg(rd);
        }
    } else {
        tmp = tcg_temp_new_i32();
        iwmmxt_load_reg(cpu_V0, rd);
        tcg_gen_extrl_i64_i32(tmp, cpu_V0);
    }
    tcg_gen_andi_i32(tmp, tmp, mask);
    tcg_gen_mov_i32(dest, tmp);
    tcg_temp_free_i32(tmp);
    return 0;
}

/* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
   (i.e. an undefined instruction).  */
static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
{
    int rd, wrd;
    int rdhi, rdlo, rd0, rd1, i;
    TCGv_i32 addr;
    TCGv_i32 tmp, tmp2, tmp3;

    if ((insn & 0x0e000e00) == 0x0c000000) {
        if ((insn & 0x0fe00ff0) == 0x0c400000) {
            wrd = insn & 0xf;
            rdlo = (insn >> 12) & 0xf;
            rdhi = (insn >> 16) & 0xf;
            if (insn & ARM_CP_RW_BIT) {                 /* TMRRC */
                iwmmxt_load_reg(cpu_V0, wrd);
                tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
                tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
                tcg_gen_extrl_i64_i32(cpu_R[rdhi], cpu_V0);
            } else {                                    /* TMCRR */
                tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
                iwmmxt_store_reg(cpu_V0, wrd);
                gen_op_iwmmxt_set_mup();
            }
            return 0;
        }

        wrd = (insn >> 12) & 0xf;
        addr = tcg_temp_new_i32();
        if (gen_iwmmxt_address(s, insn, addr)) {
            tcg_temp_free_i32(addr);
            return 1;
        }
        if (insn & ARM_CP_RW_BIT) {
            if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
                tmp = tcg_temp_new_i32();
                gen_aa32_ld32u(tmp, addr, get_mem_index(s));
                iwmmxt_store_creg(wrd, tmp);
            } else {
                i = 1;
                if (insn & (1 << 8)) {
                    if (insn & (1 << 22)) {             /* WLDRD */
                        gen_aa32_ld64(cpu_M0, addr, get_mem_index(s));
                        i = 0;
                    } else {                            /* WLDRW wRd */
                        tmp = tcg_temp_new_i32();
                        gen_aa32_ld32u(tmp, addr, get_mem_index(s));
                    }
                } else {
                    tmp = tcg_temp_new_i32();
                    if (insn & (1 << 22)) {             /* WLDRH */
                        gen_aa32_ld16u(tmp, addr, get_mem_index(s));
                    } else {                            /* WLDRB */
                        gen_aa32_ld8u(tmp, addr, get_mem_index(s));
                    }
                }
                if (i) {
                    tcg_gen_extu_i32_i64(cpu_M0, tmp);
                    tcg_temp_free_i32(tmp);
                }
                gen_op_iwmmxt_movq_wRn_M0(wrd);
            }
        } else {
            if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
                tmp = iwmmxt_load_creg(wrd);
                gen_aa32_st32(tmp, addr, get_mem_index(s));
            } else {
                gen_op_iwmmxt_movq_M0_wRn(wrd);
                tmp = tcg_temp_new_i32();
                if (insn & (1 << 8)) {
                    if (insn & (1 << 22)) {             /* WSTRD */
                        gen_aa32_st64(cpu_M0, addr, get_mem_index(s));
                    } else {                            /* WSTRW wRd */
                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
                        gen_aa32_st32(tmp, addr, get_mem_index(s));
                    }
                } else {
                    if (insn & (1 << 22)) {             /* WSTRH */
                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
                        gen_aa32_st16(tmp, addr, get_mem_index(s));
                    } else {                            /* WSTRB */
                        tcg_gen_extrl_i64_i32(tmp, cpu_M0);
                        gen_aa32_st8(tmp, addr, get_mem_index(s));
                    }
                }
            }
            tcg_temp_free_i32(tmp);
        }
        tcg_temp_free_i32(addr);
        return 0;
    }

    if ((insn & 0x0f000000) != 0x0e000000)
        return 1;

    switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
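    /* The case values pack insn[23:20] into bits [11:8] of the key and
     * insn[11:4] into bits [7:0]; e.g. WOR (case 0x000) has all of
     * those opcode bits clear.
     */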
1698     case 0x000:                                         /* WOR */
1699         wrd = (insn >> 12) & 0xf;
1700         rd0 = (insn >> 0) & 0xf;
1701         rd1 = (insn >> 16) & 0xf;
1702         gen_op_iwmmxt_movq_M0_wRn(rd0);
1703         gen_op_iwmmxt_orq_M0_wRn(rd1);
1704         gen_op_iwmmxt_setpsr_nz();
1705         gen_op_iwmmxt_movq_wRn_M0(wrd);
1706         gen_op_iwmmxt_set_mup();
1707         gen_op_iwmmxt_set_cup();
1708         break;
1709     case 0x011:                                         /* TMCR */
1710         if (insn & 0xf)
1711             return 1;
1712         rd = (insn >> 12) & 0xf;
1713         wrd = (insn >> 16) & 0xf;
1714         switch (wrd) {
1715         case ARM_IWMMXT_wCID:
1716         case ARM_IWMMXT_wCASF:
1717             break;
1718         case ARM_IWMMXT_wCon:
1719             gen_op_iwmmxt_set_cup();
1720             /* Fall through.  */
1721         case ARM_IWMMXT_wCSSF:
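                 /* wCSSF is handled as write-one-to-clear: bits set in the
                  * source register clear the corresponding flags, hence the
                  * ANDC below.
                  */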
1722             tmp = iwmmxt_load_creg(wrd);
1723             tmp2 = load_reg(s, rd);
1724             tcg_gen_andc_i32(tmp, tmp, tmp2);
1725             tcg_temp_free_i32(tmp2);
1726             iwmmxt_store_creg(wrd, tmp);
1727             break;
1728         case ARM_IWMMXT_wCGR0:
1729         case ARM_IWMMXT_wCGR1:
1730         case ARM_IWMMXT_wCGR2:
1731         case ARM_IWMMXT_wCGR3:
1732             gen_op_iwmmxt_set_cup();
1733             tmp = load_reg(s, rd);
1734             iwmmxt_store_creg(wrd, tmp);
1735             break;
1736         default:
1737             return 1;
1738         }
1739         break;
1740     case 0x100:                                         /* WXOR */
1741         wrd = (insn >> 12) & 0xf;
1742         rd0 = (insn >> 0) & 0xf;
1743         rd1 = (insn >> 16) & 0xf;
1744         gen_op_iwmmxt_movq_M0_wRn(rd0);
1745         gen_op_iwmmxt_xorq_M0_wRn(rd1);
1746         gen_op_iwmmxt_setpsr_nz();
1747         gen_op_iwmmxt_movq_wRn_M0(wrd);
1748         gen_op_iwmmxt_set_mup();
1749         gen_op_iwmmxt_set_cup();
1750         break;
1751     case 0x111:                                         /* TMRC */
1752         if (insn & 0xf)
1753             return 1;
1754         rd = (insn >> 12) & 0xf;
1755         wrd = (insn >> 16) & 0xf;
1756         tmp = iwmmxt_load_creg(wrd);
1757         store_reg(s, rd, tmp);
1758         break;
1759     case 0x300:                                         /* WANDN */
1760         wrd = (insn >> 12) & 0xf;
1761         rd0 = (insn >> 0) & 0xf;
1762         rd1 = (insn >> 16) & 0xf;
1763         gen_op_iwmmxt_movq_M0_wRn(rd0);
1764         tcg_gen_not_i64(cpu_M0, cpu_M0);        /* AND-NOT needs bitwise NOT, not negate */
1765         gen_op_iwmmxt_andq_M0_wRn(rd1);
1766         gen_op_iwmmxt_setpsr_nz();
1767         gen_op_iwmmxt_movq_wRn_M0(wrd);
1768         gen_op_iwmmxt_set_mup();
1769         gen_op_iwmmxt_set_cup();
1770         break;
1771     case 0x200:                                         /* WAND */
1772         wrd = (insn >> 12) & 0xf;
1773         rd0 = (insn >> 0) & 0xf;
1774         rd1 = (insn >> 16) & 0xf;
1775         gen_op_iwmmxt_movq_M0_wRn(rd0);
1776         gen_op_iwmmxt_andq_M0_wRn(rd1);
1777         gen_op_iwmmxt_setpsr_nz();
1778         gen_op_iwmmxt_movq_wRn_M0(wrd);
1779         gen_op_iwmmxt_set_mup();
1780         gen_op_iwmmxt_set_cup();
1781         break;
1782     case 0x810: case 0xa10:                             /* WMADD */
1783         wrd = (insn >> 12) & 0xf;
1784         rd0 = (insn >> 0) & 0xf;
1785         rd1 = (insn >> 16) & 0xf;
1786         gen_op_iwmmxt_movq_M0_wRn(rd0);
1787         if (insn & (1 << 21))
1788             gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1789         else
1790             gen_op_iwmmxt_madduq_M0_wRn(rd1);
1791         gen_op_iwmmxt_movq_wRn_M0(wrd);
1792         gen_op_iwmmxt_set_mup();
1793         break;
1794     case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
1795         wrd = (insn >> 12) & 0xf;
1796         rd0 = (insn >> 16) & 0xf;
1797         rd1 = (insn >> 0) & 0xf;
1798         gen_op_iwmmxt_movq_M0_wRn(rd0);
1799         switch ((insn >> 22) & 3) {
1800         case 0:
1801             gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1802             break;
1803         case 1:
1804             gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1805             break;
1806         case 2:
1807             gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1808             break;
1809         case 3:
1810             return 1;
1811         }
1812         gen_op_iwmmxt_movq_wRn_M0(wrd);
1813         gen_op_iwmmxt_set_mup();
1814         gen_op_iwmmxt_set_cup();
1815         break;
1816     case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
1817         wrd = (insn >> 12) & 0xf;
1818         rd0 = (insn >> 16) & 0xf;
1819         rd1 = (insn >> 0) & 0xf;
1820         gen_op_iwmmxt_movq_M0_wRn(rd0);
1821         switch ((insn >> 22) & 3) {
1822         case 0:
1823             gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1824             break;
1825         case 1:
1826             gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1827             break;
1828         case 2:
1829             gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1830             break;
1831         case 3:
1832             return 1;
1833         }
1834         gen_op_iwmmxt_movq_wRn_M0(wrd);
1835         gen_op_iwmmxt_set_mup();
1836         gen_op_iwmmxt_set_cup();
1837         break;
1838     case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
1839         wrd = (insn >> 12) & 0xf;
1840         rd0 = (insn >> 16) & 0xf;
1841         rd1 = (insn >> 0) & 0xf;
1842         gen_op_iwmmxt_movq_M0_wRn(rd0);
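             /* WSAD: bit 22 selects halfword vs byte sum-of-absolute-
              * differences; unless bit 20 (the Z variant) is set, the
              * previous wRd value is accumulated into the result.
              */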
1843         if (insn & (1 << 22))
1844             gen_op_iwmmxt_sadw_M0_wRn(rd1);
1845         else
1846             gen_op_iwmmxt_sadb_M0_wRn(rd1);
1847         if (!(insn & (1 << 20)))
1848             gen_op_iwmmxt_addl_M0_wRn(wrd);
1849         gen_op_iwmmxt_movq_wRn_M0(wrd);
1850         gen_op_iwmmxt_set_mup();
1851         break;
1852     case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
1853         wrd = (insn >> 12) & 0xf;
1854         rd0 = (insn >> 16) & 0xf;
1855         rd1 = (insn >> 0) & 0xf;
1856         gen_op_iwmmxt_movq_M0_wRn(rd0);
1857         if (insn & (1 << 21)) {
1858             if (insn & (1 << 20))
1859                 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1860             else
1861                 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1862         } else {
1863             if (insn & (1 << 20))
1864                 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1865             else
1866                 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1867         }
1868         gen_op_iwmmxt_movq_wRn_M0(wrd);
1869         gen_op_iwmmxt_set_mup();
1870         break;
1871     case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
1872         wrd = (insn >> 12) & 0xf;
1873         rd0 = (insn >> 16) & 0xf;
1874         rd1 = (insn >> 0) & 0xf;
1875         gen_op_iwmmxt_movq_M0_wRn(rd0);
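             /* WMAC: bit 21 selects signed vs unsigned multiply-accumulate;
              * unless bit 20 (the Z variant) is set, the old wRd value is
              * added back in.
              */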
1876         if (insn & (1 << 21))
1877             gen_op_iwmmxt_macsw_M0_wRn(rd1);
1878         else
1879             gen_op_iwmmxt_macuw_M0_wRn(rd1);
1880         if (!(insn & (1 << 20))) {
1881             iwmmxt_load_reg(cpu_V1, wrd);
1882             tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1883         }
1884         gen_op_iwmmxt_movq_wRn_M0(wrd);
1885         gen_op_iwmmxt_set_mup();
1886         break;
1887     case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
1888         wrd = (insn >> 12) & 0xf;
1889         rd0 = (insn >> 16) & 0xf;
1890         rd1 = (insn >> 0) & 0xf;
1891         gen_op_iwmmxt_movq_M0_wRn(rd0);
1892         switch ((insn >> 22) & 3) {
1893         case 0:
1894             gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1895             break;
1896         case 1:
1897             gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1898             break;
1899         case 2:
1900             gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1901             break;
1902         case 3:
1903             return 1;
1904         }
1905         gen_op_iwmmxt_movq_wRn_M0(wrd);
1906         gen_op_iwmmxt_set_mup();
1907         gen_op_iwmmxt_set_cup();
1908         break;
1909     case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
1910         wrd = (insn >> 12) & 0xf;
1911         rd0 = (insn >> 16) & 0xf;
1912         rd1 = (insn >> 0) & 0xf;
1913         gen_op_iwmmxt_movq_M0_wRn(rd0);
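             /* WAVG2: bit 22 selects halfword vs byte averaging, bit 20
              * the rounding variant.
              */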
1914         if (insn & (1 << 22)) {
1915             if (insn & (1 << 20))
1916                 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1917             else
1918                 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1919         } else {
1920             if (insn & (1 << 20))
1921                 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1922             else
1923                 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1924         }
1925         gen_op_iwmmxt_movq_wRn_M0(wrd);
1926         gen_op_iwmmxt_set_mup();
1927         gen_op_iwmmxt_set_cup();
1928         break;
1929     case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
1930         wrd = (insn >> 12) & 0xf;
1931         rd0 = (insn >> 16) & 0xf;
1932         rd1 = (insn >> 0) & 0xf;
1933         gen_op_iwmmxt_movq_M0_wRn(rd0);
1934         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1935         tcg_gen_andi_i32(tmp, tmp, 7);
1936         iwmmxt_load_reg(cpu_V1, rd1);
1937         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1938         tcg_temp_free_i32(tmp);
1939         gen_op_iwmmxt_movq_wRn_M0(wrd);
1940         gen_op_iwmmxt_set_mup();
1941         break;
1942     case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
1943         if (((insn >> 6) & 3) == 3)
1944             return 1;
1945         rd = (insn >> 12) & 0xf;
1946         wrd = (insn >> 16) & 0xf;
1947         tmp = load_reg(s, rd);
1948         gen_op_iwmmxt_movq_M0_wRn(wrd);
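             /* TINSR inserts the low byte/halfword/word of the ARM register
              * into the lane selected by the low insn bits: tmp2 is the lane
              * mask and tmp3 the bit offset passed to the insr helper.
              */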
1949         switch ((insn >> 6) & 3) {
1950         case 0:
1951             tmp2 = tcg_const_i32(0xff);
1952             tmp3 = tcg_const_i32((insn & 7) << 3);
1953             break;
1954         case 1:
1955             tmp2 = tcg_const_i32(0xffff);
1956             tmp3 = tcg_const_i32((insn & 3) << 4);
1957             break;
1958         case 2:
1959             tmp2 = tcg_const_i32(0xffffffff);
1960             tmp3 = tcg_const_i32((insn & 1) << 5);
1961             break;
1962         default:
1963             TCGV_UNUSED_I32(tmp2);
1964             TCGV_UNUSED_I32(tmp3);
1965         }
1966         gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1967         tcg_temp_free_i32(tmp3);
1968         tcg_temp_free_i32(tmp2);
1969         tcg_temp_free_i32(tmp);
1970         gen_op_iwmmxt_movq_wRn_M0(wrd);
1971         gen_op_iwmmxt_set_mup();
1972         break;
1973     case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
1974         rd = (insn >> 12) & 0xf;
1975         wrd = (insn >> 16) & 0xf;
1976         if (rd == 15 || ((insn >> 22) & 3) == 3)
1977             return 1;
1978         gen_op_iwmmxt_movq_M0_wRn(wrd);
1979         tmp = tcg_temp_new_i32();
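             /* TEXTRM: shift the selected lane down to bit 0, then sign-
              * or zero-extend it according to insn bit 3.
              */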
1980         switch ((insn >> 22) & 3) {
1981         case 0:
1982             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1983             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1984             if (insn & 8) {
1985                 tcg_gen_ext8s_i32(tmp, tmp);
1986             } else {
1987                 tcg_gen_andi_i32(tmp, tmp, 0xff);
1988             }
1989             break;
1990         case 1:
1991             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1992             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1993             if (insn & 8) {
1994                 tcg_gen_ext16s_i32(tmp, tmp);
1995             } else {
1996                 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1997             }
1998             break;
1999         case 2:
2000             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
2001             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
2002             break;
2003         }
2004         store_reg(s, rd, tmp);
2005         break;
2006     case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
2007         if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2008             return 1;
2009         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
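             /* Extract the SIMD flag field for the selected element from
              * wCASF and shift it up to bits 31:28 for gen_set_nzcv().
              */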
2010         switch ((insn >> 22) & 3) {
2011         case 0:
2012             tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
2013             break;
2014         case 1:
2015             tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
2016             break;
2017         case 2:
2018             tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
2019             break;
2020         }
2021         tcg_gen_shli_i32(tmp, tmp, 28);
2022         gen_set_nzcv(tmp);
2023         tcg_temp_free_i32(tmp);
2024         break;
2025     case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
2026         if (((insn >> 6) & 3) == 3)
2027             return 1;
2028         rd = (insn >> 12) & 0xf;
2029         wrd = (insn >> 16) & 0xf;
2030         tmp = load_reg(s, rd);
2031         switch ((insn >> 6) & 3) {
2032         case 0:
2033             gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
2034             break;
2035         case 1:
2036             gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
2037             break;
2038         case 2:
2039             gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
2040             break;
2041         }
2042         tcg_temp_free_i32(tmp);
2043         gen_op_iwmmxt_movq_wRn_M0(wrd);
2044         gen_op_iwmmxt_set_mup();
2045         break;
2046     case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
2047         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2048             return 1;
2049         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2050         tmp2 = tcg_temp_new_i32();
2051         tcg_gen_mov_i32(tmp2, tmp);
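             /* TANDC ANDs together the per-element flag fields of wCASF by
              * repeatedly shifting a copy up and ANDing it in; the combined
              * flags end up in bits 31:28.
              */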
2052         switch ((insn >> 22) & 3) {
2053         case 0:
2054             for (i = 0; i < 7; i++) {
2055                 tcg_gen_shli_i32(tmp2, tmp2, 4);
2056                 tcg_gen_and_i32(tmp, tmp, tmp2);
2057             }
2058             break;
2059         case 1:
2060             for (i = 0; i < 3; i++) {
2061                 tcg_gen_shli_i32(tmp2, tmp2, 8);
2062                 tcg_gen_and_i32(tmp, tmp, tmp2);
2063             }
2064             break;
2065         case 2:
2066             tcg_gen_shli_i32(tmp2, tmp2, 16);
2067             tcg_gen_and_i32(tmp, tmp, tmp2);
2068             break;
2069         }
2070         gen_set_nzcv(tmp);
2071         tcg_temp_free_i32(tmp2);
2072         tcg_temp_free_i32(tmp);
2073         break;
2074     case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
2075         wrd = (insn >> 12) & 0xf;
2076         rd0 = (insn >> 16) & 0xf;
2077         gen_op_iwmmxt_movq_M0_wRn(rd0);
2078         switch ((insn >> 22) & 3) {
2079         case 0:
2080             gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
2081             break;
2082         case 1:
2083             gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
2084             break;
2085         case 2:
2086             gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2087             break;
2088         case 3:
2089             return 1;
2090         }
2091         gen_op_iwmmxt_movq_wRn_M0(wrd);
2092         gen_op_iwmmxt_set_mup();
2093         break;
2094     case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
2095         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2096             return 1;
2097         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2098         tmp2 = tcg_temp_new_i32();
2099         tcg_gen_mov_i32(tmp2, tmp);
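             /* TORC: as for TANDC above, but ORing the flag fields. */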
2100         switch ((insn >> 22) & 3) {
2101         case 0:
2102             for (i = 0; i < 7; i++) {
2103                 tcg_gen_shli_i32(tmp2, tmp2, 4);
2104                 tcg_gen_or_i32(tmp, tmp, tmp2);
2105             }
2106             break;
2107         case 1:
2108             for (i = 0; i < 3; i++) {
2109                 tcg_gen_shli_i32(tmp2, tmp2, 8);
2110                 tcg_gen_or_i32(tmp, tmp, tmp2);
2111             }
2112             break;
2113         case 2:
2114             tcg_gen_shli_i32(tmp2, tmp2, 16);
2115             tcg_gen_or_i32(tmp, tmp, tmp2);
2116             break;
2117         }
2118         gen_set_nzcv(tmp);
2119         tcg_temp_free_i32(tmp2);
2120         tcg_temp_free_i32(tmp);
2121         break;
2122     case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
2123         rd = (insn >> 12) & 0xf;
2124         rd0 = (insn >> 16) & 0xf;
2125         if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2126             return 1;
2127         gen_op_iwmmxt_movq_M0_wRn(rd0);
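             /* TMOVMSK gathers the top bit of each element into a mask in rd. */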
2128         tmp = tcg_temp_new_i32();
2129         switch ((insn >> 22) & 3) {
2130         case 0:
2131             gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2132             break;
2133         case 1:
2134             gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2135             break;
2136         case 2:
2137             gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2138             break;
2139         }
2140         store_reg(s, rd, tmp);
2141         break;
2142     case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
2143     case 0x906: case 0xb06: case 0xd06: case 0xf06:
2144         wrd = (insn >> 12) & 0xf;
2145         rd0 = (insn >> 16) & 0xf;
2146         rd1 = (insn >> 0) & 0xf;
2147         gen_op_iwmmxt_movq_M0_wRn(rd0);
2148         switch ((insn >> 22) & 3) {
2149         case 0:
2150             if (insn & (1 << 21))
2151                 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2152             else
2153                 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2154             break;
2155         case 1:
2156             if (insn & (1 << 21))
2157                 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2158             else
2159                 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2160             break;
2161         case 2:
2162             if (insn & (1 << 21))
2163                 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2164             else
2165                 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2166             break;
2167         case 3:
2168             return 1;
2169         }
2170         gen_op_iwmmxt_movq_wRn_M0(wrd);
2171         gen_op_iwmmxt_set_mup();
2172         gen_op_iwmmxt_set_cup();
2173         break;
2174     case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
2175     case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2176         wrd = (insn >> 12) & 0xf;
2177         rd0 = (insn >> 16) & 0xf;
2178         gen_op_iwmmxt_movq_M0_wRn(rd0);
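             /* WUNPCKEL: unpack the low-half elements of wRn, sign- or
              * zero-extending according to bit 21.
              */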
2179         switch ((insn >> 22) & 3) {
2180         case 0:
2181             if (insn & (1 << 21))
2182                 gen_op_iwmmxt_unpacklsb_M0();
2183             else
2184                 gen_op_iwmmxt_unpacklub_M0();
2185             break;
2186         case 1:
2187             if (insn & (1 << 21))
2188                 gen_op_iwmmxt_unpacklsw_M0();
2189             else
2190                 gen_op_iwmmxt_unpackluw_M0();
2191             break;
2192         case 2:
2193             if (insn & (1 << 21))
2194                 gen_op_iwmmxt_unpacklsl_M0();
2195             else
2196                 gen_op_iwmmxt_unpacklul_M0();
2197             break;
2198         case 3:
2199             return 1;
2200         }
2201         gen_op_iwmmxt_movq_wRn_M0(wrd);
2202         gen_op_iwmmxt_set_mup();
2203         gen_op_iwmmxt_set_cup();
2204         break;
2205     case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
2206     case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2207         wrd = (insn >> 12) & 0xf;
2208         rd0 = (insn >> 16) & 0xf;
2209         gen_op_iwmmxt_movq_M0_wRn(rd0);
2210         switch ((insn >> 22) & 3) {
2211         case 0:
2212             if (insn & (1 << 21))
2213                 gen_op_iwmmxt_unpackhsb_M0();
2214             else
2215                 gen_op_iwmmxt_unpackhub_M0();
2216             break;
2217         case 1:
2218             if (insn & (1 << 21))
2219                 gen_op_iwmmxt_unpackhsw_M0();
2220             else
2221                 gen_op_iwmmxt_unpackhuw_M0();
2222             break;
2223         case 2:
2224             if (insn & (1 << 21))
2225                 gen_op_iwmmxt_unpackhsl_M0();
2226             else
2227                 gen_op_iwmmxt_unpackhul_M0();
2228             break;
2229         case 3:
2230             return 1;
2231         }
2232         gen_op_iwmmxt_movq_wRn_M0(wrd);
2233         gen_op_iwmmxt_set_mup();
2234         gen_op_iwmmxt_set_cup();
2235         break;
2236     case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
2237     case 0x214: case 0x614: case 0xa14: case 0xe14:
2238         if (((insn >> 22) & 3) == 0)
2239             return 1;
2240         wrd = (insn >> 12) & 0xf;
2241         rd0 = (insn >> 16) & 0xf;
2242         gen_op_iwmmxt_movq_M0_wRn(rd0);
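             /* The shift count comes via gen_iwmmxt_shift(), which also
              * rejects invalid encodings; byte-sized shifts do not exist,
              * hence the size == 0 check above.
              */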
2243         tmp = tcg_temp_new_i32();
2244         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2245             tcg_temp_free_i32(tmp);
2246             return 1;
2247         }
2248         switch ((insn >> 22) & 3) {
2249         case 1:
2250             gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2251             break;
2252         case 2:
2253             gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2254             break;
2255         case 3:
2256             gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2257             break;
2258         }
2259         tcg_temp_free_i32(tmp);
2260         gen_op_iwmmxt_movq_wRn_M0(wrd);
2261         gen_op_iwmmxt_set_mup();
2262         gen_op_iwmmxt_set_cup();
2263         break;
2264     case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
2265     case 0x014: case 0x414: case 0x814: case 0xc14:
2266         if (((insn >> 22) & 3) == 0)
2267             return 1;
2268         wrd = (insn >> 12) & 0xf;
2269         rd0 = (insn >> 16) & 0xf;
2270         gen_op_iwmmxt_movq_M0_wRn(rd0);
2271         tmp = tcg_temp_new_i32();
2272         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2273             tcg_temp_free_i32(tmp);
2274             return 1;
2275         }
2276         switch ((insn >> 22) & 3) {
2277         case 1:
2278             gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2279             break;
2280         case 2:
2281             gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2282             break;
2283         case 3:
2284             gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2285             break;
2286         }
2287         tcg_temp_free_i32(tmp);
2288         gen_op_iwmmxt_movq_wRn_M0(wrd);
2289         gen_op_iwmmxt_set_mup();
2290         gen_op_iwmmxt_set_cup();
2291         break;
2292     case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
2293     case 0x114: case 0x514: case 0x914: case 0xd14:
2294         if (((insn >> 22) & 3) == 0)
2295             return 1;
2296         wrd = (insn >> 12) & 0xf;
2297         rd0 = (insn >> 16) & 0xf;
2298         gen_op_iwmmxt_movq_M0_wRn(rd0);
2299         tmp = tcg_temp_new_i32();
2300         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2301             tcg_temp_free_i32(tmp);
2302             return 1;
2303         }
2304         switch ((insn >> 22) & 3) {
2305         case 1:
2306             gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2307             break;
2308         case 2:
2309             gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2310             break;
2311         case 3:
2312             gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2313             break;
2314         }
2315         tcg_temp_free_i32(tmp);
2316         gen_op_iwmmxt_movq_wRn_M0(wrd);
2317         gen_op_iwmmxt_set_mup();
2318         gen_op_iwmmxt_set_cup();
2319         break;
2320     case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
2321     case 0x314: case 0x714: case 0xb14: case 0xf14:
2322         if (((insn >> 22) & 3) == 0)
2323             return 1;
2324         wrd = (insn >> 12) & 0xf;
2325         rd0 = (insn >> 16) & 0xf;
2326         gen_op_iwmmxt_movq_M0_wRn(rd0);
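             /* WROR: rotate counts are taken modulo the element width,
              * hence the per-size masks passed to gen_iwmmxt_shift().
              */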
2327         tmp = tcg_temp_new_i32();
2328         switch ((insn >> 22) & 3) {
2329         case 1:
2330             if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2331                 tcg_temp_free_i32(tmp);
2332                 return 1;
2333             }
2334             gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2335             break;
2336         case 2:
2337             if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2338                 tcg_temp_free_i32(tmp);
2339                 return 1;
2340             }
2341             gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2342             break;
2343         case 3:
2344             if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2345                 tcg_temp_free_i32(tmp);
2346                 return 1;
2347             }
2348             gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2349             break;
2350         }
2351         tcg_temp_free_i32(tmp);
2352         gen_op_iwmmxt_movq_wRn_M0(wrd);
2353         gen_op_iwmmxt_set_mup();
2354         gen_op_iwmmxt_set_cup();
2355         break;
2356     case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
2357     case 0x916: case 0xb16: case 0xd16: case 0xf16:
2358         wrd = (insn >> 12) & 0xf;
2359         rd0 = (insn >> 16) & 0xf;
2360         rd1 = (insn >> 0) & 0xf;
2361         gen_op_iwmmxt_movq_M0_wRn(rd0);
2362         switch ((insn >> 22) & 3) {
2363         case 0:
2364             if (insn & (1 << 21))
2365                 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2366             else
2367                 gen_op_iwmmxt_minub_M0_wRn(rd1);
2368             break;
2369         case 1:
2370             if (insn & (1 << 21))
2371                 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2372             else
2373                 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2374             break;
2375         case 2:
2376             if (insn & (1 << 21))
2377                 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2378             else
2379                 gen_op_iwmmxt_minul_M0_wRn(rd1);
2380             break;
2381         case 3:
2382             return 1;
2383         }
2384         gen_op_iwmmxt_movq_wRn_M0(wrd);
2385         gen_op_iwmmxt_set_mup();
2386         break;
2387     case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
2388     case 0x816: case 0xa16: case 0xc16: case 0xe16:
2389         wrd = (insn >> 12) & 0xf;
2390         rd0 = (insn >> 16) & 0xf;
2391         rd1 = (insn >> 0) & 0xf;
2392         gen_op_iwmmxt_movq_M0_wRn(rd0);
2393         switch ((insn >> 22) & 3) {
2394         case 0:
2395             if (insn & (1 << 21))
2396                 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2397             else
2398                 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2399             break;
2400         case 1:
2401             if (insn & (1 << 21))
2402                 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2403             else
2404                 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2405             break;
2406         case 2:
2407             if (insn & (1 << 21))
2408                 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2409             else
2410                 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2411             break;
2412         case 3:
2413             return 1;
2414         }
2415         gen_op_iwmmxt_movq_wRn_M0(wrd);
2416         gen_op_iwmmxt_set_mup();
2417         break;
2418     case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
2419     case 0x402: case 0x502: case 0x602: case 0x702:
2420         wrd = (insn >> 12) & 0xf;
2421         rd0 = (insn >> 16) & 0xf;
2422         rd1 = (insn >> 0) & 0xf;
2423         gen_op_iwmmxt_movq_M0_wRn(rd0);
2424         tmp = tcg_const_i32((insn >> 20) & 3);
2425         iwmmxt_load_reg(cpu_V1, rd1);
2426         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2427         tcg_temp_free_i32(tmp);
2428         gen_op_iwmmxt_movq_wRn_M0(wrd);
2429         gen_op_iwmmxt_set_mup();
2430         break;
2431     case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
2432     case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2433     case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2434     case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2435         wrd = (insn >> 12) & 0xf;
2436         rd0 = (insn >> 16) & 0xf;
2437         rd1 = (insn >> 0) & 0xf;
2438         gen_op_iwmmxt_movq_M0_wRn(rd0);
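             /* WSUB: insn[23:22] selects the element size and insn[21:20]
              * the saturation mode (none, unsigned or signed); all other
              * combinations are invalid.
              */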
2439         switch ((insn >> 20) & 0xf) {
2440         case 0x0:
2441             gen_op_iwmmxt_subnb_M0_wRn(rd1);
2442             break;
2443         case 0x1:
2444             gen_op_iwmmxt_subub_M0_wRn(rd1);
2445             break;
2446         case 0x3:
2447             gen_op_iwmmxt_subsb_M0_wRn(rd1);
2448             break;
2449         case 0x4:
2450             gen_op_iwmmxt_subnw_M0_wRn(rd1);
2451             break;
2452         case 0x5:
2453             gen_op_iwmmxt_subuw_M0_wRn(rd1);
2454             break;
2455         case 0x7:
2456             gen_op_iwmmxt_subsw_M0_wRn(rd1);
2457             break;
2458         case 0x8:
2459             gen_op_iwmmxt_subnl_M0_wRn(rd1);
2460             break;
2461         case 0x9:
2462             gen_op_iwmmxt_subul_M0_wRn(rd1);
2463             break;
2464         case 0xb:
2465             gen_op_iwmmxt_subsl_M0_wRn(rd1);
2466             break;
2467         default:
2468             return 1;
2469         }
2470         gen_op_iwmmxt_movq_wRn_M0(wrd);
2471         gen_op_iwmmxt_set_mup();
2472         gen_op_iwmmxt_set_cup();
2473         break;
2474     case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
2475     case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2476     case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2477     case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2478         wrd = (insn >> 12) & 0xf;
2479         rd0 = (insn >> 16) & 0xf;
2480         gen_op_iwmmxt_movq_M0_wRn(rd0);
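             /* WSHUFH: the 8-bit immediate selecting the halfword
              * permutation is split across insn[23:20] and insn[3:0].
              */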
2481         tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2482         gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2483         tcg_temp_free_i32(tmp);
2484         gen_op_iwmmxt_movq_wRn_M0(wrd);
2485         gen_op_iwmmxt_set_mup();
2486         gen_op_iwmmxt_set_cup();
2487         break;
2488     case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
2489     case 0x418: case 0x518: case 0x618: case 0x718:
2490     case 0x818: case 0x918: case 0xa18: case 0xb18:
2491     case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2492         wrd = (insn >> 12) & 0xf;
2493         rd0 = (insn >> 16) & 0xf;
2494         rd1 = (insn >> 0) & 0xf;
2495         gen_op_iwmmxt_movq_M0_wRn(rd0);
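             /* WADD: encoded like WSUB above; element size in insn[23:22],
              * saturation mode in insn[21:20].
              */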
2496         switch ((insn >> 20) & 0xf) {
2497         case 0x0:
2498             gen_op_iwmmxt_addnb_M0_wRn(rd1);
2499             break;
2500         case 0x1:
2501             gen_op_iwmmxt_addub_M0_wRn(rd1);
2502             break;
2503         case 0x3:
2504             gen_op_iwmmxt_addsb_M0_wRn(rd1);
2505             break;
2506         case 0x4:
2507             gen_op_iwmmxt_addnw_M0_wRn(rd1);
2508             break;
2509         case 0x5:
2510             gen_op_iwmmxt_adduw_M0_wRn(rd1);
2511             break;
2512         case 0x7:
2513             gen_op_iwmmxt_addsw_M0_wRn(rd1);
2514             break;
2515         case 0x8:
2516             gen_op_iwmmxt_addnl_M0_wRn(rd1);
2517             break;
2518         case 0x9:
2519             gen_op_iwmmxt_addul_M0_wRn(rd1);
2520             break;
2521         case 0xb:
2522             gen_op_iwmmxt_addsl_M0_wRn(rd1);
2523             break;
2524         default:
2525             return 1;
2526         }
2527         gen_op_iwmmxt_movq_wRn_M0(wrd);
2528         gen_op_iwmmxt_set_mup();
2529         gen_op_iwmmxt_set_cup();
2530         break;
2531     case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
2532     case 0x408: case 0x508: case 0x608: case 0x708:
2533     case 0x808: case 0x908: case 0xa08: case 0xb08:
2534     case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2535         if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2536             return 1;
2537         wrd = (insn >> 12) & 0xf;
2538         rd0 = (insn >> 16) & 0xf;
2539         rd1 = (insn >> 0) & 0xf;
2540         gen_op_iwmmxt_movq_M0_wRn(rd0);
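             /* WPACK: narrow the elements of wRn and wRm, with signed or
              * unsigned saturation selected by bit 21; packing from byte
              * elements (size 0) is invalid.
              */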
2541         switch ((insn >> 22) & 3) {
2542         case 1:
2543             if (insn & (1 << 21))
2544                 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2545             else
2546                 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2547             break;
2548         case 2:
2549             if (insn & (1 << 21))
2550                 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2551             else
2552                 gen_op_iwmmxt_packul_M0_wRn(rd1);
2553             break;
2554         case 3:
2555             if (insn & (1 << 21))
2556                 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2557             else
2558                 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2559             break;
2560         }
2561         gen_op_iwmmxt_movq_wRn_M0(wrd);
2562         gen_op_iwmmxt_set_mup();
2563         gen_op_iwmmxt_set_cup();
2564         break;
2565     case 0x201: case 0x203: case 0x205: case 0x207:
2566     case 0x209: case 0x20b: case 0x20d: case 0x20f:
2567     case 0x211: case 0x213: case 0x215: case 0x217:
2568     case 0x219: case 0x21b: case 0x21d: case 0x21f:
2569         wrd = (insn >> 5) & 0xf;
2570         rd0 = (insn >> 12) & 0xf;
2571         rd1 = (insn >> 0) & 0xf;
2572         if (rd0 == 0xf || rd1 == 0xf)
2573             return 1;
2574         gen_op_iwmmxt_movq_M0_wRn(wrd);
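             /* TMIA{,PH,BB,BT,TB,TT}: multiply two ARM core registers and
              * accumulate into the 64-bit wRd; for the xy forms bits 16
              * and 17 select the top or bottom halfword of each operand.
              */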
2575         tmp = load_reg(s, rd0);
2576         tmp2 = load_reg(s, rd1);
2577         switch ((insn >> 16) & 0xf) {
2578         case 0x0:                                       /* TMIA */
2579             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2580             break;
2581         case 0x8:                                       /* TMIAPH */
2582             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2583             break;
2584         case 0xc: case 0xd: case 0xe: case 0xf:         /* TMIAxy */
2585             if (insn & (1 << 16))
2586                 tcg_gen_shri_i32(tmp, tmp, 16);
2587             if (insn & (1 << 17))
2588                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2589             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2590             break;
2591         default:
2592             tcg_temp_free_i32(tmp2);
2593             tcg_temp_free_i32(tmp);
2594             return 1;
2595         }
2596         tcg_temp_free_i32(tmp2);
2597         tcg_temp_free_i32(tmp);
2598         gen_op_iwmmxt_movq_wRn_M0(wrd);
2599         gen_op_iwmmxt_set_mup();
2600         break;
2601     default:
2602         return 1;
2603     }
2604
2605     return 0;
2606 }
2607
2608 /* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2609    (i.e. an undefined instruction).  */
2610 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2611 {
2612     int acc, rd0, rd1, rdhi, rdlo;
2613     TCGv_i32 tmp, tmp2;
2614
2615     if ((insn & 0x0ff00f10) == 0x0e200010) {
2616         /* Multiply with Internal Accumulate Format */
2617         rd0 = (insn >> 12) & 0xf;
2618         rd1 = insn & 0xf;
2619         acc = (insn >> 5) & 7;
2620
2621         if (acc != 0)
2622             return 1;
2623
2624         tmp = load_reg(s, rd0);
2625         tmp2 = load_reg(s, rd1);
2626         switch ((insn >> 16) & 0xf) {
2627         case 0x0:                                       /* MIA */
2628             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2629             break;
2630         case 0x8:                                       /* MIAPH */
2631             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2632             break;
2633         case 0xc:                                       /* MIABB */
2634         case 0xd:                                       /* MIABT */
2635         case 0xe:                                       /* MIATB */
2636         case 0xf:                                       /* MIATT */
2637             if (insn & (1 << 16))
2638                 tcg_gen_shri_i32(tmp, tmp, 16);
2639             if (insn & (1 << 17))
2640                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2641             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2642             break;
2643         default:
2644             return 1;
2645         }
2646         tcg_temp_free_i32(tmp2);
2647         tcg_temp_free_i32(tmp);
2648
2649         gen_op_iwmmxt_movq_wRn_M0(acc);
2650         return 0;
2651     }
2652
2653     if ((insn & 0x0fe00ff8) == 0x0c400000) {
2654         /* Internal Accumulator Access Format */
2655         rdhi = (insn >> 16) & 0xf;
2656         rdlo = (insn >> 12) & 0xf;
2657         acc = insn & 7;
2658
2659         if (acc != 0)
2660             return 1;
2661
2662         if (insn & ARM_CP_RW_BIT) {                     /* MRA */
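                 /* The accumulator is 40 bits wide; RdLo receives bits
                  * 31:0 and RdHi the remaining bits 39:32, hence the mask
                  * below.
                  */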
2663             iwmmxt_load_reg(cpu_V0, acc);
2664             tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2665             tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
2666             tcg_gen_extrl_i64_i32(cpu_R[rdhi], cpu_V0);
2667             tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2668         } else {                                        /* MAR */
2669             tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2670             iwmmxt_store_reg(cpu_V0, acc);
2671         }
2672         return 0;
2673     }
2674
2675     return 1;
2676 }
2677
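     /* VFP register numbers are split between a 4-bit field and one extra
      * bit: the LSB for single-precision registers, the MSB for
      * double-precision.  The double-precision high bit is only accepted
      * when VFP3 is present.
      */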
2678 #define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
2679 #define VFP_SREG(insn, bigbit, smallbit) \
2680   ((VFP_REG_SHR(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1))
2681 #define VFP_DREG(reg, insn, bigbit, smallbit) do { \
2682     if (arm_dc_feature(s, ARM_FEATURE_VFP3)) { \
2683         reg = (((insn) >> (bigbit)) & 0x0f) \
2684               | (((insn) >> ((smallbit) - 4)) & 0x10); \
2685     } else { \
2686         if ((insn) & (1 << (smallbit))) \
2687             return 1; \
2688         reg = ((insn) >> (bigbit)) & 0x0f; \
2689     }} while (0)
2690
2691 #define VFP_SREG_D(insn) VFP_SREG(insn, 12, 22)
2692 #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
2693 #define VFP_SREG_N(insn) VFP_SREG(insn, 16,  7)
2694 #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16,  7)
2695 #define VFP_SREG_M(insn) VFP_SREG(insn,  0,  5)
2696 #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn,  0,  5)
2697
2698 /* Move between integer and VFP cores.  */
2699 static TCGv_i32 gen_vfp_mrs(void)
2700 {
2701     TCGv_i32 tmp = tcg_temp_new_i32();
2702     tcg_gen_mov_i32(tmp, cpu_F0s);
2703     return tmp;
2704 }
2705
2706 static void gen_vfp_msr(TCGv_i32 tmp)
2707 {
2708     tcg_gen_mov_i32(cpu_F0s, tmp);
2709     tcg_temp_free_i32(tmp);
2710 }
2711
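     /* Duplicate the byte at bit offset 'shift' across all four byte lanes.  */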
2712 static void gen_neon_dup_u8(TCGv_i32 var, int shift)
2713 {
2714     TCGv_i32 tmp = tcg_temp_new_i32();
2715     if (shift)
2716         tcg_gen_shri_i32(var, var, shift);
2717     tcg_gen_ext8u_i32(var, var);
2718     tcg_gen_shli_i32(tmp, var, 8);
2719     tcg_gen_or_i32(var, var, tmp);
2720     tcg_gen_shli_i32(tmp, var, 16);
2721     tcg_gen_or_i32(var, var, tmp);
2722     tcg_temp_free_i32(tmp);
2723 }
2724
2725 static void gen_neon_dup_low16(TCGv_i32 var)
2726 {
2727     TCGv_i32 tmp = tcg_temp_new_i32();
2728     tcg_gen_ext16u_i32(var, var);
2729     tcg_gen_shli_i32(tmp, var, 16);
2730     tcg_gen_or_i32(var, var, tmp);
2731     tcg_temp_free_i32(tmp);
2732 }
2733
2734 static void gen_neon_dup_high16(TCGv_i32 var)
2735 {
2736     TCGv_i32 tmp = tcg_temp_new_i32();
2737     tcg_gen_andi_i32(var, var, 0xffff0000);
2738     tcg_gen_shri_i32(tmp, var, 16);
2739     tcg_gen_or_i32(var, var, tmp);
2740     tcg_temp_free_i32(tmp);
2741 }
2742
2743 static TCGv_i32 gen_load_and_replicate(DisasContext *s, TCGv_i32 addr, int size)
2744 {
2745     /* Load a single Neon element and replicate into a 32 bit TCG reg */
2746     TCGv_i32 tmp = tcg_temp_new_i32();
2747     switch (size) {
2748     case 0:
2749         gen_aa32_ld8u(tmp, addr, get_mem_index(s));
2750         gen_neon_dup_u8(tmp, 0);
2751         break;
2752     case 1:
2753         gen_aa32_ld16u(tmp, addr, get_mem_index(s));
2754         gen_neon_dup_low16(tmp);
2755         break;
2756     case 2:
2757         gen_aa32_ld32u(tmp, addr, get_mem_index(s));
2758         break;
2759     default: /* Avoid compiler warnings.  */
2760         abort();
2761     }
2762     return tmp;
2763 }
2764
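     /* VSEL: select between Vn and Vm according to the condition encoded
      * in insn bits 21:20 (EQ, VS, GE or GT), implemented branch-free with
      * movcond on the cached flag variables.
      */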
2765 static int handle_vsel(uint32_t insn, uint32_t rd, uint32_t rn, uint32_t rm,
2766                        uint32_t dp)
2767 {
2768     uint32_t cc = extract32(insn, 20, 2);
2769
2770     if (dp) {
2771         TCGv_i64 frn, frm, dest;
2772         TCGv_i64 tmp, zero, zf, nf, vf;
2773
2774         zero = tcg_const_i64(0);
2775
2776         frn = tcg_temp_new_i64();
2777         frm = tcg_temp_new_i64();
2778         dest = tcg_temp_new_i64();
2779
2780         zf = tcg_temp_new_i64();
2781         nf = tcg_temp_new_i64();
2782         vf = tcg_temp_new_i64();
2783
2784         tcg_gen_extu_i32_i64(zf, cpu_ZF);
2785         tcg_gen_ext_i32_i64(nf, cpu_NF);
2786         tcg_gen_ext_i32_i64(vf, cpu_VF);
2787
2788         tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn));
2789         tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm));
2790         switch (cc) {
2791         case 0: /* eq: Z */
2792             tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
2793                                 frn, frm);
2794             break;
2795         case 1: /* vs: V */
2796             tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero,
2797                                 frn, frm);
2798             break;
2799         case 2: /* ge: N == V -> N ^ V == 0 */
2800             tmp = tcg_temp_new_i64();
2801             tcg_gen_xor_i64(tmp, vf, nf);
2802             tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
2803                                 frn, frm);
2804             tcg_temp_free_i64(tmp);
2805             break;
2806         case 3: /* gt: !Z && N == V */
2807             tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero,
2808                                 frn, frm);
2809             tmp = tcg_temp_new_i64();
2810             tcg_gen_xor_i64(tmp, vf, nf);
2811             tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
2812                                 dest, frm);
2813             tcg_temp_free_i64(tmp);
2814             break;
2815         }
2816         tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd));
2817         tcg_temp_free_i64(frn);
2818         tcg_temp_free_i64(frm);
2819         tcg_temp_free_i64(dest);
2820
2821         tcg_temp_free_i64(zf);
2822         tcg_temp_free_i64(nf);
2823         tcg_temp_free_i64(vf);
2824
2825         tcg_temp_free_i64(zero);
2826     } else {
2827         TCGv_i32 frn, frm, dest;
2828         TCGv_i32 tmp, zero;
2829
2830         zero = tcg_const_i32(0);
2831
2832         frn = tcg_temp_new_i32();
2833         frm = tcg_temp_new_i32();
2834         dest = tcg_temp_new_i32();
2835         tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn));
2836         tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm));
2837         switch (cc) {
2838         case 0: /* eq: Z */
2839             tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
2840                                 frn, frm);
2841             break;
2842         case 1: /* vs: V */
2843             tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero,
2844                                 frn, frm);
2845             break;
2846         case 2: /* ge: N == V -> N ^ V == 0 */
2847             tmp = tcg_temp_new_i32();
2848             tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
2849             tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
2850                                 frn, frm);
2851             tcg_temp_free_i32(tmp);
2852             break;
2853         case 3: /* gt: !Z && N == V */
2854             tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero,
2855                                 frn, frm);
2856             tmp = tcg_temp_new_i32();
2857             tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
2858             tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
2859                                 dest, frm);
2860             tcg_temp_free_i32(tmp);
2861             break;
2862         }
2863         tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd));
2864         tcg_temp_free_i32(frn);
2865         tcg_temp_free_i32(frm);
2866         tcg_temp_free_i32(dest);
2867
2868         tcg_temp_free_i32(zero);
2869     }
2870
2871     return 0;
2872 }
2873
2874 static int handle_vminmaxnm(uint32_t insn, uint32_t rd, uint32_t rn,
2875                             uint32_t rm, uint32_t dp)
2876 {
2877     uint32_t vmin = extract32(insn, 6, 1);
2878     TCGv_ptr fpst = get_fpstatus_ptr(0);
2879
2880     if (dp) {
2881         TCGv_i64 frn, frm, dest;
2882
2883         frn = tcg_temp_new_i64();
2884         frm = tcg_temp_new_i64();
2885         dest = tcg_temp_new_i64();
2886
2887         tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn));
2888         tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm));
2889         if (vmin) {
2890             gen_helper_vfp_minnumd(dest, frn, frm, fpst);
2891         } else {
2892             gen_helper_vfp_maxnumd(dest, frn, frm, fpst);
2893         }
2894         tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd));
2895         tcg_temp_free_i64(frn);
2896         tcg_temp_free_i64(frm);
2897         tcg_temp_free_i64(dest);
2898     } else {
2899         TCGv_i32 frn, frm, dest;
2900
2901         frn = tcg_temp_new_i32();
2902         frm = tcg_temp_new_i32();
2903         dest = tcg_temp_new_i32();
2904
2905         tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn));
2906         tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm));
2907         if (vmin) {
2908             gen_helper_vfp_minnums(dest, frn, frm, fpst);
2909         } else {
2910             gen_helper_vfp_maxnums(dest, frn, frm, fpst);
2911         }
2912         tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd));
2913         tcg_temp_free_i32(frn);
2914         tcg_temp_free_i32(frm);
2915         tcg_temp_free_i32(dest);
2916     }
2917
2918     tcg_temp_free_ptr(fpst);
2919     return 0;
2920 }
2921
2922 static int handle_vrint(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
2923                         int rounding)
2924 {
2925     TCGv_ptr fpst = get_fpstatus_ptr(0);
2926     TCGv_i32 tcg_rmode;
2927
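         /* gen_helper_set_rmode() installs the new rounding mode and
          * returns the previous one in tcg_rmode; the matching call at the
          * end of the function restores it.
          */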
2928     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
2929     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
2930
2931     if (dp) {
2932         TCGv_i64 tcg_op;
2933         TCGv_i64 tcg_res;
2934         tcg_op = tcg_temp_new_i64();
2935         tcg_res = tcg_temp_new_i64();
2936         tcg_gen_ld_f64(tcg_op, cpu_env, vfp_reg_offset(dp, rm));
2937         gen_helper_rintd(tcg_res, tcg_op, fpst);
2938         tcg_gen_st_f64(tcg_res, cpu_env, vfp_reg_offset(dp, rd));
2939         tcg_temp_free_i64(tcg_op);
2940         tcg_temp_free_i64(tcg_res);
2941     } else {
2942         TCGv_i32 tcg_op;
2943         TCGv_i32 tcg_res;
2944         tcg_op = tcg_temp_new_i32();
2945         tcg_res = tcg_temp_new_i32();
2946         tcg_gen_ld_f32(tcg_op, cpu_env, vfp_reg_offset(dp, rm));
2947         gen_helper_rints(tcg_res, tcg_op, fpst);
2948         tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(dp, rd));
2949         tcg_temp_free_i32(tcg_op);
2950         tcg_temp_free_i32(tcg_res);
2951     }
2952
2953     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
2954     tcg_temp_free_i32(tcg_rmode);
2955
2956     tcg_temp_free_ptr(fpst);
2957     return 0;
2958 }
2959
2960 static int handle_vcvt(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
2961                        int rounding)
2962 {
2963     bool is_signed = extract32(insn, 7, 1);
2964     TCGv_ptr fpst = get_fpstatus_ptr(0);
2965     TCGv_i32 tcg_rmode, tcg_shift;
2966
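         /* A fixed-point shift of zero: these are plain FP->integer
          * conversions implemented with the fixed-point helpers.
          */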
2967     tcg_shift = tcg_const_i32(0);
2968
2969     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
2970     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
2971
2972     if (dp) {
2973         TCGv_i64 tcg_double, tcg_res;
2974         TCGv_i32 tcg_tmp;
2975         /* Rd is encoded as a single precision register even when the source
2976          * is double precision.
2977          */
2978         rd = ((rd << 1) & 0x1e) | ((rd >> 4) & 0x1);
2979         tcg_double = tcg_temp_new_i64();
2980         tcg_res = tcg_temp_new_i64();
2981         tcg_tmp = tcg_temp_new_i32();
2982         tcg_gen_ld_f64(tcg_double, cpu_env, vfp_reg_offset(1, rm));
2983         if (is_signed) {
2984             gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
2985         } else {
2986             gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
2987         }
2988         tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
2989         tcg_gen_st_f32(tcg_tmp, cpu_env, vfp_reg_offset(0, rd));
2990         tcg_temp_free_i32(tcg_tmp);
2991         tcg_temp_free_i64(tcg_res);
2992         tcg_temp_free_i64(tcg_double);
2993     } else {
2994         TCGv_i32 tcg_single, tcg_res;
2995         tcg_single = tcg_temp_new_i32();
2996         tcg_res = tcg_temp_new_i32();
2997         tcg_gen_ld_f32(tcg_single, cpu_env, vfp_reg_offset(0, rm));
2998         if (is_signed) {
2999             gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
3000         } else {
3001             gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
3002         }
3003         tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(0, rd));
3004         tcg_temp_free_i32(tcg_res);
3005         tcg_temp_free_i32(tcg_single);
3006     }
3007
3008     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
3009     tcg_temp_free_i32(tcg_rmode);
3010
3011     tcg_temp_free_i32(tcg_shift);
3012
3013     tcg_temp_free_ptr(fpst);
3014
3015     return 0;
3016 }
3017
3018 /* Table for converting the most common AArch32 encoding of
3019  * rounding mode to arm_fprounding order (which matches the
3020  * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
3021  */
3022 static const uint8_t fp_decode_rm[] = {
3023     FPROUNDING_TIEAWAY,
3024     FPROUNDING_TIEEVEN,
3025     FPROUNDING_POSINF,
3026     FPROUNDING_NEGINF,
3027 };
3028
3029 static int disas_vfp_v8_insn(DisasContext *s, uint32_t insn)
3030 {
3031     uint32_t rd, rn, rm, dp = extract32(insn, 8, 1);
3032
3033     if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
3034         return 1;
3035     }
3036
3037     if (dp) {
3038         VFP_DREG_D(rd, insn);
3039         VFP_DREG_N(rn, insn);
3040         VFP_DREG_M(rm, insn);
3041     } else {
3042         rd = VFP_SREG_D(insn);
3043         rn = VFP_SREG_N(insn);
3044         rm = VFP_SREG_M(insn);
3045     }
3046
3047     if ((insn & 0x0f800e50) == 0x0e000a00) {
3048         return handle_vsel(insn, rd, rn, rm, dp);
3049     } else if ((insn & 0x0fb00e10) == 0x0e800a00) {
3050         return handle_vminmaxnm(insn, rd, rn, rm, dp);
3051     } else if ((insn & 0x0fbc0ed0) == 0x0eb80a40) {
3052         /* VRINTA, VRINTN, VRINTP, VRINTM */
3053         int rounding = fp_decode_rm[extract32(insn, 16, 2)];
3054         return handle_vrint(insn, rd, rm, dp, rounding);
3055     } else if ((insn & 0x0fbc0e50) == 0x0ebc0a40) {
3056         /* VCVTA, VCVTN, VCVTP, VCVTM */
3057         int rounding = fp_decode_rm[extract32(insn, 16, 2)];
3058         return handle_vcvt(insn, rd, rm, dp, rounding);
3059     }
3060     return 1;
3061 }
3062
3063 /* Disassemble a VFP instruction.  Returns nonzero if an error occurred
3064    (i.e. an undefined instruction).  */
3065 static int disas_vfp_insn(DisasContext *s, uint32_t insn)
3066 {
3067     uint32_t rd, rn, rm, op, i, n, offset, delta_d, delta_m, bank_mask;
3068     int dp, veclen;
3069     TCGv_i32 addr;
3070     TCGv_i32 tmp;
3071     TCGv_i32 tmp2;
3072
3073     if (!arm_dc_feature(s, ARM_FEATURE_VFP)) {
3074         return 1;
3075     }
3076
3077     /* FIXME: this access check should not take precedence over UNDEF
3078      * for invalid encodings; we will generate incorrect syndrome information
3079      * for attempts to execute invalid vfp/neon encodings with FP disabled.
3080      */
3081     if (s->fp_excp_el) {
3082         gen_exception_insn(s, 4, EXCP_UDEF,
3083                            syn_fp_access_trap(1, 0xe, s->thumb), s->fp_excp_el);
3084         return 0;
3085     }
3086
3087     if (!s->vfp_enabled) {
3088         /* VFP disabled.  Only allow fmxr/fmrx to/from some control regs.  */
3089         if ((insn & 0x0fe00fff) != 0x0ee00a10)
3090             return 1;
3091         rn = (insn >> 16) & 0xf;
3092         if (rn != ARM_VFP_FPSID && rn != ARM_VFP_FPEXC && rn != ARM_VFP_MVFR2
3093             && rn != ARM_VFP_MVFR1 && rn != ARM_VFP_MVFR0) {
3094             return 1;
3095         }
3096     }
3097
3098     if (extract32(insn, 28, 4) == 0xf) {
3099         /* Encodings with T=1 (Thumb) or unconditional (ARM):
3100          * only used in v8 and above.
3101          */
3102         return disas_vfp_v8_insn(s, insn);
3103     }
3104
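         /* The coprocessor number encodes the precision: cp10 is single
          * precision, cp11 double.
          */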
3105     dp = ((insn & 0xf00) == 0xb00);
3106     switch ((insn >> 24) & 0xf) {
3107     case 0xe:
3108         if (insn & (1 << 4)) {
3109             /* single register transfer */
3110             rd = (insn >> 12) & 0xf;
3111             if (dp) {
3112                 int size;
3113                 int pass;
3114
3115                 VFP_DREG_N(rn, insn);
3116                 if (insn & 0xf)
3117                     return 1;
3118                 if (insn & 0x00c00060
3119                     && !arm_dc_feature(s, ARM_FEATURE_NEON)) {
3120                     return 1;
3121                 }
3122
3123                 pass = (insn >> 21) & 1;
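                     /* Decode the scalar size and bit offset within the
                      * 32-bit lane: bit 22 selects byte, bit 5 halfword,
                      * otherwise a whole word is transferred.
                      */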
3124                 if (insn & (1 << 22)) {
3125                     size = 0;
3126                     offset = ((insn >> 5) & 3) * 8;
3127                 } else if (insn & (1 << 5)) {
3128                     size = 1;
3129                     offset = (insn & (1 << 6)) ? 16 : 0;
3130                 } else {
3131                     size = 2;
3132                     offset = 0;
3133                 }
3134                 if (insn & ARM_CP_RW_BIT) {
3135                     /* vfp->arm */
3136                     tmp = neon_load_reg(rn, pass);
3137                     switch (size) {
3138                     case 0:
3139                         if (offset)
3140                             tcg_gen_shri_i32(tmp, tmp, offset);
3141                         if (insn & (1 << 23))
3142                             gen_uxtb(tmp);
3143                         else
3144                             gen_sxtb(tmp);
3145                         break;
3146                     case 1:
3147                         if (insn & (1 << 23)) {
3148                             if (offset) {
3149                                 tcg_gen_shri_i32(tmp, tmp, 16);
3150                             } else {
3151                                 gen_uxth(tmp);
3152                             }
3153                         } else {
3154                             if (offset) {
3155                                 tcg_gen_sari_i32(tmp, tmp, 16);
3156                             } else {
3157                                 gen_sxth(tmp);
3158                             }
3159                         }
3160                         break;
3161                     case 2:
3162                         break;
3163                     }
3164                     store_reg(s, rd, tmp);
3165                 } else {
3166                     /* arm->vfp */
3167                     tmp = load_reg(s, rd);
3168                     if (insn & (1 << 23)) {
3169                         /* VDUP */
3170                         if (size == 0) {
3171                             gen_neon_dup_u8(tmp, 0);
3172                         } else if (size == 1) {
3173                             gen_neon_dup_low16(tmp);
3174                         }
3175                         for (n = 0; n <= pass * 2; n++) {
3176                             tmp2 = tcg_temp_new_i32();
3177                             tcg_gen_mov_i32(tmp2, tmp);
3178                             neon_store_reg(rn, n, tmp2);
3179                         }
3180                         neon_store_reg(rn, n, tmp);
3181                     } else {
3182                         /* VMOV */
3183                         switch (size) {
3184                         case 0:
3185                             tmp2 = neon_load_reg(rn, pass);
3186                             tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 8);
3187                             tcg_temp_free_i32(tmp2);
3188                             break;
3189                         case 1:
3190                             tmp2 = neon_load_reg(rn, pass);
3191                             tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 16);
3192                             tcg_temp_free_i32(tmp2);
3193                             break;
3194                         case 2:
3195                             break;
3196                         }
3197                         neon_store_reg(rn, pass, tmp);
3198                     }
3199                 }
3200             } else { /* !dp */
3201                 if ((insn & 0x6f) != 0x00)
3202                     return 1;
3203                 rn = VFP_SREG_N(insn);
3204                 if (insn & ARM_CP_RW_BIT) {
3205                     /* vfp->arm */
3206                     if (insn & (1 << 21)) {
3207                         /* system register */
3208                         rn >>= 1;
3209
3210                         switch (rn) {
3211                         case ARM_VFP_FPSID:
3212                             /* VFP2 allows access to FPSID from userspace;
3213                                VFP3 restricts all ID registers to privileged
3214                                accesses.  */
3215                             if (IS_USER(s)
3216                                 && arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3217                                 return 1;
3218                             }
3219                             tmp = load_cpu_field(vfp.xregs[rn]);
3220                             break;
3221                         case ARM_VFP_FPEXC:
3222                             if (IS_USER(s))
3223                                 return 1;
3224                             tmp = load_cpu_field(vfp.xregs[rn]);
3225                             break;
3226                         case ARM_VFP_FPINST:
3227                         case ARM_VFP_FPINST2:
3228                             /* Not present in VFP3.  */
3229                             if (IS_USER(s)
3230                                 || arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3231                                 return 1;
3232                             }
3233                             tmp = load_cpu_field(vfp.xregs[rn]);
3234                             break;
3235                         case ARM_VFP_FPSCR:
3236                             if (rd == 15) {
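                                     /* VMRS to APSR_nzcv: only the FPSCR
                                      * condition flags are transferred.
                                      */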
3237                                 tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
3238                                 tcg_gen_andi_i32(tmp, tmp, 0xf0000000);
3239                             } else {
3240                                 tmp = tcg_temp_new_i32();
3241                                 gen_helper_vfp_get_fpscr(tmp, cpu_env);
3242                             }
3243                             break;
3244                         case ARM_VFP_MVFR2:
3245                             if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
3246                                 return 1;
3247                             }
3248                             /* fall through */
3249                         case ARM_VFP_MVFR0:
3250                         case ARM_VFP_MVFR1:
3251                             if (IS_USER(s)
3252                                 || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
3253                                 return 1;
3254                             }
3255                             tmp = load_cpu_field(vfp.xregs[rn]);
3256                             break;
3257                         default:
3258                             return 1;
3259                         }
3260                     } else {
3261                         gen_mov_F0_vreg(0, rn);
3262                         tmp = gen_vfp_mrs();
3263                     }
3264                     if (rd == 15) {
3265                         /* Set the 4 flag bits in the CPSR.  */
3266                         gen_set_nzcv(tmp);
3267                         tcg_temp_free_i32(tmp);
3268                     } else {
3269                         store_reg(s, rd, tmp);
3270                     }
3271                 } else {
3272                     /* arm->vfp */
3273                     if (insn & (1 << 21)) {
3274                         rn >>= 1;
3275                         /* system register */
3276                         switch (rn) {
3277                         case ARM_VFP_FPSID:
3278                         case ARM_VFP_MVFR0:
3279                         case ARM_VFP_MVFR1:
3280                             /* Writes are ignored.  */
3281                             break;
3282                         case ARM_VFP_FPSCR:
3283                             tmp = load_reg(s, rd);
3284                             gen_helper_vfp_set_fpscr(cpu_env, tmp);
3285                             tcg_temp_free_i32(tmp);
3286                             gen_lookup_tb(s);
3287                             break;
3288                         case ARM_VFP_FPEXC:
3289                             if (IS_USER(s))
3290                                 return 1;
3291                             /* TODO: VFP subarchitecture support.
3292                              * For now, keep the EN bit only. */
3293                             tmp = load_reg(s, rd);
3294                             tcg_gen_andi_i32(tmp, tmp, 1 << 30);
3295                             store_cpu_field(tmp, vfp.xregs[rn]);
3296                             gen_lookup_tb(s);
3297                             break;
3298                         case ARM_VFP_FPINST:
3299                         case ARM_VFP_FPINST2:
3300                             if (IS_USER(s)) {
3301                                 return 1;
3302                             }
3303                             tmp = load_reg(s, rd);
3304                             store_cpu_field(tmp, vfp.xregs[rn]);
3305                             break;
3306                         default:
3307                             return 1;
3308                         }
3309                     } else {
3310                         tmp = load_reg(s, rd);
3311                         gen_vfp_msr(tmp);
3312                         gen_mov_vreg_F0(0, rn);
3313                     }
3314                 }
3315             }
3316         } else {
3317             /* data processing */
3318             /* The opcode is in bits 23, 21, 20 and 6.  */
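             /* e.g. VMUL decodes to op == 4, VADD to op == 6, VSUB to op == 7 */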
3319             op = ((insn >> 20) & 8) | ((insn >> 19) & 6) | ((insn >> 6) & 1);
3320             if (dp) {
3321                 if (op == 15) {
3322                     /* rn is opcode */
3323                     rn = ((insn >> 15) & 0x1e) | ((insn >> 7) & 1);
3324                 } else {
3325                     /* rn is register number */
3326                     VFP_DREG_N(rn, insn);
3327                 }
3328
3329                 if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18) ||
3330                                  ((rn & 0x1e) == 0x6))) {
3331                     /* Integer or single/half precision destination.  */
3332                     rd = VFP_SREG_D(insn);
3333                 } else {
3334                     VFP_DREG_D(rd, insn);
3335                 }
3336                 if (op == 15 &&
3337                     (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14) ||
3338                      ((rn & 0x1e) == 0x4))) {
3339                     /* VCVT from int or half precision is always from S reg
3340                      * regardless of dp bit. VCVT with immediate frac_bits
3341                      * has same format as SREG_M.
3342                      */
3343                     rm = VFP_SREG_M(insn);
3344                 } else {
3345                     VFP_DREG_M(rm, insn);
3346                 }
3347             } else {
3348                 rn = VFP_SREG_N(insn);
3349                 if (op == 15 && rn == 15) {
3350                     /* Double precision destination.  */
3351                     VFP_DREG_D(rd, insn);
3352                 } else {
3353                     rd = VFP_SREG_D(insn);
3354                 }
3355                 /* NB that we implicitly rely on the encoding for the frac_bits
3356                  * in VCVT of fixed to float being the same as that of an SREG_M.
3357                  */
3358                 rm = VFP_SREG_M(insn);
3359             }
3360
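             /* FPSCR.LEN (cached in s->vec_len) enables VFP short vectors;
              * the extension-space ops other than cpy/abs/neg/sqrt always
              * operate on scalars.
              */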
3361             veclen = s->vec_len;
3362             if (op == 15 && rn > 3)
3363                 veclen = 0;
3364
3365             /* Shut up compiler warnings.  */
3366             delta_m = 0;
3367             delta_d = 0;
3368             bank_mask = 0;
3369
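             /* Short-vector operands live in banks of 4 doubles or 8 singles.
              * A destination in bank 0 makes the whole operation scalar, and
              * a scalar rm with a vector rd gives the mixed scalar/vector form.
              */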
3370             if (veclen > 0) {
3371                 if (dp)
3372                     bank_mask = 0xc;
3373                 else
3374                     bank_mask = 0x18;
3375
3376                 /* Figure out what type of vector operation this is.  */
3377                 if ((rd & bank_mask) == 0) {
3378                     /* scalar */
3379                     veclen = 0;
3380                 } else {
3381                     if (dp)
3382                         delta_d = (s->vec_stride >> 1) + 1;
3383                     else
3384                         delta_d = s->vec_stride + 1;
3385
3386                     if ((rm & bank_mask) == 0) {
3387                         /* mixed scalar/vector */
3388                         delta_m = 0;
3389                     } else {
3390                         /* vector */
3391                         delta_m = delta_d;
3392                     }
3393                 }
3394             }
3395
3396             /* Load the initial operands.  */
3397             if (op == 15) {
3398                 switch (rn) {
3399                 case 16:
3400                 case 17:
3401                     /* Integer source */
3402                     gen_mov_F0_vreg(0, rm);
3403                     break;
3404                 case 8:
3405                 case 9:
3406                     /* Compare */
3407                     gen_mov_F0_vreg(dp, rd);
3408                     gen_mov_F1_vreg(dp, rm);
3409                     break;
3410                 case 10:
3411                 case 11:
3412                     /* Compare with zero */
3413                     gen_mov_F0_vreg(dp, rd);
3414                     gen_vfp_F1_ld0(dp);
3415                     break;
3416                 case 20:
3417                 case 21:
3418                 case 22:
3419                 case 23:
3420                 case 28:
3421                 case 29:
3422                 case 30:
3423                 case 31:
3424                     /* Source and destination the same.  */
3425                     gen_mov_F0_vreg(dp, rd);
3426                     break;
3427                 case 4:
3428                 case 5:
3429                 case 6:
3430                 case 7:
3431                     /* VCVTB, VCVTT: only present with the halfprec extension;
3432                      * UNPREDICTABLE if bit 8 is set prior to ARMv8
3433                      * (we choose to UNDEF).
3434                      */
3435                     if ((dp && !arm_dc_feature(s, ARM_FEATURE_V8)) ||
3436                         !arm_dc_feature(s, ARM_FEATURE_VFP_FP16)) {
3437                         return 1;
3438                     }
3439                     if (!extract32(rn, 1, 1)) {
3440                         /* Half precision source.  */
3441                         gen_mov_F0_vreg(0, rm);
3442                         break;
3443                     }
3444                     /* Otherwise fall through */
3445                 default:
3446                     /* One source operand.  */
3447                     gen_mov_F0_vreg(dp, rm);
3448                     break;
3449                 }
3450             } else {
3451                 /* Two source operands.  */
3452                 gen_mov_F0_vreg(dp, rn);
3453                 gen_mov_F1_vreg(dp, rm);
3454             }
3455
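             /* Execute the operation once per short-vector element; scalar
              * ops fall out of the loop after the first iteration.
              */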
3456             for (;;) {
3457                 /* Perform the calculation.  */
3458                 switch (op) {
3459                 case 0: /* VMLA: fd + (fn * fm) */
3460                     /* Note that order of inputs to the add matters for NaNs */
3461                     gen_vfp_F1_mul(dp);
3462                     gen_mov_F0_vreg(dp, rd);
3463                     gen_vfp_add(dp);
3464                     break;
3465                 case 1: /* VMLS: fd + -(fn * fm) */
3466                     gen_vfp_mul(dp);
3467                     gen_vfp_F1_neg(dp);
3468                     gen_mov_F0_vreg(dp, rd);
3469                     gen_vfp_add(dp);
3470                     break;
3471                 case 2: /* VNMLS: -fd + (fn * fm) */
3472                     /* Note that it isn't valid to replace (-A + B) with (B - A)
3473                      * or similar plausible-looking simplifications
3474                      * because this will give wrong results for NaNs.
3475                      */
3476                     gen_vfp_F1_mul(dp);
3477                     gen_mov_F0_vreg(dp, rd);
3478                     gen_vfp_neg(dp);
3479                     gen_vfp_add(dp);
3480                     break;
3481                 case 3: /* VNMLA: -fd + -(fn * fm) */
3482                     gen_vfp_mul(dp);
3483                     gen_vfp_F1_neg(dp);
3484                     gen_mov_F0_vreg(dp, rd);
3485                     gen_vfp_neg(dp);
3486                     gen_vfp_add(dp);
3487                     break;
3488                 case 4: /* mul: fn * fm */
3489                     gen_vfp_mul(dp);
3490                     break;
3491                 case 5: /* nmul: -(fn * fm) */
3492                     gen_vfp_mul(dp);
3493                     gen_vfp_neg(dp);
3494                     break;
3495                 case 6: /* add: fn + fm */
3496                     gen_vfp_add(dp);
3497                     break;
3498                 case 7: /* sub: fn - fm */
3499                     gen_vfp_sub(dp);
3500                     break;
3501                 case 8: /* div: fn / fm */
3502                     gen_vfp_div(dp);
3503                     break;
3504                 case 10: /* VFNMA : fd = muladd(-fd,  fn, fm) */
3505                 case 11: /* VFNMS : fd = muladd(-fd, -fn, fm) */
3506                 case 12: /* VFMA  : fd = muladd( fd,  fn, fm) */
3507                 case 13: /* VFMS  : fd = muladd( fd, -fn, fm) */
3508                     /* These are fused multiply-add, and must be done as one
3509                      * floating point operation with no rounding between the
3510                      * multiplication and addition steps.
3511                      * NB that doing the negations here as separate steps is
3512                      * correct: an input NaN should come out with its sign bit
3513                      * flipped if it is a negated input.
3514                      */
3515                     if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) {
3516                         return 1;
3517                     }
3518                     if (dp) {
3519                         TCGv_ptr fpst;
3520                         TCGv_i64 frd;
3521                         if (op & 1) {
3522                             /* VFNMS, VFMS */
3523                             gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
3524                         }
3525                         frd = tcg_temp_new_i64();
3526                         tcg_gen_ld_f64(frd, cpu_env, vfp_reg_offset(dp, rd));
3527                         if (op & 2) {
3528                             /* VFNMA, VFNMS */
3529                             gen_helper_vfp_negd(frd, frd);
3530                         }
3531                         fpst = get_fpstatus_ptr(0);
3532                         gen_helper_vfp_muladdd(cpu_F0d, cpu_F0d,
3533                                                cpu_F1d, frd, fpst);
3534                         tcg_temp_free_ptr(fpst);
3535                         tcg_temp_free_i64(frd);
3536                     } else {
3537                         TCGv_ptr fpst;
3538                         TCGv_i32 frd;
3539                         if (op & 1) {
3540                             /* VFNMS, VFMS */
3541                             gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
3542                         }
3543                         frd = tcg_temp_new_i32();
3544                         tcg_gen_ld_f32(frd, cpu_env, vfp_reg_offset(dp, rd));
3545                         if (op & 2) {
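                                 /* VFNMA, VFNMS */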
3546                             gen_helper_vfp_negs(frd, frd);
3547                         }
3548                         fpst = get_fpstatus_ptr(0);
3549                         gen_helper_vfp_muladds(cpu_F0s, cpu_F0s,
3550                                                cpu_F1s, frd, fpst);
3551                         tcg_temp_free_ptr(fpst);
3552                         tcg_temp_free_i32(frd);
3553                     }
3554                     break;
3555                 case 14: /* fconst */
3556                     if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3557                         return 1;
3558                     }
3559
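                         /* Expand the 8-bit VFP immediate: sign bit, an
                          * exponent derived from bits 6:4 (bit 6 inverted
                          * then replicated), and a 4-bit fraction.  e.g.
                          * imm8 == 0x70 becomes 0x3f800000 (1.0f) here, or a
                          * top word of 0x3ff00000 (1.0) for doubles.
                          */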
3560                     n = (insn << 12) & 0x80000000;
3561                     i = ((insn >> 12) & 0x70) | (insn & 0xf);
3562                     if (dp) {
3563                         if (i & 0x40)
3564                             i |= 0x3f80;
3565                         else
3566                             i |= 0x4000;
3567                         n |= i << 16;
3568                         tcg_gen_movi_i64(cpu_F0d, ((uint64_t)n) << 32);
3569                     } else {
3570                         if (i & 0x40)
3571                             i |= 0x780;
3572                         else
3573                             i |= 0x800;
3574                         n |= i << 19;
3575                         tcg_gen_movi_i32(cpu_F0s, n);
3576                     }
3577                     break;
3578                 case 15: /* extension space */
3579                     switch (rn) {
3580                     case 0: /* cpy */
3581                         /* no-op */
3582                         break;
3583                     case 1: /* abs */
3584                         gen_vfp_abs(dp);
3585                         break;
3586                     case 2: /* neg */
3587                         gen_vfp_neg(dp);
3588                         break;
3589                     case 3: /* sqrt */
3590                         gen_vfp_sqrt(dp);
3591                         break;
3592                     case 4: /* vcvtb.f32.f16, vcvtb.f64.f16 */
3593                         tmp = gen_vfp_mrs();
3594                         tcg_gen_ext16u_i32(tmp, tmp);
3595                         if (dp) {
3596                             gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp,
3597                                                            cpu_env);
3598                         } else {
3599                             gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp,
3600                                                            cpu_env);
3601                         }
3602                         tcg_temp_free_i32(tmp);
3603                         break;
3604                     case 5: /* vcvtt.f32.f16, vcvtt.f64.f16 */
3605                         tmp = gen_vfp_mrs();
3606                         tcg_gen_shri_i32(tmp, tmp, 16);
3607                         if (dp) {
3608                             gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp,
3609                                                            cpu_env);
3610                         } else {
3611                             gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp,
3612                                                            cpu_env);
3613                         }
3614                         tcg_temp_free_i32(tmp);
3615                         break;
3616                     case 6: /* vcvtb.f16.f32, vcvtb.f16.f64 */
3617                         tmp = tcg_temp_new_i32();
3618                         if (dp) {
3619                             gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
3620                                                            cpu_env);
3621                         } else {
3622                             gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
3623                                                            cpu_env);
3624                         }
3625                         gen_mov_F0_vreg(0, rd);
3626                         tmp2 = gen_vfp_mrs();
3627                         tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
3628                         tcg_gen_or_i32(tmp, tmp, tmp2);
3629                         tcg_temp_free_i32(tmp2);
3630                         gen_vfp_msr(tmp);
3631                         break;
3632                     case 7: /* vcvtt.f16.f32, vcvtt.f16.f64 */
3633                         tmp = tcg_temp_new_i32();
3634                         if (dp) {
3635                             gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
3636                                                            cpu_env);
3637                         } else {
3638                             gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
3639                                                            cpu_env);
3640                         }
3641                         tcg_gen_shli_i32(tmp, tmp, 16);
3642                         gen_mov_F0_vreg(0, rd);
3643                         tmp2 = gen_vfp_mrs();
3644                         tcg_gen_ext16u_i32(tmp2, tmp2);
3645                         tcg_gen_or_i32(tmp, tmp, tmp2);
3646                         tcg_temp_free_i32(tmp2);
3647                         gen_vfp_msr(tmp);
3648                         break;
3649                     case 8: /* cmp */
3650                         gen_vfp_cmp(dp);
3651                         break;
3652                     case 9: /* cmpe */
3653                         gen_vfp_cmpe(dp);
3654                         break;
3655                     case 10: /* cmpz */
3656                         gen_vfp_cmp(dp);
3657                         break;
3658                     case 11: /* cmpez */
3659                         gen_vfp_F1_ld0(dp);
3660                         gen_vfp_cmpe(dp);
3661                         break;
3662                     case 12: /* vrintr */
3663                     {
3664                         TCGv_ptr fpst = get_fpstatus_ptr(0);
3665                         if (dp) {
3666                             gen_helper_rintd(cpu_F0d, cpu_F0d, fpst);
3667                         } else {
3668                             gen_helper_rints(cpu_F0s, cpu_F0s, fpst);
3669                         }
3670                         tcg_temp_free_ptr(fpst);
3671                         break;
3672                     }
3673                     case 13: /* vrintz */
3674                     {
3675                         TCGv_ptr fpst = get_fpstatus_ptr(0);
3676                         TCGv_i32 tcg_rmode;
3677                         tcg_rmode = tcg_const_i32(float_round_to_zero);
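                             /* set_rmode returns the previous rounding mode in
                              * tcg_rmode, so the second call below restores it.
                              */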
3678                         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
3679                         if (dp) {
3680                             gen_helper_rintd(cpu_F0d, cpu_F0d, fpst);
3681                         } else {
3682                             gen_helper_rints(cpu_F0s, cpu_F0s, fpst);
3683                         }
3684                         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
3685                         tcg_temp_free_i32(tcg_rmode);
3686                         tcg_temp_free_ptr(fpst);
3687                         break;
3688                     }
3689                     case 14: /* vrintx */
3690                     {
3691                         TCGv_ptr fpst = get_fpstatus_ptr(0);
3692                         if (dp) {
3693                             gen_helper_rintd_exact(cpu_F0d, cpu_F0d, fpst);
3694                         } else {
3695                             gen_helper_rints_exact(cpu_F0s, cpu_F0s, fpst);
3696                         }
3697                         tcg_temp_free_ptr(fpst);
3698                         break;
3699                     }
3700                     case 15: /* single<->double conversion */
3701                         if (dp)
3702                             gen_helper_vfp_fcvtsd(cpu_F0s, cpu_F0d, cpu_env);
3703                         else
3704                             gen_helper_vfp_fcvtds(cpu_F0d, cpu_F0s, cpu_env);
3705                         break;
3706                     case 16: /* fuito */
3707                         gen_vfp_uito(dp, 0);
3708                         break;
3709                     case 17: /* fsito */
3710                         gen_vfp_sito(dp, 0);
3711                         break;
3712                     case 20: /* fshto */
3713                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3714                             return 1;
3715                         }
3716                         gen_vfp_shto(dp, 16 - rm, 0);
3717                         break;
3718                     case 21: /* fslto */
3719                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3720                             return 1;
3721                         }
3722                         gen_vfp_slto(dp, 32 - rm, 0);
3723                         break;
3724                     case 22: /* fuhto */
3725                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3726                             return 1;
3727                         }
3728                         gen_vfp_uhto(dp, 16 - rm, 0);
3729                         break;
3730                     case 23: /* fulto */
3731                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3732                             return 1;
3733                         }
3734                         gen_vfp_ulto(dp, 32 - rm, 0);
3735                         break;
3736                     case 24: /* ftoui */
3737                         gen_vfp_toui(dp, 0);
3738                         break;
3739                     case 25: /* ftouiz */
3740                         gen_vfp_touiz(dp, 0);
3741                         break;
3742                     case 26: /* ftosi */
3743                         gen_vfp_tosi(dp, 0);
3744                         break;
3745                     case 27: /* ftosiz */
3746                         gen_vfp_tosiz(dp, 0);
3747                         break;
3748                     case 28: /* ftosh */
3749                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3750                             return 1;
3751                         }
3752                         gen_vfp_tosh(dp, 16 - rm, 0);
3753                         break;
3754                     case 29: /* ftosl */
3755                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3756                             return 1;
3757                         }
3758                         gen_vfp_tosl(dp, 32 - rm, 0);
3759                         break;
3760                     case 30: /* ftouh */
3761                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3762                             return 1;
3763                         }
3764                         gen_vfp_touh(dp, 16 - rm, 0);
3765                         break;
3766                     case 31: /* ftoul */
3767                         if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3768                             return 1;
3769                         }
3770                         gen_vfp_toul(dp, 32 - rm, 0);
3771                         break;
3772                     default: /* undefined */
3773                         return 1;
3774                     }
3775                     break;
3776                 default: /* undefined */
3777                     return 1;
3778                 }
3779
3780                 /* Write back the result.  */
3781                 if (op == 15 && (rn >= 8 && rn <= 11)) {
3782                     /* Comparison, do nothing.  */
3783                 } else if (op == 15 && dp && ((rn & 0x1c) == 0x18 ||
3784                                               (rn & 0x1e) == 0x6)) {
3785                     /* VCVT double to int: always integer result.
3786                      * VCVT double to half precision always produces a
3787                      * single-precision result.
3788                      */
3789                     gen_mov_vreg_F0(0, rd);
3790                 } else if (op == 15 && rn == 15) {
3791                     /* conversion */
3792                     gen_mov_vreg_F0(!dp, rd);
3793                 } else {
3794                     gen_mov_vreg_F0(dp, rd);
3795                 }
3796
3797                 /* break out of the loop if we have finished  */
3798                 if (veclen == 0)
3799                     break;
3800
3801                 if (op == 15 && delta_m == 0) {
3802                     /* single source, one-to-many */
3803                     while (veclen--) {
3804                         rd = ((rd + delta_d) & (bank_mask - 1))
3805                              | (rd & bank_mask);
3806                         gen_mov_vreg_F0(dp, rd);
3807                     }
3808                     break;
3809                 }
3810                 /* Set up the next operands.  */
3811                 veclen--;
3812                 rd = ((rd + delta_d) & (bank_mask - 1))
3813                      | (rd & bank_mask);
3814
3815                 if (op == 15) {
3816                     /* One source operand.  */
3817                     rm = ((rm + delta_m) & (bank_mask - 1))
3818                          | (rm & bank_mask);
3819                     gen_mov_F0_vreg(dp, rm);
3820                 } else {
3821                     /* Two source operands.  */
3822                     rn = ((rn + delta_d) & (bank_mask - 1))
3823                          | (rn & bank_mask);
3824                     gen_mov_F0_vreg(dp, rn);
3825                     if (delta_m) {
3826                         rm = ((rm + delta_m) & (bank_mask - 1))
3827                              | (rm & bank_mask);
3828                         gen_mov_F1_vreg(dp, rm);
3829                     }
3830                 }
3831             }
3832         }
3833         break;
3834     case 0xc:
3835     case 0xd:
3836         if ((insn & 0x03e00000) == 0x00400000) {
3837             /* two-register transfer */
3838             rn = (insn >> 16) & 0xf;
3839             rd = (insn >> 12) & 0xf;
3840             if (dp) {
3841                 VFP_DREG_M(rm, insn);
3842             } else {
3843                 rm = VFP_SREG_M(insn);
3844             }
3845
3846             if (insn & ARM_CP_RW_BIT) {
3847                 /* vfp->arm */
3848                 if (dp) {
3849                     gen_mov_F0_vreg(0, rm * 2);
3850                     tmp = gen_vfp_mrs();
3851                     store_reg(s, rd, tmp);
3852                     gen_mov_F0_vreg(0, rm * 2 + 1);
3853                     tmp = gen_vfp_mrs();
3854                     store_reg(s, rn, tmp);
3855                 } else {
3856                     gen_mov_F0_vreg(0, rm);
3857                     tmp = gen_vfp_mrs();
3858                     store_reg(s, rd, tmp);
3859                     gen_mov_F0_vreg(0, rm + 1);
3860                     tmp = gen_vfp_mrs();
3861                     store_reg(s, rn, tmp);
3862                 }
3863             } else {
3864                 /* arm->vfp */
3865                 if (dp) {
3866                     tmp = load_reg(s, rd);
3867                     gen_vfp_msr(tmp);
3868                     gen_mov_vreg_F0(0, rm * 2);
3869                     tmp = load_reg(s, rn);
3870                     gen_vfp_msr(tmp);
3871                     gen_mov_vreg_F0(0, rm * 2 + 1);
3872                 } else {
3873                     tmp = load_reg(s, rd);
3874                     gen_vfp_msr(tmp);
3875                     gen_mov_vreg_F0(0, rm);
3876                     tmp = load_reg(s, rn);
3877                     gen_vfp_msr(tmp);
3878                     gen_mov_vreg_F0(0, rm + 1);
3879                 }
3880             }
3881         } else {
3882             /* Load/store */
3883             rn = (insn >> 16) & 0xf;
3884             if (dp)
3885                 VFP_DREG_D(rd, insn);
3886             else
3887                 rd = VFP_SREG_D(insn);
3888             if ((insn & 0x01200000) == 0x01000000) {
3889                 /* Single load/store */
3890                 offset = (insn & 0xff) << 2;
3891                 if ((insn & (1 << 23)) == 0)
3892                     offset = -offset;
3893                 if (s->thumb && rn == 15) {
3894                     /* This is actually UNPREDICTABLE */
3895                     addr = tcg_temp_new_i32();
3896                     tcg_gen_movi_i32(addr, s->pc & ~2);
3897                 } else {
3898                     addr = load_reg(s, rn);
3899                 }
3900                 tcg_gen_addi_i32(addr, addr, offset);
3901                 if (insn & (1 << 20)) {
3902                     gen_vfp_ld(s, dp, addr);
3903                     gen_mov_vreg_F0(dp, rd);
3904                 } else {
3905                     gen_mov_F0_vreg(dp, rd);
3906                     gen_vfp_st(s, dp, addr);
3907                 }
3908                 tcg_temp_free_i32(addr);
3909             } else {
3910                 /* load/store multiple */
3911                 int w = insn & (1 << 21);
3912                 if (dp)
3913                     n = (insn >> 1) & 0x7f;
3914                 else
3915                     n = insn & 0xff;
3916
3917                 if (w && !(((insn >> 23) ^ (insn >> 24)) & 1)) {
3918                     /* P == U, W == 1 => UNDEF */
3919                     return 1;
3920                 }
3921                 if (n == 0 || (rd + n) > 32 || (dp && n > 16)) {
3922                     /* UNPREDICTABLE cases for bad immediates: we choose to
3923                      * UNDEF to avoid generating huge numbers of TCG ops
3924                      */
3925                     return 1;
3926                 }
3927                 if (rn == 15 && w) {
3928                     /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
3929                     return 1;
3930                 }
3931
3932                 if (s->thumb && rn == 15) {
3933                     /* This is actually UNPREDICTABLE */
3934                     addr = tcg_temp_new_i32();
3935                     tcg_gen_movi_i32(addr, s->pc & ~2);
3936                 } else {
3937                     addr = load_reg(s, rn);
3938                 }
3939                 if (insn & (1 << 24)) /* pre-decrement */
3940                     tcg_gen_addi_i32(addr, addr, -((insn & 0xff) << 2));
3941
3942                 if (dp)
3943                     offset = 8;
3944                 else
3945                     offset = 4;
3946                 for (i = 0; i < n; i++) {
3947                     if (insn & ARM_CP_RW_BIT) {
3948                         /* load */
3949                         gen_vfp_ld(s, dp, addr);
3950                         gen_mov_vreg_F0(dp, rd + i);
3951                     } else {
3952                         /* store */
3953                         gen_mov_F0_vreg(dp, rd + i);
3954                         gen_vfp_st(s, dp, addr);
3955                     }
3956                     tcg_gen_addi_i32(addr, addr, offset);
3957                 }
3958                 if (w) {
3959                     /* writeback */
3960                     if (insn & (1 << 24))
3961                         offset = -offset * n;
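                         /* FLDMX/FSTMX (odd imm8) transfer 2n+1 words, so
                          * increment writeback skips one extra word.
                          */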
3962                     else if (dp && (insn & 1))
3963                         offset = 4;
3964                     else
3965                         offset = 0;
3966
3967                     if (offset != 0)
3968                         tcg_gen_addi_i32(addr, addr, offset);
3969                     store_reg(s, rn, addr);
3970                 } else {
3971                     tcg_temp_free_i32(addr);
3972                 }
3973             }
3974         }
3975         break;
3976     default:
3977         /* Should never happen.  */
3978         return 1;
3979     }
3980     return 0;
3981 }
3982
3983 static inline void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
3984 {
3985     TranslationBlock *tb;
3986
3987     tb = s->tb;
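         /* Direct TB chaining is only safe within the same guest page:
          * across pages the virtual-to-physical mapping of the target may
          * change, so exit to the main loop with the PC set instead.
          */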
3988     if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK)) {
3989         tcg_gen_goto_tb(n);
3990         gen_set_pc_im(s, dest);
3991         tcg_gen_exit_tb((uintptr_t)tb + n);
3992     } else {
3993         gen_set_pc_im(s, dest);
3994         tcg_gen_exit_tb(0);
3995     }
3996 }
3997
3998 static inline void gen_jmp(DisasContext *s, uint32_t dest)
3999 {
4000     if (unlikely(s->singlestep_enabled || s->ss_active)) {
4001         /* An indirect jump so that we still trigger the debug exception.  */
4002         if (s->thumb)
4003             dest |= 1;
4004         gen_bx_im(s, dest);
4005     } else {
4006         gen_goto_tb(s, 0, dest);
4007         s->is_jmp = DISAS_TB_JUMP;
4008     }
4009 }
4010
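     /* Signed 16x16->32 multiply core of SMUL<x><y> and friends: x and y
      * select the top (1) or bottom (0) halfword of each operand.
      */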
4011 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
4012 {
4013     if (x)
4014         tcg_gen_sari_i32(t0, t0, 16);
4015     else
4016         gen_sxth(t0);
4017     if (y)
4018         tcg_gen_sari_i32(t1, t1, 16);
4019     else
4020         gen_sxth(t1);
4021     tcg_gen_mul_i32(t0, t0, t1);
4022 }
4023
4024 /* Return the mask of PSR bits set by a MSR instruction.  */
4025 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
4026 {
4027     uint32_t mask;
4028
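         /* flags is the instruction's field mask: bit 0 = c (PSR[7:0]),
          * bit 1 = x (PSR[15:8]), bit 2 = s (PSR[23:16]),
          * bit 3 = f (PSR[31:24]).
          */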
4029     mask = 0;
4030     if (flags & (1 << 0))
4031         mask |= 0xff;
4032     if (flags & (1 << 1))
4033         mask |= 0xff00;
4034     if (flags & (1 << 2))
4035         mask |= 0xff0000;
4036     if (flags & (1 << 3))
4037         mask |= 0xff000000;
4038
4039     /* Mask out undefined bits.  */
4040     mask &= ~CPSR_RESERVED;
4041     if (!arm_dc_feature(s, ARM_FEATURE_V4T)) {
4042         mask &= ~CPSR_T;
4043     }
4044     if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
4045         mask &= ~CPSR_Q; /* V5TE in reality */
4046     }
4047     if (!arm_dc_feature(s, ARM_FEATURE_V6)) {
4048         mask &= ~(CPSR_E | CPSR_GE);
4049     }
4050     if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
4051         mask &= ~CPSR_IT;
4052     }
4053     /* Mask out execution state and reserved bits.  */
4054     if (!spsr) {
4055         mask &= ~(CPSR_EXEC | CPSR_RESERVED);
4056     }
4057     /* Mask out privileged bits.  */
4058     if (IS_USER(s))
4059         mask &= CPSR_USER;
4060     return mask;
4061 }
4062
4063 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
4064 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
4065 {
4066     TCGv_i32 tmp;
4067     if (spsr) {
4068         /* ??? This is also undefined in system mode.  */
4069         if (IS_USER(s))
4070             return 1;
4071
4072         tmp = load_cpu_field(spsr);
4073         tcg_gen_andi_i32(tmp, tmp, ~mask);
4074         tcg_gen_andi_i32(t0, t0, mask);
4075         tcg_gen_or_i32(tmp, tmp, t0);
4076         store_cpu_field(tmp, spsr);
4077     } else {
4078         gen_set_cpsr(t0, mask);
4079     }
4080     tcg_temp_free_i32(t0);
4081     gen_lookup_tb(s);
4082     return 0;
4083 }
4084
4085 /* Returns nonzero if access to the PSR is not permitted.  */
4086 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
4087 {
4088     TCGv_i32 tmp;
4089     tmp = tcg_temp_new_i32();
4090     tcg_gen_movi_i32(tmp, val);
4091     return gen_set_psr(s, mask, spsr, tmp);
4092 }
4093
4094 /* Generate an old-style exception return. Marks pc as dead. */
4095 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
4096 {
4097     TCGv_i32 tmp;
4098     store_reg(s, 15, pc);
4099     tmp = load_cpu_field(spsr);
4100     gen_set_cpsr(tmp, CPSR_ERET_MASK);
4101     tcg_temp_free_i32(tmp);
4102     s->is_jmp = DISAS_JUMP;
4103 }
4104
4105 /* Generate a v6 exception return.  Marks both values as dead.  */
4106 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
4107 {
4108     gen_set_cpsr(cpsr, CPSR_ERET_MASK);
4109     tcg_temp_free_i32(cpsr);
4110     store_reg(s, 15, pc);
4111     s->is_jmp = DISAS_JUMP;
4112 }
4113
4114 static void gen_nop_hint(DisasContext *s, int val)
4115 {
4116     switch (val) {
4117     case 1: /* yield */
4118         gen_set_pc_im(s, s->pc);
4119         s->is_jmp = DISAS_YIELD;
4120         break;
4121     case 3: /* wfi */
4122         gen_set_pc_im(s, s->pc);
4123         s->is_jmp = DISAS_WFI;
4124         break;
4125     case 2: /* wfe */
4126         gen_set_pc_im(s, s->pc);
4127         s->is_jmp = DISAS_WFE;
4128         break;
4129     case 4: /* sev */
4130     case 5: /* sevl */
4131         /* TODO: Implement SEV, SEVL and WFE.  May help SMP performance.  */
4132     default: /* nop */
4133         break;
4134     }
4135 }
4136
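     /* Shorthand argument triple: destination cpu_V0, sources cpu_V0, cpu_V1 */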
4137 #define CPU_V001 cpu_V0, cpu_V0, cpu_V1
4138
4139 static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
4140 {
4141     switch (size) {
4142     case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
4143     case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
4144     case 2: tcg_gen_add_i32(t0, t0, t1); break;
4145     default: abort();
4146     }
4147 }
4148
4149 static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
4150 {
4151     switch (size) {
4152     case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
4153     case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
4154     case 2: tcg_gen_sub_i32(t0, t1, t0); break;
4155     default: return;
4156     }
4157 }
4158
4159 /* 32-bit pairwise ops end up the same as the elementwise versions.  */
4160 #define gen_helper_neon_pmax_s32  gen_helper_neon_max_s32
4161 #define gen_helper_neon_pmax_u32  gen_helper_neon_max_u32
4162 #define gen_helper_neon_pmin_s32  gen_helper_neon_min_s32
4163 #define gen_helper_neon_pmin_u32  gen_helper_neon_min_u32
4164
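     /* Dispatch a Neon integer helper on (size << 1) | u: even cases are
      * signed, odd unsigned, for element sizes 8/16/32.  The _ENV variant
      * also passes cpu_env for helpers that update CPU state (e.g. the QC
      * saturation flag).
      */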
4165 #define GEN_NEON_INTEGER_OP_ENV(name) do { \
4166     switch ((size << 1) | u) { \
4167     case 0: \
4168         gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \
4169         break; \
4170     case 1: \
4171         gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \
4172         break; \
4173     case 2: \
4174         gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \
4175         break; \
4176     case 3: \
4177         gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \
4178         break; \
4179     case 4: \
4180         gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \
4181         break; \
4182     case 5: \
4183         gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \
4184         break; \
4185     default: return 1; \
4186     }} while (0)
4187
4188 #define GEN_NEON_INTEGER_OP(name) do { \
4189     switch ((size << 1) | u) { \
4190     case 0: \
4191         gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \
4192         break; \
4193     case 1: \
4194         gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \
4195         break; \
4196     case 2: \
4197         gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \
4198         break; \
4199     case 3: \
4200         gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \
4201         break; \
4202     case 4: \
4203         gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \
4204         break; \
4205     case 5: \
4206         gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \
4207         break; \
4208     default: return 1; \
4209     }} while (0)
4210
4211 static TCGv_i32 neon_load_scratch(int scratch)
4212 {
4213     TCGv_i32 tmp = tcg_temp_new_i32();
4214     tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
4215     return tmp;
4216 }
4217
4218 static void neon_store_scratch(int scratch, TCGv_i32 var)
4219 {
4220     tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
4221     tcg_temp_free_i32(var);
4222 }
4223
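     /* Fetch a by-scalar operand: reg packs the D register and the lane
      * (for 16-bit scalars bit 3 selects the halfword, which is then
      * duplicated into both halves; higher bits pick the 32-bit word).
      */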
4224 static inline TCGv_i32 neon_get_scalar(int size, int reg)
4225 {
4226     TCGv_i32 tmp;
4227     if (size == 1) {
4228         tmp = neon_load_reg(reg & 7, reg >> 4);
4229         if (reg & 8) {
4230             gen_neon_dup_high16(tmp);
4231         } else {
4232             gen_neon_dup_low16(tmp);
4233         }
4234     } else {
4235         tmp = neon_load_reg(reg & 15, reg >> 4);
4236     }
4237     return tmp;
4238 }
4239
4240 static int gen_neon_unzip(int rd, int rm, int size, int q)
4241 {
4242     TCGv_i32 tmp, tmp2;
4243     if (!q && size == 2) {
4244         return 1;
4245     }
4246     tmp = tcg_const_i32(rd);
4247     tmp2 = tcg_const_i32(rm);
4248     if (q) {
4249         switch (size) {
4250         case 0:
4251             gen_helper_neon_qunzip8(cpu_env, tmp, tmp2);
4252             break;
4253         case 1:
4254             gen_helper_neon_qunzip16(cpu_env, tmp, tmp2);
4255             break;
4256         case 2:
4257             gen_helper_neon_qunzip32(cpu_env, tmp, tmp2);
4258             break;
4259         default:
4260             abort();
4261         }
4262     } else {
4263         switch (size) {
4264         case 0:
4265             gen_helper_neon_unzip8(cpu_env, tmp, tmp2);
4266             break;
4267         case 1:
4268             gen_helper_neon_unzip16(cpu_env, tmp, tmp2);
4269             break;
4270         default:
4271             abort();
4272         }
4273     }
4274     tcg_temp_free_i32(tmp);
4275     tcg_temp_free_i32(tmp2);
4276     return 0;
4277 }
4278
4279 static int gen_neon_zip(int rd, int rm, int size, int q)
4280 {
4281     TCGv_i32 tmp, tmp2;
4282     if (!q && size == 2) {
4283         return 1;
4284     }
4285     tmp = tcg_const_i32(rd);
4286     tmp2 = tcg_const_i32(rm);
4287     if (q) {
4288         switch (size) {
4289         case 0:
4290             gen_helper_neon_qzip8(cpu_env, tmp, tmp2);
4291             break;
4292         case 1:
4293             gen_helper_neon_qzip16(cpu_env, tmp, tmp2);
4294             break;
4295         case 2:
4296             gen_helper_neon_qzip32(cpu_env, tmp, tmp2);
4297             break;
4298         default:
4299             abort();
4300         }
4301     } else {
4302         switch (size) {
4303         case 0:
4304             gen_helper_neon_zip8(cpu_env, tmp, tmp2);
4305             break;
4306         case 1:
4307             gen_helper_neon_zip16(cpu_env, tmp, tmp2);
4308             break;
4309         default:
4310             abort();
4311         }
4312     }
4313     tcg_temp_free_i32(tmp);
4314     tcg_temp_free_i32(tmp2);
4315     return 0;
4316 }
4317
4318 static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
4319 {
4320     TCGv_i32 rd, tmp;
4321
4322     rd = tcg_temp_new_i32();
4323     tmp = tcg_temp_new_i32();
4324
4325     tcg_gen_shli_i32(rd, t0, 8);
4326     tcg_gen_andi_i32(rd, rd, 0xff00ff00);
4327     tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
4328     tcg_gen_or_i32(rd, rd, tmp);
4329
4330     tcg_gen_shri_i32(t1, t1, 8);
4331     tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
4332     tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
4333     tcg_gen_or_i32(t1, t1, tmp);
4334     tcg_gen_mov_i32(t0, rd);
4335
4336     tcg_temp_free_i32(tmp);
4337     tcg_temp_free_i32(rd);
4338 }
4339
4340 static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
4341 {
4342     TCGv_i32 rd, tmp;
4343
4344     rd = tcg_temp_new_i32();
4345     tmp = tcg_temp_new_i32();
4346
4347     tcg_gen_shli_i32(rd, t0, 16);
4348     tcg_gen_andi_i32(tmp, t1, 0xffff);
4349     tcg_gen_or_i32(rd, rd, tmp);
4350     tcg_gen_shri_i32(t1, t1, 16);
4351     tcg_gen_andi_i32(tmp, t0, 0xffff0000);
4352     tcg_gen_or_i32(t1, t1, tmp);
4353     tcg_gen_mov_i32(t0, rd);
4354
4355     tcg_temp_free_i32(tmp);
4356     tcg_temp_free_i32(rd);
4357 }
4358
4359
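     /* Per-op decode for VLD/VST "multiple structures": register count,
      * element interleave factor, and spacing between D registers.
      */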
4360 static struct {
4361     int nregs;
4362     int interleave;
4363     int spacing;
4364 } neon_ls_element_type[11] = {
4365     {4, 4, 1},
4366     {4, 4, 2},
4367     {4, 1, 1},
4368     {4, 2, 1},
4369     {3, 3, 1},
4370     {3, 3, 2},
4371     {3, 1, 1},
4372     {1, 1, 1},
4373     {2, 2, 1},
4374     {2, 2, 2},
4375     {2, 1, 1}
4376 };
4377
4378 /* Translate a NEON load/store element instruction.  Return nonzero if the
4379    instruction is invalid.  */
4380 static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
4381 {
4382     int rd, rn, rm;
4383     int op;
4384     int nregs;
4385     int interleave;
4386     int spacing;
4387     int stride;
4388     int size;
4389     int reg;
4390     int pass;
4391     int load;
4392     int shift;
4393     int n;
4394     TCGv_i32 addr;
4395     TCGv_i32 tmp;
4396     TCGv_i32 tmp2;
4397     TCGv_i64 tmp64;
4398
4399     /* FIXME: this access check should not take precedence over UNDEF
4400      * for invalid encodings; we will generate incorrect syndrome information
4401      * for attempts to execute invalid vfp/neon encodings with FP disabled.
4402      */
4403     if (s->fp_excp_el) {
4404         gen_exception_insn(s, 4, EXCP_UDEF,
4405                            syn_fp_access_trap(1, 0xe, s->thumb), s->fp_excp_el);
4406         return 0;
4407     }
4408
4409     if (!s->vfp_enabled)
4410       return 1;
4411     VFP_DREG_D(rd, insn);
4412     rn = (insn >> 16) & 0xf;
4413     rm = insn & 0xf;
4414     load = (insn & (1 << 21)) != 0;
4415     if ((insn & (1 << 23)) == 0) {
4416         /* Load store all elements.  */
4417         op = (insn >> 8) & 0xf;
4418         size = (insn >> 6) & 3;
4419         if (op > 10)
4420             return 1;
4421         /* Catch UNDEF cases for bad values of align field */
4422         switch (op & 0xc) {
4423         case 4:
4424             if (((insn >> 5) & 1) == 1) {
4425                 return 1;
4426             }
4427             break;
4428         case 8:
4429             if (((insn >> 4) & 3) == 3) {
4430                 return 1;
4431             }
4432             break;
4433         default:
4434             break;
4435         }
4436         nregs = neon_ls_element_type[op].nregs;
4437         interleave = neon_ls_element_type[op].interleave;
4438         spacing = neon_ls_element_type[op].spacing;
4439         if (size == 3 && (interleave | spacing) != 1)
4440             return 1;
4441         addr = tcg_temp_new_i32();
4442         load_reg_var(s, addr, rn);
4443         stride = (1 << size) * interleave;
4444         for (reg = 0; reg < nregs; reg++) {
4445             if (interleave > 2 || (interleave == 2 && nregs == 2)) {
4446                 load_reg_var(s, addr, rn);
4447                 tcg_gen_addi_i32(addr, addr, (1 << size) * reg);
4448             } else if (interleave == 2 && nregs == 4 && reg == 2) {
4449                 load_reg_var(s, addr, rn);
4450                 tcg_gen_addi_i32(addr, addr, 1 << size);
4451             }
4452             if (size == 3) {
4453                 tmp64 = tcg_temp_new_i64();
4454                 if (load) {
4455                     gen_aa32_ld64(tmp64, addr, get_mem_index(s));
4456                     neon_store_reg64(tmp64, rd);
4457                 } else {
4458                     neon_load_reg64(tmp64, rd);
4459                     gen_aa32_st64(tmp64, addr, get_mem_index(s));
4460                 }
4461                 tcg_temp_free_i64(tmp64);
4462                 tcg_gen_addi_i32(addr, addr, stride);
4463             } else {
4464                 for (pass = 0; pass < 2; pass++) {
4465                     if (size == 2) {
4466                         if (load) {
4467                             tmp = tcg_temp_new_i32();
4468                             gen_aa32_ld32u(tmp, addr, get_mem_index(s));
4469                             neon_store_reg(rd, pass, tmp);
4470                         } else {
4471                             tmp = neon_load_reg(rd, pass);
4472                             gen_aa32_st32(tmp, addr, get_mem_index(s));
4473                             tcg_temp_free_i32(tmp);
4474                         }
4475                         tcg_gen_addi_i32(addr, addr, stride);
4476                     } else if (size == 1) {
4477                         if (load) {
4478                             tmp = tcg_temp_new_i32();
4479                             gen_aa32_ld16u(tmp, addr, get_mem_index(s));
4480                             tcg_gen_addi_i32(addr, addr, stride);
4481                             tmp2 = tcg_temp_new_i32();
4482                             gen_aa32_ld16u(tmp2, addr, get_mem_index(s));
4483                             tcg_gen_addi_i32(addr, addr, stride);
4484                             tcg_gen_shli_i32(tmp2, tmp2, 16);
4485                             tcg_gen_or_i32(tmp, tmp, tmp2);
4486                             tcg_temp_free_i32(tmp2);
4487                             neon_store_reg(rd, pass, tmp);
4488                         } else {
4489                             tmp = neon_load_reg(rd, pass);
4490                             tmp2 = tcg_temp_new_i32();
4491                             tcg_gen_shri_i32(tmp2, tmp, 16);
4492                             gen_aa32_st16(tmp, addr, get_mem_index(s));
4493                             tcg_temp_free_i32(tmp);
4494                             tcg_gen_addi_i32(addr, addr, stride);
4495                             gen_aa32_st16(tmp2, addr, get_mem_index(s));
4496                             tcg_temp_free_i32(tmp2);
4497                             tcg_gen_addi_i32(addr, addr, stride);
4498                         }
4499                     } else /* size == 0 */ {
4500                         if (load) {
4501                             TCGV_UNUSED_I32(tmp2);
4502                             for (n = 0; n < 4; n++) {
4503                                 tmp = tcg_temp_new_i32();
4504                                 gen_aa32_ld8u(tmp, addr, get_mem_index(s));
4505                                 tcg_gen_addi_i32(addr, addr, stride);
4506                                 if (n == 0) {
4507                                     tmp2 = tmp;
4508                                 } else {
4509                                     tcg_gen_shli_i32(tmp, tmp, n * 8);
4510                                     tcg_gen_or_i32(tmp2, tmp2, tmp);
4511                                     tcg_temp_free_i32(tmp);
4512                                 }
4513                             }
4514                             neon_store_reg(rd, pass, tmp2);
4515                         } else {
4516                             tmp2 = neon_load_reg(rd, pass);
4517                             for (n = 0; n < 4; n++) {
4518                                 tmp = tcg_temp_new_i32();
4519                                 if (n == 0) {
4520                                     tcg_gen_mov_i32(tmp, tmp2);
4521                                 } else {
4522                                     tcg_gen_shri_i32(tmp, tmp2, n * 8);
4523                                 }
4524                                 gen_aa32_st8(tmp, addr, get_mem_index(s));
4525                                 tcg_temp_free_i32(tmp);
4526                                 tcg_gen_addi_i32(addr, addr, stride);
4527                             }
4528                             tcg_temp_free_i32(tmp2);
4529                         }
4530                     }
4531                 }
4532             }
4533             rd += spacing;
4534         }
4535         tcg_temp_free_i32(addr);
4536         stride = nregs * 8;
4537     } else {
4538         size = (insn >> 10) & 3;
4539         if (size == 3) {
4540             /* Load single element to all lanes.  */
4541             int a = (insn >> 4) & 1;
4542             if (!load) {
4543                 return 1;
4544             }
4545             size = (insn >> 6) & 3;
4546             nregs = ((insn >> 8) & 3) + 1;
4547
4548             if (size == 3) {
4549                 if (nregs != 4 || a == 0) {
4550                     return 1;
4551                 }
4552                 /* For VLD4 size == 3, a == 1 means 32 bits at 16-byte alignment */
4553                 size = 2;
4554             }
4555             if (nregs == 1 && a == 1 && size == 0) {
4556                 return 1;
4557             }
4558             if (nregs == 3 && a == 1) {
4559                 return 1;
4560             }
4561             addr = tcg_temp_new_i32();
4562             load_reg_var(s, addr, rn);
4563             if (nregs == 1) {
4564                 /* VLD1 to all lanes: bit 5 indicates how many Dregs to write */
4565                 tmp = gen_load_and_replicate(s, addr, size);
4566                 tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0));
4567                 tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1));
4568                 if (insn & (1 << 5)) {
4569                     tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 0));
4570                     tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 1));
4571                 }
4572                 tcg_temp_free_i32(tmp);
4573             } else {
4574                 /* VLD2/3/4 to all lanes: bit 5 indicates register stride */
4575                 stride = (insn & (1 << 5)) ? 2 : 1;
4576                 for (reg = 0; reg < nregs; reg++) {
4577                     tmp = gen_load_and_replicate(s, addr, size);
4578                     tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0));
4579                     tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1));
4580                     tcg_temp_free_i32(tmp);
4581                     tcg_gen_addi_i32(addr, addr, 1 << size);
4582                     rd += stride;
4583                 }
4584             }
4585             tcg_temp_free_i32(addr);
4586             stride = (1 << size) * nregs;
4587         } else {
4588             /* Single element.  */
4589             int idx = (insn >> 4) & 0xf;
4590             pass = (insn >> 7) & 1;
4591             switch (size) {
4592             case 0:
4593                 shift = ((insn >> 5) & 3) * 8;
4594                 stride = 1;
4595                 break;
4596             case 1:
4597                 shift = ((insn >> 6) & 1) * 16;
4598                 stride = (insn & (1 << 5)) ? 2 : 1;
4599                 break;
4600             case 2:
4601                 shift = 0;
4602                 stride = (insn & (1 << 6)) ? 2 : 1;
4603                 break;
4604             default:
4605                 abort();
4606             }
4607             nregs = ((insn >> 8) & 3) + 1;
4608             /* Catch the UNDEF cases. This is unavoidably a bit messy. */
4609             switch (nregs) {
4610             case 1:
4611                 if (((idx & (1 << size)) != 0) ||
4612                     (size == 2 && ((idx & 3) == 1 || (idx & 3) == 2))) {
4613                     return 1;
4614                 }
4615                 break;
4616             case 3:
4617                 if ((idx & 1) != 0) {
4618                     return 1;
4619                 }
4620                 /* fall through */
4621             case 2:
4622                 if (size == 2 && (idx & 2) != 0) {
4623                     return 1;
4624                 }
4625                 break;
4626             case 4:
4627                 if ((size == 2) && ((idx & 3) == 3)) {
4628                     return 1;
4629                 }
4630                 break;
4631             default:
4632                 abort();
4633             }
4634             if ((rd + stride * (nregs - 1)) > 31) {
4635                 /* Attempts to write off the end of the register file
4636                  * are UNPREDICTABLE; we choose to UNDEF because otherwise
4637                  * the neon_load_reg() would write off the end of the array.
4638                  */
4639                 return 1;
4640             }
4641             addr = tcg_temp_new_i32();
4642             load_reg_var(s, addr, rn);
4643             for (reg = 0; reg < nregs; reg++) {
4644                 if (load) {
4645                     tmp = tcg_temp_new_i32();
4646                     switch (size) {
4647                     case 0:
4648                         gen_aa32_ld8u(tmp, addr, get_mem_index(s));
4649                         break;
4650                     case 1:
4651                         gen_aa32_ld16u(tmp, addr, get_mem_index(s));
4652                         break;
4653                     case 2:
4654                         gen_aa32_ld32u(tmp, addr, get_mem_index(s));
4655                         break;
4656                     default: /* Avoid compiler warnings.  */
4657                         abort();
4658                     }
4659                     if (size != 2) {
4660                         tmp2 = neon_load_reg(rd, pass);
4661                         tcg_gen_deposit_i32(tmp, tmp2, tmp,
4662                                             shift, size ? 16 : 8);
4663                         tcg_temp_free_i32(tmp2);
4664                     }
4665                     neon_store_reg(rd, pass, tmp);
4666                 } else { /* Store */
4667                     tmp = neon_load_reg(rd, pass);
4668                     if (shift)
4669                         tcg_gen_shri_i32(tmp, tmp, shift);
4670                     switch (size) {
4671                     case 0:
4672                         gen_aa32_st8(tmp, addr, get_mem_index(s));
4673                         break;
4674                     case 1:
4675                         gen_aa32_st16(tmp, addr, get_mem_index(s));
4676                         break;
4677                     case 2:
4678                         gen_aa32_st32(tmp, addr, get_mem_index(s));
4679                         break;
4680                     }
4681                     tcg_temp_free_i32(tmp);
4682                 }
4683                 rd += stride;
4684                 tcg_gen_addi_i32(addr, addr, 1 << size);
4685             }
4686             tcg_temp_free_i32(addr);
4687             stride = nregs * (1 << size);
4688         }
4689     }
4690     if (rm != 15) {
4691         TCGv_i32 base;
4692
4693         base = load_reg(s, rn);
4694         if (rm == 13) {
4695             tcg_gen_addi_i32(base, base, stride);
4696         } else {
4697             TCGv_i32 index;
4698             index = load_reg(s, rm);
4699             tcg_gen_add_i32(base, base, index);
4700             tcg_temp_free_i32(index);
4701         }
4702         store_reg(s, rn, base);
4703     }
4704     return 0;
4705 }
4706
4707 /* Bitwise select.  dest = c ? t : f.  Clobbers T and F.  */
4708 static void gen_neon_bsl(TCGv_i32 dest, TCGv_i32 t, TCGv_i32 f, TCGv_i32 c)
4709 {
4710     tcg_gen_and_i32(t, t, c);
4711     tcg_gen_andc_i32(f, f, c);
4712     tcg_gen_or_i32(dest, t, f);
4713 }
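     /* For example, with c == 0xff00ff00:
      *   dest = (t & 0xff00ff00) | (f & 0x00ff00ff)
      * i.e. bits of dest where c is set come from t, the rest from f.
      * Note that t and f themselves are clobbered in the process.
      */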
4714
4715 static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
4716 {
4717     switch (size) {
4718     case 0: gen_helper_neon_narrow_u8(dest, src); break;
4719     case 1: gen_helper_neon_narrow_u16(dest, src); break;
4720     case 2: tcg_gen_extrl_i64_i32(dest, src); break;
4721     default: abort();
4722     }
4723 }
4724
4725 static inline void gen_neon_narrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
4726 {
4727     switch (size) {
4728     case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
4729     case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
4730     case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
4731     default: abort();
4732     }
4733 }
4734
4735 static inline void gen_neon_narrow_satu(int size, TCGv_i32 dest, TCGv_i64 src)
4736 {
4737     switch (size) {
4738     case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
4739     case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
4740     case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
4741     default: abort();
4742     }
4743 }
4744
4745 static inline void gen_neon_unarrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
4746 {
4747     switch (size) {
4748     case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
4749     case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
4750     case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
4751     default: abort();
4752     }
4753 }
4754
4755 static inline void gen_neon_shift_narrow(int size, TCGv_i32 var, TCGv_i32 shift,
4756                                          int q, int u)
4757 {
4758     if (q) {
4759         if (u) {
4760             switch (size) {
4761             case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
4762             case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
4763             default: abort();
4764             }
4765         } else {
4766             switch (size) {
4767             case 1: gen_helper_neon_rshl_s16(var, var, shift); break;
4768             case 2: gen_helper_neon_rshl_s32(var, var, shift); break;
4769             default: abort();
4770             }
4771         }
4772     } else {
4773         if (u) {
4774             switch (size) {
4775             case 1: gen_helper_neon_shl_u16(var, var, shift); break;
4776             case 2: gen_helper_neon_shl_u32(var, var, shift); break;
4777             default: abort();
4778             }
4779         } else {
4780             switch (size) {
4781             case 1: gen_helper_neon_shl_s16(var, var, shift); break;
4782             case 2: gen_helper_neon_shl_s32(var, var, shift); break;
4783             default: abort();
4784             }
4785         }
4786     }
4787 }
4788
4789 static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u)
4790 {
4791     if (u) {
4792         switch (size) {
4793         case 0: gen_helper_neon_widen_u8(dest, src); break;
4794         case 1: gen_helper_neon_widen_u16(dest, src); break;
4795         case 2: tcg_gen_extu_i32_i64(dest, src); break;
4796         default: abort();
4797         }
4798     } else {
4799         switch (size) {
4800         case 0: gen_helper_neon_widen_s8(dest, src); break;
4801         case 1: gen_helper_neon_widen_s16(dest, src); break;
4802         case 2: tcg_gen_ext_i32_i64(dest, src); break;
4803         default: abort();
4804         }
4805     }
4806     tcg_temp_free_i32(src);
4807 }
4808
4809 static inline void gen_neon_addl(int size)
4810 {
4811     switch (size) {
4812     case 0: gen_helper_neon_addl_u16(CPU_V001); break;
4813     case 1: gen_helper_neon_addl_u32(CPU_V001); break;
4814     case 2: tcg_gen_add_i64(CPU_V001); break;
4815     default: abort();
4816     }
4817 }
4818
4819 static inline void gen_neon_subl(int size)
4820 {
4821     switch (size) {
4822     case 0: gen_helper_neon_subl_u16(CPU_V001); break;
4823     case 1: gen_helper_neon_subl_u32(CPU_V001); break;
4824     case 2: tcg_gen_sub_i64(CPU_V001); break;
4825     default: abort();
4826     }
4827 }
4828
4829 static inline void gen_neon_negl(TCGv_i64 var, int size)
4830 {
4831     switch (size) {
4832     case 0: gen_helper_neon_negl_u16(var, var); break;
4833     case 1: gen_helper_neon_negl_u32(var, var); break;
4834     case 2:
4835         tcg_gen_neg_i64(var, var);
4836         break;
4837     default: abort();
4838     }
4839 }
4840
4841 static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
4842 {
4843     switch (size) {
4844     case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
4845     case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
4846     default: abort();
4847     }
4848 }
4849
4850 static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
4851                                  int size, int u)
4852 {
4853     TCGv_i64 tmp;
4854
4855     switch ((size << 1) | u) {
4856     case 0: gen_helper_neon_mull_s8(dest, a, b); break;
4857     case 1: gen_helper_neon_mull_u8(dest, a, b); break;
4858     case 2: gen_helper_neon_mull_s16(dest, a, b); break;
4859     case 3: gen_helper_neon_mull_u16(dest, a, b); break;
4860     case 4:
4861         tmp = gen_muls_i64_i32(a, b);
4862         tcg_gen_mov_i64(dest, tmp);
4863         tcg_temp_free_i64(tmp);
4864         break;
4865     case 5:
4866         tmp = gen_mulu_i64_i32(a, b);
4867         tcg_gen_mov_i64(dest, tmp);
4868         tcg_temp_free_i64(tmp);
4869         break;
4870     default: abort();
4871     }
4872
4873     /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
4874        Don't forget to clean them now.  */
4875     if (size < 2) {
4876         tcg_temp_free_i32(a);
4877         tcg_temp_free_i32(b);
4878     }
4879 }
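     /* The case index above is (size << 1) | u, so e.g. an unsigned
      * 32x32->64 multiply (size == 2, u == 1) selects case 5.
      */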
4880
4881 static void gen_neon_narrow_op(int op, int u, int size,
4882                                TCGv_i32 dest, TCGv_i64 src)
4883 {
4884     if (op) {
4885         if (u) {
4886             gen_neon_unarrow_sats(size, dest, src);
4887         } else {
4888             gen_neon_narrow(size, dest, src);
4889         }
4890     } else {
4891         if (u) {
4892             gen_neon_narrow_satu(size, dest, src);
4893         } else {
4894             gen_neon_narrow_sats(size, dest, src);
4895         }
4896     }
4897 }
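     /* The (op, u) flags select the narrowing flavour: op == 0 is the
      * saturating case, with u picking unsigned vs signed saturation;
      * op == 1 with u == 0 is a plain truncating narrow, and op == 1
      * with u == 1 saturates a signed input to an unsigned result.
      */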
4898
4899 /* Symbolic constants for op fields for Neon 3-register same-length.
4900  * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
4901  * table A7-9.
4902  */
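     /* For example, VADD/VSUB are op 16 (binary 1000,0): bits [11:8]
      * are 0b1000 and bit [4] is 0; disas_neon_data_insn() reassembles
      * this as ((insn >> 7) & 0x1e) | ((insn >> 4) & 1).
      */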
4903 #define NEON_3R_VHADD 0
4904 #define NEON_3R_VQADD 1
4905 #define NEON_3R_VRHADD 2
4906 #define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
4907 #define NEON_3R_VHSUB 4
4908 #define NEON_3R_VQSUB 5
4909 #define NEON_3R_VCGT 6
4910 #define NEON_3R_VCGE 7
4911 #define NEON_3R_VSHL 8
4912 #define NEON_3R_VQSHL 9
4913 #define NEON_3R_VRSHL 10
4914 #define NEON_3R_VQRSHL 11
4915 #define NEON_3R_VMAX 12
4916 #define NEON_3R_VMIN 13
4917 #define NEON_3R_VABD 14
4918 #define NEON_3R_VABA 15
4919 #define NEON_3R_VADD_VSUB 16
4920 #define NEON_3R_VTST_VCEQ 17
4921 #define NEON_3R_VML 18 /* VMLA, VMLAL, VMLS, VMLSL */
4922 #define NEON_3R_VMUL 19
4923 #define NEON_3R_VPMAX 20
4924 #define NEON_3R_VPMIN 21
4925 #define NEON_3R_VQDMULH_VQRDMULH 22
4926 #define NEON_3R_VPADD 23
4927 #define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
4928 #define NEON_3R_VFM 25 /* VFMA, VFMS : float fused multiply-add */
4929 #define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
4930 #define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
4931 #define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
4932 #define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
4933 #define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
4934 #define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */
4935
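     /* Bit n of each entry below is set if the op allows size value n;
      * disas_neon_data_insn() checks neon_3r_sizes[op] & (1 << size).
      * Unallocated ops have no bits set and so always UNDEF.
      */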
4936 static const uint8_t neon_3r_sizes[] = {
4937     [NEON_3R_VHADD] = 0x7,
4938     [NEON_3R_VQADD] = 0xf,
4939     [NEON_3R_VRHADD] = 0x7,
4940     [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
4941     [NEON_3R_VHSUB] = 0x7,
4942     [NEON_3R_VQSUB] = 0xf,
4943     [NEON_3R_VCGT] = 0x7,
4944     [NEON_3R_VCGE] = 0x7,
4945     [NEON_3R_VSHL] = 0xf,
4946     [NEON_3R_VQSHL] = 0xf,
4947     [NEON_3R_VRSHL] = 0xf,
4948     [NEON_3R_VQRSHL] = 0xf,
4949     [NEON_3R_VMAX] = 0x7,
4950     [NEON_3R_VMIN] = 0x7,
4951     [NEON_3R_VABD] = 0x7,
4952     [NEON_3R_VABA] = 0x7,
4953     [NEON_3R_VADD_VSUB] = 0xf,
4954     [NEON_3R_VTST_VCEQ] = 0x7,
4955     [NEON_3R_VML] = 0x7,
4956     [NEON_3R_VMUL] = 0x7,
4957     [NEON_3R_VPMAX] = 0x7,
4958     [NEON_3R_VPMIN] = 0x7,
4959     [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
4960     [NEON_3R_VPADD] = 0x7,
4961     [NEON_3R_SHA] = 0xf, /* size field encodes op type */
4962     [NEON_3R_VFM] = 0x5, /* size bit 1 encodes op */
4963     [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
4964     [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
4965     [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
4966     [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
4967     [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
4968     [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
4969 };
4970
4971 /* Symbolic constants for op fields for Neon 2-register miscellaneous.
4972  * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
4973  * table A7-13.
4974  */
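     /* For example, NEON_2RM_VCVT_F16_F32 is 44 == 0b10,1100:
      * bits [17:16] == 0b10 and bits [10:7] == 0b1100.
      */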
4975 #define NEON_2RM_VREV64 0
4976 #define NEON_2RM_VREV32 1
4977 #define NEON_2RM_VREV16 2
4978 #define NEON_2RM_VPADDL 4
4979 #define NEON_2RM_VPADDL_U 5
4980 #define NEON_2RM_AESE 6 /* Includes AESD */
4981 #define NEON_2RM_AESMC 7 /* Includes AESIMC */
4982 #define NEON_2RM_VCLS 8
4983 #define NEON_2RM_VCLZ 9
4984 #define NEON_2RM_VCNT 10
4985 #define NEON_2RM_VMVN 11
4986 #define NEON_2RM_VPADAL 12
4987 #define NEON_2RM_VPADAL_U 13
4988 #define NEON_2RM_VQABS 14
4989 #define NEON_2RM_VQNEG 15
4990 #define NEON_2RM_VCGT0 16
4991 #define NEON_2RM_VCGE0 17
4992 #define NEON_2RM_VCEQ0 18
4993 #define NEON_2RM_VCLE0 19
4994 #define NEON_2RM_VCLT0 20
4995 #define NEON_2RM_SHA1H 21
4996 #define NEON_2RM_VABS 22
4997 #define NEON_2RM_VNEG 23
4998 #define NEON_2RM_VCGT0_F 24
4999 #define NEON_2RM_VCGE0_F 25
5000 #define NEON_2RM_VCEQ0_F 26
5001 #define NEON_2RM_VCLE0_F 27
5002 #define NEON_2RM_VCLT0_F 28
5003 #define NEON_2RM_VABS_F 30
5004 #define NEON_2RM_VNEG_F 31
5005 #define NEON_2RM_VSWP 32
5006 #define NEON_2RM_VTRN 33
5007 #define NEON_2RM_VUZP 34
5008 #define NEON_2RM_VZIP 35
5009 #define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
5010 #define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
5011 #define NEON_2RM_VSHLL 38
5012 #define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
5013 #define NEON_2RM_VRINTN 40
5014 #define NEON_2RM_VRINTX 41
5015 #define NEON_2RM_VRINTA 42
5016 #define NEON_2RM_VRINTZ 43
5017 #define NEON_2RM_VCVT_F16_F32 44
5018 #define NEON_2RM_VRINTM 45
5019 #define NEON_2RM_VCVT_F32_F16 46
5020 #define NEON_2RM_VRINTP 47
5021 #define NEON_2RM_VCVTAU 48
5022 #define NEON_2RM_VCVTAS 49
5023 #define NEON_2RM_VCVTNU 50
5024 #define NEON_2RM_VCVTNS 51
5025 #define NEON_2RM_VCVTPU 52
5026 #define NEON_2RM_VCVTPS 53
5027 #define NEON_2RM_VCVTMU 54
5028 #define NEON_2RM_VCVTMS 55
5029 #define NEON_2RM_VRECPE 56
5030 #define NEON_2RM_VRSQRTE 57
5031 #define NEON_2RM_VRECPE_F 58
5032 #define NEON_2RM_VRSQRTE_F 59
5033 #define NEON_2RM_VCVT_FS 60
5034 #define NEON_2RM_VCVT_FU 61
5035 #define NEON_2RM_VCVT_SF 62
5036 #define NEON_2RM_VCVT_UF 63
5037
5038 static int neon_2rm_is_float_op(int op)
5039 {
5040     /* Return true if this neon 2reg-misc op is float-to-float */
5041     return (op == NEON_2RM_VABS_F || op == NEON_2RM_VNEG_F ||
5042             (op >= NEON_2RM_VRINTN && op <= NEON_2RM_VRINTZ) ||
5043             op == NEON_2RM_VRINTM ||
5044             (op >= NEON_2RM_VRINTP && op <= NEON_2RM_VCVTMS) ||
5045             op >= NEON_2RM_VRECPE_F);
5046 }
5047
5048 /* Each entry in this array has bit n set if the insn allows
5049  * size value n (otherwise it will UNDEF). Since unallocated
5050  * op values will have no bits set, so they always UNDEF.
5051  */
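     /* For example, 0x7 (0b111) permits sizes 0, 1 and 2, while 0x4
      * permits only size 2 (32-bit elements).
      */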
5052 static const uint8_t neon_2rm_sizes[] = {
5053     [NEON_2RM_VREV64] = 0x7,
5054     [NEON_2RM_VREV32] = 0x3,
5055     [NEON_2RM_VREV16] = 0x1,
5056     [NEON_2RM_VPADDL] = 0x7,
5057     [NEON_2RM_VPADDL_U] = 0x7,
5058     [NEON_2RM_AESE] = 0x1,
5059     [NEON_2RM_AESMC] = 0x1,
5060     [NEON_2RM_VCLS] = 0x7,
5061     [NEON_2RM_VCLZ] = 0x7,
5062     [NEON_2RM_VCNT] = 0x1,
5063     [NEON_2RM_VMVN] = 0x1,
5064     [NEON_2RM_VPADAL] = 0x7,
5065     [NEON_2RM_VPADAL_U] = 0x7,
5066     [NEON_2RM_VQABS] = 0x7,
5067     [NEON_2RM_VQNEG] = 0x7,
5068     [NEON_2RM_VCGT0] = 0x7,
5069     [NEON_2RM_VCGE0] = 0x7,
5070     [NEON_2RM_VCEQ0] = 0x7,
5071     [NEON_2RM_VCLE0] = 0x7,
5072     [NEON_2RM_VCLT0] = 0x7,
5073     [NEON_2RM_SHA1H] = 0x4,
5074     [NEON_2RM_VABS] = 0x7,
5075     [NEON_2RM_VNEG] = 0x7,
5076     [NEON_2RM_VCGT0_F] = 0x4,
5077     [NEON_2RM_VCGE0_F] = 0x4,
5078     [NEON_2RM_VCEQ0_F] = 0x4,
5079     [NEON_2RM_VCLE0_F] = 0x4,
5080     [NEON_2RM_VCLT0_F] = 0x4,
5081     [NEON_2RM_VABS_F] = 0x4,
5082     [NEON_2RM_VNEG_F] = 0x4,
5083     [NEON_2RM_VSWP] = 0x1,
5084     [NEON_2RM_VTRN] = 0x7,
5085     [NEON_2RM_VUZP] = 0x7,
5086     [NEON_2RM_VZIP] = 0x7,
5087     [NEON_2RM_VMOVN] = 0x7,
5088     [NEON_2RM_VQMOVN] = 0x7,
5089     [NEON_2RM_VSHLL] = 0x7,
5090     [NEON_2RM_SHA1SU1] = 0x4,
5091     [NEON_2RM_VRINTN] = 0x4,
5092     [NEON_2RM_VRINTX] = 0x4,
5093     [NEON_2RM_VRINTA] = 0x4,
5094     [NEON_2RM_VRINTZ] = 0x4,
5095     [NEON_2RM_VCVT_F16_F32] = 0x2,
5096     [NEON_2RM_VRINTM] = 0x4,
5097     [NEON_2RM_VCVT_F32_F16] = 0x2,
5098     [NEON_2RM_VRINTP] = 0x4,
5099     [NEON_2RM_VCVTAU] = 0x4,
5100     [NEON_2RM_VCVTAS] = 0x4,
5101     [NEON_2RM_VCVTNU] = 0x4,
5102     [NEON_2RM_VCVTNS] = 0x4,
5103     [NEON_2RM_VCVTPU] = 0x4,
5104     [NEON_2RM_VCVTPS] = 0x4,
5105     [NEON_2RM_VCVTMU] = 0x4,
5106     [NEON_2RM_VCVTMS] = 0x4,
5107     [NEON_2RM_VRECPE] = 0x4,
5108     [NEON_2RM_VRSQRTE] = 0x4,
5109     [NEON_2RM_VRECPE_F] = 0x4,
5110     [NEON_2RM_VRSQRTE_F] = 0x4,
5111     [NEON_2RM_VCVT_FS] = 0x4,
5112     [NEON_2RM_VCVT_FU] = 0x4,
5113     [NEON_2RM_VCVT_SF] = 0x4,
5114     [NEON_2RM_VCVT_UF] = 0x4,
5115 };
5116
5117 /* Translate a NEON data processing instruction.  Return nonzero if the
5118    instruction is invalid.
5119    We process data in a mixture of 32-bit and 64-bit chunks.
5120    Mostly we use 32-bit chunks so we can use normal scalar instructions.  */
5121
5122 static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
5123 {
5124     int op;
5125     int q;
5126     int rd, rn, rm;
5127     int size;
5128     int shift;
5129     int pass;
5130     int count;
5131     int pairwise;
5132     int u;
5133     uint32_t imm, mask;
5134     TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
5135     TCGv_i64 tmp64;
5136
5137     /* FIXME: this access check should not take precedence over UNDEF
5138      * for invalid encodings; we will generate incorrect syndrome information
5139      * for attempts to execute invalid vfp/neon encodings with FP disabled.
5140      */
5141     if (s->fp_excp_el) {
5142         gen_exception_insn(s, 4, EXCP_UDEF,
5143                            syn_fp_access_trap(1, 0xe, s->thumb), s->fp_excp_el);
5144         return 0;
5145     }
5146
5147     if (!s->vfp_enabled)
5148         return 1;
5149     q = (insn & (1 << 6)) != 0;
5150     u = (insn >> 24) & 1;
5151     VFP_DREG_D(rd, insn);
5152     VFP_DREG_N(rn, insn);
5153     VFP_DREG_M(rm, insn);
5154     size = (insn >> 20) & 3;
5155     if ((insn & (1 << 23)) == 0) {
5156         /* Three register same length.  */
5157         op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
5158         /* Catch invalid op and bad size combinations: UNDEF */
5159         if ((neon_3r_sizes[op] & (1 << size)) == 0) {
5160             return 1;
5161         }
5162         /* All insns of this form UNDEF for either this condition or the
5163          * superset of cases "Q==1"; we catch the latter later.
5164          */
5165         if (q && ((rd | rn | rm) & 1)) {
5166             return 1;
5167         }
5168         /*
5169          * The SHA-1/SHA-256 3-register instructions require special treatment
5170          * here, as their size field is overloaded as an op type selector, and
5171          * they all consume their input in a single pass.
5172          */
5173         if (op == NEON_3R_SHA) {
5174             if (!q) {
5175                 return 1;
5176             }
5177             if (!u) { /* SHA-1 */
5178                 if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)) {
5179                     return 1;
5180                 }
5181                 tmp = tcg_const_i32(rd);
5182                 tmp2 = tcg_const_i32(rn);
5183                 tmp3 = tcg_const_i32(rm);
5184                 tmp4 = tcg_const_i32(size);
5185                 gen_helper_crypto_sha1_3reg(cpu_env, tmp, tmp2, tmp3, tmp4);
5186                 tcg_temp_free_i32(tmp4);
5187             } else { /* SHA-256 */
5188                 if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA256) || size == 3) {
5189                     return 1;
5190                 }
5191                 tmp = tcg_const_i32(rd);
5192                 tmp2 = tcg_const_i32(rn);
5193                 tmp3 = tcg_const_i32(rm);
5194                 switch (size) {
5195                 case 0:
5196                     gen_helper_crypto_sha256h(cpu_env, tmp, tmp2, tmp3);
5197                     break;
5198                 case 1:
5199                     gen_helper_crypto_sha256h2(cpu_env, tmp, tmp2, tmp3);
5200                     break;
5201                 case 2:
5202                     gen_helper_crypto_sha256su1(cpu_env, tmp, tmp2, tmp3);
5203                     break;
5204                 }
5205             }
5206             tcg_temp_free_i32(tmp);
5207             tcg_temp_free_i32(tmp2);
5208             tcg_temp_free_i32(tmp3);
5209             return 0;
5210         }
5211         if (size == 3 && op != NEON_3R_LOGIC) {
5212             /* 64-bit element instructions. */
5213             for (pass = 0; pass < (q ? 2 : 1); pass++) {
5214                 neon_load_reg64(cpu_V0, rn + pass);
5215                 neon_load_reg64(cpu_V1, rm + pass);
5216                 switch (op) {
5217                 case NEON_3R_VQADD:
5218                     if (u) {
5219                         gen_helper_neon_qadd_u64(cpu_V0, cpu_env,
5220                                                  cpu_V0, cpu_V1);
5221                     } else {
5222                         gen_helper_neon_qadd_s64(cpu_V0, cpu_env,
5223                                                  cpu_V0, cpu_V1);
5224                     }
5225                     break;
5226                 case NEON_3R_VQSUB:
5227                     if (u) {
5228                         gen_helper_neon_qsub_u64(cpu_V0, cpu_env,
5229                                                  cpu_V0, cpu_V1);
5230                     } else {
5231                         gen_helper_neon_qsub_s64(cpu_V0, cpu_env,
5232                                                  cpu_V0, cpu_V1);
5233                     }
5234                     break;
5235                 case NEON_3R_VSHL:
5236                     if (u) {
5237                         gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
5238                     } else {
5239                         gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0);
5240                     }
5241                     break;
5242                 case NEON_3R_VQSHL:
5243                     if (u) {
5244                         gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5245                                                  cpu_V1, cpu_V0);
5246                     } else {
5247                         gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5248                                                  cpu_V1, cpu_V0);
5249                     }
5250                     break;
5251                 case NEON_3R_VRSHL:
5252                     if (u) {
5253                         gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
5254                     } else {
5255                         gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
5256                     }
5257                     break;
5258                 case NEON_3R_VQRSHL:
5259                     if (u) {
5260                         gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
5261                                                   cpu_V1, cpu_V0);
5262                     } else {
5263                         gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
5264                                                   cpu_V1, cpu_V0);
5265                     }
5266                     break;
5267                 case NEON_3R_VADD_VSUB:
5268                     if (u) {
5269                         tcg_gen_sub_i64(CPU_V001);
5270                     } else {
5271                         tcg_gen_add_i64(CPU_V001);
5272                     }
5273                     break;
5274                 default:
5275                     abort();
5276                 }
5277                 neon_store_reg64(cpu_V0, rd + pass);
5278             }
5279             return 0;
5280         }
5281         pairwise = 0;
5282         switch (op) {
5283         case NEON_3R_VSHL:
5284         case NEON_3R_VQSHL:
5285         case NEON_3R_VRSHL:
5286         case NEON_3R_VQRSHL:
5287             {
5288                 int rtmp;
5289                 /* Shift instruction operands are reversed.  */
5290                 rtmp = rn;
5291                 rn = rm;
5292                 rm = rtmp;
5293             }
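                 /* After the swap, the per-pass code below applies the
                  * helpers as helper(value from Vm, shift counts from Vn),
                  * matching e.g. VSHL Dd, Dm, Dn == Dm << Dn.
                  */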
5294             break;
5295         case NEON_3R_VPADD:
5296             if (u) {
5297                 return 1;
5298             }
5299             /* Fall through */
5300         case NEON_3R_VPMAX:
5301         case NEON_3R_VPMIN:
5302             pairwise = 1;
5303             break;
5304         case NEON_3R_FLOAT_ARITH:
5305             pairwise = (u && size < 2); /* if VPADD (float) */
5306             break;
5307         case NEON_3R_FLOAT_MINMAX:
5308             pairwise = u; /* if VPMIN/VPMAX (float) */
5309             break;
5310         case NEON_3R_FLOAT_CMP:
5311             if (!u && size) {
5312                 /* no encoding for U=0 C=1x */
5313                 return 1;
5314             }
5315             break;
5316         case NEON_3R_FLOAT_ACMP:
5317             if (!u) {
5318                 return 1;
5319             }
5320             break;
5321         case NEON_3R_FLOAT_MISC:
5322             /* VMAXNM/VMINNM in ARMv8 */
5323             if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) {
5324                 return 1;
5325             }
5326             break;
5327         case NEON_3R_VMUL:
5328             if (u && (size != 0)) {
5329                 /* UNDEF on invalid size for polynomial subcase */
5330                 return 1;
5331             }
5332             break;
5333         case NEON_3R_VFM:
5334             if (!arm_dc_feature(s, ARM_FEATURE_VFP4) || u) {
5335                 return 1;
5336             }
5337             break;
5338         default:
5339             break;
5340         }
5341
5342         if (pairwise && q) {
5343             /* All the pairwise insns UNDEF if Q is set */
5344             return 1;
5345         }
5346
5347         for (pass = 0; pass < (q ? 4 : 2); pass++) {
5348
5349         if (pairwise) {
5350             /* Pairwise.  */
5351             if (pass < 1) {
5352                 tmp = neon_load_reg(rn, 0);
5353                 tmp2 = neon_load_reg(rn, 1);
5354             } else {
5355                 tmp = neon_load_reg(rm, 0);
5356                 tmp2 = neon_load_reg(rm, 1);
5357             }
5358         } else {
5359             /* Elementwise.  */
5360             tmp = neon_load_reg(rn, pass);
5361             tmp2 = neon_load_reg(rm, pass);
5362         }
5363         switch (op) {
5364         case NEON_3R_VHADD:
5365             GEN_NEON_INTEGER_OP(hadd);
5366             break;
5367         case NEON_3R_VQADD:
5368             GEN_NEON_INTEGER_OP_ENV(qadd);
5369             break;
5370         case NEON_3R_VRHADD:
5371             GEN_NEON_INTEGER_OP(rhadd);
5372             break;
5373         case NEON_3R_LOGIC: /* Logic ops.  */
5374             switch ((u << 2) | size) {
5375             case 0: /* VAND */
5376                 tcg_gen_and_i32(tmp, tmp, tmp2);
5377                 break;
5378             case 1: /* BIC */
5379                 tcg_gen_andc_i32(tmp, tmp, tmp2);
5380                 break;
5381             case 2: /* VORR */
5382                 tcg_gen_or_i32(tmp, tmp, tmp2);
5383                 break;
5384             case 3: /* VORN */
5385                 tcg_gen_orc_i32(tmp, tmp, tmp2);
5386                 break;
5387             case 4: /* VEOR */
5388                 tcg_gen_xor_i32(tmp, tmp, tmp2);
5389                 break;
5390             case 5: /* VBSL */
5391                 tmp3 = neon_load_reg(rd, pass);
5392                 gen_neon_bsl(tmp, tmp, tmp2, tmp3);
5393                 tcg_temp_free_i32(tmp3);
5394                 break;
5395             case 6: /* VBIT */
5396                 tmp3 = neon_load_reg(rd, pass);
5397                 gen_neon_bsl(tmp, tmp, tmp3, tmp2);
5398                 tcg_temp_free_i32(tmp3);
5399                 break;
5400             case 7: /* VBIF */
5401                 tmp3 = neon_load_reg(rd, pass);
5402                 gen_neon_bsl(tmp, tmp3, tmp, tmp2);
5403                 tcg_temp_free_i32(tmp3);
5404                 break;
5405             }
5406             break;
5407         case NEON_3R_VHSUB:
5408             GEN_NEON_INTEGER_OP(hsub);
5409             break;
5410         case NEON_3R_VQSUB:
5411             GEN_NEON_INTEGER_OP_ENV(qsub);
5412             break;
5413         case NEON_3R_VCGT:
5414             GEN_NEON_INTEGER_OP(cgt);
5415             break;
5416         case NEON_3R_VCGE:
5417             GEN_NEON_INTEGER_OP(cge);
5418             break;
5419         case NEON_3R_VSHL:
5420             GEN_NEON_INTEGER_OP(shl);
5421             break;
5422         case NEON_3R_VQSHL:
5423             GEN_NEON_INTEGER_OP_ENV(qshl);
5424             break;
5425         case NEON_3R_VRSHL:
5426             GEN_NEON_INTEGER_OP(rshl);
5427             break;
5428         case NEON_3R_VQRSHL:
5429             GEN_NEON_INTEGER_OP_ENV(qrshl);
5430             break;
5431         case NEON_3R_VMAX:
5432             GEN_NEON_INTEGER_OP(max);
5433             break;
5434         case NEON_3R_VMIN:
5435             GEN_NEON_INTEGER_OP(min);
5436             break;
5437         case NEON_3R_VABD:
5438             GEN_NEON_INTEGER_OP(abd);
5439             break;
5440         case NEON_3R_VABA:
5441             GEN_NEON_INTEGER_OP(abd);
5442             tcg_temp_free_i32(tmp2);
5443             tmp2 = neon_load_reg(rd, pass);
5444             gen_neon_add(size, tmp, tmp2);
5445             break;
5446         case NEON_3R_VADD_VSUB:
5447             if (!u) { /* VADD */
5448                 gen_neon_add(size, tmp, tmp2);
5449             } else { /* VSUB */
5450                 switch (size) {
5451                 case 0: gen_helper_neon_sub_u8(tmp, tmp, tmp2); break;
5452                 case 1: gen_helper_neon_sub_u16(tmp, tmp, tmp2); break;
5453                 case 2: tcg_gen_sub_i32(tmp, tmp, tmp2); break;
5454                 default: abort();
5455                 }
5456             }
5457             break;
5458         case NEON_3R_VTST_VCEQ:
5459             if (!u) { /* VTST */
5460                 switch (size) {
5461                 case 0: gen_helper_neon_tst_u8(tmp, tmp, tmp2); break;
5462                 case 1: gen_helper_neon_tst_u16(tmp, tmp, tmp2); break;
5463                 case 2: gen_helper_neon_tst_u32(tmp, tmp, tmp2); break;
5464                 default: abort();
5465                 }
5466             } else { /* VCEQ */
5467                 switch (size) {
5468                 case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
5469                 case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
5470                 case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
5471                 default: abort();
5472                 }
5473             }
5474             break;
5475         case NEON_3R_VML: /* VMLA, VMLAL, VMLS, VMLSL */
5476             switch (size) {
5477             case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
5478             case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
5479             case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
5480             default: abort();
5481             }
5482             tcg_temp_free_i32(tmp2);
5483             tmp2 = neon_load_reg(rd, pass);
5484             if (u) { /* VMLS */
5485                 gen_neon_rsb(size, tmp, tmp2);
5486             } else { /* VMLA */
5487                 gen_neon_add(size, tmp, tmp2);
5488             }
5489             break;
5490         case NEON_3R_VMUL:
5491             if (u) { /* polynomial */
5492                 gen_helper_neon_mul_p8(tmp, tmp, tmp2);
5493             } else { /* Integer */
5494                 switch (size) {
5495                 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
5496                 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
5497                 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
5498                 default: abort();
5499                 }
5500             }
5501             break;
5502         case NEON_3R_VPMAX:
5503             GEN_NEON_INTEGER_OP(pmax);
5504             break;
5505         case NEON_3R_VPMIN:
5506             GEN_NEON_INTEGER_OP(pmin);
5507             break;
5508         case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high.  */
5509             if (!u) { /* VQDMULH */
5510                 switch (size) {
5511                 case 1:
5512                     gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
5513                     break;
5514                 case 2:
5515                     gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
5516                     break;
5517                 default: abort();
5518                 }
5519             } else { /* VQRDMULH */
5520                 switch (size) {
5521                 case 1:
5522                     gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
5523                     break;
5524                 case 2:
5525                     gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
5526                     break;
5527                 default: abort();
5528                 }
5529             }
5530             break;
5531         case NEON_3R_VPADD:
5532             switch (size) {
5533             case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
5534             case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
5535             case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
5536             default: abort();
5537             }
5538             break;
5539         case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
5540         {
5541             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5542             switch ((u << 2) | size) {
5543             case 0: /* VADD */
5544             case 4: /* VPADD */
5545                 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5546                 break;
5547             case 2: /* VSUB */
5548                 gen_helper_vfp_subs(tmp, tmp, tmp2, fpstatus);
5549                 break;
5550             case 6: /* VABD */
5551                 gen_helper_neon_abd_f32(tmp, tmp, tmp2, fpstatus);
5552                 break;
5553             default:
5554                 abort();
5555             }
5556             tcg_temp_free_ptr(fpstatus);
5557             break;
5558         }
5559         case NEON_3R_FLOAT_MULTIPLY:
5560         {
5561             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5562             gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
5563             if (!u) {
5564                 tcg_temp_free_i32(tmp2);
5565                 tmp2 = neon_load_reg(rd, pass);
5566                 if (size == 0) {
5567                     gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5568                 } else {
5569                     gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
5570                 }
5571             }
5572             tcg_temp_free_ptr(fpstatus);
5573             break;
5574         }
5575         case NEON_3R_FLOAT_CMP:
5576         {
5577             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5578             if (!u) {
5579                 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
5580             } else {
5581                 if (size == 0) {
5582                     gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
5583                 } else {
5584                     gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
5585                 }
5586             }
5587             tcg_temp_free_ptr(fpstatus);
5588             break;
5589         }
5590         case NEON_3R_FLOAT_ACMP:
5591         {
5592             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5593             if (size == 0) {
5594                 gen_helper_neon_acge_f32(tmp, tmp, tmp2, fpstatus);
5595             } else {
5596                 gen_helper_neon_acgt_f32(tmp, tmp, tmp2, fpstatus);
5597             }
5598             tcg_temp_free_ptr(fpstatus);
5599             break;
5600         }
5601         case NEON_3R_FLOAT_MINMAX:
5602         {
5603             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5604             if (size == 0) {
5605                 gen_helper_vfp_maxs(tmp, tmp, tmp2, fpstatus);
5606             } else {
5607                 gen_helper_vfp_mins(tmp, tmp, tmp2, fpstatus);
5608             }
5609             tcg_temp_free_ptr(fpstatus);
5610             break;
5611         }
5612         case NEON_3R_FLOAT_MISC:
5613             if (u) {
5614                 /* VMAXNM/VMINNM */
5615                 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5616                 if (size == 0) {
5617                     gen_helper_vfp_maxnums(tmp, tmp, tmp2, fpstatus);
5618                 } else {
5619                     gen_helper_vfp_minnums(tmp, tmp, tmp2, fpstatus);
5620                 }
5621                 tcg_temp_free_ptr(fpstatus);
5622             } else {
5623                 if (size == 0) {
5624                     gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
5625                 } else {
5626                     gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
5627                 }
5628             }
5629             break;
5630         case NEON_3R_VFM:
5631         {
5632             /* VFMA, VFMS: fused multiply-add */
5633             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5634             TCGv_i32 tmp3 = neon_load_reg(rd, pass);
5635             if (size) {
5636                 /* VFMS */
5637                 gen_helper_vfp_negs(tmp, tmp);
5638             }
5639             gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus);
5640             tcg_temp_free_i32(tmp3);
5641             tcg_temp_free_ptr(fpstatus);
5642             break;
5643         }
5644         default:
5645             abort();
5646         }
5647         tcg_temp_free_i32(tmp2);
5648
5649         /* Save the result.  For elementwise operations we can put it
5650            straight into the destination register.  For pairwise operations
5651            we have to be careful to avoid clobbering the source operands.  */
5652         if (pairwise && rd == rm) {
5653             neon_store_scratch(pass, tmp);
5654         } else {
5655             neon_store_reg(rd, pass, tmp);
5656         }
5657
5658         } /* for pass */
5659         if (pairwise && rd == rm) {
5660             for (pass = 0; pass < (q ? 4 : 2); pass++) {
5661                 tmp = neon_load_scratch(pass);
5662                 neon_store_reg(rd, pass, tmp);
5663             }
5664         }
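             /* The scratch round-trip above matters for e.g.
              * VPADD d0, d1, d0: storing pass 0 directly into rd would
              * overwrite rm before pass 1 has read both halves of it.
              */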
5665         /* End of 3 register same size operations.  */
5666     } else if (insn & (1 << 4)) {
5667         if ((insn & 0x00380080) != 0) {
5668             /* Two registers and shift.  */
5669             op = (insn >> 8) & 0xf;
5670             if (insn & (1 << 7)) {
5671                 /* 64-bit shift. */
5672                 if (op > 7) {
5673                     return 1;
5674                 }
5675                 size = 3;
5676             } else {
5677                 size = 2;
5678                 while ((insn & (1 << (size + 19))) == 0)
5679                     size--;
5680             }
5681             shift = (insn >> 16) & ((1 << (3 + size)) - 1);
5682             /* To avoid excessive duplication of ops we implement shift
5683                by immediate using the variable shift operations.  */
5684             if (op < 8) {
5685                 /* Shift by immediate:
5686                    VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU.  */
5687                 if (q && ((rd | rm) & 1)) {
5688                     return 1;
5689                 }
5690                 if (!u && (op == 4 || op == 6)) {
5691                     return 1;
5692                 }
5693                 /* Right shifts are encoded as N - shift, where N is the
5694                    element size in bits.  */
5695                 if (op <= 4)
5696                     shift = shift - (1 << (size + 3));
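                     /* e.g. VSHR (op 0) with size == 0: an encoded field
                      * of 7 becomes shift = 7 - 8 = -1, and the variable
                      * shift helpers treat a negative count as a right
                      * shift.
                      */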
5697                 if (size == 3) {
5698                     count = q + 1;
5699                 } else {
5700                     count = q ? 4 : 2;
5701                 }
5702                 switch (size) {
5703                 case 0:
5704                     imm = (uint8_t) shift;
5705                     imm |= imm << 8;
5706                     imm |= imm << 16;
5707                     break;
5708                 case 1:
5709                     imm = (uint16_t) shift;
5710                     imm |= imm << 16;
5711                     break;
5712                 case 2:
5713                 case 3:
5714                     imm = shift;
5715                     break;
5716                 default:
5717                     abort();
5718                 }
5719
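                     /* e.g. size == 0 with shift == -1 gives imm ==
                      * 0xffffffff, so every byte lane of the shift operand
                      * holds the two's-complement count -1.
                      */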
5720                 for (pass = 0; pass < count; pass++) {
5721                     if (size == 3) {
5722                         neon_load_reg64(cpu_V0, rm + pass);
5723                         tcg_gen_movi_i64(cpu_V1, imm);
5724                         switch (op) {
5725                         case 0:  /* VSHR */
5726                         case 1:  /* VSRA */
5727                             if (u)
5728                                 gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
5729                             else
5730                                 gen_helper_neon_shl_s64(cpu_V0, cpu_V0, cpu_V1);
5731                             break;
5732                         case 2: /* VRSHR */
5733                         case 3: /* VRSRA */
5734                             if (u)
5735                                 gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, cpu_V1);
5736                             else
5737                                 gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1);
5738                             break;
5739                         case 4: /* VSRI */
5740                         case 5: /* VSHL, VSLI */
5741                             gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
5742                             break;
5743                         case 6: /* VQSHLU */
5744                             gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
5745                                                       cpu_V0, cpu_V1);
5746                             break;
5747                         case 7: /* VQSHL */
5748                             if (u) {
5749                                 gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5750                                                          cpu_V0, cpu_V1);
5751                             } else {
5752                                 gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5753                                                          cpu_V0, cpu_V1);
5754                             }
5755                             break;
5756                         }
5757                         if (op == 1 || op == 3) {
5758                             /* Accumulate.  */
5759                             neon_load_reg64(cpu_V1, rd + pass);
5760                             tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
5761                         } else if (op == 4 || (op == 5 && u)) {
5762                             /* Insert */
5763                             uint64_t mask;
5764                             neon_load_reg64(cpu_V1, rd + pass);
5765                             if (shift < -63 || shift > 63) {
5766                                 mask = 0;
5767                             } else {
5768                                 if (op == 4) {
5769                                     mask = 0xffffffffffffffffull >> -shift;
5770                                 } else {
5771                                     mask = 0xffffffffffffffffull << shift;
5772                                 }
5773                             }
5774                             tcg_gen_andi_i64(cpu_V1, cpu_V1, ~mask);
5775                             tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
5776                         }
5777                         neon_store_reg64(cpu_V0, rd + pass);
5778                     } else { /* size < 3 */
5779                         /* Operands in T0 and T1.  */
5780                         tmp = neon_load_reg(rm, pass);
5781                         tmp2 = tcg_temp_new_i32();
5782                         tcg_gen_movi_i32(tmp2, imm);
5783                         switch (op) {
5784                         case 0:  /* VSHR */
5785                         case 1:  /* VSRA */
5786                             GEN_NEON_INTEGER_OP(shl);
5787                             break;
5788                         case 2: /* VRSHR */
5789                         case 3: /* VRSRA */
5790                             GEN_NEON_INTEGER_OP(rshl);
5791                             break;
5792                         case 4: /* VSRI */
5793                         case 5: /* VSHL, VSLI */
5794                             switch (size) {
5795                             case 0: gen_helper_neon_shl_u8(tmp, tmp, tmp2); break;
5796                             case 1: gen_helper_neon_shl_u16(tmp, tmp, tmp2); break;
5797                             case 2: gen_helper_neon_shl_u32(tmp, tmp, tmp2); break;
5798                             default: abort();
5799                             }
5800                             break;
5801                         case 6: /* VQSHLU */
5802                             switch (size) {
5803                             case 0:
5804                                 gen_helper_neon_qshlu_s8(tmp, cpu_env,
5805                                                          tmp, tmp2);
5806                                 break;
5807                             case 1:
5808                                 gen_helper_neon_qshlu_s16(tmp, cpu_env,
5809                                                           tmp, tmp2);
5810                                 break;
5811                             case 2:
5812                                 gen_helper_neon_qshlu_s32(tmp, cpu_env,
5813                                                           tmp, tmp2);
5814                                 break;
5815                             default:
5816                                 abort();
5817                             }
5818                             break;
5819                         case 7: /* VQSHL */
5820                             GEN_NEON_INTEGER_OP_ENV(qshl);
5821                             break;
5822                         }
5823                         tcg_temp_free_i32(tmp2);
5824
5825                         if (op == 1 || op == 3) {
5826                             /* Accumulate.  */
5827                             tmp2 = neon_load_reg(rd, pass);
5828                             gen_neon_add(size, tmp, tmp2);
5829                             tcg_temp_free_i32(tmp2);
5830                         } else if (op == 4 || (op == 5 && u)) {
5831                             /* Insert */
5832                             switch (size) {
5833                             case 0:
5834                                 if (op == 4)
5835                                     mask = 0xff >> -shift;
5836                                 else
5837                                     mask = (uint8_t)(0xff << shift);
5838                                 mask |= mask << 8;
5839                                 mask |= mask << 16;
5840                                 break;
5841                             case 1:
5842                                 if (op == 4)
5843                                     mask = 0xffff >> -shift;
5844                                 else
5845                                     mask = (uint16_t)(0xffff << shift);
5846                                 mask |= mask << 16;
5847                                 break;
5848                             case 2:
5849                                 if (shift < -31 || shift > 31) {
5850                                     mask = 0;
5851                                 } else {
5852                                     if (op == 4)
5853                                         mask = 0xffffffffu >> -shift;
5854                                     else
5855                                         mask = 0xffffffffu << shift;
5856                                 }
5857                                 break;
5858                             default:
5859                                 abort();
5860                             }
5861                             tmp2 = neon_load_reg(rd, pass);
5862                             tcg_gen_andi_i32(tmp, tmp, mask);
5863                             tcg_gen_andi_i32(tmp2, tmp2, ~mask);
5864                             tcg_gen_or_i32(tmp, tmp, tmp2);
5865                             tcg_temp_free_i32(tmp2);
5866                         }
5867                         neon_store_reg(rd, pass, tmp);
5868                     }
5869                 } /* for pass */
5870             } else if (op < 10) {
5871                 /* Shift by immediate and narrow:
5872                    VSHRN, VRSHRN, VQSHRN, VQRSHRN.  */
5873                 int input_unsigned = (op == 8) ? !u : u;
5874                 if (rm & 1) {
5875                     return 1;
5876                 }
5877                 shift = shift - (1 << (size + 3));
5878                 size++;
5879                 if (size == 3) {
5880                     tmp64 = tcg_const_i64(shift);
5881                     neon_load_reg64(cpu_V0, rm);
5882                     neon_load_reg64(cpu_V1, rm + 1);
5883                     for (pass = 0; pass < 2; pass++) {
5884                         TCGv_i64 in;
5885                         if (pass == 0) {
5886                             in = cpu_V0;
5887                         } else {
5888                             in = cpu_V1;
5889                         }
5890                         if (q) {
5891                             if (input_unsigned) {
5892                                 gen_helper_neon_rshl_u64(cpu_V0, in, tmp64);
5893                             } else {
5894                                 gen_helper_neon_rshl_s64(cpu_V0, in, tmp64);
5895                             }
5896                         } else {
5897                             if (input_unsigned) {
5898                                 gen_helper_neon_shl_u64(cpu_V0, in, tmp64);
5899                             } else {
5900                                 gen_helper_neon_shl_s64(cpu_V0, in, tmp64);
5901                             }
5902                         }
5903                         tmp = tcg_temp_new_i32();
5904                         gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5905                         neon_store_reg(rd, pass, tmp);
5906                     } /* for pass */
5907                     tcg_temp_free_i64(tmp64);
5908                 } else {
5909                     if (size == 1) {
5910                         imm = (uint16_t)shift;
5911                         imm |= imm << 16;
5912                     } else {
5913                         /* size == 2 */
5914                         imm = (uint32_t)shift;
5915                     }
5916                     tmp2 = tcg_const_i32(imm);
5917                     tmp4 = neon_load_reg(rm + 1, 0);
5918                     tmp5 = neon_load_reg(rm + 1, 1);
5919                     for (pass = 0; pass < 2; pass++) {
5920                         if (pass == 0) {
5921                             tmp = neon_load_reg(rm, 0);
5922                         } else {
5923                             tmp = tmp4;
5924                         }
5925                         gen_neon_shift_narrow(size, tmp, tmp2, q,
5926                                               input_unsigned);
5927                         if (pass == 0) {
5928                             tmp3 = neon_load_reg(rm, 1);
5929                         } else {
5930                             tmp3 = tmp5;
5931                         }
5932                         gen_neon_shift_narrow(size, tmp3, tmp2, q,
5933                                               input_unsigned);
5934                         tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
5935                         tcg_temp_free_i32(tmp);
5936                         tcg_temp_free_i32(tmp3);
5937                         tmp = tcg_temp_new_i32();
5938                         gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5939                         neon_store_reg(rd, pass, tmp);
5940                     } /* for pass */
5941                     tcg_temp_free_i32(tmp2);
5942                 }
5943             } else if (op == 10) {
5944                 /* VSHLL, VMOVL */
5945                 if (q || (rd & 1)) {
5946                     return 1;
5947                 }
5948                 tmp = neon_load_reg(rm, 0);
5949                 tmp2 = neon_load_reg(rm, 1);
5950                 for (pass = 0; pass < 2; pass++) {
5951                     if (pass == 1)
5952                         tmp = tmp2;
5953
5954                     gen_neon_widen(cpu_V0, tmp, size, u);
5955
5956                     if (shift != 0) {
5957                         /* The shift is less than the width of the source
5958                            type, so we can just shift the whole register.  */
5959                         tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
5960                         /* Widen the result of shift: we need to clear
5961                          * the potential overflow bits resulting from
5962                          * left bits of the narrow input appearing as
5963                          * right bits of the left neighbour's narrow
5964                          * input.  */
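                             /* e.g. widening 8->16 with shift == 2: the
                              * imm64 computed below is 0x0003000300030003,
                              * clearing the two low bits of each 16-bit
                              * lane, which may hold spill from the lane
                              * below.
                              */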
5965                         if (size < 2 || !u) {
5966                             uint64_t imm64;
5967                             if (size == 0) {
5968                                 imm = (0xffu >> (8 - shift));
5969                                 imm |= imm << 16;
5970                             } else if (size == 1) {
5971                                 imm = 0xffff >> (16 - shift);
5972                             } else {
5973                                 /* size == 2 */
5974                                 imm = 0xffffffff >> (32 - shift);
5975                             }
5976                             if (size < 2) {
5977                                 imm64 = imm | (((uint64_t)imm) << 32);
5978                             } else {
5979                                 imm64 = imm;
5980                             }
5981                             tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
5982                         }
5983                     }
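                         /* Illustration (hypothetical values): for a signed
                          * VSHLL.S8 by 3, source bytes {0x80, 0x01} in the
                          * low lanes widen to 0x0001ff80; the shl gives
                          * 0x000ffc00, where lane 1 has picked up three of
                          * lane 0's sign bits.  The andi above with
                          * ~0x0007000700070007 restores lane 1 to 0x0008
                          * (1 << 3) and leaves lane 0 as 0xfc00 (-128 << 3).
                          */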
5984                     neon_store_reg64(cpu_V0, rd + pass);
5985                 }
5986             } else if (op >= 14) {
5987                 /* VCVT fixed-point.  */
5988                 if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
5989                     return 1;
5990                 }
5991                 /* We have already masked out the must-be-1 top bit of imm6,
5992                  * hence this 32-shift where the ARM ARM has 64-imm6.
5993                  */
5994                 shift = 32 - shift;
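                 /* Illustration (hypothetical encoding): imm6 = 0b111000
                  * encodes 64 - 56 = 8 fraction bits; with the top bit
                  * already masked off, shift was 56 & 31 = 24, and
                  * 32 - 24 gives the same 8.
                  */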
5995                 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5996                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass));
5997                     if (!(op & 1)) {
5998                         if (u) {
5999                             gen_vfp_ulto(0, shift, 1);
6000                         } else {
6001                             gen_vfp_slto(0, shift, 1);
6002                         }
6003                     } else if (u) {
6004                         gen_vfp_toul(0, shift, 1);
6005                     } else {
6006                         gen_vfp_tosl(0, shift, 1);
6007                     }
6008                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, pass));
6009                 }
6010             } else {
6011                 return 1;
6012             }
6013         } else { /* (insn & 0x00380080) == 0 */
6014             int invert;
6015             if (q && (rd & 1)) {
6016                 return 1;
6017             }
6018
6019             op = (insn >> 8) & 0xf;
6020             /* One register and immediate.  */
6021             imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
6022             invert = (insn & (1 << 5)) != 0;
6023             /* Note that op = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.
6024              * We choose to not special-case this and will behave as if a
6025              * valid constant encoding of 0 had been given.
6026              */
6027             switch (op) {
6028             case 0: case 1:
6029                 /* no-op */
6030                 break;
6031             case 2: case 3:
6032                 imm <<= 8;
6033                 break;
6034             case 4: case 5:
6035                 imm <<= 16;
6036                 break;
6037             case 6: case 7:
6038                 imm <<= 24;
6039                 break;
6040             case 8: case 9:
6041                 imm |= imm << 16;
6042                 break;
6043             case 10: case 11:
6044                 imm = (imm << 8) | (imm << 24);
6045                 break;
6046             case 12:
6047                 imm = (imm << 8) | 0xff;
6048                 break;
6049             case 13:
6050                 imm = (imm << 16) | 0xffff;
6051                 break;
6052             case 14:
6053                 imm |= (imm << 8) | (imm << 16) | (imm << 24);
6054                 if (invert) {
6055                     imm = ~imm;
                 }
6056                 break;
6057             case 15:
6058                 if (invert) {
6059                     return 1;
6060                 }
6061                 imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
6062                       | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
6063                 break;
6064             }
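             /* Illustration (hypothetical values): an 8-bit immediate of
              * 0xab expands to 0x00ab0000 for op 4/5, 0x0000abff for op 12
              * and 0xabababab for op 14, while op 15 turns 0x70 into
              * 0x3f800000 (1.0f).  Note that op 14 pre-inverts so that the
              * generic inversion below leaves imm intact for the
              * bit-select loop further down.
              */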
6065             if (invert) {
6066                 imm = ~imm;
6067             }
6068             for (pass = 0; pass < (q ? 4 : 2); pass++) {
6069                 if (op & 1 && op < 12) {
6070                     tmp = neon_load_reg(rd, pass);
6071                     if (invert) {
6072                         /* The immediate value has already been inverted, so
6073                            BIC becomes AND.  */
6074                         tcg_gen_andi_i32(tmp, tmp, imm);
6075                     } else {
6076                         tcg_gen_ori_i32(tmp, tmp, imm);
6077                     }
6078                 } else {
6079                     /* VMOV, VMVN.  */
6080                     tmp = tcg_temp_new_i32();
6081                     if (op == 14 && invert) {
6082                         int n;
6083                         uint32_t val;
6084                         val = 0;
6085                         for (n = 0; n < 4; n++) {
6086                             if (imm & (1 << (n + (pass & 1) * 4))) {
6087                                 val |= 0xff << (n * 8);
                             }
6088                         }
6089                         tcg_gen_movi_i32(tmp, val);
6090                     } else {
6091                         tcg_gen_movi_i32(tmp, imm);
6092                     }
6093                 }
6094                 neon_store_reg(rd, pass, tmp);
6095             }
6096         }
6097     } else { /* (insn & 0x00800010 == 0x00800000) */
6098         if (size != 3) {
6099             op = (insn >> 8) & 0xf;
6100             if ((insn & (1 << 6)) == 0) {
6101                 /* Three registers of different lengths.  */
6102                 int src1_wide;
6103                 int src2_wide;
6104                 int prewiden;
6105                 /* undefreq: bit 0 : UNDEF if size == 0
6106                  *           bit 1 : UNDEF if size == 1
6107                  *           bit 2 : UNDEF if size == 2
6108                  *           bit 3 : UNDEF if U == 1
6109                  * Note that [2:0] set implies 'always UNDEF'
6110                  */
6111                 int undefreq;
6112                 /* prewiden, src1_wide, src2_wide, undefreq */
6113                 static const int neon_3reg_wide[16][4] = {
6114                     {1, 0, 0, 0}, /* VADDL */
6115                     {1, 1, 0, 0}, /* VADDW */
6116                     {1, 0, 0, 0}, /* VSUBL */
6117                     {1, 1, 0, 0}, /* VSUBW */
6118                     {0, 1, 1, 0}, /* VADDHN */
6119                     {0, 0, 0, 0}, /* VABAL */
6120                     {0, 1, 1, 0}, /* VSUBHN */
6121                     {0, 0, 0, 0}, /* VABDL */
6122                     {0, 0, 0, 0}, /* VMLAL */
6123                     {0, 0, 0, 9}, /* VQDMLAL */
6124                     {0, 0, 0, 0}, /* VMLSL */
6125                     {0, 0, 0, 9}, /* VQDMLSL */
6126                     {0, 0, 0, 0}, /* Integer VMULL */
6127                     {0, 0, 0, 1}, /* VQDMULL */
6128                     {0, 0, 0, 0xa}, /* Polynomial VMULL */
6129                     {0, 0, 0, 7}, /* Reserved: always UNDEF */
6130                 };
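                     /* Decoding examples for the table above: 9 = 0b1001
                      * makes VQDMLAL/VQDMLSL UNDEF for size == 0 or
                      * U == 1; 0xa = 0b1010 makes polynomial VMULL UNDEF
                      * for size == 1 or U == 1; 7 sets all three size
                      * bits, i.e. always UNDEF.
                      */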
6131
6132                 prewiden = neon_3reg_wide[op][0];
6133                 src1_wide = neon_3reg_wide[op][1];
6134                 src2_wide = neon_3reg_wide[op][2];
6135                 undefreq = neon_3reg_wide[op][3];
6136
6137                 if ((undefreq & (1 << size)) ||
6138                     ((undefreq & 8) && u)) {
6139                     return 1;
6140                 }
6141                 if ((src1_wide && (rn & 1)) ||
6142                     (src2_wide && (rm & 1)) ||
6143                     (!src2_wide && (rd & 1))) {
6144                     return 1;
6145                 }
6146
6147                 /* Handle VMULL.P64 (Polynomial 64x64 to 128-bit multiply)
6148                  * outside the loop below as it only performs a single pass.
6149                  */
6150                 if (op == 14 && size == 2) {
6151                     TCGv_i64 tcg_rn, tcg_rm, tcg_rd;
6152
6153                     if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
6154                         return 1;
6155                     }
6156                     tcg_rn = tcg_temp_new_i64();
6157                     tcg_rm = tcg_temp_new_i64();
6158                     tcg_rd = tcg_temp_new_i64();
6159                     neon_load_reg64(tcg_rn, rn);
6160                     neon_load_reg64(tcg_rm, rm);
6161                     gen_helper_neon_pmull_64_lo(tcg_rd, tcg_rn, tcg_rm);
6162                     neon_store_reg64(tcg_rd, rd);
6163                     gen_helper_neon_pmull_64_hi(tcg_rd, tcg_rn, tcg_rm);
6164                     neon_store_reg64(tcg_rd, rd + 1);
6165                     tcg_temp_free_i64(tcg_rn);
6166                     tcg_temp_free_i64(tcg_rm);
6167                     tcg_temp_free_i64(tcg_rd);
6168                     return 0;
6169                 }
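                 /* Background note: VMULL.P64 is a carry-less multiply, so
                  * partial products are combined with XOR rather than ADD;
                  * for example 0b0011 * 0b0011 = 0b0101, not 9.
                  */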
6170
6171                 /* Avoid overlapping operands.  Wide source operands are
6172                    always aligned so will never overlap with wide
6173                    destinations in problematic ways.  */
6174                 if (rd == rm && !src2_wide) {
6175                     tmp = neon_load_reg(rm, 1);
6176                     neon_store_scratch(2, tmp);
6177                 } else if (rd == rn && !src1_wide) {
6178                     tmp = neon_load_reg(rn, 1);
6179                     neon_store_scratch(2, tmp);
6180                 }
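                 /* Why the scratch copy: the widened destination occupies
                  * rd and rd + 1, so when a narrow source aliases rd,
                  * pass 0's 64-bit store would clobber the source's second
                  * half before pass 1 reads it; the copy in scratch slot 2
                  * preserves it.
                  */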
6181                 TCGV_UNUSED_I32(tmp3);
6182                 for (pass = 0; pass < 2; pass++) {
6183                     if (src1_wide) {
6184                         neon_load_reg64(cpu_V0, rn + pass);
6185                         TCGV_UNUSED_I32(tmp);
6186                     } else {
6187                         if (pass == 1 && rd == rn) {
6188                             tmp = neon_load_scratch(2);
6189                         } else {
6190                             tmp = neon_load_reg(rn, pass);
6191                         }
6192                         if (prewiden) {
6193                             gen_neon_widen(cpu_V0, tmp, size, u);
6194                         }
6195                     }
6196                     if (src2_wide) {
6197                         neon_load_reg64(cpu_V1, rm + pass);
6198                         TCGV_UNUSED_I32(tmp2);
6199                     } else {
6200                         if (pass == 1 && rd == rm) {
6201                             tmp2 = neon_load_scratch(2);
6202                         } else {
6203                             tmp2 = neon_load_reg(rm, pass);
6204                         }
6205                         if (prewiden) {
6206                             gen_neon_widen(cpu_V1, tmp2, size, u);
6207                         }
6208                     }
6209                     switch (op) {
6210                     case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
6211                         gen_neon_addl(size);
6212                         break;
6213                     case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
6214                         gen_neon_subl(size);
6215                         break;
6216                     case 5: case 7: /* VABAL, VABDL */
6217                         switch ((size << 1) | u) {
6218                         case 0:
6219                             gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
6220                             break;
6221                         case 1:
6222                             gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
6223                             break;
6224                         case 2:
6225                             gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
6226                             break;
6227                         case 3:
6228                             gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
6229                             break;
6230                         case 4:
6231                             gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
6232                             break;
6233                         case 5:
6234                             gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
6235                             break;
6236                         default: abort();
6237                         }
6238                         tcg_temp_free_i32(tmp2);
6239                         tcg_temp_free_i32(tmp);
6240                         break;
6241                     case 8: case 9: case 10: case 11: case 12: case 13:
6242                         /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
6243                         gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
6244                         break;
6245                     case 14: /* Polynomial VMULL */
6246                         gen_helper_neon_mull_p8(cpu_V0, tmp, tmp2);
6247                         tcg_temp_free_i32(tmp2);
6248                         tcg_temp_free_i32(tmp);
6249                         break;
6250                     default: /* 15 is RESERVED: caught earlier  */
6251                         abort();
6252                     }
6253                     if (op == 13) {
6254                         /* VQDMULL */
6255                         gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6256                         neon_store_reg64(cpu_V0, rd + pass);
6257                     } else if (op == 5 || (op >= 8 && op <= 11)) {
6258                         /* Accumulate.  */
6259                         neon_load_reg64(cpu_V1, rd + pass);
6260                         switch (op) {
6261                         case 10: /* VMLSL */
6262                             gen_neon_negl(cpu_V0, size);
6263                             /* Fall through */
6264                         case 5: case 8: /* VABAL, VMLAL */
6265                             gen_neon_addl(size);
6266                             break;
6267                         case 9: case 11: /* VQDMLAL, VQDMLSL */
6268                             gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6269                             if (op == 11) {
6270                                 gen_neon_negl(cpu_V0, size);
6271                             }
6272                             gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
6273                             break;
6274                         default:
6275                             abort();
6276                         }
6277                         neon_store_reg64(cpu_V0, rd + pass);
6278                     } else if (op == 4 || op == 6) {
6279                         /* Narrowing operation.  */
6280                         tmp = tcg_temp_new_i32();
6281                         if (!u) {
6282                             switch (size) {
6283                             case 0:
6284                                 gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
6285                                 break;
6286                             case 1:
6287                                 gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
6288                                 break;
6289                             case 2:
6290                                 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
6291                                 tcg_gen_extrl_i64_i32(tmp, cpu_V0);
6292                                 break;
6293                             default: abort();
6294                             }
6295                         } else {
6296                             switch (size) {
6297                             case 0:
6298                                 gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
6299                                 break;
6300                             case 1:
6301                                 gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
6302                                 break;
6303                             case 2:
6304                                 tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
6305                                 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
6306                                 tcg_gen_extrl_i64_i32(tmp, cpu_V0);
6307                                 break;
6308                             default: abort();
6309                             }
6310                         }
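                             /* Illustration (hypothetical value): the addi
                              * of 1u << 31 above rounds to nearest, e.g. a
                              * 64-bit sum of 0x0000000180000000 yields a
                              * high half of 2, where the truncating path
                              * above gives 1.
                              */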
6311                         if (pass == 0) {
6312                             tmp3 = tmp;
6313                         } else {
6314                             neon_store_reg(rd, 0, tmp3);
6315                             neon_store_reg(rd, 1, tmp);
6316                         }
6317                     } else {
6318                         /* Write back the result.  */
6319                         neon_store_reg64(cpu_V0, rd + pass);
6320                     }
6321                 }
6322             } else {
6323                 /* Two registers and a scalar. NB that for ops of this form
6324                  * the ARM ARM labels bit 24 as Q, but it is in our variable
6325                  * 'u', not 'q'.
6326                  */
6327                 if (size == 0) {
6328                     return 1;
6329                 }
6330                 switch (op) {
6331                 case 1: /* Floating point VMLA scalar */
6332                 case 5: /* Floating point VMLS scalar */
6333                 case 9: /* Floating point VMUL scalar */
6334                     if (size == 1) {
6335                         return 1;
6336                     }
6337                     /* fall through */
6338                 case 0: /* Integer VMLA scalar */
6339                 case 4: /* Integer VMLS scalar */
6340                 case 8: /* Integer VMUL scalar */
6341                 case 12: /* VQDMULH scalar */
6342                 case 13: /* VQRDMULH scalar */
6343                     if (u && ((rd | rn) & 1)) {
6344                         return 1;
6345                     }
6346                     tmp = neon_get_scalar(size, rm);
6347                     neon_store_scratch(0, tmp);
6348                     for (pass = 0; pass < (u ? 4 : 2); pass++) {
6349                         tmp = neon_load_scratch(0);
6350                         tmp2 = neon_load_reg(rn, pass);
6351                         if (op == 12) {
6352                             if (size == 1) {
6353                                 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
6354                             } else {
6355                                 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
6356                             }
6357                         } else if (op == 13) {
6358                             if (size == 1) {
6359                                 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
6360                             } else {
6361                                 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
6362                             }
6363                         } else if (op & 1) {
6364                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6365                             gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
6366                             tcg_temp_free_ptr(fpstatus);
6367                         } else {
6368                             switch (size) {
6369                             case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
6370                             case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
6371                             case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
6372                             default: abort();
6373                             }
6374                         }
6375                         tcg_temp_free_i32(tmp2);
6376                         if (op < 8) {
6377                             /* Accumulate.  */
6378                             tmp2 = neon_load_reg(rd, pass);
6379                             switch (op) {
6380                             case 0:
6381                                 gen_neon_add(size, tmp, tmp2);
6382                                 break;
6383                             case 1:
6384                             {
6385                                 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6386                                 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
6387                                 tcg_temp_free_ptr(fpstatus);
6388                                 break;
6389                             }
6390                             case 4:
6391                                 gen_neon_rsb(size, tmp, tmp2);
6392                                 break;
6393                             case 5:
6394                             {
6395                                 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6396                                 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
6397                                 tcg_temp_free_ptr(fpstatus);
6398                                 break;
6399                             }
6400                             default:
6401                                 abort();
6402                             }
6403                             tcg_temp_free_i32(tmp2);
6404                         }
6405                         neon_store_reg(rd, pass, tmp);
6406                     }
6407                     break;
6408                 case 3: /* VQDMLAL scalar */
6409                 case 7: /* VQDMLSL scalar */
6410                 case 11: /* VQDMULL scalar */
6411                     if (u == 1) {
6412                         return 1;
6413                     }
6414                     /* fall through */
6415                 case 2: /* VMLAL scalar */
6416                 case 6: /* VMLSL scalar */
6417                 case 10: /* VMULL scalar */
6418                     if (rd & 1) {
6419                         return 1;
6420                     }
6421                     tmp2 = neon_get_scalar(size, rm);
6422                     /* We need a copy of tmp2 because gen_neon_mull
6423                      * deletes it during pass 0.  */
6424                     tmp4 = tcg_temp_new_i32();
6425                     tcg_gen_mov_i32(tmp4, tmp2);
6426                     tmp3 = neon_load_reg(rn, 1);
6427
6428                     for (pass = 0; pass < 2; pass++) {
6429                         if (pass == 0) {
6430                             tmp = neon_load_reg(rn, 0);
6431                         } else {
6432                             tmp = tmp3;
6433                             tmp2 = tmp4;
6434                         }
6435                         gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
6436                         if (op != 11) {
6437                             neon_load_reg64(cpu_V1, rd + pass);
6438                         }
6439                         switch (op) {
6440                         case 6:
6441                             gen_neon_negl(cpu_V0, size);
6442                             /* Fall through */
6443                         case 2:
6444                             gen_neon_addl(size);
6445                             break;
6446                         case 3: case 7:
6447                             gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6448                             if (op == 7) {
6449                                 gen_neon_negl(cpu_V0, size);
6450                             }
6451                             gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
6452                             break;
6453                         case 10:
6454                             /* no-op */
6455                             break;
6456                         case 11:
6457                             gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6458                             break;
6459                         default:
6460                             abort();
6461                         }
6462                         neon_store_reg64(cpu_V0, rd + pass);
6463                     }
6464
6466                     break;
6467                 default: /* 14 and 15 are RESERVED */
6468                     return 1;
6469                 }
6470             }
6471         } else { /* size == 3 */
6472             if (!u) {
6473                 /* Extract.  */
6474                 imm = (insn >> 8) & 0xf;
6475
6476                 if (imm > 7 && !q) {
6477                     return 1;
6478                 }
6479                 if (q && ((rd | rn | rm) & 1)) {
6480                     return 1;
6481                 }
6482
6483                 if (imm == 0) {
6484                     neon_load_reg64(cpu_V0, rn);
6485                     if (q) {
6486                         neon_load_reg64(cpu_V1, rn + 1);
6487                     }
6488                 } else if (imm == 8) {
6489                     neon_load_reg64(cpu_V0, rn + 1);
6490                     if (q) {
6491                         neon_load_reg64(cpu_V1, rm);
6492                     }
6493                 } else if (q) {
6494                     tmp64 = tcg_temp_new_i64();
6495                     if (imm < 8) {
6496                         neon_load_reg64(cpu_V0, rn);
6497                         neon_load_reg64(tmp64, rn + 1);
6498                     } else {
6499                         neon_load_reg64(cpu_V0, rn + 1);
6500                         neon_load_reg64(tmp64, rm);
6501                     }
6502                     tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
6503                     tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
6504                     tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6505                     if (imm < 8) {
6506                         neon_load_reg64(cpu_V1, rm);
6507                     } else {
6508                         neon_load_reg64(cpu_V1, rm + 1);
6509                         imm -= 8;
6510                     }
6511                     tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6512                     tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
6513                     tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
6514                     tcg_temp_free_i64(tmp64);
6515                 } else {
6516                     /* D-register form: extract directly across the rn:rm pair. */
6517                     neon_load_reg64(cpu_V0, rn);
6518                     tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
6519                     neon_load_reg64(cpu_V1, rm);
6520                     tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6521                     tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6522                 }
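                 /* Illustration (hypothetical values): VEXT indexes bytes,
                  * so in the D-register form imm == 3 computes
                  * (rn >> 24) | (rm << 40): the top five bytes of rn
                  * followed by the low three bytes of rm.
                  */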
6523                 neon_store_reg64(cpu_V0, rd);
6524                 if (q) {
6525                     neon_store_reg64(cpu_V1, rd + 1);
6526                 }
6527             } else if ((insn & (1 << 11)) == 0) {
6528                 /* Two register misc.  */
6529                 op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
6530                 size = (insn >> 18) & 3;
6531                 /* UNDEF for unknown op values and bad op-size combinations */
6532                 if ((neon_2rm_sizes[op] & (1 << size)) == 0) {
6533                     return 1;
6534                 }
6535                 if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) &&
6536                     q && ((rm | rd) & 1)) {
6537                     return 1;
6538                 }
6539                 switch (op) {
6540                 case NEON_2RM_VREV64:
6541                     for (pass = 0; pass < (q ? 2 : 1); pass++) {
6542                         tmp = neon_load_reg(rm, pass * 2);
6543                         tmp2 = neon_load_reg(rm, pass * 2 + 1);
6544                         switch (size) {
6545                         case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6546                         case 1: gen_swap_half(tmp); break;
6547                         case 2: /* no-op */ break;
6548                         default: abort();
6549                         }
6550                         neon_store_reg(rd, pass * 2 + 1, tmp);
6551                         if (size == 2) {
6552                             neon_store_reg(rd, pass * 2, tmp2);
6553                         } else {
6554                             switch (size) {
6555                             case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break;
6556                             case 1: gen_swap_half(tmp2); break;
6557                             default: abort();
6558                             }
6559                             neon_store_reg(rd, pass * 2, tmp2);
6560                         }
6561                     }
6562                     break;
6563                 case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
6564                 case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
6565                     for (pass = 0; pass < q + 1; pass++) {
6566                         tmp = neon_load_reg(rm, pass * 2);
6567                         gen_neon_widen(cpu_V0, tmp, size, op & 1);
6568                         tmp = neon_load_reg(rm, pass * 2 + 1);
6569                         gen_neon_widen(cpu_V1, tmp, size, op & 1);
6570                         switch (size) {
6571                         case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
6572                         case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
6573                         case 2: tcg_gen_add_i64(CPU_V001); break;
6574                         default: abort();
6575                         }
6576                         if (op >= NEON_2RM_VPADAL) {
6577                             /* Accumulate.  */
6578                             neon_load_reg64(cpu_V1, rd + pass);
6579                             gen_neon_addl(size);
6580                         }
6581                         neon_store_reg64(cpu_V0, rd + pass);
6582                     }
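                         /* Illustration (hypothetical values): VPADDL.U8
                          * sums adjacent byte pairs into halfwords, so
                          * input bytes {1, 2, 3, 4} produce halfwords
                          * {3, 7}; the VPADAL forms additionally
                          * accumulate into rd.
                          */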
6583                     break;
6584                 case NEON_2RM_VTRN:
6585                     if (size == 2) {
6586                         int n;
6587                         for (n = 0; n < (q ? 4 : 2); n += 2) {
6588                             tmp = neon_load_reg(rm, n);
6589                             tmp2 = neon_load_reg(rd, n + 1);
6590                             neon_store_reg(rm, n, tmp2);
6591                             neon_store_reg(rd, n + 1, tmp);
6592                         }
6593                     } else {
6594                         goto elementwise;
6595                     }
6596                     break;
6597                 case NEON_2RM_VUZP:
6598                     if (gen_neon_unzip(rd, rm, size, q)) {
6599                         return 1;
6600                     }
6601                     break;
6602                 case NEON_2RM_VZIP:
6603                     if (gen_neon_zip(rd, rm, size, q)) {
6604                         return 1;
6605                     }
6606                     break;
6607                 case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
6608                     /* also VQMOVUN; op field and mnemonics don't line up */
6609                     if (rm & 1) {
6610                         return 1;
6611                     }
6612                     TCGV_UNUSED_I32(tmp2);
6613                     for (pass = 0; pass < 2; pass++) {
6614                         neon_load_reg64(cpu_V0, rm + pass);
6615                         tmp = tcg_temp_new_i32();
6616                         gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size,
6617                                            tmp, cpu_V0);
6618                         if (pass == 0) {
6619                             tmp2 = tmp;
6620                         } else {
6621                             neon_store_reg(rd, 0, tmp2);
6622                             neon_store_reg(rd, 1, tmp);
6623                         }
6624                     }
6625                     break;
6626                 case NEON_2RM_VSHLL:
6627                     if (q || (rd & 1)) {
6628                         return 1;
6629                     }
6630                     tmp = neon_load_reg(rm, 0);
6631                     tmp2 = neon_load_reg(rm, 1);
6632                     for (pass = 0; pass < 2; pass++) {
6633                         if (pass == 1) {
6634                             tmp = tmp2;
                         }
6635                         gen_neon_widen(cpu_V0, tmp, size, 1);
6636                         tcg_gen_shli_i64(cpu_V0, cpu_V0, 8 << size);
6637                         neon_store_reg64(cpu_V0, rd + pass);
6638                     }
6639                     break;
6640                 case NEON_2RM_VCVT_F16_F32:
6641                     if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) ||
6642                         q || (rm & 1)) {
6643                         return 1;
6644                     }
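                         /* Layout note: each pair of f32 inputs becomes
                          * two f16 results packed into one 32-bit word,
                          * the lower-numbered lane in bits [15:0] and the
                          * next in bits [31:16].
                          */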
6645                     tmp = tcg_temp_new_i32();
6646                     tmp2 = tcg_temp_new_i32();
6647                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0));
6648                     gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
6649                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 1));
6650                     gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
6651                     tcg_gen_shli_i32(tmp2, tmp2, 16);
6652                     tcg_gen_or_i32(tmp2, tmp2, tmp);
6653                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 2));
6654                     gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
6655                     tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 3));
6656                     neon_store_reg(rd, 0, tmp2);
6657                     tmp2 = tcg_temp_new_i32();
6658                     gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
6659                     tcg_gen_shli_i32(tmp2, tmp2, 16);
6660                     tcg_gen_or_i32(tmp2, tmp2, tmp);
6661                     neon_store_reg(rd, 1, tmp2);
6662                     tcg_temp_free_i32(tmp);
6663                     break;
6664                 case NEON_2RM_VCVT_F32_F16:
6665                     if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) ||
6666                         q || (rd & 1)) {
6667                         return 1;
6668                     }
6669                     tmp3 = tcg_temp_new_i32();
6670                     tmp = neon_load_reg(rm, 0);
6671                     tmp2 = neon_load_reg(rm, 1);
6672                     tcg_gen_ext16u_i32(tmp3, tmp);
6673                     gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
6674                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 0));
6675                     tcg_gen_shri_i32(tmp3, tmp, 16);
6676                     gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
6677                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 1));
6678                     tcg_temp_free_i32(tmp);
6679                     tcg_gen_ext16u_i32(tmp3, tmp2);
6680                     gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
6681                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 2));
6682                     tcg_gen_shri_i32(tmp3, tmp2, 16);
6683                     gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
6684                     tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 3));
6685                     tcg_temp_free_i32(tmp2);
6686                     tcg_temp_free_i32(tmp3);
6687                     break;
6688                 case NEON_2RM_AESE: case NEON_2RM_AESMC:
6689                     if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
6690                         || ((rm | rd) & 1)) {
6691                         return 1;
6692                     }
6693                     tmp = tcg_const_i32(rd);
6694                     tmp2 = tcg_const_i32(rm);
6695
6696                     /* Bit 6 is the lowest opcode bit; it distinguishes between
6697                      * encryption (AESE/AESMC) and decryption (AESD/AESIMC).
6698                      */
6699                     tmp3 = tcg_const_i32(extract32(insn, 6, 1));
6700
6701                     if (op == NEON_2RM_AESE) {
6702                         gen_helper_crypto_aese(cpu_env, tmp, tmp2, tmp3);
6703                     } else {
6704                         gen_helper_crypto_aesmc(cpu_env, tmp, tmp2, tmp3);
6705                     }
6706                     tcg_temp_free_i32(tmp);
6707                     tcg_temp_free_i32(tmp2);
6708                     tcg_temp_free_i32(tmp3);
6709                     break;
6710                 case NEON_2RM_SHA1H:
6711                     if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)
6712                         || ((rm | rd) & 1)) {
6713                         return 1;
6714                     }
6715                     tmp = tcg_const_i32(rd);
6716                     tmp2 = tcg_const_i32(rm);
6717
6718                     gen_helper_crypto_sha1h(cpu_env, tmp, tmp2);
6719
6720                     tcg_temp_free_i32(tmp);
6721                     tcg_temp_free_i32(tmp2);
6722                     break;
6723                 case NEON_2RM_SHA1SU1:
6724                     if ((rm | rd) & 1) {
6725                         return 1;
6726                     }
6727                     /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
6728                     if (q) {
6729                         if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA256)) {
6730                             return 1;
6731                         }
6732                     } else if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)) {
6733                         return 1;
6734                     }
6735                     tmp = tcg_const_i32(rd);
6736                     tmp2 = tcg_const_i32(rm);
6737                     if (q) {
6738                         gen_helper_crypto_sha256su0(cpu_env, tmp, tmp2);
6739                     } else {
6740                         gen_helper_crypto_sha1su1(cpu_env, tmp, tmp2);
6741                     }
6742                     tcg_temp_free_i32(tmp);
6743                     tcg_temp_free_i32(tmp2);
6744                     break;
6745                 default:
6746                 elementwise:
6747                     for (pass = 0; pass < (q ? 4 : 2); pass++) {
6748                         if (neon_2rm_is_float_op(op)) {
6749                             tcg_gen_ld_f32(cpu_F0s, cpu_env,
6750                                            neon_reg_offset(rm, pass));
6751                             TCGV_UNUSED_I32(tmp);
6752                         } else {
6753                             tmp = neon_load_reg(rm, pass);
6754                         }
6755                         switch (op) {
6756                         case NEON_2RM_VREV32:
6757                             switch (size) {
6758                             case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6759                             case 1: gen_swap_half(tmp); break;
6760                             default: abort();
6761                             }
6762                             break;
6763                         case NEON_2RM_VREV16:
6764                             gen_rev16(tmp);
6765                             break;
6766                         case NEON_2RM_VCLS:
6767                             switch (size) {
6768                             case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
6769                             case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
6770                             case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
6771                             default: abort();
6772                             }
6773                             break;
6774                         case NEON_2RM_VCLZ:
6775                             switch (size) {
6776                             case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
6777                             case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
6778                             case 2: gen_helper_clz(tmp, tmp); break;
6779                             default: abort();
6780                             }
6781                             break;
6782                         case NEON_2RM_VCNT:
6783                             gen_helper_neon_cnt_u8(tmp, tmp);
6784                             break;
6785                         case NEON_2RM_VMVN:
6786                             tcg_gen_not_i32(tmp, tmp);
6787                             break;
6788                         case NEON_2RM_VQABS:
6789                             switch (size) {
6790                             case 0:
6791                                 gen_helper_neon_qabs_s8(tmp, cpu_env, tmp);
6792                                 break;
6793                             case 1:
6794                                 gen_helper_neon_qabs_s16(tmp, cpu_env, tmp);
6795                                 break;
6796                             case 2:
6797                                 gen_helper_neon_qabs_s32(tmp, cpu_env, tmp);
6798                                 break;
6799                             default: abort();
6800                             }
6801                             break;
6802                         case NEON_2RM_VQNEG:
6803                             switch (size) {
6804                             case 0:
6805                                 gen_helper_neon_qneg_s8(tmp, cpu_env, tmp);
6806                                 break;
6807                             case 1:
6808                                 gen_helper_neon_qneg_s16(tmp, cpu_env, tmp);
6809                                 break;
6810                             case 2:
6811                                 gen_helper_neon_qneg_s32(tmp, cpu_env, tmp);
6812                                 break;
6813                             default: abort();
6814                             }
6815                             break;
6816                         case NEON_2RM_VCGT0: case NEON_2RM_VCLE0:
6817                             tmp2 = tcg_const_i32(0);
6818                             switch (size) {
6819                             case 0: gen_helper_neon_cgt_s8(tmp, tmp, tmp2); break;
6820                             case 1: gen_helper_neon_cgt_s16(tmp, tmp, tmp2); break;
6821                             case 2: gen_helper_neon_cgt_s32(tmp, tmp, tmp2); break;
6822                             default: abort();
6823                             }
6824                             tcg_temp_free_i32(tmp2);
6825                             if (op == NEON_2RM_VCLE0) {
6826                                 tcg_gen_not_i32(tmp, tmp);
6827                             }
6828                             break;
6829                         case NEON_2RM_VCGE0: case NEON_2RM_VCLT0:
6830                             tmp2 = tcg_const_i32(0);
6831                             switch (size) {
6832                             case 0: gen_helper_neon_cge_s8(tmp, tmp, tmp2); break;
6833                             case 1: gen_helper_neon_cge_s16(tmp, tmp, tmp2); break;
6834                             case 2: gen_helper_neon_cge_s32(tmp, tmp, tmp2); break;
6835                             default: abort();
6836                             }
6837                             tcg_temp_free_i32(tmp2);
6838                             if (op == NEON_2RM_VCLT0) {
6839                                 tcg_gen_not_i32(tmp, tmp);
6840                             }
6841                             break;
6842                         case NEON_2RM_VCEQ0:
6843                             tmp2 = tcg_const_i32(0);
6844                             switch (size) {
6845                             case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
6846                             case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
6847                             case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
6848                             default: abort();
6849                             }
6850                             tcg_temp_free_i32(tmp2);
6851                             break;
6852                         case NEON_2RM_VABS:
6853                             switch (size) {
6854                             case 0: gen_helper_neon_abs_s8(tmp, tmp); break;
6855                             case 1: gen_helper_neon_abs_s16(tmp, tmp); break;
6856                             case 2: tcg_gen_abs_i32(tmp, tmp); break;
6857                             default: abort();
6858                             }
6859                             break;
6860                         case NEON_2RM_VNEG:
6861                             tmp2 = tcg_const_i32(0);
6862                             gen_neon_rsb(size, tmp, tmp2);
6863                             tcg_temp_free_i32(tmp2);
6864                             break;
6865                         case NEON_2RM_VCGT0_F:
6866                         {
6867                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6868                             tmp2 = tcg_const_i32(0);
6869                             gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
6870                             tcg_temp_free_i32(tmp2);
6871                             tcg_temp_free_ptr(fpstatus);
6872                             break;
6873                         }
6874                         case NEON_2RM_VCGE0_F:
6875                         {
6876                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6877                             tmp2 = tcg_const_i32(0);
6878                             gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
6879                             tcg_temp_free_i32(tmp2);
6880                             tcg_temp_free_ptr(fpstatus);
6881                             break;
6882                         }
6883                         case NEON_2RM_VCEQ0_F:
6884                         {
6885                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6886                             tmp2 = tcg_const_i32(0);
6887                             gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
6888                             tcg_temp_free_i32(tmp2);
6889                             tcg_temp_free_ptr(fpstatus);
6890                             break;
6891                         }
6892                         case NEON_2RM_VCLE0_F:
6893                         {
6894                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6895                             tmp2 = tcg_const_i32(0);
6896                             gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus);
6897                             tcg_temp_free_i32(tmp2);
6898                             tcg_temp_free_ptr(fpstatus);
6899                             break;
6900                         }
6901                         case NEON_2RM_VCLT0_F:
6902                         {
6903                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6904                             tmp2 = tcg_const_i32(0);
6905                             gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus);
6906                             tcg_temp_free_i32(tmp2);
6907                             tcg_temp_free_ptr(fpstatus);
6908                             break;
6909                         }
6910                         case NEON_2RM_VABS_F:
6911                             gen_vfp_abs(0);
6912                             break;
6913                         case NEON_2RM_VNEG_F:
6914                             gen_vfp_neg(0);
6915                             break;
6916                         case NEON_2RM_VSWP:
6917                             tmp2 = neon_load_reg(rd, pass);
6918                             neon_store_reg(rm, pass, tmp2);
6919                             break;
6920                         case NEON_2RM_VTRN:
6921                             tmp2 = neon_load_reg(rd, pass);
6922                             switch (size) {
6923                             case 0: gen_neon_trn_u8(tmp, tmp2); break;
6924                             case 1: gen_neon_trn_u16(tmp, tmp2); break;
6925                             default: abort();
6926                             }
6927                             neon_store_reg(rm, pass, tmp2);
6928                             break;
6929                         case NEON_2RM_VRINTN:
6930                         case NEON_2RM_VRINTA:
6931                         case NEON_2RM_VRINTM:
6932                         case NEON_2RM_VRINTP:
6933                         case NEON_2RM_VRINTZ:
6934                         {
6935                             TCGv_i32 tcg_rmode;
6936                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6937                             int rmode;
6938
6939                             if (op == NEON_2RM_VRINTZ) {
6940                                 rmode = FPROUNDING_ZERO;
6941                             } else {
6942                                 rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1];
6943                             }
6944
6945                             tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6946                             gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6947                                                       cpu_env);
6948                             gen_helper_rints(cpu_F0s, cpu_F0s, fpstatus);
6949                             gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6950                                                       cpu_env);
6951                             tcg_temp_free_ptr(fpstatus);
6952                             tcg_temp_free_i32(tcg_rmode);
6953                             break;
6954                         }
6955                         case NEON_2RM_VRINTX:
6956                         {
6957                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6958                             gen_helper_rints_exact(cpu_F0s, cpu_F0s, fpstatus);
6959                             tcg_temp_free_ptr(fpstatus);
6960                             break;
6961                         }
6962                         case NEON_2RM_VCVTAU:
6963                         case NEON_2RM_VCVTAS:
6964                         case NEON_2RM_VCVTNU:
6965                         case NEON_2RM_VCVTNS:
6966                         case NEON_2RM_VCVTPU:
6967                         case NEON_2RM_VCVTPS:
6968                         case NEON_2RM_VCVTMU:
6969                         case NEON_2RM_VCVTMS:
6970                         {
6971                             bool is_signed = !extract32(insn, 7, 1);
6972                             TCGv_ptr fpst = get_fpstatus_ptr(1);
6973                             TCGv_i32 tcg_rmode, tcg_shift;
6974                             int rmode = fp_decode_rm[extract32(insn, 8, 2)];
6975
6976                             tcg_shift = tcg_const_i32(0);
6977                             tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6978                             gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6979                                                       cpu_env);
6980
6981                             if (is_signed) {
6982                                 gen_helper_vfp_tosls(cpu_F0s, cpu_F0s,
6983                                                      tcg_shift, fpst);
6984                             } else {
6985                                 gen_helper_vfp_touls(cpu_F0s, cpu_F0s,
6986                                                      tcg_shift, fpst);
6987                             }
6988
6989                             gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6990                                                       cpu_env);
6991                             tcg_temp_free_i32(tcg_rmode);
6992                             tcg_temp_free_i32(tcg_shift);
6993                             tcg_temp_free_ptr(fpst);
6994                             break;
6995                         }
6996                         case NEON_2RM_VRECPE:
6997                         {
6998                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6999                             gen_helper_recpe_u32(tmp, tmp, fpstatus);
7000                             tcg_temp_free_ptr(fpstatus);
7001                             break;
7002                         }
7003                         case NEON_2RM_VRSQRTE:
7004                         {
7005                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7006                             gen_helper_rsqrte_u32(tmp, tmp, fpstatus);
7007                             tcg_temp_free_ptr(fpstatus);
7008                             break;
7009                         }
7010                         case NEON_2RM_VRECPE_F:
7011                         {
7012                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7013                             gen_helper_recpe_f32(cpu_F0s, cpu_F0s, fpstatus);
7014                             tcg_temp_free_ptr(fpstatus);
7015                             break;
7016                         }
7017                         case NEON_2RM_VRSQRTE_F:
7018                         {
7019                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7020                             gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, fpstatus);
7021                             tcg_temp_free_ptr(fpstatus);
7022                             break;
7023                         }
7024                         case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
7025                             gen_vfp_sito(0, 1);
7026                             break;
7027                         case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
7028                             gen_vfp_uito(0, 1);
7029                             break;
7030                         case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
7031                             gen_vfp_tosiz(0, 1);
7032                             break;
7033                         case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
7034                             gen_vfp_touiz(0, 1);
7035                             break;
7036                         default:
7037                             /* Reserved op values were caught by the
7038                              * neon_2rm_sizes[] check earlier.
7039                              */
7040                             abort();
7041                         }
7042                         if (neon_2rm_is_float_op(op)) {
7043                             tcg_gen_st_f32(cpu_F0s, cpu_env,
7044                                            neon_reg_offset(rd, pass));
7045                         } else {
7046                             neon_store_reg(rd, pass, tmp);
7047                         }
7048                     }
7049                     break;
7050                 }
7051             } else if ((insn & (1 << 10)) == 0) {
7052                 /* VTBL, VTBX.  */
7053                 int n = ((insn >> 8) & 3) + 1;
7054                 if ((rn + n) > 32) {
7055                     /* This is UNPREDICTABLE; we choose to UNDEF to avoid the
7056                      * helper function running off the end of the register file.
7057                      */
7058                     return 1;
7059                 }
7060                 n <<= 3;
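                 /* n counted registers (1-4) for the bounds check above;
                  * after the shift it is the table length in bytes
                  * (8 per register) passed to the tbl helper.
                  */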
7061                 if (insn & (1 << 6)) {
7062                     tmp = neon_load_reg(rd, 0);
7063                 } else {
7064                     tmp = tcg_temp_new_i32();
7065                     tcg_gen_movi_i32(tmp, 0);
7066                 }
7067                 tmp2 = neon_load_reg(rm, 0);
7068                 tmp4 = tcg_const_i32(rn);
7069                 tmp5 = tcg_const_i32(n);
7070                 gen_helper_neon_tbl(tmp2, cpu_env, tmp2, tmp, tmp4, tmp5);
7071                 tcg_temp_free_i32(tmp);
7072                 if (insn & (1 << 6)) {
7073                     tmp = neon_load_reg(rd, 1);
7074                 } else {
7075                     tmp = tcg_temp_new_i32();
7076                     tcg_gen_movi_i32(tmp, 0);
7077                 }
7078                 tmp3 = neon_load_reg(rm, 1);
7079                 gen_helper_neon_tbl(tmp3, cpu_env, tmp3, tmp, tmp4, tmp5);
7080                 tcg_temp_free_i32(tmp5);
7081                 tcg_temp_free_i32(tmp4);
7082                 neon_store_reg(rd, 0, tmp2);
7083                 neon_store_reg(rd, 1, tmp3);
7084                 tcg_temp_free_i32(tmp);
7085             } else if ((insn & 0x380) == 0) {
7086                 /* VDUP */
7087                 if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
7088                     return 1;
7089                 }
7090                 if (insn & (1 << 19)) {
7091                     tmp = neon_load_reg(rm, 1);
7092                 } else {
7093                     tmp = neon_load_reg(rm, 0);
7094                 }
7095                 if (insn & (1 << 16)) {
7096                     gen_neon_dup_u8(tmp, ((insn >> 17) & 3) * 8);
7097                 } else if (insn & (1 << 17)) {
7098                     if ((insn >> 18) & 1) {
7099                         gen_neon_dup_high16(tmp);
7100                     } else {
7101                         gen_neon_dup_low16(tmp);
                     }
7102                 }
7103                 for (pass = 0; pass < (q ? 4 : 2); pass++) {
7104                     tmp2 = tcg_temp_new_i32();
7105                     tcg_gen_mov_i32(tmp2, tmp);
7106                     neon_store_reg(rd, pass, tmp2);
7107                 }
7108                 tcg_temp_free_i32(tmp);
7109             } else {
7110                 return 1;
7111             }
7112         }
7113     }
7114     return 0;
7115 }
7116
7117 static int disas_coproc_insn(DisasContext *s, uint32_t insn)
7118 {
7119     int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
7120     const ARMCPRegInfo *ri;
7121
7122     cpnum = (insn >> 8) & 0xf;
7123
7124     /* First check for coprocessor space used for XScale/iwMMXt insns */
7125     if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cpnum < 2)) {
7126         if (extract32(s->c15_cpar, cpnum, 1) == 0) {
7127             return 1;
7128         }
7129         if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
7130             return disas_iwmmxt_insn(s, insn);
7131         } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
7132             return disas_dsp_insn(s, insn);
7133         }
7134         return 1;
7135     }
7136
7137     /* Otherwise treat as a generic register access */
7138     is64 = (insn & (1 << 25)) == 0;
7139     if (!is64 && ((insn & (1 << 4)) == 0)) {
7140         /* cdp */
7141         return 1;
7142     }
7143
7144     crm = insn & 0xf;
7145     if (is64) {
7146         crn = 0;
7147         opc1 = (insn >> 4) & 0xf;
7148         opc2 = 0;
7149         rt2 = (insn >> 16) & 0xf;
7150     } else {
7151         crn = (insn >> 16) & 0xf;
7152         opc1 = (insn >> 21) & 7;
7153         opc2 = (insn >> 5) & 7;
7154         rt2 = 0;
7155     }
7156     isread = (insn >> 20) & 1;
7157     rt = (insn >> 12) & 0xf;
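     /* For example, "mrc p15, 0, r0, c1, c0, 0" (a read of the cp15 SCTLR
      * into r0) arrives here with cpnum = 15, is64 = 0, opc1 = 0, crn = 1,
      * crm = 0, opc2 = 0, rt = 0 and isread = 1.
      */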
7158
7159     ri = get_arm_cp_reginfo(s->cp_regs,
7160             ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
7161     if (ri) {
7162         /* Check access permissions */
7163         if (!cp_access_ok(s->current_el, ri, isread)) {
7164             return 1;
7165         }
7166
7167         if (ri->accessfn ||
7168             (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
7169             /* Emit code to perform further access permissions checks at
7170              * runtime; this may result in an exception.
7171              * Note that on XScale all cp0..cp13 registers do an access check
7172              * call in order to handle c15_cpar.
7173              */
7174             TCGv_ptr tmpptr;
7175             TCGv_i32 tcg_syn;
7176             uint32_t syndrome;
7177
7178             /* Because this implementation takes an exception on a
7179              * trapped conditional instruction only if the instruction
7180              * passes its condition code check, we can use the clause in
7181              * the ARM ARM that lets us set the COND field in the
7182              * reported syndrome to 0xE in all cases.
7183              * We could fish the actual condition out of the insn (ARM)
7184              * or the condexec bits (Thumb), but it isn't necessary.
7185              */
7186             switch (cpnum) {
7187             case 14:
7188                 if (is64) {
7189                     syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
7190                                                  isread, s->thumb);
7191                 } else {
7192                     syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
7193                                                 rt, isread, s->thumb);
7194                 }
7195                 break;
7196             case 15:
7197                 if (is64) {
7198                     syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
7199                                                  isread, s->thumb);
7200                 } else {
7201                     syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
7202                                                 rt, isread, s->thumb);
7203                 }
7204                 break;
7205             default:
7206                 /* ARMv8 defines that only coprocessors 14 and 15 exist,
7207                  * so this can only happen if this is an ARMv7 or earlier CPU,
7208                  * in which case the syndrome information won't actually be
7209                  * guest visible.
7210                  */
7211                 assert(!arm_dc_feature(s, ARM_FEATURE_V8));
7212                 syndrome = syn_uncategorized();
7213                 break;
7214             }
7215
7216             gen_set_condexec(s);
7217             gen_set_pc_im(s, s->pc - 4);
7218             tmpptr = tcg_const_ptr(ri);
7219             tcg_syn = tcg_const_i32(syndrome);
7220             gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn);
7221             tcg_temp_free_ptr(tmpptr);
7222             tcg_temp_free_i32(tcg_syn);
7223         }
7224
7225         /* Handle special cases first */
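         /* The mask below clears the ordinary flag bits from ri->type but
          * keeps ARM_CP_SPECIAL and the value field above the flags, so the
          * switch can match the special-register encodings (ARM_CP_NOP etc).
          */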
7226         switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
7227         case ARM_CP_NOP:
7228             return 0;
7229         case ARM_CP_WFI:
7230             if (isread) {
7231                 return 1;
7232             }
7233             gen_set_pc_im(s, s->pc);
7234             s->is_jmp = DISAS_WFI;
7235             return 0;
7236         default:
7237             break;
7238         }
7239
7240         if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
7241             gen_io_start();
7242         }
7243
7244         if (isread) {
7245             /* Read */
7246             if (is64) {
7247                 TCGv_i64 tmp64;
7248                 TCGv_i32 tmp;
7249                 if (ri->type & ARM_CP_CONST) {
7250                     tmp64 = tcg_const_i64(ri->resetvalue);
7251                 } else if (ri->readfn) {
7252                     TCGv_ptr tmpptr;
7253                     tmp64 = tcg_temp_new_i64();
7254                     tmpptr = tcg_const_ptr(ri);
7255                     gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
7256                     tcg_temp_free_ptr(tmpptr);
7257                 } else {
7258                     tmp64 = tcg_temp_new_i64();
7259                     tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
7260                 }
7261                 tmp = tcg_temp_new_i32();
7262                 tcg_gen_extrl_i64_i32(tmp, tmp64);
7263                 store_reg(s, rt, tmp);
7264                 tcg_gen_shri_i64(tmp64, tmp64, 32);
7265                 tmp = tcg_temp_new_i32();
7266                 tcg_gen_extrl_i64_i32(tmp, tmp64);
7267                 tcg_temp_free_i64(tmp64);
7268                 store_reg(s, rt2, tmp);
7269             } else {
7270                 TCGv_i32 tmp;
7271                 if (ri->type & ARM_CP_CONST) {
7272                     tmp = tcg_const_i32(ri->resetvalue);
7273                 } else if (ri->readfn) {
7274                     TCGv_ptr tmpptr;
7275                     tmp = tcg_temp_new_i32();
7276                     tmpptr = tcg_const_ptr(ri);
7277                     gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
7278                     tcg_temp_free_ptr(tmpptr);
7279                 } else {
7280                     tmp = load_cpu_offset(ri->fieldoffset);
7281                 }
7282                 if (rt == 15) {
7283                     /* A destination register of r15 for a 32-bit read sets
7284                      * the condition codes from the high 4 bits of the value.
7285                      */
7286                     gen_set_nzcv(tmp);
7287                     tcg_temp_free_i32(tmp);
7288                 } else {
7289                     store_reg(s, rt, tmp);
7290                 }
7291             }
7292         } else {
7293             /* Write */
7294             if (ri->type & ARM_CP_CONST) {
7295                 /* If not forbidden by access permissions, treat as write-ignored (WI) */
7296                 return 0;
7297             }
7298
7299             if (is64) {
7300                 TCGv_i32 tmplo, tmphi;
7301                 TCGv_i64 tmp64 = tcg_temp_new_i64();
7302                 tmplo = load_reg(s, rt);
7303                 tmphi = load_reg(s, rt2);
7304                 tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
7305                 tcg_temp_free_i32(tmplo);
7306                 tcg_temp_free_i32(tmphi);
7307                 if (ri->writefn) {
7308                     TCGv_ptr tmpptr = tcg_const_ptr(ri);
7309                     gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
7310                     tcg_temp_free_ptr(tmpptr);
7311                 } else {
7312                     tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
7313                 }
7314                 tcg_temp_free_i64(tmp64);
7315             } else {
7316                 if (ri->writefn) {
7317                     TCGv_i32 tmp;
7318                     TCGv_ptr tmpptr;
7319                     tmp = load_reg(s, rt);
7320                     tmpptr = tcg_const_ptr(ri);
7321                     gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
7322                     tcg_temp_free_ptr(tmpptr);
7323                     tcg_temp_free_i32(tmp);
7324                 } else {
7325                     TCGv_i32 tmp = load_reg(s, rt);
7326                     store_cpu_offset(tmp, ri->fieldoffset);
7327                 }
7328             }
7329         }
7330
7331         if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
7332             /* I/O operations must end the TB here (whether read or write) */
7333             gen_io_end();
7334             gen_lookup_tb(s);
7335         } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
7336             /* We default to ending the TB on a coprocessor register write,
7337              * but allow this to be suppressed by the register definition
7338              * (usually only necessary to work around guest bugs).
7339              */
7340             gen_lookup_tb(s);
7341         }
7342
7343         return 0;
7344     }
7345
7346     /* Unknown register; this might be a guest error or a QEMU
7347      * unimplemented feature.
7348      */
7349     if (is64) {
7350         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7351                       "64 bit system register cp:%d opc1:%d crm:%d "
7352                       "(%s)\n",
7353                       isread ? "read" : "write", cpnum, opc1, crm,
7354                       s->ns ? "non-secure" : "secure");
7355     } else {
7356         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7357                       "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
7358                       "(%s)\n",
7359                       isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
7360                       s->ns ? "non-secure" : "secure");
7361     }
7362
7363     return 1;
7364 }
7365
7366
7367 /* Store a 64-bit value to a register pair.  Clobbers val.  */
7368 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
7369 {
7370     TCGv_i32 tmp;
7371     tmp = tcg_temp_new_i32();
7372     tcg_gen_extrl_i64_i32(tmp, val);
7373     store_reg(s, rlow, tmp);
7374     tmp = tcg_temp_new_i32();
7375     tcg_gen_shri_i64(val, val, 32);
7376     tcg_gen_extrl_i64_i32(tmp, val);
7377     store_reg(s, rhigh, tmp);
7378 }
7379
7380 /* Load a 32-bit value from a register and perform a 64-bit accumulate.  */
7381 static void gen_addq_lo(DisasContext *s, TCGv_i64 val, int rlow)
7382 {
7383     TCGv_i64 tmp;
7384     TCGv_i32 tmp2;
7385
7386     /* Load value and extend to 64 bits.  */
7387     tmp = tcg_temp_new_i64();
7388     tmp2 = load_reg(s, rlow);
7389     tcg_gen_extu_i32_i64(tmp, tmp2);
7390     tcg_temp_free_i32(tmp2);
7391     tcg_gen_add_i64(val, val, tmp);
7392     tcg_temp_free_i64(tmp);
7393 }
7394
7395 /* Load and add a 64-bit value from a register pair.  */
7396 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
7397 {
7398     TCGv_i64 tmp;
7399     TCGv_i32 tmpl;
7400     TCGv_i32 tmph;
7401
7402     /* Load the 64-bit value rhigh:rlow.  */
7403     tmpl = load_reg(s, rlow);
7404     tmph = load_reg(s, rhigh);
7405     tmp = tcg_temp_new_i64();
7406     tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
7407     tcg_temp_free_i32(tmpl);
7408     tcg_temp_free_i32(tmph);
7409     tcg_gen_add_i64(val, val, tmp);
7410     tcg_temp_free_i64(tmp);
7411 }
7412
7413 /* Set N and Z flags from hi|lo.  */
7414 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
7415 {
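     /* QEMU keeps NF as a value whose bit 31 is the N flag, and ZF as a
      * value that is zero iff the Z flag is set; so N comes straight from
      * the high word and Z from the OR of both halves.
      */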
7416     tcg_gen_mov_i32(cpu_NF, hi);
7417     tcg_gen_or_i32(cpu_ZF, lo, hi);
7418 }
7419
7420 /* Load/Store exclusive instructions are implemented by remembering
7421    the value/address loaded, and seeing if these are the same
7422    when the store is performed. This should be sufficient to implement
7423    the architecturally mandated semantics, and avoids having to monitor
7424    regular stores.
7425
7426    In system emulation mode only one CPU will be running at once, so
7427    this sequence is effectively atomic.  In user emulation mode we
7428    throw an exception and handle the atomic operation elsewhere.  */
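
/* As an illustration, a typical guest spinlock acquire looks like:
 *
 *     1:  ldrex   r1, [r0]        @ load [r0] and mark it exclusive
 *         cmp     r1, #0
 *         bne     1b
 *         strex   r2, r3, [r0]    @ store iff still exclusive; r2 = 0 on success
 *         cmp     r2, #0
 *         bne     1b
 */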
7429 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
7430                                TCGv_i32 addr, int size)
7431 {
7432     TCGv_i32 tmp = tcg_temp_new_i32();
7433
7434     s->is_ldex = true;
7435
7436     switch (size) {
7437     case 0:
7438         gen_aa32_ld8u(tmp, addr, get_mem_index(s));
7439         break;
7440     case 1:
7441         gen_aa32_ld16ua(tmp, addr, get_mem_index(s));
7442         break;
7443     case 2:
7444     case 3:
7445         gen_aa32_ld32ua(tmp, addr, get_mem_index(s));
7446         break;
7447     default:
7448         abort();
7449     }
7450
7451     if (size == 3) {
7452         TCGv_i32 tmp2 = tcg_temp_new_i32();
7453         TCGv_i32 tmp3 = tcg_temp_new_i32();
7454
7455         tcg_gen_addi_i32(tmp2, addr, 4);
7456         gen_aa32_ld32u(tmp3, tmp2, get_mem_index(s));
7457         tcg_temp_free_i32(tmp2);
7458         tcg_gen_concat_i32_i64(cpu_exclusive_val, tmp, tmp3);
7459         store_reg(s, rt2, tmp3);
7460     } else {
7461         tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
7462     }
7463
7464     store_reg(s, rt, tmp);
7465     tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
7466 }
7467
7468 static void gen_clrex(DisasContext *s)
7469 {
7470     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
7471 }
7472
7473 #ifdef CONFIG_USER_ONLY
7474 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
7475                                 TCGv_i32 addr, int size)
7476 {
7477     tcg_gen_extu_i32_i64(cpu_exclusive_test, addr);
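     /* Pack the operation description for the user-mode EXCP_STREX handler:
      * bits [3:0] = size, [7:4] = Rd, [11:8] = Rt, [15:12] = Rt2.
      */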
7478     tcg_gen_movi_i32(cpu_exclusive_info,
7479                      size | (rd << 4) | (rt << 8) | (rt2 << 12));
7480     gen_exception_internal_insn(s, 4, EXCP_STREX);
7481 }
7482 #else
7483 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
7484                                 TCGv_i32 addr, int size)
7485 {
7486     TCGv_i32 tmp;
7487     TCGv_i64 val64, extaddr;
7488     TCGLabel *done_label;
7489     TCGLabel *fail_label;
7490
7491     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
7492          [addr] = {Rt};
7493          {Rd} = 0;
7494        } else {
7495          {Rd} = 1;
7496        } */
7497     fail_label = gen_new_label();
7498     done_label = gen_new_label();
7499     extaddr = tcg_temp_new_i64();
7500     tcg_gen_extu_i32_i64(extaddr, addr);
7501     tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
7502     tcg_temp_free_i64(extaddr);
7503
7504     tmp = tcg_temp_new_i32();
7505     switch (size) {
7506     case 0:
7507         gen_aa32_ld8u(tmp, addr, get_mem_index(s));
7508         break;
7509     case 1:
7510         gen_aa32_ld16u(tmp, addr, get_mem_index(s));
7511         break;
7512     case 2:
7513     case 3:
7514         gen_aa32_ld32u(tmp, addr, get_mem_index(s));
7515         break;
7516     default:
7517         abort();
7518     }
7519
7520     val64 = tcg_temp_new_i64();
7521     if (size == 3) {
7522         TCGv_i32 tmp2 = tcg_temp_new_i32();
7523         TCGv_i32 tmp3 = tcg_temp_new_i32();
7524         tcg_gen_addi_i32(tmp2, addr, 4);
7525         gen_aa32_ld32u(tmp3, tmp2, get_mem_index(s));
7526         tcg_temp_free_i32(tmp2);
7527         tcg_gen_concat_i32_i64(val64, tmp, tmp3);
7528         tcg_temp_free_i32(tmp3);
7529     } else {
7530         tcg_gen_extu_i32_i64(val64, tmp);
7531     }
7532     tcg_temp_free_i32(tmp);
7533
7534     tcg_gen_brcond_i64(TCG_COND_NE, val64, cpu_exclusive_val, fail_label);
7535     tcg_temp_free_i64(val64);
7536
7537     tmp = load_reg(s, rt);
7538     switch (size) {
7539     case 0:
7540         gen_aa32_st8(tmp, addr, get_mem_index(s));
7541         break;
7542     case 1:
7543         gen_aa32_st16(tmp, addr, get_mem_index(s));
7544         break;
7545     case 2:
7546     case 3:
7547         gen_aa32_st32(tmp, addr, get_mem_index(s));
7548         break;
7549     default:
7550         abort();
7551     }
7552     tcg_temp_free_i32(tmp);
7553     if (size == 3) {
7554         tcg_gen_addi_i32(addr, addr, 4);
7555         tmp = load_reg(s, rt2);
7556         gen_aa32_st32(tmp, addr, get_mem_index(s));
7557         tcg_temp_free_i32(tmp);
7558     }
7559     tcg_gen_movi_i32(cpu_R[rd], 0);
7560     tcg_gen_br(done_label);
7561     gen_set_label(fail_label);
7562     tcg_gen_movi_i32(cpu_R[rd], 1);
7563     gen_set_label(done_label);
7564     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
7565 }
7566 #endif
7567
7568 /* gen_srs:
7570  * @s: DisasContext
7571  * @mode: mode field from insn (which stack to store to)
7572  * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
7573  * @writeback: true if writeback bit set
7574  *
7575  * Generate code for the SRS (Store Return State) insn.
7576  */
7577 static void gen_srs(DisasContext *s,
7578                     uint32_t mode, uint32_t amode, bool writeback)
7579 {
7580     int32_t offset;
7581     TCGv_i32 addr = tcg_temp_new_i32();
7582     TCGv_i32 tmp = tcg_const_i32(mode);
7583     gen_helper_get_r13_banked(addr, cpu_env, tmp);
7584     tcg_temp_free_i32(tmp);
7585     switch (amode) {
7586     case 0: /* DA */
7587         offset = -4;
7588         break;
7589     case 1: /* IA */
7590         offset = 0;
7591         break;
7592     case 2: /* DB */
7593         offset = -8;
7594         break;
7595     case 3: /* IB */
7596         offset = 4;
7597         break;
7598     default:
7599         abort();
7600     }
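     /* The switch above points addr at the lower of the two words stored
      * for each addressing mode; after the two stores addr ends up on the
      * upper word, and the writeback switch below converts that into the
      * final SP value (SP - 8 for DA/DB, SP + 8 for IA/IB).
      */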
7601     tcg_gen_addi_i32(addr, addr, offset);
7602     tmp = load_reg(s, 14);
7603     gen_aa32_st32(tmp, addr, get_mem_index(s));
7604     tcg_temp_free_i32(tmp);
7605     tmp = load_cpu_field(spsr);
7606     tcg_gen_addi_i32(addr, addr, 4);
7607     gen_aa32_st32(tmp, addr, get_mem_index(s));
7608     tcg_temp_free_i32(tmp);
7609     if (writeback) {
7610         switch (amode) {
7611         case 0:
7612             offset = -8;
7613             break;
7614         case 1:
7615             offset = 4;
7616             break;
7617         case 2:
7618             offset = -4;
7619             break;
7620         case 3:
7621             offset = 0;
7622             break;
7623         default:
7624             abort();
7625         }
7626         tcg_gen_addi_i32(addr, addr, offset);
7627         tmp = tcg_const_i32(mode);
7628         gen_helper_set_r13_banked(cpu_env, tmp, addr);
7629         tcg_temp_free_i32(tmp);
7630     }
7631     tcg_temp_free_i32(addr);
7632 }
7633
7634 static void disas_arm_insn(DisasContext *s, unsigned int insn)
7635 {
7636     unsigned int cond, val, op1, i, shift, rm, rs, rn, rd, sh;
7637     TCGv_i32 tmp;
7638     TCGv_i32 tmp2;
7639     TCGv_i32 tmp3;
7640     TCGv_i32 addr;
7641     TCGv_i64 tmp64;
7642
7643     /* M variants do not implement ARM mode.  */
7644     if (arm_dc_feature(s, ARM_FEATURE_M)) {
7645         goto illegal_op;
7646     }
7647     cond = insn >> 28;
7648     if (cond == 0xf) {
7649         /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
7650          * choose to UNDEF. In ARMv5 and above the space is used
7651          * for miscellaneous unconditional instructions.
7652          */
7653         ARCH(5);
7654
7655         /* Unconditional instructions.  */
7656         if (((insn >> 25) & 7) == 1) {
7657             /* NEON Data processing.  */
7658             if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
7659                 goto illegal_op;
7660             }
7661
7662             if (disas_neon_data_insn(s, insn)) {
7663                 goto illegal_op;
7664             }
7665             return;
7666         }
7667         if ((insn & 0x0f100000) == 0x04000000) {
7668             /* NEON load/store.  */
7669             if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
7670                 goto illegal_op;
7671             }
7672
7673             if (disas_neon_ls_insn(s, insn)) {
7674                 goto illegal_op;
7675             }
7676             return;
7677         }
7678         if ((insn & 0x0f000e10) == 0x0e000a00) {
7679             /* VFP.  */
7680             if (disas_vfp_insn(s, insn)) {
7681                 goto illegal_op;
7682             }
7683             return;
7684         }
7685         if (((insn & 0x0f30f000) == 0x0510f000) ||
7686             ((insn & 0x0f30f010) == 0x0710f000)) {
7687             if ((insn & (1 << 22)) == 0) {
7688                 /* PLDW; v7MP */
7689                 if (!arm_dc_feature(s, ARM_FEATURE_V7MP)) {
7690                     goto illegal_op;
7691                 }
7692             }
7693             /* Otherwise PLD; v5TE+ */
7694             ARCH(5TE);
7695             return;
7696         }
7697         if (((insn & 0x0f70f000) == 0x0450f000) ||
7698             ((insn & 0x0f70f010) == 0x0650f000)) {
7699             ARCH(7);
7700             return; /* PLI; v7 */
7701         }
7702         if (((insn & 0x0f700000) == 0x04100000) ||
7703             ((insn & 0x0f700010) == 0x06100000)) {
7704             if (!arm_dc_feature(s, ARM_FEATURE_V7MP)) {
7705                 goto illegal_op;
7706             }
7707             return; /* v7MP: Unallocated memory hint: must NOP */
7708         }
7709
7710         if ((insn & 0x0ffffdff) == 0x01010000) {
7711             ARCH(6);
7712             /* setend */
7713             if (((insn >> 9) & 1) != s->bswap_code) {
7714                 /* Dynamic endianness switching not implemented. */
7715                 qemu_log_mask(LOG_UNIMP, "arm: unimplemented setend\n");
7716                 goto illegal_op;
7717             }
7718             return;
7719         } else if ((insn & 0x0fffff00) == 0x057ff000) {
7720             switch ((insn >> 4) & 0xf) {
7721             case 1: /* clrex */
7722                 ARCH(6K);
7723                 gen_clrex(s);
7724                 return;
7725             case 4: /* dsb */
7726             case 5: /* dmb */
7727                 ARCH(7);
7728                 /* We don't emulate caches, so these are no-ops.  */
7729                 return;
7730             case 6: /* isb */
7731                 /* We need to break the TB after this insn to execute
7732                  * self-modifying code correctly and also to take
7733                  * any pending interrupts immediately.
7734                  */
7735                 gen_lookup_tb(s);
7736                 return;
7737             default:
7738                 goto illegal_op;
7739             }
7740         } else if ((insn & 0x0e5fffe0) == 0x084d0500) {
7741             /* srs */
7742             if (IS_USER(s)) {
7743                 goto illegal_op;
7744             }
7745             ARCH(6);
7746             gen_srs(s, (insn & 0x1f), (insn >> 23) & 3, insn & (1 << 21));
7747             return;
7748         } else if ((insn & 0x0e50ffe0) == 0x08100a00) {
7749             /* rfe */
7750             int32_t offset;
7751             if (IS_USER(s))
7752                 goto illegal_op;
7753             ARCH(6);
7754             rn = (insn >> 16) & 0xf;
7755             addr = load_reg(s, rn);
7756             i = (insn >> 23) & 3;
7757             switch (i) {
7758             case 0: offset = -4; break; /* DA */
7759             case 1: offset = 0; break; /* IA */
7760             case 2: offset = -8; break; /* DB */
7761             case 3: offset = 4; break; /* IB */
7762             default: abort();
7763             }
7764             if (offset)
7765                 tcg_gen_addi_i32(addr, addr, offset);
7766             /* Load PC into tmp and CPSR into tmp2.  */
7767             tmp = tcg_temp_new_i32();
7768             gen_aa32_ld32u(tmp, addr, get_mem_index(s));
7769             tcg_gen_addi_i32(addr, addr, 4);
7770             tmp2 = tcg_temp_new_i32();
7771             gen_aa32_ld32u(tmp2, addr, get_mem_index(s));
7772             if (insn & (1 << 21)) {
7773                 /* Base writeback.  */
7774                 switch (i) {
7775                 case 0: offset = -8; break;
7776                 case 1: offset = 4; break;
7777                 case 2: offset = -4; break;
7778                 case 3: offset = 0; break;
7779                 default: abort();
7780                 }
7781                 if (offset)
7782                     tcg_gen_addi_i32(addr, addr, offset);
7783                 store_reg(s, rn, addr);
7784             } else {
7785                 tcg_temp_free_i32(addr);
7786             }
7787             gen_rfe(s, tmp, tmp2);
7788             return;
7789         } else if ((insn & 0x0e000000) == 0x0a000000) {
7790             /* branch link and change to thumb (blx <offset>) */
7791             int32_t offset;
7792
7793             val = (uint32_t)s->pc;
7794             tmp = tcg_temp_new_i32();
7795             tcg_gen_movi_i32(tmp, val);
7796             store_reg(s, 14, tmp);
7797             /* Sign-extend the 24-bit offset */
7798             offset = (((int32_t)insn) << 8) >> 8;
7799             /* offset * 4 + bit24 * 2 + (thumb bit) */
7800             val += (offset << 2) | ((insn >> 23) & 2) | 1;
7801             /* pipeline offset */
7802             val += 4;
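             /* e.g. with the H bit (insn bit 24) set, the target works out
              * to (address of this insn) + 8 + imm24 * 4 + 2, with bit 0
              * set so that gen_bx_im switches to Thumb state.
              */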
7803             /* Protected by the ARCH(5) check above, near the start of the unconditional block. */
7804             gen_bx_im(s, val);
7805             return;
7806         } else if ((insn & 0x0e000f00) == 0x0c000100) {
7807             if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
7808                 /* iWMMXt register transfer.  */
7809                 if (extract32(s->c15_cpar, 1, 1)) {
7810                     if (!disas_iwmmxt_insn(s, insn)) {
7811                         return;
7812                     }
7813                 }
7814             }
7815         } else if ((insn & 0x0fe00000) == 0x0c400000) {
7816             /* Coprocessor double register transfer.  */
7817             ARCH(5TE);
7818         } else if ((insn & 0x0f000010) == 0x0e000010) {
7819             /* Additional coprocessor register transfer.  */
7820         } else if ((insn & 0x0ff10020) == 0x01000000) {
7821             uint32_t mask;
7822             uint32_t val;
7823             /* cps (privileged) */
7824             if (IS_USER(s))
7825                 return;
7826             mask = val = 0;
7827             if (insn & (1 << 19)) {
7828                 if (insn & (1 << 8))
7829                     mask |= CPSR_A;
7830                 if (insn & (1 << 7))
7831                     mask |= CPSR_I;
7832                 if (insn & (1 << 6))
7833                     mask |= CPSR_F;
7834                 if (insn & (1 << 18))
7835                     val |= mask;
7836             }
7837             if (insn & (1 << 17)) {
7838                 mask |= CPSR_M;
7839                 val |= (insn & 0x1f);
7840             }
7841             if (mask) {
7842                 gen_set_psr_im(s, mask, 0, val);
7843             }
7844             return;
7845         }
7846         goto illegal_op;
7847     }
7848     if (cond != 0xe) {
7849         /* If the condition is not "always", generate a conditional
7850            jump to the next instruction. */
7851         s->condlabel = gen_new_label();
7852         arm_gen_test_cc(cond ^ 1, s->condlabel);
7853         s->condjmp = 1;
7854     }
7855     if ((insn & 0x0f900000) == 0x03000000) {
7856         if ((insn & (1 << 21)) == 0) {
7857             ARCH(6T2);
7858             rd = (insn >> 12) & 0xf;
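             /* The 16-bit immediate is split across the encoding: bits
              * [19:16] hold imm4 (the top nibble) and bits [11:0] hold imm12.
              */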
7859             val = ((insn >> 4) & 0xf000) | (insn & 0xfff);
7860             if ((insn & (1 << 22)) == 0) {
7861                 /* MOVW */
7862                 tmp = tcg_temp_new_i32();
7863                 tcg_gen_movi_i32(tmp, val);
7864             } else {
7865                 /* MOVT */
7866                 tmp = load_reg(s, rd);
7867                 tcg_gen_ext16u_i32(tmp, tmp);
7868                 tcg_gen_ori_i32(tmp, tmp, val << 16);
7869             }
7870             store_reg(s, rd, tmp);
7871         } else {
7872             if (((insn >> 12) & 0xf) != 0xf)
7873                 goto illegal_op;
7874             if (((insn >> 16) & 0xf) == 0) {
7875                 gen_nop_hint(s, insn & 0xff);
7876             } else {
7877                 /* CPSR = immediate */
7878                 val = insn & 0xff;
7879                 shift = ((insn >> 8) & 0xf) * 2;
7880                 if (shift)
7881                     val = (val >> shift) | (val << (32 - shift));
7882                 i = ((insn & (1 << 22)) != 0);
7883                 if (gen_set_psr_im(s, msr_mask(s, (insn >> 16) & 0xf, i),
7884                                    i, val)) {
7885                     goto illegal_op;
7886                 }
7887             }
7888         }
7889     } else if ((insn & 0x0f900000) == 0x01000000
7890                && (insn & 0x00000090) != 0x00000090) {
7891         /* miscellaneous instructions */
7892         op1 = (insn >> 21) & 3;
7893         sh = (insn >> 4) & 0xf;
7894         rm = insn & 0xf;
7895         switch (sh) {
7896         case 0x0: /* move program status register */
7897             if (op1 & 1) {
7898                 /* PSR = reg */
7899                 tmp = load_reg(s, rm);
7900                 i = ((op1 & 2) != 0);
7901                 if (gen_set_psr(s, msr_mask(s, (insn >> 16) & 0xf, i), i, tmp))
7902                     goto illegal_op;
7903             } else {
7904                 /* reg = PSR */
7905                 rd = (insn >> 12) & 0xf;
7906                 if (op1 & 2) {
7907                     if (IS_USER(s))
7908                         goto illegal_op;
7909                     tmp = load_cpu_field(spsr);
7910                 } else {
7911                     tmp = tcg_temp_new_i32();
7912                     gen_helper_cpsr_read(tmp, cpu_env);
7913                 }
7914                 store_reg(s, rd, tmp);
7915             }
7916             break;
7917         case 0x1:
7918             if (op1 == 1) {
7919                 /* branch/exchange thumb (bx).  */
7920                 ARCH(4T);
7921                 tmp = load_reg(s, rm);
7922                 gen_bx(s, tmp);
7923             } else if (op1 == 3) {
7924                 /* clz */
7925                 ARCH(5);
7926                 rd = (insn >> 12) & 0xf;
7927                 tmp = load_reg(s, rm);
7928                 gen_helper_clz(tmp, tmp);
7929                 store_reg(s, rd, tmp);
7930             } else {
7931                 goto illegal_op;
7932             }
7933             break;
7934         case 0x2:
7935             if (op1 == 1) {
7936                 ARCH(5J); /* bxj */
7937                 /* Trivial implementation equivalent to bx.  */
7938                 tmp = load_reg(s, rm);
7939                 gen_bx(s, tmp);
7940             } else {
7941                 goto illegal_op;
7942             }
7943             break;
7944         case 0x3:
7945             if (op1 != 1)
7946                 goto illegal_op;
7947
7948             ARCH(5);
7949             /* branch link/exchange thumb (blx) */
7950             tmp = load_reg(s, rm);
7951             tmp2 = tcg_temp_new_i32();
7952             tcg_gen_movi_i32(tmp2, s->pc);
7953             store_reg(s, 14, tmp2);
7954             gen_bx(s, tmp);
7955             break;
7956         case 0x4:
7957         {
7958             /* crc32/crc32c */
7959             uint32_t c = extract32(insn, 8, 4);
7960
7961             /* Check this CPU supports ARMv8 CRC instructions.
7962              * op1 == 3 is UNPREDICTABLE; we handle it as UNDEFINED.
7963              * Bits 8, 10 and 11 should be zero.
7964              */
7965             if (!arm_dc_feature(s, ARM_FEATURE_CRC) || op1 == 0x3 ||
7966                 (c & 0xd) != 0) {
7967                 goto illegal_op;
7968             }
7969
7970             rn = extract32(insn, 16, 4);
7971             rd = extract32(insn, 12, 4);
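             /* op1 encodes the operand size (1 << op1 bytes, i.e. CRC32B/H/W)
              * and insn bit 9 selects the CRC32C (Castagnoli) variant.
              */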
7972
7973             tmp = load_reg(s, rn);
7974             tmp2 = load_reg(s, rm);
7975             if (op1 == 0) {
7976                 tcg_gen_andi_i32(tmp2, tmp2, 0xff);
7977             } else if (op1 == 1) {
7978                 tcg_gen_andi_i32(tmp2, tmp2, 0xffff);
7979             }
7980             tmp3 = tcg_const_i32(1 << op1);
7981             if (c & 0x2) {
7982                 gen_helper_crc32c(tmp, tmp, tmp2, tmp3);
7983             } else {
7984                 gen_helper_crc32(tmp, tmp, tmp2, tmp3);
7985             }
7986             tcg_temp_free_i32(tmp2);
7987             tcg_temp_free_i32(tmp3);
7988             store_reg(s, rd, tmp);
7989             break;
7990         }
7991         case 0x5: /* saturating add/subtract */
7992             ARCH(5TE);
7993             rd = (insn >> 12) & 0xf;
7994             rn = (insn >> 16) & 0xf;
7995             tmp = load_reg(s, rm);
7996             tmp2 = load_reg(s, rn);
7997             if (op1 & 2)
7998                 gen_helper_double_saturate(tmp2, cpu_env, tmp2);
7999             if (op1 & 1)
8000                 gen_helper_sub_saturate(tmp, cpu_env, tmp, tmp2);
8001             else
8002                 gen_helper_add_saturate(tmp, cpu_env, tmp, tmp2);
8003             tcg_temp_free_i32(tmp2);
8004             store_reg(s, rd, tmp);
8005             break;
8006         case 7:
8007         {
8008             int imm16 = extract32(insn, 0, 4) | (extract32(insn, 8, 12) << 4);
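             /* The 16-bit immediate is split around the fixed bits of the
              * encoding: insn bits [19:8] form the top 12 bits and bits
              * [3:0] the bottom nibble.
              */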
8009             switch (op1) {
8010             case 1:
8011                 /* bkpt */
8012                 ARCH(5);
8013                 gen_exception_insn(s, 4, EXCP_BKPT,
8014                                    syn_aa32_bkpt(imm16, false),
8015                                    default_exception_el(s));
8016                 break;
8017             case 2:
8018                 /* Hypervisor call (v7) */
8019                 ARCH(7);
8020                 if (IS_USER(s)) {
8021                     goto illegal_op;
8022                 }
8023                 gen_hvc(s, imm16);
8024                 break;
8025             case 3:
8026                 /* Secure monitor call (v6+) */
8027                 ARCH(6K);
8028                 if (IS_USER(s)) {
8029                     goto illegal_op;
8030                 }
8031                 gen_smc(s);
8032                 break;
8033             default:
8034                 goto illegal_op;
8035             }
8036             break;
8037         }
8038         case 0x8: /* signed multiply */
8039         case 0xa:
8040         case 0xc:
8041         case 0xe:
8042             ARCH(5TE);
8043             rs = (insn >> 8) & 0xf;
8044             rn = (insn >> 12) & 0xf;
8045             rd = (insn >> 16) & 0xf;
8046             if (op1 == 1) {
8047                 /* (32 * 16) >> 16 */
8048                 tmp = load_reg(s, rm);
8049                 tmp2 = load_reg(s, rs);
8050                 if (sh & 4)
8051                     tcg_gen_sari_i32(tmp2, tmp2, 16);
8052                 else
8053                     gen_sxth(tmp2);
8054                 tmp64 = gen_muls_i64_i32(tmp, tmp2);
8055                 tcg_gen_shri_i64(tmp64, tmp64, 16);
8056                 tmp = tcg_temp_new_i32();
8057                 tcg_gen_extrl_i64_i32(tmp, tmp64);
8058                 tcg_temp_free_i64(tmp64);
8059                 if ((sh & 2) == 0) {
8060                     tmp2 = load_reg(s, rn);
8061                     gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
8062                     tcg_temp_free_i32(tmp2);
8063                 }
8064                 store_reg(s, rd, tmp);
8065             } else {
8066                 /* 16 * 16 */
8067                 tmp = load_reg(s, rm);
8068                 tmp2 = load_reg(s, rs);
8069                 gen_mulxy(tmp, tmp2, sh & 2, sh & 4);
8070                 tcg_temp_free_i32(tmp2);
8071                 if (op1 == 2) {
8072                     tmp64 = tcg_temp_new_i64();
8073                     tcg_gen_ext_i32_i64(tmp64, tmp);
8074                     tcg_temp_free_i32(tmp);
8075                     gen_addq(s, tmp64, rn, rd);
8076                     gen_storeq_reg(s, rn, rd, tmp64);
8077                     tcg_temp_free_i64(tmp64);
8078                 } else {
8079                     if (op1 == 0) {
8080                         tmp2 = load_reg(s, rn);
8081                         gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
8082                         tcg_temp_free_i32(tmp2);
8083                     }
8084                     store_reg(s, rd, tmp);
8085                 }
8086             }
8087             break;
8088         default:
8089             goto illegal_op;
8090         }
8091     } else if (((insn & 0x0e000000) == 0 &&
8092                 (insn & 0x00000090) != 0x90) ||
8093                ((insn & 0x0e000000) == (1 << 25))) {
8094         int set_cc, logic_cc, shiftop;
8095
8096         op1 = (insn >> 21) & 0xf;
8097         set_cc = (insn >> 20) & 1;
8098         logic_cc = table_logic_cc[op1] & set_cc;
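         /* table_logic_cc marks which of the 16 data-processing ops are
          * logical; for those, an S suffix updates N and Z from the result
          * and C from the shifter carry-out, rather than via the arithmetic
          * flag helpers.
          */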
8099
8100         /* data processing instruction */
8101         if (insn & (1 << 25)) {
8102             /* immediate operand */
8103             val = insn & 0xff;
8104             shift = ((insn >> 8) & 0xf) * 2;
8105             if (shift) {
8106                 val = (val >> shift) | (val << (32 - shift));
8107             }
8108             tmp2 = tcg_temp_new_i32();
8109             tcg_gen_movi_i32(tmp2, val);
8110             if (logic_cc && shift) {
8111                 gen_set_CF_bit31(tmp2);
8112             }
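             /* e.g. imm8 = 0xff with rotate field 4 yields 0xff ror 8 ==
              * 0xff000000; for a flag-setting logical op with a nonzero
              * rotate, the carry out is bit 31 of the rotated immediate.
              */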
8113         } else {
8114             /* register */
8115             rm = (insn) & 0xf;
8116             tmp2 = load_reg(s, rm);
8117             shiftop = (insn >> 5) & 3;
8118             if (!(insn & (1 << 4))) {
8119                 shift = (insn >> 7) & 0x1f;
8120                 gen_arm_shift_im(tmp2, shiftop, shift, logic_cc);
8121             } else {
8122                 rs = (insn >> 8) & 0xf;
8123                 tmp = load_reg(s, rs);
8124                 gen_arm_shift_reg(tmp2, shiftop, tmp, logic_cc);
8125             }
8126         }
8127         if (op1 != 0x0f && op1 != 0x0d) {
8128             rn = (insn >> 16) & 0xf;
8129             tmp = load_reg(s, rn);
8130         } else {
8131             TCGV_UNUSED_I32(tmp);
8132         }
8133         rd = (insn >> 12) & 0xf;
8134         switch (op1) {
8135         case 0x00:
8136             tcg_gen_and_i32(tmp, tmp, tmp2);
8137             if (logic_cc) {
8138                 gen_logic_CC(tmp);
8139             }
8140             store_reg_bx(s, rd, tmp);
8141             break;
8142         case 0x01:
8143             tcg_gen_xor_i32(tmp, tmp, tmp2);
8144             if (logic_cc) {
8145                 gen_logic_CC(tmp);
8146             }
8147             store_reg_bx(s, rd, tmp);
8148             break;
8149         case 0x02:
8150             if (set_cc && rd == 15) {
8151                 /* SUBS r15, ... is used for exception return.  */
8152                 if (IS_USER(s)) {
8153                     goto illegal_op;
8154                 }
8155                 gen_sub_CC(tmp, tmp, tmp2);
8156                 gen_exception_return(s, tmp);
8157             } else {
8158                 if (set_cc) {
8159                     gen_sub_CC(tmp, tmp, tmp2);
8160                 } else {
8161                     tcg_gen_sub_i32(tmp, tmp, tmp2);
8162                 }
8163                 store_reg_bx(s, rd, tmp);
8164             }
8165             break;
8166         case 0x03:
8167             if (set_cc) {
8168                 gen_sub_CC(tmp, tmp2, tmp);
8169             } else {
8170                 tcg_gen_sub_i32(tmp, tmp2, tmp);
8171             }
8172             store_reg_bx(s, rd, tmp);
8173             break;
8174         case 0x04:
8175             if (set_cc) {
8176                 gen_add_CC(tmp, tmp, tmp2);
8177             } else {
8178                 tcg_gen_add_i32(tmp, tmp, tmp2);
8179             }
8180             store_reg_bx(s, rd, tmp);
8181             break;
8182         case 0x05:
8183             if (set_cc) {
8184                 gen_adc_CC(tmp, tmp, tmp2);
8185             } else {
8186                 gen_add_carry(tmp, tmp, tmp2);
8187             }
8188             store_reg_bx(s, rd, tmp);
8189             break;
8190         case 0x06:
8191             if (set_cc) {
8192                 gen_sbc_CC(tmp, tmp, tmp2);
8193             } else {
8194                 gen_sub_carry(tmp, tmp, tmp2);
8195             }
8196             store_reg_bx(s, rd, tmp);
8197             break;
8198         case 0x07:
8199             if (set_cc) {
8200                 gen_sbc_CC(tmp, tmp2, tmp);
8201             } else {
8202                 gen_sub_carry(tmp, tmp2, tmp);
8203             }
8204             store_reg_bx(s, rd, tmp);
8205             break;
8206         case 0x08:
8207             if (set_cc) {
8208                 tcg_gen_and_i32(tmp, tmp, tmp2);
8209                 gen_logic_CC(tmp);
8210             }
8211             tcg_temp_free_i32(tmp);
8212             break;
8213         case 0x09:
8214             if (set_cc) {
8215                 tcg_gen_xor_i32(tmp, tmp, tmp2);
8216                 gen_logic_CC(tmp);
8217             }
8218             tcg_temp_free_i32(tmp);
8219             break;
8220         case 0x0a:
8221             if (set_cc) {
8222                 gen_sub_CC(tmp, tmp, tmp2);
8223             }
8224             tcg_temp_free_i32(tmp);
8225             break;
8226         case 0x0b:
8227             if (set_cc) {
8228                 gen_add_CC(tmp, tmp, tmp2);
8229             }
8230             tcg_temp_free_i32(tmp);
8231             break;
8232         case 0x0c:
8233             tcg_gen_or_i32(tmp, tmp, tmp2);
8234             if (logic_cc) {
8235                 gen_logic_CC(tmp);
8236             }
8237             store_reg_bx(s, rd, tmp);
8238             break;
8239         case 0x0d:
8240             if (logic_cc && rd == 15) {
8241                 /* MOVS r15, ... is used for exception return.  */
8242                 if (IS_USER(s)) {
8243                     goto illegal_op;
8244                 }
8245                 gen_exception_return(s, tmp2);
8246             } else {
8247                 if (logic_cc) {
8248                     gen_logic_CC(tmp2);
8249                 }
8250                 store_reg_bx(s, rd, tmp2);
8251             }
8252             break;
8253         case 0x0e:
8254             tcg_gen_andc_i32(tmp, tmp, tmp2);
8255             if (logic_cc) {
8256                 gen_logic_CC(tmp);
8257             }
8258             store_reg_bx(s, rd, tmp);
8259             break;
8260         default:
8261         case 0x0f:
8262             tcg_gen_not_i32(tmp2, tmp2);
8263             if (logic_cc) {
8264                 gen_logic_CC(tmp2);
8265             }
8266             store_reg_bx(s, rd, tmp2);
8267             break;
8268         }
8269         if (op1 != 0x0f && op1 != 0x0d) {
8270             tcg_temp_free_i32(tmp2);
8271         }
8272     } else {
8273         /* other instructions */
8274         op1 = (insn >> 24) & 0xf;
8275         switch (op1) {
8276         case 0x0:
8277         case 0x1:
8278             /* multiplies, extra load/stores */
8279             sh = (insn >> 5) & 3;
8280             if (sh == 0) {
8281                 if (op1 == 0x0) {
8282                     rd = (insn >> 16) & 0xf;
8283                     rn = (insn >> 12) & 0xf;
8284                     rs = (insn >> 8) & 0xf;
8285                     rm = (insn) & 0xf;
8286                     op1 = (insn >> 20) & 0xf;
8287                     switch (op1) {
8288                     case 0: case 1: case 2: case 3: case 6:
8289                         /* 32 bit mul */
8290                         tmp = load_reg(s, rs);
8291                         tmp2 = load_reg(s, rm);
8292                         tcg_gen_mul_i32(tmp, tmp, tmp2);
8293                         tcg_temp_free_i32(tmp2);
8294                         if (insn & (1 << 22)) {
8295                             /* Subtract (mls) */
8296                             ARCH(6T2);
8297                             tmp2 = load_reg(s, rn);
8298                             tcg_gen_sub_i32(tmp, tmp2, tmp);
8299                             tcg_temp_free_i32(tmp2);
8300                         } else if (insn & (1 << 21)) {
8301                             /* Add */
8302                             tmp2 = load_reg(s, rn);
8303                             tcg_gen_add_i32(tmp, tmp, tmp2);
8304                             tcg_temp_free_i32(tmp2);
8305                         }
8306                         if (insn & (1 << 20))
8307                             gen_logic_CC(tmp);
8308                         store_reg(s, rd, tmp);
8309                         break;
8310                     case 4:
8311                         /* 64 bit mul double accumulate (UMAAL) */
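                         /* UMAAL: RdHi:RdLo = Rm * Rs + RdHi + RdLo; this
                          * cannot overflow 64 bits, since (2^32 - 1)^2 +
                          * 2 * (2^32 - 1) == 2^64 - 1.
                          */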
8312                         ARCH(6);
8313                         tmp = load_reg(s, rs);
8314                         tmp2 = load_reg(s, rm);
8315                         tmp64 = gen_mulu_i64_i32(tmp, tmp2);
8316                         gen_addq_lo(s, tmp64, rn);
8317                         gen_addq_lo(s, tmp64, rd);
8318                         gen_storeq_reg(s, rn, rd, tmp64);
8319                         tcg_temp_free_i64(tmp64);
8320                         break;
8321                     case 8: case 9: case 10: case 11:
8322                     case 12: case 13: case 14: case 15:
8323                         /* 64 bit mul: UMULL, UMLAL, SMULL, SMLAL. */
8324                         tmp = load_reg(s, rs);
8325                         tmp2 = load_reg(s, rm);
8326                         if (insn & (1 << 22)) {
8327                             tcg_gen_muls2_i32(tmp, tmp2, tmp, tmp2);
8328                         } else {
8329                             tcg_gen_mulu2_i32(tmp, tmp2, tmp, tmp2);
8330                         }
8331                         if (insn & (1 << 21)) { /* mult accumulate */
8332                             TCGv_i32 al = load_reg(s, rn);
8333                             TCGv_i32 ah = load_reg(s, rd);
8334                             tcg_gen_add2_i32(tmp, tmp2, tmp, tmp2, al, ah);
8335                             tcg_temp_free_i32(al);
8336                             tcg_temp_free_i32(ah);
8337                         }
8338                         if (insn & (1 << 20)) {
8339                             gen_logicq_cc(tmp, tmp2);
8340                         }
8341                         store_reg(s, rn, tmp);
8342                         store_reg(s, rd, tmp2);
8343                         break;
8344                     default:
8345                         goto illegal_op;
8346                     }
8347                 } else {
8348                     rn = (insn >> 16) & 0xf;
8349                     rd = (insn >> 12) & 0xf;
8350                     if (insn & (1 << 23)) {
8351                         /* load/store exclusive */
8352                         int op2 = (insn >> 8) & 3;
8353                         op1 = (insn >> 21) & 0x3;
8354
8355                         switch (op2) {
8356                         case 0: /* lda/stl */
8357                             if (op1 == 1) {
8358                                 goto illegal_op;
8359                             }
8360                             ARCH(8);
8361                             break;
8362                         case 1: /* reserved */
8363                             goto illegal_op;
8364                         case 2: /* ldaex/stlex */
8365                             ARCH(8);
8366                             break;
8367                         case 3: /* ldrex/strex */
8368                             if (op1) {
8369                                 ARCH(6K);
8370                             } else {
8371                                 ARCH(6);
8372                             }
8373                             break;
8374                         }
8375
8376                         addr = tcg_temp_local_new_i32();
8377                         load_reg_var(s, addr, rn);
8378
8379                         /* Since the emulation does not have barriers,
8380                            the acquire/release semantics need no special
8381                            handling */
8382                         if (op2 == 0) {
8383                             if (insn & (1 << 20)) {
8384                                 tmp = tcg_temp_new_i32();
8385                                 switch (op1) {
8386                                 case 0: /* lda */
8387                                     gen_aa32_ld32u(tmp, addr, get_mem_index(s));
8388                                     break;
8389                                 case 2: /* ldab */
8390                                     gen_aa32_ld8u(tmp, addr, get_mem_index(s));
8391                                     break;
8392                                 case 3: /* ldah */
8393                                     gen_aa32_ld16u(tmp, addr, get_mem_index(s));
8394                                     break;
8395                                 default:
8396                                     abort();
8397                                 }
8398                                 store_reg(s, rd, tmp);
8399                             } else {
8400                                 rm = insn & 0xf;
8401                                 tmp = load_reg(s, rm);
8402                                 switch (op1) {
8403                                 case 0: /* stl */
8404                                     gen_aa32_st32(tmp, addr, get_mem_index(s));
8405                                     break;
8406                                 case 2: /* stlb */
8407                                     gen_aa32_st8(tmp, addr, get_mem_index(s));
8408                                     break;
8409                                 case 3: /* stlh */
8410                                     gen_aa32_st16(tmp, addr, get_mem_index(s));
8411                                     break;
8412                                 default:
8413                                     abort();
8414                                 }
8415                                 tcg_temp_free_i32(tmp);
8416                             }
8417                         } else if (insn & (1 << 20)) {
8418                             switch (op1) {
8419                             case 0: /* ldrex */
8420                                 gen_load_exclusive(s, rd, 15, addr, 2);
8421                                 break;
8422                             case 1: /* ldrexd */
8423                                 gen_load_exclusive(s, rd, rd + 1, addr, 3);
8424                                 break;
8425                             case 2: /* ldrexb */
8426                                 gen_load_exclusive(s, rd, 15, addr, 0);
8427                                 break;
8428                             case 3: /* ldrexh */
8429                                 gen_load_exclusive(s, rd, 15, addr, 1);
8430                                 break;
8431                             default:
8432                                 abort();
8433                             }
8434                         } else {
8435                             rm = insn & 0xf;
8436                             switch (op1) {
8437                             case 0:  /*  strex */
8438                                 gen_store_exclusive(s, rd, rm, 15, addr, 2);
8439                                 break;
8440                             case 1: /*  strexd */
8441                                 gen_store_exclusive(s, rd, rm, rm + 1, addr, 3);
8442                                 break;
8443                             case 2: /*  strexb */
8444                                 gen_store_exclusive(s, rd, rm, 15, addr, 0);
8445                                 break;
8446                             case 3: /* strexh */
8447                                 gen_store_exclusive(s, rd, rm, 15, addr, 1);
8448                                 break;
8449                             default:
8450                                 abort();
8451                             }
8452                         }
8453                         tcg_temp_free_i32(addr);
8454                     } else {
8455                         /* SWP instruction */
8456                         rm = (insn) & 0xf;
8457
8458                         /* ??? This is not really atomic.  However we know
8459                            we never have multiple CPUs running in parallel,
8460                            so it is good enough.  */
8461                         addr = load_reg(s, rn);
8462                         tmp = load_reg(s, rm);
8463                         tmp2 = tcg_temp_new_i32();
8464                         if (insn & (1 << 22)) {
8465                             gen_aa32_ld8u(tmp2, addr, get_mem_index(s));
8466                             gen_aa32_st8(tmp, addr, get_mem_index(s));
8467                         } else {
8468                             gen_aa32_ld32u(tmp2, addr, get_mem_index(s));
8469                             gen_aa32_st32(tmp, addr, get_mem_index(s));
8470                         }
8471                         tcg_temp_free_i32(tmp);
8472                         tcg_temp_free_i32(addr);
8473                         store_reg(s, rd, tmp2);
8474                     }
8475                 }
8476             } else {
8477                 int address_offset;
8478                 bool load = insn & (1 << 20);
8479                 bool doubleword = false;
8480                 /* Misc load/store */
8481                 rn = (insn >> 16) & 0xf;
8482                 rd = (insn >> 12) & 0xf;
8483
8484                 if (!load && (sh & 2)) {
8485                     /* doubleword */
8486                     ARCH(5TE);
8487                     if (rd & 1) {
8488                         /* UNPREDICTABLE; we choose to UNDEF */
8489                         goto illegal_op;
8490                     }
8491                     load = (sh & 1) == 0;
8492                     doubleword = true;
8493                 }
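                 /* Otherwise sh selects the transfer: on loads, 1 = LDRH,
                  * 2 = LDRSB, 3 = LDRSH; stores here are always STRH, since
                  * sh = 2/3 with L clear was claimed above as LDRD/STRD.
                  */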
8494
8495                 addr = load_reg(s, rn);
8496                 if (insn & (1 << 24))
8497                     gen_add_datah_offset(s, insn, 0, addr);
8498                 address_offset = 0;
8499
8500                 if (doubleword) {
8501                     if (!load) {
8502                         /* store */
8503                         tmp = load_reg(s, rd);
8504                         gen_aa32_st32(tmp, addr, get_mem_index(s));
8505                         tcg_temp_free_i32(tmp);
8506                         tcg_gen_addi_i32(addr, addr, 4);
8507                         tmp = load_reg(s, rd + 1);
8508                         gen_aa32_st32(tmp, addr, get_mem_index(s));
8509                         tcg_temp_free_i32(tmp);
8510                     } else {
8511                         /* load */
8512                         tmp = tcg_temp_new_i32();
8513                         gen_aa32_ld32u(tmp, addr, get_mem_index(s));
8514                         store_reg(s, rd, tmp);
8515                         tcg_gen_addi_i32(addr, addr, 4);
8516                         tmp = tcg_temp_new_i32();
8517                         gen_aa32_ld32u(tmp, addr, get_mem_index(s));
8518                         rd++;
8519                     }
8520                     address_offset = -4;
8521                 } else if (load) {
8522                     /* load */
8523                     tmp = tcg_temp_new_i32();
8524                     switch (sh) {
8525                     case 1:
8526                         gen_aa32_ld16u(tmp, addr, get_mem_index(s));
8527                         break;
8528                     case 2:
8529                         gen_aa32_ld8s(tmp, addr, get_mem_index(s));
8530                         break;
8531                     default:
8532                     case 3:
8533                         gen_aa32_ld16s(tmp, addr, get_mem_index(s));
8534                         break;
8535                     }
8536                 } else {
8537                     /* store */
8538                     tmp = load_reg(s, rd);
8539                     gen_aa32_st16(tmp, addr, get_mem_index(s));
8540                     tcg_temp_free_i32(tmp);
8541                 }
8542                 /* Perform base writeback before the loaded value to
8543                    ensure correct behavior with overlapping index registers.
8544                    ldrd with base writeback is undefined if the
8545                    destination and index registers overlap.  */
8546                 if (!(insn & (1 << 24))) {
8547                     gen_add_datah_offset(s, insn, address_offset, addr);
8548                     store_reg(s, rn, addr);
8549                 } else if (insn & (1 << 21)) {
8550                     if (address_offset)
8551                         tcg_gen_addi_i32(addr, addr, address_offset);
8552                     store_reg(s, rn, addr);
8553                 } else {
8554                     tcg_temp_free_i32(addr);
8555                 }
8556                 if (load) {
8557                     /* Complete the load.  */
8558                     store_reg(s, rd, tmp);
8559                 }
8560             }
8561             break;
8562         case 0x4:
8563         case 0x5:
8564             goto do_ldst;
8565         case 0x6:
8566         case 0x7:
8567             if (insn & (1 << 4)) {
8568                 ARCH(6);
8569                 /* Armv6 Media instructions.  */
8570                 rm = insn & 0xf;
8571                 rn = (insn >> 16) & 0xf;
8572                 rd = (insn >> 12) & 0xf;
8573                 rs = (insn >> 8) & 0xf;
8574                 switch ((insn >> 23) & 3) {
8575                 case 0: /* Parallel add/subtract.  */
8576                     op1 = (insn >> 20) & 7;
8577                     tmp = load_reg(s, rn);
8578                     tmp2 = load_reg(s, rm);
8579                     sh = (insn >> 5) & 7;
8580                     if ((op1 & 3) == 0 || sh == 5 || sh == 6)
8581                         goto illegal_op;
8582                     gen_arm_parallel_addsub(op1, sh, tmp, tmp2);
8583                     tcg_temp_free_i32(tmp2);
8584                     store_reg(s, rd, tmp);
8585                     break;
8586                 case 1:
8587                     if ((insn & 0x00700020) == 0) {
8588                         /* Halfword pack.  */
8589                         tmp = load_reg(s, rn);
8590                         tmp2 = load_reg(s, rm);
8591                         shift = (insn >> 7) & 0x1f;
8592                         if (insn & (1 << 6)) {
8593                             /* pkhtb */
8594                             if (shift == 0)
8595                                 shift = 31;
8596                             tcg_gen_sari_i32(tmp2, tmp2, shift);
8597                             tcg_gen_andi_i32(tmp, tmp, 0xffff0000);
8598                             tcg_gen_ext16u_i32(tmp2, tmp2);
8599                         } else {
8600                             /* pkhbt */
8601                             if (shift)
8602                                 tcg_gen_shli_i32(tmp2, tmp2, shift);
8603                             tcg_gen_ext16u_i32(tmp, tmp);
8604                             tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
8605                         }
8606                         tcg_gen_or_i32(tmp, tmp, tmp2);
8607                         tcg_temp_free_i32(tmp2);
8608                         store_reg(s, rd, tmp);
8609                     } else if ((insn & 0x00200020) == 0x00200000) {
8610                         /* [us]sat */
8611                         tmp = load_reg(s, rm);
8612                         shift = (insn >> 7) & 0x1f;
8613                         if (insn & (1 << 6)) {
8614                             if (shift == 0)
8615                                 shift = 31;
8616                             tcg_gen_sari_i32(tmp, tmp, shift);
8617                         } else {
8618                             tcg_gen_shli_i32(tmp, tmp, shift);
8619                         }
8620                         sh = (insn >> 16) & 0x1f;
8621                         tmp2 = tcg_const_i32(sh);
8622                         if (insn & (1 << 22))
8623                           gen_helper_usat(tmp, cpu_env, tmp, tmp2);
8624                         else
8625                           gen_helper_ssat(tmp, cpu_env, tmp, tmp2);
8626                         tcg_temp_free_i32(tmp2);
8627                         store_reg(s, rd, tmp);
8628                     } else if ((insn & 0x00300fe0) == 0x00200f20) {
8629                         /* [us]sat16 */
8630                         tmp = load_reg(s, rm);
8631                         sh = (insn >> 16) & 0x1f;
8632                         tmp2 = tcg_const_i32(sh);
8633                         if (insn & (1 << 22))
8634                           gen_helper_usat16(tmp, cpu_env, tmp, tmp2);
8635                         else
8636                           gen_helper_ssat16(tmp, cpu_env, tmp, tmp2);
8637                         tcg_temp_free_i32(tmp2);
8638                         store_reg(s, rd, tmp);
8639                     } else if ((insn & 0x00700fe0) == 0x00000fa0) {
8640                         /* Select bytes.  */
8641                         tmp = load_reg(s, rn);
8642                         tmp2 = load_reg(s, rm);
8643                         tmp3 = tcg_temp_new_i32();
8644                         tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUARMState, GE));
8645                         gen_helper_sel_flags(tmp, tmp3, tmp, tmp2);
8646                         tcg_temp_free_i32(tmp3);
8647                         tcg_temp_free_i32(tmp2);
8648                         store_reg(s, rd, tmp);
8649                     } else if ((insn & 0x000003e0) == 0x00000060) {
8650                         tmp = load_reg(s, rm);
8651                         shift = (insn >> 10) & 3;
8652                         /* ??? In many cases it's not necessary to do a
8653                            rotate, a shift is sufficient.  */
8654                         if (shift != 0)
8655                             tcg_gen_rotri_i32(tmp, tmp, shift * 8);
8656                         op1 = (insn >> 20) & 7;
8657                         switch (op1) {
8658                         case 0: gen_sxtb16(tmp);  break;
8659                         case 2: gen_sxtb(tmp);    break;
8660                         case 3: gen_sxth(tmp);    break;
8661                         case 4: gen_uxtb16(tmp);  break;
8662                         case 6: gen_uxtb(tmp);    break;
8663                         case 7: gen_uxth(tmp);    break;
8664                         default: goto illegal_op;
8665                         }
8666                         if (rn != 15) {
8667                             tmp2 = load_reg(s, rn);
8668                             if ((op1 & 3) == 0) {
8669                                 gen_add16(tmp, tmp2);
8670                             } else {
8671                                 tcg_gen_add_i32(tmp, tmp, tmp2);
8672                                 tcg_temp_free_i32(tmp2);
8673                             }
8674                         }
8675                         store_reg(s, rd, tmp);
8676                     } else if ((insn & 0x003f0f60) == 0x003f0f20) {
8677                         /* rev */
8678                         tmp = load_reg(s, rm);
8679                         if (insn & (1 << 22)) {
8680                             if (insn & (1 << 7)) {
8681                                 gen_revsh(tmp);
8682                             } else {
8683                                 ARCH(6T2);
8684                                 gen_helper_rbit(tmp, tmp);
8685                             }
8686                         } else {
8687                             if (insn & (1 << 7))
8688                                 gen_rev16(tmp);
8689                             else
8690                                 tcg_gen_bswap32_i32(tmp, tmp);
8691                         }
8692                         store_reg(s, rd, tmp);
8693                     } else {
8694                         goto illegal_op;
8695                     }
8696                     break;
8697                 case 2: /* Multiplies (Type 3).  */
8698                     switch ((insn >> 20) & 0x7) {
8699                     case 5:
8700                         if (((insn >> 6) ^ (insn >> 7)) & 1) {
                            /* op2 not 00x or 11x: UNDEF */
8702                             goto illegal_op;
8703                         }
8704                         /* Signed multiply most significant [accumulate].
8705                            (SMMUL, SMMLA, SMMLS) */
8706                         tmp = load_reg(s, rm);
8707                         tmp2 = load_reg(s, rs);
8708                         tmp64 = gen_muls_i64_i32(tmp, tmp2);
8709
8710                         if (rd != 15) {
8711                             tmp = load_reg(s, rd);
8712                             if (insn & (1 << 6)) {
8713                                 tmp64 = gen_subq_msw(tmp64, tmp);
8714                             } else {
8715                                 tmp64 = gen_addq_msw(tmp64, tmp);
8716                             }
8717                         }
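                        /* Bit 5 is the R (rounding) bit: add 0x80000000 so
                         * that taking the high word rounds to nearest.
                         */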
8718                         if (insn & (1 << 5)) {
8719                             tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u);
8720                         }
8721                         tcg_gen_shri_i64(tmp64, tmp64, 32);
8722                         tmp = tcg_temp_new_i32();
8723                         tcg_gen_extrl_i64_i32(tmp, tmp64);
8724                         tcg_temp_free_i64(tmp64);
8725                         store_reg(s, rn, tmp);
8726                         break;
8727                     case 0:
8728                     case 4:
8729                         /* SMLAD, SMUAD, SMLSD, SMUSD, SMLALD, SMLSLD */
8730                         if (insn & (1 << 7)) {
8731                             goto illegal_op;
8732                         }
8733                         tmp = load_reg(s, rm);
8734                         tmp2 = load_reg(s, rs);
8735                         if (insn & (1 << 5))
8736                             gen_swap_half(tmp2);
8737                         gen_smul_dual(tmp, tmp2);
8738                         if (insn & (1 << 22)) {
8739                             /* smlald, smlsld */
8740                             TCGv_i64 tmp64_2;
8741
8742                             tmp64 = tcg_temp_new_i64();
8743                             tmp64_2 = tcg_temp_new_i64();
8744                             tcg_gen_ext_i32_i64(tmp64, tmp);
8745                             tcg_gen_ext_i32_i64(tmp64_2, tmp2);
8746                             tcg_temp_free_i32(tmp);
8747                             tcg_temp_free_i32(tmp2);
8748                             if (insn & (1 << 6)) {
8749                                 tcg_gen_sub_i64(tmp64, tmp64, tmp64_2);
8750                             } else {
8751                                 tcg_gen_add_i64(tmp64, tmp64, tmp64_2);
8752                             }
8753                             tcg_temp_free_i64(tmp64_2);
8754                             gen_addq(s, tmp64, rd, rn);
8755                             gen_storeq_reg(s, rd, rn, tmp64);
8756                             tcg_temp_free_i64(tmp64);
8757                         } else {
8758                             /* smuad, smusd, smlad, smlsd */
8759                             if (insn & (1 << 6)) {
8760                                 /* This subtraction cannot overflow. */
8761                                 tcg_gen_sub_i32(tmp, tmp, tmp2);
8762                             } else {
8763                                 /* This addition cannot overflow 32 bits;
8764                                  * however it may overflow considered as a
8765                                  * signed operation, in which case we must set
8766                                  * the Q flag.
8767                                  */
8768                                 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
8769                             }
8770                             tcg_temp_free_i32(tmp2);
                            if (rd != 15) {
                                tmp2 = load_reg(s, rd);
                                gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
                                tcg_temp_free_i32(tmp2);
                            }
8777                             store_reg(s, rn, tmp);
8778                         }
8779                         break;
8780                     case 1:
8781                     case 3:
8782                         /* SDIV, UDIV */
8783                         if (!arm_dc_feature(s, ARM_FEATURE_ARM_DIV)) {
8784                             goto illegal_op;
8785                         }
8786                         if (((insn >> 5) & 7) || (rd != 15)) {
8787                             goto illegal_op;
8788                         }
8789                         tmp = load_reg(s, rm);
8790                         tmp2 = load_reg(s, rs);
8791                         if (insn & (1 << 21)) {
8792                             gen_helper_udiv(tmp, tmp, tmp2);
8793                         } else {
8794                             gen_helper_sdiv(tmp, tmp, tmp2);
8795                         }
8796                         tcg_temp_free_i32(tmp2);
8797                         store_reg(s, rn, tmp);
8798                         break;
8799                     default:
8800                         goto illegal_op;
8801                     }
8802                     break;
8803                 case 3:
8804                     op1 = ((insn >> 17) & 0x38) | ((insn >> 5) & 7);
8805                     switch (op1) {
8806                     case 0: /* Unsigned sum of absolute differences.  */
8807                         ARCH(6);
8808                         tmp = load_reg(s, rm);
8809                         tmp2 = load_reg(s, rs);
8810                         gen_helper_usad8(tmp, tmp, tmp2);
8811                         tcg_temp_free_i32(tmp2);
8812                         if (rd != 15) {
8813                             tmp2 = load_reg(s, rd);
8814                             tcg_gen_add_i32(tmp, tmp, tmp2);
8815                             tcg_temp_free_i32(tmp2);
8816                         }
8817                         store_reg(s, rn, tmp);
8818                         break;
8819                     case 0x20: case 0x24: case 0x28: case 0x2c:
8820                         /* Bitfield insert/clear.  */
8821                         ARCH(6T2);
8822                         shift = (insn >> 7) & 0x1f;
8823                         i = (insn >> 16) & 0x1f;
8824                         if (i < shift) {
8825                             /* UNPREDICTABLE; we choose to UNDEF */
8826                             goto illegal_op;
8827                         }
8828                         i = i + 1 - shift;
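                        /* i is now the field width; shift is the lsb.  */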
8829                         if (rm == 15) {
8830                             tmp = tcg_temp_new_i32();
8831                             tcg_gen_movi_i32(tmp, 0);
8832                         } else {
8833                             tmp = load_reg(s, rm);
8834                         }
8835                         if (i != 32) {
8836                             tmp2 = load_reg(s, rd);
8837                             tcg_gen_deposit_i32(tmp, tmp2, tmp, shift, i);
8838                             tcg_temp_free_i32(tmp2);
8839                         }
8840                         store_reg(s, rd, tmp);
8841                         break;
8842                     case 0x12: case 0x16: case 0x1a: case 0x1e: /* sbfx */
8843                     case 0x32: case 0x36: case 0x3a: case 0x3e: /* ubfx */
8844                         ARCH(6T2);
8845                         tmp = load_reg(s, rm);
8846                         shift = (insn >> 7) & 0x1f;
8847                         i = ((insn >> 16) & 0x1f) + 1;
8848                         if (shift + i > 32)
8849                             goto illegal_op;
8850                         if (i < 32) {
8851                             if (op1 & 0x20) {
8852                                 gen_ubfx(tmp, shift, (1u << i) - 1);
8853                             } else {
8854                                 gen_sbfx(tmp, shift, i);
8855                             }
8856                         }
8857                         store_reg(s, rd, tmp);
8858                         break;
8859                     default:
8860                         goto illegal_op;
8861                     }
8862                     break;
8863                 }
8864                 break;
8865             }
8866         do_ldst:
            /* Check for undefined extension instructions
             * per the ARM Bible, i.e.:
             * xxxx 0111 1111 xxxx  xxxx xxxx 1111 xxxx
             */
            sh = (0xf << 20) | (0xf << 4);
            if (op1 == 0x7 && ((insn & sh) == sh)) {
                goto illegal_op;
            }
8876             /* load/store byte/word */
8877             rn = (insn >> 16) & 0xf;
8878             rd = (insn >> 12) & 0xf;
8879             tmp2 = load_reg(s, rn);
8880             if ((insn & 0x01200000) == 0x00200000) {
8881                 /* ldrt/strt */
8882                 i = get_a32_user_mem_index(s);
8883             } else {
8884                 i = get_mem_index(s);
8885             }
8886             if (insn & (1 << 24))
8887                 gen_add_data_offset(s, insn, tmp2);
8888             if (insn & (1 << 20)) {
8889                 /* load */
8890                 tmp = tcg_temp_new_i32();
8891                 if (insn & (1 << 22)) {
8892                     gen_aa32_ld8u(tmp, tmp2, i);
8893                 } else {
8894                     gen_aa32_ld32u(tmp, tmp2, i);
8895                 }
8896             } else {
8897                 /* store */
8898                 tmp = load_reg(s, rd);
8899                 if (insn & (1 << 22)) {
8900                     gen_aa32_st8(tmp, tmp2, i);
8901                 } else {
8902                     gen_aa32_st32(tmp, tmp2, i);
8903                 }
8904                 tcg_temp_free_i32(tmp);
8905             }
8906             if (!(insn & (1 << 24))) {
8907                 gen_add_data_offset(s, insn, tmp2);
8908                 store_reg(s, rn, tmp2);
8909             } else if (insn & (1 << 21)) {
8910                 store_reg(s, rn, tmp2);
8911             } else {
8912                 tcg_temp_free_i32(tmp2);
8913             }
8914             if (insn & (1 << 20)) {
8915                 /* Complete the load.  */
8916                 store_reg_from_load(s, rd, tmp);
8917             }
8918             break;
8919         case 0x08:
8920         case 0x09:
8921             {
8922                 int j, n, loaded_base;
8923                 bool exc_return = false;
8924                 bool is_load = extract32(insn, 20, 1);
8925                 bool user = false;
8926                 TCGv_i32 loaded_var;
8927                 /* load/store multiple words */
8928                 /* XXX: store correct base if write back */
8929                 if (insn & (1 << 22)) {
8930                     /* LDM (user), LDM (exception return) and STM (user) */
8931                     if (IS_USER(s))
8932                         goto illegal_op; /* only usable in supervisor mode */
8933
8934                     if (is_load && extract32(insn, 15, 1)) {
8935                         exc_return = true;
8936                     } else {
8937                         user = true;
8938                     }
8939                 }
8940                 rn = (insn >> 16) & 0xf;
8941                 addr = load_reg(s, rn);
8942
8943                 /* compute total size */
8944                 loaded_base = 0;
8945                 TCGV_UNUSED_I32(loaded_var);
8946                 n = 0;
                for (i = 0; i < 16; i++) {
8948                     if (insn & (1 << i))
8949                         n++;
8950                 }
8951                 /* XXX: test invalid n == 0 case ? */
8952                 if (insn & (1 << 23)) {
8953                     if (insn & (1 << 24)) {
8954                         /* pre increment */
8955                         tcg_gen_addi_i32(addr, addr, 4);
8956                     } else {
8957                         /* post increment */
8958                     }
8959                 } else {
8960                     if (insn & (1 << 24)) {
8961                         /* pre decrement */
8962                         tcg_gen_addi_i32(addr, addr, -(n * 4));
8963                     } else {
8964                         /* post decrement */
                        if (n != 1) {
                            tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
                        }
8967                     }
8968                 }
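                /* The transfer loop below always walks addresses upwards;
                 * the adjustments above turn the decrement forms into an
                 * equivalent ascending base.
                 */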
8969                 j = 0;
                for (i = 0; i < 16; i++) {
8971                     if (insn & (1 << i)) {
8972                         if (is_load) {
8973                             /* load */
8974                             tmp = tcg_temp_new_i32();
8975                             gen_aa32_ld32u(tmp, addr, get_mem_index(s));
8976                             if (user) {
8977                                 tmp2 = tcg_const_i32(i);
8978                                 gen_helper_set_user_reg(cpu_env, tmp2, tmp);
8979                                 tcg_temp_free_i32(tmp2);
8980                                 tcg_temp_free_i32(tmp);
8981                             } else if (i == rn) {
8982                                 loaded_var = tmp;
8983                                 loaded_base = 1;
8984                             } else {
8985                                 store_reg_from_load(s, i, tmp);
8986                             }
8987                         } else {
8988                             /* store */
8989                             if (i == 15) {
                                /* special case: r15 = PC + 8; s->pc already
                                   points 4 past this insn, so add 4 more.  */
                                val = (long)s->pc + 4;
8992                                 tmp = tcg_temp_new_i32();
8993                                 tcg_gen_movi_i32(tmp, val);
8994                             } else if (user) {
8995                                 tmp = tcg_temp_new_i32();
8996                                 tmp2 = tcg_const_i32(i);
8997                                 gen_helper_get_user_reg(tmp, cpu_env, tmp2);
8998                                 tcg_temp_free_i32(tmp2);
8999                             } else {
9000                                 tmp = load_reg(s, i);
9001                             }
9002                             gen_aa32_st32(tmp, addr, get_mem_index(s));
9003                             tcg_temp_free_i32(tmp);
9004                         }
9005                         j++;
9006                         /* no need to add after the last transfer */
9007                         if (j != n)
9008                             tcg_gen_addi_i32(addr, addr, 4);
9009                     }
9010                 }
9011                 if (insn & (1 << 21)) {
9012                     /* write back */
9013                     if (insn & (1 << 23)) {
9014                         if (insn & (1 << 24)) {
9015                             /* pre increment */
9016                         } else {
9017                             /* post increment */
9018                             tcg_gen_addi_i32(addr, addr, 4);
9019                         }
9020                     } else {
9021                         if (insn & (1 << 24)) {
9022                             /* pre decrement */
9023                             if (n != 1)
9024                                 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
9025                         } else {
9026                             /* post decrement */
9027                             tcg_gen_addi_i32(addr, addr, -(n * 4));
9028                         }
9029                     }
9030                     store_reg(s, rn, addr);
9031                 } else {
9032                     tcg_temp_free_i32(addr);
9033                 }
9034                 if (loaded_base) {
9035                     store_reg(s, rn, loaded_var);
9036                 }
9037                 if (exc_return) {
9038                     /* Restore CPSR from SPSR.  */
9039                     tmp = load_cpu_field(spsr);
9040                     gen_set_cpsr(tmp, CPSR_ERET_MASK);
9041                     tcg_temp_free_i32(tmp);
9042                     s->is_jmp = DISAS_JUMP;
9043                 }
9044             }
9045             break;
9046         case 0xa:
9047         case 0xb:
9048             {
9049                 int32_t offset;
9050
9051                 /* branch (and link) */
9052                 val = (int32_t)s->pc;
9053                 if (insn & (1 << 24)) {
9054                     tmp = tcg_temp_new_i32();
9055                     tcg_gen_movi_i32(tmp, val);
9056                     store_reg(s, 14, tmp);
9057                 }
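                /* The 24-bit immediate is a word offset; the extra 4 below
                 * accounts for the PC reading as this insn's address + 8.
                 */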
9058                 offset = sextract32(insn << 2, 0, 26);
9059                 val += offset + 4;
9060                 gen_jmp(s, val);
9061             }
9062             break;
9063         case 0xc:
9064         case 0xd:
9065         case 0xe:
            if (((insn >> 8) & 0xe) == 0xa) {
9067                 /* VFP.  */
9068                 if (disas_vfp_insn(s, insn)) {
9069                     goto illegal_op;
9070                 }
9071             } else if (disas_coproc_insn(s, insn)) {
9072                 /* Coprocessor.  */
9073                 goto illegal_op;
9074             }
9075             break;
9076         case 0xf:
9077             /* swi */
9078             gen_set_pc_im(s, s->pc);
9079             s->svc_imm = extract32(insn, 0, 24);
9080             s->is_jmp = DISAS_SWI;
9081             break;
9082         default:
9083         illegal_op:
9084             gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
9085                                default_exception_el(s));
9086             break;
9087         }
9088     }
9089 }
9090
9091 /* Return true if this is a Thumb-2 logical op.  */
9092 static int
9093 thumb2_logic_op(int op)
9094 {
9095     return (op < 8);
9096 }
9097
9098 /* Generate code for a Thumb-2 data processing operation.  If CONDS is nonzero
9099    then set condition code flags based on the result of the operation.
9100    If SHIFTER_OUT is nonzero then set the carry flag for logical operations
9101    to the high bit of T1.
9102    Returns zero if the opcode is valid.  */
9103
9104 static int
9105 gen_thumb2_data_op(DisasContext *s, int op, int conds, uint32_t shifter_out,
9106                    TCGv_i32 t0, TCGv_i32 t1)
9107 {
9108     int logic_cc;
9109
9110     logic_cc = 0;
9111     switch (op) {
9112     case 0: /* and */
9113         tcg_gen_and_i32(t0, t0, t1);
9114         logic_cc = conds;
9115         break;
9116     case 1: /* bic */
9117         tcg_gen_andc_i32(t0, t0, t1);
9118         logic_cc = conds;
9119         break;
9120     case 2: /* orr */
9121         tcg_gen_or_i32(t0, t0, t1);
9122         logic_cc = conds;
9123         break;
9124     case 3: /* orn */
9125         tcg_gen_orc_i32(t0, t0, t1);
9126         logic_cc = conds;
9127         break;
9128     case 4: /* eor */
9129         tcg_gen_xor_i32(t0, t0, t1);
9130         logic_cc = conds;
9131         break;
    case 8: /* add */
        if (conds) {
            gen_add_CC(t0, t0, t1);
        } else {
            tcg_gen_add_i32(t0, t0, t1);
        }
        break;
    case 10: /* adc */
        if (conds) {
            gen_adc_CC(t0, t0, t1);
        } else {
            gen_adc(t0, t1);
        }
        break;
    case 11: /* sbc */
        if (conds) {
            gen_sbc_CC(t0, t0, t1);
        } else {
            gen_sub_carry(t0, t0, t1);
        }
        break;
    case 13: /* sub */
        if (conds) {
            gen_sub_CC(t0, t0, t1);
        } else {
            tcg_gen_sub_i32(t0, t0, t1);
        }
        break;
    case 14: /* rsb */
        if (conds) {
            gen_sub_CC(t0, t1, t0);
        } else {
            tcg_gen_sub_i32(t0, t1, t0);
        }
        break;
9163     default: /* 5, 6, 7, 9, 12, 15. */
9164         return 1;
9165     }
9166     if (logic_cc) {
9167         gen_logic_CC(t0);
        if (shifter_out) {
            gen_set_CF_bit31(t1);
        }
9170     }
9171     return 0;
9172 }
9173
9174 /* Translate a 32-bit thumb instruction.  Returns nonzero if the instruction
9175    is not legal.  */
9176 static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw1)
9177 {
9178     uint32_t insn, imm, shift, offset;
9179     uint32_t rd, rn, rm, rs;
9180     TCGv_i32 tmp;
9181     TCGv_i32 tmp2;
9182     TCGv_i32 tmp3;
9183     TCGv_i32 addr;
9184     TCGv_i64 tmp64;
9185     int op;
9186     int shiftop;
9187     int conds;
9188     int logic_cc;
9189
9190     if (!(arm_dc_feature(s, ARM_FEATURE_THUMB2)
9191           || arm_dc_feature(s, ARM_FEATURE_M))) {
9192         /* Thumb-1 cores may need to treat bl and blx as a pair of
9193            16-bit instructions to get correct prefetch abort behavior.  */
9194         insn = insn_hw1;
9195         if ((insn & (1 << 12)) == 0) {
9196             ARCH(5);
9197             /* Second half of blx.  */
9198             offset = ((insn & 0x7ff) << 1);
9199             tmp = load_reg(s, 14);
9200             tcg_gen_addi_i32(tmp, tmp, offset);
9201             tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
9202
9203             tmp2 = tcg_temp_new_i32();
9204             tcg_gen_movi_i32(tmp2, s->pc | 1);
9205             store_reg(s, 14, tmp2);
9206             gen_bx(s, tmp);
9207             return 0;
9208         }
9209         if (insn & (1 << 11)) {
9210             /* Second half of bl.  */
9211             offset = ((insn & 0x7ff) << 1) | 1;
9212             tmp = load_reg(s, 14);
9213             tcg_gen_addi_i32(tmp, tmp, offset);
9214
9215             tmp2 = tcg_temp_new_i32();
9216             tcg_gen_movi_i32(tmp2, s->pc | 1);
9217             store_reg(s, 14, tmp2);
9218             gen_bx(s, tmp);
9219             return 0;
9220         }
9221         if ((s->pc & ~TARGET_PAGE_MASK) == 0) {
9222             /* Instruction spans a page boundary.  Implement it as two
               16-bit instructions in case the second half causes a
               prefetch abort.  */
9225             offset = ((int32_t)insn << 21) >> 9;
9226             tcg_gen_movi_i32(cpu_R[14], s->pc + 2 + offset);
9227             return 0;
9228         }
9229         /* Fall through to 32-bit decode.  */
9230     }
9231
9232     insn = arm_lduw_code(env, s->pc, s->bswap_code);
9233     s->pc += 2;
9234     insn |= (uint32_t)insn_hw1 << 16;
9235
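    /* bl/blx halfword pairs are the only 32-bit encodings that predate
     * Thumb-2; anything else requires 6T2.
     */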
9236     if ((insn & 0xf800e800) != 0xf000e800) {
9237         ARCH(6T2);
9238     }
9239
9240     rn = (insn >> 16) & 0xf;
9241     rs = (insn >> 12) & 0xf;
9242     rd = (insn >> 8) & 0xf;
9243     rm = insn & 0xf;
9244     switch ((insn >> 25) & 0xf) {
9245     case 0: case 1: case 2: case 3:
9246         /* 16-bit instructions.  Should never happen.  */
9247         abort();
9248     case 4:
9249         if (insn & (1 << 22)) {
9250             /* Other load/store, table branch.  */
9251             if (insn & 0x01200000) {
9252                 /* Load/store doubleword.  */
9253                 if (rn == 15) {
9254                     addr = tcg_temp_new_i32();
9255                     tcg_gen_movi_i32(addr, s->pc & ~3);
9256                 } else {
9257                     addr = load_reg(s, rn);
9258                 }
9259                 offset = (insn & 0xff) * 4;
9260                 if ((insn & (1 << 23)) == 0)
9261                     offset = -offset;
9262                 if (insn & (1 << 24)) {
9263                     tcg_gen_addi_i32(addr, addr, offset);
9264                     offset = 0;
9265                 }
9266                 if (insn & (1 << 20)) {
9267                     /* ldrd */
9268                     tmp = tcg_temp_new_i32();
9269                     gen_aa32_ld32u(tmp, addr, get_mem_index(s));
9270                     store_reg(s, rs, tmp);
9271                     tcg_gen_addi_i32(addr, addr, 4);
9272                     tmp = tcg_temp_new_i32();
9273                     gen_aa32_ld32u(tmp, addr, get_mem_index(s));
9274                     store_reg(s, rd, tmp);
9275                 } else {
9276                     /* strd */
9277                     tmp = load_reg(s, rs);
9278                     gen_aa32_st32(tmp, addr, get_mem_index(s));
9279                     tcg_temp_free_i32(tmp);
9280                     tcg_gen_addi_i32(addr, addr, 4);
9281                     tmp = load_reg(s, rd);
9282                     gen_aa32_st32(tmp, addr, get_mem_index(s));
9283                     tcg_temp_free_i32(tmp);
9284                 }
9285                 if (insn & (1 << 21)) {
9286                     /* Base writeback.  */
9287                     if (rn == 15)
9288                         goto illegal_op;
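                    /* addr is 4 past the first transfer address here,
                     * hence the offset - 4 adjustment.
                     */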
9289                     tcg_gen_addi_i32(addr, addr, offset - 4);
9290                     store_reg(s, rn, addr);
9291                 } else {
9292                     tcg_temp_free_i32(addr);
9293                 }
9294             } else if ((insn & (1 << 23)) == 0) {
9295                 /* Load/store exclusive word.  */
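                /* A local temp is needed: gen_load/store_exclusive emit
                 * branches, which ordinary TCG temps do not survive.
                 */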
9296                 addr = tcg_temp_local_new_i32();
9297                 load_reg_var(s, addr, rn);
9298                 tcg_gen_addi_i32(addr, addr, (insn & 0xff) << 2);
9299                 if (insn & (1 << 20)) {
9300                     gen_load_exclusive(s, rs, 15, addr, 2);
9301                 } else {
9302                     gen_store_exclusive(s, rd, rs, 15, addr, 2);
9303                 }
9304                 tcg_temp_free_i32(addr);
9305             } else if ((insn & (7 << 5)) == 0) {
9306                 /* Table Branch.  */
9307                 if (rn == 15) {
9308                     addr = tcg_temp_new_i32();
9309                     tcg_gen_movi_i32(addr, s->pc);
9310                 } else {
9311                     addr = load_reg(s, rn);
9312                 }
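                /* TBB/TBH: load a byte/halfword offset from Rn + Rm
                 * (Rm doubled for tbh), double it and branch PC-relative.
                 */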
9313                 tmp = load_reg(s, rm);
9314                 tcg_gen_add_i32(addr, addr, tmp);
9315                 if (insn & (1 << 4)) {
9316                     /* tbh */
9317                     tcg_gen_add_i32(addr, addr, tmp);
9318                     tcg_temp_free_i32(tmp);
9319                     tmp = tcg_temp_new_i32();
9320                     gen_aa32_ld16u(tmp, addr, get_mem_index(s));
9321                 } else { /* tbb */
9322                     tcg_temp_free_i32(tmp);
9323                     tmp = tcg_temp_new_i32();
9324                     gen_aa32_ld8u(tmp, addr, get_mem_index(s));
9325                 }
9326                 tcg_temp_free_i32(addr);
9327                 tcg_gen_shli_i32(tmp, tmp, 1);
9328                 tcg_gen_addi_i32(tmp, tmp, s->pc);
9329                 store_reg(s, 15, tmp);
9330             } else {
9331                 int op2 = (insn >> 6) & 0x3;
9332                 op = (insn >> 4) & 0x3;
9333                 switch (op2) {
9334                 case 0:
9335                     goto illegal_op;
9336                 case 1:
9337                     /* Load/store exclusive byte/halfword/doubleword */
9338                     if (op == 2) {
9339                         goto illegal_op;
9340                     }
9341                     ARCH(7);
9342                     break;
9343                 case 2:
9344                     /* Load-acquire/store-release */
9345                     if (op == 3) {
9346                         goto illegal_op;
9347                     }
9348                     /* Fall through */
9349                 case 3:
9350                     /* Load-acquire/store-release exclusive */
9351                     ARCH(8);
9352                     break;
9353                 }
9354                 addr = tcg_temp_local_new_i32();
9355                 load_reg_var(s, addr, rn);
9356                 if (!(op2 & 1)) {
9357                     if (insn & (1 << 20)) {
9358                         tmp = tcg_temp_new_i32();
9359                         switch (op) {
9360                         case 0: /* ldab */
9361                             gen_aa32_ld8u(tmp, addr, get_mem_index(s));
9362                             break;
9363                         case 1: /* ldah */
9364                             gen_aa32_ld16u(tmp, addr, get_mem_index(s));
9365                             break;
9366                         case 2: /* lda */
9367                             gen_aa32_ld32u(tmp, addr, get_mem_index(s));
9368                             break;
9369                         default:
9370                             abort();
9371                         }
9372                         store_reg(s, rs, tmp);
9373                     } else {
9374                         tmp = load_reg(s, rs);
9375                         switch (op) {
9376                         case 0: /* stlb */
9377                             gen_aa32_st8(tmp, addr, get_mem_index(s));
9378                             break;
9379                         case 1: /* stlh */
9380                             gen_aa32_st16(tmp, addr, get_mem_index(s));
9381                             break;
9382                         case 2: /* stl */
9383                             gen_aa32_st32(tmp, addr, get_mem_index(s));
9384                             break;
9385                         default:
9386                             abort();
9387                         }
9388                         tcg_temp_free_i32(tmp);
9389                     }
9390                 } else if (insn & (1 << 20)) {
9391                     gen_load_exclusive(s, rs, rd, addr, op);
9392                 } else {
9393                     gen_store_exclusive(s, rm, rs, rd, addr, op);
9394                 }
9395                 tcg_temp_free_i32(addr);
9396             }
9397         } else {
9398             /* Load/store multiple, RFE, SRS.  */
9399             if (((insn >> 23) & 1) == ((insn >> 24) & 1)) {
9400                 /* RFE, SRS: not available in user mode or on M profile */
9401                 if (IS_USER(s) || arm_dc_feature(s, ARM_FEATURE_M)) {
9402                     goto illegal_op;
9403                 }
9404                 if (insn & (1 << 20)) {
9405                     /* rfe */
9406                     addr = load_reg(s, rn);
9407                     if ((insn & (1 << 24)) == 0)
9408                         tcg_gen_addi_i32(addr, addr, -8);
9409                     /* Load PC into tmp and CPSR into tmp2.  */
9410                     tmp = tcg_temp_new_i32();
9411                     gen_aa32_ld32u(tmp, addr, get_mem_index(s));
9412                     tcg_gen_addi_i32(addr, addr, 4);
9413                     tmp2 = tcg_temp_new_i32();
9414                     gen_aa32_ld32u(tmp2, addr, get_mem_index(s));
9415                     if (insn & (1 << 21)) {
9416                         /* Base writeback.  */
9417                         if (insn & (1 << 24)) {
9418                             tcg_gen_addi_i32(addr, addr, 4);
9419                         } else {
9420                             tcg_gen_addi_i32(addr, addr, -4);
9421                         }
9422                         store_reg(s, rn, addr);
9423                     } else {
9424                         tcg_temp_free_i32(addr);
9425                     }
9426                     gen_rfe(s, tmp, tmp2);
9427                 } else {
9428                     /* srs */
9429                     gen_srs(s, (insn & 0x1f), (insn & (1 << 24)) ? 1 : 2,
9430                             insn & (1 << 21));
9431                 }
9432             } else {
9433                 int i, loaded_base = 0;
9434                 TCGv_i32 loaded_var;
9435                 /* Load/store multiple.  */
9436                 addr = load_reg(s, rn);
9437                 offset = 0;
9438                 for (i = 0; i < 16; i++) {
9439                     if (insn & (1 << i))
9440                         offset += 4;
9441                 }
9442                 if (insn & (1 << 24)) {
9443                     tcg_gen_addi_i32(addr, addr, -offset);
9444                 }
9445
9446                 TCGV_UNUSED_I32(loaded_var);
9447                 for (i = 0; i < 16; i++) {
9448                     if ((insn & (1 << i)) == 0)
9449                         continue;
9450                     if (insn & (1 << 20)) {
9451                         /* Load.  */
9452                         tmp = tcg_temp_new_i32();
9453                         gen_aa32_ld32u(tmp, addr, get_mem_index(s));
9454                         if (i == 15) {
9455                             gen_bx(s, tmp);
9456                         } else if (i == rn) {
9457                             loaded_var = tmp;
9458                             loaded_base = 1;
9459                         } else {
9460                             store_reg(s, i, tmp);
9461                         }
9462                     } else {
9463                         /* Store.  */
9464                         tmp = load_reg(s, i);
9465                         gen_aa32_st32(tmp, addr, get_mem_index(s));
9466                         tcg_temp_free_i32(tmp);
9467                     }
9468                     tcg_gen_addi_i32(addr, addr, 4);
9469                 }
9470                 if (loaded_base) {
9471                     store_reg(s, rn, loaded_var);
9472                 }
9473                 if (insn & (1 << 21)) {
9474                     /* Base register writeback.  */
9475                     if (insn & (1 << 24)) {
9476                         tcg_gen_addi_i32(addr, addr, -offset);
9477                     }
9478                     /* Fault if writeback register is in register list.  */
9479                     if (insn & (1 << rn))
9480                         goto illegal_op;
9481                     store_reg(s, rn, addr);
9482                 } else {
9483                     tcg_temp_free_i32(addr);
9484                 }
9485             }
9486         }
9487         break;
9488     case 5:
9489
9490         op = (insn >> 21) & 0xf;
9491         if (op == 6) {
9492             if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9493                 goto illegal_op;
9494             }
9495             /* Halfword pack.  */
9496             tmp = load_reg(s, rn);
9497             tmp2 = load_reg(s, rm);
9498             shift = ((insn >> 10) & 0x1c) | ((insn >> 6) & 0x3);
9499             if (insn & (1 << 5)) {
9500                 /* pkhtb */
9501                 if (shift == 0)
9502                     shift = 31;
9503                 tcg_gen_sari_i32(tmp2, tmp2, shift);
9504                 tcg_gen_andi_i32(tmp, tmp, 0xffff0000);
9505                 tcg_gen_ext16u_i32(tmp2, tmp2);
9506             } else {
9507                 /* pkhbt */
9508                 if (shift)
9509                     tcg_gen_shli_i32(tmp2, tmp2, shift);
9510                 tcg_gen_ext16u_i32(tmp, tmp);
9511                 tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
9512             }
9513             tcg_gen_or_i32(tmp, tmp, tmp2);
9514             tcg_temp_free_i32(tmp2);
9515             store_reg(s, rd, tmp);
9516         } else {
9517             /* Data processing register constant shift.  */
9518             if (rn == 15) {
9519                 tmp = tcg_temp_new_i32();
9520                 tcg_gen_movi_i32(tmp, 0);
9521             } else {
9522                 tmp = load_reg(s, rn);
9523             }
9524             tmp2 = load_reg(s, rm);
9525
9526             shiftop = (insn >> 4) & 3;
9527             shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
9528             conds = (insn & (1 << 20)) != 0;
9529             logic_cc = (conds && thumb2_logic_op(op));
9530             gen_arm_shift_im(tmp2, shiftop, shift, logic_cc);
9531             if (gen_thumb2_data_op(s, op, conds, 0, tmp, tmp2))
9532                 goto illegal_op;
9533             tcg_temp_free_i32(tmp2);
9534             if (rd != 15) {
9535                 store_reg(s, rd, tmp);
9536             } else {
9537                 tcg_temp_free_i32(tmp);
9538             }
9539         }
9540         break;
9541     case 13: /* Misc data processing.  */
9542         op = ((insn >> 22) & 6) | ((insn >> 7) & 1);
9543         if (op < 4 && (insn & 0xf000) != 0xf000)
9544             goto illegal_op;
9545         switch (op) {
9546         case 0: /* Register controlled shift.  */
9547             tmp = load_reg(s, rn);
9548             tmp2 = load_reg(s, rm);
9549             if ((insn & 0x70) != 0)
9550                 goto illegal_op;
9551             op = (insn >> 21) & 3;
9552             logic_cc = (insn & (1 << 20)) != 0;
9553             gen_arm_shift_reg(tmp, op, tmp2, logic_cc);
9554             if (logic_cc)
9555                 gen_logic_CC(tmp);
9556             store_reg_bx(s, rd, tmp);
9557             break;
9558         case 1: /* Sign/zero extend.  */
9559             op = (insn >> 20) & 7;
9560             switch (op) {
9561             case 0: /* SXTAH, SXTH */
9562             case 1: /* UXTAH, UXTH */
9563             case 4: /* SXTAB, SXTB */
9564             case 5: /* UXTAB, UXTB */
9565                 break;
9566             case 2: /* SXTAB16, SXTB16 */
9567             case 3: /* UXTAB16, UXTB16 */
9568                 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9569                     goto illegal_op;
9570                 }
9571                 break;
9572             default:
9573                 goto illegal_op;
9574             }
9575             if (rn != 15) {
9576                 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9577                     goto illegal_op;
9578                 }
9579             }
9580             tmp = load_reg(s, rm);
9581             shift = (insn >> 4) & 3;
9582             /* ??? In many cases it's not necessary to do a
9583                rotate, a shift is sufficient.  */
9584             if (shift != 0)
9585                 tcg_gen_rotri_i32(tmp, tmp, shift * 8);
9586             op = (insn >> 20) & 7;
9587             switch (op) {
9588             case 0: gen_sxth(tmp);   break;
9589             case 1: gen_uxth(tmp);   break;
9590             case 2: gen_sxtb16(tmp); break;
9591             case 3: gen_uxtb16(tmp); break;
9592             case 4: gen_sxtb(tmp);   break;
9593             case 5: gen_uxtb(tmp);   break;
9594             default:
9595                 g_assert_not_reached();
9596             }
9597             if (rn != 15) {
9598                 tmp2 = load_reg(s, rn);
9599                 if ((op >> 1) == 1) {
9600                     gen_add16(tmp, tmp2);
9601                 } else {
9602                     tcg_gen_add_i32(tmp, tmp, tmp2);
9603                     tcg_temp_free_i32(tmp2);
9604                 }
9605             }
9606             store_reg(s, rd, tmp);
9607             break;
9608         case 2: /* SIMD add/subtract.  */
9609             if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9610                 goto illegal_op;
9611             }
9612             op = (insn >> 20) & 7;
9613             shift = (insn >> 4) & 7;
9614             if ((op & 3) == 3 || (shift & 3) == 3)
9615                 goto illegal_op;
9616             tmp = load_reg(s, rn);
9617             tmp2 = load_reg(s, rm);
9618             gen_thumb2_parallel_addsub(op, shift, tmp, tmp2);
9619             tcg_temp_free_i32(tmp2);
9620             store_reg(s, rd, tmp);
9621             break;
9622         case 3: /* Other data processing.  */
9623             op = ((insn >> 17) & 0x38) | ((insn >> 4) & 7);
9624             if (op < 4) {
9625                 /* Saturating add/subtract.  */
9626                 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9627                     goto illegal_op;
9628                 }
9629                 tmp = load_reg(s, rn);
9630                 tmp2 = load_reg(s, rm);
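                /* op bit 0 selects the doubling forms (qdadd/qdsub),
                 * bit 1 selects subtraction.
                 */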
9631                 if (op & 1)
9632                     gen_helper_double_saturate(tmp, cpu_env, tmp);
9633                 if (op & 2)
9634                     gen_helper_sub_saturate(tmp, cpu_env, tmp2, tmp);
9635                 else
9636                     gen_helper_add_saturate(tmp, cpu_env, tmp, tmp2);
9637                 tcg_temp_free_i32(tmp2);
9638             } else {
9639                 switch (op) {
9640                 case 0x0a: /* rbit */
9641                 case 0x08: /* rev */
9642                 case 0x09: /* rev16 */
9643                 case 0x0b: /* revsh */
9644                 case 0x18: /* clz */
9645                     break;
9646                 case 0x10: /* sel */
9647                     if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9648                         goto illegal_op;
9649                     }
9650                     break;
9651                 case 0x20: /* crc32/crc32c */
9652                 case 0x21:
9653                 case 0x22:
9654                 case 0x28:
9655                 case 0x29:
9656                 case 0x2a:
9657                     if (!arm_dc_feature(s, ARM_FEATURE_CRC)) {
9658                         goto illegal_op;
9659                     }
9660                     break;
9661                 default:
9662                     goto illegal_op;
9663                 }
9664                 tmp = load_reg(s, rn);
9665                 switch (op) {
9666                 case 0x0a: /* rbit */
9667                     gen_helper_rbit(tmp, tmp);
9668                     break;
9669                 case 0x08: /* rev */
9670                     tcg_gen_bswap32_i32(tmp, tmp);
9671                     break;
9672                 case 0x09: /* rev16 */
9673                     gen_rev16(tmp);
9674                     break;
9675                 case 0x0b: /* revsh */
9676                     gen_revsh(tmp);
9677                     break;
9678                 case 0x10: /* sel */
9679                     tmp2 = load_reg(s, rm);
9680                     tmp3 = tcg_temp_new_i32();
9681                     tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUARMState, GE));
9682                     gen_helper_sel_flags(tmp, tmp3, tmp, tmp2);
9683                     tcg_temp_free_i32(tmp3);
9684                     tcg_temp_free_i32(tmp2);
9685                     break;
9686                 case 0x18: /* clz */
9687                     gen_helper_clz(tmp, tmp);
9688                     break;
9689                 case 0x20:
9690                 case 0x21:
9691                 case 0x22:
9692                 case 0x28:
9693                 case 0x29:
9694                 case 0x2a:
9695                 {
9696                     /* crc32/crc32c */
9697                     uint32_t sz = op & 0x3;
9698                     uint32_t c = op & 0x8;
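                    /* sz selects the operand width (1 << sz bytes); c
                     * selects the CRC32C (Castagnoli) polynomial.
                     */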
9699
9700                     tmp2 = load_reg(s, rm);
9701                     if (sz == 0) {
9702                         tcg_gen_andi_i32(tmp2, tmp2, 0xff);
9703                     } else if (sz == 1) {
9704                         tcg_gen_andi_i32(tmp2, tmp2, 0xffff);
9705                     }
9706                     tmp3 = tcg_const_i32(1 << sz);
9707                     if (c) {
9708                         gen_helper_crc32c(tmp, tmp, tmp2, tmp3);
9709                     } else {
9710                         gen_helper_crc32(tmp, tmp, tmp2, tmp3);
9711                     }
9712                     tcg_temp_free_i32(tmp2);
9713                     tcg_temp_free_i32(tmp3);
9714                     break;
9715                 }
9716                 default:
9717                     g_assert_not_reached();
9718                 }
9719             }
9720             store_reg(s, rd, tmp);
9721             break;
9722         case 4: case 5: /* 32-bit multiply.  Sum of absolute differences.  */
9723             switch ((insn >> 20) & 7) {
9724             case 0: /* 32 x 32 -> 32 */
9725             case 7: /* Unsigned sum of absolute differences.  */
9726                 break;
9727             case 1: /* 16 x 16 -> 32 */
9728             case 2: /* Dual multiply add.  */
9729             case 3: /* 32 * 16 -> 32msb */
9730             case 4: /* Dual multiply subtract.  */
9731             case 5: case 6: /* 32 * 32 -> 32msb (SMMUL, SMMLA, SMMLS) */
9732                 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9733                     goto illegal_op;
9734                 }
9735                 break;
9736             }
9737             op = (insn >> 4) & 0xf;
9738             tmp = load_reg(s, rn);
9739             tmp2 = load_reg(s, rm);
9740             switch ((insn >> 20) & 7) {
9741             case 0: /* 32 x 32 -> 32 */
9742                 tcg_gen_mul_i32(tmp, tmp, tmp2);
9743                 tcg_temp_free_i32(tmp2);
9744                 if (rs != 15) {
9745                     tmp2 = load_reg(s, rs);
9746                     if (op)
9747                         tcg_gen_sub_i32(tmp, tmp2, tmp);
9748                     else
9749                         tcg_gen_add_i32(tmp, tmp, tmp2);
9750                     tcg_temp_free_i32(tmp2);
9751                 }
9752                 break;
9753             case 1: /* 16 x 16 -> 32 */
9754                 gen_mulxy(tmp, tmp2, op & 2, op & 1);
9755                 tcg_temp_free_i32(tmp2);
9756                 if (rs != 15) {
9757                     tmp2 = load_reg(s, rs);
9758                     gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
9759                     tcg_temp_free_i32(tmp2);
9760                 }
9761                 break;
9762             case 2: /* Dual multiply add.  */
9763             case 4: /* Dual multiply subtract.  */
9764                 if (op)
9765                     gen_swap_half(tmp2);
9766                 gen_smul_dual(tmp, tmp2);
9767                 if (insn & (1 << 22)) {
9768                     /* This subtraction cannot overflow. */
9769                     tcg_gen_sub_i32(tmp, tmp, tmp2);
9770                 } else {
9771                     /* This addition cannot overflow 32 bits;
9772                      * however it may overflow considered as a signed
9773                      * operation, in which case we must set the Q flag.
9774                      */
9775                     gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
9776                 }
9777                 tcg_temp_free_i32(tmp2);
                if (rs != 15) {
                    tmp2 = load_reg(s, rs);
                    gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
                    tcg_temp_free_i32(tmp2);
                }
9784                 break;
9785             case 3: /* 32 * 16 -> 32msb */
9786                 if (op)
9787                     tcg_gen_sari_i32(tmp2, tmp2, 16);
9788                 else
9789                     gen_sxth(tmp2);
9790                 tmp64 = gen_muls_i64_i32(tmp, tmp2);
9791                 tcg_gen_shri_i64(tmp64, tmp64, 16);
9792                 tmp = tcg_temp_new_i32();
9793                 tcg_gen_extrl_i64_i32(tmp, tmp64);
9794                 tcg_temp_free_i64(tmp64);
                if (rs != 15) {
                    tmp2 = load_reg(s, rs);
                    gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
                    tcg_temp_free_i32(tmp2);
                }
9801                 break;
9802             case 5: case 6: /* 32 * 32 -> 32msb (SMMUL, SMMLA, SMMLS) */
9803                 tmp64 = gen_muls_i64_i32(tmp, tmp2);
9804                 if (rs != 15) {
9805                     tmp = load_reg(s, rs);
9806                     if (insn & (1 << 20)) {
9807                         tmp64 = gen_addq_msw(tmp64, tmp);
9808                     } else {
9809                         tmp64 = gen_subq_msw(tmp64, tmp);
9810                     }
9811                 }
9812                 if (insn & (1 << 4)) {
9813                     tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u);
9814                 }
9815                 tcg_gen_shri_i64(tmp64, tmp64, 32);
9816                 tmp = tcg_temp_new_i32();
9817                 tcg_gen_extrl_i64_i32(tmp, tmp64);
9818                 tcg_temp_free_i64(tmp64);
9819                 break;
9820             case 7: /* Unsigned sum of absolute differences.  */
9821                 gen_helper_usad8(tmp, tmp, tmp2);
9822                 tcg_temp_free_i32(tmp2);
9823                 if (rs != 15) {
9824                     tmp2 = load_reg(s, rs);
9825                     tcg_gen_add_i32(tmp, tmp, tmp2);
9826                     tcg_temp_free_i32(tmp2);
9827                 }
9828                 break;
9829             }
9830             store_reg(s, rd, tmp);
9831             break;
        case 6: case 7: /* 64-bit multiply, divide.  */
9833             op = ((insn >> 4) & 0xf) | ((insn >> 16) & 0x70);
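            /* Pack op2 (insn[7:4]) into op[3:0] and op1 (insn[22:20]) into
             * op[6:4], so one value selects the operation below. */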
9834             tmp = load_reg(s, rn);
9835             tmp2 = load_reg(s, rm);
9836             if ((op & 0x50) == 0x10) {
9837                 /* sdiv, udiv */
9838                 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DIV)) {
9839                     goto illegal_op;
9840                 }
9841                 if (op & 0x20)
9842                     gen_helper_udiv(tmp, tmp, tmp2);
9843                 else
9844                     gen_helper_sdiv(tmp, tmp, tmp2);
9845                 tcg_temp_free_i32(tmp2);
9846                 store_reg(s, rd, tmp);
9847             } else if ((op & 0xe) == 0xc) {
9848                 /* Dual multiply accumulate long.  */
9849                 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9850                     tcg_temp_free_i32(tmp);
9851                     tcg_temp_free_i32(tmp2);
9852                     goto illegal_op;
9853                 }
9854                 if (op & 1)
9855                     gen_swap_half(tmp2);
9856                 gen_smul_dual(tmp, tmp2);
9857                 if (op & 0x10) {
9858                     tcg_gen_sub_i32(tmp, tmp, tmp2);
9859                 } else {
9860                     tcg_gen_add_i32(tmp, tmp, tmp2);
9861                 }
9862                 tcg_temp_free_i32(tmp2);
9863                 /* BUGFIX */
9864                 tmp64 = tcg_temp_new_i64();
9865                 tcg_gen_ext_i32_i64(tmp64, tmp);
9866                 tcg_temp_free_i32(tmp);
9867                 gen_addq(s, tmp64, rs, rd);
9868                 gen_storeq_reg(s, rs, rd, tmp64);
9869                 tcg_temp_free_i64(tmp64);
9870             } else {
9871                 if (op & 0x20) {
9872                     /* Unsigned 64-bit multiply  */
9873                     tmp64 = gen_mulu_i64_i32(tmp, tmp2);
9874                 } else {
9875                     if (op & 8) {
9876                         /* smlalxy */
9877                         if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9878                             tcg_temp_free_i32(tmp2);
9879                             tcg_temp_free_i32(tmp);
9880                             goto illegal_op;
9881                         }
9882                         gen_mulxy(tmp, tmp2, op & 2, op & 1);
9883                         tcg_temp_free_i32(tmp2);
9884                         tmp64 = tcg_temp_new_i64();
9885                         tcg_gen_ext_i32_i64(tmp64, tmp);
9886                         tcg_temp_free_i32(tmp);
9887                     } else {
9888                         /* Signed 64-bit multiply  */
9889                         tmp64 = gen_muls_i64_i32(tmp, tmp2);
9890                     }
9891                 }
9892                 if (op & 4) {
9893                     /* umaal */
9894                     if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9895                         tcg_temp_free_i64(tmp64);
9896                         goto illegal_op;
9897                     }
9898                     gen_addq_lo(s, tmp64, rs);
9899                     gen_addq_lo(s, tmp64, rd);
9900                 } else if (op & 0x40) {
9901                     /* 64-bit accumulate.  */
9902                     gen_addq(s, tmp64, rs, rd);
9903                 }
9904                 gen_storeq_reg(s, rs, rd, tmp64);
9905                 tcg_temp_free_i64(tmp64);
9906             }
9907             break;
9908         }
9909         break;
9910     case 6: case 7: case 14: case 15:
9911         /* Coprocessor.  */
9912         if (((insn >> 24) & 3) == 3) {
9913             /* Translate into the equivalent ARM encoding.  */
9914             insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
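            /* Concretely: masking with 0xe2ffffff clears bits 28, 27, 26
             * and 24, Thumb bit 28 is moved down into ARM bit 24, and bit
             * 28 is forced to 1, so the Thumb 111x1111 top byte becomes
             * the ARM unconditional Neon pattern 1111001x.
             */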
9915             if (disas_neon_data_insn(s, insn)) {
9916                 goto illegal_op;
9917             }
9918         } else if (((insn >> 8) & 0xe) == 10) {
9919             if (disas_vfp_insn(s, insn)) {
9920                 goto illegal_op;
9921             }
9922         } else {
9923             if (insn & (1 << 28))
9924                 goto illegal_op;
9925             if (disas_coproc_insn(s, insn)) {
9926                 goto illegal_op;
9927             }
9928         }
9929         break;
9930     case 8: case 9: case 10: case 11:
9931         if (insn & (1 << 15)) {
9932             /* Branches, misc control.  */
9933             if (insn & 0x5000) {
9934                 /* Unconditional branch.  */
                /* Sign-extend hw1[10:0] into offset[31:12].  */
                offset = ((int32_t)insn << 5) >> 9 & ~(int32_t)0xfff;
                /* hw2[10:0] -> offset[11:1].  */
                offset |= (insn & 0x7ff) << 1;
                /* (~hw2[13], ~hw2[11]) ^ offset[24] -> offset[23:22];
                 * offset[24:22] already have the same value because of
                 * the sign extension above.  */
                offset ^= ((~insn) & (1 << 13)) << 10;
                offset ^= ((~insn) & (1 << 11)) << 11;
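                /* With S = hw1[10], J1 = hw2[13] and J2 = hw2[11] this
                 * computes offset[23] = S ^ ~J1 = NOT(J1 EOR S) = I1 and
                 * offset[22] = S ^ ~J2 = NOT(J2 EOR S) = I2, matching the
                 * I1/I2 definitions for the 32-bit BL/BLX encodings.
                 */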
9944
9945                 if (insn & (1 << 14)) {
9946                     /* Branch and link.  */
9947                     tcg_gen_movi_i32(cpu_R[14], s->pc | 1);
9948                 }
9949
9950                 offset += s->pc;
9951                 if (insn & (1 << 12)) {
9952                     /* b/bl */
9953                     gen_jmp(s, offset);
9954                 } else {
9955                     /* blx */
9956                     offset &= ~(uint32_t)2;
9957                     /* thumb2 bx, no need to check */
9958                     gen_bx_im(s, offset);
9959                 }
9960             } else if (((insn >> 23) & 7) == 7) {
9961                 /* Misc control */
9962                 if (insn & (1 << 13))
9963                     goto illegal_op;
9964
9965                 if (insn & (1 << 26)) {
9966                     if (!(insn & (1 << 20))) {
9967                         /* Hypervisor call (v7) */
9968                         int imm16 = extract32(insn, 16, 4) << 12
9969                             | extract32(insn, 0, 12);
9970                         ARCH(7);
9971                         if (IS_USER(s)) {
9972                             goto illegal_op;
9973                         }
9974                         gen_hvc(s, imm16);
9975                     } else {
9976                         /* Secure monitor call (v6+) */
9977                         ARCH(6K);
9978                         if (IS_USER(s)) {
9979                             goto illegal_op;
9980                         }
9981                         gen_smc(s);
9982                     }
9983                 } else {
9984                     op = (insn >> 20) & 7;
9985                     switch (op) {
9986                     case 0: /* msr cpsr.  */
9987                         if (arm_dc_feature(s, ARM_FEATURE_M)) {
9988                             tmp = load_reg(s, rn);
9989                             addr = tcg_const_i32(insn & 0xff);
9990                             gen_helper_v7m_msr(cpu_env, addr, tmp);
9991                             tcg_temp_free_i32(addr);
9992                             tcg_temp_free_i32(tmp);
9993                             gen_lookup_tb(s);
9994                             break;
9995                         }
9996                         /* fall through */
9997                     case 1: /* msr spsr.  */
9998                         if (arm_dc_feature(s, ARM_FEATURE_M)) {
9999                             goto illegal_op;
10000                         }
10001                         tmp = load_reg(s, rn);
10002                         if (gen_set_psr(s,
10003                               msr_mask(s, (insn >> 8) & 0xf, op == 1),
10004                               op == 1, tmp))
10005                             goto illegal_op;
10006                         break;
10007                     case 2: /* cps, nop-hint.  */
10008                         if (((insn >> 8) & 7) == 0) {
10009                             gen_nop_hint(s, insn & 0xff);
10010                         }
10011                         /* Implemented as NOP in user mode.  */
10012                         if (IS_USER(s))
10013                             break;
10014                         offset = 0;
10015                         imm = 0;
10016                         if (insn & (1 << 10)) {
10017                             if (insn & (1 << 7))
10018                                 offset |= CPSR_A;
10019                             if (insn & (1 << 6))
10020                                 offset |= CPSR_I;
10021                             if (insn & (1 << 5))
10022                                 offset |= CPSR_F;
10023                             if (insn & (1 << 9))
10024                                 imm = CPSR_A | CPSR_I | CPSR_F;
10025                         }
10026                         if (insn & (1 << 8)) {
10027                             offset |= 0x1f;
10028                             imm |= (insn & 0x1f);
10029                         }
10030                         if (offset) {
10031                             gen_set_psr_im(s, offset, 0, imm);
10032                         }
10033                         break;
10034                     case 3: /* Special control operations.  */
10035                         ARCH(7);
10036                         op = (insn >> 4) & 0xf;
10037                         switch (op) {
10038                         case 2: /* clrex */
10039                             gen_clrex(s);
10040                             break;
10041                         case 4: /* dsb */
10042                         case 5: /* dmb */
10043                             /* These execute as NOPs.  */
10044                             break;
10045                         case 6: /* isb */
10046                             /* We need to break the TB after this insn
10047                              * to execute self-modifying code correctly
10048                              * and also to take any pending interrupts
10049                              * immediately.
10050                              */
10051                             gen_lookup_tb(s);
10052                             break;
10053                         default:
10054                             goto illegal_op;
10055                         }
10056                         break;
10057                     case 4: /* bxj */
10058                         /* Trivial implementation equivalent to bx.  */
10059                         tmp = load_reg(s, rn);
10060                         gen_bx(s, tmp);
10061                         break;
10062                     case 5: /* Exception return.  */
10063                         if (IS_USER(s)) {
10064                             goto illegal_op;
10065                         }
10066                         if (rn != 14 || rd != 15) {
10067                             goto illegal_op;
10068                         }
10069                         tmp = load_reg(s, rn);
10070                         tcg_gen_subi_i32(tmp, tmp, insn & 0xff);
10071                         gen_exception_return(s, tmp);
10072                         break;
10073                     case 6: /* mrs cpsr.  */
10074                         tmp = tcg_temp_new_i32();
10075                         if (arm_dc_feature(s, ARM_FEATURE_M)) {
10076                             addr = tcg_const_i32(insn & 0xff);
10077                             gen_helper_v7m_mrs(tmp, cpu_env, addr);
10078                             tcg_temp_free_i32(addr);
10079                         } else {
10080                             gen_helper_cpsr_read(tmp, cpu_env);
10081                         }
10082                         store_reg(s, rd, tmp);
10083                         break;
10084                     case 7: /* mrs spsr.  */
10085                         /* Not accessible in user mode.  */
10086                         if (IS_USER(s) || arm_dc_feature(s, ARM_FEATURE_M)) {
10087                             goto illegal_op;
10088                         }
10089                         tmp = load_cpu_field(spsr);
10090                         store_reg(s, rd, tmp);
10091                         break;
10092                     }
10093                 }
10094             } else {
10095                 /* Conditional branch.  */
10096                 op = (insn >> 22) & 0xf;
10097                 /* Generate a conditional jump to next instruction.  */
10098                 s->condlabel = gen_new_label();
10099                 arm_gen_test_cc(op ^ 1, s->condlabel);
10100                 s->condjmp = 1;
10101
10102                 /* offset[11:1] = insn[10:0] */
10103                 offset = (insn & 0x7ff) << 1;
10104                 /* offset[17:12] = insn[21:16].  */
10105                 offset |= (insn & 0x003f0000) >> 4;
10106                 /* offset[31:20] = insn[26].  */
10107                 offset |= ((int32_t)((insn << 5) & 0x80000000)) >> 11;
10108                 /* offset[18] = insn[13].  */
10109                 offset |= (insn & (1 << 13)) << 5;
10110                 /* offset[19] = insn[11].  */
10111                 offset |= (insn & (1 << 11)) << 8;
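                /* Net effect: offset = SignExtend(S:J2:J1:imm6:imm11:'0')
                 * with S = insn[26], J2 = insn[11] and J1 = insn[13], per
                 * the T3 conditional branch encoding.
                 */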
10112
10113                 /* jump to the offset */
10114                 gen_jmp(s, s->pc + offset);
10115             }
10116         } else {
10117             /* Data processing immediate.  */
10118             if (insn & (1 << 25)) {
10119                 if (insn & (1 << 24)) {
10120                     if (insn & (1 << 20))
10121                         goto illegal_op;
10122                     /* Bitfield/Saturate.  */
10123                     op = (insn >> 21) & 7;
10124                     imm = insn & 0x1f;
10125                     shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
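                    /* shift = imm3:imm2, i.e. insn[14:12]:insn[7:6]. */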
10126                     if (rn == 15) {
10127                         tmp = tcg_temp_new_i32();
10128                         tcg_gen_movi_i32(tmp, 0);
10129                     } else {
10130                         tmp = load_reg(s, rn);
10131                     }
10132                     switch (op) {
10133                     case 2: /* Signed bitfield extract.  */
10134                         imm++;
10135                         if (shift + imm > 32)
10136                             goto illegal_op;
10137                         if (imm < 32)
10138                             gen_sbfx(tmp, shift, imm);
10139                         break;
10140                     case 6: /* Unsigned bitfield extract.  */
10141                         imm++;
10142                         if (shift + imm > 32)
10143                             goto illegal_op;
10144                         if (imm < 32)
10145                             gen_ubfx(tmp, shift, (1u << imm) - 1);
10146                         break;
10147                     case 3: /* Bitfield insert/clear.  */
10148                         if (imm < shift)
10149                             goto illegal_op;
10150                         imm = imm + 1 - shift;
10151                         if (imm != 32) {
10152                             tmp2 = load_reg(s, rd);
10153                             tcg_gen_deposit_i32(tmp, tmp2, tmp, shift, imm);
10154                             tcg_temp_free_i32(tmp2);
10155                         }
10156                         break;
10157                     case 7:
10158                         goto illegal_op;
10159                     default: /* Saturate.  */
10160                         if (shift) {
10161                             if (op & 1)
10162                                 tcg_gen_sari_i32(tmp, tmp, shift);
10163                             else
10164                                 tcg_gen_shli_i32(tmp, tmp, shift);
10165                         }
10166                         tmp2 = tcg_const_i32(imm);
10167                         if (op & 4) {
10168                             /* Unsigned.  */
10169                             if ((op & 1) && shift == 0) {
10170                                 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
10171                                     tcg_temp_free_i32(tmp);
10172                                     tcg_temp_free_i32(tmp2);
10173                                     goto illegal_op;
10174                                 }
10175                                 gen_helper_usat16(tmp, cpu_env, tmp, tmp2);
10176                             } else {
10177                                 gen_helper_usat(tmp, cpu_env, tmp, tmp2);
10178                             }
10179                         } else {
10180                             /* Signed.  */
10181                             if ((op & 1) && shift == 0) {
10182                                 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
10183                                     tcg_temp_free_i32(tmp);
10184                                     tcg_temp_free_i32(tmp2);
10185                                     goto illegal_op;
10186                                 }
10187                                 gen_helper_ssat16(tmp, cpu_env, tmp, tmp2);
10188                             } else {
10189                                 gen_helper_ssat(tmp, cpu_env, tmp, tmp2);
10190                             }
10191                         }
10192                         tcg_temp_free_i32(tmp2);
10193                         break;
10194                     }
10195                     store_reg(s, rd, tmp);
10196                 } else {
10197                     imm = ((insn & 0x04000000) >> 15)
10198                           | ((insn & 0x7000) >> 4) | (insn & 0xff);
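                    /* imm = i:imm3:imm8,
                     * i.e. insn[26]:insn[14:12]:insn[7:0]. */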
10199                     if (insn & (1 << 22)) {
10200                         /* 16-bit immediate.  */
10201                         imm |= (insn >> 4) & 0xf000;
10202                         if (insn & (1 << 23)) {
10203                             /* movt */
10204                             tmp = load_reg(s, rd);
10205                             tcg_gen_ext16u_i32(tmp, tmp);
10206                             tcg_gen_ori_i32(tmp, tmp, imm << 16);
10207                         } else {
10208                             /* movw */
10209                             tmp = tcg_temp_new_i32();
10210                             tcg_gen_movi_i32(tmp, imm);
10211                         }
10212                     } else {
10213                         /* Add/sub 12-bit immediate.  */
10214                         if (rn == 15) {
10215                             offset = s->pc & ~(uint32_t)3;
10216                             if (insn & (1 << 23))
10217                                 offset -= imm;
10218                             else
10219                                 offset += imm;
10220                             tmp = tcg_temp_new_i32();
10221                             tcg_gen_movi_i32(tmp, offset);
10222                         } else {
10223                             tmp = load_reg(s, rn);
10224                             if (insn & (1 << 23))
10225                                 tcg_gen_subi_i32(tmp, tmp, imm);
10226                             else
10227                                 tcg_gen_addi_i32(tmp, tmp, imm);
10228                         }
10229                     }
10230                     store_reg(s, rd, tmp);
10231                 }
10232             } else {
10233                 int shifter_out = 0;
10234                 /* modified 12-bit immediate.  */
10235                 shift = ((insn & 0x04000000) >> 23) | ((insn & 0x7000) >> 12);
10236                 imm = (insn & 0xff);
10237                 switch (shift) {
10238                 case 0: /* XY */
10239                     /* Nothing to do.  */
10240                     break;
10241                 case 1: /* 00XY00XY */
10242                     imm |= imm << 16;
10243                     break;
10244                 case 2: /* XY00XY00 */
10245                     imm |= imm << 16;
10246                     imm <<= 8;
10247                     break;
10248                 case 3: /* XYXYXYXY */
10249                     imm |= imm << 16;
10250                     imm |= imm << 8;
10251                     break;
10252                 default: /* Rotated constant.  */
10253                     shift = (shift << 1) | (imm >> 7);
10254                     imm |= 0x80;
10255                     imm = imm << (32 - shift);
10256                     shifter_out = 1;
10257                     break;
10258                 }
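                /* Worked example for the rotated case: imm12 = 0x4ab gives
                 * shift = (0b0100 << 1) | 1 = 9 and an unrotated value of
                 * 0xab, so imm = 0xab << (32 - 9) = 0x55800000, i.e.
                 * ROR(0xab, 9); the plain shift suffices because the value
                 * fits in 8 bits and shift is always >= 8 here.
                 */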
10259                 tmp2 = tcg_temp_new_i32();
10260                 tcg_gen_movi_i32(tmp2, imm);
10261                 rn = (insn >> 16) & 0xf;
10262                 if (rn == 15) {
10263                     tmp = tcg_temp_new_i32();
10264                     tcg_gen_movi_i32(tmp, 0);
10265                 } else {
10266                     tmp = load_reg(s, rn);
10267                 }
10268                 op = (insn >> 21) & 0xf;
10269                 if (gen_thumb2_data_op(s, op, (insn & (1 << 20)) != 0,
10270                                        shifter_out, tmp, tmp2))
10271                     goto illegal_op;
10272                 tcg_temp_free_i32(tmp2);
10273                 rd = (insn >> 8) & 0xf;
10274                 if (rd != 15) {
10275                     store_reg(s, rd, tmp);
10276                 } else {
10277                     tcg_temp_free_i32(tmp);
10278                 }
10279             }
10280         }
10281         break;
10282     case 12: /* Load/store single data item.  */
10283         {
10284         int postinc = 0;
10285         int writeback = 0;
10286         int memidx;
10287         if ((insn & 0x01100000) == 0x01000000) {
10288             if (disas_neon_ls_insn(s, insn)) {
10289                 goto illegal_op;
10290             }
10291             break;
10292         }
10293         op = ((insn >> 21) & 3) | ((insn >> 22) & 4);
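        /* op[1:0] = insn[22:21] (size: byte/half/word), op[2] = insn[24]
         * (signed), matching the load cases below. */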
10294         if (rs == 15) {
10295             if (!(insn & (1 << 20))) {
10296                 goto illegal_op;
10297             }
10298             if (op != 2) {
                /* Byte or halfword load space with dest == r15: memory hints.
                 * Catch them early so we don't emit pointless addressing code.
                 * This space is a mix of:
                 *  PLD/PLDW/PLI, which we implement as NOPs (note that unlike
                 *     the ARM encodings, PLDW space doesn't UNDEF for non-v7MP
                 *     cores)
                 *  unallocated hints, which must be treated as NOPs
                 *  UNPREDICTABLE space, which we NOP or UNDEF depending on
                 *     which is easiest for the decoding logic
                 *  Some space which must UNDEF
                 */
10310                 int op1 = (insn >> 23) & 3;
10311                 int op2 = (insn >> 6) & 0x3f;
10312                 if (op & 2) {
10313                     goto illegal_op;
10314                 }
10315                 if (rn == 15) {
10316                     /* UNPREDICTABLE, unallocated hint or
10317                      * PLD/PLDW/PLI (literal)
10318                      */
10319                     return 0;
10320                 }
10321                 if (op1 & 1) {
10322                     return 0; /* PLD/PLDW/PLI or unallocated hint */
10323                 }
10324                 if ((op2 == 0) || ((op2 & 0x3c) == 0x30)) {
10325                     return 0; /* PLD/PLDW/PLI or unallocated hint */
10326                 }
                /* UNDEF space, or an UNPREDICTABLE one.  */
10328                 return 1;
10329             }
10330         }
10331         memidx = get_mem_index(s);
10332         if (rn == 15) {
10333             addr = tcg_temp_new_i32();
10334             /* PC relative.  */
10335             /* s->pc has already been incremented by 4.  */
10336             imm = s->pc & 0xfffffffc;
10337             if (insn & (1 << 23))
10338                 imm += insn & 0xfff;
10339             else
10340                 imm -= insn & 0xfff;
10341             tcg_gen_movi_i32(addr, imm);
10342         } else {
10343             addr = load_reg(s, rn);
10344             if (insn & (1 << 23)) {
10345                 /* Positive offset.  */
10346                 imm = insn & 0xfff;
10347                 tcg_gen_addi_i32(addr, addr, imm);
10348             } else {
10349                 imm = insn & 0xff;
10350                 switch ((insn >> 8) & 0xf) {
                case 0x0: /* Shifted register.  */
10352                     shift = (insn >> 4) & 0xf;
10353                     if (shift > 3) {
10354                         tcg_temp_free_i32(addr);
10355                         goto illegal_op;
10356                     }
10357                     tmp = load_reg(s, rm);
10358                     if (shift)
10359                         tcg_gen_shli_i32(tmp, tmp, shift);
10360                     tcg_gen_add_i32(addr, addr, tmp);
10361                     tcg_temp_free_i32(tmp);
10362                     break;
10363                 case 0xc: /* Negative offset.  */
10364                     tcg_gen_addi_i32(addr, addr, -imm);
10365                     break;
10366                 case 0xe: /* User privilege.  */
10367                     tcg_gen_addi_i32(addr, addr, imm);
10368                     memidx = get_a32_user_mem_index(s);
10369                     break;
10370                 case 0x9: /* Post-decrement.  */
10371                     imm = -imm;
10372                     /* Fall through.  */
10373                 case 0xb: /* Post-increment.  */
10374                     postinc = 1;
10375                     writeback = 1;
10376                     break;
10377                 case 0xd: /* Pre-decrement.  */
10378                     imm = -imm;
10379                     /* Fall through.  */
10380                 case 0xf: /* Pre-increment.  */
10381                     tcg_gen_addi_i32(addr, addr, imm);
10382                     writeback = 1;
10383                     break;
10384                 default:
10385                     tcg_temp_free_i32(addr);
10386                     goto illegal_op;
10387                 }
10388             }
10389         }
10390         if (insn & (1 << 20)) {
10391             /* Load.  */
10392             tmp = tcg_temp_new_i32();
10393             switch (op) {
10394             case 0:
10395                 gen_aa32_ld8u(tmp, addr, memidx);
10396                 break;
10397             case 4:
10398                 gen_aa32_ld8s(tmp, addr, memidx);
10399                 break;
10400             case 1:
10401                 gen_aa32_ld16u(tmp, addr, memidx);
10402                 break;
10403             case 5:
10404                 gen_aa32_ld16s(tmp, addr, memidx);
10405                 break;
10406             case 2:
10407                 gen_aa32_ld32u(tmp, addr, memidx);
10408                 break;
10409             default:
10410                 tcg_temp_free_i32(tmp);
10411                 tcg_temp_free_i32(addr);
10412                 goto illegal_op;
10413             }
10414             if (rs == 15) {
10415                 gen_bx(s, tmp);
10416             } else {
10417                 store_reg(s, rs, tmp);
10418             }
10419         } else {
10420             /* Store.  */
10421             tmp = load_reg(s, rs);
10422             switch (op) {
10423             case 0:
10424                 gen_aa32_st8(tmp, addr, memidx);
10425                 break;
10426             case 1:
10427                 gen_aa32_st16(tmp, addr, memidx);
10428                 break;
10429             case 2:
10430                 gen_aa32_st32(tmp, addr, memidx);
10431                 break;
10432             default:
10433                 tcg_temp_free_i32(tmp);
10434                 tcg_temp_free_i32(addr);
10435                 goto illegal_op;
10436             }
10437             tcg_temp_free_i32(tmp);
10438         }
10439         if (postinc)
10440             tcg_gen_addi_i32(addr, addr, imm);
10441         if (writeback) {
10442             store_reg(s, rn, addr);
10443         } else {
10444             tcg_temp_free_i32(addr);
10445         }
10446         }
10447         break;
10448     default:
10449         goto illegal_op;
10450     }
10451     return 0;
10452 illegal_op:
10453     return 1;
10454 }
10455
10456 static void disas_thumb_insn(CPUARMState *env, DisasContext *s)
10457 {
10458     uint32_t val, insn, op, rm, rn, rd, shift, cond;
10459     int32_t offset;
10460     int i;
10461     TCGv_i32 tmp;
10462     TCGv_i32 tmp2;
10463     TCGv_i32 addr;
10464
10465     if (s->condexec_mask) {
10466         cond = s->condexec_cond;
10467         if (cond != 0x0e) {     /* Skip conditional when condition is AL. */
            s->condlabel = gen_new_label();
            arm_gen_test_cc(cond ^ 1, s->condlabel);
            s->condjmp = 1;
10471         }
10472     }
10473
10474     insn = arm_lduw_code(env, s->pc, s->bswap_code);
10475     s->pc += 2;
10476
10477     switch (insn >> 12) {
10478     case 0: case 1:
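        /* Shift by immediate (LSL/LSR/ASR), or add/subtract when op == 3. */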
10479
10480         rd = insn & 7;
10481         op = (insn >> 11) & 3;
10482         if (op == 3) {
10483             /* add/subtract */
10484             rn = (insn >> 3) & 7;
10485             tmp = load_reg(s, rn);
10486             if (insn & (1 << 10)) {
10487                 /* immediate */
10488                 tmp2 = tcg_temp_new_i32();
10489                 tcg_gen_movi_i32(tmp2, (insn >> 6) & 7);
10490             } else {
10491                 /* reg */
10492                 rm = (insn >> 6) & 7;
10493                 tmp2 = load_reg(s, rm);
10494             }
10495             if (insn & (1 << 9)) {
10496                 if (s->condexec_mask)
10497                     tcg_gen_sub_i32(tmp, tmp, tmp2);
10498                 else
10499                     gen_sub_CC(tmp, tmp, tmp2);
10500             } else {
10501                 if (s->condexec_mask)
10502                     tcg_gen_add_i32(tmp, tmp, tmp2);
10503                 else
10504                     gen_add_CC(tmp, tmp, tmp2);
10505             }
10506             tcg_temp_free_i32(tmp2);
10507             store_reg(s, rd, tmp);
10508         } else {
10509             /* shift immediate */
10510             rm = (insn >> 3) & 7;
10511             shift = (insn >> 6) & 0x1f;
10512             tmp = load_reg(s, rm);
10513             gen_arm_shift_im(tmp, op, shift, s->condexec_mask == 0);
10514             if (!s->condexec_mask)
10515                 gen_logic_CC(tmp);
10516             store_reg(s, rd, tmp);
10517         }
10518         break;
10519     case 2: case 3:
10520         /* arithmetic large immediate */
10521         op = (insn >> 11) & 3;
10522         rd = (insn >> 8) & 0x7;
10523         if (op == 0) { /* mov */
10524             tmp = tcg_temp_new_i32();
10525             tcg_gen_movi_i32(tmp, insn & 0xff);
10526             if (!s->condexec_mask)
10527                 gen_logic_CC(tmp);
10528             store_reg(s, rd, tmp);
10529         } else {
10530             tmp = load_reg(s, rd);
10531             tmp2 = tcg_temp_new_i32();
10532             tcg_gen_movi_i32(tmp2, insn & 0xff);
10533             switch (op) {
10534             case 1: /* cmp */
10535                 gen_sub_CC(tmp, tmp, tmp2);
10536                 tcg_temp_free_i32(tmp);
10537                 tcg_temp_free_i32(tmp2);
10538                 break;
10539             case 2: /* add */
10540                 if (s->condexec_mask)
10541                     tcg_gen_add_i32(tmp, tmp, tmp2);
10542                 else
10543                     gen_add_CC(tmp, tmp, tmp2);
10544                 tcg_temp_free_i32(tmp2);
10545                 store_reg(s, rd, tmp);
10546                 break;
10547             case 3: /* sub */
10548                 if (s->condexec_mask)
10549                     tcg_gen_sub_i32(tmp, tmp, tmp2);
10550                 else
10551                     gen_sub_CC(tmp, tmp, tmp2);
10552                 tcg_temp_free_i32(tmp2);
10553                 store_reg(s, rd, tmp);
10554                 break;
10555             }
10556         }
10557         break;
10558     case 4:
10559         if (insn & (1 << 11)) {
10560             rd = (insn >> 8) & 7;
10561             /* load pc-relative.  Bit 1 of PC is ignored.  */
10562             val = s->pc + 2 + ((insn & 0xff) * 4);
10563             val &= ~(uint32_t)2;
10564             addr = tcg_temp_new_i32();
10565             tcg_gen_movi_i32(addr, val);
10566             tmp = tcg_temp_new_i32();
10567             gen_aa32_ld32u(tmp, addr, get_mem_index(s));
10568             tcg_temp_free_i32(addr);
10569             store_reg(s, rd, tmp);
10570             break;
10571         }
10572         if (insn & (1 << 10)) {
10573             /* data processing extended or blx */
10574             rd = (insn & 7) | ((insn >> 4) & 8);
10575             rm = (insn >> 3) & 0xf;
10576             op = (insn >> 8) & 3;
10577             switch (op) {
10578             case 0: /* add */
10579                 tmp = load_reg(s, rd);
10580                 tmp2 = load_reg(s, rm);
10581                 tcg_gen_add_i32(tmp, tmp, tmp2);
10582                 tcg_temp_free_i32(tmp2);
10583                 store_reg(s, rd, tmp);
10584                 break;
10585             case 1: /* cmp */
10586                 tmp = load_reg(s, rd);
10587                 tmp2 = load_reg(s, rm);
10588                 gen_sub_CC(tmp, tmp, tmp2);
10589                 tcg_temp_free_i32(tmp2);
10590                 tcg_temp_free_i32(tmp);
10591                 break;
10592             case 2: /* mov/cpy */
10593                 tmp = load_reg(s, rm);
10594                 store_reg(s, rd, tmp);
10595                 break;
            case 3: /* branch [and link] exchange thumb register */
10597                 tmp = load_reg(s, rm);
10598                 if (insn & (1 << 7)) {
10599                     ARCH(5);
10600                     val = (uint32_t)s->pc | 1;
10601                     tmp2 = tcg_temp_new_i32();
10602                     tcg_gen_movi_i32(tmp2, val);
10603                     store_reg(s, 14, tmp2);
10604                 }
10605                 /* already thumb, no need to check */
10606                 gen_bx(s, tmp);
10607                 break;
10608             }
10609             break;
10610         }
10611
10612         /* data processing register */
10613         rd = insn & 7;
10614         rm = (insn >> 3) & 7;
10615         op = (insn >> 6) & 0xf;
10616         if (op == 2 || op == 3 || op == 4 || op == 7) {
10617             /* the shift/rotate ops want the operands backwards */
10618             val = rm;
10619             rm = rd;
10620             rd = val;
10621             val = 1;
10622         } else {
10623             val = 0;
10624         }
10625
10626         if (op == 9) { /* neg */
10627             tmp = tcg_temp_new_i32();
10628             tcg_gen_movi_i32(tmp, 0);
10629         } else if (op != 0xf) { /* mvn doesn't read its first operand */
10630             tmp = load_reg(s, rd);
10631         } else {
10632             TCGV_UNUSED_I32(tmp);
10633         }
10634
10635         tmp2 = load_reg(s, rm);
10636         switch (op) {
10637         case 0x0: /* and */
10638             tcg_gen_and_i32(tmp, tmp, tmp2);
10639             if (!s->condexec_mask)
10640                 gen_logic_CC(tmp);
10641             break;
10642         case 0x1: /* eor */
10643             tcg_gen_xor_i32(tmp, tmp, tmp2);
10644             if (!s->condexec_mask)
10645                 gen_logic_CC(tmp);
10646             break;
10647         case 0x2: /* lsl */
10648             if (s->condexec_mask) {
10649                 gen_shl(tmp2, tmp2, tmp);
10650             } else {
10651                 gen_helper_shl_cc(tmp2, cpu_env, tmp2, tmp);
10652                 gen_logic_CC(tmp2);
10653             }
10654             break;
10655         case 0x3: /* lsr */
10656             if (s->condexec_mask) {
10657                 gen_shr(tmp2, tmp2, tmp);
10658             } else {
10659                 gen_helper_shr_cc(tmp2, cpu_env, tmp2, tmp);
10660                 gen_logic_CC(tmp2);
10661             }
10662             break;
10663         case 0x4: /* asr */
10664             if (s->condexec_mask) {
10665                 gen_sar(tmp2, tmp2, tmp);
10666             } else {
10667                 gen_helper_sar_cc(tmp2, cpu_env, tmp2, tmp);
10668                 gen_logic_CC(tmp2);
10669             }
10670             break;
10671         case 0x5: /* adc */
10672             if (s->condexec_mask) {
10673                 gen_adc(tmp, tmp2);
10674             } else {
10675                 gen_adc_CC(tmp, tmp, tmp2);
10676             }
10677             break;
10678         case 0x6: /* sbc */
10679             if (s->condexec_mask) {
10680                 gen_sub_carry(tmp, tmp, tmp2);
10681             } else {
10682                 gen_sbc_CC(tmp, tmp, tmp2);
10683             }
10684             break;
10685         case 0x7: /* ror */
10686             if (s->condexec_mask) {
10687                 tcg_gen_andi_i32(tmp, tmp, 0x1f);
10688                 tcg_gen_rotr_i32(tmp2, tmp2, tmp);
10689             } else {
10690                 gen_helper_ror_cc(tmp2, cpu_env, tmp2, tmp);
10691                 gen_logic_CC(tmp2);
10692             }
10693             break;
10694         case 0x8: /* tst */
10695             tcg_gen_and_i32(tmp, tmp, tmp2);
10696             gen_logic_CC(tmp);
10697             rd = 16;
10698             break;
10699         case 0x9: /* neg */
10700             if (s->condexec_mask)
10701                 tcg_gen_neg_i32(tmp, tmp2);
10702             else
10703                 gen_sub_CC(tmp, tmp, tmp2);
10704             break;
10705         case 0xa: /* cmp */
10706             gen_sub_CC(tmp, tmp, tmp2);
10707             rd = 16;
10708             break;
10709         case 0xb: /* cmn */
10710             gen_add_CC(tmp, tmp, tmp2);
10711             rd = 16;
10712             break;
10713         case 0xc: /* orr */
10714             tcg_gen_or_i32(tmp, tmp, tmp2);
10715             if (!s->condexec_mask)
10716                 gen_logic_CC(tmp);
10717             break;
10718         case 0xd: /* mul */
10719             tcg_gen_mul_i32(tmp, tmp, tmp2);
10720             if (!s->condexec_mask)
10721                 gen_logic_CC(tmp);
10722             break;
10723         case 0xe: /* bic */
10724             tcg_gen_andc_i32(tmp, tmp, tmp2);
10725             if (!s->condexec_mask)
10726                 gen_logic_CC(tmp);
10727             break;
10728         case 0xf: /* mvn */
10729             tcg_gen_not_i32(tmp2, tmp2);
10730             if (!s->condexec_mask)
10731                 gen_logic_CC(tmp2);
10732             val = 1;
10733             rm = rd;
10734             break;
10735         }
10736         if (rd != 16) {
10737             if (val) {
10738                 store_reg(s, rm, tmp2);
10739                 if (op != 0xf)
10740                     tcg_temp_free_i32(tmp);
10741             } else {
10742                 store_reg(s, rd, tmp);
10743                 tcg_temp_free_i32(tmp2);
10744             }
10745         } else {
10746             tcg_temp_free_i32(tmp);
10747             tcg_temp_free_i32(tmp2);
10748         }
10749         break;
10750
10751     case 5:
10752         /* load/store register offset.  */
10753         rd = insn & 7;
10754         rn = (insn >> 3) & 7;
10755         rm = (insn >> 6) & 7;
10756         op = (insn >> 9) & 7;
10757         addr = load_reg(s, rn);
10758         tmp = load_reg(s, rm);
10759         tcg_gen_add_i32(addr, addr, tmp);
10760         tcg_temp_free_i32(tmp);
10761
10762         if (op < 3) { /* store */
10763             tmp = load_reg(s, rd);
10764         } else {
10765             tmp = tcg_temp_new_i32();
10766         }
10767
10768         switch (op) {
10769         case 0: /* str */
10770             gen_aa32_st32(tmp, addr, get_mem_index(s));
10771             break;
10772         case 1: /* strh */
10773             gen_aa32_st16(tmp, addr, get_mem_index(s));
10774             break;
10775         case 2: /* strb */
10776             gen_aa32_st8(tmp, addr, get_mem_index(s));
10777             break;
10778         case 3: /* ldrsb */
10779             gen_aa32_ld8s(tmp, addr, get_mem_index(s));
10780             break;
10781         case 4: /* ldr */
10782             gen_aa32_ld32u(tmp, addr, get_mem_index(s));
10783             break;
10784         case 5: /* ldrh */
10785             gen_aa32_ld16u(tmp, addr, get_mem_index(s));
10786             break;
10787         case 6: /* ldrb */
10788             gen_aa32_ld8u(tmp, addr, get_mem_index(s));
10789             break;
10790         case 7: /* ldrsh */
10791             gen_aa32_ld16s(tmp, addr, get_mem_index(s));
10792             break;
10793         }
10794         if (op >= 3) { /* load */
10795             store_reg(s, rd, tmp);
10796         } else {
10797             tcg_temp_free_i32(tmp);
10798         }
10799         tcg_temp_free_i32(addr);
10800         break;
10801
10802     case 6:
10803         /* load/store word immediate offset */
10804         rd = insn & 7;
10805         rn = (insn >> 3) & 7;
10806         addr = load_reg(s, rn);
10807         val = (insn >> 4) & 0x7c;
10808         tcg_gen_addi_i32(addr, addr, val);
10809
10810         if (insn & (1 << 11)) {
10811             /* load */
10812             tmp = tcg_temp_new_i32();
10813             gen_aa32_ld32u(tmp, addr, get_mem_index(s));
10814             store_reg(s, rd, tmp);
10815         } else {
10816             /* store */
10817             tmp = load_reg(s, rd);
10818             gen_aa32_st32(tmp, addr, get_mem_index(s));
10819             tcg_temp_free_i32(tmp);
10820         }
10821         tcg_temp_free_i32(addr);
10822         break;
10823
10824     case 7:
10825         /* load/store byte immediate offset */
10826         rd = insn & 7;
10827         rn = (insn >> 3) & 7;
10828         addr = load_reg(s, rn);
10829         val = (insn >> 6) & 0x1f;
10830         tcg_gen_addi_i32(addr, addr, val);
10831
10832         if (insn & (1 << 11)) {
10833             /* load */
10834             tmp = tcg_temp_new_i32();
10835             gen_aa32_ld8u(tmp, addr, get_mem_index(s));
10836             store_reg(s, rd, tmp);
10837         } else {
10838             /* store */
10839             tmp = load_reg(s, rd);
10840             gen_aa32_st8(tmp, addr, get_mem_index(s));
10841             tcg_temp_free_i32(tmp);
10842         }
10843         tcg_temp_free_i32(addr);
10844         break;
10845
10846     case 8:
10847         /* load/store halfword immediate offset */
10848         rd = insn & 7;
10849         rn = (insn >> 3) & 7;
10850         addr = load_reg(s, rn);
10851         val = (insn >> 5) & 0x3e;
10852         tcg_gen_addi_i32(addr, addr, val);
10853
10854         if (insn & (1 << 11)) {
10855             /* load */
10856             tmp = tcg_temp_new_i32();
10857             gen_aa32_ld16u(tmp, addr, get_mem_index(s));
10858             store_reg(s, rd, tmp);
10859         } else {
10860             /* store */
10861             tmp = load_reg(s, rd);
10862             gen_aa32_st16(tmp, addr, get_mem_index(s));
10863             tcg_temp_free_i32(tmp);
10864         }
10865         tcg_temp_free_i32(addr);
10866         break;
10867
10868     case 9:
10869         /* load/store from stack */
10870         rd = (insn >> 8) & 7;
10871         addr = load_reg(s, 13);
10872         val = (insn & 0xff) * 4;
10873         tcg_gen_addi_i32(addr, addr, val);
10874
10875         if (insn & (1 << 11)) {
10876             /* load */
10877             tmp = tcg_temp_new_i32();
10878             gen_aa32_ld32u(tmp, addr, get_mem_index(s));
10879             store_reg(s, rd, tmp);
10880         } else {
10881             /* store */
10882             tmp = load_reg(s, rd);
10883             gen_aa32_st32(tmp, addr, get_mem_index(s));
10884             tcg_temp_free_i32(tmp);
10885         }
10886         tcg_temp_free_i32(addr);
10887         break;
10888
10889     case 10:
10890         /* add to high reg */
10891         rd = (insn >> 8) & 7;
10892         if (insn & (1 << 11)) {
10893             /* SP */
10894             tmp = load_reg(s, 13);
10895         } else {
            /* PC: bit 1 is ignored.  */
10897             tmp = tcg_temp_new_i32();
10898             tcg_gen_movi_i32(tmp, (s->pc + 2) & ~(uint32_t)2);
10899         }
10900         val = (insn & 0xff) * 4;
10901         tcg_gen_addi_i32(tmp, tmp, val);
10902         store_reg(s, rd, tmp);
10903         break;
10904
10905     case 11:
10906         /* misc */
10907         op = (insn >> 8) & 0xf;
10908         switch (op) {
10909         case 0:
10910             /* adjust stack pointer */
10911             tmp = load_reg(s, 13);
10912             val = (insn & 0x7f) * 4;
10913             if (insn & (1 << 7))
10914                 val = -(int32_t)val;
10915             tcg_gen_addi_i32(tmp, tmp, val);
10916             store_reg(s, 13, tmp);
10917             break;
10918
10919         case 2: /* sign/zero extend.  */
10920             ARCH(6);
10921             rd = insn & 7;
10922             rm = (insn >> 3) & 7;
10923             tmp = load_reg(s, rm);
10924             switch ((insn >> 6) & 3) {
10925             case 0: gen_sxth(tmp); break;
10926             case 1: gen_sxtb(tmp); break;
10927             case 2: gen_uxth(tmp); break;
10928             case 3: gen_uxtb(tmp); break;
10929             }
10930             store_reg(s, rd, tmp);
10931             break;
10932         case 4: case 5: case 0xc: case 0xd:
10933             /* push/pop */
10934             addr = load_reg(s, 13);
10935             if (insn & (1 << 8))
10936                 offset = 4;
10937             else
10938                 offset = 0;
10939             for (i = 0; i < 8; i++) {
10940                 if (insn & (1 << i))
10941                     offset += 4;
10942             }
10943             if ((insn & (1 << 11)) == 0) {
10944                 tcg_gen_addi_i32(addr, addr, -offset);
10945             }
10946             for (i = 0; i < 8; i++) {
10947                 if (insn & (1 << i)) {
10948                     if (insn & (1 << 11)) {
10949                         /* pop */
10950                         tmp = tcg_temp_new_i32();
10951                         gen_aa32_ld32u(tmp, addr, get_mem_index(s));
10952                         store_reg(s, i, tmp);
10953                     } else {
10954                         /* push */
10955                         tmp = load_reg(s, i);
10956                         gen_aa32_st32(tmp, addr, get_mem_index(s));
10957                         tcg_temp_free_i32(tmp);
10958                     }
10959                     /* advance to the next address.  */
10960                     tcg_gen_addi_i32(addr, addr, 4);
10961                 }
10962             }
10963             TCGV_UNUSED_I32(tmp);
10964             if (insn & (1 << 8)) {
10965                 if (insn & (1 << 11)) {
10966                     /* pop pc */
10967                     tmp = tcg_temp_new_i32();
10968                     gen_aa32_ld32u(tmp, addr, get_mem_index(s));
10969                     /* don't set the pc until the rest of the instruction
10970                        has completed */
10971                 } else {
10972                     /* push lr */
10973                     tmp = load_reg(s, 14);
10974                     gen_aa32_st32(tmp, addr, get_mem_index(s));
10975                     tcg_temp_free_i32(tmp);
10976                 }
10977                 tcg_gen_addi_i32(addr, addr, 4);
10978             }
10979             if ((insn & (1 << 11)) == 0) {
10980                 tcg_gen_addi_i32(addr, addr, -offset);
10981             }
10982             /* write back the new stack pointer */
10983             store_reg(s, 13, addr);
10984             /* set the new PC value */
10985             if ((insn & 0x0900) == 0x0900) {
10986                 store_reg_from_load(s, 15, tmp);
10987             }
10988             break;
10989
        case 1: case 3: case 9: case 11: /* cbz/cbnz */
10991             rm = insn & 7;
10992             tmp = load_reg(s, rm);
10993             s->condlabel = gen_new_label();
10994             s->condjmp = 1;
10995             if (insn & (1 << 11))
10996                 tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, s->condlabel);
10997             else
10998                 tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, s->condlabel);
10999             tcg_temp_free_i32(tmp);
            offset = ((insn & 0xf8) >> 2) | ((insn & 0x200) >> 3);
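            /* offset = ZeroExtend(i:imm5:'0') with i = insn[9] and
             * imm5 = insn[7:3]: a forward branch range of 0..126 bytes. */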
11001             val = (uint32_t)s->pc + 2;
11002             val += offset;
11003             gen_jmp(s, val);
11004             break;
11005
11006         case 15: /* IT, nop-hint.  */
11007             if ((insn & 0xf) == 0) {
11008                 gen_nop_hint(s, (insn >> 4) & 0xf);
11009                 break;
11010             }
11011             /* If Then.  */
11012             s->condexec_cond = (insn >> 4) & 0xe;
11013             s->condexec_mask = insn & 0x1f;
11014             /* No actual code generated for this insn, just setup state.  */
11015             break;
11016
11017         case 0xe: /* bkpt */
11018         {
11019             int imm8 = extract32(insn, 0, 8);
11020             ARCH(5);
11021             gen_exception_insn(s, 2, EXCP_BKPT, syn_aa32_bkpt(imm8, true),
11022                                default_exception_el(s));
11023             break;
11024         }
11025
11026         case 0xa: /* rev */
11027             ARCH(6);
11028             rn = (insn >> 3) & 0x7;
11029             rd = insn & 0x7;
11030             tmp = load_reg(s, rn);
11031             switch ((insn >> 6) & 3) {
11032             case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
11033             case 1: gen_rev16(tmp); break;
11034             case 3: gen_revsh(tmp); break;
11035             default: goto illegal_op;
11036             }
11037             store_reg(s, rd, tmp);
11038             break;
11039
11040         case 6:
11041             switch ((insn >> 5) & 7) {
11042             case 2:
11043                 /* setend */
11044                 ARCH(6);
11045                 if (((insn >> 3) & 1) != s->bswap_code) {
11046                     /* Dynamic endianness switching not implemented. */
11047                     qemu_log_mask(LOG_UNIMP, "arm: unimplemented setend\n");
11048                     goto illegal_op;
11049                 }
11050                 break;
11051             case 3:
11052                 /* cps */
11053                 ARCH(6);
11054                 if (IS_USER(s)) {
11055                     break;
11056                 }
11057                 if (arm_dc_feature(s, ARM_FEATURE_M)) {
11058                     tmp = tcg_const_i32((insn & (1 << 4)) != 0);
11059                     /* FAULTMASK */
11060                     if (insn & 1) {
11061                         addr = tcg_const_i32(19);
11062                         gen_helper_v7m_msr(cpu_env, addr, tmp);
11063                         tcg_temp_free_i32(addr);
11064                     }
11065                     /* PRIMASK */
11066                     if (insn & 2) {
11067                         addr = tcg_const_i32(16);
11068                         gen_helper_v7m_msr(cpu_env, addr, tmp);
11069                         tcg_temp_free_i32(addr);
11070                     }
11071                     tcg_temp_free_i32(tmp);
11072                     gen_lookup_tb(s);
11073                 } else {
11074                     if (insn & (1 << 4)) {
11075                         shift = CPSR_A | CPSR_I | CPSR_F;
11076                     } else {
11077                         shift = 0;
11078                     }
11079                     gen_set_psr_im(s, ((insn & 7) << 6), 0, shift);
11080                 }
11081                 break;
11082             default:
11083                 goto undef;
11084             }
11085             break;
11086
11087         default:
11088             goto undef;
11089         }
11090         break;
11091
11092     case 12:
11093     {
11094         /* load/store multiple */
11095         TCGv_i32 loaded_var;
11096         TCGV_UNUSED_I32(loaded_var);
11097         rn = (insn >> 8) & 0x7;
11098         addr = load_reg(s, rn);
11099         for (i = 0; i < 8; i++) {
11100             if (insn & (1 << i)) {
11101                 if (insn & (1 << 11)) {
11102                     /* load */
11103                     tmp = tcg_temp_new_i32();
11104                     gen_aa32_ld32u(tmp, addr, get_mem_index(s));
11105                     if (i == rn) {
11106                         loaded_var = tmp;
11107                     } else {
11108                         store_reg(s, i, tmp);
11109                     }
11110                 } else {
11111                     /* store */
11112                     tmp = load_reg(s, i);
11113                     gen_aa32_st32(tmp, addr, get_mem_index(s));
11114                     tcg_temp_free_i32(tmp);
11115                 }
11116                 /* advance to the next address */
11117                 tcg_gen_addi_i32(addr, addr, 4);
11118             }
11119         }
11120         if ((insn & (1 << rn)) == 0) {
11121             /* base reg not in list: base register writeback */
11122             store_reg(s, rn, addr);
11123         } else {
11124             /* base reg in list: if load, complete it now */
11125             if (insn & (1 << 11)) {
11126                 store_reg(s, rn, loaded_var);
11127             }
11128             tcg_temp_free_i32(addr);
11129         }
11130         break;
11131     }
11132     case 13:
11133         /* conditional branch or swi */
11134         cond = (insn >> 8) & 0xf;
11135         if (cond == 0xe)
11136             goto undef;
11137
11138         if (cond == 0xf) {
11139             /* swi */
11140             gen_set_pc_im(s, s->pc);
11141             s->svc_imm = extract32(insn, 0, 8);
11142             s->is_jmp = DISAS_SWI;
11143             break;
11144         }
11145         /* generate a conditional jump to next instruction */
11146         s->condlabel = gen_new_label();
11147         arm_gen_test_cc(cond ^ 1, s->condlabel);
11148         s->condjmp = 1;
11149
11150         /* jump to the offset */
11151         val = (uint32_t)s->pc + 2;
11152         offset = ((int32_t)insn << 24) >> 24;
11153         val += offset << 1;
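        /* B<c> (T1): imm32 = SignExtend(imm8:'0'); the +2 above yields the
         * Arm-visible PC (insn address + 4), since s->pc has already been
         * advanced past this 16-bit insn. */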
11154         gen_jmp(s, val);
11155         break;
11156
11157     case 14:
11158         if (insn & (1 << 11)) {
11159             if (disas_thumb2_insn(env, s, insn))
11160               goto undef32;
11161             break;
11162         }
11163         /* unconditional branch */
11164         val = (uint32_t)s->pc;
11165         offset = ((int32_t)insn << 21) >> 21;
11166         val += (offset << 1) + 2;
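        /* B (T2): imm32 = SignExtend(imm11:'0'), again relative to the
         * insn address + 4 (s->pc + 2 here). */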
11167         gen_jmp(s, val);
11168         break;
11169
11170     case 15:
11171         if (disas_thumb2_insn(env, s, insn))
11172             goto undef32;
11173         break;
11174     }
11175     return;
11176 undef32:
11177     gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
11178                        default_exception_el(s));
11179     return;
11180 illegal_op:
11181 undef:
11182     gen_exception_insn(s, 2, EXCP_UDEF, syn_uncategorized(),
11183                        default_exception_el(s));
11184 }
11185
11186 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
11187 {
    /* Return true if the insn at s->pc might cross a page boundary.
11189      * (False positives are OK, false negatives are not.)
11190      */
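    /* Example: a Thumb insn starting at the last halfword of a page
     * (pc == page end - 2, so (pc & 3) != 0) is flagged when its top five
     * bits decode as a 32-bit insn, since the second halfword would land
     * on the next page; a 16-bit insn at the same address returns false.
     */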
11191     uint16_t insn;
11192
11193     if ((s->pc & 3) == 0) {
11194         /* At a 4-aligned address we can't be crossing a page */
11195         return false;
11196     }
11197
11198     /* This must be a Thumb insn */
11199     insn = arm_lduw_code(env, s->pc, s->bswap_code);
11200
11201     if ((insn >> 11) >= 0x1d) {
11202         /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
11203          * first half of a 32-bit Thumb insn. Thumb-1 cores might
11204          * end up actually treating this as two 16-bit insns (see the
11205          * code at the start of disas_thumb2_insn()) but we don't bother
11206          * to check for that as it is unlikely, and false positives here
11207          * are harmless.
11208          */
11209         return true;
11210     }
11211     /* Definitely a 16-bit insn, can't be crossing a page. */
11212     return false;
11213 }
11214
11215 /* generate TCG intermediate code (into the TCG op buffer) for
11216    basic block 'tb'.  */
11217 void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
11218 {
11219     ARMCPU *cpu = arm_env_get_cpu(env);
11220     CPUState *cs = CPU(cpu);
11221     DisasContext dc1, *dc = &dc1;
11222     target_ulong pc_start;
11223     target_ulong next_page_start;
11224     int num_insns;
11225     int max_insns;
11226     bool end_of_page;
11227
11228     /* generate intermediate code */
11229
11230     /* The A64 decoder has its own top level loop, because it doesn't need
11231      * the A32/T32 complexity to do with conditional execution/IT blocks/etc.
11232      */
11233     if (ARM_TBFLAG_AARCH64_STATE(tb->flags)) {
11234         gen_intermediate_code_a64(cpu, tb);
11235         return;
11236     }
11237
11238     pc_start = tb->pc;
11239
11240     dc->tb = tb;
11241
11242     dc->is_jmp = DISAS_NEXT;
11243     dc->pc = pc_start;
11244     dc->singlestep_enabled = cs->singlestep_enabled;
11245     dc->condjmp = 0;
11246
11247     dc->aarch64 = 0;
11248     /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
11249      * there is no secure EL1, so we route exceptions to EL3.
11250      */
11251     dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
11252                                !arm_el_is_aa64(env, 3);
11253     dc->thumb = ARM_TBFLAG_THUMB(tb->flags);
11254     dc->bswap_code = ARM_TBFLAG_BSWAP_CODE(tb->flags);
11255     dc->condexec_mask = (ARM_TBFLAG_CONDEXEC(tb->flags) & 0xf) << 1;
11256     dc->condexec_cond = ARM_TBFLAG_CONDEXEC(tb->flags) >> 4;
11257     dc->mmu_idx = ARM_TBFLAG_MMUIDX(tb->flags);
11258     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
11259 #if !defined(CONFIG_USER_ONLY)
11260     dc->user = (dc->current_el == 0);
11261 #endif
11262     dc->ns = ARM_TBFLAG_NS(tb->flags);
11263     dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(tb->flags);
11264     dc->vfp_enabled = ARM_TBFLAG_VFPEN(tb->flags);
11265     dc->vec_len = ARM_TBFLAG_VECLEN(tb->flags);
11266     dc->vec_stride = ARM_TBFLAG_VECSTRIDE(tb->flags);
11267     dc->c15_cpar = ARM_TBFLAG_XSCALE_CPAR(tb->flags);
11268     dc->cp_regs = cpu->cp_regs;
11269     dc->features = env->features;
11270
11271     /* Single step state. The code-generation logic here is:
11272      *  SS_ACTIVE == 0:
11273      *   generate code with no special handling for single-stepping (except
11274      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
11275      *   this happens anyway because those changes are all system register or
11276      *   PSTATE writes).
11277      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
11278      *   emit code for one insn
11279      *   emit code to clear PSTATE.SS
11280      *   emit code to generate software step exception for completed step
11281      *   end TB (as usual for having generated an exception)
11282      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
11283      *   emit code to generate a software step exception
11284      *   end the TB
11285      */
11286     dc->ss_active = ARM_TBFLAG_SS_ACTIVE(tb->flags);
11287     dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(tb->flags);
11288     dc->is_ldex = false;
11289     dc->ss_same_el = false; /* Can't be true since EL_d must be AArch64 */
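    /* The active-pending case is handled at the top of the translation
     * loop below (we raise the swstep exception before translating
     * anything); active-not-pending is handled after the loop, where the
     * PSTATE.SS clear is emitted via gen_ss_advance() before the
     * exception.
     */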
11290
11291     cpu_F0s = tcg_temp_new_i32();
11292     cpu_F1s = tcg_temp_new_i32();
11293     cpu_F0d = tcg_temp_new_i64();
11294     cpu_F1d = tcg_temp_new_i64();
11295     cpu_V0 = cpu_F0d;
11296     cpu_V1 = cpu_F1d;
11297     /* FIXME: cpu_M0 can probably be the same as cpu_V0.  */
11298     cpu_M0 = tcg_temp_new_i64();
11299     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
11300     num_insns = 0;
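    /* Choose the TB instruction budget: the count requested via
     * tb->cflags (as used by icount) when present, otherwise effectively
     * unlimited, and in either case capped at TCG_MAX_INSNS.
     */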
11301     max_insns = tb->cflags & CF_COUNT_MASK;
11302     if (max_insns == 0) {
11303         max_insns = CF_COUNT_MASK;
11304     }
11305     if (max_insns > TCG_MAX_INSNS) {
11306         max_insns = TCG_MAX_INSNS;
11307     }
11308
11309     gen_tb_start(tb);
11310
11311     tcg_clear_temp_count();
11312
11313     /* A note on handling of the condexec (IT) bits:
11314      *
11315      * We want to avoid the overhead of having to write the updated condexec
11316      * bits back to the CPUARMState for every instruction in an IT block. So:
11317      * (1) if the condexec bits are not already zero then we write
11318      * zero back into the CPUARMState now. This avoids complications trying
11319      * to do it at the end of the block. (For example if we don't do this
11320      * it's hard to identify whether we can safely skip writing condexec
11321      * at the end of the TB, which we definitely want to do for the case
11322      * where a TB doesn't do anything with the IT state at all.)
11323      * (2) if we are going to leave the TB then we call gen_set_condexec()
11324      * which will write the correct value into CPUARMState if zero is wrong.
11325      * This is done both for leaving the TB at the end, and for leaving
11326      * it because of an exception we know will happen, which is done in
11327      * gen_exception_insn(). The latter is necessary because we need to
11328      * leave the TB with the PC/IT state just prior to execution of the
11329      * instruction which caused the exception.
11330      * (3) if we leave the TB unexpectedly (e.g. a data abort on a load)
11331      * then the CPUARMState will be wrong and we need to reset it.
11332      * This is handled in the same way as restoration of the
11333      * PC in these situations; we save the value of the condexec bits
11334      * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
11335      * then uses this to restore them after an exception.
11336      *
11337      * Note that there are no instructions which can read the condexec
11338      * bits, and none which can write non-static values to them, so
11339      * we don't need to care about whether CPUARMState is correct in the
11340      * middle of a TB.
11341      */
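    /* Concretely, the per-insn value saved via tcg_gen_insn_start() below
     * packs condexec_cond into bits [7:4] and the mask (shifted back down)
     * into bits [3:0] -- the same layout as env->condexec_bits, so
     * restore_state_to_opc() can write it straight back.
     */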
11342
11343     /* Reset the conditional execution bits immediately. This avoids
11344        complications trying to do it at the end of the block.  */
11345     if (dc->condexec_mask || dc->condexec_cond) {
11346         TCGv_i32 tmp = tcg_temp_new_i32();
11347         tcg_gen_movi_i32(tmp, 0);
11348         store_cpu_field(tmp, condexec_bits);
11349     }
11350
11351     do {
11352         tcg_gen_insn_start(dc->pc,
11353                            (dc->condexec_cond << 4) | (dc->condexec_mask >> 1));
11354         num_insns++;
11355
11356 #ifdef CONFIG_USER_ONLY
11357         /* Intercept jump to the magic kernel page.  */
11358         if (dc->pc >= 0xffff0000) {
11359             /* We always get here via a jump, so know we are not in a
11360                conditional execution block.  */
11361             gen_exception_internal(EXCP_KERNEL_TRAP);
11362             dc->is_jmp = DISAS_EXC;
11363             break;
11364         }
11365 #else
11366         if (dc->pc >= 0xfffffff0 && arm_dc_feature(dc, ARM_FEATURE_M)) {
11367             /* We always get here via a jump, so know we are not in a
11368                conditional execution block.  */
11369             gen_exception_internal(EXCP_EXCEPTION_EXIT);
11370             dc->is_jmp = DISAS_EXC;
11371             break;
11372         }
11373 #endif
11374
11375         if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
11376             CPUBreakpoint *bp;
11377             QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
11378                 if (bp->pc == dc->pc) {
11379                     if (bp->flags & BP_CPU) {
11380                         gen_set_condexec(dc);
11381                         gen_set_pc_im(dc, dc->pc);
11382                         gen_helper_check_breakpoints(cpu_env);
11383                         /* End the TB early; it's likely not going to be executed */
11384                         dc->is_jmp = DISAS_UPDATE;
11385                     } else {
11386                         gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
11387                         /* The address covered by the breakpoint must be
11388                            included in [tb->pc, tb->pc + tb->size) in order
11389                            for it to be properly cleared -- thus we
11390                            increment the PC here so that the logic setting
11391                            tb->size below does the right thing.  */
11392                         /* TODO: Advance PC by correct instruction length to
11393                          * avoid disassembler error messages */
11394                         dc->pc += 2;
11395                         goto done_generating;
11396                     }
11397                     break;
11398                 }
11399             }
11400         }
11401
11402         if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
11403             gen_io_start();
11404         }
11405
11406         if (dc->ss_active && !dc->pstate_ss) {
11407             /* Singlestep state is Active-pending.
11408              * If we're in this state at the start of a TB then either
11409              *  a) we just took an exception to an EL which is being debugged
11410              *     and this is the first insn in the exception handler
11411              *  b) debug exceptions were masked and we just unmasked them
11412      *     without changing EL (e.g. by clearing PSTATE.D)
11413              * In either case we're going to take a swstep exception in the
11414              * "did not step an insn" case, and so the syndrome ISV and EX
11415              * bits should be zero.
11416              */
11417             assert(num_insns == 1);
11418             gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
11419                           default_exception_el(dc));
11420             goto done_generating;
11421         }
11422
11423         if (dc->thumb) {
11424             disas_thumb_insn(env, dc);
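            /* Advance the IT state: the top bit of the 5-bit mask shifts
             * into the low bit of the base condition, and once the mask
             * empties the whole state is cleared -- effectively the
             * architectural ITAdvance() applied to our local copy.
             */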
11425             if (dc->condexec_mask) {
11426                 dc->condexec_cond = (dc->condexec_cond & 0xe)
11427                                    | ((dc->condexec_mask >> 4) & 1);
11428                 dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
11429                 if (dc->condexec_mask == 0) {
11430                     dc->condexec_cond = 0;
11431                 }
11432             }
11433         } else {
11434             unsigned int insn = arm_ldl_code(env, dc->pc, dc->bswap_code);
11435             dc->pc += 4;
11436             disas_arm_insn(dc, insn);
11437         }
11438
11439         if (dc->condjmp && !dc->is_jmp) {
11440             gen_set_label(dc->condlabel);
11441             dc->condjmp = 0;
11442         }
11443
11444         if (tcg_check_temp_count()) {
11445             fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n",
11446                     dc->pc);
11447         }
11448
11449         /* Translation stops when a conditional branch is encountered.
11450          * Otherwise the subsequent code could get translated several times.
11451          * Also stop translation when a page boundary is reached.  This
11452          * ensures prefetch aborts occur at the right place.  */
11453
11454         /* We want to stop the TB if the next insn starts in a new page,
11455          * or if it spans between this page and the next. This means that
11456          * if we're looking at the last halfword in the page we need to
11457          * see if it's a 16-bit Thumb insn (which will fit in this TB)
11458          * or a 32-bit Thumb insn (which won't).
11459          * This is to avoid generating a silly TB with a single 16-bit insn
11460          * in it at the end of this page (which would execute correctly
11461          * but isn't very efficient).
11462          */
11463         end_of_page = (dc->pc >= next_page_start) ||
11464             ((dc->pc >= next_page_start - 3) && insn_crosses_page(env, dc));
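        /* (In Thumb state the only pc value inside [next_page_start - 3,
         * next_page_start) is next_page_start - 2, i.e. the last halfword
         * of the page -- exactly where a 32-bit insn could straddle the
         * boundary.)
         */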
11465
11466     } while (!dc->is_jmp && !tcg_op_buf_full() &&
11467              !cs->singlestep_enabled &&
11468              !singlestep &&
11469              !dc->ss_active &&
11470              !end_of_page &&
11471              num_insns < max_insns);
11472
11473     if (tb->cflags & CF_LAST_IO) {
11474         if (dc->condjmp) {
11475             /* FIXME:  This can theoretically happen with self-modifying
11476                code.  */
11477             cpu_abort(cs, "IO on conditional branch instruction");
11478         }
11479         gen_io_end();
11480     }
11481
11482     /* At this stage dc->condjmp will only be set when the skipped
11483        instruction was a conditional branch or trap, and the PC has
11484        already been written.  */
11485     if (unlikely(cs->singlestep_enabled || dc->ss_active)) {
11486         /* Unconditional and "condition passed" instruction codepath. */
11487         gen_set_condexec(dc);
11488         switch (dc->is_jmp) {
11489         case DISAS_SWI:
11490             gen_ss_advance(dc);
11491             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
11492                           default_exception_el(dc));
11493             break;
11494         case DISAS_HVC:
11495             gen_ss_advance(dc);
11496             gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
11497             break;
11498         case DISAS_SMC:
11499             gen_ss_advance(dc);
11500             gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
11501             break;
11502         case DISAS_NEXT:
11503         case DISAS_UPDATE:
11504             gen_set_pc_im(dc, dc->pc);
11505             /* fall through */
11506         default:
11507             if (dc->ss_active) {
11508                 gen_step_complete_exception(dc);
11509             } else {
11510                 /* FIXME: Single stepping a WFI insn will not halt
11511                    the CPU.  */
11512                 gen_exception_internal(EXCP_DEBUG);
11513             }
11514         }
11515         if (dc->condjmp) {
11516             /* "Condition failed" instruction codepath. */
11517             gen_set_label(dc->condlabel);
11518             gen_set_condexec(dc);
11519             gen_set_pc_im(dc, dc->pc);
11520             if (dc->ss_active) {
11521                 gen_step_complete_exception(dc);
11522             } else {
11523                 gen_exception_internal(EXCP_DEBUG);
11524             }
11525         }
11526     } else {
11527         /* While branches must always occur at the end of an IT block,
11528            there are a few other things that can cause us to terminate
11529            the TB in the middle of an IT block:
11530             - Exception generating instructions (bkpt, swi, undefined).
11531             - Page boundaries.
11532             - Hardware watchpoints.
11533            Hardware breakpoints have already been handled and skip this code.
11534          */
11535         gen_set_condexec(dc);
11536         switch (dc->is_jmp) {
11537         case DISAS_NEXT:
11538             gen_goto_tb(dc, 1, dc->pc);
11539             break;
11540         case DISAS_UPDATE:
11541             gen_set_pc_im(dc, dc->pc);
11542             /* fall through */
11543         case DISAS_JUMP:
11544         default:
11545             /* indicate that the hash table must be used to find the next TB */
11546             tcg_gen_exit_tb(0);
11547             break;
11548         case DISAS_TB_JUMP:
11549             /* nothing more to generate */
11550             break;
11551         case DISAS_WFI:
11552             gen_helper_wfi(cpu_env);
11553             /* The helper doesn't necessarily throw an exception, but we
11554              * must go back to the main loop to check for interrupts anyway.
11555              */
11556             tcg_gen_exit_tb(0);
11557             break;
11558         case DISAS_WFE:
11559             gen_helper_wfe(cpu_env);
11560             break;
11561         case DISAS_YIELD:
11562             gen_helper_yield(cpu_env);
11563             break;
11564         case DISAS_SWI:
11565             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
11566                           default_exception_el(dc));
11567             break;
11568         case DISAS_HVC:
11569             gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
11570             break;
11571         case DISAS_SMC:
11572             gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
11573             break;
11574         }
11575         if (dc->condjmp) {
11576             gen_set_label(dc->condlabel);
11577             gen_set_condexec(dc);
11578             gen_goto_tb(dc, 1, dc->pc);
11579             dc->condjmp = 0;
11580         }
11581     }
11582
11583 done_generating:
11584     gen_tb_end(tb, num_insns);
11585
11586 #ifdef DEBUG_DISAS
11587     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
11588         qemu_log("----------------\n");
11589         qemu_log("IN: %s\n", lookup_symbol(pc_start));
11590         log_target_disas(cs, pc_start, dc->pc - pc_start,
11591                          dc->thumb | (dc->bswap_code << 1));
11592         qemu_log("\n");
11593     }
11594 #endif
11595     tb->size = dc->pc - pc_start;
11596     tb->icount = num_insns;
11597 }
11598
11599 static const char *cpu_mode_names[16] = {
11600   "usr", "fiq", "irq", "svc", "???", "???", "mon", "abt",
11601   "???", "???", "hyp", "und", "???", "???", "???", "sys"
11602 };
11603
11604 void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
11605                         int flags)
11606 {
11607     ARMCPU *cpu = ARM_CPU(cs);
11608     CPUARMState *env = &cpu->env;
11609     int i;
11610     uint32_t psr;
11611     const char *ns_status;
11612
11613     if (is_a64(env)) {
11614         aarch64_cpu_dump_state(cs, f, cpu_fprintf, flags);
11615         return;
11616     }
11617
11618     for (i = 0; i < 16; i++) {
11619         cpu_fprintf(f, "R%02d=%08x", i, env->regs[i]);
11620         if ((i % 4) == 3)
11621             cpu_fprintf(f, "\n");
11622         else
11623             cpu_fprintf(f, " ");
11624     }
11625     psr = cpsr_read(env);
11626
11627     if (arm_feature(env, ARM_FEATURE_EL3) &&
11628         (psr & CPSR_M) != ARM_CPU_MODE_MON) {
11629         ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
11630     } else {
11631         ns_status = "";
11632     }
11633
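    /* Flag characters, then 'T' for Thumb or 'A' for ARM state, the
     * NS/S banner if EL3 exists, the mode name, and the register width:
     * PSR bit 4 set denotes a 32-bit mode (clear would be a legacy
     * 26-bit mode).
     */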
11634     cpu_fprintf(f, "PSR=%08x %c%c%c%c %c %s%s%d\n",
11635                 psr,
11636                 psr & (1 << 31) ? 'N' : '-',
11637                 psr & (1 << 30) ? 'Z' : '-',
11638                 psr & (1 << 29) ? 'C' : '-',
11639                 psr & (1 << 28) ? 'V' : '-',
11640                 psr & CPSR_T ? 'T' : 'A',
11641                 ns_status,
11642                 cpu_mode_names[psr & 0xf], (psr & 0x10) ? 32 : 26);
11643
11644     if (flags & CPU_DUMP_FPU) {
11645         int numvfpregs = 0;
11646         if (arm_feature(env, ARM_FEATURE_VFP)) {
11647             numvfpregs += 16;
11648         }
11649         if (arm_feature(env, ARM_FEATURE_VFP3)) {
11650             numvfpregs += 16;
11651         }
11652         for (i = 0; i < numvfpregs; i++) {
11653             uint64_t v = float64_val(env->vfp.regs[i]);
11654             cpu_fprintf(f, "s%02d=%08x s%02d=%08x d%02d=%016" PRIx64 "\n",
11655                         i * 2, (uint32_t)v,
11656                         i * 2 + 1, (uint32_t)(v >> 32),
11657                         i, v);
11658         }
11659         cpu_fprintf(f, "FPSCR: %08x\n", (int)env->vfp.xregs[ARM_VFP_FPSCR]);
11660     }
11661 }
11662
11663 void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
11664                           target_ulong *data)
11665 {
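    /* The data[] slots mirror the operands the translators pass to
     * tcg_gen_insn_start(): data[0] is the PC, and for AArch32 TBs
     * data[1] is the packed condexec (IT) state saved above.
     */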
11666     if (is_a64(env)) {
11667         env->pc = data[0];
11668         env->condexec_bits = 0;
11669     } else {
11670         env->regs[15] = data[0];
11671         env->condexec_bits = data[1];
11672     }
11673 }