]>
Commit | Line | Data |
---|---|---|
0c51ed93 DM |
1 | /* visemul.c: Emulation of VIS instructions. |
2 | * | |
3 | * Copyright (C) 2006 David S. Miller ([email protected]) | |
4 | */ | |
5 | #include <linux/kernel.h> | |
6 | #include <linux/errno.h> | |
7 | #include <linux/thread_info.h> | |
121dd5f2 | 8 | #include <linux/perf_event.h> |
0c51ed93 DM |
9 | |
10 | #include <asm/ptrace.h> | |
11 | #include <asm/pstate.h> | |
0c51ed93 DM |
12 | #include <asm/fpumacro.h> |
13 | #include <asm/uaccess.h> | |
d550bbd4 | 14 | #include <asm/cacheflush.h> |
0c51ed93 DM |
15 | |
16 | /* OPF field of various VIS instructions. */ | |
17 | ||
18 | /* 000111011 - four 16-bit packs */ | |
19 | #define FPACK16_OPF 0x03b | |
20 | ||
21 | /* 000111010 - two 32-bit packs */ | |
22 | #define FPACK32_OPF 0x03a | |
23 | ||
24 | /* 000111101 - two 32-bit to 16-bit fixed-point packs */ | |
25 | #define FPACKFIX_OPF 0x03d | |
26 | ||
27 | /* 001001101 - four 16-bit expands */ | |
28 | #define FEXPAND_OPF 0x04d | |
29 | ||
30 | /* 001001011 - two 32-bit merges */ | |
31 | #define FPMERGE_OPF 0x04b | |
32 | ||
33 | /* 000110001 - 8-by-16-bit partitioned product */ | |
34 | #define FMUL8x16_OPF 0x031 | |
35 | ||
36 | /* 000110011 - 8-by-16-bit upper alpha partitioned product */ | |
37 | #define FMUL8x16AU_OPF 0x033 | |
38 | ||
39 | /* 000110101 - 8-by-16-bit lower alpha partitioned product */ | |
40 | #define FMUL8x16AL_OPF 0x035 | |
41 | ||
42 | /* 000110110 - upper 8-by-16-bit partitioned product */ | |
43 | #define FMUL8SUx16_OPF 0x036 | |
44 | ||
45 | /* 000110111 - lower 8-by-16-bit partitioned product */ | |
46 | #define FMUL8ULx16_OPF 0x037 | |
47 | ||
48 | /* 000111000 - upper 8-by-16-bit partitioned product */ | |
49 | #define FMULD8SUx16_OPF 0x038 | |
50 | ||
51 | /* 000111001 - lower unsigned 8-by-16-bit partitioned product */ | |
52 | #define FMULD8ULx16_OPF 0x039 | |
53 | ||
54 | /* 000101000 - four 16-bit compare; set rd if src1 > src2 */ | |
55 | #define FCMPGT16_OPF 0x028 | |
56 | ||
57 | /* 000101100 - two 32-bit compare; set rd if src1 > src2 */ | |
58 | #define FCMPGT32_OPF 0x02c | |
59 | ||
60 | /* 000100000 - four 16-bit compare; set rd if src1 <= src2 */ | |
61 | #define FCMPLE16_OPF 0x020 | |
62 | ||
63 | /* 000100100 - two 32-bit compare; set rd if src1 <= src2 */ | |
64 | #define FCMPLE32_OPF 0x024 | |
65 | ||
66 | /* 000100010 - four 16-bit compare; set rd if src1 != src2 */ | |
67 | #define FCMPNE16_OPF 0x022 | |
68 | ||
69 | /* 000100110 - two 32-bit compare; set rd if src1 != src2 */ | |
70 | #define FCMPNE32_OPF 0x026 | |
71 | ||
72 | /* 000101010 - four 16-bit compare; set rd if src1 == src2 */ | |
73 | #define FCMPEQ16_OPF 0x02a | |
74 | ||
75 | /* 000101110 - two 32-bit compare; set rd if src1 == src2 */ | |
76 | #define FCMPEQ32_OPF 0x02e | |
77 | ||
78 | /* 000000000 - Eight 8-bit edge boundary processing */ | |
79 | #define EDGE8_OPF 0x000 | |
80 | ||
81 | /* 000000001 - Eight 8-bit edge boundary processing, no CC */ | |
82 | #define EDGE8N_OPF 0x001 | |
83 | ||
84 | /* 000000010 - Eight 8-bit edge boundary processing, little-endian */ | |
85 | #define EDGE8L_OPF 0x002 | |
86 | ||
87 | /* 000000011 - Eight 8-bit edge boundary processing, little-endian, no CC */ | |
88 | #define EDGE8LN_OPF 0x003 | |
89 | ||
90 | /* 000000100 - Four 16-bit edge boundary processing */ | |
91 | #define EDGE16_OPF 0x004 | |
92 | ||
93 | /* 000000101 - Four 16-bit edge boundary processing, no CC */ | |
94 | #define EDGE16N_OPF 0x005 | |
95 | ||
96 | /* 000000110 - Four 16-bit edge boundary processing, little-endian */ | |
97 | #define EDGE16L_OPF 0x006 | |
98 | ||
99 | /* 000000111 - Four 16-bit edge boundary processing, little-endian, no CC */ | |
100 | #define EDGE16LN_OPF 0x007 | |
101 | ||
102 | /* 000001000 - Two 32-bit edge boundary processing */ | |
103 | #define EDGE32_OPF 0x008 | |
104 | ||
105 | /* 000001001 - Two 32-bit edge boundary processing, no CC */ | |
106 | #define EDGE32N_OPF 0x009 | |
107 | ||
108 | /* 000001010 - Two 32-bit edge boundary processing, little-endian */ | |
109 | #define EDGE32L_OPF 0x00a | |
110 | ||
111 | /* 000001011 - Two 32-bit edge boundary processing, little-endian, no CC */ | |
112 | #define EDGE32LN_OPF 0x00b | |
113 | ||
114 | /* 000111110 - distance between 8 8-bit components */ | |
115 | #define PDIST_OPF 0x03e | |
116 | ||
117 | /* 000010000 - convert 8-bit 3-D address to blocked byte address */ | |
118 | #define ARRAY8_OPF 0x010 | |
119 | ||
120 | /* 000010010 - convert 16-bit 3-D address to blocked byte address */ | |
121 | #define ARRAY16_OPF 0x012 | |
122 | ||
123 | /* 000010100 - convert 32-bit 3-D address to blocked byte address */ | |
124 | #define ARRAY32_OPF 0x014 | |
125 | ||
126 | /* 000011001 - Set the GSR.MASK field in preparation for a BSHUFFLE */ | |
127 | #define BMASK_OPF 0x019 | |
128 | ||
129 | /* 001001100 - Permute bytes as specified by GSR.MASK */ | |
130 | #define BSHUFFLE_OPF 0x04c | |
131 | ||
0c51ed93 DM |
132 | #define VIS_OPF_SHIFT 5 |
133 | #define VIS_OPF_MASK (0x1ff << VIS_OPF_SHIFT) | |
134 | ||
726c12f5 | 135 | #define RS1(INSN) (((INSN) >> 14) & 0x1f) |
0c51ed93 DM |
136 | #define RS2(INSN) (((INSN) >> 0) & 0x1f) |
137 | #define RD(INSN) (((INSN) >> 25) & 0x1f) | |
138 | ||
/* Flush the register windows when any operand register number is 16
 * or above; such registers are accessed through the register window
 * saved at the frame pointer (see fetch_reg()/store_reg()).
 *
 * @rs1, @rs2, @rd: register numbers decoded from the instruction
 * @from_kernel:    non-zero to issue a plain "flushw", zero to call
 *                  flushw_user()
 */
static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2,
				       unsigned int rd, int from_kernel)
{
	/* All operands are below 16: nothing to flush. */
	if (rs1 < 16 && rs2 < 16 && rd < 16)
		return;

	if (from_kernel == 0)
		flushw_user();
	else
		__asm__ __volatile__("flushw");
}
149 | ||
150 | static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs) | |
151 | { | |
517ffce4 | 152 | unsigned long value, fp; |
0c51ed93 DM |
153 | |
154 | if (reg < 16) | |
155 | return (!reg ? 0 : regs->u_regs[reg]); | |
517ffce4 DM |
156 | |
157 | fp = regs->u_regs[UREG_FP]; | |
158 | ||
0c51ed93 DM |
159 | if (regs->tstate & TSTATE_PRIV) { |
160 | struct reg_window *win; | |
517ffce4 | 161 | win = (struct reg_window *)(fp + STACK_BIAS); |
0c51ed93 | 162 | value = win->locals[reg - 16]; |
517ffce4 | 163 | } else if (!test_thread_64bit_stack(fp)) { |
0c51ed93 | 164 | struct reg_window32 __user *win32; |
517ffce4 | 165 | win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp)); |
0c51ed93 DM |
166 | get_user(value, &win32->locals[reg - 16]); |
167 | } else { | |
168 | struct reg_window __user *win; | |
517ffce4 | 169 | win = (struct reg_window __user *)(fp + STACK_BIAS); |
0c51ed93 DM |
170 | get_user(value, &win->locals[reg - 16]); |
171 | } | |
172 | return value; | |
173 | } | |
174 | ||
175 | static inline unsigned long __user *__fetch_reg_addr_user(unsigned int reg, | |
176 | struct pt_regs *regs) | |
177 | { | |
517ffce4 DM |
178 | unsigned long fp = regs->u_regs[UREG_FP]; |
179 | ||
0c51ed93 DM |
180 | BUG_ON(reg < 16); |
181 | BUG_ON(regs->tstate & TSTATE_PRIV); | |
182 | ||
517ffce4 | 183 | if (!test_thread_64bit_stack(fp)) { |
0c51ed93 | 184 | struct reg_window32 __user *win32; |
517ffce4 | 185 | win32 = (struct reg_window32 __user *)((unsigned long)((u32)fp)); |
0c51ed93 DM |
186 | return (unsigned long __user *)&win32->locals[reg - 16]; |
187 | } else { | |
188 | struct reg_window __user *win; | |
517ffce4 | 189 | win = (struct reg_window __user *)(fp + STACK_BIAS); |
0c51ed93 DM |
190 | return &win->locals[reg - 16]; |
191 | } | |
192 | } | |
193 | ||
194 | static inline unsigned long *__fetch_reg_addr_kern(unsigned int reg, | |
195 | struct pt_regs *regs) | |
196 | { | |
197 | BUG_ON(reg >= 16); | |
198 | BUG_ON(regs->tstate & TSTATE_PRIV); | |
199 | ||
200 | return ®s->u_regs[reg]; | |
201 | } | |
202 | ||
203 | static void store_reg(struct pt_regs *regs, unsigned long val, unsigned long rd) | |
204 | { | |
205 | if (rd < 16) { | |
206 | unsigned long *rd_kern = __fetch_reg_addr_kern(rd, regs); | |
207 | ||
208 | *rd_kern = val; | |
209 | } else { | |
210 | unsigned long __user *rd_user = __fetch_reg_addr_user(rd, regs); | |
211 | ||
517ffce4 | 212 | if (!test_thread_64bit_stack(regs->u_regs[UREG_FP])) |
0c51ed93 DM |
213 | __put_user((u32)val, (u32 __user *)rd_user); |
214 | else | |
215 | __put_user(val, rd_user); | |
216 | } | |
217 | } | |
218 | ||
219 | static inline unsigned long fpd_regval(struct fpustate *f, | |
220 | unsigned int insn_regnum) | |
221 | { | |
222 | insn_regnum = (((insn_regnum & 1) << 5) | | |
223 | (insn_regnum & 0x1e)); | |
224 | ||
225 | return *(unsigned long *) &f->regs[insn_regnum]; | |
226 | } | |
227 | ||
228 | static inline unsigned long *fpd_regaddr(struct fpustate *f, | |
229 | unsigned int insn_regnum) | |
230 | { | |
231 | insn_regnum = (((insn_regnum & 1) << 5) | | |
232 | (insn_regnum & 0x1e)); | |
233 | ||
234 | return (unsigned long *) &f->regs[insn_regnum]; | |
235 | } | |
236 | ||
237 | static inline unsigned int fps_regval(struct fpustate *f, | |
238 | unsigned int insn_regnum) | |
239 | { | |
240 | return f->regs[insn_regnum]; | |
241 | } | |
242 | ||
243 | static inline unsigned int *fps_regaddr(struct fpustate *f, | |
244 | unsigned int insn_regnum) | |
245 | { | |
246 | return &f->regs[insn_regnum]; | |
247 | } | |
248 | ||
249 | struct edge_tab { | |
250 | u16 left, right; | |
251 | }; | |
7e0b1e61 | 252 | static struct edge_tab edge8_tab[8] = { |
0c51ed93 DM |
253 | { 0xff, 0x80 }, |
254 | { 0x7f, 0xc0 }, | |
255 | { 0x3f, 0xe0 }, | |
256 | { 0x1f, 0xf0 }, | |
257 | { 0x0f, 0xf8 }, | |
258 | { 0x07, 0xfc }, | |
259 | { 0x03, 0xfe }, | |
260 | { 0x01, 0xff }, | |
261 | }; | |
7e0b1e61 | 262 | static struct edge_tab edge8_tab_l[8] = { |
0c51ed93 DM |
263 | { 0xff, 0x01 }, |
264 | { 0xfe, 0x03 }, | |
265 | { 0xfc, 0x07 }, | |
266 | { 0xf8, 0x0f }, | |
267 | { 0xf0, 0x1f }, | |
268 | { 0xe0, 0x3f }, | |
269 | { 0xc0, 0x7f }, | |
270 | { 0x80, 0xff }, | |
271 | }; | |
7e0b1e61 | 272 | static struct edge_tab edge16_tab[4] = { |
0c51ed93 DM |
273 | { 0xf, 0x8 }, |
274 | { 0x7, 0xc }, | |
275 | { 0x3, 0xe }, | |
276 | { 0x1, 0xf }, | |
277 | }; | |
7e0b1e61 | 278 | static struct edge_tab edge16_tab_l[4] = { |
0c51ed93 DM |
279 | { 0xf, 0x1 }, |
280 | { 0xe, 0x3 }, | |
281 | { 0xc, 0x7 }, | |
282 | { 0x8, 0xf }, | |
283 | }; | |
7e0b1e61 | 284 | static struct edge_tab edge32_tab[2] = { |
0c51ed93 DM |
285 | { 0x3, 0x2 }, |
286 | { 0x1, 0x3 }, | |
287 | }; | |
7e0b1e61 | 288 | static struct edge_tab edge32_tab_l[2] = { |
0c51ed93 DM |
289 | { 0x3, 0x1 }, |
290 | { 0x2, 0x3 }, | |
291 | }; | |
292 | ||
293 | static void edge(struct pt_regs *regs, unsigned int insn, unsigned int opf) | |
294 | { | |
295 | unsigned long orig_rs1, rs1, orig_rs2, rs2, rd_val; | |
296 | u16 left, right; | |
297 | ||
298 | maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0); | |
299 | orig_rs1 = rs1 = fetch_reg(RS1(insn), regs); | |
300 | orig_rs2 = rs2 = fetch_reg(RS2(insn), regs); | |
301 | ||
302 | if (test_thread_flag(TIF_32BIT)) { | |
303 | rs1 = rs1 & 0xffffffff; | |
304 | rs2 = rs2 & 0xffffffff; | |
305 | } | |
306 | switch (opf) { | |
307 | default: | |
308 | case EDGE8_OPF: | |
309 | case EDGE8N_OPF: | |
310 | left = edge8_tab[rs1 & 0x7].left; | |
311 | right = edge8_tab[rs2 & 0x7].right; | |
312 | break; | |
313 | case EDGE8L_OPF: | |
314 | case EDGE8LN_OPF: | |
315 | left = edge8_tab_l[rs1 & 0x7].left; | |
316 | right = edge8_tab_l[rs2 & 0x7].right; | |
317 | break; | |
318 | ||
319 | case EDGE16_OPF: | |
320 | case EDGE16N_OPF: | |
321 | left = edge16_tab[(rs1 >> 1) & 0x3].left; | |
322 | right = edge16_tab[(rs2 >> 1) & 0x3].right; | |
323 | break; | |
324 | ||
325 | case EDGE16L_OPF: | |
326 | case EDGE16LN_OPF: | |
327 | left = edge16_tab_l[(rs1 >> 1) & 0x3].left; | |
328 | right = edge16_tab_l[(rs2 >> 1) & 0x3].right; | |
329 | break; | |
330 | ||
331 | case EDGE32_OPF: | |
332 | case EDGE32N_OPF: | |
333 | left = edge32_tab[(rs1 >> 2) & 0x1].left; | |
334 | right = edge32_tab[(rs2 >> 2) & 0x1].right; | |
335 | break; | |
336 | ||
337 | case EDGE32L_OPF: | |
338 | case EDGE32LN_OPF: | |
339 | left = edge32_tab_l[(rs1 >> 2) & 0x1].left; | |
340 | right = edge32_tab_l[(rs2 >> 2) & 0x1].right; | |
341 | break; | |
6cb79b3f | 342 | } |
0c51ed93 DM |
343 | |
344 | if ((rs1 & ~0x7UL) == (rs2 & ~0x7UL)) | |
345 | rd_val = right & left; | |
346 | else | |
347 | rd_val = left; | |
348 | ||
349 | store_reg(regs, rd_val, RD(insn)); | |
350 | ||
351 | switch (opf) { | |
352 | case EDGE8_OPF: | |
353 | case EDGE8L_OPF: | |
354 | case EDGE16_OPF: | |
355 | case EDGE16L_OPF: | |
356 | case EDGE32_OPF: | |
357 | case EDGE32L_OPF: { | |
358 | unsigned long ccr, tstate; | |
359 | ||
360 | __asm__ __volatile__("subcc %1, %2, %%g0\n\t" | |
361 | "rd %%ccr, %0" | |
362 | : "=r" (ccr) | |
363 | : "r" (orig_rs1), "r" (orig_rs2) | |
364 | : "cc"); | |
365 | tstate = regs->tstate & ~(TSTATE_XCC | TSTATE_ICC); | |
366 | regs->tstate = tstate | (ccr << 32UL); | |
367 | } | |
6cb79b3f | 368 | } |
0c51ed93 DM |
369 | } |
370 | ||
371 | static void array(struct pt_regs *regs, unsigned int insn, unsigned int opf) | |
372 | { | |
373 | unsigned long rs1, rs2, rd_val; | |
374 | unsigned int bits, bits_mask; | |
375 | ||
376 | maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0); | |
377 | rs1 = fetch_reg(RS1(insn), regs); | |
378 | rs2 = fetch_reg(RS2(insn), regs); | |
379 | ||
380 | bits = (rs2 > 5 ? 5 : rs2); | |
381 | bits_mask = (1UL << bits) - 1UL; | |
382 | ||
383 | rd_val = ((((rs1 >> 11) & 0x3) << 0) | | |
384 | (((rs1 >> 33) & 0x3) << 2) | | |
385 | (((rs1 >> 55) & 0x1) << 4) | | |
386 | (((rs1 >> 13) & 0xf) << 5) | | |
387 | (((rs1 >> 35) & 0xf) << 9) | | |
388 | (((rs1 >> 56) & 0xf) << 13) | | |
389 | (((rs1 >> 17) & bits_mask) << 17) | | |
390 | (((rs1 >> 39) & bits_mask) << (17 + bits)) | | |
391 | (((rs1 >> 60) & 0xf) << (17 + (2*bits)))); | |
392 | ||
393 | switch (opf) { | |
394 | case ARRAY16_OPF: | |
395 | rd_val <<= 1; | |
396 | break; | |
397 | ||
398 | case ARRAY32_OPF: | |
399 | rd_val <<= 2; | |
6cb79b3f | 400 | } |
0c51ed93 DM |
401 | |
402 | store_reg(regs, rd_val, RD(insn)); | |
403 | } | |
404 | ||
405 | static void bmask(struct pt_regs *regs, unsigned int insn) | |
406 | { | |
407 | unsigned long rs1, rs2, rd_val, gsr; | |
408 | ||
409 | maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0); | |
410 | rs1 = fetch_reg(RS1(insn), regs); | |
411 | rs2 = fetch_reg(RS2(insn), regs); | |
412 | rd_val = rs1 + rs2; | |
413 | ||
414 | store_reg(regs, rd_val, RD(insn)); | |
415 | ||
416 | gsr = current_thread_info()->gsr[0] & 0xffffffff; | |
417 | gsr |= rd_val << 32UL; | |
418 | current_thread_info()->gsr[0] = gsr; | |
419 | } | |
420 | ||
421 | static void bshuffle(struct pt_regs *regs, unsigned int insn) | |
422 | { | |
423 | struct fpustate *f = FPUSTATE; | |
424 | unsigned long rs1, rs2, rd_val; | |
425 | unsigned long bmask, i; | |
426 | ||
427 | bmask = current_thread_info()->gsr[0] >> 32UL; | |
428 | ||
429 | rs1 = fpd_regval(f, RS1(insn)); | |
430 | rs2 = fpd_regval(f, RS2(insn)); | |
431 | ||
432 | rd_val = 0UL; | |
433 | for (i = 0; i < 8; i++) { | |
434 | unsigned long which = (bmask >> (i * 4)) & 0xf; | |
435 | unsigned long byte; | |
436 | ||
437 | if (which < 8) | |
438 | byte = (rs1 >> (which * 8)) & 0xff; | |
439 | else | |
440 | byte = (rs2 >> ((which-8)*8)) & 0xff; | |
441 | rd_val |= (byte << (i * 8)); | |
442 | } | |
443 | ||
444 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
445 | } | |
446 | ||
447 | static void pdist(struct pt_regs *regs, unsigned int insn) | |
448 | { | |
449 | struct fpustate *f = FPUSTATE; | |
450 | unsigned long rs1, rs2, *rd, rd_val; | |
451 | unsigned long i; | |
452 | ||
453 | rs1 = fpd_regval(f, RS1(insn)); | |
726c12f5 | 454 | rs2 = fpd_regval(f, RS2(insn)); |
0c51ed93 DM |
455 | rd = fpd_regaddr(f, RD(insn)); |
456 | ||
457 | rd_val = *rd; | |
458 | ||
459 | for (i = 0; i < 8; i++) { | |
460 | s16 s1, s2; | |
461 | ||
462 | s1 = (rs1 >> (56 - (i * 8))) & 0xff; | |
463 | s2 = (rs2 >> (56 - (i * 8))) & 0xff; | |
464 | ||
465 | /* Absolute value of difference. */ | |
466 | s1 -= s2; | |
467 | if (s1 < 0) | |
468 | s1 = ~s1 + 1; | |
469 | ||
470 | rd_val += s1; | |
471 | } | |
472 | ||
473 | *rd = rd_val; | |
474 | } | |
475 | ||
476 | static void pformat(struct pt_regs *regs, unsigned int insn, unsigned int opf) | |
477 | { | |
478 | struct fpustate *f = FPUSTATE; | |
479 | unsigned long rs1, rs2, gsr, scale, rd_val; | |
480 | ||
481 | gsr = current_thread_info()->gsr[0]; | |
482 | scale = (gsr >> 3) & (opf == FPACK16_OPF ? 0xf : 0x1f); | |
483 | switch (opf) { | |
484 | case FPACK16_OPF: { | |
485 | unsigned long byte; | |
486 | ||
487 | rs2 = fpd_regval(f, RS2(insn)); | |
488 | rd_val = 0; | |
489 | for (byte = 0; byte < 4; byte++) { | |
490 | unsigned int val; | |
491 | s16 src = (rs2 >> (byte * 16UL)) & 0xffffUL; | |
492 | int scaled = src << scale; | |
493 | int from_fixed = scaled >> 7; | |
494 | ||
495 | val = ((from_fixed < 0) ? | |
496 | 0 : | |
497 | (from_fixed > 255) ? | |
498 | 255 : from_fixed); | |
499 | ||
500 | rd_val |= (val << (8 * byte)); | |
501 | } | |
502 | *fps_regaddr(f, RD(insn)) = rd_val; | |
503 | break; | |
504 | } | |
505 | ||
506 | case FPACK32_OPF: { | |
507 | unsigned long word; | |
508 | ||
509 | rs1 = fpd_regval(f, RS1(insn)); | |
510 | rs2 = fpd_regval(f, RS2(insn)); | |
511 | rd_val = (rs1 << 8) & ~(0x000000ff000000ffUL); | |
512 | for (word = 0; word < 2; word++) { | |
513 | unsigned long val; | |
514 | s32 src = (rs2 >> (word * 32UL)); | |
515 | s64 scaled = src << scale; | |
516 | s64 from_fixed = scaled >> 23; | |
517 | ||
518 | val = ((from_fixed < 0) ? | |
519 | 0 : | |
520 | (from_fixed > 255) ? | |
521 | 255 : from_fixed); | |
522 | ||
523 | rd_val |= (val << (32 * word)); | |
524 | } | |
525 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
526 | break; | |
527 | } | |
528 | ||
529 | case FPACKFIX_OPF: { | |
530 | unsigned long word; | |
531 | ||
532 | rs2 = fpd_regval(f, RS2(insn)); | |
533 | ||
534 | rd_val = 0; | |
535 | for (word = 0; word < 2; word++) { | |
536 | long val; | |
537 | s32 src = (rs2 >> (word * 32UL)); | |
538 | s64 scaled = src << scale; | |
539 | s64 from_fixed = scaled >> 16; | |
540 | ||
541 | val = ((from_fixed < -32768) ? | |
542 | -32768 : | |
543 | (from_fixed > 32767) ? | |
544 | 32767 : from_fixed); | |
545 | ||
546 | rd_val |= ((val & 0xffff) << (word * 16)); | |
547 | } | |
548 | *fps_regaddr(f, RD(insn)) = rd_val; | |
549 | break; | |
550 | } | |
551 | ||
552 | case FEXPAND_OPF: { | |
553 | unsigned long byte; | |
554 | ||
555 | rs2 = fps_regval(f, RS2(insn)); | |
556 | ||
557 | rd_val = 0; | |
558 | for (byte = 0; byte < 4; byte++) { | |
559 | unsigned long val; | |
560 | u8 src = (rs2 >> (byte * 8)) & 0xff; | |
561 | ||
562 | val = src << 4; | |
563 | ||
564 | rd_val |= (val << (byte * 16)); | |
565 | } | |
566 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
567 | break; | |
568 | } | |
569 | ||
570 | case FPMERGE_OPF: { | |
571 | rs1 = fps_regval(f, RS1(insn)); | |
572 | rs2 = fps_regval(f, RS2(insn)); | |
573 | ||
574 | rd_val = (((rs2 & 0x000000ff) << 0) | | |
575 | ((rs1 & 0x000000ff) << 8) | | |
576 | ((rs2 & 0x0000ff00) << 8) | | |
577 | ((rs1 & 0x0000ff00) << 16) | | |
578 | ((rs2 & 0x00ff0000) << 16) | | |
579 | ((rs1 & 0x00ff0000) << 24) | | |
580 | ((rs2 & 0xff000000) << 24) | | |
581 | ((rs1 & 0xff000000) << 32)); | |
582 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
583 | break; | |
584 | } | |
6cb79b3f | 585 | } |
0c51ed93 DM |
586 | } |
587 | ||
588 | static void pmul(struct pt_regs *regs, unsigned int insn, unsigned int opf) | |
589 | { | |
590 | struct fpustate *f = FPUSTATE; | |
591 | unsigned long rs1, rs2, rd_val; | |
592 | ||
593 | switch (opf) { | |
594 | case FMUL8x16_OPF: { | |
595 | unsigned long byte; | |
596 | ||
597 | rs1 = fps_regval(f, RS1(insn)); | |
598 | rs2 = fpd_regval(f, RS2(insn)); | |
599 | ||
600 | rd_val = 0; | |
601 | for (byte = 0; byte < 4; byte++) { | |
602 | u16 src1 = (rs1 >> (byte * 8)) & 0x00ff; | |
603 | s16 src2 = (rs2 >> (byte * 16)) & 0xffff; | |
604 | u32 prod = src1 * src2; | |
605 | u16 scaled = ((prod & 0x00ffff00) >> 8); | |
606 | ||
607 | /* Round up. */ | |
608 | if (prod & 0x80) | |
609 | scaled++; | |
610 | rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); | |
611 | } | |
612 | ||
613 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
614 | break; | |
615 | } | |
616 | ||
617 | case FMUL8x16AU_OPF: | |
618 | case FMUL8x16AL_OPF: { | |
619 | unsigned long byte; | |
620 | s16 src2; | |
621 | ||
622 | rs1 = fps_regval(f, RS1(insn)); | |
623 | rs2 = fps_regval(f, RS2(insn)); | |
624 | ||
625 | rd_val = 0; | |
88b938e6 | 626 | src2 = rs2 >> (opf == FMUL8x16AU_OPF ? 16 : 0); |
0c51ed93 DM |
627 | for (byte = 0; byte < 4; byte++) { |
628 | u16 src1 = (rs1 >> (byte * 8)) & 0x00ff; | |
629 | u32 prod = src1 * src2; | |
630 | u16 scaled = ((prod & 0x00ffff00) >> 8); | |
631 | ||
632 | /* Round up. */ | |
633 | if (prod & 0x80) | |
634 | scaled++; | |
635 | rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); | |
636 | } | |
637 | ||
638 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
639 | break; | |
640 | } | |
641 | ||
642 | case FMUL8SUx16_OPF: | |
643 | case FMUL8ULx16_OPF: { | |
644 | unsigned long byte, ushift; | |
645 | ||
646 | rs1 = fpd_regval(f, RS1(insn)); | |
647 | rs2 = fpd_regval(f, RS2(insn)); | |
648 | ||
649 | rd_val = 0; | |
650 | ushift = (opf == FMUL8SUx16_OPF) ? 8 : 0; | |
651 | for (byte = 0; byte < 4; byte++) { | |
652 | u16 src1; | |
653 | s16 src2; | |
654 | u32 prod; | |
655 | u16 scaled; | |
656 | ||
657 | src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff); | |
658 | src2 = ((rs2 >> (16 * byte)) & 0xffff); | |
659 | prod = src1 * src2; | |
660 | scaled = ((prod & 0x00ffff00) >> 8); | |
661 | ||
662 | /* Round up. */ | |
663 | if (prod & 0x80) | |
664 | scaled++; | |
665 | rd_val |= ((scaled & 0xffffUL) << (byte * 16UL)); | |
666 | } | |
667 | ||
668 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
669 | break; | |
670 | } | |
671 | ||
672 | case FMULD8SUx16_OPF: | |
673 | case FMULD8ULx16_OPF: { | |
674 | unsigned long byte, ushift; | |
675 | ||
676 | rs1 = fps_regval(f, RS1(insn)); | |
677 | rs2 = fps_regval(f, RS2(insn)); | |
678 | ||
679 | rd_val = 0; | |
680 | ushift = (opf == FMULD8SUx16_OPF) ? 8 : 0; | |
681 | for (byte = 0; byte < 2; byte++) { | |
682 | u16 src1; | |
683 | s16 src2; | |
684 | u32 prod; | |
685 | u16 scaled; | |
686 | ||
687 | src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff); | |
688 | src2 = ((rs2 >> (16 * byte)) & 0xffff); | |
689 | prod = src1 * src2; | |
690 | scaled = ((prod & 0x00ffff00) >> 8); | |
691 | ||
692 | /* Round up. */ | |
693 | if (prod & 0x80) | |
694 | scaled++; | |
695 | rd_val |= ((scaled & 0xffffUL) << | |
696 | ((byte * 32UL) + 7UL)); | |
697 | } | |
698 | *fpd_regaddr(f, RD(insn)) = rd_val; | |
699 | break; | |
700 | } | |
6cb79b3f | 701 | } |
0c51ed93 DM |
702 | } |
703 | ||
704 | static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf) | |
705 | { | |
706 | struct fpustate *f = FPUSTATE; | |
707 | unsigned long rs1, rs2, rd_val, i; | |
708 | ||
709 | rs1 = fpd_regval(f, RS1(insn)); | |
710 | rs2 = fpd_regval(f, RS2(insn)); | |
711 | ||
712 | rd_val = 0; | |
713 | ||
714 | switch (opf) { | |
715 | case FCMPGT16_OPF: | |
716 | for (i = 0; i < 4; i++) { | |
717 | s16 a = (rs1 >> (i * 16)) & 0xffff; | |
718 | s16 b = (rs2 >> (i * 16)) & 0xffff; | |
719 | ||
720 | if (a > b) | |
2e8ecdc0 | 721 | rd_val |= 8 >> i; |
0c51ed93 DM |
722 | } |
723 | break; | |
724 | ||
725 | case FCMPGT32_OPF: | |
726 | for (i = 0; i < 2; i++) { | |
2e8ecdc0 DM |
727 | s32 a = (rs1 >> (i * 32)) & 0xffffffff; |
728 | s32 b = (rs2 >> (i * 32)) & 0xffffffff; | |
0c51ed93 DM |
729 | |
730 | if (a > b) | |
2e8ecdc0 | 731 | rd_val |= 2 >> i; |
0c51ed93 DM |
732 | } |
733 | break; | |
734 | ||
735 | case FCMPLE16_OPF: | |
736 | for (i = 0; i < 4; i++) { | |
737 | s16 a = (rs1 >> (i * 16)) & 0xffff; | |
738 | s16 b = (rs2 >> (i * 16)) & 0xffff; | |
739 | ||
740 | if (a <= b) | |
2e8ecdc0 | 741 | rd_val |= 8 >> i; |
0c51ed93 DM |
742 | } |
743 | break; | |
744 | ||
745 | case FCMPLE32_OPF: | |
746 | for (i = 0; i < 2; i++) { | |
2e8ecdc0 DM |
747 | s32 a = (rs1 >> (i * 32)) & 0xffffffff; |
748 | s32 b = (rs2 >> (i * 32)) & 0xffffffff; | |
0c51ed93 DM |
749 | |
750 | if (a <= b) | |
2e8ecdc0 | 751 | rd_val |= 2 >> i; |
0c51ed93 DM |
752 | } |
753 | break; | |
754 | ||
755 | case FCMPNE16_OPF: | |
756 | for (i = 0; i < 4; i++) { | |
757 | s16 a = (rs1 >> (i * 16)) & 0xffff; | |
758 | s16 b = (rs2 >> (i * 16)) & 0xffff; | |
759 | ||
760 | if (a != b) | |
2e8ecdc0 | 761 | rd_val |= 8 >> i; |
0c51ed93 DM |
762 | } |
763 | break; | |
764 | ||
765 | case FCMPNE32_OPF: | |
766 | for (i = 0; i < 2; i++) { | |
2e8ecdc0 DM |
767 | s32 a = (rs1 >> (i * 32)) & 0xffffffff; |
768 | s32 b = (rs2 >> (i * 32)) & 0xffffffff; | |
0c51ed93 DM |
769 | |
770 | if (a != b) | |
2e8ecdc0 | 771 | rd_val |= 2 >> i; |
0c51ed93 DM |
772 | } |
773 | break; | |
774 | ||
775 | case FCMPEQ16_OPF: | |
776 | for (i = 0; i < 4; i++) { | |
777 | s16 a = (rs1 >> (i * 16)) & 0xffff; | |
778 | s16 b = (rs2 >> (i * 16)) & 0xffff; | |
779 | ||
780 | if (a == b) | |
2e8ecdc0 | 781 | rd_val |= 8 >> i; |
0c51ed93 DM |
782 | } |
783 | break; | |
784 | ||
785 | case FCMPEQ32_OPF: | |
786 | for (i = 0; i < 2; i++) { | |
2e8ecdc0 DM |
787 | s32 a = (rs1 >> (i * 32)) & 0xffffffff; |
788 | s32 b = (rs2 >> (i * 32)) & 0xffffffff; | |
0c51ed93 DM |
789 | |
790 | if (a == b) | |
2e8ecdc0 | 791 | rd_val |= 2 >> i; |
0c51ed93 DM |
792 | } |
793 | break; | |
6cb79b3f | 794 | } |
0c51ed93 DM |
795 | |
796 | maybe_flush_windows(0, 0, RD(insn), 0); | |
797 | store_reg(regs, rd_val, RD(insn)); | |
798 | } | |
799 | ||
800 | /* Emulate the VIS instructions which are not implemented in | |
801 | * hardware on Niagara. | |
802 | */ | |
803 | int vis_emul(struct pt_regs *regs, unsigned int insn) | |
804 | { | |
805 | unsigned long pc = regs->tpc; | |
806 | unsigned int opf; | |
807 | ||
808 | BUG_ON(regs->tstate & TSTATE_PRIV); | |
809 | ||
a8b0ca17 | 810 | perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0); |
121dd5f2 | 811 | |
0c51ed93 DM |
812 | if (test_thread_flag(TIF_32BIT)) |
813 | pc = (u32)pc; | |
814 | ||
815 | if (get_user(insn, (u32 __user *) pc)) | |
816 | return -EFAULT; | |
817 | ||
410d2c81 HP |
818 | save_and_clear_fpu(); |
819 | ||
0c51ed93 DM |
820 | opf = (insn & VIS_OPF_MASK) >> VIS_OPF_SHIFT; |
821 | switch (opf) { | |
822 | default: | |
823 | return -EINVAL; | |
824 | ||
825 | /* Pixel Formatting Instructions. */ | |
826 | case FPACK16_OPF: | |
827 | case FPACK32_OPF: | |
828 | case FPACKFIX_OPF: | |
829 | case FEXPAND_OPF: | |
830 | case FPMERGE_OPF: | |
831 | pformat(regs, insn, opf); | |
832 | break; | |
833 | ||
834 | /* Partitioned Multiply Instructions */ | |
835 | case FMUL8x16_OPF: | |
836 | case FMUL8x16AU_OPF: | |
837 | case FMUL8x16AL_OPF: | |
838 | case FMUL8SUx16_OPF: | |
839 | case FMUL8ULx16_OPF: | |
840 | case FMULD8SUx16_OPF: | |
841 | case FMULD8ULx16_OPF: | |
842 | pmul(regs, insn, opf); | |
843 | break; | |
844 | ||
845 | /* Pixel Compare Instructions */ | |
846 | case FCMPGT16_OPF: | |
847 | case FCMPGT32_OPF: | |
848 | case FCMPLE16_OPF: | |
849 | case FCMPLE32_OPF: | |
850 | case FCMPNE16_OPF: | |
851 | case FCMPNE32_OPF: | |
852 | case FCMPEQ16_OPF: | |
853 | case FCMPEQ32_OPF: | |
854 | pcmp(regs, insn, opf); | |
855 | break; | |
856 | ||
857 | /* Edge Handling Instructions */ | |
858 | case EDGE8_OPF: | |
859 | case EDGE8N_OPF: | |
860 | case EDGE8L_OPF: | |
861 | case EDGE8LN_OPF: | |
862 | case EDGE16_OPF: | |
863 | case EDGE16N_OPF: | |
864 | case EDGE16L_OPF: | |
865 | case EDGE16LN_OPF: | |
866 | case EDGE32_OPF: | |
867 | case EDGE32N_OPF: | |
868 | case EDGE32L_OPF: | |
869 | case EDGE32LN_OPF: | |
870 | edge(regs, insn, opf); | |
871 | break; | |
872 | ||
873 | /* Pixel Component Distance */ | |
874 | case PDIST_OPF: | |
875 | pdist(regs, insn); | |
876 | break; | |
877 | ||
878 | /* Three-Dimensional Array Addressing Instructions */ | |
879 | case ARRAY8_OPF: | |
880 | case ARRAY16_OPF: | |
881 | case ARRAY32_OPF: | |
882 | array(regs, insn, opf); | |
883 | break; | |
884 | ||
885 | /* Byte Mask and Shuffle Instructions */ | |
886 | case BMASK_OPF: | |
887 | bmask(regs, insn); | |
888 | break; | |
889 | ||
890 | case BSHUFFLE_OPF: | |
891 | bshuffle(regs, insn); | |
892 | break; | |
6cb79b3f | 893 | } |
0c51ed93 DM |
894 | |
895 | regs->tpc = regs->tnpc; | |
896 | regs->tnpc += 4; | |
897 | return 0; | |
898 | } |