]>
Commit | Line | Data |
---|---|---|
1bccec25 BS |
1 | /* |
2 | * VIS op helpers | |
3 | * | |
4 | * Copyright (c) 2003-2005 Fabrice Bellard | |
5 | * | |
6 | * This library is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License as published by the Free Software Foundation; either | |
9 | * version 2 of the License, or (at your option) any later version. | |
10 | * | |
11 | * This library is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * Lesser General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Lesser General Public | |
17 | * License along with this library; if not, see <http://www.gnu.org/licenses/>. | |
18 | */ | |
19 | ||
db5ebe5f | 20 | #include "qemu/osdep.h" |
1bccec25 | 21 | #include "cpu.h" |
2ef6175a | 22 | #include "exec/helper-proto.h" |
1bccec25 | 23 | |
1bccec25 BS |
/*
 * Extract the inclusive bit range FROM..TO of the 64-bit value X, using
 * non-native bit numbering: bit 0 is the most significant bit.
 */
#define GET_FIELD(X, FROM, TO)                                          \
    (((X) >> (63 - (TO))) & ((1ULL << ((TO) - (FROM) + 1)) - 1))

/*
 * Same extraction, but with the bit numbering used in the manuals,
 * i.e. bit 0 is 2^0.  FROM is the low bit of the field, TO the high bit.
 */
#define GET_FIELD_SP(X, FROM, TO)                                       \
    GET_FIELD(X, 63 - (TO), 63 - (FROM))
31 | ||
f027c3b1 | 32 | target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize) |
1bccec25 BS |
33 | { |
34 | return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) | | |
35 | (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) | | |
36 | (GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) | | |
37 | (GET_FIELD_SP(pixel_addr, 56, 59) << 13) | | |
38 | (GET_FIELD_SP(pixel_addr, 35, 38) << 9) | | |
39 | (GET_FIELD_SP(pixel_addr, 13, 16) << 5) | | |
40 | (((pixel_addr >> 55) & 1) << 4) | | |
41 | (GET_FIELD_SP(pixel_addr, 33, 34) << 2) | | |
42 | GET_FIELD_SP(pixel_addr, 11, 12); | |
43 | } | |
44 | ||
1bccec25 BS |
45 | #ifdef HOST_WORDS_BIGENDIAN |
46 | #define VIS_B64(n) b[7 - (n)] | |
47 | #define VIS_W64(n) w[3 - (n)] | |
48 | #define VIS_SW64(n) sw[3 - (n)] | |
49 | #define VIS_L64(n) l[1 - (n)] | |
50 | #define VIS_B32(n) b[3 - (n)] | |
51 | #define VIS_W32(n) w[1 - (n)] | |
52 | #else | |
53 | #define VIS_B64(n) b[n] | |
54 | #define VIS_W64(n) w[n] | |
55 | #define VIS_SW64(n) sw[n] | |
56 | #define VIS_L64(n) l[n] | |
57 | #define VIS_B32(n) b[n] | |
58 | #define VIS_W32(n) w[n] | |
59 | #endif | |
60 | ||
61 | typedef union { | |
62 | uint8_t b[8]; | |
63 | uint16_t w[4]; | |
64 | int16_t sw[4]; | |
65 | uint32_t l[2]; | |
66 | uint64_t ll; | |
67 | float64 d; | |
68 | } VIS64; | |
69 | ||
70 | typedef union { | |
71 | uint8_t b[4]; | |
72 | uint16_t w[2]; | |
73 | uint32_t l; | |
74 | float32 f; | |
75 | } VIS32; | |
76 | ||
/*
 * Interleave the four low-order bytes of src1 with the four low-order
 * bytes of src2.  Byte i of src1 becomes result byte 2i + 1 and byte i of
 * src2 becomes result byte 2i (byte 0 being the least significant).
 * The upper 32 bits of both operands are ignored.
 */
uint64_t helper_fpmerge(uint64_t src1, uint64_t src2)
{
    uint64_t merged = 0;
    int i;

    for (i = 0; i < 4; i++) {
        uint64_t from_src1 = (src1 >> (8 * i)) & 0xff;
        uint64_t from_src2 = (src2 >> (8 * i)) & 0xff;

        merged |= from_src1 << (16 * i + 8);
        merged |= from_src2 << (16 * i);
    }

    return merged;
}
96 | ||
/*
 * Four-lane 8x16 multiply.
 *
 * For each lane n (0 = least significant), byte n of src1 (unsigned) is
 * multiplied by the signed 16-bit halfword n of src2.  The product is
 * rounded by adding 0x100 when its low byte exceeds 0x7f, then the low
 * 8 bits are dropped and the next 16 bits are stored in halfword n of
 * the result.
 */
uint64_t helper_fmul8x16(uint64_t src1, uint64_t src2)
{
    uint64_t result = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        int32_t pixel = (src1 >> (8 * lane)) & 0xff;
        int32_t coeff = (int16_t)(src2 >> (16 * lane));
        uint32_t product = pixel * coeff;

        if ((product & 0xff) > 0x7f) {
            product += 0x100;
        }

        result |= (uint64_t)(uint16_t)(product >> 8) << (16 * lane);
    }

    return result;
}
120 | ||
/*
 * Four-lane 8x16 multiply by a single scalar coefficient.
 *
 * Each of the four low bytes of src1 (unsigned) is multiplied by the
 * signed 16-bit value held in bits 31:16 of src2.  Each product is
 * rounded by adding 0x100 when its low byte exceeds 0x7f, then bits 23:8
 * are stored in the matching 16-bit lane of the result.
 *
 * The coefficient is latched before any lane is produced.  Reading it
 * back from the destination while lanes are being written would make the
 * later lanes use an already computed result as their multiplier.
 *
 * NOTE(review): which half of src2 carries the "al" coefficient depends
 * on how the translator packs the operand; bits 31:16 are kept as in the
 * previous implementation -- confirm against the caller.
 */
uint64_t helper_fmul8x16al(uint64_t src1, uint64_t src2)
{
    const int32_t coeff = (int16_t)(src2 >> 16);
    uint64_t result = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        int32_t pixel = (src1 >> (8 * lane)) & 0xff;
        uint32_t product = pixel * coeff;

        if ((product & 0xff) > 0x7f) {
            product += 0x100;
        }

        result |= (uint64_t)(uint16_t)(product >> 8) << (16 * lane);
    }

    return result;
}
144 | ||
/*
 * Four-lane 8x16 multiply by a single scalar coefficient.
 *
 * Each of the four low bytes of src1 (unsigned) is multiplied by the
 * signed 16-bit value held in bits 15:0 of src2.  Each product is
 * rounded by adding 0x100 when its low byte exceeds 0x7f, then bits 23:8
 * are stored in the matching 16-bit lane of the result.
 *
 * The coefficient is latched before any lane is produced.  Reading it
 * back from the destination while lanes are being written would make
 * lanes 1..3 use the lane 0 result as their multiplier.
 *
 * NOTE(review): which half of src2 carries the "au" coefficient depends
 * on how the translator packs the operand; bits 15:0 are kept as in the
 * previous implementation -- confirm against the caller.
 */
uint64_t helper_fmul8x16au(uint64_t src1, uint64_t src2)
{
    const int32_t coeff = (int16_t)src2;
    uint64_t result = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        int32_t pixel = (src1 >> (8 * lane)) & 0xff;
        uint32_t product = pixel * coeff;

        if ((product & 0xff) > 0x7f) {
            product += 0x100;
        }

        result |= (uint64_t)(uint16_t)(product >> 8) << (16 * lane);
    }

    return result;
}
168 | ||
/*
 * Four-lane multiply of the signed upper byte of each src1 halfword by
 * the matching signed halfword of src2.
 *
 * Each product is rounded by adding 0x100 when its low byte exceeds 0x7f,
 * then bits 23:8 are stored in the matching 16-bit lane of the result.
 */
uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2)
{
    uint64_t result = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        int32_t upper = (int32_t)(int16_t)(src1 >> (16 * lane)) >> 8;
        int32_t coeff = (int16_t)(src2 >> (16 * lane));
        uint32_t product = coeff * upper;

        if ((product & 0xff) > 0x7f) {
            product += 0x100;
        }

        result |= (uint64_t)(uint16_t)(product >> 8) << (16 * lane);
    }

    return result;
}
192 | ||
/*
 * Four-lane multiply of the unsigned lower byte of each src1 halfword by
 * the matching signed halfword of src2.
 *
 * The multiplication is carried out modulo 2^32.  Each product is rounded
 * by adding 0x100 when its low byte exceeds 0x7f, then bits 23:8 are
 * stored in the matching 16-bit lane of the result.
 */
uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2)
{
    uint64_t result = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        uint32_t lower = (src1 >> (16 * lane)) & 0xff;
        int32_t coeff = (int16_t)(src2 >> (16 * lane));
        uint32_t product = (uint32_t)coeff * lower;

        if ((product & 0xff) > 0x7f) {
            product += 0x100;
        }

        result |= (uint64_t)(uint16_t)(product >> 8) << (16 * lane);
    }

    return result;
}
216 | ||
/*
 * Two-lane widening multiply, "signed upper" half.
 *
 * For each of the two low halfwords of src1, the signed upper 8 bits are
 * multiplied by the matching signed halfword of src2 and the product is
 * shifted left by 8 to form a 32-bit result lane.  No rounding is
 * applied: this result is meant to be added to the FMULD8ULx16 result to
 * obtain a full 16x16 product, which only works if both halves are exact.
 *
 * The upper 32 bits of both operands are ignored.
 */
uint64_t helper_fmuld8sux16(uint64_t src1, uint64_t src2)
{
    uint64_t result = 0;
    int lane;

    for (lane = 0; lane < 2; lane++) {
        int32_t a = (int16_t)(src1 >> (16 * lane));
        int32_t b = (int16_t)(src2 >> (16 * lane));
        /*
         * Clearing the low byte of A is the same as taking its upper
         * 8 bits and scaling the product by 256, without having to
         * left-shift a possibly negative value.
         */
        int32_t product = (a & ~0xff) * b;

        result |= (uint64_t)(uint32_t)product << (32 * lane);
    }

    return result;
}
239 | ||
/*
 * Two-lane widening multiply, "unsigned lower" half.
 *
 * For each of the two low halfwords of src1, the unsigned lower 8 bits
 * are multiplied by the matching signed halfword of src2 and the product
 * is sign-extended to a 32-bit result lane.  No rounding is applied:
 * this result is meant to be added to the FMULD8SUx16 result to obtain a
 * full 16x16 product, which only works if both halves are exact.
 *
 * The upper 32 bits of both operands are ignored.
 */
uint64_t helper_fmuld8ulx16(uint64_t src1, uint64_t src2)
{
    uint64_t result = 0;
    int lane;

    for (lane = 0; lane < 2; lane++) {
        int32_t lower = (src1 >> (16 * lane)) & 0xff;
        int32_t b = (int16_t)(src2 >> (16 * lane));
        int32_t product = lower * b;

        result |= (uint64_t)(uint32_t)product << (32 * lane);
    }

    return result;
}
262 | ||
/*
 * Expand the four low bytes of src1 into four 16-bit lanes, each byte
 * being shifted left by 4 within its lane.
 *
 * Every lane of the result is produced from src1, so the value of src2
 * does not influence the result.
 */
uint64_t helper_fexpand(uint64_t src1, uint64_t src2)
{
    uint64_t expanded = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        uint64_t pixel = (src1 >> (8 * lane)) & 0xff;

        expanded |= (pixel << 4) << (16 * lane);
    }

    return expanded;
}
277 | ||
/*
 * Generate the four partitioned-arithmetic helpers for operation F:
 *   name##16   four 16-bit lanes in 64-bit operands
 *   name##16s  two 16-bit lanes in 32-bit operands
 *   name##32   two 32-bit lanes in 64-bit operands
 *   name##32s  one 32-bit lane
 *
 * Lanes are independent: each result is truncated to the lane width, so
 * nothing carries or borrows across lane boundaries.
 *
 * NOTE(review): F is applied as F(src2 lane, src1 lane).  This is
 * harmless for FADD but means the FSUB helpers return src2 - src1;
 * confirm this matches the operand order the translator passes.
 */
#define VIS_HELPER(name, F)                                             \
    uint64_t name##16(uint64_t src1, uint64_t src2)                     \
    {                                                                   \
        uint64_t out = 0;                                               \
        int shift;                                                      \
                                                                        \
        for (shift = 0; shift < 64; shift += 16) {                      \
            uint16_t a = src2 >> shift;                                 \
            uint16_t b = src1 >> shift;                                 \
                                                                        \
            out |= (uint64_t)(uint16_t)(F(a, b)) << shift;              \
        }                                                               \
        return out;                                                     \
    }                                                                   \
                                                                        \
    uint32_t name##16s(uint32_t src1, uint32_t src2)                    \
    {                                                                   \
        uint32_t out = 0;                                               \
        int shift;                                                      \
                                                                        \
        for (shift = 0; shift < 32; shift += 16) {                      \
            uint16_t a = src2 >> shift;                                 \
            uint16_t b = src1 >> shift;                                 \
                                                                        \
            out |= (uint32_t)(uint16_t)(F(a, b)) << shift;              \
        }                                                               \
        return out;                                                     \
    }                                                                   \
                                                                        \
    uint64_t name##32(uint64_t src1, uint64_t src2)                     \
    {                                                                   \
        uint64_t out = 0;                                               \
        int shift;                                                      \
                                                                        \
        for (shift = 0; shift < 64; shift += 32) {                      \
            uint32_t a = src2 >> shift;                                 \
            uint32_t b = src1 >> shift;                                 \
                                                                        \
            out |= (uint64_t)(uint32_t)(F(a, b)) << shift;              \
        }                                                               \
        return out;                                                     \
    }                                                                   \
                                                                        \
    uint32_t name##32s(uint32_t src1, uint32_t src2)                    \
    {                                                                   \
        return (uint32_t)(F(src2, src1));                               \
    }

#define FADD(a, b) ((a) + (b))
#define FSUB(a, b) ((a) - (b))
VIS_HELPER(helper_fpadd, FADD)
VIS_HELPER(helper_fpsub, FSUB)
336 | ||
/*
 * Generate the partitioned compare helpers for predicate F:
 *   name##16   compares four 16-bit lanes
 *   name##32   compares two 32-bit lanes
 *
 * Lane n of src1 is compared against lane n of src2 as F(src1, src2),
 * and bit n of the result is set when the predicate holds.  All other
 * result bits are zero.
 *
 * The lanes are signed fixed-point values, so they are converted to
 * int16_t / int32_t before comparing.  Comparing them as unsigned gives
 * wrong answers for the ordering predicates whenever the two lanes
 * differ in sign.
 */
#define VIS_CMPHELPER(name, F)                                          \
    uint64_t name##16(uint64_t src1, uint64_t src2)                     \
    {                                                                   \
        uint64_t mask = 0;                                              \
        int lane;                                                       \
                                                                        \
        for (lane = 0; lane < 4; lane++) {                              \
            int16_t a = (int16_t)(src1 >> (16 * lane));                 \
            int16_t b = (int16_t)(src2 >> (16 * lane));                 \
                                                                        \
            if (F(a, b)) {                                              \
                mask |= 1ULL << lane;                                   \
            }                                                           \
        }                                                               \
        return mask;                                                    \
    }                                                                   \
                                                                        \
    uint64_t name##32(uint64_t src1, uint64_t src2)                     \
    {                                                                   \
        uint64_t mask = 0;                                              \
        int lane;                                                       \
                                                                        \
        for (lane = 0; lane < 2; lane++) {                              \
            int32_t a = (int32_t)(src1 >> (32 * lane));                 \
            int32_t b = (int32_t)(src2 >> (32 * lane));                 \
                                                                        \
            if (F(a, b)) {                                              \
                mask |= 1ULL << lane;                                   \
            }                                                           \
        }                                                               \
        return mask;                                                    \
    }

#define FCMPGT(a, b) ((a) > (b))
#define FCMPEQ(a, b) ((a) == (b))
#define FCMPLE(a, b) ((a) <= (b))
#define FCMPNE(a, b) ((a) != (b))

VIS_CMPHELPER(helper_fcmpgt, FCMPGT)
VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
VIS_CMPHELPER(helper_fcmple, FCMPLE)
VIS_CMPHELPER(helper_fcmpne, FCMPNE)
f888300b RH |
377 | |
/*
 * Pixel distance: add to SUM the absolute difference of each of the
 * eight byte pairs of src1 and src2, and return the new total.
 */
uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2)
{
    int shift;

    for (shift = 56; shift >= 0; shift -= 8) {
        unsigned a = (src1 >> shift) & 0xff;
        unsigned b = (src2 >> shift) & 0xff;

        sum += (a > b) ? a - b : b - a;
    }

    return sum;
}
2dedf314 RH |
398 | |
/*
 * Pack four signed 16-bit fixed-point lanes of rs2 into four unsigned
 * bytes.
 *
 * @gsr: bits 6:3 supply the scale factor (0..15)
 * @rs2: four signed 16-bit source lanes
 *
 * Each lane is scaled by 2^scale, the 7 low-order bits are dropped, and
 * the value is clamped to 0..255.  Lane n of rs2 ends up in byte n of the
 * 32-bit result.
 */
uint32_t helper_fpack16(uint64_t gsr, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0xf;
    uint32_t ret = 0;
    int byte;

    for (byte = 0; byte < 4; byte++) {
        uint32_t val;
        int16_t src = rs2 >> (byte * 16);
        /*
         * Scale by multiplying: left-shifting a negative value is
         * undefined in ISO C.  |src| <= 2^15 and scale <= 15, so the
         * product always fits in 32 bits.
         */
        int32_t scaled = src * (1 << scale);
        int32_t from_fixed = scaled >> 7;

        val = (from_fixed < 0 ? 0 :
               from_fixed > 255 ? 255 : from_fixed);

        ret |= val << (8 * byte);
    }

    return ret;
}
419 | ||
/*
 * Shift rs1 left by one byte within each 32-bit word and insert a newly
 * packed byte from rs2 at the bottom of each word.
 *
 * @gsr: bits 7:3 supply the scale factor (0..31)
 * @rs1: previously accumulated value; shifted left by 8, with the low
 *       byte of each 32-bit word cleared
 * @rs2: two signed 32-bit fixed-point source words
 *
 * Each rs2 word is scaled by 2^scale, the 23 low-order bits are dropped,
 * and the value is clamped to 0..255 before being placed in the low byte
 * of the matching result word.
 */
uint64_t helper_fpack32(uint64_t gsr, uint64_t rs1, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0x1f;
    uint64_t ret = 0;
    int word;

    ret = (rs1 << 8) & ~(0x000000ff000000ffULL);
    for (word = 0; word < 2; word++) {
        uint64_t val;
        int32_t src = rs2 >> (word * 32);
        /*
         * Scale by multiplying: left-shifting a negative value is
         * undefined in ISO C.  |src| <= 2^31 and scale <= 31, so the
         * product always fits in 64 bits.
         */
        int64_t scaled = (int64_t)src * ((int64_t)1 << scale);
        int64_t from_fixed = scaled >> 23;

        val = (from_fixed < 0 ? 0 :
               (from_fixed > 255) ? 255 : from_fixed);

        ret |= val << (32 * word);
    }

    return ret;
}
441 | ||
/*
 * Pack two signed 32-bit fixed-point words of rs2 into two signed 16-bit
 * lanes.
 *
 * @gsr: bits 7:3 supply the scale factor (0..31)
 * @rs2: two signed 32-bit source words
 *
 * Each word is scaled by 2^scale, the 16 low-order bits are dropped, and
 * the value is saturated to -32768..32767.  Word n of rs2 ends up in
 * 16-bit lane n of the result.
 */
uint32_t helper_fpackfix(uint64_t gsr, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0x1f;
    uint32_t ret = 0;
    int word;

    for (word = 0; word < 2; word++) {
        uint32_t val;
        int32_t src = rs2 >> (word * 32);
        /*
         * Scale by multiplying: left-shifting a negative value is
         * undefined in ISO C.  |src| <= 2^31 and scale <= 31, so the
         * product always fits in 64 bits.
         */
        int64_t scaled = (int64_t)src * ((int64_t)1 << scale);
        int64_t from_fixed = scaled >> 16;

        val = (from_fixed < -32768 ? -32768 :
               from_fixed > 32767 ? 32767 : from_fixed);

        ret |= (val & 0xffff) << (word * 16);
    }

    return ret;
}
793a137a | 462 | |
/*
 * Byte shuffle.
 *
 * @gsr:  bits 63:32 hold eight 4-bit selectors
 * @src1: bytes 0..7 of the 16-byte source, byte 0 being the most
 *        significant byte of src1
 * @src2: bytes 8..15 of the 16-byte source, byte 15 being the least
 *        significant byte of src2
 *
 * For i = 0..7 the selector taken from mask bits (31 - 4i):(28 - 4i)
 * picks one of the 16 source bytes, which is stored in result byte i
 * (byte 0 being the least significant).
 *
 * NOTE(review): the first selector (mask bits 31:28) therefore feeds the
 * least significant result byte; check this ordering against the
 * architecture manual's BSHUFFLE definition.
 */
uint64_t helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2)
{
    uint32_t mask = gsr >> 32;
    uint64_t shuffled = 0;
    uint32_t i;

    for (i = 0; i < 8; ++i) {
        unsigned sel = (mask >> (28 - i * 4)) & 0xf;
        /* Selectors 0..7 index into src1, 8..15 into src2. */
        uint64_t from = (sel & 8) ? src2 : src1;
        uint64_t byte = (from >> (56 - 8 * (sel & 7))) & 0xff;

        shuffled |= byte << (8 * i);
    }

    return shuffled;
}