]>
Commit | Line | Data |
---|---|---|
1bccec25 BS |
1 | /* |
2 | * VIS op helpers | |
3 | * | |
4 | * Copyright (c) 2003-2005 Fabrice Bellard | |
5 | * | |
6 | * This library is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License as published by the Free Software Foundation; either | |
9 | * version 2 of the License, or (at your option) any later version. | |
10 | * | |
11 | * This library is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * Lesser General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Lesser General Public | |
17 | * License along with this library; if not, see <http://www.gnu.org/licenses/>. | |
18 | */ | |
19 | ||
20 | #include "cpu.h" | |
2ef6175a | 21 | #include "exec/helper-proto.h" |
1bccec25 | 22 | |
1bccec25 BS |
/*
 * Extract the inclusive bit range FROM..TO of the 64-bit value X and return
 * it right-justified.  This macro uses non-native bit order: bit 0 is the
 * most significant bit (2^63).
 */
#define GET_FIELD(X, FROM, TO)                                          \
    (((X) >> (63 - (TO))) & ((1ULL << ((TO) - (FROM) + 1)) - 1))

/*
 * Same extraction, but this macro uses the bit order found in the manuals,
 * i.e. bit 0 is 2^0.
 */
#define GET_FIELD_SP(X, FROM, TO)                                       \
    GET_FIELD(X, 63 - (TO), 63 - (FROM))
30 | ||
f027c3b1 | 31 | target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize) |
1bccec25 BS |
32 | { |
33 | return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) | | |
34 | (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) | | |
35 | (GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) | | |
36 | (GET_FIELD_SP(pixel_addr, 56, 59) << 13) | | |
37 | (GET_FIELD_SP(pixel_addr, 35, 38) << 9) | | |
38 | (GET_FIELD_SP(pixel_addr, 13, 16) << 5) | | |
39 | (((pixel_addr >> 55) & 1) << 4) | | |
40 | (GET_FIELD_SP(pixel_addr, 33, 34) << 2) | | |
41 | GET_FIELD_SP(pixel_addr, 11, 12); | |
42 | } | |
43 | ||
1bccec25 BS |
/*
 * Lane accessors for the members of the VIS64 and VIS32 unions.
 *
 * Lane n is counted from the least significant end of the value: on
 * big-endian hosts the array index is mirrored, on little-endian hosts the
 * lane number is used directly.  B = bytes, W = unsigned 16-bit lanes,
 * SW = signed 16-bit lanes, L = 32-bit lanes; the 64/32 suffix names the
 * width of the union being accessed.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define VIS_B64(n) b[7 - (n)]
#define VIS_W64(n) w[3 - (n)]
#define VIS_SW64(n) sw[3 - (n)]
#define VIS_L64(n) l[1 - (n)]
#define VIS_B32(n) b[3 - (n)]
#define VIS_W32(n) w[1 - (n)]
#else
#define VIS_B64(n) b[n]
#define VIS_W64(n) w[n]
#define VIS_SW64(n) sw[n]
#define VIS_L64(n) l[n]
#define VIS_B32(n) b[n]
#define VIS_W32(n) w[n]
#endif
59 | ||
/*
 * A 64-bit value that can be viewed as a whole or as partitioned lanes.
 * The array members are meant to be indexed through the VIS_*64() accessor
 * macros so that lane numbering does not depend on host byte order.
 */
typedef union {
    uint8_t b[8];       /* eight bytes, see VIS_B64() */
    uint16_t w[4];      /* four unsigned 16-bit lanes, see VIS_W64() */
    int16_t sw[4];      /* the same four lanes, signed, see VIS_SW64() */
    uint32_t l[2];      /* two 32-bit lanes, see VIS_L64() */
    uint64_t ll;        /* the whole value as an integer */
    float64 d;          /* the whole value as a float64 */
} VIS64;
68 | ||
/*
 * A 32-bit value that can be viewed as a whole or as partitioned lanes.
 * The array members are meant to be indexed through the VIS_*32() accessor
 * macros so that lane numbering does not depend on host byte order.
 */
typedef union {
    uint8_t b[4];       /* four bytes, see VIS_B32() */
    uint16_t w[2];      /* two unsigned 16-bit lanes, see VIS_W32() */
    uint32_t l;         /* the whole value as an integer */
    float32 f;          /* the whole value as a float32 */
} VIS32;
75 | ||
/*
 * FPMERGE: interleave the four low-order bytes of src1 with the four
 * low-order bytes of src2.
 *
 * With byte 0 being the least significant byte, result byte 2*i + 1 is
 * byte i of src1 and result byte 2*i is byte i of src2.  The upper 32 bits
 * of both operands do not influence the result.
 */
uint64_t helper_fpmerge(uint64_t src1, uint64_t src2)
{
    uint64_t merged = 0;
    int i;

    for (i = 0; i < 4; i++) {
        uint64_t from_src1 = (src1 >> (8 * i)) & 0xff;
        uint64_t from_src2 = (src2 >> (8 * i)) & 0xff;

        merged |= from_src1 << (16 * i + 8);
        merged |= from_src2 << (16 * i);
    }

    return merged;
}
95 | ||
/*
 * FMUL8x16: partitioned 8 x 16 multiply.
 *
 * For each of the four lanes r, the unsigned byte r of src1 is multiplied by
 * the signed 16-bit lane r of src2.  The product is rounded to nearest on
 * its low 8 bits, shifted right by 8 and truncated to 16 bits to form
 * result lane r.  Lanes are numbered from the least significant end.
 */
uint64_t helper_fmul8x16(uint64_t src1, uint64_t src2)
{
    uint64_t result = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        int32_t multiplicand = (int16_t)(src2 >> (16 * lane));
        int32_t pixel = (uint8_t)(src1 >> (8 * lane));
        uint32_t product = (uint32_t)(multiplicand * pixel);

        /* Round to nearest before the low 8 bits are dropped. */
        if ((product & 0xff) >= 0x80) {
            product += 0x100;
        }

        result |= (uint64_t)((product >> 8) & 0xffff) << (16 * lane);
    }

    return result;
}
119 | ||
/*
 * FMUL8x16AL: multiply each of the four low-order unsigned bytes of src1 by
 * one shared signed 16-bit multiplier taken from src2.
 *
 * The multiplier is 16-bit lane 1 (bits 31..16) of src2.  It is sampled once,
 * before any result lane is produced, so that every product uses the
 * caller's value.  Each product is rounded to nearest on its low 8 bits,
 * shifted right by 8 and truncated to 16 bits; product r becomes result
 * lane r (lanes numbered from the least significant end).
 *
 * NOTE(review): which halfword of the architectural operand ends up in
 * bits 31..16 depends on the caller in the translator - confirm there.
 */
uint64_t helper_fmul8x16al(uint64_t src1, uint64_t src2)
{
    const int32_t multiplier = (int16_t)(src2 >> 16);
    uint64_t result = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        int32_t pixel = (uint8_t)(src1 >> (8 * lane));
        uint32_t product = (uint32_t)(multiplier * pixel);

        /* Round to nearest before the low 8 bits are dropped. */
        if ((product & 0xff) >= 0x80) {
            product += 0x100;
        }

        result |= (uint64_t)((product >> 8) & 0xffff) << (16 * lane);
    }

    return result;
}
143 | ||
/*
 * FMUL8x16AU: multiply each of the four low-order unsigned bytes of src1 by
 * one shared signed 16-bit multiplier taken from src2.
 *
 * The multiplier is 16-bit lane 0 (bits 15..0) of src2.  It is sampled once,
 * before any result lane is produced, so that every product uses the
 * caller's value.  Each product is rounded to nearest on its low 8 bits,
 * shifted right by 8 and truncated to 16 bits; product r becomes result
 * lane r (lanes numbered from the least significant end).
 *
 * NOTE(review): which halfword of the architectural operand ends up in
 * bits 15..0 depends on the caller in the translator - confirm there.
 */
uint64_t helper_fmul8x16au(uint64_t src1, uint64_t src2)
{
    const int32_t multiplier = (int16_t)src2;
    uint64_t result = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        int32_t pixel = (uint8_t)(src1 >> (8 * lane));
        uint32_t product = (uint32_t)(multiplier * pixel);

        /* Round to nearest before the low 8 bits are dropped. */
        if ((product & 0xff) >= 0x80) {
            product += 0x100;
        }

        result |= (uint64_t)((product >> 8) & 0xffff) << (16 * lane);
    }

    return result;
}
167 | ||
/*
 * FMUL8SUx16: for each of the four 16-bit lanes, multiply the signed upper
 * 8 bits of the src1 lane by the signed 16-bit src2 lane.
 *
 * The product is rounded to nearest on its low 8 bits, shifted right by 8
 * and truncated to 16 bits to form the matching result lane.
 */
uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2)
{
    uint64_t result = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        int32_t multiplicand = (int16_t)(src2 >> (16 * lane));
        int32_t upper8 = (int8_t)(src1 >> (16 * lane + 8));
        uint32_t product = (uint32_t)(multiplicand * upper8);

        /* Round to nearest before the low 8 bits are dropped. */
        if ((product & 0xff) >= 0x80) {
            product += 0x100;
        }

        result |= (uint64_t)((product >> 8) & 0xffff) << (16 * lane);
    }

    return result;
}
191 | ||
/*
 * FMUL8ULx16: for each of the four 16-bit lanes, multiply the unsigned lower
 * 8 bits of the src1 lane by the signed 16-bit src2 lane.
 *
 * The product is rounded to nearest on its low 8 bits, shifted right by 8
 * and truncated to 16 bits to form the matching result lane.
 *
 * NOTE(review): the stored value is bits 23..8 of the product; confirm
 * against the ISA manual whether this instruction is meant to keep the
 * upper 16 bits of the sign-extended 32-bit product instead.
 */
uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2)
{
    uint64_t result = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        int32_t multiplicand = (int16_t)(src2 >> (16 * lane));
        uint32_t lower8 = (uint8_t)(src1 >> (16 * lane));
        /* Unsigned arithmetic: the product wraps modulo 2^32. */
        uint32_t product = (uint32_t)multiplicand * lower8;

        /* Round to nearest before the low 8 bits are dropped. */
        if ((product & 0xff) >= 0x80) {
            product += 0x100;
        }

        result |= (uint64_t)((product >> 8) & 0xffff) << (16 * lane);
    }

    return result;
}
215 | ||
/*
 * FMULD8SUx16: widening partitioned multiply, signed-upper half.
 *
 * For 16-bit lanes 0 and 1 (the low 32 bits of each operand), the signed
 * upper 8 bits of the src1 lane are multiplied by the signed 16-bit src2
 * lane.  The product is shifted left by 8 bits, without rounding, and stored
 * as the 32-bit result lane with the same number.
 */
uint64_t helper_fmuld8sux16(uint64_t src1, uint64_t src2)
{
    uint64_t result = 0;
    int lane;

    for (lane = 0; lane < 2; lane++) {
        int32_t multiplicand = (int16_t)(src2 >> (16 * lane));
        int32_t upper8 = (int8_t)(src1 >> (16 * lane + 8));
        /*
         * Scale by 256 instead of "<< 8": left-shifting a negative value is
         * undefined in C.  The magnitude is at most 2^30, so no overflow.
         */
        uint32_t product = (uint32_t)(multiplicand * upper8 * 256);

        result |= (uint64_t)product << (32 * lane);
    }

    return result;
}
238 | ||
/*
 * FMULD8ULx16: widening partitioned multiply, unsigned-lower half.
 *
 * For 16-bit lanes 0 and 1 (the low 32 bits of each operand), the unsigned
 * lower 8 bits of the src1 lane are multiplied by the signed 16-bit src2
 * lane.  The product is sign-extended to 32 bits, without rounding, and
 * stored as the 32-bit result lane with the same number.
 */
uint64_t helper_fmuld8ulx16(uint64_t src1, uint64_t src2)
{
    uint64_t result = 0;
    int lane;

    for (lane = 0; lane < 2; lane++) {
        int32_t multiplicand = (int16_t)(src2 >> (16 * lane));
        int32_t lower8 = (uint8_t)(src1 >> (16 * lane));
        /* Signed multiply; converting to uint32_t keeps the sign bits. */
        uint32_t product = (uint32_t)(multiplicand * lower8);

        result |= (uint64_t)product << (32 * lane);
    }

    return result;
}
261 | ||
/*
 * FEXPAND: widen the four low-order bytes of src1 into four 16-bit lanes.
 *
 * Byte i of src1 (byte 0 being the least significant) is shifted left by
 * 4 bits and stored in 16-bit result lane i.  The upper 32 bits of src1 are
 * ignored, and src2 has no influence on the result because every result
 * lane is overwritten.
 */
uint64_t helper_fexpand(uint64_t src1, uint64_t src2)
{
    uint64_t expanded = 0;
    int i;

    (void)src2;

    for (i = 0; i < 4; i++) {
        uint64_t pixel = (src1 >> (8 * i)) & 0xff;

        expanded |= (pixel << 4) << (16 * i);
    }

    return expanded;
}
276 | ||
/*
 * Generate the four partitioned-arithmetic helpers for a binary operation F:
 *
 *   name##16   four 16-bit lanes of a 64-bit value
 *   name##16s  two 16-bit lanes of a 32-bit value
 *   name##32   two 32-bit lanes of a 64-bit value
 *   name##32s  a single 32-bit value
 *
 * In every variant the lane of src2 is the first argument of F and the lane
 * of src1 the second one, and the result wraps to the lane width.
 */
#define VIS_HELPER(name, F)                                             \
uint64_t name##16(uint64_t src1, uint64_t src2)                         \
{                                                                       \
    uint64_t out = 0;                                                   \
    int shift;                                                          \
                                                                        \
    for (shift = 0; shift < 64; shift += 16) {                          \
        uint16_t lhs = (uint16_t)(src2 >> shift);                       \
        uint16_t rhs = (uint16_t)(src1 >> shift);                       \
        uint16_t lane = F(lhs, rhs);                                    \
                                                                        \
        out |= (uint64_t)lane << shift;                                 \
    }                                                                   \
    return out;                                                         \
}                                                                       \
                                                                        \
uint32_t name##16s(uint32_t src1, uint32_t src2)                        \
{                                                                       \
    uint32_t out = 0;                                                   \
    int shift;                                                          \
                                                                        \
    for (shift = 0; shift < 32; shift += 16) {                          \
        uint16_t lhs = (uint16_t)(src2 >> shift);                       \
        uint16_t rhs = (uint16_t)(src1 >> shift);                       \
        uint16_t lane = F(lhs, rhs);                                    \
                                                                        \
        out |= (uint32_t)lane << shift;                                 \
    }                                                                   \
    return out;                                                         \
}                                                                       \
                                                                        \
uint64_t name##32(uint64_t src1, uint64_t src2)                         \
{                                                                       \
    uint64_t out = 0;                                                   \
    int shift;                                                          \
                                                                        \
    for (shift = 0; shift < 64; shift += 32) {                          \
        uint32_t lhs = (uint32_t)(src2 >> shift);                       \
        uint32_t rhs = (uint32_t)(src1 >> shift);                       \
        uint32_t lane = F(lhs, rhs);                                    \
                                                                        \
        out |= (uint64_t)lane << shift;                                 \
    }                                                                   \
    return out;                                                         \
}                                                                       \
                                                                        \
uint32_t name##32s(uint32_t src1, uint32_t src2)                        \
{                                                                       \
    uint32_t lane = F(src2, src1);                                      \
                                                                        \
    return lane;                                                        \
}

/* Lane operations; note that FSUB(a, b) is called as FSUB(src2, src1). */
#define FADD(a, b) ((a) + (b))
#define FSUB(a, b) ((a) - (b))
VIS_HELPER(helper_fpadd, FADD)
VIS_HELPER(helper_fpsub, FSUB)
335 | ||
/*
 * Generate the partitioned-compare helpers for a predicate F:
 *
 *   name##16  compares the four 16-bit lanes, result uses bits 3..0
 *   name##32  compares the two 32-bit lanes, result uses bits 1..0
 *
 * Bit i of the result is set when F(lane i of src1, lane i of src2) holds;
 * all other result bits are zero.  Lanes are numbered from the least
 * significant end.
 *
 * NOTE(review): the lanes are compared as unsigned values here - confirm
 * against the ISA manual whether signed comparison is intended.
 */
#define VIS_CMPHELPER(name, F)                                          \
uint64_t name##16(uint64_t src1, uint64_t src2)                         \
{                                                                       \
    uint64_t flags = 0;                                                 \
    int lane;                                                           \
                                                                        \
    for (lane = 0; lane < 4; lane++) {                                  \
        uint16_t lhs = (uint16_t)(src1 >> (16 * lane));                 \
        uint16_t rhs = (uint16_t)(src2 >> (16 * lane));                 \
                                                                        \
        if (F(lhs, rhs)) {                                              \
            flags |= (uint64_t)1 << lane;                               \
        }                                                               \
    }                                                                   \
    return flags;                                                       \
}                                                                       \
                                                                        \
uint64_t name##32(uint64_t src1, uint64_t src2)                         \
{                                                                       \
    uint64_t flags = 0;                                                 \
    int lane;                                                           \
                                                                        \
    for (lane = 0; lane < 2; lane++) {                                  \
        uint32_t lhs = (uint32_t)(src1 >> (32 * lane));                 \
        uint32_t rhs = (uint32_t)(src2 >> (32 * lane));                 \
                                                                        \
        if (F(lhs, rhs)) {                                              \
            flags |= (uint64_t)1 << lane;                               \
        }                                                               \
    }                                                                   \
    return flags;                                                       \
}

#define FCMPGT(a, b) ((a) > (b))
#define FCMPEQ(a, b) ((a) == (b))
#define FCMPLE(a, b) ((a) <= (b))
#define FCMPNE(a, b) ((a) != (b))

VIS_CMPHELPER(helper_fcmpgt, FCMPGT)
VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
VIS_CMPHELPER(helper_fcmple, FCMPLE)
VIS_CMPHELPER(helper_fcmpne, FCMPNE)
f888300b RH |
376 | |
/*
 * PDIST: add to sum the absolute differences of the eight corresponding
 * bytes of src1 and src2, and return the new total.
 */
uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2)
{
    int shift;

    /* Walk the bytes from the most significant one down. */
    for (shift = 56; shift >= 0; shift -= 8) {
        int a = (int)((src1 >> shift) & 0xff);
        int b = (int)((src2 >> shift) & 0xff);

        sum += (uint64_t)(a > b ? a - b : b - a);
    }

    return sum;
}
2dedf314 RH |
397 | |
/*
 * FPACK16: pack four signed 16-bit fixed-point lanes of rs2 into four
 * unsigned bytes.
 *
 * The scale factor is the 4-bit field at bits 6..3 of gsr.  Each lane is
 * multiplied by 2^scale, shifted right by 7 bits and saturated to 0..255;
 * lane i (counted from the least significant end) yields result byte i.
 */
uint32_t helper_fpack16(uint64_t gsr, uint64_t rs2)
{
    const int scale = (gsr >> 3) & 0xf;
    uint32_t ret = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        int16_t src = (int16_t)(rs2 >> (lane * 16));
        /*
         * Multiply instead of "src << scale": left-shifting a negative
         * value is undefined in C.  |src| * 2^15 always fits in 32 bits.
         */
        int32_t scaled = src * ((int32_t)1 << scale);
        uint32_t val;

        if (scaled < 0) {
            /* Negative values saturate to 0; no need to shift them. */
            val = 0;
        } else {
            int32_t from_fixed = scaled >> 7;

            val = from_fixed > 255 ? 255 : (uint32_t)from_fixed;
        }

        ret |= val << (8 * lane);
    }

    return ret;
}
418 | ||
/*
 * FPACK32: shift each 32-bit half of rs1 left by one byte and insert a new
 * low byte packed from the matching signed 32-bit lane of rs2.
 *
 * The scale factor is the 5-bit field at bits 7..3 of gsr.  Each rs2 lane is
 * multiplied by 2^scale, shifted right by 23 bits and saturated to 0..255.
 */
uint64_t helper_fpack32(uint64_t gsr, uint64_t rs1, uint64_t rs2)
{
    const int scale = (gsr >> 3) & 0x1f;
    /*
     * Shift both halves of rs1 up by 8 bits; the mask clears the low byte
     * of each half, including the byte that crossed the 32-bit boundary.
     */
    uint64_t ret = (rs1 << 8) & ~(0x000000ff000000ffULL);
    int word;

    for (word = 0; word < 2; word++) {
        int32_t src = (int32_t)(rs2 >> (word * 32));
        /*
         * Multiply instead of "<< scale": left-shifting a negative value
         * is undefined in C.  |src| * 2^31 always fits in 64 bits.
         */
        int64_t scaled = (int64_t)src * ((int64_t)1 << scale);
        uint64_t val;

        if (scaled < 0) {
            /* Negative values saturate to 0; no need to shift them. */
            val = 0;
        } else {
            int64_t from_fixed = scaled >> 23;

            val = from_fixed > 255 ? 255 : (uint64_t)from_fixed;
        }

        ret |= val << (32 * word);
    }

    return ret;
}
440 | ||
/*
 * FPACKFIX: pack the two signed 32-bit fixed-point lanes of rs2 into two
 * signed 16-bit values.
 *
 * The scale factor is the 5-bit field at bits 7..3 of gsr.  Each lane is
 * multiplied by 2^scale, shifted right by 16 bits and saturated to
 * -32768..32767; lane i (counted from the least significant end) yields
 * 16-bit result lane i.
 */
uint32_t helper_fpackfix(uint64_t gsr, uint64_t rs2)
{
    const int scale = (gsr >> 3) & 0x1f;
    uint32_t ret = 0;
    int word;

    for (word = 0; word < 2; word++) {
        int32_t src = (int32_t)(rs2 >> (word * 32));
        /*
         * Scale in 64 bits so that no significant bits are lost before
         * saturation.  A multiply is used instead of "<<" because
         * left-shifting a negative value is undefined in C.
         */
        int64_t scaled = (int64_t)src * ((int64_t)1 << scale);
        /* Relies on arithmetic right shift for negative values. */
        int64_t from_fixed = scaled >> 16;
        int64_t clamped;

        if (from_fixed < -32768) {
            clamped = -32768;
        } else if (from_fixed > 32767) {
            clamped = 32767;
        } else {
            clamped = from_fixed;
        }

        ret |= ((uint32_t)clamped & 0xffff) << (word * 16);
    }

    return ret;
}
793a137a | 461 | |
/*
 * BSHUFFLE: build a 64-bit value by picking bytes out of the 16-byte
 * concatenation src1:src2.
 *
 * The selection mask is the upper 32 bits of gsr and holds eight 4-bit
 * selectors.  Source bytes are numbered from the most significant end:
 * selector values 0..7 address src1 and 8..15 address src2, with 0 and 8
 * being the most significant byte of the respective operand.  The selector
 * in mask bits 4*i+3..4*i picks result bits 8*i+7..8*i, so the mask
 * 0x01234567 returns src1 unchanged and 0x89abcdef returns src2.
 */
uint64_t helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2)
{
    const uint32_t mask = (uint32_t)(gsr >> 32);
    uint64_t result = 0;
    int i;

    for (i = 0; i < 8; i++) {
        unsigned sel = (mask >> (i * 4)) & 0xf;
        uint64_t operand = sel < 8 ? src1 : src2;
        /* Selector 0 (or 8) is the most significant byte of the operand. */
        uint64_t byte = (operand >> (8 * (7 - (sel & 7)))) & 0xff;

        result |= byte << (8 * i);
    }

    return result;
}