]>
Commit | Line | Data |
---|---|---|
bd277fa1 RH |
1 | /* |
2 | * Loongson Multimedia Instruction emulation helpers for QEMU. | |
3 | * | |
4 | * Copyright (c) 2011 Richard Henderson <[email protected]> | |
5 | * | |
6 | * This library is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License as published by the Free Software Foundation; either | |
9 | * version 2 of the License, or (at your option) any later version. | |
10 | * | |
11 | * This library is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * Lesser General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Lesser General Public | |
17 | * License along with this library; if not, see <http://www.gnu.org/licenses/>. | |
18 | */ | |
19 | ||
c684822a | 20 | #include "qemu/osdep.h" |
bd277fa1 | 21 | #include "cpu.h" |
2ef6175a | 22 | #include "exec/helper-proto.h" |
bd277fa1 | 23 | |
baf50011 AM |
/*
 * If the byte ordering doesn't matter, i.e. all columns are treated
 * identically, then this union can be used directly.  If byte ordering
 * does matter, we generally avoid dumping the value to memory through
 * this union and operate on the 64-bit value with shifts instead.
 */
bd277fa1 RH |
/*
 * Overlay of one 64-bit multimedia value with its packed-lane views.
 * All members share the same storage; which array index maps to which
 * bit range of 'd' depends on the host byte order.
 */
typedef union {
    /* Unsigned views: 8 bytes, 4 halfwords, 2 words, 1 doubleword. */
    uint8_t  ub[8];
    uint16_t uh[4];
    uint32_t uw[2];
    uint64_t d;
    /* Signed views of the same storage. */
    int8_t   sb[8];
    int16_t  sh[4];
    int32_t  sw[2];
} LMIValue;
38 | ||
/*
 * Some byte ordering issues can be mitigated by XORing a lane index with
 * the following: it expands to N when HOST_WORDS_BIGENDIAN is defined and
 * to 0 otherwise.  The argument is parenthesized so that a compound
 * expression cannot be re-associated by the surrounding operators.
 */
#if defined(HOST_WORDS_BIGENDIAN)
# define BYTE_ORDER_XOR(N) (N)
#else
# define BYTE_ORDER_XOR(N) 0
#endif
45 | ||
/*
 * Saturation helpers for byte (B), halfword (H) and word (W) results.
 *
 * The signed variants (SATS*) clamp to both ends of the signed range.
 * The unsigned variants (SATU*) clamp only at the upper bound; a negative
 * argument is passed through unchanged.
 *
 * Every use of the argument is parenthesized so that expressions such as
 * SATUB(a & b) are evaluated as intended.  The argument is still evaluated
 * more than once, so it must not have side effects.
 */
#define SATSB(x)  ((x) < -0x80 ? -0x80 : (x) > 0x7f ? 0x7f : (x))
#define SATUB(x)  ((x) > 0xff ? 0xff : (x))

#define SATSH(x)  ((x) < -0x8000 ? -0x8000 : (x) > 0x7fff ? 0x7fff : (x))
#define SATUH(x)  ((x) > 0xffff ? 0xffff : (x))

#define SATSW(x) \
    ((x) < -0x80000000ll ? -0x80000000ll : (x) > 0x7fffffff ? 0x7fffffff : (x))
#define SATUW(x)  ((x) > 0xffffffffull ? 0xffffffffull : (x))
55 | ||
/*
 * Add the eight signed bytes of fs and ft lane by lane, saturating each
 * sum to the range [-0x80, 0x7f].  The results are returned packed in the
 * same lane positions.
 */
uint64_t helper_paddsb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        int sum = (int8_t)(fs >> shift) + (int8_t)(ft >> shift);

        if (sum > 0x7f) {
            sum = 0x7f;
        } else if (sum < -0x80) {
            sum = -0x80;
        }
        fd |= (uint64_t)(uint8_t)sum << shift;
    }
    return fd;
}
69 | ||
/*
 * Add the eight unsigned bytes of fs and ft lane by lane, saturating each
 * sum at 0xff.  The results are returned packed in the same lane positions.
 */
uint64_t helper_paddusb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        unsigned int sum = (uint8_t)(fs >> shift) + (uint8_t)(ft >> shift);

        if (sum > 0xff) {
            sum = 0xff;
        }
        fd |= (uint64_t)sum << shift;
    }
    return fd;
}
83 | ||
/*
 * Add the four signed halfwords of fs and ft lane by lane, saturating each
 * sum to the range [-0x8000, 0x7fff].
 */
uint64_t helper_paddsh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        int sum = (int16_t)(fs >> shift) + (int16_t)(ft >> shift);

        if (sum > 0x7fff) {
            sum = 0x7fff;
        } else if (sum < -0x8000) {
            sum = -0x8000;
        }
        fd |= (uint64_t)(uint16_t)sum << shift;
    }
    return fd;
}
97 | ||
/*
 * Add the four unsigned halfwords of fs and ft lane by lane, saturating
 * each sum at 0xffff.
 */
uint64_t helper_paddush(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        unsigned int sum = (uint16_t)(fs >> shift) + (uint16_t)(ft >> shift);

        if (sum > 0xffff) {
            sum = 0xffff;
        }
        fd |= (uint64_t)sum << shift;
    }
    return fd;
}
111 | ||
/*
 * Add the eight bytes of fs and ft lane by lane with wrap-around
 * (modulo 0x100); no carry propagates between lanes.
 */
uint64_t helper_paddb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        /* Truncating to 8 bits discards whatever the upper lanes added. */
        uint8_t sum = (uint8_t)((fs >> shift) + (ft >> shift));

        fd |= (uint64_t)sum << shift;
    }
    return fd;
}
124 | ||
/*
 * Add the four halfwords of fs and ft lane by lane with wrap-around
 * (modulo 0x10000); no carry propagates between lanes.
 */
uint64_t helper_paddh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        uint16_t sum = (uint16_t)((fs >> shift) + (ft >> shift));

        fd |= (uint64_t)sum << shift;
    }
    return fd;
}
137 | ||
/*
 * Add the two 32-bit words of fs and ft lane by lane with wrap-around;
 * the low-word carry does not reach the high word.
 */
uint64_t helper_paddw(uint64_t fs, uint64_t ft)
{
    uint32_t low = (uint32_t)fs + (uint32_t)ft;
    uint32_t high = (uint32_t)(fs >> 32) + (uint32_t)(ft >> 32);

    return ((uint64_t)high << 32) | low;
}
150 | ||
/*
 * Subtract the eight signed bytes of ft from those of fs lane by lane,
 * saturating each difference to the range [-0x80, 0x7f].
 */
uint64_t helper_psubsb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        int diff = (int8_t)(fs >> shift) - (int8_t)(ft >> shift);

        if (diff > 0x7f) {
            diff = 0x7f;
        } else if (diff < -0x80) {
            diff = -0x80;
        }
        fd |= (uint64_t)(uint8_t)diff << shift;
    }
    return fd;
}
164 | ||
/*
 * Subtract the eight unsigned bytes of ft from those of fs lane by lane
 * with unsigned saturation: a lane where ft exceeds fs yields 0.
 *
 * The difference of two unsigned bytes lies in [-0xff, 0xff], so only the
 * lower bound can be violated.  Clamping just the upper bound (as SATUB
 * does) would let a negative difference wrap around instead of saturating.
 */
uint64_t helper_psubusb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        int diff = (uint8_t)(fs >> shift) - (uint8_t)(ft >> shift);

        if (diff < 0) {
            diff = 0;
        }
        fd |= (uint64_t)diff << shift;
    }
    return fd;
}
178 | ||
/*
 * Subtract the four signed halfwords of ft from those of fs lane by lane,
 * saturating each difference to the range [-0x8000, 0x7fff].
 */
uint64_t helper_psubsh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        int diff = (int16_t)(fs >> shift) - (int16_t)(ft >> shift);

        if (diff > 0x7fff) {
            diff = 0x7fff;
        } else if (diff < -0x8000) {
            diff = -0x8000;
        }
        fd |= (uint64_t)(uint16_t)diff << shift;
    }
    return fd;
}
192 | ||
/*
 * Subtract the four unsigned halfwords of ft from those of fs lane by lane
 * with unsigned saturation: a lane where ft exceeds fs yields 0.
 *
 * The difference of two unsigned halfwords lies in [-0xffff, 0xffff], so
 * only the lower bound needs clamping.  Clamping just the upper bound (as
 * SATUH does) would let a negative difference wrap around.
 */
uint64_t helper_psubush(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        int diff = (uint16_t)(fs >> shift) - (uint16_t)(ft >> shift);

        if (diff < 0) {
            diff = 0;
        }
        fd |= (uint64_t)diff << shift;
    }
    return fd;
}
206 | ||
/*
 * Subtract the eight bytes of ft from those of fs lane by lane with
 * wrap-around (modulo 0x100); no borrow propagates between lanes.
 */
uint64_t helper_psubb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        /* Truncating to 8 bits discards the contribution of upper lanes. */
        uint8_t diff = (uint8_t)((fs >> shift) - (ft >> shift));

        fd |= (uint64_t)diff << shift;
    }
    return fd;
}
219 | ||
/*
 * Subtract the four halfwords of ft from those of fs lane by lane with
 * wrap-around (modulo 0x10000); no borrow propagates between lanes.
 */
uint64_t helper_psubh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        uint16_t diff = (uint16_t)((fs >> shift) - (ft >> shift));

        fd |= (uint64_t)diff << shift;
    }
    return fd;
}
232 | ||
/*
 * Subtract the two 32-bit words of ft from those of fs lane by lane with
 * wrap-around; the low-word borrow does not reach the high word.
 */
uint64_t helper_psubw(uint64_t fs, uint64_t ft)
{
    uint32_t low = (uint32_t)fs - (uint32_t)ft;
    uint32_t high = (uint32_t)(fs >> 32) - (uint32_t)(ft >> 32);

    return ((uint64_t)high << 32) | low;
}
245 | ||
/*
 * Shuffle the four halfwords of fs.  Bits [2*i+1 : 2*i] of ft select which
 * halfword of fs (counted from the least significant end) is copied into
 * halfword i of the result.  Only the low 8 bits of ft are consulted.
 */
uint64_t helper_pshufh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int lane;

    for (lane = 0; lane < 4; lane++) {
        unsigned int pick = (ft >> (lane * 2)) & 3;
        uint64_t half = (fs >> (pick * 16)) & 0xffff;

        fd |= half << (lane * 16);
    }
    return fd;
}
259 | ||
/*
 * Pack four signed 32-bit words into four signed halfwords with signed
 * saturation to [-0x8000, 0x7fff].
 *
 * Result halfwords 0 and 1 come from the low and high word of fs,
 * halfwords 2 and 3 from the low and high word of ft.
 *
 * Each saturated value is converted to an unsigned type before being
 * shifted into place: shifting a signed 64-bit value by 48 so that bits
 * reach or pass the sign bit is undefined behaviour in C.
 */
uint64_t helper_packsswh(uint64_t fs, uint64_t ft)
{
    const uint64_t src[2] = { fs, ft };
    uint64_t fd = 0;
    unsigned int i;

    for (i = 0; i < 4; ++i) {
        int32_t word = (int32_t)(src[i / 2] >> ((i % 2) * 32));

        if (word > 0x7fff) {
            word = 0x7fff;
        } else if (word < -0x8000) {
            word = -0x8000;
        }
        fd |= (uint64_t)(uint16_t)word << (i * 16);
    }
    return fd;
}
283 | ||
/*
 * Pack eight signed halfwords into eight signed bytes with signed
 * saturation to [-0x80, 0x7f].
 *
 * Result bytes 0-3 come from the four halfwords of fs, bytes 4-7 from the
 * four halfwords of ft, each in ascending lane order.
 */
uint64_t helper_packsshb(uint64_t fs, uint64_t ft)
{
    const uint64_t src[2] = { fs, ft };
    uint64_t fd = 0;
    unsigned int i;

    for (i = 0; i < 8; ++i) {
        int16_t half = (int16_t)(src[i / 4] >> ((i % 4) * 16));

        if (half > 0x7f) {
            half = 0x7f;
        } else if (half < -0x80) {
            half = -0x80;
        }
        fd |= (uint64_t)(uint8_t)half << (i * 8);
    }
    return fd;
}
302 | ||
/*
 * Pack eight signed halfwords into eight unsigned bytes with unsigned
 * saturation: values below 0 become 0 and values above 0xff become 0xff.
 *
 * Result bytes 0-3 come from the four halfwords of fs, bytes 4-7 from the
 * four halfwords of ft, each in ascending lane order.
 *
 * The source halfwords are signed, so both bounds must be checked;
 * clamping only the upper bound (as SATUB does) would leak the low byte of
 * a negative halfword into the result.
 */
uint64_t helper_packushb(uint64_t fs, uint64_t ft)
{
    const uint64_t src[2] = { fs, ft };
    uint64_t fd = 0;
    unsigned int i;

    for (i = 0; i < 8; ++i) {
        int16_t half = (int16_t)(src[i / 4] >> ((i % 4) * 16));

        if (half < 0) {
            half = 0;
        } else if (half > 0xff) {
            half = 0xff;
        }
        fd |= (uint64_t)half << (i * 8);
    }
    return fd;
}
321 | ||
/*
 * Interleave the low words: the result holds the low 32 bits of fs in its
 * low word and the low 32 bits of ft in its high word.
 */
uint64_t helper_punpcklwd(uint64_t fs, uint64_t ft)
{
    const uint64_t low_word = (uint32_t)fs;
    const uint64_t high_word = (uint64_t)(uint32_t)ft << 32;

    return high_word | low_word;
}
326 | ||
/*
 * Interleave the high words: the result holds the high 32 bits of fs in
 * its low word and the high 32 bits of ft in its high word.
 */
uint64_t helper_punpckhwd(uint64_t fs, uint64_t ft)
{
    const uint64_t low_word = fs >> 32;
    const uint64_t high_word = (ft >> 32) << 32;

    return high_word | low_word;
}
331 | ||
/*
 * Interleave the two low halfwords of fs and ft.  Counting halfwords from
 * the least significant end, the result is { fs[0], ft[0], fs[1], ft[1] }.
 */
uint64_t helper_punpcklhw(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int lane;

    for (lane = 0; lane < 2; ++lane) {
        uint64_t from_s = (fs >> (lane * 16)) & 0xffff;
        uint64_t from_t = (ft >> (lane * 16)) & 0xffff;

        /* Each source lane fills one 32-bit slot: fs low, ft high. */
        fd |= (from_s | (from_t << 16)) << (lane * 32);
    }
    return fd;
}
346 | ||
/*
 * Interleave the two high halfwords of fs and ft.  Counting halfwords from
 * the least significant end, the result is { fs[2], ft[2], fs[3], ft[3] }.
 */
uint64_t helper_punpckhhw(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int lane;

    for (lane = 0; lane < 2; ++lane) {
        uint64_t from_s = (fs >> ((lane + 2) * 16)) & 0xffff;
        uint64_t from_t = (ft >> ((lane + 2) * 16)) & 0xffff;

        /* Each source lane fills one 32-bit slot: fs low, ft high. */
        fd |= (from_s | (from_t << 16)) << (lane * 32);
    }
    return fd;
}
361 | ||
/*
 * Interleave the four low bytes of fs and ft.  Counting bytes from the
 * least significant end, result byte 2*i is fs[i] and byte 2*i+1 is ft[i]
 * for i in 0..3.
 */
uint64_t helper_punpcklbh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int lane;

    for (lane = 0; lane < 4; ++lane) {
        uint64_t from_s = (fs >> (lane * 8)) & 0xff;
        uint64_t from_t = (ft >> (lane * 8)) & 0xff;

        /* Each source lane fills one 16-bit slot: fs low, ft high. */
        fd |= (from_s | (from_t << 8)) << (lane * 16);
    }
    return fd;
}
380 | ||
/*
 * Interleave the four high bytes of fs and ft.  Counting bytes from the
 * least significant end, result byte 2*i is fs[4+i] and byte 2*i+1 is
 * ft[4+i] for i in 0..3.
 */
uint64_t helper_punpckhbh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int lane;

    for (lane = 0; lane < 4; ++lane) {
        uint64_t from_s = (fs >> ((lane + 4) * 8)) & 0xff;
        uint64_t from_t = (ft >> ((lane + 4) * 8)) & 0xff;

        /* Each source lane fills one 16-bit slot: fs low, ft high. */
        fd |= (from_s | (from_t << 8)) << (lane * 16);
    }
    return fd;
}
399 | ||
/*
 * Average the four unsigned halfwords of fs and ft lane by lane, rounding
 * up: each result is (a + b + 1) >> 1 computed without overflow.
 */
uint64_t helper_pavgh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        unsigned int a = (fs >> shift) & 0xffff;
        unsigned int b = (ft >> shift) & 0xffff;

        fd |= (uint64_t)((a + b + 1) >> 1) << shift;
    }
    return fd;
}
412 | ||
/*
 * Average the eight unsigned bytes of fs and ft lane by lane, rounding up:
 * each result is (a + b + 1) >> 1 computed without overflow.
 */
uint64_t helper_pavgb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        unsigned int a = (fs >> shift) & 0xff;
        unsigned int b = (ft >> shift) & 0xff;

        fd |= (uint64_t)((a + b + 1) >> 1) << shift;
    }
    return fd;
}
425 | ||
/*
 * Lane-wise maximum of the four signed halfwords of fs and ft.
 */
uint64_t helper_pmaxsh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        int16_t a = (int16_t)(fs >> shift);
        int16_t b = (int16_t)(ft >> shift);
        int16_t larger = b;

        if (a >= b) {
            larger = a;
        }
        fd |= (uint64_t)(uint16_t)larger << shift;
    }
    return fd;
}
438 | ||
/*
 * Lane-wise minimum of the four signed halfwords of fs and ft.
 */
uint64_t helper_pminsh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        int16_t a = (int16_t)(fs >> shift);
        int16_t b = (int16_t)(ft >> shift);
        int16_t smaller = b;

        if (a <= b) {
            smaller = a;
        }
        fd |= (uint64_t)(uint16_t)smaller << shift;
    }
    return fd;
}
451 | ||
/*
 * Lane-wise maximum of the eight unsigned bytes of fs and ft.
 *
 * All eight byte lanes are processed.  Iterating over only four lanes
 * would leave half of the result equal to fs, and which half would depend
 * on the host byte order when indexing through a union.
 */
uint64_t helper_pmaxub(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        uint8_t a = (uint8_t)(fs >> shift);
        uint8_t b = (uint8_t)(ft >> shift);

        fd |= (uint64_t)(a >= b ? a : b) << shift;
    }
    return fd;
}
464 | ||
/*
 * Lane-wise minimum of the eight unsigned bytes of fs and ft.
 *
 * All eight byte lanes are processed.  Iterating over only four lanes
 * would leave half of the result equal to fs, and which half would depend
 * on the host byte order when indexing through a union.
 */
uint64_t helper_pminub(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        uint8_t a = (uint8_t)(fs >> shift);
        uint8_t b = (uint8_t)(ft >> shift);

        fd |= (uint64_t)(a <= b ? a : b) << shift;
    }
    return fd;
}
477 | ||
/*
 * Compare the two 32-bit words of fs and ft for equality.  Each result
 * word is all ones where the lanes are equal and zero otherwise.
 */
uint64_t helper_pcmpeqw(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 32) {
        uint32_t a = (uint32_t)(fs >> shift);
        uint32_t b = (uint32_t)(ft >> shift);

        if (a == b) {
            fd |= (uint64_t)0xffffffffu << shift;
        }
    }
    return fd;
}
490 | ||
/*
 * Compare the two 32-bit words of fs and ft.  Each result word is all
 * ones where the fs lane is greater than the ft lane and zero otherwise.
 *
 * NOTE(review): the lanes are compared as unsigned values here; confirm
 * against the ISA manual whether a signed comparison is intended.
 */
uint64_t helper_pcmpgtw(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 32) {
        uint32_t a = (uint32_t)(fs >> shift);
        uint32_t b = (uint32_t)(ft >> shift);

        if (a > b) {
            fd |= (uint64_t)0xffffffffu << shift;
        }
    }
    return fd;
}
503 | ||
/*
 * Compare the four halfwords of fs and ft for equality.  Each result
 * halfword is all ones where the lanes are equal and zero otherwise.
 */
uint64_t helper_pcmpeqh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        uint16_t a = (uint16_t)(fs >> shift);
        uint16_t b = (uint16_t)(ft >> shift);

        if (a == b) {
            fd |= (uint64_t)0xffff << shift;
        }
    }
    return fd;
}
516 | ||
/*
 * Compare the four halfwords of fs and ft.  Each result halfword is all
 * ones where the fs lane is greater than the ft lane and zero otherwise.
 *
 * NOTE(review): the lanes are compared as unsigned values here; confirm
 * against the ISA manual whether a signed comparison is intended.
 */
uint64_t helper_pcmpgth(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        uint16_t a = (uint16_t)(fs >> shift);
        uint16_t b = (uint16_t)(ft >> shift);

        if (a > b) {
            fd |= (uint64_t)0xffff << shift;
        }
    }
    return fd;
}
529 | ||
/*
 * Compare the eight bytes of fs and ft for equality.  Each result byte is
 * 0xff where the lanes are equal and zero otherwise.
 */
uint64_t helper_pcmpeqb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        uint8_t a = (uint8_t)(fs >> shift);
        uint8_t b = (uint8_t)(ft >> shift);

        if (a == b) {
            fd |= (uint64_t)0xff << shift;
        }
    }
    return fd;
}
542 | ||
/*
 * Compare the eight bytes of fs and ft.  Each result byte is 0xff where
 * the fs lane is greater than the ft lane and zero otherwise.
 *
 * NOTE(review): the lanes are compared as unsigned values here; confirm
 * against the ISA manual whether a signed comparison is intended.
 */
uint64_t helper_pcmpgtb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        uint8_t a = (uint8_t)(fs >> shift);
        uint8_t b = (uint8_t)(ft >> shift);

        if (a > b) {
            fd |= (uint64_t)0xff << shift;
        }
    }
    return fd;
}
555 | ||
/*
 * Shift each of the two 32-bit words of fs left by the same amount.
 * The shift count is the low 7 bits of ft; a count above 31 gives 0.
 */
uint64_t helper_psllw(uint64_t fs, uint64_t ft)
{
    unsigned int count = ft & 0x7f;
    uint32_t low, high;

    if (count > 31) {
        return 0;
    }
    low = (uint32_t)fs << count;
    high = (uint32_t)(fs >> 32) << count;
    return ((uint64_t)high << 32) | low;
}
571 | ||
/*
 * Logically shift each of the two 32-bit words of fs right by the same
 * amount.  The shift count is the low 7 bits of ft; a count above 31
 * gives 0.
 */
uint64_t helper_psrlw(uint64_t fs, uint64_t ft)
{
    unsigned int count = ft & 0x7f;
    uint32_t low, high;

    if (count > 31) {
        return 0;
    }
    low = (uint32_t)fs >> count;
    high = (uint32_t)(fs >> 32) >> count;
    return ((uint64_t)high << 32) | low;
}
587 | ||
/*
 * Arithmetically shift each of the two signed 32-bit words of fs right by
 * the same amount.  The shift count is the low 7 bits of ft; a count above
 * 31 is treated as 31, so each word collapses to its sign.
 */
uint64_t helper_psraw(uint64_t fs, uint64_t ft)
{
    unsigned int count = ft & 0x7f;
    int32_t low = (int32_t)fs;
    int32_t high = (int32_t)(fs >> 32);

    if (count > 31) {
        count = 31;
    }
    low >>= count;
    high >>= count;
    return ((uint64_t)(uint32_t)high << 32) | (uint32_t)low;
}
603 | ||
/*
 * Shift each of the four halfwords of fs left by the same amount.
 * The shift count is the low 7 bits of ft; a count above 15 gives 0.
 */
uint64_t helper_psllh(uint64_t fs, uint64_t ft)
{
    unsigned int count = ft & 0x7f;
    uint64_t fd = 0;
    unsigned int shift;

    if (count > 15) {
        return 0;
    }
    for (shift = 0; shift < 64; shift += 16) {
        unsigned int half = (fs >> shift) & 0xffff;

        /* Bits shifted beyond the lane are dropped by the truncation. */
        fd |= (uint64_t)(uint16_t)(half << count) << shift;
    }
    return fd;
}
619 | ||
/*
 * Logically shift each of the four halfwords of fs right by the same
 * amount.  The shift count is the low 7 bits of ft; a count above 15
 * gives 0.
 */
uint64_t helper_psrlh(uint64_t fs, uint64_t ft)
{
    unsigned int count = ft & 0x7f;
    uint64_t fd = 0;
    unsigned int shift;

    if (count > 15) {
        return 0;
    }
    for (shift = 0; shift < 64; shift += 16) {
        unsigned int half = (fs >> shift) & 0xffff;

        fd |= (uint64_t)(half >> count) << shift;
    }
    return fd;
}
635 | ||
/*
 * Arithmetically shift each of the four signed halfwords of fs right by
 * the same amount.  The shift count is the low 7 bits of ft; a count above
 * 15 is treated as 15, so each halfword collapses to its sign.
 */
uint64_t helper_psrah(uint64_t fs, uint64_t ft)
{
    unsigned int count = ft & 0x7f;
    uint64_t fd = 0;
    unsigned int shift;

    if (count > 15) {
        count = 15;
    }
    for (shift = 0; shift < 64; shift += 16) {
        int16_t half = (int16_t)(fs >> shift);

        fd |= (uint64_t)(uint16_t)(half >> count) << shift;
    }
    return fd;
}
651 | ||
/*
 * Multiply the four signed halfwords of fs and ft lane by lane and keep
 * the low 16 bits of each product.
 */
uint64_t helper_pmullh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        /* The product of two int16_t values always fits in 32 bits. */
        int32_t product = (int16_t)(fs >> shift) * (int16_t)(ft >> shift);

        fd |= (uint64_t)(uint16_t)product << shift;
    }
    return fd;
}
664 | ||
/*
 * Multiply the four signed halfwords of fs and ft lane by lane and keep
 * the high 16 bits of each 32-bit signed product.
 */
uint64_t helper_pmulhh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        /* The product of two int16_t values always fits in 32 bits. */
        int32_t product = (int16_t)(fs >> shift) * (int16_t)(ft >> shift);

        fd |= (uint64_t)(uint16_t)(product >> 16) << shift;
    }
    return fd;
}
678 | ||
/*
 * Multiply the four unsigned halfwords of fs and ft lane by lane and keep
 * the high 16 bits of each 32-bit unsigned product.
 *
 * The multiplication is carried out in uint32_t.  Multiplying two uint16_t
 * operands directly would promote both to (signed) int, and products above
 * INT_MAX, e.g. 0xffff * 0xffff, would overflow, which is undefined
 * behaviour.
 */
uint64_t helper_pmulhuh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        uint32_t a = (uint16_t)(fs >> shift);
        uint32_t b = (uint16_t)(ft >> shift);

        fd |= (uint64_t)((a * b) >> 16) << shift;
    }
    return fd;
}
692 | ||
/*
 * Multiply the four signed halfwords of fs and ft lane by lane and add
 * adjacent products.  Counting halfwords from the least significant end,
 * the low result word is fs[0]*ft[0] + fs[1]*ft[1] and the high result
 * word is fs[2]*ft[2] + fs[3]*ft[3]; each sum wraps modulo 2^32.
 */
uint64_t helper_pmaddhw(uint64_t fs, uint64_t ft)
{
    uint32_t sum[2] = { 0, 0 };
    unsigned int lane;

    for (lane = 0; lane < 4; ++lane) {
        int16_t a = (int16_t)(fs >> (lane * 16));
        int16_t b = (int16_t)(ft >> (lane * 16));

        /* Accumulate as unsigned so the pairwise sum wraps, not overflows. */
        sum[lane / 2] += (uint32_t)(a * b);
    }
    return ((uint64_t)sum[1] << 32) | sum[0];
}
708 | ||
/*
 * Absolute difference of the eight unsigned bytes of fs and ft, lane by
 * lane: each result byte is |fs[i] - ft[i]|.
 */
uint64_t helper_pasubub(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        int diff = (uint8_t)(fs >> shift) - (uint8_t)(ft >> shift);

        if (diff < 0) {
            diff = -diff;
        }
        fd |= (uint64_t)diff << shift;
    }
    return fd;
}
722 | ||
/*
 * Sum the eight unsigned bytes of fs and return the total masked to
 * 16 bits.
 */
uint64_t helper_biadd(uint64_t fs)
{
    unsigned int total = 0;
    uint64_t rest;

    /* Consume one byte per step; remaining zero bytes add nothing. */
    for (rest = fs; rest != 0; rest >>= 8) {
        total += rest & 0xff;
    }
    return total & 0xffff;
}
732 | ||
/*
 * Gather the most significant bit of each of the eight bytes of fs into
 * an 8-bit mask: bit i of the result is bit 7 of byte i, counting bytes
 * from the least significant end.
 */
uint64_t helper_pmovmskb(uint64_t fs)
{
    unsigned int mask = 0;
    unsigned int lane;

    for (lane = 0; lane < 8; ++lane) {
        mask |= ((fs >> (lane * 8 + 7)) & 1) << lane;
    }
    return mask & 0xff;
}