]> Git Repo - qemu.git/blame - accel/tcg/tcg-runtime-gvec.c
Merge remote-tracking branch 'remotes/palmer/tags/riscv-for-master-4.0-rc1' into...
[qemu.git] / accel / tcg / tcg-runtime-gvec.c
CommitLineData
db432672
RH
1/*
2 * Generic vectorized operation runtime
3 *
4 * Copyright (c) 2018 Linaro
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
fb0343d5 9 * version 2.1 of the License, or (at your option) any later version.
db432672
RH
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include "qemu/osdep.h"
21#include "qemu/host-utils.h"
22#include "cpu.h"
23#include "exec/helper-proto.h"
24#include "tcg-gvec-desc.h"
25
26
27/* Virtually all hosts support 16-byte vectors. Those that don't can emulate
28 * them via GCC's generic vector extension. This turns out to be simpler and
29 * more reliable than getting the compiler to autovectorize.
30 *
31 * In tcg-op-gvec.c, we asserted that both the size and alignment of the data
32 * are multiples of 16.
33 *
34 * When the compiler does not support all of the operations we require, the
35 * loops are written so that we can always fall back on the base types.
36 */
37#ifdef CONFIG_VECTOR16
38typedef uint8_t vec8 __attribute__((vector_size(16)));
39typedef uint16_t vec16 __attribute__((vector_size(16)));
40typedef uint32_t vec32 __attribute__((vector_size(16)));
41typedef uint64_t vec64 __attribute__((vector_size(16)));
42
43typedef int8_t svec8 __attribute__((vector_size(16)));
44typedef int16_t svec16 __attribute__((vector_size(16)));
45typedef int32_t svec32 __attribute__((vector_size(16)));
46typedef int64_t svec64 __attribute__((vector_size(16)));
47
48#define DUP16(X) { X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X }
49#define DUP8(X) { X, X, X, X, X, X, X, X }
50#define DUP4(X) { X, X, X, X }
51#define DUP2(X) { X, X }
52#else
53typedef uint8_t vec8;
54typedef uint16_t vec16;
55typedef uint32_t vec32;
56typedef uint64_t vec64;
57
58typedef int8_t svec8;
59typedef int16_t svec16;
60typedef int32_t svec32;
61typedef int64_t svec64;
62
63#define DUP16(X) X
64#define DUP8(X) X
65#define DUP4(X) X
66#define DUP2(X) X
67#endif /* CONFIG_VECTOR16 */
68
/*
 * Zero the tail of the destination from @oprsz up to the maximum
 * vector size encoded in @desc.  Callers invoke this after writing
 * the active part so that the inactive upper portion of the register
 * is cleared.  NB: gvec_dup32/64 pass oprsz == 0 on a zero constant
 * to make this loop zero the entire destination.
 */
static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
{
    intptr_t maxsz = simd_maxsz(desc);
    intptr_t i;

    if (unlikely(maxsz > oprsz)) {
        /* Both sizes are multiples of 16, so 8-byte stores stay aligned. */
        for (i = oprsz; i < maxsz; i += sizeof(uint64_t)) {
            *(uint64_t *)(d + i) = 0;
        }
    }
}
80
81void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc)
82{
83 intptr_t oprsz = simd_oprsz(desc);
84 intptr_t i;
85
86 for (i = 0; i < oprsz; i += sizeof(vec8)) {
87 *(vec8 *)(d + i) = *(vec8 *)(a + i) + *(vec8 *)(b + i);
88 }
89 clear_high(d, oprsz, desc);
90}
91
92void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc)
93{
94 intptr_t oprsz = simd_oprsz(desc);
95 intptr_t i;
96
97 for (i = 0; i < oprsz; i += sizeof(vec16)) {
98 *(vec16 *)(d + i) = *(vec16 *)(a + i) + *(vec16 *)(b + i);
99 }
100 clear_high(d, oprsz, desc);
101}
102
103void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)
104{
105 intptr_t oprsz = simd_oprsz(desc);
106 intptr_t i;
107
108 for (i = 0; i < oprsz; i += sizeof(vec32)) {
109 *(vec32 *)(d + i) = *(vec32 *)(a + i) + *(vec32 *)(b + i);
110 }
111 clear_high(d, oprsz, desc);
112}
113
114void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
115{
116 intptr_t oprsz = simd_oprsz(desc);
117 intptr_t i;
118
119 for (i = 0; i < oprsz; i += sizeof(vec64)) {
120 *(vec64 *)(d + i) = *(vec64 *)(a + i) + *(vec64 *)(b + i);
121 }
122 clear_high(d, oprsz, desc);
123}
124
22fc3527
RH
/*
 * d[i] = a[i] + b, 8-bit elements with a scalar operand.
 * NOTE(review): b appears to carry the scalar already replicated into
 * every byte lane of the 64-bit value by the caller — confirm against
 * the dup performed in tcg-op-gvec.c.  With CONFIG_VECTOR16, DUP16
 * broadcasts it across the host vector; without, the (vec8) cast
 * simply truncates to a single lane.
 */
void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    vec8 vecb = (vec8)DUP16(b);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(vec8 *)(d + i) = *(vec8 *)(a + i) + vecb;
    }
    clear_high(d, oprsz, desc);
}
136
137void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
138{
139 intptr_t oprsz = simd_oprsz(desc);
140 vec16 vecb = (vec16)DUP8(b);
141 intptr_t i;
142
143 for (i = 0; i < oprsz; i += sizeof(vec16)) {
144 *(vec16 *)(d + i) = *(vec16 *)(a + i) + vecb;
145 }
146 clear_high(d, oprsz, desc);
147}
148
149void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
150{
151 intptr_t oprsz = simd_oprsz(desc);
152 vec32 vecb = (vec32)DUP4(b);
153 intptr_t i;
154
155 for (i = 0; i < oprsz; i += sizeof(vec32)) {
156 *(vec32 *)(d + i) = *(vec32 *)(a + i) + vecb;
157 }
158 clear_high(d, oprsz, desc);
159}
160
161void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc)
162{
163 intptr_t oprsz = simd_oprsz(desc);
164 vec64 vecb = (vec64)DUP2(b);
165 intptr_t i;
166
167 for (i = 0; i < oprsz; i += sizeof(vec64)) {
168 *(vec64 *)(d + i) = *(vec64 *)(a + i) + vecb;
169 }
170 clear_high(d, oprsz, desc);
171}
172
db432672
RH
173void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)
174{
175 intptr_t oprsz = simd_oprsz(desc);
176 intptr_t i;
177
178 for (i = 0; i < oprsz; i += sizeof(vec8)) {
179 *(vec8 *)(d + i) = *(vec8 *)(a + i) - *(vec8 *)(b + i);
180 }
181 clear_high(d, oprsz, desc);
182}
183
184void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)
185{
186 intptr_t oprsz = simd_oprsz(desc);
187 intptr_t i;
188
189 for (i = 0; i < oprsz; i += sizeof(vec16)) {
190 *(vec16 *)(d + i) = *(vec16 *)(a + i) - *(vec16 *)(b + i);
191 }
192 clear_high(d, oprsz, desc);
193}
194
195void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)
196{
197 intptr_t oprsz = simd_oprsz(desc);
198 intptr_t i;
199
200 for (i = 0; i < oprsz; i += sizeof(vec32)) {
201 *(vec32 *)(d + i) = *(vec32 *)(a + i) - *(vec32 *)(b + i);
202 }
203 clear_high(d, oprsz, desc);
204}
205
206void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
207{
208 intptr_t oprsz = simd_oprsz(desc);
209 intptr_t i;
210
211 for (i = 0; i < oprsz; i += sizeof(vec64)) {
212 *(vec64 *)(d + i) = *(vec64 *)(a + i) - *(vec64 *)(b + i);
213 }
214 clear_high(d, oprsz, desc);
215}
216
22fc3527
RH
217void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
218{
219 intptr_t oprsz = simd_oprsz(desc);
220 vec8 vecb = (vec8)DUP16(b);
221 intptr_t i;
222
223 for (i = 0; i < oprsz; i += sizeof(vec8)) {
224 *(vec8 *)(d + i) = *(vec8 *)(a + i) - vecb;
225 }
226 clear_high(d, oprsz, desc);
227}
228
229void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
230{
231 intptr_t oprsz = simd_oprsz(desc);
232 vec16 vecb = (vec16)DUP8(b);
233 intptr_t i;
234
235 for (i = 0; i < oprsz; i += sizeof(vec16)) {
236 *(vec16 *)(d + i) = *(vec16 *)(a + i) - vecb;
237 }
238 clear_high(d, oprsz, desc);
239}
240
241void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
242{
243 intptr_t oprsz = simd_oprsz(desc);
244 vec32 vecb = (vec32)DUP4(b);
245 intptr_t i;
246
247 for (i = 0; i < oprsz; i += sizeof(vec32)) {
248 *(vec32 *)(d + i) = *(vec32 *)(a + i) - vecb;
249 }
250 clear_high(d, oprsz, desc);
251}
252
253void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc)
254{
255 intptr_t oprsz = simd_oprsz(desc);
256 vec64 vecb = (vec64)DUP2(b);
257 intptr_t i;
258
259 for (i = 0; i < oprsz; i += sizeof(vec64)) {
260 *(vec64 *)(d + i) = *(vec64 *)(a + i) - vecb;
261 }
262 clear_high(d, oprsz, desc);
263}
264
3774030a
RH
265void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc)
266{
267 intptr_t oprsz = simd_oprsz(desc);
268 intptr_t i;
269
270 for (i = 0; i < oprsz; i += sizeof(vec8)) {
271 *(vec8 *)(d + i) = *(vec8 *)(a + i) * *(vec8 *)(b + i);
272 }
273 clear_high(d, oprsz, desc);
274}
275
276void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc)
277{
278 intptr_t oprsz = simd_oprsz(desc);
279 intptr_t i;
280
281 for (i = 0; i < oprsz; i += sizeof(vec16)) {
282 *(vec16 *)(d + i) = *(vec16 *)(a + i) * *(vec16 *)(b + i);
283 }
284 clear_high(d, oprsz, desc);
285}
286
287void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc)
288{
289 intptr_t oprsz = simd_oprsz(desc);
290 intptr_t i;
291
292 for (i = 0; i < oprsz; i += sizeof(vec32)) {
293 *(vec32 *)(d + i) = *(vec32 *)(a + i) * *(vec32 *)(b + i);
294 }
295 clear_high(d, oprsz, desc);
296}
297
298void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
299{
300 intptr_t oprsz = simd_oprsz(desc);
301 intptr_t i;
302
303 for (i = 0; i < oprsz; i += sizeof(vec64)) {
304 *(vec64 *)(d + i) = *(vec64 *)(a + i) * *(vec64 *)(b + i);
305 }
306 clear_high(d, oprsz, desc);
307}
308
22fc3527
RH
309void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
310{
311 intptr_t oprsz = simd_oprsz(desc);
312 vec8 vecb = (vec8)DUP16(b);
313 intptr_t i;
314
315 for (i = 0; i < oprsz; i += sizeof(vec8)) {
316 *(vec8 *)(d + i) = *(vec8 *)(a + i) * vecb;
317 }
318 clear_high(d, oprsz, desc);
319}
320
321void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
322{
323 intptr_t oprsz = simd_oprsz(desc);
324 vec16 vecb = (vec16)DUP8(b);
325 intptr_t i;
326
327 for (i = 0; i < oprsz; i += sizeof(vec16)) {
328 *(vec16 *)(d + i) = *(vec16 *)(a + i) * vecb;
329 }
330 clear_high(d, oprsz, desc);
331}
332
333void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
334{
335 intptr_t oprsz = simd_oprsz(desc);
336 vec32 vecb = (vec32)DUP4(b);
337 intptr_t i;
338
339 for (i = 0; i < oprsz; i += sizeof(vec32)) {
340 *(vec32 *)(d + i) = *(vec32 *)(a + i) * vecb;
341 }
342 clear_high(d, oprsz, desc);
343}
344
345void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc)
346{
347 intptr_t oprsz = simd_oprsz(desc);
348 vec64 vecb = (vec64)DUP2(b);
349 intptr_t i;
350
351 for (i = 0; i < oprsz; i += sizeof(vec64)) {
352 *(vec64 *)(d + i) = *(vec64 *)(a + i) * vecb;
353 }
354 clear_high(d, oprsz, desc);
355}
356
db432672
RH
357void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc)
358{
359 intptr_t oprsz = simd_oprsz(desc);
360 intptr_t i;
361
362 for (i = 0; i < oprsz; i += sizeof(vec8)) {
363 *(vec8 *)(d + i) = -*(vec8 *)(a + i);
364 }
365 clear_high(d, oprsz, desc);
366}
367
368void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc)
369{
370 intptr_t oprsz = simd_oprsz(desc);
371 intptr_t i;
372
373 for (i = 0; i < oprsz; i += sizeof(vec16)) {
374 *(vec16 *)(d + i) = -*(vec16 *)(a + i);
375 }
376 clear_high(d, oprsz, desc);
377}
378
379void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc)
380{
381 intptr_t oprsz = simd_oprsz(desc);
382 intptr_t i;
383
384 for (i = 0; i < oprsz; i += sizeof(vec32)) {
385 *(vec32 *)(d + i) = -*(vec32 *)(a + i);
386 }
387 clear_high(d, oprsz, desc);
388}
389
390void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
391{
392 intptr_t oprsz = simd_oprsz(desc);
393 intptr_t i;
394
395 for (i = 0; i < oprsz; i += sizeof(vec64)) {
396 *(vec64 *)(d + i) = -*(vec64 *)(a + i);
397 }
398 clear_high(d, oprsz, desc);
399}
400
/* Copy the active part of a to d, then clear the inactive tail. */
void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);

    memcpy(d, a, oprsz);
    clear_high(d, oprsz, desc);
}
408
/*
 * Fill the destination with the 64-bit constant c.
 * For c == 0 we set oprsz to 0, so clear_high()'s maxsz loop zeroes
 * the whole destination in one pass instead of storing zeros twice.
 */
void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    if (c == 0) {
        oprsz = 0;
    } else {
        for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
            *(uint64_t *)(d + i) = c;
        }
    }
    clear_high(d, oprsz, desc);
}
423
/*
 * Fill the destination with the 32-bit constant c.
 * Same c == 0 shortcut as gvec_dup64: let clear_high() zero everything.
 */
void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    if (c == 0) {
        oprsz = 0;
    } else {
        for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
            *(uint32_t *)(d + i) = c;
        }
    }
    clear_high(d, oprsz, desc);
}
438
/* Replicate the low 16 bits of c into both halves of a 32-bit word
   (multiply by 0x00010001), then defer to the 32-bit dup. */
void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c)
{
    HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff));
}
443
/* Replicate the low byte of c into all four bytes of a 32-bit word
   (multiply by 0x01010101), then defer to the 32-bit dup. */
void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c)
{
    HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff));
}
448
449void HELPER(gvec_not)(void *d, void *a, uint32_t desc)
450{
451 intptr_t oprsz = simd_oprsz(desc);
452 intptr_t i;
453
454 for (i = 0; i < oprsz; i += sizeof(vec64)) {
455 *(vec64 *)(d + i) = ~*(vec64 *)(a + i);
456 }
457 clear_high(d, oprsz, desc);
458}
459
460void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)
461{
462 intptr_t oprsz = simd_oprsz(desc);
463 intptr_t i;
464
465 for (i = 0; i < oprsz; i += sizeof(vec64)) {
466 *(vec64 *)(d + i) = *(vec64 *)(a + i) & *(vec64 *)(b + i);
467 }
468 clear_high(d, oprsz, desc);
469}
470
471void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)
472{
473 intptr_t oprsz = simd_oprsz(desc);
474 intptr_t i;
475
476 for (i = 0; i < oprsz; i += sizeof(vec64)) {
477 *(vec64 *)(d + i) = *(vec64 *)(a + i) | *(vec64 *)(b + i);
478 }
479 clear_high(d, oprsz, desc);
480}
481
482void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)
483{
484 intptr_t oprsz = simd_oprsz(desc);
485 intptr_t i;
486
487 for (i = 0; i < oprsz; i += sizeof(vec64)) {
488 *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ *(vec64 *)(b + i);
489 }
490 clear_high(d, oprsz, desc);
491}
492
/* d = a & ~b (and-complement: second operand is the one inverted). */
void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) &~ *(vec64 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}
503
504void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)
505{
506 intptr_t oprsz = simd_oprsz(desc);
507 intptr_t i;
508
509 for (i = 0; i < oprsz; i += sizeof(vec64)) {
510 *(vec64 *)(d + i) = *(vec64 *)(a + i) |~ *(vec64 *)(b + i);
511 }
512 clear_high(d, oprsz, desc);
513}
d0ec9796 514
f550805d
RH
515void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc)
516{
517 intptr_t oprsz = simd_oprsz(desc);
518 intptr_t i;
519
520 for (i = 0; i < oprsz; i += sizeof(vec64)) {
521 *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) & *(vec64 *)(b + i));
522 }
523 clear_high(d, oprsz, desc);
524}
525
526void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc)
527{
528 intptr_t oprsz = simd_oprsz(desc);
529 intptr_t i;
530
531 for (i = 0; i < oprsz; i += sizeof(vec64)) {
532 *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) | *(vec64 *)(b + i));
533 }
534 clear_high(d, oprsz, desc);
535}
536
537void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc)
538{
539 intptr_t oprsz = simd_oprsz(desc);
540 intptr_t i;
541
542 for (i = 0; i < oprsz; i += sizeof(vec64)) {
543 *(vec64 *)(d + i) = ~(*(vec64 *)(a + i) ^ *(vec64 *)(b + i));
544 }
545 clear_high(d, oprsz, desc);
546}
547
22fc3527
RH
548void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
549{
550 intptr_t oprsz = simd_oprsz(desc);
551 vec64 vecb = (vec64)DUP2(b);
552 intptr_t i;
553
554 for (i = 0; i < oprsz; i += sizeof(vec64)) {
555 *(vec64 *)(d + i) = *(vec64 *)(a + i) & vecb;
556 }
557 clear_high(d, oprsz, desc);
558}
559
560void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
561{
562 intptr_t oprsz = simd_oprsz(desc);
563 vec64 vecb = (vec64)DUP2(b);
564 intptr_t i;
565
566 for (i = 0; i < oprsz; i += sizeof(vec64)) {
567 *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ vecb;
568 }
569 clear_high(d, oprsz, desc);
570}
571
572void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc)
573{
574 intptr_t oprsz = simd_oprsz(desc);
575 vec64 vecb = (vec64)DUP2(b);
576 intptr_t i;
577
578 for (i = 0; i < oprsz; i += sizeof(vec64)) {
579 *(vec64 *)(d + i) = *(vec64 *)(a + i) | vecb;
580 }
581 clear_high(d, oprsz, desc);
582}
583
d0ec9796
RH
/*
 * d = a << shift, 8-bit elements; the immediate shift count is packed
 * into the descriptor by the caller (simd_data).
 * NOTE(review): assumes 0 <= shift < 8 — a larger count would be UB on
 * the scalar fallback path; confirm the tcg-op-gvec.c callers enforce it.
 */
void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(vec8 *)(d + i) = *(vec8 *)(a + i) << shift;
    }
    clear_high(d, oprsz, desc);
}
595
596void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc)
597{
598 intptr_t oprsz = simd_oprsz(desc);
599 int shift = simd_data(desc);
600 intptr_t i;
601
602 for (i = 0; i < oprsz; i += sizeof(vec16)) {
603 *(vec16 *)(d + i) = *(vec16 *)(a + i) << shift;
604 }
605 clear_high(d, oprsz, desc);
606}
607
608void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc)
609{
610 intptr_t oprsz = simd_oprsz(desc);
611 int shift = simd_data(desc);
612 intptr_t i;
613
614 for (i = 0; i < oprsz; i += sizeof(vec32)) {
615 *(vec32 *)(d + i) = *(vec32 *)(a + i) << shift;
616 }
617 clear_high(d, oprsz, desc);
618}
619
620void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc)
621{
622 intptr_t oprsz = simd_oprsz(desc);
623 int shift = simd_data(desc);
624 intptr_t i;
625
626 for (i = 0; i < oprsz; i += sizeof(vec64)) {
627 *(vec64 *)(d + i) = *(vec64 *)(a + i) << shift;
628 }
629 clear_high(d, oprsz, desc);
630}
631
632void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc)
633{
634 intptr_t oprsz = simd_oprsz(desc);
635 int shift = simd_data(desc);
636 intptr_t i;
637
638 for (i = 0; i < oprsz; i += sizeof(vec8)) {
639 *(vec8 *)(d + i) = *(vec8 *)(a + i) >> shift;
640 }
641 clear_high(d, oprsz, desc);
642}
643
644void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc)
645{
646 intptr_t oprsz = simd_oprsz(desc);
647 int shift = simd_data(desc);
648 intptr_t i;
649
650 for (i = 0; i < oprsz; i += sizeof(vec16)) {
651 *(vec16 *)(d + i) = *(vec16 *)(a + i) >> shift;
652 }
653 clear_high(d, oprsz, desc);
654}
655
656void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc)
657{
658 intptr_t oprsz = simd_oprsz(desc);
659 int shift = simd_data(desc);
660 intptr_t i;
661
662 for (i = 0; i < oprsz; i += sizeof(vec32)) {
663 *(vec32 *)(d + i) = *(vec32 *)(a + i) >> shift;
664 }
665 clear_high(d, oprsz, desc);
666}
667
668void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc)
669{
670 intptr_t oprsz = simd_oprsz(desc);
671 int shift = simd_data(desc);
672 intptr_t i;
673
674 for (i = 0; i < oprsz; i += sizeof(vec64)) {
675 *(vec64 *)(d + i) = *(vec64 *)(a + i) >> shift;
676 }
677 clear_high(d, oprsz, desc);
678}
679
/*
 * d = a >> shift (arithmetic), 8-bit elements.  The signed vector type
 * svec8 is used so that >> sign-extends rather than zero-fills.
 */
void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(svec8 *)(d + i) = *(svec8 *)(a + i) >> shift;
    }
    clear_high(d, oprsz, desc);
}
691
692void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc)
693{
694 intptr_t oprsz = simd_oprsz(desc);
695 int shift = simd_data(desc);
696 intptr_t i;
697
698 for (i = 0; i < oprsz; i += sizeof(vec16)) {
699 *(svec16 *)(d + i) = *(svec16 *)(a + i) >> shift;
700 }
701 clear_high(d, oprsz, desc);
702}
703
704void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc)
705{
706 intptr_t oprsz = simd_oprsz(desc);
707 int shift = simd_data(desc);
708 intptr_t i;
709
710 for (i = 0; i < oprsz; i += sizeof(vec32)) {
711 *(svec32 *)(d + i) = *(svec32 *)(a + i) >> shift;
712 }
713 clear_high(d, oprsz, desc);
714}
715
716void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
717{
718 intptr_t oprsz = simd_oprsz(desc);
719 int shift = simd_data(desc);
720 intptr_t i;
721
722 for (i = 0; i < oprsz; i += sizeof(vec64)) {
723 *(svec64 *)(d + i) = *(svec64 *)(a + i) >> shift;
724 }
725 clear_high(d, oprsz, desc);
726}
212be173
RH
727
/* If vectors are enabled, the compiler fills in -1 for true.
   Otherwise, we must take care of this by hand.  */
#ifdef CONFIG_VECTOR16
# define DO_CMP0(X) X
#else
# define DO_CMP0(X) -(X)
#endif

/* Emit one comparison helper: each element of d becomes all-ones if
   (a OP b) holds for that element, else all-zeros.  */
#define DO_CMP1(NAME, TYPE, OP)                                            \
void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc)                \
{                                                                          \
    intptr_t oprsz = simd_oprsz(desc);                                     \
    intptr_t i;                                                            \
    for (i = 0; i < oprsz; i += sizeof(TYPE)) {                            \
        *(TYPE *)(d + i) = DO_CMP0(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \
    }                                                                      \
    clear_high(d, oprsz, desc);                                            \
}

/* Instantiate eq/ne plus signed (svec) lt/le and unsigned (vec)
   ltu/leu for one element size.  */
#define DO_CMP2(SZ) \
    DO_CMP1(gvec_eq##SZ, vec##SZ, ==)    \
    DO_CMP1(gvec_ne##SZ, vec##SZ, !=)    \
    DO_CMP1(gvec_lt##SZ, svec##SZ, <)    \
    DO_CMP1(gvec_le##SZ, svec##SZ, <=)   \
    DO_CMP1(gvec_ltu##SZ, vec##SZ, <)    \
    DO_CMP1(gvec_leu##SZ, vec##SZ, <=)

DO_CMP2(8)
DO_CMP2(16)
DO_CMP2(32)
DO_CMP2(64)

#undef DO_CMP0
#undef DO_CMP1
#undef DO_CMP2
f49b12c6
RH
763
764void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
765{
766 intptr_t oprsz = simd_oprsz(desc);
767 intptr_t i;
768
769 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
770 int r = *(int8_t *)(a + i) + *(int8_t *)(b + i);
771 if (r > INT8_MAX) {
772 r = INT8_MAX;
773 } else if (r < INT8_MIN) {
774 r = INT8_MIN;
775 }
776 *(int8_t *)(d + i) = r;
777 }
778 clear_high(d, oprsz, desc);
779}
780
781void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc)
782{
783 intptr_t oprsz = simd_oprsz(desc);
784 intptr_t i;
785
786 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
787 int r = *(int16_t *)(a + i) + *(int16_t *)(b + i);
788 if (r > INT16_MAX) {
789 r = INT16_MAX;
790 } else if (r < INT16_MIN) {
791 r = INT16_MIN;
792 }
793 *(int16_t *)(d + i) = r;
794 }
795 clear_high(d, oprsz, desc);
796}
797
798void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc)
799{
800 intptr_t oprsz = simd_oprsz(desc);
801 intptr_t i;
802
803 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
804 int32_t ai = *(int32_t *)(a + i);
805 int32_t bi = *(int32_t *)(b + i);
806 int32_t di = ai + bi;
807 if (((di ^ ai) &~ (ai ^ bi)) < 0) {
808 /* Signed overflow. */
809 di = (di < 0 ? INT32_MAX : INT32_MIN);
810 }
811 *(int32_t *)(d + i) = di;
812 }
813 clear_high(d, oprsz, desc);
814}
815
816void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc)
817{
818 intptr_t oprsz = simd_oprsz(desc);
819 intptr_t i;
820
821 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
822 int64_t ai = *(int64_t *)(a + i);
823 int64_t bi = *(int64_t *)(b + i);
824 int64_t di = ai + bi;
825 if (((di ^ ai) &~ (ai ^ bi)) < 0) {
826 /* Signed overflow. */
827 di = (di < 0 ? INT64_MAX : INT64_MIN);
828 }
829 *(int64_t *)(d + i) = di;
830 }
831 clear_high(d, oprsz, desc);
832}
833
834void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc)
835{
836 intptr_t oprsz = simd_oprsz(desc);
837 intptr_t i;
838
839 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
840 int r = *(int8_t *)(a + i) - *(int8_t *)(b + i);
841 if (r > INT8_MAX) {
842 r = INT8_MAX;
843 } else if (r < INT8_MIN) {
844 r = INT8_MIN;
845 }
846 *(uint8_t *)(d + i) = r;
847 }
848 clear_high(d, oprsz, desc);
849}
850
851void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc)
852{
853 intptr_t oprsz = simd_oprsz(desc);
854 intptr_t i;
855
856 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
857 int r = *(int16_t *)(a + i) - *(int16_t *)(b + i);
858 if (r > INT16_MAX) {
859 r = INT16_MAX;
860 } else if (r < INT16_MIN) {
861 r = INT16_MIN;
862 }
863 *(int16_t *)(d + i) = r;
864 }
865 clear_high(d, oprsz, desc);
866}
867
868void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc)
869{
870 intptr_t oprsz = simd_oprsz(desc);
871 intptr_t i;
872
873 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
874 int32_t ai = *(int32_t *)(a + i);
875 int32_t bi = *(int32_t *)(b + i);
876 int32_t di = ai - bi;
877 if (((di ^ ai) & (ai ^ bi)) < 0) {
878 /* Signed overflow. */
879 di = (di < 0 ? INT32_MAX : INT32_MIN);
880 }
881 *(int32_t *)(d + i) = di;
882 }
883 clear_high(d, oprsz, desc);
884}
885
886void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc)
887{
888 intptr_t oprsz = simd_oprsz(desc);
889 intptr_t i;
890
891 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
892 int64_t ai = *(int64_t *)(a + i);
893 int64_t bi = *(int64_t *)(b + i);
894 int64_t di = ai - bi;
895 if (((di ^ ai) & (ai ^ bi)) < 0) {
896 /* Signed overflow. */
897 di = (di < 0 ? INT64_MAX : INT64_MIN);
898 }
899 *(int64_t *)(d + i) = di;
900 }
901 clear_high(d, oprsz, desc);
902}
903
904void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc)
905{
906 intptr_t oprsz = simd_oprsz(desc);
907 intptr_t i;
908
909 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
910 unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
911 if (r > UINT8_MAX) {
912 r = UINT8_MAX;
913 }
914 *(uint8_t *)(d + i) = r;
915 }
916 clear_high(d, oprsz, desc);
917}
918
919void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc)
920{
921 intptr_t oprsz = simd_oprsz(desc);
922 intptr_t i;
923
924 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
925 unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
926 if (r > UINT16_MAX) {
927 r = UINT16_MAX;
928 }
929 *(uint16_t *)(d + i) = r;
930 }
931 clear_high(d, oprsz, desc);
932}
933
/* d = unsigned-saturating(a + b), 32-bit elements. */
void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
        uint32_t ai = *(uint32_t *)(a + i);
        uint32_t bi = *(uint32_t *)(b + i);
        uint32_t di = ai + bi;
        /* Unsigned wrap-around means the sum came out smaller than an
           addend; that is the overflow (carry-out) test.  */
        if (di < ai) {
            di = UINT32_MAX;
        }
        *(uint32_t *)(d + i) = di;
    }
    clear_high(d, oprsz, desc);
}
950
951void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc)
952{
953 intptr_t oprsz = simd_oprsz(desc);
954 intptr_t i;
955
956 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
957 uint64_t ai = *(uint64_t *)(a + i);
958 uint64_t bi = *(uint64_t *)(b + i);
959 uint64_t di = ai + bi;
960 if (di < ai) {
961 di = UINT64_MAX;
962 }
963 *(uint64_t *)(d + i) = di;
964 }
965 clear_high(d, oprsz, desc);
966}
967
968void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc)
969{
970 intptr_t oprsz = simd_oprsz(desc);
971 intptr_t i;
972
973 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
974 int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
975 if (r < 0) {
976 r = 0;
977 }
978 *(uint8_t *)(d + i) = r;
979 }
980 clear_high(d, oprsz, desc);
981}
982
983void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc)
984{
985 intptr_t oprsz = simd_oprsz(desc);
986 intptr_t i;
987
988 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
989 int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
990 if (r < 0) {
991 r = 0;
992 }
993 *(uint16_t *)(d + i) = r;
994 }
995 clear_high(d, oprsz, desc);
996}
997
998void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc)
999{
1000 intptr_t oprsz = simd_oprsz(desc);
1001 intptr_t i;
1002
1003 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1004 uint32_t ai = *(uint32_t *)(a + i);
1005 uint32_t bi = *(uint32_t *)(b + i);
1006 uint32_t di = ai - bi;
1007 if (ai < bi) {
1008 di = 0;
1009 }
1010 *(uint32_t *)(d + i) = di;
1011 }
1012 clear_high(d, oprsz, desc);
1013}
1014
1015void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc)
1016{
1017 intptr_t oprsz = simd_oprsz(desc);
1018 intptr_t i;
1019
1020 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1021 uint64_t ai = *(uint64_t *)(a + i);
1022 uint64_t bi = *(uint64_t *)(b + i);
1023 uint64_t di = ai - bi;
1024 if (ai < bi) {
1025 di = 0;
1026 }
1027 *(uint64_t *)(d + i) = di;
1028 }
1029 clear_high(d, oprsz, desc);
1030}
dd0a0fcd
RH
1031
1032void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc)
1033{
1034 intptr_t oprsz = simd_oprsz(desc);
1035 intptr_t i;
1036
1037 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1038 int8_t aa = *(int8_t *)(a + i);
1039 int8_t bb = *(int8_t *)(b + i);
1040 int8_t dd = aa < bb ? aa : bb;
1041 *(int8_t *)(d + i) = dd;
1042 }
1043 clear_high(d, oprsz, desc);
1044}
1045
1046void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc)
1047{
1048 intptr_t oprsz = simd_oprsz(desc);
1049 intptr_t i;
1050
1051 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1052 int16_t aa = *(int16_t *)(a + i);
1053 int16_t bb = *(int16_t *)(b + i);
1054 int16_t dd = aa < bb ? aa : bb;
1055 *(int16_t *)(d + i) = dd;
1056 }
1057 clear_high(d, oprsz, desc);
1058}
1059
1060void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc)
1061{
1062 intptr_t oprsz = simd_oprsz(desc);
1063 intptr_t i;
1064
1065 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1066 int32_t aa = *(int32_t *)(a + i);
1067 int32_t bb = *(int32_t *)(b + i);
1068 int32_t dd = aa < bb ? aa : bb;
1069 *(int32_t *)(d + i) = dd;
1070 }
1071 clear_high(d, oprsz, desc);
1072}
1073
1074void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc)
1075{
1076 intptr_t oprsz = simd_oprsz(desc);
1077 intptr_t i;
1078
1079 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1080 int64_t aa = *(int64_t *)(a + i);
1081 int64_t bb = *(int64_t *)(b + i);
1082 int64_t dd = aa < bb ? aa : bb;
1083 *(int64_t *)(d + i) = dd;
1084 }
1085 clear_high(d, oprsz, desc);
1086}
1087
1088void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc)
1089{
1090 intptr_t oprsz = simd_oprsz(desc);
1091 intptr_t i;
1092
1093 for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1094 int8_t aa = *(int8_t *)(a + i);
1095 int8_t bb = *(int8_t *)(b + i);
1096 int8_t dd = aa > bb ? aa : bb;
1097 *(int8_t *)(d + i) = dd;
1098 }
1099 clear_high(d, oprsz, desc);
1100}
1101
1102void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc)
1103{
1104 intptr_t oprsz = simd_oprsz(desc);
1105 intptr_t i;
1106
1107 for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1108 int16_t aa = *(int16_t *)(a + i);
1109 int16_t bb = *(int16_t *)(b + i);
1110 int16_t dd = aa > bb ? aa : bb;
1111 *(int16_t *)(d + i) = dd;
1112 }
1113 clear_high(d, oprsz, desc);
1114}
1115
1116void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc)
1117{
1118 intptr_t oprsz = simd_oprsz(desc);
1119 intptr_t i;
1120
1121 for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1122 int32_t aa = *(int32_t *)(a + i);
1123 int32_t bb = *(int32_t *)(b + i);
1124 int32_t dd = aa > bb ? aa : bb;
1125 *(int32_t *)(d + i) = dd;
1126 }
1127 clear_high(d, oprsz, desc);
1128}
1129
1130void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc)
1131{
1132 intptr_t oprsz = simd_oprsz(desc);
1133 intptr_t i;
1134
1135 for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1136 int64_t aa = *(int64_t *)(a + i);
1137 int64_t bb = *(int64_t *)(b + i);
1138 int64_t dd = aa > bb ? aa : bb;
1139 *(int64_t *)(d + i) = dd;
1140 }
1141 clear_high(d, oprsz, desc);
1142}
1143
1144void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc)
1145{
1146 intptr_t oprsz = simd_oprsz(desc);
1147 intptr_t i;
1148
1149 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1150 uint8_t aa = *(uint8_t *)(a + i);
1151 uint8_t bb = *(uint8_t *)(b + i);
1152 uint8_t dd = aa < bb ? aa : bb;
1153 *(uint8_t *)(d + i) = dd;
1154 }
1155 clear_high(d, oprsz, desc);
1156}
1157
1158void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc)
1159{
1160 intptr_t oprsz = simd_oprsz(desc);
1161 intptr_t i;
1162
1163 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1164 uint16_t aa = *(uint16_t *)(a + i);
1165 uint16_t bb = *(uint16_t *)(b + i);
1166 uint16_t dd = aa < bb ? aa : bb;
1167 *(uint16_t *)(d + i) = dd;
1168 }
1169 clear_high(d, oprsz, desc);
1170}
1171
1172void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc)
1173{
1174 intptr_t oprsz = simd_oprsz(desc);
1175 intptr_t i;
1176
1177 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1178 uint32_t aa = *(uint32_t *)(a + i);
1179 uint32_t bb = *(uint32_t *)(b + i);
1180 uint32_t dd = aa < bb ? aa : bb;
1181 *(uint32_t *)(d + i) = dd;
1182 }
1183 clear_high(d, oprsz, desc);
1184}
1185
1186void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc)
1187{
1188 intptr_t oprsz = simd_oprsz(desc);
1189 intptr_t i;
1190
1191 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1192 uint64_t aa = *(uint64_t *)(a + i);
1193 uint64_t bb = *(uint64_t *)(b + i);
1194 uint64_t dd = aa < bb ? aa : bb;
1195 *(uint64_t *)(d + i) = dd;
1196 }
1197 clear_high(d, oprsz, desc);
1198}
1199
1200void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc)
1201{
1202 intptr_t oprsz = simd_oprsz(desc);
1203 intptr_t i;
1204
1205 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1206 uint8_t aa = *(uint8_t *)(a + i);
1207 uint8_t bb = *(uint8_t *)(b + i);
1208 uint8_t dd = aa > bb ? aa : bb;
1209 *(uint8_t *)(d + i) = dd;
1210 }
1211 clear_high(d, oprsz, desc);
1212}
1213
1214void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc)
1215{
1216 intptr_t oprsz = simd_oprsz(desc);
1217 intptr_t i;
1218
1219 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1220 uint16_t aa = *(uint16_t *)(a + i);
1221 uint16_t bb = *(uint16_t *)(b + i);
1222 uint16_t dd = aa > bb ? aa : bb;
1223 *(uint16_t *)(d + i) = dd;
1224 }
1225 clear_high(d, oprsz, desc);
1226}
1227
1228void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc)
1229{
1230 intptr_t oprsz = simd_oprsz(desc);
1231 intptr_t i;
1232
1233 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1234 uint32_t aa = *(uint32_t *)(a + i);
1235 uint32_t bb = *(uint32_t *)(b + i);
1236 uint32_t dd = aa > bb ? aa : bb;
1237 *(uint32_t *)(d + i) = dd;
1238 }
1239 clear_high(d, oprsz, desc);
1240}
1241
1242void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc)
1243{
1244 intptr_t oprsz = simd_oprsz(desc);
1245 intptr_t i;
1246
1247 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1248 uint64_t aa = *(uint64_t *)(a + i);
1249 uint64_t bb = *(uint64_t *)(b + i);
1250 uint64_t dd = aa > bb ? aa : bb;
1251 *(uint64_t *)(d + i) = dd;
1252 }
1253 clear_high(d, oprsz, desc);
1254}
This page took 0.20021 seconds and 4 git commands to generate.