/*
 * Generic vectorized operation runtime
 *
 * Copyright (c) 2018 Linaro
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "cpu.h"
#include "exec/helper-proto.h"
#include "tcg-gvec-desc.h"


/* Virtually all hosts support 16-byte vectors.  Those that don't can emulate
 * them via GCC's generic vector extension.  This turns out to be simpler and
 * more reliable than getting the compiler to autovectorize.
 *
 * In tcg-op-gvec.c, we asserted that both the size and alignment of the data
 * are multiples of 16.
 *
 * When the compiler does not support all of the operations we require, the
 * loops are written so that we can always fall back on the base types.
 */
#ifdef CONFIG_VECTOR16
typedef uint8_t vec8 __attribute__((vector_size(16)));
typedef uint16_t vec16 __attribute__((vector_size(16)));
typedef uint32_t vec32 __attribute__((vector_size(16)));
typedef uint64_t vec64 __attribute__((vector_size(16)));

typedef int8_t svec8 __attribute__((vector_size(16)));
typedef int16_t svec16 __attribute__((vector_size(16)));
typedef int32_t svec32 __attribute__((vector_size(16)));
typedef int64_t svec64 __attribute__((vector_size(16)));

#define DUP16(X) { X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X }
#define DUP8(X)  { X, X, X, X, X, X, X, X }
#define DUP4(X)  { X, X, X, X }
#define DUP2(X)  { X, X }
#else
typedef uint8_t vec8;
typedef uint16_t vec16;
typedef uint32_t vec32;
typedef uint64_t vec64;

typedef int8_t svec8;
typedef int16_t svec16;
typedef int32_t svec32;
typedef int64_t svec64;

#define DUP16(X) X
#define DUP8(X)  X
#define DUP4(X)  X
#define DUP2(X)  X
#endif /* CONFIG_VECTOR16 */
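/* The DUPn macros give one spelling for "replicate X into every lane":
   with CONFIG_VECTOR16 they expand to a brace-enclosed initializer
   (e.g. "vec8 v = DUP16(0x7f);"), while in the fallback build the same
   source simply initializes the scalar base type.  */

/* Zero the bytes between the operation size and the maximum vector size.
   The descriptor encodes both: oprsz is the number of bytes the helper
   actually operates on, maxsz is the full width of the destination, and
   any tail beyond oprsz must end up cleared.  */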
static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
{
    intptr_t maxsz = simd_maxsz(desc);
    intptr_t i;

    if (unlikely(maxsz > oprsz)) {
        for (i = oprsz; i < maxsz; i += sizeof(uint64_t)) {
            *(uint64_t *)(d + i) = 0;
        }
    }
}
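/*
 * The helpers below are normally reached via the expanders in
 * tcg-op-gvec.c rather than called directly.  Purely as an illustration
 * (assuming the simd_desc() encoder from tcg-gvec-desc.h), a 16-byte
 * byte-wise add would look roughly like:
 *
 *     uint32_t desc = simd_desc(16, 16, 0);   /- oprsz = maxsz = 16 -/
 *     helper_gvec_add8(dst, srca, srcb, desc);
 */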
void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(vec8 *)(d + i) = *(vec8 *)(a + i) + *(vec8 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec16)) {
        *(vec16 *)(d + i) = *(vec16 *)(a + i) + *(vec16 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec32)) {
        *(vec32 *)(d + i) = *(vec32 *)(a + i) + *(vec32 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) + *(vec64 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(vec8 *)(d + i) = *(vec8 *)(a + i) - *(vec8 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec16)) {
        *(vec16 *)(d + i) = *(vec16 *)(a + i) - *(vec16 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec32)) {
        *(vec32 *)(d + i) = *(vec32 *)(a + i) - *(vec32 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) - *(vec64 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(vec8 *)(d + i) = -*(vec8 *)(a + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec16)) {
        *(vec16 *)(d + i) = -*(vec16 *)(a + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec32)) {
        *(vec32 *)(d + i) = -*(vec32 *)(a + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = -*(vec64 *)(a + i);
    }
    clear_high(d, oprsz, desc);
}
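/* A straight copy of oprsz bytes; since clear_high() runs afterwards,
   this doubles as "move and zero the excess up to maxsz".  */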
void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);

    memcpy(d, a, oprsz);
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;
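    /* Duplicating zero is just clearing: pretend the operation size is
       zero so that clear_high() zeroes the whole destination for us.  */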
    if (c == 0) {
        oprsz = 0;
    } else {
        for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
            *(uint64_t *)(d + i) = c;
        }
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    if (c == 0) {
        oprsz = 0;
    } else {
        for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
            *(uint32_t *)(d + i) = c;
        }
    }
    clear_high(d, oprsz, desc);
}
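/* The 16-bit and 8-bit cases replicate the value across a 32-bit word
   (multiplying by 0x00010001 or 0x01010101) and defer to gvec_dup32.  */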
void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c)
{
    HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff));
}

void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c)
{
    HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff));
}
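/* Bitwise operations do not care about element boundaries, so a single
   set of helpers working on vec64 covers every element size.  */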
void HELPER(gvec_not)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = ~*(vec64 *)(a + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) & *(vec64 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) | *(vec64 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) ^ *(vec64 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) &~ *(vec64 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) |~ *(vec64 *)(b + i);
    }
    clear_high(d, oprsz, desc);
}
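/* Shifts by immediate: the expander stores the shift count in the data
   field of the descriptor, so simd_data() recovers it here.  */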
void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(vec8 *)(d + i) = *(vec8 *)(a + i) << shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec16)) {
        *(vec16 *)(d + i) = *(vec16 *)(a + i) << shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec32)) {
        *(vec32 *)(d + i) = *(vec32 *)(a + i) << shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) << shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(vec8 *)(d + i) = *(vec8 *)(a + i) >> shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec16)) {
        *(vec16 *)(d + i) = *(vec16 *)(a + i) >> shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec32)) {
        *(vec32 *)(d + i) = *(vec32 *)(a + i) >> shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(vec64 *)(d + i) = *(vec64 *)(a + i) >> shift;
    }
    clear_high(d, oprsz, desc);
}
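/* Arithmetic right shifts go through the signed svecN types so that the
   compiler generates sign-propagating shifts.  */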
void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(svec8 *)(d + i) = *(svec8 *)(a + i) >> shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec16)) {
        *(svec16 *)(d + i) = *(svec16 *)(a + i) >> shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec32)) {
        *(svec32 *)(d + i) = *(svec32 *)(a + i) >> shift;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    int shift = simd_data(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(vec64)) {
        *(svec64 *)(d + i) = *(svec64 *)(a + i) >> shift;
    }
    clear_high(d, oprsz, desc);
}
/* If vectors are enabled, the compiler fills in -1 for true.
   Otherwise, we must take care of this by hand.  */
#ifdef CONFIG_VECTOR16
# define DO_CMP0(X) X
#else
# define DO_CMP0(X) -(X)
#endif

#define DO_CMP1(NAME, TYPE, OP) \
void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc) \
{ \
    intptr_t oprsz = simd_oprsz(desc); \
    intptr_t i; \
    for (i = 0; i < oprsz; i += sizeof(vec64)) { \
        *(TYPE *)(d + i) = DO_CMP0(*(TYPE *)(a + i) OP *(TYPE *)(b + i)); \
    } \
    clear_high(d, oprsz, desc); \
}

#define DO_CMP2(SZ) \
    DO_CMP1(gvec_eq##SZ, vec##SZ, ==) \
    DO_CMP1(gvec_ne##SZ, vec##SZ, !=) \
    DO_CMP1(gvec_lt##SZ, svec##SZ, <) \
    DO_CMP1(gvec_le##SZ, svec##SZ, <=) \
    DO_CMP1(gvec_ltu##SZ, vec##SZ, <) \
    DO_CMP1(gvec_leu##SZ, vec##SZ, <=)
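/*
 * Illustrative only (not literal preprocessor output): DO_CMP2(8) emits
 * six helpers, using the unsigned vec8 type for eq/ne/ltu/leu and the
 * signed svec8 type for lt/le, e.g.
 *
 *   void HELPER(gvec_eq8)(void *d, void *a, void *b, uint32_t desc)
 *   {
 *       ...
 *       *(vec8 *)(d + i) = DO_CMP0(*(vec8 *)(a + i) == *(vec8 *)(b + i));
 *       ...
 *   }
 */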
DO_CMP2(8)
DO_CMP2(16)
DO_CMP2(32)
DO_CMP2(64)

#undef DO_CMP0
#undef DO_CMP1
#undef DO_CMP2