/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _ASM_X86_XOR_AVX_H
#define _ASM_X86_XOR_AVX_H

/*
 * Optimized RAID-5 checksumming functions for AVX
 *
 * Copyright (C) 2012 Intel Corporation
 *
 * Based on Ingo Molnar and Zach Brown's respective MMX and SSE routines
 */

#include <linux/compiler.h>
#include <asm/fpu/api.h>
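
/*
 * Each xor_avx_N() routine below defines its own BLOCK(offset, reg) that
 * XORs one 32-byte chunk through YMM register 'reg'.  BLOCK4() and BLOCK16()
 * simply tile that macro across ymm0-ymm3 so that one expansion covers a
 * full 512-byte line.
 */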
#define BLOCK4(i) \
		BLOCK(32 * i, 0) \
		BLOCK(32 * (i + 1), 1) \
		BLOCK(32 * (i + 2), 2) \
		BLOCK(32 * (i + 3), 3)

#define BLOCK16() \
		BLOCK4(0) \
		BLOCK4(4) \
		BLOCK4(8) \
		BLOCK4(12)
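
/*
 * XOR one source buffer (p1) into the destination (p0), 512 bytes per loop
 * iteration: each BLOCK() loads 32 bytes of p1 into a YMM register, XORs in
 * the matching 32 bytes of p0 with vxorps, and stores the result back to p0.
 * The YMM registers may only be touched between kernel_fpu_begin() and
 * kernel_fpu_end().
 */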
static void xor_avx_2(unsigned long bytes, unsigned long * __restrict p0,
		      const unsigned long * __restrict p1)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
	}

	kernel_fpu_end();
}
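
/* Same as xor_avx_2(), but folds two source buffers (p1, p2) into p0. */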
static void xor_avx_3(unsigned long bytes, unsigned long * __restrict p0,
		      const unsigned long * __restrict p1,
		      const unsigned long * __restrict p2)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p2[i / sizeof(*p2)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
		p2 = (unsigned long *)((uintptr_t)p2 + 512);
	}

	kernel_fpu_end();
}
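
/* Same pattern with three source buffers (p1, p2, p3) XORed into p0. */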
static void xor_avx_4(unsigned long bytes, unsigned long * __restrict p0,
		      const unsigned long * __restrict p1,
		      const unsigned long * __restrict p2,
		      const unsigned long * __restrict p3)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p3[i / sizeof(*p3)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p2[i / sizeof(*p2)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
		p2 = (unsigned long *)((uintptr_t)p2 + 512);
		p3 = (unsigned long *)((uintptr_t)p3 + 512);
	}

	kernel_fpu_end();
}
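
/* Same pattern with four source buffers (p1..p4) XORed into p0. */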
static void xor_avx_5(unsigned long bytes, unsigned long * __restrict p0,
		      const unsigned long * __restrict p1,
		      const unsigned long * __restrict p2,
		      const unsigned long * __restrict p3,
		      const unsigned long * __restrict p4)
{
	unsigned long lines = bytes >> 9;

	kernel_fpu_begin();

	while (lines--) {
#undef BLOCK
#define BLOCK(i, reg) \
do { \
	asm volatile("vmovdqa %0, %%ymm" #reg : : "m" (p4[i / sizeof(*p4)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p3[i / sizeof(*p3)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p2[i / sizeof(*p2)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p1[i / sizeof(*p1)])); \
	asm volatile("vxorps %0, %%ymm" #reg ", %%ymm" #reg : : \
		"m" (p0[i / sizeof(*p0)])); \
	asm volatile("vmovdqa %%ymm" #reg ", %0" : \
		"=m" (p0[i / sizeof(*p0)])); \
} while (0);

		BLOCK16()

		p0 = (unsigned long *)((uintptr_t)p0 + 512);
		p1 = (unsigned long *)((uintptr_t)p1 + 512);
		p2 = (unsigned long *)((uintptr_t)p2 + 512);
		p3 = (unsigned long *)((uintptr_t)p3 + 512);
		p4 = (unsigned long *)((uintptr_t)p4 + 512);
	}

	kernel_fpu_end();
}
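
/*
 * Dispatch table handed to the generic xor code; the generic layer picks
 * the do_N() helper that matches the number of source buffers.
 */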
static struct xor_block_template xor_block_avx = {
	.name = "avx",
	.do_2 = xor_avx_2,
	.do_3 = xor_avx_3,
	.do_4 = xor_avx_4,
	.do_5 = xor_avx_5,
};
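
/*
 * Only benchmark the AVX template when both AVX and OSXSAVE are available,
 * i.e. the CPU supports AVX and the OS has enabled XSAVE so the YMM state
 * is actually managed across context switches.
 */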
#define AVX_XOR_SPEED \
do { \
	if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE)) \
		xor_speed(&xor_block_avx); \
} while (0)
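
/*
 * AVX_SELECT() below lets the arch xor.h wrapper prefer the AVX template
 * over whatever the generic benchmarking chose, falling back to FASTEST
 * when AVX/OSXSAVE are not available.  A typical (illustrative) use:
 *
 *	#define XOR_SELECT_TEMPLATE(FASTEST) AVX_SELECT(FASTEST)
 */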
#define AVX_SELECT(FASTEST) \
	(boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_OSXSAVE) ? &xor_block_avx : FASTEST)

#endif