/* ----------------------------------------------------------------------- *
 *
 *   Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, Inc., 59 Temple Place Ste 330,
 *   Boston MA 02111-1307, USA; either version 2 of the License, or
 *   (at your option) any later version; incorporated herein by reference.
 *
 * ----------------------------------------------------------------------- */

/*
 * raid6x86.h
 *
 * Definitions common to x86 and x86-64 RAID-6 code only
 */

#ifndef LINUX_RAID_RAID6X86_H
#define LINUX_RAID_RAID6X86_H

#if defined(__i386__) || defined(__x86_64__)

#ifdef __x86_64__

typedef struct {
        unsigned int fsave[27];
        unsigned long cr0;
} raid6_mmx_save_t __attribute__((aligned(16)));

/* N.B.: For SSE we only save %xmm0-%xmm7 even for x86-64, since
   the code doesn't know about the additional x86-64 registers */
typedef struct {
        unsigned int sarea[8*4+2];
        unsigned long cr0;
} raid6_sse_save_t __attribute__((aligned(16)));
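
/* Sizing note: eight 128-bit XMM registers need 8*4 = 32 dwords (128
   bytes); the two extra dwords give 8 bytes of slack so that a 16-byte
   aligned 128-byte window always fits even when the struct itself ends
   up only 8-byte aligned (see the stack-alignment caveat below). */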

/* This is for x86-64-specific code which uses all 16 XMM registers */
typedef struct {
        unsigned int sarea[16*4+2];
        unsigned long cr0;
} raid6_sse16_save_t __attribute__((aligned(16)));

/* On x86-64 the stack *SHOULD* be 16-byte aligned, but currently this
   is buggy in the kernel and it's only 8-byte aligned in places, so
   we need to do this anyway.  Sigh. */
#define SAREA(x) ((unsigned int *)((((unsigned long)&(x)->sarea)+15) & ~15))
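
/* Worked example of the align-up arithmetic: if &(x)->sarea happens to
   be 0x...c8, then (0x...c8 + 15) & ~15 = 0x...d7 & ~15 = 0x...d0, the
   first 16-byte boundary at or above the base address. */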

#else /* __i386__ */

typedef struct {
        unsigned int fsave[27];
        unsigned long cr0;
} raid6_mmx_save_t;

/* On i386, the stack is only 8-byte aligned, but SSE requires 16-byte
   alignment.  The +3 is so we have the slack space to manually align
   a properly-sized area correctly. */
typedef struct {
        unsigned int sarea[8*4+3];
        unsigned long cr0;
} raid6_sse_save_t;
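
/* Why +3 rather than +2: without an aligned attribute the i386 ABI only
   guarantees 4-byte alignment for this struct, so up to 12 bytes
   (3 dwords) of padding may be needed to reach a 16-byte boundary. */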

/* Find the 16-byte aligned save area */
#define SAREA(x) ((unsigned int *)((((unsigned long)&(x)->sarea)+15) & ~15))

#endif /* __x86_64__ */

#ifdef __KERNEL__ /* Real code */

/* Note: %cr0 is 32 bits on i386 and 64 bits on x86-64 */

static inline unsigned long raid6_get_fpu(void)
{
        unsigned long cr0;

        preempt_disable();
        asm volatile("mov %%cr0,%0 ; clts" : "=r" (cr0));
        return cr0;
}

static inline void raid6_put_fpu(unsigned long cr0)
{
        asm volatile("mov %0,%%cr0" : : "r" (cr0));
        preempt_enable();
}
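
/* How this works: clts clears CR0.TS, the flag the kernel's lazy FPU
   switching uses to trap FPU/SSE use with a device-not-available
   fault (#NM).  Writing the saved value back restores TS to whatever
   state the scheduler left it in.  Preemption must stay disabled in
   between, since we borrow the FPU behind the context switcher's back. */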

#else /* Dummy code for user space testing */

static inline unsigned long raid6_get_fpu(void)
{
        return 0xf00ba6;
}

static inline void raid6_put_fpu(unsigned long cr0)
{
        (void)cr0;
}

#endif /* __KERNEL__ */

static inline void raid6_before_mmx(raid6_mmx_save_t *s)
{
        s->cr0 = raid6_get_fpu();
        asm volatile("fsave %0 ; fwait" : "=m" (s->fsave[0]));
}

static inline void raid6_after_mmx(raid6_mmx_save_t *s)
{
        asm volatile("frstor %0" : : "m" (s->fsave[0]));
        raid6_put_fpu(s->cr0);
}
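
/*
 * fsave writes the full 108-byte (27-dword) legacy FPU state image,
 * which is why fsave[27] is sized the way it is.  Usage sketch (a
 * hypothetical caller; the actual RAID-6 algorithm files pair these
 * the same way): every MMX/SSE region is bracketed by the matching
 * before/after pair, e.g.
 *
 *      raid6_mmx_save_t sa;
 *
 *      raid6_before_mmx(&sa);
 *      ... MMX arithmetic over the syndrome buffers ...
 *      raid6_after_mmx(&sa);
 */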

static inline void raid6_before_sse(raid6_sse_save_t *s)
{
        unsigned int *rsa = SAREA(s);

        s->cr0 = raid6_get_fpu();

        asm volatile("movaps %%xmm0,%0" : "=m" (rsa[0]));
        asm volatile("movaps %%xmm1,%0" : "=m" (rsa[4]));
        asm volatile("movaps %%xmm2,%0" : "=m" (rsa[8]));
        asm volatile("movaps %%xmm3,%0" : "=m" (rsa[12]));
        asm volatile("movaps %%xmm4,%0" : "=m" (rsa[16]));
        asm volatile("movaps %%xmm5,%0" : "=m" (rsa[20]));
        asm volatile("movaps %%xmm6,%0" : "=m" (rsa[24]));
        asm volatile("movaps %%xmm7,%0" : "=m" (rsa[28]));
}

static inline void raid6_after_sse(raid6_sse_save_t *s)
{
        unsigned int *rsa = SAREA(s);

        asm volatile("movaps %0,%%xmm0" : : "m" (rsa[0]));
        asm volatile("movaps %0,%%xmm1" : : "m" (rsa[4]));
        asm volatile("movaps %0,%%xmm2" : : "m" (rsa[8]));
        asm volatile("movaps %0,%%xmm3" : : "m" (rsa[12]));
        asm volatile("movaps %0,%%xmm4" : : "m" (rsa[16]));
        asm volatile("movaps %0,%%xmm5" : : "m" (rsa[20]));
        asm volatile("movaps %0,%%xmm6" : : "m" (rsa[24]));
        asm volatile("movaps %0,%%xmm7" : : "m" (rsa[28]));

        raid6_put_fpu(s->cr0);
}
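
/* Indexing note: each XMM register is 128 bits = 4 dwords, so register
   %xmmN lives at rsa[4*N].  movaps requires its memory operand to be
   16-byte aligned, which is exactly what SAREA() guarantees. */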

static inline void raid6_before_sse2(raid6_sse_save_t *s)
{
        unsigned int *rsa = SAREA(s);

        s->cr0 = raid6_get_fpu();

        asm volatile("movdqa %%xmm0,%0" : "=m" (rsa[0]));
        asm volatile("movdqa %%xmm1,%0" : "=m" (rsa[4]));
        asm volatile("movdqa %%xmm2,%0" : "=m" (rsa[8]));
        asm volatile("movdqa %%xmm3,%0" : "=m" (rsa[12]));
        asm volatile("movdqa %%xmm4,%0" : "=m" (rsa[16]));
        asm volatile("movdqa %%xmm5,%0" : "=m" (rsa[20]));
        asm volatile("movdqa %%xmm6,%0" : "=m" (rsa[24]));
        asm volatile("movdqa %%xmm7,%0" : "=m" (rsa[28]));
}

static inline void raid6_after_sse2(raid6_sse_save_t *s)
{
        unsigned int *rsa = SAREA(s);

        asm volatile("movdqa %0,%%xmm0" : : "m" (rsa[0]));
        asm volatile("movdqa %0,%%xmm1" : : "m" (rsa[4]));
        asm volatile("movdqa %0,%%xmm2" : : "m" (rsa[8]));
        asm volatile("movdqa %0,%%xmm3" : : "m" (rsa[12]));
        asm volatile("movdqa %0,%%xmm4" : : "m" (rsa[16]));
        asm volatile("movdqa %0,%%xmm5" : : "m" (rsa[20]));
        asm volatile("movdqa %0,%%xmm6" : : "m" (rsa[24]));
        asm volatile("movdqa %0,%%xmm7" : : "m" (rsa[28]));

        raid6_put_fpu(s->cr0);
}
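
/* movdqa is the SSE2 aligned 128-bit move; it has the same alignment
   requirement and save-area layout as movaps above, but keeps the
   transfers in the integer domain, matching the SSE2 integer
   arithmetic used by the RAID-6 SSE2 code. */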

#ifdef __x86_64__

static inline void raid6_before_sse16(raid6_sse16_save_t *s)
{
        unsigned int *rsa = SAREA(s);

        s->cr0 = raid6_get_fpu();

        asm volatile("movdqa %%xmm0,%0" : "=m" (rsa[0]));
        asm volatile("movdqa %%xmm1,%0" : "=m" (rsa[4]));
        asm volatile("movdqa %%xmm2,%0" : "=m" (rsa[8]));
        asm volatile("movdqa %%xmm3,%0" : "=m" (rsa[12]));
        asm volatile("movdqa %%xmm4,%0" : "=m" (rsa[16]));
        asm volatile("movdqa %%xmm5,%0" : "=m" (rsa[20]));
        asm volatile("movdqa %%xmm6,%0" : "=m" (rsa[24]));
        asm volatile("movdqa %%xmm7,%0" : "=m" (rsa[28]));
        asm volatile("movdqa %%xmm8,%0" : "=m" (rsa[32]));
        asm volatile("movdqa %%xmm9,%0" : "=m" (rsa[36]));
        asm volatile("movdqa %%xmm10,%0" : "=m" (rsa[40]));
        asm volatile("movdqa %%xmm11,%0" : "=m" (rsa[44]));
        asm volatile("movdqa %%xmm12,%0" : "=m" (rsa[48]));
        asm volatile("movdqa %%xmm13,%0" : "=m" (rsa[52]));
        asm volatile("movdqa %%xmm14,%0" : "=m" (rsa[56]));
        asm volatile("movdqa %%xmm15,%0" : "=m" (rsa[60]));
}

static inline void raid6_after_sse16(raid6_sse16_save_t *s)
{
        unsigned int *rsa = SAREA(s);

        asm volatile("movdqa %0,%%xmm0" : : "m" (rsa[0]));
        asm volatile("movdqa %0,%%xmm1" : : "m" (rsa[4]));
        asm volatile("movdqa %0,%%xmm2" : : "m" (rsa[8]));
        asm volatile("movdqa %0,%%xmm3" : : "m" (rsa[12]));
        asm volatile("movdqa %0,%%xmm4" : : "m" (rsa[16]));
        asm volatile("movdqa %0,%%xmm5" : : "m" (rsa[20]));
        asm volatile("movdqa %0,%%xmm6" : : "m" (rsa[24]));
        asm volatile("movdqa %0,%%xmm7" : : "m" (rsa[28]));
        asm volatile("movdqa %0,%%xmm8" : : "m" (rsa[32]));
        asm volatile("movdqa %0,%%xmm9" : : "m" (rsa[36]));
        asm volatile("movdqa %0,%%xmm10" : : "m" (rsa[40]));
        asm volatile("movdqa %0,%%xmm11" : : "m" (rsa[44]));
        asm volatile("movdqa %0,%%xmm12" : : "m" (rsa[48]));
        asm volatile("movdqa %0,%%xmm13" : : "m" (rsa[52]));
        asm volatile("movdqa %0,%%xmm14" : : "m" (rsa[56]));
        asm volatile("movdqa %0,%%xmm15" : : "m" (rsa[60]));

        raid6_put_fpu(s->cr0);
}
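
/* %xmm8-%xmm15 are encoded via the REX prefix and exist only in 64-bit
   mode, which is why the 16-register variants are confined to this
   #ifdef __x86_64__ block. */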

#endif /* __x86_64__ */

/* User space test hack */
#ifndef __KERNEL__
static inline int cpuid_features(void)
{
        u32 eax = 1;
        u32 ebx, ecx, edx;

        asm volatile("cpuid" :
                     "+a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx));

        return edx;
}
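
/*
 * Notes on the test hack above: u32 is assumed to be supplied by the
 * user-space test harness (e.g. a typedef for uint32_t); it is not
 * defined in this header.  The return value is CPUID leaf 1 EDX, where
 * the relevant feature bits are MMX = bit 23, SSE = bit 25 and
 * SSE2 = bit 26, so a caller might test, for example:
 *
 *      if (cpuid_features() & (1 << 25))
 *              ... SSE save/restore is usable ...
 */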
#endif /* ndef __KERNEL__ */

#endif /* defined(__i386__) || defined(__x86_64__) */
#endif /* LINUX_RAID_RAID6X86_H */