]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * include/asm-generic/xor.h | |
3 | * | |
4 | * Generic optimized RAID-5 checksumming functions. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2, or (at your option) | |
9 | * any later version. | |
10 | * | |
11 | * You should have received a copy of the GNU General Public License | |
12 | * (for example /usr/src/linux/COPYING); if not, write to the Free | |
13 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
14 | */ | |
15 | ||
268bb0ce | 16 | #include <linux/prefetch.h> |
1da177e4 LT |
17 | |
/*
 * xor_8regs_2 - XOR one source buffer into a destination buffer.
 * @bytes: number of bytes to process; consumed in whole lines of
 *         8 longs, a trailing partial line is ignored
 * @p1:    destination buffer, updated in place (p1[i] ^= p2[i])
 * @p2:    source buffer, only read
 *
 * The loop body is unrolled by hand to one 8-long line per pass.
 */
static void
xor_8regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * Less than one full line: nothing to do.  Without this check the
	 * do-while below would still XOR 8 longs beyond the requested length.
	 */
	if (lines <= 0)
		return;

	do {
		p1[0] ^= p2[0];
		p1[1] ^= p2[1];
		p1[2] ^= p2[2];
		p1[3] ^= p2[3];
		p1[4] ^= p2[4];
		p1[5] ^= p2[5];
		p1[6] ^= p2[6];
		p1[7] ^= p2[7];
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);
}
36 | ||
/*
 * xor_8regs_3 - XOR two source buffers into a destination buffer.
 * @bytes: number of bytes to process; consumed in whole lines of
 *         8 longs, a trailing partial line is ignored
 * @p1:    destination buffer, updated in place (p1[i] ^= p2[i] ^ p3[i])
 * @p2:    first source buffer, only read
 * @p3:    second source buffer, only read
 *
 * The loop body is unrolled by hand to one 8-long line per pass.
 */
static void
xor_8regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * Less than one full line: nothing to do.  Without this check the
	 * do-while below would still XOR 8 longs beyond the requested length.
	 */
	if (lines <= 0)
		return;

	do {
		p1[0] ^= p2[0] ^ p3[0];
		p1[1] ^= p2[1] ^ p3[1];
		p1[2] ^= p2[2] ^ p3[2];
		p1[3] ^= p2[3] ^ p3[3];
		p1[4] ^= p2[4] ^ p3[4];
		p1[5] ^= p2[5] ^ p3[5];
		p1[6] ^= p2[6] ^ p3[6];
		p1[7] ^= p2[7] ^ p3[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);
}
57 | ||
/*
 * xor_8regs_4 - XOR three source buffers into a destination buffer.
 * @bytes: number of bytes to process; consumed in whole lines of
 *         8 longs, a trailing partial line is ignored
 * @p1:    destination buffer, updated in place
 *         (p1[i] ^= p2[i] ^ p3[i] ^ p4[i])
 * @p2:    first source buffer, only read
 * @p3:    second source buffer, only read
 * @p4:    third source buffer, only read
 *
 * The loop body is unrolled by hand to one 8-long line per pass.
 */
static void
xor_8regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * Less than one full line: nothing to do.  Without this check the
	 * do-while below would still XOR 8 longs beyond the requested length.
	 */
	if (lines <= 0)
		return;

	do {
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);
}
79 | ||
/*
 * xor_8regs_5 - XOR four source buffers into a destination buffer.
 * @bytes: number of bytes to process; consumed in whole lines of
 *         8 longs, a trailing partial line is ignored
 * @p1:    destination buffer, updated in place
 *         (p1[i] ^= p2[i] ^ p3[i] ^ p4[i] ^ p5[i])
 * @p2:    first source buffer, only read
 * @p3:    second source buffer, only read
 * @p4:    third source buffer, only read
 * @p5:    fourth source buffer, only read
 *
 * The loop body is unrolled by hand to one 8-long line per pass.
 */
static void
xor_8regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * Less than one full line: nothing to do.  Without this check the
	 * do-while below would still XOR 8 longs beyond the requested length.
	 */
	if (lines <= 0)
		return;

	do {
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);
}
102 | ||
/*
 * xor_32regs_2 - XOR one source buffer into a destination buffer.
 * @bytes: number of bytes to process; consumed in whole lines of
 *         8 longs, a trailing partial line is ignored
 * @p1:    destination buffer, updated in place (p1[i] ^= p2[i])
 * @p2:    source buffer, only read
 *
 * Each pass loads a full line of p1 into eight temporaries, folds the
 * source line into them and then writes the line back, so that loads
 * and stores are grouped together.
 */
static void
xor_32regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * Less than one full line: nothing to do.  Without this check the
	 * do-while below would still XOR 8 longs beyond the requested length.
	 */
	if (lines <= 0)
		return;

	do {
		/* Unsigned, to match the element type of the buffers. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Load the destination line ... */
		d1 = p1[1];	/* ... in one burst, if possible. */
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		p1[0] = d0;	/* Write the result back (in one burst) */
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);
}
138 | ||
/*
 * xor_32regs_3 - XOR two source buffers into a destination buffer.
 * @bytes: number of bytes to process; consumed in whole lines of
 *         8 longs, a trailing partial line is ignored
 * @p1:    destination buffer, updated in place (p1[i] ^= p2[i] ^ p3[i])
 * @p2:    first source buffer, only read
 * @p3:    second source buffer, only read
 *
 * Each pass loads a full line of p1 into eight temporaries, folds the
 * source lines into them one buffer at a time and then writes the line
 * back, so that loads and stores are grouped together.
 */
static void
xor_32regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	     unsigned long *p3)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * Less than one full line: nothing to do.  Without this check the
	 * do-while below would still XOR 8 longs beyond the requested length.
	 */
	if (lines <= 0)
		return;

	do {
		/* Unsigned, to match the element type of the buffers. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Load the destination line ... */
		d1 = p1[1];	/* ... in one burst, if possible. */
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		p1[0] = d0;	/* Write the result back (in one burst) */
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);
}
184 | ||
/*
 * xor_32regs_4 - XOR three source buffers into a destination buffer.
 * @bytes: number of bytes to process; consumed in whole lines of
 *         8 longs, a trailing partial line is ignored
 * @p1:    destination buffer, updated in place
 *         (p1[i] ^= p2[i] ^ p3[i] ^ p4[i])
 * @p2:    first source buffer, only read
 * @p3:    second source buffer, only read
 * @p4:    third source buffer, only read
 *
 * Each pass loads a full line of p1 into eight temporaries, folds the
 * source lines into them one buffer at a time and then writes the line
 * back, so that loads and stores are grouped together.
 */
static void
xor_32regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	     unsigned long *p3, unsigned long *p4)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * Less than one full line: nothing to do.  Without this check the
	 * do-while below would still XOR 8 longs beyond the requested length.
	 */
	if (lines <= 0)
		return;

	do {
		/* Unsigned, to match the element type of the buffers. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Load the destination line ... */
		d1 = p1[1];	/* ... in one burst, if possible. */
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		p1[0] = d0;	/* Write the result back (in one burst) */
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);
}
239 | ||
/*
 * xor_32regs_5 - XOR four source buffers into a destination buffer.
 * @bytes: number of bytes to process; consumed in whole lines of
 *         8 longs, a trailing partial line is ignored
 * @p1:    destination buffer, updated in place
 *         (p1[i] ^= p2[i] ^ p3[i] ^ p4[i] ^ p5[i])
 * @p2:    first source buffer, only read
 * @p3:    second source buffer, only read
 * @p4:    third source buffer, only read
 * @p5:    fourth source buffer, only read
 *
 * Each pass loads a full line of p1 into eight temporaries, folds the
 * source lines into them one buffer at a time and then writes the line
 * back, so that loads and stores are grouped together.
 */
static void
xor_32regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	     unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * Less than one full line: nothing to do.  Without this check the
	 * do-while below would still XOR 8 longs beyond the requested length.
	 */
	if (lines <= 0)
		return;

	do {
		/* Unsigned, to match the element type of the buffers. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Load the destination line ... */
		d1 = p1[1];	/* ... in one burst, if possible. */
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		d0 ^= p5[0];
		d1 ^= p5[1];
		d2 ^= p5[2];
		d3 ^= p5[3];
		d4 ^= p5[4];
		d5 ^= p5[5];
		d6 ^= p5[6];
		d7 ^= p5[7];
		p1[0] = d0;	/* Write the result back (in one burst) */
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);
}
303 | ||
/*
 * xor_8regs_p_2 - XOR one source buffer into a destination buffer,
 *                 issuing prefetch hints one line ahead.
 * @bytes: number of bytes to process; consumed in whole lines of
 *         8 longs, a trailing partial line is ignored
 * @p1:    destination buffer, updated in place (p1[i] ^= p2[i])
 * @p2:    source buffer, only read
 *
 * Before a line is processed inside the loop, the line after it is
 * prefetched (prefetchw() for p1, prefetch() for p2).  With two or more
 * lines, the last line is entered through the once_more label, which
 * skips the prefetch so that none is issued past the final line.
 */
static void
xor_8regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * Less than one full line: nothing to do.  Without this check the
	 * do-while below would still XOR 8 longs beyond the requested length.
	 */
	if (lines <= 0)
		return;

	/* Hold one line back for the once_more pass described above. */
	lines--;

	prefetchw(p1);
	prefetch(p2);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
 once_more:
		p1[0] ^= p2[0];
		p1[1] ^= p2[1];
		p1[2] ^= p2[2];
		p1[3] ^= p2[3];
		p1[4] ^= p2[4];
		p1[5] ^= p2[5];
		p1[6] ^= p2[6];
		p1[7] ^= p2[7];
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);
	/* lines is 0 exactly once: when only the held-back line remains. */
	if (lines == 0)
		goto once_more;
}
329 | ||
/*
 * xor_8regs_p_3 - XOR two source buffers into a destination buffer,
 *                 issuing prefetch hints one line ahead.
 * @bytes: number of bytes to process; consumed in whole lines of
 *         8 longs, a trailing partial line is ignored
 * @p1:    destination buffer, updated in place (p1[i] ^= p2[i] ^ p3[i])
 * @p2:    first source buffer, only read
 * @p3:    second source buffer, only read
 *
 * Before a line is processed inside the loop, the line after it is
 * prefetched (prefetchw() for p1, prefetch() for the sources).  With two
 * or more lines, the last line is entered through the once_more label,
 * which skips the prefetch so that none is issued past the final line.
 */
static void
xor_8regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	      unsigned long *p3)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * Less than one full line: nothing to do.  Without this check the
	 * do-while below would still XOR 8 longs beyond the requested length.
	 */
	if (lines <= 0)
		return;

	/* Hold one line back for the once_more pass described above. */
	lines--;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
 once_more:
		p1[0] ^= p2[0] ^ p3[0];
		p1[1] ^= p2[1] ^ p3[1];
		p1[2] ^= p2[2] ^ p3[2];
		p1[3] ^= p2[3] ^ p3[3];
		p1[4] ^= p2[4] ^ p3[4];
		p1[5] ^= p2[5] ^ p3[5];
		p1[6] ^= p2[6] ^ p3[6];
		p1[7] ^= p2[7] ^ p3[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);
	/* lines is 0 exactly once: when only the held-back line remains. */
	if (lines == 0)
		goto once_more;
}
359 | ||
/*
 * xor_8regs_p_4 - XOR three source buffers into a destination buffer,
 *                 issuing prefetch hints one line ahead.
 * @bytes: number of bytes to process; consumed in whole lines of
 *         8 longs, a trailing partial line is ignored
 * @p1:    destination buffer, updated in place
 *         (p1[i] ^= p2[i] ^ p3[i] ^ p4[i])
 * @p2:    first source buffer, only read
 * @p3:    second source buffer, only read
 * @p4:    third source buffer, only read
 *
 * Before a line is processed inside the loop, the line after it is
 * prefetched (prefetchw() for p1, prefetch() for the sources).  With two
 * or more lines, the last line is entered through the once_more label,
 * which skips the prefetch so that none is issued past the final line.
 */
static void
xor_8regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	      unsigned long *p3, unsigned long *p4)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * Less than one full line: nothing to do.  Without this check the
	 * do-while below would still XOR 8 longs beyond the requested length.
	 */
	if (lines <= 0)
		return;

	/* Hold one line back for the once_more pass described above. */
	lines--;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
 once_more:
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);
	/* lines is 0 exactly once: when only the held-back line remains. */
	if (lines == 0)
		goto once_more;
}
393 | ||
/*
 * xor_8regs_p_5 - XOR four source buffers into a destination buffer,
 *                 issuing prefetch hints one line ahead.
 * @bytes: number of bytes to process; consumed in whole lines of
 *         8 longs, a trailing partial line is ignored
 * @p1:    destination buffer, updated in place
 *         (p1[i] ^= p2[i] ^ p3[i] ^ p4[i] ^ p5[i])
 * @p2:    first source buffer, only read
 * @p3:    second source buffer, only read
 * @p4:    third source buffer, only read
 * @p5:    fourth source buffer, only read
 *
 * Before a line is processed inside the loop, the line after it is
 * prefetched (prefetchw() for p1, prefetch() for the sources).  With two
 * or more lines, the last line is entered through the once_more label,
 * which skips the prefetch so that none is issued past the final line.
 */
static void
xor_8regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	      unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * Less than one full line: nothing to do.  Without this check the
	 * do-while below would still XOR 8 longs beyond the requested length.
	 */
	if (lines <= 0)
		return;

	/* Hold one line back for the once_more pass described above. */
	lines--;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);
	prefetch(p5);

	do {
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
		prefetch(p5+8);
 once_more:
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);
	/* lines is 0 exactly once: when only the held-back line remains. */
	if (lines == 0)
		goto once_more;
}
430 | ||
/*
 * xor_32regs_p_2 - XOR one source buffer into a destination buffer
 *                  through eight temporaries, issuing prefetch hints
 *                  one line ahead.
 * @bytes: number of bytes to process; consumed in whole lines of
 *         8 longs, a trailing partial line is ignored
 * @p1:    destination buffer, updated in place (p1[i] ^= p2[i])
 * @p2:    source buffer, only read
 *
 * Before a line is processed inside the loop, the line after it is
 * prefetched (prefetchw() for p1, prefetch() for p2).  With two or more
 * lines, the last line is entered through the once_more label, which
 * skips the prefetch so that none is issued past the final line.
 */
static void
xor_32regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * Less than one full line: nothing to do.  Without this check the
	 * do-while below would still XOR 8 longs beyond the requested length.
	 */
	if (lines <= 0)
		return;

	/* Hold one line back for the once_more pass described above. */
	lines--;

	prefetchw(p1);
	prefetch(p2);

	do {
		/* Unsigned, to match the element type of the buffers. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		prefetchw(p1+8);
		prefetch(p2+8);
 once_more:
		d0 = p1[0];	/* Load the destination line ... */
		d1 = p1[1];	/* ... in one burst, if possible. */
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		p1[0] = d0;	/* Write the result back (in one burst) */
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);
	/* lines is 0 exactly once: when only the held-back line remains. */
	if (lines == 0)
		goto once_more;
}
475 | ||
/*
 * xor_32regs_p_3 - XOR two source buffers into a destination buffer
 *                  through eight temporaries, issuing prefetch hints
 *                  one line ahead.
 * @bytes: number of bytes to process; consumed in whole lines of
 *         8 longs, a trailing partial line is ignored
 * @p1:    destination buffer, updated in place (p1[i] ^= p2[i] ^ p3[i])
 * @p2:    first source buffer, only read
 * @p3:    second source buffer, only read
 *
 * Before a line is processed inside the loop, the line after it is
 * prefetched (prefetchw() for p1, prefetch() for the sources).  With two
 * or more lines, the last line is entered through the once_more label,
 * which skips the prefetch so that none is issued past the final line.
 */
static void
xor_32regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	       unsigned long *p3)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * Less than one full line: nothing to do.  Without this check the
	 * do-while below would still XOR 8 longs beyond the requested length.
	 */
	if (lines <= 0)
		return;

	/* Hold one line back for the once_more pass described above. */
	lines--;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);

	do {
		/* Unsigned, to match the element type of the buffers. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
 once_more:
		d0 = p1[0];	/* Load the destination line ... */
		d1 = p1[1];	/* ... in one burst, if possible. */
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		p1[0] = d0;	/* Write the result back (in one burst) */
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);
	/* lines is 0 exactly once: when only the held-back line remains. */
	if (lines == 0)
		goto once_more;
}
532 | ||
/*
 * xor_32regs_p_4 - XOR three source buffers into a destination buffer
 *                  through eight temporaries, issuing prefetch hints
 *                  one line ahead.
 * @bytes: number of bytes to process; consumed in whole lines of
 *         8 longs, a trailing partial line is ignored
 * @p1:    destination buffer, updated in place
 *         (p1[i] ^= p2[i] ^ p3[i] ^ p4[i])
 * @p2:    first source buffer, only read
 * @p3:    second source buffer, only read
 * @p4:    third source buffer, only read
 *
 * Before a line is processed inside the loop, the line after it is
 * prefetched (prefetchw() for p1, prefetch() for the sources).  With two
 * or more lines, the last line is entered through the once_more label,
 * which skips the prefetch so that none is issued past the final line.
 */
static void
xor_32regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	       unsigned long *p3, unsigned long *p4)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * Less than one full line: nothing to do.  Without this check the
	 * do-while below would still XOR 8 longs beyond the requested length.
	 */
	if (lines <= 0)
		return;

	/* Hold one line back for the once_more pass described above. */
	lines--;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);

	do {
		/* Unsigned, to match the element type of the buffers. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
 once_more:
		d0 = p1[0];	/* Load the destination line ... */
		d1 = p1[1];	/* ... in one burst, if possible. */
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		p1[0] = d0;	/* Write the result back (in one burst) */
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);
	/* lines is 0 exactly once: when only the held-back line remains. */
	if (lines == 0)
		goto once_more;
}
600 | ||
/*
 * xor_32regs_p_5 - XOR four source buffers into a destination buffer
 *                  through eight temporaries, issuing prefetch hints
 *                  one line ahead.
 * @bytes: number of bytes to process; consumed in whole lines of
 *         8 longs, a trailing partial line is ignored
 * @p1:    destination buffer, updated in place
 *         (p1[i] ^= p2[i] ^ p3[i] ^ p4[i] ^ p5[i])
 * @p2:    first source buffer, only read
 * @p3:    second source buffer, only read
 * @p4:    third source buffer, only read
 * @p5:    fourth source buffer, only read
 *
 * Before a line is processed inside the loop, the line after it is
 * prefetched (prefetchw() for p1, prefetch() for the sources).  With two
 * or more lines, the last line is entered through the once_more label,
 * which skips the prefetch so that none is issued past the final line.
 */
static void
xor_32regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	       unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	long lines = bytes / (sizeof (long)) / 8;

	/*
	 * Less than one full line: nothing to do.  Without this check the
	 * do-while below would still XOR 8 longs beyond the requested length.
	 */
	if (lines <= 0)
		return;

	/* Hold one line back for the once_more pass described above. */
	lines--;

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);
	prefetch(p5);

	do {
		/* Unsigned, to match the element type of the buffers. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
		prefetch(p5+8);
 once_more:
		d0 = p1[0];	/* Load the destination line ... */
		d1 = p1[1];	/* ... in one burst, if possible. */
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		d0 ^= p5[0];
		d1 ^= p5[1];
		d2 ^= p5[2];
		d3 ^= p5[3];
		d4 ^= p5[4];
		d5 ^= p5[5];
		d6 ^= p5[6];
		d7 ^= p5[7];
		p1[0] = d0;	/* Write the result back (in one burst) */
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);
	/* lines is 0 exactly once: when only the held-back line remains. */
	if (lines == 0)
		goto once_more;
}
679 | ||
680 | static struct xor_block_template xor_block_8regs = { | |
681 | .name = "8regs", | |
682 | .do_2 = xor_8regs_2, | |
683 | .do_3 = xor_8regs_3, | |
684 | .do_4 = xor_8regs_4, | |
685 | .do_5 = xor_8regs_5, | |
686 | }; | |
687 | ||
688 | static struct xor_block_template xor_block_32regs = { | |
689 | .name = "32regs", | |
690 | .do_2 = xor_32regs_2, | |
691 | .do_3 = xor_32regs_3, | |
692 | .do_4 = xor_32regs_4, | |
693 | .do_5 = xor_32regs_5, | |
694 | }; | |
695 | ||
720fb197 | 696 | static struct xor_block_template xor_block_8regs_p __maybe_unused = { |
1da177e4 LT |
697 | .name = "8regs_prefetch", |
698 | .do_2 = xor_8regs_p_2, | |
699 | .do_3 = xor_8regs_p_3, | |
700 | .do_4 = xor_8regs_p_4, | |
701 | .do_5 = xor_8regs_p_5, | |
702 | }; | |
703 | ||
720fb197 | 704 | static struct xor_block_template xor_block_32regs_p __maybe_unused = { |
1da177e4 LT |
705 | .name = "32regs_prefetch", |
706 | .do_2 = xor_32regs_p_2, | |
707 | .do_3 = xor_32regs_p_3, | |
708 | .do_4 = xor_32regs_p_4, | |
709 | .do_5 = xor_32regs_p_5, | |
710 | }; | |
711 | ||
/*
 * Pass each of the four generic templates to xor_speed(), in the order
 * 8regs, 8regs_prefetch, 32regs, 32regs_prefetch.  Wrapped in
 * do { } while (0) so the macro expands to a single statement.
 */
#define XOR_TRY_TEMPLATES				\
	do {						\
		xor_speed(&xor_block_8regs);		\
		xor_speed(&xor_block_8regs_p);		\
		xor_speed(&xor_block_32regs);		\
		xor_speed(&xor_block_32regs_p);		\
	} while (0)