/*
 * This file contains assembly-language implementations
 * of IP-style 1's complement checksum routines.
 *
 * Copyright (C) 1995-1996 Gary Thomas ([email protected])
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Severely hacked about by Paul Mackerras ([email protected]).
 */

#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>

	.text
14cf11af PM |
24 | /* |
25 | * computes the checksum of a memory block at buff, length len, | |
26 | * and adds in "sum" (32-bit) | |
27 | * | |
7e393220 | 28 | * __csum_partial(buff, len, sum) |
14cf11af | 29 | */ |
7e393220 | 30 | _GLOBAL(__csum_partial) |
14cf11af | 31 | subi r3,r3,4 |
48821a34 | 32 | srawi. r6,r4,2 /* Divide len by 4 and also clear carry */ |
14cf11af | 33 | beq 3f /* if we're doing < 4 bytes */ |
48821a34 | 34 | andi. r0,r3,2 /* Align buffer to longword boundary */ |
14cf11af | 35 | beq+ 1f |
48821a34 | 36 | lhz r0,4(r3) /* do 2 bytes to get aligned */ |
14cf11af | 37 | subi r4,r4,2 |
48821a34 | 38 | addi r3,r3,2 |
14cf11af | 39 | srwi. r6,r4,2 /* # words to do */ |
48821a34 | 40 | adde r5,r5,r0 |
14cf11af | 41 | beq 3f |
f867d556 CL |
42 | 1: andi. r6,r6,3 /* Prepare to handle words 4 by 4 */ |
43 | beq 21f | |
44 | mtctr r6 | |
48821a34 CL |
45 | 2: lwzu r0,4(r3) |
46 | adde r5,r5,r0 | |
14cf11af | 47 | bdnz 2b |
f867d556 CL |
48 | 21: srwi. r6,r4,4 /* # blocks of 4 words to do */ |
49 | beq 3f | |
373e098e | 50 | lwz r0,4(r3) |
f867d556 | 51 | mtctr r6 |
f867d556 | 52 | lwz r6,8(r3) |
373e098e | 53 | adde r5,r5,r0 |
f867d556 | 54 | lwz r7,12(r3) |
373e098e | 55 | adde r5,r5,r6 |
f867d556 | 56 | lwzu r8,16(r3) |
373e098e CL |
57 | adde r5,r5,r7 |
58 | bdz 23f | |
59 | 22: lwz r0,4(r3) | |
60 | adde r5,r5,r8 | |
61 | lwz r6,8(r3) | |
f867d556 | 62 | adde r5,r5,r0 |
373e098e | 63 | lwz r7,12(r3) |
f867d556 | 64 | adde r5,r5,r6 |
373e098e | 65 | lwzu r8,16(r3) |
f867d556 | 66 | adde r5,r5,r7 |
f867d556 | 67 | bdnz 22b |
373e098e | 68 | 23: adde r5,r5,r8 |
48821a34 CL |
69 | 3: andi. r0,r4,2 |
70 | beq+ 4f | |
71 | lhz r0,4(r3) | |
14cf11af | 72 | addi r3,r3,2 |
48821a34 CL |
73 | adde r5,r5,r0 |
74 | 4: andi. r0,r4,1 | |
75 | beq+ 5f | |
76 | lbz r0,4(r3) | |
77 | slwi r0,r0,8 /* Upper byte of word */ | |
78 | adde r5,r5,r0 | |
79 | 5: addze r3,r5 /* add in final carry */ | |
14cf11af | 80 | blr |
9445aa1a | 81 | EXPORT_SYMBOL(__csum_partial) |
14cf11af PM |
82 | |
83 | /* | |
84 | * Computes the checksum of a memory block at src, length len, | |
85 | * and adds in "sum" (32-bit), while copying the block to dst. | |
86 | * If an access exception occurs on src or dst, it stores -EFAULT | |
87 | * to *src_err or *dst_err respectively, and (for an error on | |
88 | * src) zeroes the rest of dst. | |
89 | * | |
90 | * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err) | |
91 | */ | |
7aef4136 CL |
92 | #define CSUM_COPY_16_BYTES_WITHEX(n) \ |
93 | 8 ## n ## 0: \ | |
94 | lwz r7,4(r4); \ | |
95 | 8 ## n ## 1: \ | |
96 | lwz r8,8(r4); \ | |
97 | 8 ## n ## 2: \ | |
98 | lwz r9,12(r4); \ | |
99 | 8 ## n ## 3: \ | |
100 | lwzu r10,16(r4); \ | |
101 | 8 ## n ## 4: \ | |
102 | stw r7,4(r6); \ | |
103 | adde r12,r12,r7; \ | |
104 | 8 ## n ## 5: \ | |
105 | stw r8,8(r6); \ | |
106 | adde r12,r12,r8; \ | |
107 | 8 ## n ## 6: \ | |
108 | stw r9,12(r6); \ | |
109 | adde r12,r12,r9; \ | |
110 | 8 ## n ## 7: \ | |
111 | stwu r10,16(r6); \ | |
112 | adde r12,r12,r10 | |
113 | ||
114 | #define CSUM_COPY_16_BYTES_EXCODE(n) \ | |
24bfa6a9 NP |
115 | EX_TABLE(8 ## n ## 0b, src_error); \ |
116 | EX_TABLE(8 ## n ## 1b, src_error); \ | |
117 | EX_TABLE(8 ## n ## 2b, src_error); \ | |
118 | EX_TABLE(8 ## n ## 3b, src_error); \ | |
119 | EX_TABLE(8 ## n ## 4b, dst_error); \ | |
120 | EX_TABLE(8 ## n ## 5b, dst_error); \ | |
121 | EX_TABLE(8 ## n ## 6b, dst_error); \ | |
122 | EX_TABLE(8 ## n ## 7b, dst_error); | |
7aef4136 CL |
123 | |
124 | .text | |
125 | .stabs "arch/powerpc/lib/",N_SO,0,0,0f | |
126 | .stabs "checksum_32.S",N_SO,0,0,0f | |
127 | 0: | |
128 | ||
129 | CACHELINE_BYTES = L1_CACHE_BYTES | |
130 | LG_CACHELINE_BYTES = L1_CACHE_SHIFT | |
131 | CACHELINE_MASK = (L1_CACHE_BYTES-1) | |
132 | ||
14cf11af | 133 | _GLOBAL(csum_partial_copy_generic) |
7aef4136 CL |
134 | stwu r1,-16(r1) |
135 | stw r7,12(r1) | |
136 | stw r8,8(r1) | |
137 | ||
7aef4136 CL |
138 | addic r12,r6,0 |
139 | addi r6,r4,-4 | |
140 | neg r0,r4 | |
141 | addi r4,r3,-4 | |
142 | andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */ | |
8540571e | 143 | crset 4*cr7+eq |
7aef4136 CL |
144 | beq 58f |
145 | ||
146 | cmplw 0,r5,r0 /* is this more than total to do? */ | |
147 | blt 63f /* if not much to do */ | |
8540571e CL |
148 | rlwinm r7,r6,3,0x8 |
149 | rlwnm r12,r12,r7,0,31 /* odd destination address: rotate one byte */ | |
150 | cmplwi cr7,r7,0 /* is destination address even ? */ | |
7aef4136 CL |
151 | andi. r8,r0,3 /* get it word-aligned first */ |
152 | mtctr r8 | |
153 | beq+ 61f | |
154 | li r3,0 | |
155 | 70: lbz r9,4(r4) /* do some bytes */ | |
156 | addi r4,r4,1 | |
157 | slwi r3,r3,8 | |
158 | rlwimi r3,r9,0,24,31 | |
159 | 71: stb r9,4(r6) | |
160 | addi r6,r6,1 | |
161 | bdnz 70b | |
162 | adde r12,r12,r3 | |
163 | 61: subf r5,r0,r5 | |
164 | srwi. r0,r0,2 | |
165 | mtctr r0 | |
166 | beq 58f | |
167 | 72: lwzu r9,4(r4) /* do some words */ | |
168 | adde r12,r12,r9 | |
169 | 73: stwu r9,4(r6) | |
170 | bdnz 72b | |
171 | ||
172 | 58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */ | |
173 | clrlwi r5,r5,32-LG_CACHELINE_BYTES | |
174 | li r11,4 | |
175 | beq 63f | |
176 | ||
177 | /* Here we decide how far ahead to prefetch the source */ | |
178 | li r3,4 | |
179 | cmpwi r0,1 | |
180 | li r7,0 | |
181 | ble 114f | |
182 | li r7,1 | |
183 | #if MAX_COPY_PREFETCH > 1 | |
184 | /* Heuristically, for large transfers we prefetch | |
185 | MAX_COPY_PREFETCH cachelines ahead. For small transfers | |
186 | we prefetch 1 cacheline ahead. */ | |
187 | cmpwi r0,MAX_COPY_PREFETCH | |
188 | ble 112f | |
189 | li r7,MAX_COPY_PREFETCH | |
190 | 112: mtctr r7 | |
191 | 111: dcbt r3,r4 | |
192 | addi r3,r3,CACHELINE_BYTES | |
193 | bdnz 111b | |
194 | #else | |
195 | dcbt r3,r4 | |
196 | addi r3,r3,CACHELINE_BYTES | |
197 | #endif /* MAX_COPY_PREFETCH > 1 */ | |
198 | ||
199 | 114: subf r8,r7,r0 | |
200 | mr r0,r7 | |
201 | mtctr r8 | |
202 | ||
203 | 53: dcbt r3,r4 | |
204 | 54: dcbz r11,r6 | |
205 | /* the main body of the cacheline loop */ | |
206 | CSUM_COPY_16_BYTES_WITHEX(0) | |
207 | #if L1_CACHE_BYTES >= 32 | |
208 | CSUM_COPY_16_BYTES_WITHEX(1) | |
209 | #if L1_CACHE_BYTES >= 64 | |
210 | CSUM_COPY_16_BYTES_WITHEX(2) | |
211 | CSUM_COPY_16_BYTES_WITHEX(3) | |
212 | #if L1_CACHE_BYTES >= 128 | |
213 | CSUM_COPY_16_BYTES_WITHEX(4) | |
214 | CSUM_COPY_16_BYTES_WITHEX(5) | |
215 | CSUM_COPY_16_BYTES_WITHEX(6) | |
216 | CSUM_COPY_16_BYTES_WITHEX(7) | |
217 | #endif | |
218 | #endif | |
219 | #endif | |
220 | bdnz 53b | |
221 | cmpwi r0,0 | |
222 | li r3,4 | |
223 | li r7,0 | |
224 | bne 114b | |
225 | ||
226 | 63: srwi. r0,r5,2 | |
227 | mtctr r0 | |
228 | beq 64f | |
229 | 30: lwzu r0,4(r4) | |
230 | adde r12,r12,r0 | |
231 | 31: stwu r0,4(r6) | |
232 | bdnz 30b | |
233 | ||
234 | 64: andi. r0,r5,2 | |
235 | beq+ 65f | |
236 | 40: lhz r0,4(r4) | |
14cf11af | 237 | addi r4,r4,2 |
7aef4136 CL |
238 | 41: sth r0,4(r6) |
239 | adde r12,r12,r0 | |
240 | addi r6,r6,2 | |
241 | 65: andi. r0,r5,1 | |
242 | beq+ 66f | |
243 | 50: lbz r0,4(r4) | |
244 | 51: stb r0,4(r6) | |
245 | slwi r0,r0,8 | |
246 | adde r12,r12,r0 | |
247 | 66: addze r3,r12 | |
248 | addi r1,r1,16 | |
249 | beqlr+ cr7 | |
1bc8b816 | 250 | rlwinm r3,r3,8,0,31 /* odd destination address: rotate one byte */ |
14cf11af PM |
251 | blr |
252 | ||
7aef4136 | 253 | /* read fault */ |
14cf11af | 254 | src_error: |
7aef4136 CL |
255 | lwz r7,12(r1) |
256 | addi r1,r1,16 | |
257 | cmpwi cr0,r7,0 | |
258 | beqlr | |
259 | li r0,-EFAULT | |
260 | stw r0,0(r7) | |
14cf11af | 261 | blr |
7aef4136 | 262 | /* write fault */ |
14cf11af | 263 | dst_error: |
7aef4136 CL |
264 | lwz r8,8(r1) |
265 | addi r1,r1,16 | |
266 | cmpwi cr0,r8,0 | |
267 | beqlr | |
268 | li r0,-EFAULT | |
269 | stw r0,0(r8) | |
14cf11af PM |
270 | blr |
271 | ||
24bfa6a9 NP |
272 | EX_TABLE(70b, src_error); |
273 | EX_TABLE(71b, dst_error); | |
274 | EX_TABLE(72b, src_error); | |
275 | EX_TABLE(73b, dst_error); | |
276 | EX_TABLE(54b, dst_error); | |
7aef4136 CL |
277 | |
278 | /* | |
279 | * this stuff handles faults in the cacheline loop and branches to either | |
280 | * src_error (if in read part) or dst_error (if in write part) | |
281 | */ | |
282 | CSUM_COPY_16_BYTES_EXCODE(0) | |
283 | #if L1_CACHE_BYTES >= 32 | |
284 | CSUM_COPY_16_BYTES_EXCODE(1) | |
285 | #if L1_CACHE_BYTES >= 64 | |
286 | CSUM_COPY_16_BYTES_EXCODE(2) | |
287 | CSUM_COPY_16_BYTES_EXCODE(3) | |
288 | #if L1_CACHE_BYTES >= 128 | |
289 | CSUM_COPY_16_BYTES_EXCODE(4) | |
290 | CSUM_COPY_16_BYTES_EXCODE(5) | |
291 | CSUM_COPY_16_BYTES_EXCODE(6) | |
292 | CSUM_COPY_16_BYTES_EXCODE(7) | |
293 | #endif | |
294 | #endif | |
295 | #endif | |
296 | ||
24bfa6a9 NP |
297 | EX_TABLE(30b, src_error); |
298 | EX_TABLE(31b, dst_error); | |
299 | EX_TABLE(40b, src_error); | |
300 | EX_TABLE(41b, dst_error); | |
301 | EX_TABLE(50b, src_error); | |
302 | EX_TABLE(51b, dst_error); | |
303 | ||
9445aa1a | 304 | EXPORT_SYMBOL(csum_partial_copy_generic) |
e9c4943a CL |
305 | |
306 | /* | |
307 | * __sum16 csum_ipv6_magic(const struct in6_addr *saddr, | |
308 | * const struct in6_addr *daddr, | |
309 | * __u32 len, __u8 proto, __wsum sum) | |
310 | */ | |
311 | ||
312 | _GLOBAL(csum_ipv6_magic) | |
313 | lwz r8, 0(r3) | |
314 | lwz r9, 4(r3) | |
315 | addc r0, r7, r8 | |
316 | lwz r10, 8(r3) | |
317 | adde r0, r0, r9 | |
318 | lwz r11, 12(r3) | |
319 | adde r0, r0, r10 | |
320 | lwz r8, 0(r4) | |
321 | adde r0, r0, r11 | |
322 | lwz r9, 4(r4) | |
323 | adde r0, r0, r8 | |
324 | lwz r10, 8(r4) | |
325 | adde r0, r0, r9 | |
326 | lwz r11, 12(r4) | |
327 | adde r0, r0, r10 | |
328 | add r5, r5, r6 /* assumption: len + proto doesn't carry */ | |
329 | adde r0, r0, r11 | |
330 | adde r0, r0, r5 | |
331 | addze r0, r0 | |
332 | rotlwi r3, r0, 16 | |
333 | add r3, r0, r3 | |
334 | not r3, r3 | |
335 | rlwinm r3, r3, 16, 16, 31 | |
336 | blr | |
337 | EXPORT_SYMBOL(csum_ipv6_magic) |