]> Git Repo - linux.git/blame - arch/powerpc/lib/checksum_32.S
Merge tag 'drm-next-2018-06-15' of git://anongit.freedesktop.org/drm/drm
[linux.git] / arch / powerpc / lib / checksum_32.S
CommitLineData
14cf11af
PM
1/*
2 * This file contains assembly-language implementations
3 * of IP-style 1's complement checksum routines.
4 *
5 * Copyright (C) 1995-1996 Gary Thomas ([email protected])
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 *
12 * Severely hacked about by Paul Mackerras ([email protected]).
13 */
14
15#include <linux/sys.h>
16#include <asm/processor.h>
7aef4136 17#include <asm/cache.h>
14cf11af
PM
18#include <asm/errno.h>
19#include <asm/ppc_asm.h>
9445aa1a 20#include <asm/export.h>
14cf11af
PM
21
22 .text
23
14cf11af
PM
24/*
25 * computes the checksum of a memory block at buff, length len,
26 * and adds in "sum" (32-bit)
27 *
7e393220 28 * __csum_partial(buff, len, sum)
14cf11af 29 */
/*
 * NOTE(review): the hex ids, two-letter initials and leading line numbers on
 * the lines below look like residue of the blame view this text was taken
 * from, not part of the assembly; they are kept untouched here.
 *
 * Register roles as used by the code below (argument order taken from the
 * "__csum_partial(buff, len, sum)" header comment):
 *   r3  buff, biased by -4 so that every access uses an offset of 4 or more;
 *       overwritten with the result just before blr
 *   r4  len in bytes
 *   r5  running sum, accumulated with adde (add with carry-in)
 *   r0, r6, r7, r8  scratch; CTR holds the loop counts
 */
7e393220 30_GLOBAL(__csum_partial)
14cf11af 31 subi r3,r3,4
48821a34 32 srawi. r6,r4,2 /* Divide len by 4 and also clear carry */
14cf11af 33 beq 3f /* if we're doing < 4 bytes */
48821a34 34 andi. r0,r3,2 /* Align buffer to longword boundary */
14cf11af 35 beq+ 1f
48821a34 36 lhz r0,4(r3) /* do 2 bytes to get aligned */
14cf11af 37 subi r4,r4,2
48821a34 38 addi r3,r3,2
14cf11af 39 srwi. r6,r4,2 /* # words to do */
48821a34 40 adde r5,r5,r0
14cf11af 41 beq 3f
f867d556
CL
/*
 * r6 = word count.  The words left over modulo 4 are summed one at a time
 * in loop 2: so that the unrolled loop below only sees whole 16-byte blocks.
 */
421: andi. r6,r6,3 /* Prepare to handle words 4 by 4 */
43 beq 21f
44 mtctr r6
48821a34
CL
452: lwzu r0,4(r3)
46 adde r5,r5,r0
14cf11af 47 bdnz 2b
f867d556
CL
/*
 * Unrolled loop, four words per iteration.  Loads are issued ahead of the
 * adde that consumes them: the word loaded into r8 by lwzu at the end of one
 * iteration is added at the top of the next one (label 22:) or, after the
 * last iteration, at label 23:.
 */
4821: srwi. r6,r4,4 /* # blocks of 4 words to do */
49 beq 3f
373e098e 50 lwz r0,4(r3)
f867d556 51 mtctr r6
f867d556 52 lwz r6,8(r3)
373e098e 53 adde r5,r5,r0
f867d556 54 lwz r7,12(r3)
373e098e 55 adde r5,r5,r6
f867d556 56 lwzu r8,16(r3)
373e098e
CL
57 adde r5,r5,r7
58 bdz 23f
5922: lwz r0,4(r3)
60 adde r5,r5,r8
61 lwz r6,8(r3)
f867d556 62 adde r5,r5,r0
373e098e 63 lwz r7,12(r3)
f867d556 64 adde r5,r5,r6
373e098e 65 lwzu r8,16(r3)
f867d556 66 adde r5,r5,r7
f867d556 67 bdnz 22b
373e098e 6823: adde r5,r5,r8
48821a34
CL
/* Tail: bit 1 of len selects one more halfword, bit 0 one more byte. */
693: andi. r0,r4,2
70 beq+ 4f
71 lhz r0,4(r3)
14cf11af 72 addi r3,r3,2
48821a34
CL
73 adde r5,r5,r0
744: andi. r0,r4,1
75 beq+ 5f
76 lbz r0,4(r3)
77 slwi r0,r0,8 /* Upper byte of the low halfword */
78 adde r5,r5,r0
/* Result (sum plus the last pending carry) is left in r3. */
795: addze r3,r5 /* add in final carry */
14cf11af 80 blr
9445aa1a 81EXPORT_SYMBOL(__csum_partial)
14cf11af
PM
82
83/*
84 * Computes the checksum of a memory block at src, length len,
85 * and adds in "sum" (32-bit), while copying the block to dst.
86 * If an access exception occurs on src or dst, it stores -EFAULT
87 * to *src_err or *dst_err respectively (only when that pointer is
88 * non-NULL). The fault handlers below do not zero the rest of dst.
89 *
90 * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err)
91 */
7aef4136
CL
/*
 * CSUM_COPY_16_BYTES_WITHEX(n): copy four words (16 bytes) and checksum them.
 *   - loads 4(r4), 8(r4), 12(r4), 16(r4) into r7..r10; the last load is
 *     lwzu, so r4 ends up advanced by 16
 *   - stores them to 4(r6) .. 16(r6); the last store is stwu, so r6 ends up
 *     advanced by 16
 *   - adds each word into r12 with adde (carry chained through CA)
 * Every load/store carries a numeric label built by token pasting:
 * 8<n>0..8<n>3 for the loads, 8<n>4..8<n>7 for the stores.
 */
92#define CSUM_COPY_16_BYTES_WITHEX(n) \
938 ## n ## 0: \
94 lwz r7,4(r4); \
958 ## n ## 1: \
96 lwz r8,8(r4); \
978 ## n ## 2: \
98 lwz r9,12(r4); \
998 ## n ## 3: \
100 lwzu r10,16(r4); \
1018 ## n ## 4: \
102 stw r7,4(r6); \
103 adde r12,r12,r7; \
1048 ## n ## 5: \
105 stw r8,8(r6); \
106 adde r12,r12,r8; \
1078 ## n ## 6: \
108 stw r9,12(r6); \
109 adde r12,r12,r9; \
1108 ## n ## 7: \
111 stwu r10,16(r6); \
112 adde r12,r12,r10
113
/*
 * CSUM_COPY_16_BYTES_EXCODE(n): one EX_TABLE entry per label emitted by
 * CSUM_COPY_16_BYTES_WITHEX(n).  The four load labels (0-3) are paired with
 * src_error, the four store labels (4-7) with dst_error.
 * NOTE(review): EX_TABLE is not defined in this file; presumably it records
 * a (faulting instruction, fixup) pair in the exception table - confirm.
 */
114#define CSUM_COPY_16_BYTES_EXCODE(n) \
24bfa6a9
NP
115 EX_TABLE(8 ## n ## 0b, src_error); \
116 EX_TABLE(8 ## n ## 1b, src_error); \
117 EX_TABLE(8 ## n ## 2b, src_error); \
118 EX_TABLE(8 ## n ## 3b, src_error); \
119 EX_TABLE(8 ## n ## 4b, dst_error); \
120 EX_TABLE(8 ## n ## 5b, dst_error); \
121 EX_TABLE(8 ## n ## 6b, dst_error); \
122 EX_TABLE(8 ## n ## 7b, dst_error);
7aef4136
CL
123
/* Both N_SO stab records below refer to the local label 0: that follows them. */
124 .text
125 .stabs "arch/powerpc/lib/",N_SO,0,0,0f
126 .stabs "checksum_32.S",N_SO,0,0,0f
1270:
128
/*
 * Local aliases for the cache-line geometry used by
 * csum_partial_copy_generic: line size in bytes, log2 of the line size, and
 * the mask of the offset-within-line bits.
 * NOTE(review): L1_CACHE_BYTES and L1_CACHE_SHIFT are not defined in this
 * file; presumably they come from <asm/cache.h> - confirm.
 */
129CACHELINE_BYTES = L1_CACHE_BYTES
130LG_CACHELINE_BYTES = L1_CACHE_SHIFT
131CACHELINE_MASK = (L1_CACHE_BYTES-1)
132
/*
 * NOTE(review): the hex ids, two-letter initials and leading line numbers on
 * the lines below look like residue of the blame view this text was taken
 * from, not part of the assembly; they are kept untouched here.
 *
 * Register roles as used by the code below (argument order taken from the
 * "csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err)" comment):
 *   r3   src on entry; later reused as byte accumulator and as dcbt offset,
 *        and finally holds the return value
 *   r4   dst on entry; replaced by src-4 (read pointer)
 *   r5   len, i.e. bytes still to process
 *   r6   sum on entry; replaced by dst-4 (write pointer)
 *   r7   src_err, r8 dst_err on entry; both are saved in a 16-byte stack
 *        frame for the fault handlers, then the registers are reused
 *   r12  running checksum, accumulated with adde
 *   cr7  eq set   => result is returned as is
 *        eq clear => result is rotated left by 8 bits before returning
 */
14cf11af 133_GLOBAL(csum_partial_copy_generic)
7aef4136
CL
134 stwu r1,-16(r1)
135 stw r7,12(r1)
136 stw r8,8(r1)
137
7aef4136
CL
/* addic with 0 copies sum into r12 and leaves the carry bit (CA) clear. */
138 addic r12,r6,0
139 addi r6,r4,-4
140 neg r0,r4
141 addi r4,r3,-4
142 andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */
/* Default: no final rotation (see the beqlr on cr7 at label 66:). */
8540571e 143 crset 4*cr7+eq
7aef4136
CL
144 beq 58f
145
146 cmplw 0,r5,r0 /* is this more than total to do? */
147 blt 63f /* if not much to do */
/*
 * r7 = 8 when bit 0 of the write pointer is set, else 0.  The sum is
 * pre-rotated by that amount and cr7 records whether r7 was zero.
 */
8540571e
CL
148 rlwinm r7,r6,3,0x8
149 rlwnm r12,r12,r7,0,31 /* odd destination address: rotate one byte */
150 cmplwi cr7,r7,0 /* is destination address even ? */
7aef4136
CL
151 andi. r8,r0,3 /* get it word-aligned first */
152 mtctr r8
153 beq+ 61f
154 li r3,0
/*
 * Byte loop: copy r8 bytes, collecting them in r3 (shift left by 8, insert
 * the new byte in the low 8 bits); r3 is added to the sum after the loop.
 */
15570: lbz r9,4(r4) /* do some bytes */
156 addi r4,r4,1
157 slwi r3,r3,8
158 rlwimi r3,r9,0,24,31
15971: stb r9,4(r6)
160 addi r6,r6,1
161 bdnz 70b
162 adde r12,r12,r3
/* len -= bytes up to the cache-line boundary; r0 >> 2 = words to copy. */
16361: subf r5,r0,r5
164 srwi. r0,r0,2
165 mtctr r0
166 beq 58f
16772: lwzu r9,4(r4) /* do some words */
168 adde r12,r12,r9
16973: stwu r9,4(r6)
170 bdnz 72b
171
/* r0 = number of whole cache lines, r5 = bytes left over after them. */
17258: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
173 clrlwi r5,r5,32-LG_CACHELINE_BYTES
174 li r11,4
175 beq 63f
176
177 /* Here we decide how far ahead to prefetch the source */
178 li r3,4
179 cmpwi r0,1
180 li r7,0
181 ble 114f
182 li r7,1
183#if MAX_COPY_PREFETCH > 1
184 /* Heuristically, for large transfers we prefetch
185 MAX_COPY_PREFETCH cachelines ahead. For small transfers
186 we prefetch 1 cacheline ahead. */
187 cmpwi r0,MAX_COPY_PREFETCH
188 ble 112f
189 li r7,MAX_COPY_PREFETCH
190112: mtctr r7
191111: dcbt r3,r4
192 addi r3,r3,CACHELINE_BYTES
193 bdnz 111b
194#else
195 dcbt r3,r4
196 addi r3,r3,CACHELINE_BYTES
197#endif /* MAX_COPY_PREFETCH > 1 */
198
/*
 * r7 = lines prefetched ahead.  First pass: CTR = r0 - r7 lines, and r0 is
 * set to r7, the number of lines left for the second pass.  After the loop,
 * if r0 != 0 the code comes back here with r7 = 0 and r3 = 4, so the second
 * pass handles the remaining r0 lines and then leaves r0 = 0.
 */
199114: subf r8,r7,r0
200 mr r0,r7
201 mtctr r8
202
/* Per line: touch the source at r4+r3, zero the destination line at r6+r11. */
20353: dcbt r3,r4
20454: dcbz r11,r6
205/* the main body of the cacheline loop */
206 CSUM_COPY_16_BYTES_WITHEX(0)
207#if L1_CACHE_BYTES >= 32
208 CSUM_COPY_16_BYTES_WITHEX(1)
209#if L1_CACHE_BYTES >= 64
210 CSUM_COPY_16_BYTES_WITHEX(2)
211 CSUM_COPY_16_BYTES_WITHEX(3)
212#if L1_CACHE_BYTES >= 128
213 CSUM_COPY_16_BYTES_WITHEX(4)
214 CSUM_COPY_16_BYTES_WITHEX(5)
215 CSUM_COPY_16_BYTES_WITHEX(6)
216 CSUM_COPY_16_BYTES_WITHEX(7)
217#endif
218#endif
219#endif
220 bdnz 53b
221 cmpwi r0,0
222 li r3,4
223 li r7,0
224 bne 114b
225
/* Remaining whole words (r5 >> 2), one per iteration. */
22663: srwi. r0,r5,2
227 mtctr r0
228 beq 64f
22930: lwzu r0,4(r4)
230 adde r12,r12,r0
23131: stwu r0,4(r6)
232 bdnz 30b
233
/* Tail: bit 1 of r5 selects one more halfword, bit 0 one more byte. */
23464: andi. r0,r5,2
235 beq+ 65f
23640: lhz r0,4(r4)
14cf11af 237 addi r4,r4,2
7aef4136
CL
23841: sth r0,4(r6)
239 adde r12,r12,r0
240 addi r6,r6,2
24165: andi. r0,r5,1
242 beq+ 66f
24350: lbz r0,4(r4)
24451: stb r0,4(r6)
245 slwi r0,r0,8
246 adde r12,r12,r0
/*
 * r3 = sum plus the last pending carry; pop the 16-byte frame.  Return right
 * away when cr7.eq is set, otherwise rotate the result left by 8 bits first.
 */
24766: addze r3,r12
248 addi r1,r1,16
249 beqlr+ cr7
1bc8b816 250 rlwinm r3,r3,8,0,31 /* odd destination address: rotate one byte */
14cf11af
PM
251 blr
252
/*
 * Fault fixups.  Each one reloads the pointer saved at function entry
 * (src_err from 12(r1), dst_err from 8(r1)), pops the frame, and returns
 * immediately if the pointer is zero; otherwise it stores -EFAULT through
 * the pointer and returns.  Neither handler writes r3.
 */
7aef4136 253/* read fault */
14cf11af 254src_error:
7aef4136
CL
255 lwz r7,12(r1)
256 addi r1,r1,16
257 cmpwi cr0,r7,0
258 beqlr
259 li r0,-EFAULT
260 stw r0,0(r7)
14cf11af 261 blr
7aef4136 262/* write fault */
14cf11af 263dst_error:
7aef4136
CL
264 lwz r8,8(r1)
265 addi r1,r1,16
266 cmpwi cr0,r8,0
267 beqlr
268 li r0,-EFAULT
269 stw r0,0(r8)
14cf11af
PM
270 blr
271
/*
 * Fixups for the alignment loops: loads (70, 72) pair with src_error,
 * stores (71, 73) and the dcbz at 54 pair with dst_error.
 */
24bfa6a9
NP
272 EX_TABLE(70b, src_error);
273 EX_TABLE(71b, dst_error);
274 EX_TABLE(72b, src_error);
275 EX_TABLE(73b, dst_error);
276 EX_TABLE(54b, dst_error);
7aef4136
CL
277
278/*
279 * this stuff handles faults in the cacheline loop and branches to either
280 * src_error (if in read part) or dst_error (if in write part)
281 */
282 CSUM_COPY_16_BYTES_EXCODE(0)
283#if L1_CACHE_BYTES >= 32
284 CSUM_COPY_16_BYTES_EXCODE(1)
285#if L1_CACHE_BYTES >= 64
286 CSUM_COPY_16_BYTES_EXCODE(2)
287 CSUM_COPY_16_BYTES_EXCODE(3)
288#if L1_CACHE_BYTES >= 128
289 CSUM_COPY_16_BYTES_EXCODE(4)
290 CSUM_COPY_16_BYTES_EXCODE(5)
291 CSUM_COPY_16_BYTES_EXCODE(6)
292 CSUM_COPY_16_BYTES_EXCODE(7)
293#endif
294#endif
295#endif
296
/* Fixups for the trailing word (30/31), halfword (40/41) and byte (50/51). */
24bfa6a9
NP
297 EX_TABLE(30b, src_error);
298 EX_TABLE(31b, dst_error);
299 EX_TABLE(40b, src_error);
300 EX_TABLE(41b, dst_error);
301 EX_TABLE(50b, src_error);
302 EX_TABLE(51b, dst_error);
303
9445aa1a 304EXPORT_SYMBOL(csum_partial_copy_generic)
e9c4943a
CL
305
306/*
307 * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
308 * const struct in6_addr *daddr,
309 * __u32 len, __u8 proto, __wsum sum)
310 */
311
/*
 * Register roles as used by the code below (argument order taken from the
 * prototype comment above this function):
 *   r3  saddr, r4  daddr: four words are read from each
 *   r5  len, r6  proto: added together with a plain add
 *   r7  sum: seeds the accumulator
 *   r0  accumulator; r8-r11 hold the loaded words
 * Loads are interleaved with the adds.  addc starts the carry chain and
 * every following adde adds the carry left by the previous addition.
 * The result is returned in the low 16 bits of r3.
 */
312_GLOBAL(csum_ipv6_magic)
313 lwz r8, 0(r3)
314 lwz r9, 4(r3)
315 addc r0, r7, r8
316 lwz r10, 8(r3)
317 adde r0, r0, r9
318 lwz r11, 12(r3)
319 adde r0, r0, r10
320 lwz r8, 0(r4)
321 adde r0, r0, r11
322 lwz r9, 4(r4)
323 adde r0, r0, r8
324 lwz r10, 8(r4)
325 adde r0, r0, r9
326 lwz r11, 12(r4)
327 adde r0, r0, r10
328 add r5, r5, r6 /* assumption: len + proto doesn't carry */
329 adde r0, r0, r11
330 adde r0, r0, r5
/* Add the last pending carry into the 32-bit sum. */
331 addze r0, r0
/*
 * Fold to 16 bits: adding r0 to itself rotated by 16 leaves the sum of the
 * two halfwords (with the carry out of the low halves) in the upper
 * halfword.  The value is then complemented and the upper halfword is moved
 * to bits 16-31, with the other bits cleared.
 */
332 rotlwi r3, r0, 16
333 add r3, r0, r3
334 not r3, r3
335 rlwinm r3, r3, 16, 16, 31
336 blr
337EXPORT_SYMBOL(csum_ipv6_magic)
This page took 0.802697 seconds and 4 git commands to generate.