/* SPDX-License-Identifier: GPL-2.0 */
/*
 *  linux/arch/arm/lib/memset.S
 *
 *  Copyright (C) 1995-2000 Russell King
 *
 *  ASM optimised string functions
 */
#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.align	5

	.syntax	unified
#if CONFIG_IS_ENABLED(SYS_THUMB_BUILD) && !defined(MEMSET_NO_THUMB_BUILD)
	.thumb
	.thumb_func
#endif
ENTRY(memset)
	ands	r3, r0, #3		@ 1 unaligned?
	mov	ip, r0			@ preserve r0 as return value
	bne	6f			@ 1
/*
 * we know that the pointer in ip is aligned to a word boundary.
 */
1:	orr	r1, r1, r1, lsl #8
	orr	r1, r1, r1, lsl #16
	mov	r3, r1
	cmp	r2, #16
	blt	4f

#if ! CALGN(1)+0

/*
 * We need 2 extra registers for this loop - use r8 and the LR
 */
	stmfd	sp!, {r8, lr}
	mov	r8, r1
	mov	lr, r1

2:	subs	r2, r2, #64
	stmiage	ip!, {r1, r3, r8, lr}	@ 64 bytes at a time.
	stmiage	ip!, {r1, r3, r8, lr}
	stmiage	ip!, {r1, r3, r8, lr}
	stmiage	ip!, {r1, r3, r8, lr}
	bgt	2b
	ldmfdeq	sp!, {r8, pc}		@ Now <64 bytes to go.
/*
 * No need to correct the count; we're only testing bits from now on
 */
	tst	r2, #32
	stmiane	ip!, {r1, r3, r8, lr}
	stmiane	ip!, {r1, r3, r8, lr}
	tst	r2, #16
	stmiane	ip!, {r1, r3, r8, lr}
	ldmfd	sp!, {r8, lr}

#else

/*
 * This version aligns the destination pointer in order to write
 * whole cache lines at once.
 */

	stmfd	sp!, {r4-r8, lr}
	mov	r4, r1
	mov	r5, r1
	mov	r6, r1
	mov	r7, r1
	mov	r8, r1
	mov	lr, r1

	cmp	r2, #96
	tstgt	ip, #31
	ble	3f

	and	r8, ip, #31
	rsb	r8, r8, #32
	sub	r2, r2, r8
	movs	r8, r8, lsl #(32 - 4)
	stmiacs	ip!, {r4, r5, r6, r7}
	stmiami	ip!, {r4, r5}
	tst	r8, #(1 << 30)
	mov	r8, r1
	strne	r1, [ip], #4

3:	subs	r2, r2, #64
	stmiage	ip!, {r1, r3-r8, lr}
	stmiage	ip!, {r1, r3-r8, lr}
	bgt	3b
	ldmfdeq	sp!, {r4-r8, pc}

	tst	r2, #32
	stmiane	ip!, {r1, r3-r8, lr}
	tst	r2, #16
	stmiane	ip!, {r4-r7}
	ldmfd	sp!, {r4-r8, lr}

#endif

4:	tst	r2, #8
	stmiane	ip!, {r1, r3}
	tst	r2, #4
	strne	r1, [ip], #4
/*
 * When we get here, we've got less than 4 bytes to zero.  We
 * may have an unaligned pointer as well.
 */
5:	tst	r2, #2
	strbne	r1, [ip], #1
	strbne	r1, [ip], #1
	tst	r2, #1
	strbne	r1, [ip], #1
	ret	lr

6:	subs	r2, r2, #4		@ 1 do we have enough
	blt	5b			@ 1 bytes to align with?
	cmp	r3, #2			@ 1
	strblt	r1, [ip], #1		@ 1
	strble	r1, [ip], #1		@ 1
	strb	r1, [ip], #1		@ 1
	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
	b	1b
ENDPROC(memset)
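
For readers who do not follow ARM assembly, the overall strategy can be summarised in C: replicate the fill byte across a 32-bit word (the two "orr ... lsl" instructions at label 1:), byte-fill until the destination is word aligned (label 6:), store whole words in bulk (the unrolled stmia loops), then finish the sub-word tail (label 5:). The sketch below is only an illustrative, unoptimised equivalent under that reading, not the code this file is generated from; the name memset_sketch is invented for the example.

	#include <stddef.h>
	#include <stdint.h>

	void *memset_sketch(void *s, int c, size_t n)
	{
		unsigned char *p = s;
		uint32_t pattern = (uint8_t)c;

		pattern |= pattern << 8;	/* like "orr r1, r1, r1, lsl #8"  */
		pattern |= pattern << 16;	/* like "orr r1, r1, r1, lsl #16" */

		/* Head: byte stores until the pointer is word aligned (label 6:). */
		while (n && ((uintptr_t)p & 3)) {
			*p++ = (unsigned char)c;
			n--;
		}

		/* Body: bulk word stores (the unrolled stmia loops above). */
		while (n >= 4) {
			*(uint32_t *)(void *)p = pattern;
			p += 4;
			n -= 4;
		}

		/* Tail: remaining 0-3 bytes (label 5:). */
		while (n--)
			*p++ = (unsigned char)c;

		return s;
	}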