/*
 * linux/arch/arm/lib/sha1.S
 *
 * SHA transform optimized for ARM
 *
 * Copyright: (C) 2005 by Nicolas Pitre <[email protected]>
 * Created:   September 17, 2005
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * The reference implementation for this code is linux/lib/sha1.c
 */

#include <linux/linkage.h>

        .text


/*
 * void sha_transform(__u32 *digest, const char *in, __u32 *W)
 *
 * Note: the "in" ptr may be unaligned.
 */
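
/*
 * Rough usage sketch (an assumption based on the linux/lib/sha1.c
 * reference, not spelled out in this file): "digest" holds the five
 * running hash words, "in" is one 64-byte input block, and "W" is
 * scratch space that must hold at least 80 words, since the schedule
 * expansion below writes W[16..79].
 *
 *      __u32 digest[5], W[80];
 *      sha_init(digest);
 *      sha_transform(digest, data, W);
 *
 * with one sha_transform() call per 64-byte block of input.
 */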

ENTRY(sha_transform)

        stmfd   sp!, {r4 - r8, lr}

        @ for (i = 0; i < 16; i++)
        @         W[i] = be32_to_cpu(in[i]);

#ifdef __ARMEB__
        mov     r4, r0
        mov     r0, r2
        mov     r2, #64
        bl      memcpy
        mov     r2, r0
        mov     r0, r4
#else
        mov     r3, r2
        mov     lr, #16
1:      ldrb    r4, [r1], #1
        ldrb    r5, [r1], #1
        ldrb    r6, [r1], #1
        ldrb    r7, [r1], #1
        subs    lr, lr, #1
        orr     r5, r5, r4, lsl #8
        orr     r6, r6, r5, lsl #8
        orr     r7, r7, r6, lsl #8
        str     r7, [r3], #4
        bne     1b
#endif
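
        @ Little-endian C equivalent of the loop above, as a sketch:
        @
        @       for (i = 0; i < 16; i++, in += 4)
        @               W[i] = (in[0] << 24) | (in[1] << 16) |
        @                      (in[2] << 8)  |  in[3];
        @
        @ Big-endian builds can simply memcpy, since the input bytes
        @ are already in the order SHA-1 treats as native.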

        @ for (i = 0; i < 64; i++)
        @         W[i+16] = ror(W[i+13] ^ W[i+8] ^ W[i+2] ^ W[i], 31);

        sub     r3, r2, #4
        mov     lr, #64
2:      ldr     r4, [r3, #4]!
        subs    lr, lr, #1
        ldr     r5, [r3, #8]
        ldr     r6, [r3, #32]
        ldr     r7, [r3, #52]
        eor     r4, r4, r5
        eor     r4, r4, r6
        eor     r4, r4, r7
        mov     r4, r4, ror #31
        str     r4, [r3, #64]
        bne     2b
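
        @ This is the usual W[i] = rol(W[i-3] ^ W[i-8] ^ W[i-14] ^
        @ W[i-16], 1) schedule expansion with the index shifted by 16,
        @ using the identity rol(x, 1) == ror(x, 31) on 32-bit words.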

/*
 * The SHA functions are:
 *
 * f1(B,C,D) = (D ^ (B & (C ^ D)))
 * f2(B,C,D) = (B ^ C ^ D)
 * f3(B,C,D) = ((B & C) | (D & (B | C)))
 *
 * Then the sub-blocks are processed as follows:
 *
 * A' = ror(A, 27) + f(B,C,D) + E + K + *W++
 * B' = A
 * C' = ror(B, 2)
 * D' = C
 * E' = D
 *
 * We therefore unroll each loop 5 times to avoid register shuffling.
 * Also the ror for C (and also for D and E, which are successively
 * derived from it) is applied in place to save an additional mov insn
 * for each round.
 */
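
/*
 * In C terms, one f1 round looks like the sketch below (the rol32/ror32
 * helpers are assumed, as in the C reference; the asm expresses
 * rol(A, 5) as ror(A, 27)):
 *
 *      t = ror32(a, 27) + (d ^ (b & (c ^ d))) + e + K + *W++;
 *      e = d; d = c; c = ror32(b, 2); b = a; a = t;
 *
 * The macros below keep the registers holding C, D and E pre-rotated
 * left by 2, so the "ror #2" folded into each operand recovers the
 * true value for free instead of costing a separate rotate.
 */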

.macro sha_f1, A, B, C, D, E
        ldr     r3, [r2], #4
        eor     ip, \C, \D
        add     \E, r1, \E, ror #2
        and     ip, \B, ip, ror #2
        add     \E, \E, \A, ror #27
        eor     ip, ip, \D, ror #2
        add     \E, \E, r3
        add     \E, \E, ip
.endm

.macro sha_f2, A, B, C, D, E
        ldr     r3, [r2], #4
        add     \E, r1, \E, ror #2
        eor     ip, \B, \C, ror #2
        add     \E, \E, \A, ror #27
        eor     ip, ip, \D, ror #2
        add     \E, \E, r3
        add     \E, \E, ip
.endm

.macro sha_f3, A, B, C, D, E
        ldr     r3, [r2], #4
        add     \E, r1, \E, ror #2
        orr     ip, \B, \C, ror #2
        add     \E, \E, \A, ror #27
        and     ip, ip, \D, ror #2
        add     \E, \E, r3
        and     r3, \B, \C, ror #2
        orr     ip, ip, r3
        add     \E, \E, ip
.endm
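
@ f3 is the majority function: (B & C) | (D & (B | C)) equals
@ (B & C) | (B & D) | (C & D), which is why sha_f3 needs two more
@ instructions than sha_f2.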

        ldmia   r0, {r4 - r8}

        mov     lr, #4
        ldr     r1, .L_sha_K + 0

        /*
         * Adjust initial values: C, D and E are kept rotated left by 2
         * (i.e. ror #30) so that the "ror #2" folded into the macros
         * above yields their true values.
         */
        mov     r6, r6, ror #30
        mov     r7, r7, ror #30
        mov     r8, r8, ror #30

3:      subs    lr, lr, #1
        sha_f1  r4, r5, r6, r7, r8
        sha_f1  r8, r4, r5, r6, r7
        sha_f1  r7, r8, r4, r5, r6
        sha_f1  r6, r7, r8, r4, r5
        sha_f1  r5, r6, r7, r8, r4
        bne     3b

        ldr     r1, .L_sha_K + 4
        mov     lr, #4

4:      subs    lr, lr, #1
        sha_f2  r4, r5, r6, r7, r8
        sha_f2  r8, r4, r5, r6, r7
        sha_f2  r7, r8, r4, r5, r6
        sha_f2  r6, r7, r8, r4, r5
        sha_f2  r5, r6, r7, r8, r4
        bne     4b

        ldr     r1, .L_sha_K + 8
        mov     lr, #4

5:      subs    lr, lr, #1
        sha_f3  r4, r5, r6, r7, r8
        sha_f3  r8, r4, r5, r6, r7
        sha_f3  r7, r8, r4, r5, r6
        sha_f3  r6, r7, r8, r4, r5
        sha_f3  r5, r6, r7, r8, r4
        bne     5b

        ldr     r1, .L_sha_K + 12
        mov     lr, #4

6:      subs    lr, lr, #1
        sha_f2  r4, r5, r6, r7, r8
        sha_f2  r8, r4, r5, r6, r7
        sha_f2  r7, r8, r4, r5, r6
        sha_f2  r6, r7, r8, r4, r5
        sha_f2  r5, r6, r7, r8, r4
        bne     6b

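        @ Add this block's result back into the digest; the ror #2 on
        @ C, D and E undoes the pre-rotation maintained by the macros.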
        ldmia   r0, {r1, r2, r3, ip, lr}
        add     r4, r1, r4
        add     r5, r2, r5
        add     r6, r3, r6, ror #2
        add     r7, ip, r7, ror #2
        add     r8, lr, r8, ror #2
        stmia   r0, {r4 - r8}

        ldmfd   sp!, {r4 - r8, pc}

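@ The four SHA-1 round constants, one per group of 20 rounds.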
.L_sha_K:
        .word   0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6


/*
 * void sha_init(__u32 *buf)
 */

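@ The standard SHA-1 initial hash values, H0 through H4.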
.L_sha_initial_digest:
        .word   0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0

ENTRY(sha_init)

        str     lr, [sp, #-4]!
        adr     r1, .L_sha_initial_digest
        ldmia   r1, {r1, r2, r3, ip, lr}
        stmia   r0, {r1, r2, r3, ip, lr}
        ldr     pc, [sp], #4