]>
Commit | Line | Data |
---|---|---|
c494e070 RS |
1 | /* gf128mul.h - GF(2^128) multiplication functions |
2 | * | |
3 | * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. | |
4 | * Copyright (c) 2006 Rik Snel <[email protected]> | |
5 | * | |
6 | * Based on Dr Brian Gladman's (GPL'd) work published at | |
7 | * http://fp.gladman.plus.com/cryptography_technology/index.htm | |
8 | * See the original copyright notice below. | |
9 | * | |
10 | * This program is free software; you can redistribute it and/or modify it | |
11 | * under the terms of the GNU General Public License as published by the Free | |
12 | * Software Foundation; either version 2 of the License, or (at your option) | |
13 | * any later version. | |
14 | */ | |
15 | /* | |
16 | --------------------------------------------------------------------------- | |
17 | Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. | |
18 | ||
19 | LICENSE TERMS | |
20 | ||
21 | The free distribution and use of this software in both source and binary | |
22 | form is allowed (with or without changes) provided that: | |
23 | ||
24 | 1. distributions of this source code include the above copyright | |
25 | notice, this list of conditions and the following disclaimer; | |
26 | ||
27 | 2. distributions in binary form include the above copyright | |
28 | notice, this list of conditions and the following disclaimer | |
29 | in the documentation and/or other associated materials; | |
30 | ||
31 | 3. the copyright holder's name is not used to endorse products | |
32 | built using this software without specific written permission. | |
33 | ||
34 | ALTERNATIVELY, provided that this notice is retained in full, this product | |
35 | may be distributed under the terms of the GNU General Public License (GPL), | |
36 | in which case the provisions of the GPL apply INSTEAD OF those given above. | |
37 | ||
38 | DISCLAIMER | |
39 | ||
40 | This software is provided 'as is' with no explicit or implied warranties | |
41 | in respect of its properties, including, but not limited to, correctness | |
42 | and/or fitness for purpose. | |
43 | --------------------------------------------------------------------------- | |
44 | Issue Date: 31/01/2006 | |
45 | ||
63be5b53 | 46 | An implementation of field multiplication in Galois Field GF(2^128) |
c494e070 RS |
47 | */ |
48 | ||
49 | #ifndef _CRYPTO_GF128MUL_H | |
50 | #define _CRYPTO_GF128MUL_H | |
51 | ||
acb9b159 | 52 | #include <asm/byteorder.h> |
c494e070 RS |
53 | #include <crypto/b128ops.h> |
54 | #include <linux/slab.h> | |
55 | ||
56 | /* Comment by Rik: | |
57 | * | |
631dd1a8 JM |
58 | * For some background on GF(2^128) see for example: |
59 | * http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/proposedmodes/gcm/gcm-revised-spec.pdf | |
c494e070 RS |
60 | * |
61 | * The elements of GF(2^128) := GF(2)[X]/(X^128-X^7-X^2-X^1-1) can | |
62 | * be mapped to computer memory in a variety of ways. Let's examine | |
63 | * three common cases. | |
64 | * | |
65 | * Take a look at the 16 binary octets below in memory order. The msb's | |
66 | * are left and the lsb's are right. char b[16] is an array and b[0] is | |
67 | * the first octet. | |
68 | * | |
63be5b53 | 69 | * 10000000 00000000 00000000 00000000 .... 00000000 00000000 00000000 |
c494e070 RS |
70 | * b[0] b[1] b[2] b[3] b[13] b[14] b[15] |
71 | * | |
72 | * Every bit is a coefficient of some power of X. We can store the bits | |
73 | * in every byte in little-endian order and the bytes themselves also in | |
74 | * little endian order. I will call this lle (little-little-endian). | |
75 | * The above buffer represents the polynomial 1, and X^7+X^2+X^1+1 looks | |
76 | * like 11100001 00000000 .... 00000000 = { 0xE1, 0x00, }. | |
77 | * This format was originally implemented in gf128mul and is used | |
78 | * in GCM (Galois/Counter mode) and in ABL (Arbitrary Block Length). | |
79 | * | |
80 | * Another convention says: store the bits in bigendian order and the | |
81 | * bytes also. This is bbe (big-big-endian). Now the buffer above | |
82 | * represents X^127. X^7+X^2+X^1+1 looks like 00000000 .... 10000111, | |
83 | * b[15] = 0x87 and the rest is 0. LRW uses this convention and bbe | |
84 | * is partly implemented. | |
85 | * | |
86 | * Both of the above formats are easy to implement on big-endian | |
87 | * machines. | |
88 | * | |
63be5b53 EB |
89 | * XTS and EME (the latter of which is patent encumbered) use the ble |
90 | * format (bits are stored in big endian order and the bytes in little | |
91 | * endian). The above buffer represents X^7 in this case and the | |
92 | * primitive polynomial is b[0] = 0x87. | |
c494e070 RS |
93 | * |
94 | * The common machine word-size is smaller than 128 bits, so to make | |
95 | * an efficient implementation we must split into machine word sizes. | |
63be5b53 EB |
96 | * This implementation uses 64-bit words for the moment. Machine |
97 | * endianness comes into play. The lle format in relation to machine | |
98 | * endianness is discussed below by the original author of gf128mul Dr | |
99 | * Brian Gladman. | |
c494e070 RS |
100 | * |
101 | * Let's look at the bbe and ble format on a little endian machine. | |
102 | * | |
103 | * bbe on a little endian machine u32 x[4]: | |
104 | * | |
105 | * MS x[0] LS MS x[1] LS | |
106 | * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls | |
107 | * 103..96 111.104 119.112 127.120 71...64 79...72 87...80 95...88 | |
108 | * | |
109 | * MS x[2] LS MS x[3] LS | |
110 | * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls | |
111 | * 39...32 47...40 55...48 63...56 07...00 15...08 23...16 31...24 | |
112 | * | |
113 | * ble on a little endian machine | |
114 | * | |
115 | * MS x[0] LS MS x[1] LS | |
116 | * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls | |
117 | * 31...24 23...16 15...08 07...00 63...56 55...48 47...40 39...32 | |
118 | * | |
119 | * MS x[2] LS MS x[3] LS | |
120 | * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls | |
121 | * 95...88 87...80 79...72 71...64 127.120 199.112 111.104 103..96 | |
122 | * | |
123 | * Multiplications in GF(2^128) are mostly bit-shifts, so you see why | |
124 | * ble (and lbe also) are easier to implement on a little-endian | |
125 | * machine than on a big-endian machine. The converse holds for bbe | |
126 | * and lle. | |
127 | * | |
128 | * Note: to have good alignment, it seems to me that it is sufficient | |
129 | * to keep elements of GF(2^128) in type u64[2]. On 32-bit wordsize | |
130 | * machines this will automatically aligned to wordsize and on a 64-bit | |
131 | * machine also. | |
132 | */ | |
63be5b53 EB |
133 | /* Multiply a GF(2^128) field element by x. Field elements are |
134 | held in arrays of bytes in which field bits 8n..8n + 7 are held in | |
135 | byte[n], with lower indexed bits placed in the more numerically | |
136 | significant bit positions within bytes. | |
c494e070 RS |
137 | |
138 | On little endian machines the bit indexes translate into the bit | |
139 | positions within four 32-bit words in the following way | |
140 | ||
141 | MS x[0] LS MS x[1] LS | |
142 | ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls | |
143 | 24...31 16...23 08...15 00...07 56...63 48...55 40...47 32...39 | |
144 | ||
145 | MS x[2] LS MS x[3] LS | |
146 | ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls | |
147 | 88...95 80...87 72...79 64...71 120.127 112.119 104.111 96..103 | |
148 | ||
149 | On big endian machines the bit indexes translate into the bit | |
150 | positions within four 32-bit words in the following way | |
151 | ||
152 | MS x[0] LS MS x[1] LS | |
153 | ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls | |
154 | 00...07 08...15 16...23 24...31 32...39 40...47 48...55 56...63 | |
155 | ||
156 | MS x[2] LS MS x[3] LS | |
157 | ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls | |
158 | 64...71 72...79 80...87 88...95 96..103 104.111 112.119 120.127 | |
159 | */ | |
160 | ||
161 | /* A slow generic version of gf_mul, implemented for lle and bbe | |
162 | * It multiplies a and b and puts the result in a */ | |
163 | void gf128mul_lle(be128 *a, const be128 *b); | |
164 | ||
165 | void gf128mul_bbe(be128 *a, const be128 *b); | |
166 | ||
acb9b159 OM |
167 | /* |
168 | * The following functions multiply a field element by x in | |
169 | * the polynomial field representation. They use 64-bit word operations | |
170 | * to gain speed but compensate for machine endianness and hence work | |
171 | * correctly on both styles of machine. | |
172 | * | |
173 | * They are defined here for performance. | |
174 | */ | |
175 | ||
176 | static inline u64 gf128mul_mask_from_bit(u64 x, int which) | |
177 | { | |
178 | /* a constant-time version of 'x & ((u64)1 << which) ? (u64)-1 : 0' */ | |
179 | return ((s64)(x << (63 - which)) >> 63); | |
180 | } | |
181 | ||
182 | static inline void gf128mul_x_lle(be128 *r, const be128 *x) | |
183 | { | |
184 | u64 a = be64_to_cpu(x->a); | |
185 | u64 b = be64_to_cpu(x->b); | |
186 | ||
187 | /* equivalent to gf128mul_table_le[(b << 7) & 0xff] << 48 | |
188 | * (see crypto/gf128mul.c): */ | |
189 | u64 _tt = gf128mul_mask_from_bit(b, 0) & ((u64)0xe1 << 56); | |
190 | ||
191 | r->b = cpu_to_be64((b >> 1) | (a << 63)); | |
192 | r->a = cpu_to_be64((a >> 1) ^ _tt); | |
193 | } | |
194 | ||
195 | static inline void gf128mul_x_bbe(be128 *r, const be128 *x) | |
196 | { | |
197 | u64 a = be64_to_cpu(x->a); | |
198 | u64 b = be64_to_cpu(x->b); | |
199 | ||
200 | /* equivalent to gf128mul_table_be[a >> 63] (see crypto/gf128mul.c): */ | |
201 | u64 _tt = gf128mul_mask_from_bit(a, 63) & 0x87; | |
202 | ||
203 | r->a = cpu_to_be64((a << 1) | (b >> 63)); | |
204 | r->b = cpu_to_be64((b << 1) ^ _tt); | |
205 | } | |
206 | ||
207 | /* needed by XTS */ | |
e55318c8 | 208 | static inline void gf128mul_x_ble(le128 *r, const le128 *x) |
acb9b159 OM |
209 | { |
210 | u64 a = le64_to_cpu(x->a); | |
211 | u64 b = le64_to_cpu(x->b); | |
212 | ||
213 | /* equivalent to gf128mul_table_be[b >> 63] (see crypto/gf128mul.c): */ | |
e55318c8 | 214 | u64 _tt = gf128mul_mask_from_bit(a, 63) & 0x87; |
acb9b159 | 215 | |
e55318c8 OM |
216 | r->a = cpu_to_le64((a << 1) | (b >> 63)); |
217 | r->b = cpu_to_le64((b << 1) ^ _tt); | |
acb9b159 | 218 | } |
c494e070 RS |
219 | |
220 | /* 4k table optimization */ | |
221 | ||
222 | struct gf128mul_4k { | |
223 | be128 t[256]; | |
224 | }; | |
225 | ||
226 | struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g); | |
227 | struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g); | |
3ea996dd EB |
228 | void gf128mul_4k_lle(be128 *a, const struct gf128mul_4k *t); |
229 | void gf128mul_4k_bbe(be128 *a, const struct gf128mul_4k *t); | |
acfc5878 | 230 | void gf128mul_x8_ble(le128 *r, const le128 *x); |
c494e070 RS |
231 | static inline void gf128mul_free_4k(struct gf128mul_4k *t) |
232 | { | |
75aa0a7c | 233 | kzfree(t); |
c494e070 RS |
234 | } |
235 | ||
236 | ||
d266f44b | 237 | /* 64k table optimization, implemented for bbe */ |
c494e070 RS |
238 | |
239 | struct gf128mul_64k { | |
240 | struct gf128mul_4k *t[16]; | |
241 | }; | |
242 | ||
d266f44b AC |
243 | /* First initialize with the constant factor with which you |
244 | * want to multiply and then call gf128mul_64k_bbe with the other | |
245 | * factor in the first argument, and the table in the second. | |
246 | * Afterwards, the result is stored in *a. | |
247 | */ | |
c494e070 RS |
248 | struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g); |
249 | void gf128mul_free_64k(struct gf128mul_64k *t); | |
3ea996dd | 250 | void gf128mul_64k_bbe(be128 *a, const struct gf128mul_64k *t); |
c494e070 RS |
251 | |
252 | #endif /* _CRYPTO_GF128MUL_H */ |